aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet
diff options
context:
space:
mode:
author: John Baldwin <jhb@FreeBSD.org> 2014-09-04 19:09:08 +0000
committer: John Baldwin <jhb@FreeBSD.org> 2014-09-04 19:09:08 +0000
commit: a7c7f2a7e29e2eeaa90c70e6136026b91b1475b9 (patch)
tree: a476f605db291679cc7b0a56bbadc44fc2c13f55 /sys/netinet
parent: 9908eab82ee51f39158ba749e818052411a907d4 (diff)
download: src-a7c7f2a7e29e2eeaa90c70e6136026b91b1475b9.tar.gz
          src-a7c7f2a7e29e2eeaa90c70e6136026b91b1475b9.zip
In tcp_input(), don't acquire the pcbinfo global write lock for SYN
packets targeting a listening socket. This reduces TCP input processing
starvation under high SYN load (e.g. short-lived TCP connections or a
SYN flood).

Submitted by: Julien Charbon <jcharbon@verisign.com>
Reviewed by: adrian, hiren, jhb, Mike Bentkofsky
Notes
Notes: svn path=/head/; revision=271119
Diffstat (limited to 'sys/netinet')
-rw-r--r-- sys/netinet/tcp_input.c 27
-rw-r--r-- sys/netinet/tcp_syncache.c 3
2 files changed, 15 insertions, 15 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index c8404fcd8926..1be94340b035 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -748,12 +748,12 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
/*
* Locate pcb for segment; if we're likely to add or remove a
- * connection then first acquire pcbinfo lock. There are two cases
+ * connection then first acquire pcbinfo lock. There are three cases
* where we might discover later we need a write lock despite the
- * flags: ACKs moving a connection out of the syncache, and ACKs for
- * a connection in TIMEWAIT.
+ * flags: ACKs moving a connection out of the syncache, ACKs for a
+ * connection in TIMEWAIT and SYNs not targeting a listening socket.
*/
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) {
+ if ((thflags & (TH_FIN | TH_RST)) != 0) {
INP_INFO_WLOCK(&V_tcbinfo);
ti_locked = TI_WLOCKED;
} else
@@ -982,10 +982,11 @@ relocked:
* now be in TIMEWAIT.
*/
#ifdef INVARIANTS
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0)
+ if ((thflags & (TH_FIN | TH_RST)) != 0)
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
#endif
- if (tp->t_state != TCPS_ESTABLISHED) {
+ if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
+ (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) {
if (ti_locked == TI_UNLOCKED) {
if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
@@ -1026,17 +1027,13 @@ relocked:
/*
* When the socket is accepting connections (the INPCB is in LISTEN
* state) we look into the SYN cache if this is a new connection
- * attempt or the completion of a previous one. Because listen
- * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be
- * held in this case.
+ * attempt or the completion of a previous one.
*/
if (so->so_options & SO_ACCEPTCONN) {
struct in_conninfo inc;
KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
"tp not listening", __func__));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
-
bzero(&inc, sizeof(inc));
#ifdef INET6
if (isipv6) {
@@ -1059,6 +1056,8 @@ relocked:
* socket appended to the listen queue in SYN_RECEIVED state.
*/
if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
/*
* Parse the TCP options here because
* syncookies need access to the reflected
@@ -1339,8 +1338,12 @@ relocked:
syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
/*
* Entry added to syncache and mbuf consumed.
- * Everything already unlocked by syncache_add().
+ * Only the listen socket is unlocked by syncache_add().
*/
+ if (ti_locked == TI_WLOCKED) {
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ ti_locked = TI_UNLOCKED;
+ }
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
return (IPPROTO_DONE);
} else if (tp->t_state == TCPS_LISTEN) {
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 9ade7f5ff271..55a504460e6d 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -1118,7 +1118,6 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct syncache scs;
struct ucred *cred;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp); /* listen socket */
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
("%s: unexpected tcp flags", __func__));
@@ -1149,13 +1148,11 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
#ifdef MAC
if (mac_syncache_init(&maclabel) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
goto done;
} else
mac_syncache_create(maclabel, inp);
#endif
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
/*
* Remember the IP options, if any.