aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet/tcp_input.c
diff options
context:
space:
mode:
authorJulien Charbon <jch@FreeBSD.org>2015-08-03 12:13:54 +0000
committerJulien Charbon <jch@FreeBSD.org>2015-08-03 12:13:54 +0000
commitff9b006d61dcf000f044dbaee56954be204137f4 (patch)
treeb0a37aa79ef7873f11fc9e5037d69c1487c31c9c /sys/netinet/tcp_input.c
parente553ca49948a983329fb4f357d3fa6abdd2a0c3d (diff)
downloadsrc-ff9b006d61dcf000f044dbaee56954be204137f4.tar.gz
src-ff9b006d61dcf000f044dbaee56954be204137f4.zip
Decompose TCP INP_INFO lock to increase short-lived TCP connections scalability:
- The existing TCP INP_INFO lock continues to protect the global inpcb list stability during full list traversal (e.g. tcp_pcblist()). - A new INP_LIST lock protects inpcb list actual modifications (inp allocation and free) and inpcb global counters. It allows to use TCP INP_INFO_RLOCK lock in critical paths (e.g. tcp_input()) and INP_INFO_WLOCK only in occasional operations that walk all connections. PR: 183659 Differential Revision: https://reviews.freebsd.org/D2599 Reviewed by: jhb, adrian Tested by: adrian, nitroboost-gmail.com Sponsored by: Verisign, Inc.
Notes
Notes: svn path=/head/; revision=286227
Diffstat (limited to 'sys/netinet/tcp_input.c')
-rw-r--r--sys/netinet/tcp_input.c114
1 files changed, 59 insertions, 55 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 3ca37b549129..e7ef6dbb80d8 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -608,7 +608,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
char *s = NULL; /* address and port logging */
int ti_locked;
#define TI_UNLOCKED 1
-#define TI_WLOCKED 2
+#define TI_RLOCKED 2
#ifdef TCPDEBUG
/*
@@ -797,8 +797,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* connection in TIMEWAIT and SYNs not targeting a listening socket.
*/
if ((thflags & (TH_FIN | TH_RST)) != 0) {
- INP_INFO_WLOCK(&V_tcbinfo);
- ti_locked = TI_WLOCKED;
+ INP_INFO_RLOCK(&V_tcbinfo);
+ ti_locked = TI_RLOCKED;
} else
ti_locked = TI_UNLOCKED;
@@ -820,8 +820,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
findpcb:
#ifdef INVARIANTS
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
} else {
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
}
@@ -969,20 +969,20 @@ findpcb:
relocked:
if (inp->inp_flags & INP_TIMEWAIT) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
+ if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
INP_WUNLOCK(inp);
- INP_INFO_WLOCK(&V_tcbinfo);
- ti_locked = TI_WLOCKED;
+ INP_INFO_RLOCK(&V_tcbinfo);
+ ti_locked = TI_RLOCKED;
INP_WLOCK(inp);
if (in_pcbrele_wlocked(inp)) {
inp = NULL;
goto findpcb;
}
} else
- ti_locked = TI_WLOCKED;
+ ti_locked = TI_RLOCKED;
}
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
if (thflags & TH_SYN)
tcp_dooptions(&to, optp, optlen, TO_SYN);
@@ -991,7 +991,7 @@ relocked:
*/
if (tcp_twcheck(inp, &to, th, m, tlen))
goto findpcb;
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (IPPROTO_DONE);
}
/*
@@ -1022,16 +1022,16 @@ relocked:
*/
#ifdef INVARIANTS
if ((thflags & (TH_FIN | TH_RST)) != 0)
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
#endif
if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
(tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
+ if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
INP_WUNLOCK(inp);
- INP_INFO_WLOCK(&V_tcbinfo);
- ti_locked = TI_WLOCKED;
+ INP_INFO_RLOCK(&V_tcbinfo);
+ ti_locked = TI_RLOCKED;
INP_WLOCK(inp);
if (in_pcbrele_wlocked(inp)) {
inp = NULL;
@@ -1039,9 +1039,9 @@ relocked:
}
goto relocked;
} else
- ti_locked = TI_WLOCKED;
+ ti_locked = TI_RLOCKED;
}
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
}
#ifdef MAC
@@ -1096,7 +1096,7 @@ relocked:
*/
if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
/*
* Parse the TCP options here because
* syncookies need access to the reflected
@@ -1148,7 +1148,11 @@ relocked:
*/
INP_WUNLOCK(inp); /* listen socket */
inp = sotoinpcb(so);
- INP_WLOCK(inp); /* new connection */
+ /*
+ * New connection inpcb is already locked by
+ * syncache_expand().
+ */
+ INP_WLOCK_ASSERT(inp);
tp = intotcpcb(inp);
KASSERT(tp->t_state == TCPS_SYN_RECEIVED,
("%s: ", __func__));
@@ -1379,8 +1383,8 @@ relocked:
* Entry added to syncache and mbuf consumed.
* Only the listen socket is unlocked by syncache_add().
*/
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
@@ -1429,8 +1433,8 @@ relocked:
dropwithreset:
TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
@@ -1453,8 +1457,8 @@ dropunlock:
if (m != NULL)
TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
@@ -1511,13 +1515,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
tp->t_state != TCPS_ESTABLISHED) {
- KASSERT(ti_locked == TI_WLOCKED, ("%s ti_locked %d for "
+ KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
"SYN/FIN/RST/!EST", __func__, ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
} else {
#ifdef INVARIANTS
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
else {
KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
"ti_locked: %d", __func__, ti_locked));
@@ -1690,8 +1694,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
/*
* This is a pure ack for outstanding data.
*/
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
TCPSTAT_INC(tcps_predack);
@@ -1794,8 +1798,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* nothing on the reassembly queue and we have enough
* buffer space to take it.
*/
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
/* Clean receiver SACK report if present */
@@ -2031,9 +2035,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_state_change(tp, TCPS_SYN_RECEIVED);
}
- KASSERT(ti_locked == TI_WLOCKED, ("%s: trimthenstep6: "
+ KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
"ti_locked %d", __func__, ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
@@ -2106,8 +2110,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
(tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_WLOCKED,
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ KASSERT(ti_locked == TI_RLOCKED,
("%s: TH_RST ti_locked %d, th %p tp %p",
__func__, ti_locked, th, tp));
KASSERT(tp->t_state != TCPS_SYN_SENT,
@@ -2150,9 +2154,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* Send challenge ACK for any SYN in synchronized state.
*/
if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) {
- KASSERT(ti_locked == TI_WLOCKED,
+ KASSERT(ti_locked == TI_RLOCKED,
("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
TCPSTAT_INC(tcps_badsyn);
if (V_tcp_insecure_syn &&
@@ -2265,9 +2269,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) {
- KASSERT(ti_locked == TI_WLOCKED, ("%s: SS_NOFDEREF && "
+ KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && "
"CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
@@ -2768,9 +2772,9 @@ process_ACK:
*/
case TCPS_CLOSING:
if (ourfinisacked) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return;
}
@@ -2784,7 +2788,7 @@ process_ACK:
*/
case TCPS_LAST_ACK:
if (ourfinisacked) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_close(tp);
goto drop;
}
@@ -2999,18 +3003,18 @@ dodata: /* XXX */
* standard timers.
*/
case TCPS_FIN_WAIT_2:
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_WLOCKED, ("%s: dodata "
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata "
"TCP_FIN_WAIT_2 ti_locked: %d", __func__,
ti_locked));
tcp_twstart(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return;
}
}
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
#ifdef TCPDEBUG
@@ -3065,8 +3069,8 @@ dropafterack:
tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
&tcp_savetcp, 0);
#endif
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
tp->t_flags |= TF_ACKNOW;
@@ -3076,8 +3080,8 @@ dropafterack:
return;
dropwithreset:
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
if (tp != NULL) {
@@ -3088,8 +3092,8 @@ dropwithreset:
return;
drop:
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS