diff options
author | Julien Charbon <jch@FreeBSD.org> | 2015-08-03 12:13:54 +0000 |
---|---|---|
committer | Julien Charbon <jch@FreeBSD.org> | 2015-08-03 12:13:54 +0000 |
commit | ff9b006d61dcf000f044dbaee56954be204137f4 (patch) | |
tree | b0a37aa79ef7873f11fc9e5037d69c1487c31c9c /sys/netinet/tcp_input.c | |
parent | e553ca49948a983329fb4f357d3fa6abdd2a0c3d (diff) | |
download | src-ff9b006d61dcf000f044dbaee56954be204137f4.tar.gz src-ff9b006d61dcf000f044dbaee56954be204137f4.zip |
Decompose TCP INP_INFO lock to increase short-lived TCP connections scalability:
- The existing TCP INP_INFO lock continues to protect the global inpcb list
stability during full list traversal (e.g. tcp_pcblist()).
- A new INP_LIST lock protects inpcb list actual modifications (inp allocation
and free) and inpcb global counters.
It allows to use TCP INP_INFO_RLOCK lock in critical paths (e.g. tcp_input())
and INP_INFO_WLOCK only in occasional operations that walk all connections.
PR: 183659
Differential Revision: https://reviews.freebsd.org/D2599
Reviewed by: jhb, adrian
Tested by: adrian, nitroboost-gmail.com
Sponsored by: Verisign, Inc.
Notes
Notes:
svn path=/head/; revision=286227
Diffstat (limited to 'sys/netinet/tcp_input.c')
-rw-r--r-- | sys/netinet/tcp_input.c | 114 |
1 files changed, 59 insertions, 55 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 3ca37b549129..e7ef6dbb80d8 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -608,7 +608,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto) char *s = NULL; /* address and port logging */ int ti_locked; #define TI_UNLOCKED 1 -#define TI_WLOCKED 2 +#define TI_RLOCKED 2 #ifdef TCPDEBUG /* @@ -797,8 +797,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto) * connection in TIMEWAIT and SYNs not targeting a listening socket. */ if ((thflags & (TH_FIN | TH_RST)) != 0) { - INP_INFO_WLOCK(&V_tcbinfo); - ti_locked = TI_WLOCKED; + INP_INFO_RLOCK(&V_tcbinfo); + ti_locked = TI_RLOCKED; } else ti_locked = TI_UNLOCKED; @@ -820,8 +820,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto) findpcb: #ifdef INVARIANTS - if (ti_locked == TI_WLOCKED) { - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) { + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } else { INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); } @@ -969,20 +969,20 @@ findpcb: relocked: if (inp->inp_flags & INP_TIMEWAIT) { if (ti_locked == TI_UNLOCKED) { - if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) { + if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) { in_pcbref(inp); INP_WUNLOCK(inp); - INP_INFO_WLOCK(&V_tcbinfo); - ti_locked = TI_WLOCKED; + INP_INFO_RLOCK(&V_tcbinfo); + ti_locked = TI_RLOCKED; INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) { inp = NULL; goto findpcb; } } else - ti_locked = TI_WLOCKED; + ti_locked = TI_RLOCKED; } - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); if (thflags & TH_SYN) tcp_dooptions(&to, optp, optlen, TO_SYN); @@ -991,7 +991,7 @@ relocked: */ if (tcp_twcheck(inp, &to, th, m, tlen)) goto findpcb; - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); return (IPPROTO_DONE); } /* @@ -1022,16 +1022,16 @@ relocked: */ #ifdef INVARIANTS if ((thflags & (TH_FIN | TH_RST)) != 0) - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); #endif if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) || (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) { if (ti_locked == TI_UNLOCKED) { - if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) { + if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) { in_pcbref(inp); INP_WUNLOCK(inp); - INP_INFO_WLOCK(&V_tcbinfo); - ti_locked = TI_WLOCKED; + INP_INFO_RLOCK(&V_tcbinfo); + ti_locked = TI_RLOCKED; INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) { inp = NULL; @@ -1039,9 +1039,9 @@ relocked: } goto relocked; } else - ti_locked = TI_WLOCKED; + ti_locked = TI_RLOCKED; } - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } #ifdef MAC @@ -1096,7 +1096,7 @@ relocked: */ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) { - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* * Parse the TCP options here because * syncookies need access to the reflected @@ -1148,7 +1148,11 @@ relocked: */ INP_WUNLOCK(inp); /* listen socket */ inp = sotoinpcb(so); - INP_WLOCK(inp); /* new connection */ + /* + * New connection inpcb is already locked by + * syncache_expand(). + */ + INP_WLOCK_ASSERT(inp); tp = intotcpcb(inp); KASSERT(tp->t_state == TCPS_SYN_RECEIVED, ("%s: ", __func__)); @@ -1379,8 +1383,8 @@ relocked: * Entry added to syncache and mbuf consumed. * Only the listen socket is unlocked by syncache_add(). */ - if (ti_locked == TI_WLOCKED) { - INP_INFO_WUNLOCK(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) { + INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; } INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); @@ -1429,8 +1433,8 @@ relocked: dropwithreset: TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th); - if (ti_locked == TI_WLOCKED) { - INP_INFO_WUNLOCK(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) { + INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; } #ifdef INVARIANTS @@ -1453,8 +1457,8 @@ dropunlock: if (m != NULL) TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th); - if (ti_locked == TI_WLOCKED) { - INP_INFO_WUNLOCK(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) { + INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; } #ifdef INVARIANTS @@ -1511,13 +1515,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || tp->t_state != TCPS_ESTABLISHED) { - KASSERT(ti_locked == TI_WLOCKED, ("%s ti_locked %d for " + KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for " "SYN/FIN/RST/!EST", __func__, ti_locked)); - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } else { #ifdef INVARIANTS - if (ti_locked == TI_WLOCKED) - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); else { KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " "ti_locked: %d", __func__, ti_locked)); @@ -1690,8 +1694,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, /* * This is a pure ack for outstanding data. */ - if (ti_locked == TI_WLOCKED) - INP_INFO_WUNLOCK(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; TCPSTAT_INC(tcps_predack); @@ -1794,8 +1798,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, * nothing on the reassembly queue and we have enough * buffer space to take it. */ - if (ti_locked == TI_WLOCKED) - INP_INFO_WUNLOCK(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; /* Clean receiver SACK report if present */ @@ -2031,9 +2035,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, tcp_state_change(tp, TCPS_SYN_RECEIVED); } - KASSERT(ti_locked == TI_WLOCKED, ("%s: trimthenstep6: " + KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: " "ti_locked %d", __func__, ti_locked)); - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); /* @@ -2106,8 +2110,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) || (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); - KASSERT(ti_locked == TI_WLOCKED, + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + KASSERT(ti_locked == TI_RLOCKED, ("%s: TH_RST ti_locked %d, th %p tp %p", __func__, ti_locked, th, tp)); KASSERT(tp->t_state != TCPS_SYN_SENT, @@ -2150,9 +2154,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, * Send challenge ACK for any SYN in synchronized state. */ if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) { - KASSERT(ti_locked == TI_WLOCKED, + KASSERT(ti_locked == TI_RLOCKED, ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked)); - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); TCPSTAT_INC(tcps_badsyn); if (V_tcp_insecure_syn && @@ -2265,9 +2269,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && tlen) { - KASSERT(ti_locked == TI_WLOCKED, ("%s: SS_NOFDEREF && " + KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && " "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked)); - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data " @@ -2768,9 +2772,9 @@ process_ACK: */ case TCPS_CLOSING: if (ourfinisacked) { - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); m_freem(m); return; } @@ -2784,7 +2788,7 @@ process_ACK: */ case TCPS_LAST_ACK: if (ourfinisacked) { - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tp = tcp_close(tp); goto drop; } @@ -2999,18 +3003,18 @@ dodata: /* XXX */ * standard timers. */ case TCPS_FIN_WAIT_2: - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); - KASSERT(ti_locked == TI_WLOCKED, ("%s: dodata " + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata " "TCP_FIN_WAIT_2 ti_locked: %d", __func__, ti_locked)); tcp_twstart(tp); - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); return; } } - if (ti_locked == TI_WLOCKED) - INP_INFO_WUNLOCK(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; #ifdef TCPDEBUG @@ -3065,8 +3069,8 @@ dropafterack: tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif - if (ti_locked == TI_WLOCKED) - INP_INFO_WUNLOCK(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; tp->t_flags |= TF_ACKNOW; @@ -3076,8 +3080,8 @@ dropafterack: return; dropwithreset: - if (ti_locked == TI_WLOCKED) - INP_INFO_WUNLOCK(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; if (tp != NULL) { @@ -3088,8 +3092,8 @@ dropwithreset: return; drop: - if (ti_locked == TI_WLOCKED) { - INP_INFO_WUNLOCK(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) { + INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; } #ifdef INVARIANTS |