diff options
| author | Gleb Smirnoff <glebius@FreeBSD.org> | 2026-04-12 18:33:07 +0000 |
|---|---|---|
| committer | Gleb Smirnoff <glebius@FreeBSD.org> | 2026-04-12 18:33:07 +0000 |
| commit | 40dbb06fa73cac37d57563c07e55efd0cabbd488 (patch) | |
| tree | c11f41fa7c8baac00322576c2a72446f3fbf2eb7 | |
| parent | ce283e115b023514a8886c1c1f1c68df7cd5e9a9 (diff) | |
inpcb: retire INP_DROPPED and in_pcbdrop()
The inpcb flag INP_DROPPED served two purposes.
It was used by TCP and subsystems running on top of TCP as a flag that
marks a connection that is now in TCPS_CLOSED, but was in some other state
before (not a new-born connection). Create a new TCP flag TF_DISCONNECTED
for this purpose.
in_pcbdrop() was TCP's version of in_pcbdisconnect() that also set
INP_DROPPED. Use in_pcbdisconnect() instead.
The second purpose of INP_DROPPED was as a negative lookup mask in
inp_smr_lock(), as an SMR-protected lookup may see inpcbs that have been
removed from the hash. We already had INP_INHASHLIST, which marks an
inpcb that is in the hash. Convert it into INP_UNCONNECTED with the opposite
meaning. This allows it to be combined with INP_FREED for the negative lookup
mask.
The Chelsio/ToE and kTLS changes are done with some style refactoring,
like moving inp/tp assignments up and using macros for that. However, no
deep analysis was done to check whether those checks are really needed; it
could be that some are not.
Reviewed by: rrs
Differential Revision: https://reviews.freebsd.org/D56186
29 files changed, 258 insertions, 363 deletions
diff --git a/sys/dev/cxgbe/crypto/t6_kern_tls.c b/sys/dev/cxgbe/crypto/t6_kern_tls.c index 454b2e264a0e..584e5015acfa 100644 --- a/sys/dev/cxgbe/crypto/t6_kern_tls.c +++ b/sys/dev/cxgbe/crypto/t6_kern_tls.c @@ -458,15 +458,15 @@ t6_tls_tag_alloc(if_t ifp, union if_snd_tag_alloc_params *params, } inp = params->tls.inp; + tp = intotcpcb(inp); INP_RLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { INP_RUNLOCK(inp); error = ECONNRESET; goto failed; } tlsp->inp = inp; - tp = intotcpcb(inp); if (tp->t_flags & TF_REQ_TSTMP) { tlsp->using_timestamps = true; if ((tp->ts_offset & 0xfffffff) != 0) { @@ -501,7 +501,7 @@ t6_tls_tag_alloc(if_t ifp, union if_snd_tag_alloc_params *params, goto failed; } - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { INP_RUNLOCK(inp); error = ECONNRESET; goto failed; diff --git a/sys/dev/cxgbe/crypto/t7_kern_tls.c b/sys/dev/cxgbe/crypto/t7_kern_tls.c index d9710b5bd13f..b6078b9b53b6 100644 --- a/sys/dev/cxgbe/crypto/t7_kern_tls.c +++ b/sys/dev/cxgbe/crypto/t7_kern_tls.c @@ -246,7 +246,7 @@ t7_tls_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, inp = params->tls.inp; INP_RLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (intotcpcb(inp)->t_flags & TF_DISCONNECTED) { INP_RUNLOCK(inp); error = ECONNRESET; goto failed; diff --git a/sys/dev/cxgbe/cxgbei/cxgbei.c b/sys/dev/cxgbe/cxgbei/cxgbei.c index ccca45f5f761..4b341c9d37b2 100644 --- a/sys/dev/cxgbe/cxgbei/cxgbei.c +++ b/sys/dev/cxgbe/cxgbei/cxgbei.c @@ -499,10 +499,11 @@ do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len; } + tp = intotcpcb(inp); INP_WLOCK(inp); - if (__predict_false(inp->inp_flags & INP_DROPPED)) { - CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", - __func__, tid, pdu_len, inp->inp_flags); + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { + CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), 
t_flags 0x%x", + __func__, tid, pdu_len, tp->t_flags); INP_WUNLOCK(inp); icl_cxgbei_conn_pdu_free(NULL, ip); toep->ulpcb2 = NULL; @@ -513,7 +514,6 @@ do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) * T6+ does not report data PDUs received via DDP without F * set. This can result in gaps in the TCP sequence space. */ - tp = intotcpcb(inp); MPASS(chip_id(sc) >= CHELSIO_T6 || icp->icp_seq == tp->rcv_nxt); tp->rcv_nxt = icp->icp_seq + pdu_len; tp->t_rcvtime = ticks; @@ -652,10 +652,11 @@ do_rx_iscsi_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) toep->ofld_rxq->rx_iscsi_data_digest_errors++; } + tp = intotcpcb(inp); INP_WLOCK(inp); - if (__predict_false(inp->inp_flags & INP_DROPPED)) { - CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", - __func__, tid, pdu_len, inp->inp_flags); + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { + CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), t_flags 0x%x", + __func__, tid, pdu_len, tp->t_flags); INP_WUNLOCK(inp); icl_cxgbei_conn_pdu_free(NULL, ip); toep->ulpcb2 = NULL; @@ -663,8 +664,6 @@ do_rx_iscsi_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) return (0); } - tp = intotcpcb(inp); - /* * If icc is NULL, the connection is being closed in * icl_cxgbei_conn_close(), just drop this data. 
diff --git a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c index d90d7904a8ae..2e7767a0fc27 100644 --- a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c +++ b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c @@ -434,6 +434,7 @@ icl_cxgbei_tx_main(void *arg) struct toepcb *toep = icc->toep; struct socket *so = ic->ic_socket; struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); struct icl_pdu *ip; struct mbuf *m; struct mbufq mq; @@ -476,7 +477,7 @@ icl_cxgbei_tx_main(void *arg) INP_WLOCK(inp); ICL_CONN_UNLOCK(ic); - if (__predict_false(inp->inp_flags & INP_DROPPED) || + if (__predict_false(tp->t_flags & TF_DISCONNECTED) || __predict_false((toep->flags & TPF_ATTACHED) == 0)) { mbufq_drain(&mq); } else { @@ -1080,7 +1081,7 @@ icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd) inp = sotoinpcb(so); INP_WLOCK(inp); tp = intotcpcb(inp); - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { INP_WUNLOCK(inp); error = ENOTCONN; goto out; @@ -1334,6 +1335,7 @@ icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, struct cxgbei_ddp_state *ddp; struct ppod_reservation *prsv; struct inpcb *inp; + struct tcpcb *tp; struct mbufq mq; uint32_t itt; int rc = 0; @@ -1421,8 +1423,9 @@ no_ddp: * detached already. 
*/ inp = sotoinpcb(ic->ic_socket); + tp = intotcpcb(inp); INP_WLOCK(inp); - if ((inp->inp_flags & INP_DROPPED) != 0) { + if ((tp->t_flags & TF_DISCONNECTED) != 0) { INP_WUNLOCK(inp); mbufq_drain(&mq); t4_free_page_pods(prsv); @@ -1497,6 +1500,7 @@ icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, struct icl_pdu *ip, struct ppod_reservation *prsv; struct ctl_sg_entry *sgl, sg_entry; struct inpcb *inp; + struct tcpcb *tp; struct mbufq mq; int sg_entries = ctsio->kern_sg_entries; uint32_t ttt; @@ -1597,9 +1601,10 @@ no_ddp: return (ECONNRESET); } inp = sotoinpcb(ic->ic_socket); + tp = intotcpcb(inp); INP_WLOCK(inp); ICL_CONN_UNLOCK(ic); - if ((inp->inp_flags & INP_DROPPED) != 0) { + if ((tp->t_flags & TF_DISCONNECTED) != 0) { INP_WUNLOCK(inp); mbufq_drain(&mq); t4_free_page_pods(prsv); diff --git a/sys/dev/cxgbe/iw_cxgbe/qp.c b/sys/dev/cxgbe/iw_cxgbe/qp.c index cbf4bae00a60..372fc5418b91 100644 --- a/sys/dev/cxgbe/iw_cxgbe/qp.c +++ b/sys/dev/cxgbe/iw_cxgbe/qp.c @@ -64,7 +64,7 @@ struct cpl_set_tcb_rpl; #include "iw_cxgbe.h" #include "user.h" -static int creds(struct toepcb *toep, struct inpcb *inp, size_t wrsize); +static int creds(struct toepcb *toep, struct tcpcb *tp, size_t wrsize); static int max_fr_immd = T4_MAX_FR_IMMD;//SYSCTL parameter later... 
static int alloc_ird(struct c4iw_dev *dev, u32 ird) @@ -1149,7 +1149,7 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, term->ecode = qhp->attr.ecode; } else build_term_codes(err_cqe, &term->layer_etype, &term->ecode); - ret = creds(toep, inp, sizeof(*wqe)); + ret = creds(toep, tp, sizeof(*wqe)); if (ret) { free_wrqe(wr); return; @@ -1253,8 +1253,7 @@ rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, struct c4iw_ep *ep) int ret; struct wrqe *wr; struct socket *so = ep->com.so; - struct inpcb *inp = sotoinpcb(so); - struct tcpcb *tp = intotcpcb(inp); + struct tcpcb *tp = intotcpcb(sotoinpcb(so)); struct toepcb *toep = tp->t_toe; KASSERT(rhp == qhp->rhp && ep == qhp->ep, ("%s: EDOOFUS", __func__)); @@ -1277,7 +1276,7 @@ rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, struct c4iw_ep *ep) c4iw_init_wr_wait(&ep->com.wr_wait); - ret = creds(toep, inp, sizeof(*wqe)); + ret = creds(toep, tp, sizeof(*wqe)); if (ret) { free_wrqe(wr); return ret; @@ -1315,14 +1314,14 @@ static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init) } static int -creds(struct toepcb *toep, struct inpcb *inp, size_t wrsize) +creds(struct toepcb *toep, struct tcpcb *tp, size_t wrsize) { struct ofld_tx_sdesc *txsd; CTR3(KTR_IW_CXGBE, "%s:creB %p %u", __func__, toep , wrsize); - INP_WLOCK(inp); - if ((inp->inp_flags & INP_DROPPED) != 0) { - INP_WUNLOCK(inp); + INP_WLOCK(tptoinpcb(tp)); + if (tp->t_flags & TF_DISCONNECTED) { + INP_WUNLOCK(tptoinpcb(tp)); return (EINVAL); } txsd = &toep->txsd[toep->txsd_pidx]; @@ -1336,7 +1335,7 @@ creds(struct toepcb *toep, struct inpcb *inp, size_t wrsize) if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; - INP_WUNLOCK(inp); + INP_WUNLOCK(tptoinpcb(tp)); CTR5(KTR_IW_CXGBE, "%s:creE %p %u %u %u", __func__, toep , txsd->tx_credits, toep->tx_credits, toep->txsd_pidx); return (0); @@ -1351,8 +1350,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) struct c4iw_rdev 
*rdev = &qhp->rhp->rdev; struct adapter *sc = rdev->adap; struct socket *so = ep->com.so; - struct inpcb *inp = sotoinpcb(so); - struct tcpcb *tp = intotcpcb(inp); + struct tcpcb *tp = intotcpcb(sotoinpcb(so)); struct toepcb *toep = tp->t_toe; CTR5(KTR_IW_CXGBE, "%s qhp %p qid 0x%x ep %p tid %u", __func__, qhp, @@ -1416,7 +1414,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) c4iw_init_wr_wait(&ep->com.wr_wait); - ret = creds(toep, inp, sizeof(*wqe)); + ret = creds(toep, tp, sizeof(*wqe)); if (ret) { free_wrqe(wr); free_ird(rhp, qhp->attr.max_ird); diff --git a/sys/dev/cxgbe/nvmf/nvmf_che.c b/sys/dev/cxgbe/nvmf/nvmf_che.c index 5c2174b8a40b..afdfc2f1b758 100644 --- a/sys/dev/cxgbe/nvmf/nvmf_che.c +++ b/sys/dev/cxgbe/nvmf/nvmf_che.c @@ -555,6 +555,7 @@ che_write_adapter_mem(struct nvmf_che_qpair *qp, uint32_t addr, uint32_t len, struct toepcb *toep = qp->toep; struct socket *so = qp->so; struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); struct mbufq mq; int error; @@ -568,7 +569,7 @@ che_write_adapter_mem(struct nvmf_che_qpair *qp, uint32_t addr, uint32_t len, goto error; INP_WLOCK(inp); - if ((inp->inp_flags & INP_DROPPED) != 0) { + if ((tp->t_flags & TF_DISCONNECTED) != 0) { INP_WUNLOCK(inp); error = ECONNRESET; goto error; @@ -862,12 +863,13 @@ nvmf_che_write_pdu(struct nvmf_che_qpair *qp, struct mbuf *m) struct epoch_tracker et; struct socket *so = qp->so; struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep = qp->toep; CURVNET_SET(so->so_vnet); NET_EPOCH_ENTER(et); INP_WLOCK(inp); - if (__predict_false(inp->inp_flags & INP_DROPPED) || + if (__predict_false(tp->t_flags & TF_DISCONNECTED) || __predict_false((toep->flags & TPF_ATTACHED) == 0)) { m_freem(m); } else { @@ -2052,10 +2054,11 @@ do_nvmt_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) ("%s: payload length mismatch", __func__)); inp = toep->inp; + tp = intotcpcb(inp); INP_WLOCK(inp); - if (inp->inp_flags 
& INP_DROPPED) { - CTR(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", - __func__, tid, len, inp->inp_flags); + if (tp->t_flags & TF_DISCONNECTED) { + CTR(KTR_CXGBE, "%s: tid %u, rx (%d bytes), t_flags 0x%x", + __func__, tid, len, tp->t_flags); INP_WUNLOCK(inp); m_freem(m); return (0); @@ -2070,7 +2073,6 @@ do_nvmt_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) mbufq_enqueue(&qp->rx_data, m); SOCKBUF_UNLOCK(&so->so_rcv); - tp = intotcpcb(inp); tp->t_rcvtime = ticks; #ifdef VERBOSE_TRACES @@ -2092,6 +2094,7 @@ do_nvmt_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) struct nvmf_che_qpair *qp = toep->ulpcb; struct socket *so = qp->so; struct inpcb *inp = toep->inp; + struct tcpcb *tp = intotcpcb(inp); u_int hlen __diagused; bool empty; @@ -2107,9 +2110,9 @@ do_nvmt_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) ("%s: payload length mismatch", __func__)); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { - CTR(KTR_CXGBE, "%s: tid %u, rx (hlen %u), inp_flags 0x%x", - __func__, tid, hlen, inp->inp_flags); + if (tp->t_flags & TF_DISCONNECTED) { + CTR(KTR_CXGBE, "%s: tid %u, rx (hlen %u), t_flags 0x%x", + __func__, tid, hlen, tp->t_flags); INP_WUNLOCK(inp); m_freem(m); return (0); @@ -2505,7 +2508,7 @@ che_allocate_qpair(bool controller, const nvlist_t *nvl) inp = sotoinpcb(so); INP_WLOCK(inp); tp = intotcpcb(inp); - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { INP_WUNLOCK(inp); free(qp->fl_cid_set, M_NVMF_CHE); free(qp->fl_cids, M_NVMF_CHE); diff --git a/sys/dev/cxgbe/tom/t4_connect.c b/sys/dev/cxgbe/tom/t4_connect.c index c236ee060bc2..e5f6053e2cb6 100644 --- a/sys/dev/cxgbe/tom/t4_connect.c +++ b/sys/dev/cxgbe/tom/t4_connect.c @@ -78,6 +78,7 @@ do_act_establish(struct sge_iq *iq, const struct rss_header *rss, u_int atid = G_TID_TID(ntohl(cpl->tos_atid)); struct toepcb *toep = lookup_atid(sc, atid); struct inpcb *inp = toep->inp; + struct tcpcb *tp = 
intotcpcb(inp); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == atid, ("%s: toep tid/atid mismatch", __func__)); @@ -95,7 +96,7 @@ do_act_establish(struct sge_iq *iq, const struct rss_header *rss, toep->ctrlq = &sc->sge.ctrlq[toep->params.ctrlq_idx]; } - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { /* socket closed by the kernel before hw told us it connected */ diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c index 7e1c497240c2..6e34d5f54897 100644 --- a/sys/dev/cxgbe/tom/t4_cpl_io.c +++ b/sys/dev/cxgbe/tom/t4_cpl_io.c @@ -245,13 +245,13 @@ send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt) struct cpl_abort_req *req; int tid = toep->tid; struct inpcb *inp = toep->inp; - struct tcpcb *tp = intotcpcb(inp); /* don't use if INP_DROPPED */ + struct tcpcb *tp = intotcpcb(inp); INP_WLOCK_ASSERT(inp); CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s", __func__, toep->tid, - inp->inp_flags & INP_DROPPED ? "inp dropped" : + tp->t_flags & TF_DISCONNECTED ? "TCP disconnected" : tcpstates[tp->t_state], toep->flags, inp->inp_flags, toep->flags & TPF_ABORT_SHUTDOWN ? @@ -273,7 +273,7 @@ send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt) req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid); - if (inp->inp_flags & INP_DROPPED) + if (tp->t_flags & TF_DISCONNECTED) req->rsvd0 = htobe32(snd_nxt); else req->rsvd0 = htobe32(tp->snd_nxt); @@ -284,7 +284,7 @@ send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt) * XXX: What's the correct way to tell that the inp hasn't been detached * from its socket? Should I even be flushing the snd buffer here? */ - if ((inp->inp_flags & INP_DROPPED) == 0) { + if ((tp->t_flags & TF_DISCONNECTED) == 0) { struct socket *so = inp->inp_socket; if (so != NULL) /* because I'm not sure. 
See comment above */ @@ -1588,8 +1588,8 @@ t4_tod_output(struct toedev *tod, struct tcpcb *tp) struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); - KASSERT((inp->inp_flags & INP_DROPPED) == 0, - ("%s: inp %p dropped.", __func__, inp)); + KASSERT((tp->t_flags & TF_DISCONNECTED) == 0, + ("%s: tcpcb %p disconnected", __func__, tp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); t4_push_data(sc, toep, 0); @@ -1607,8 +1607,8 @@ t4_send_fin(struct toedev *tod, struct tcpcb *tp) struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); - KASSERT((inp->inp_flags & INP_DROPPED) == 0, - ("%s: inp %p dropped.", __func__, inp)); + KASSERT((tp->t_flags & TF_DISCONNECTED) == 0, + ("%s: tcpcb %p disconnected", __func__, tp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); toep->flags |= TPF_SEND_FIN; @@ -1628,8 +1628,8 @@ t4_send_rst(struct toedev *tod, struct tcpcb *tp) struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); - KASSERT((inp->inp_flags & INP_DROPPED) == 0, - ("%s: inp %p dropped.", __func__, inp)); + KASSERT((tp->t_flags & TF_DISCONNECTED) == 0, + ("%s: tcpcb %p disconnected", __func__, tp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); /* hmmmm */ @@ -1921,7 +1921,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) } toep->flags |= TPF_ABORT_SHUTDOWN; - if ((inp->inp_flags & INP_DROPPED) == 0) { + if ((tp->t_flags & TF_DISCONNECTED) == 0) { struct socket *so = inp->inp_socket; if (so != NULL) @@ -2010,17 +2010,16 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) m_adj(m, sizeof(*cpl)); len = m->m_pkthdr.len; + tp = intotcpcb(inp); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { - CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", - __func__, tid, len, inp->inp_flags); + if (tp->t_flags & TF_DISCONNECTED) { + CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), t_flags 0x%x", + __func__, tid, len, tp->t_flags); INP_WUNLOCK(inp); m_freem(m); return (0); } - tp = 
intotcpcb(inp); - if (__predict_false(ulp_mode(toep) == ULP_MODE_TLS && toep->flags & TPF_TLS_RECEIVE)) { /* Received "raw" data on a TLS socket. */ @@ -2170,6 +2169,7 @@ do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) } inp = toep->inp; + tp = intotcpcb(inp); KASSERT(opcode == CPL_FW4_ACK, ("%s: unexpected opcode 0x%x", __func__, opcode)); @@ -2183,10 +2183,8 @@ do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) return (0); } - KASSERT((inp->inp_flags & INP_DROPPED) == 0, - ("%s: inp_flags 0x%x", __func__, inp->inp_flags)); - - tp = intotcpcb(inp); + KASSERT((tp->t_flags & TF_DISCONNECTED) == 0, + ("%s: t_flags 0x%x", __func__, tp->t_flags)); if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) { tcp_seq snd_una = be32toh(cpl->snd_una); @@ -2627,8 +2625,9 @@ sendanother: /* Inlined tcp_usr_send(). */ inp = toep->inp; + tp = intotcpcb(inp); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { INP_WUNLOCK(inp); SOCK_IO_SEND_UNLOCK(so); error = ECONNRESET; @@ -2642,8 +2641,7 @@ sendanother: sbappendstream(sb, m, 0); m = NULL; - if (!(inp->inp_flags & INP_DROPPED)) { - tp = intotcpcb(inp); + if (!(tp->t_flags & TF_DISCONNECTED)) { if (moretocome) tp->t_flags |= TF_MORETOCOME; error = tcp_output(tp); diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c index 35fb1061d867..9d422c2b793e 100644 --- a/sys/dev/cxgbe/tom/t4_ddp.c +++ b/sys/dev/cxgbe/tom/t4_ddp.c @@ -641,8 +641,8 @@ handle_ddp_data_aio(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, uint32_t report = be32toh(ddp_report); unsigned int db_idx; struct inpcb *inp = toep->inp; + struct tcpcb *tp = intotcpcb(inp); struct ddp_buffer *db; - struct tcpcb *tp; struct socket *so; struct sockbuf *sb; struct kaiocb *job; @@ -664,13 +664,13 @@ handle_ddp_data_aio(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, db = &toep->ddp.db[db_idx]; job = db->job; - if (__predict_false(inp->inp_flags & INP_DROPPED)) { + 
if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { /* * This can happen due to an administrative tcpdrop(8). * Just fail the request with ECONNRESET. */ - CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, inp_flags 0x%x", - __func__, toep->tid, be32toh(rcv_nxt), len, inp->inp_flags); + CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, t_flags 0x%x", + __func__, toep->tid, be32toh(rcv_nxt), len, tp->t_flags); if (aio_clear_cancel_function(job)) ddp_complete_one(job, ECONNRESET); goto completed; @@ -859,7 +859,7 @@ handle_ddp_data_rcvbuf(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, { uint32_t report = be32toh(ddp_report); struct inpcb *inp = toep->inp; - struct tcpcb *tp; + struct tcpcb *tp = intotcpcb(inp); struct socket *so; struct sockbuf *sb; struct ddp_buffer *db; @@ -881,20 +881,18 @@ handle_ddp_data_rcvbuf(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, toep->ddp.active_id, toep->tid)); db = &toep->ddp.db[db_idx]; - if (__predict_false(inp->inp_flags & INP_DROPPED)) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { /* * This can happen due to an administrative tcpdrop(8). * Just ignore the received data. */ - CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, inp_flags 0x%x", - __func__, toep->tid, be32toh(rcv_nxt), len, inp->inp_flags); + CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, t_flags 0x%x", + __func__, toep->tid, be32toh(rcv_nxt), len, tp->t_flags); if (invalidated) complete_ddp_buffer(toep, db, db_idx); goto out; } - tp = intotcpcb(inp); - /* * For RX_DDP_COMPLETE, len will be zero and rcv_nxt is the * sequence number of the next byte to receive. 
The length of diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c index b879f6883f25..359267b7db90 100644 --- a/sys/dev/cxgbe/tom/t4_listen.c +++ b/sys/dev/cxgbe/tom/t4_listen.c @@ -886,6 +886,7 @@ do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss, unsigned int status = cpl->status; struct listen_ctx *lctx = lookup_stid(sc, stid); struct inpcb *inp = lctx->inp; + struct tcpcb *tp = intotcpcb(inp); #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif @@ -911,13 +912,13 @@ do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss, * If the inp has been dropped (listening socket closed) then * listen_stop must have run and taken the inp out of the hash. */ - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { KASSERT(listen_hash_del(sc, inp) == NULL, ("%s: inp %p still in listen hash", __func__, inp)); } #endif - if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) { + if (tp->t_flags & TF_DISCONNECTED && status != CPL_ERR_NONE) { if (release_lctx(sc, lctx) != NULL) INP_WUNLOCK(inp); return (status); @@ -928,7 +929,7 @@ do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss, * it has started the hardware listener. Stop it; the lctx will be * released in do_close_server_rpl. 
*/ - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { destroy_server(sc, lctx); INP_WUNLOCK(inp); return (status); @@ -1336,6 +1337,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, unsigned int tid = GET_TID(cpl); struct listen_ctx *lctx = lookup_stid(sc, stid); struct inpcb *inp; + struct tcpcb *tp; struct socket *so; struct in_conninfo inc; struct tcphdr th; @@ -1477,10 +1479,11 @@ found: } inp = lctx->inp; /* listening socket, not owned by TOE */ + tp = intotcpcb(inp); INP_RLOCK(inp); /* Don't offload if the listening socket has closed */ - if (__predict_false(inp->inp_flags & INP_DROPPED)) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { INP_RUNLOCK(inp); NET_EPOCH_EXIT(et); REJECT_PASS_ACCEPT_REQ(false); @@ -1622,6 +1625,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss, struct synq_entry *synqe = lookup_tid(sc, tid); struct listen_ctx *lctx = synqe->lctx; struct inpcb *inp = lctx->inp, *new_inp; + struct tcpcb *tp = intotcpcb(inp); struct socket *so; struct tcphdr th; struct tcpopt to; @@ -1653,7 +1657,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss, KASSERT(vi->adapter == sc, ("%s: vi %p, sc %p mismatch", __func__, vi, sc)); - if (__predict_false(inp->inp_flags & INP_DROPPED)) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { reset: send_abort_rpl_synqe(TOEDEV(ifp), synqe, CPL_ABORT_SEND_RST); INP_WUNLOCK(inp); diff --git a/sys/dev/cxgbe/tom/t4_tls.c b/sys/dev/cxgbe/tom/t4_tls.c index bbcc1c88c3db..0616279ba15e 100644 --- a/sys/dev/cxgbe/tom/t4_tls.c +++ b/sys/dev/cxgbe/tom/t4_tls.c @@ -762,7 +762,7 @@ do_tls_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; - struct tcpcb *tp; + struct tcpcb *tp = intotcpcb(inp); int len; /* XXX: Should this match do_rx_data instead? 
*/ @@ -781,9 +781,9 @@ do_tls_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) ("%s: payload length mismatch", __func__)); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { - CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", - __func__, tid, len, inp->inp_flags); + if (tp->t_flags & TF_DISCONNECTED) { + CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), t_flags 0x%x", + __func__, tid, len, tp->t_flags); INP_WUNLOCK(inp); m_freem(m); return (0); @@ -803,7 +803,6 @@ do_tls_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) #endif } - tp = intotcpcb(inp); tp->t_rcvtime = ticks; #ifdef VERBOSE_TRACES @@ -824,7 +823,7 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; - struct tcpcb *tp; + struct tcpcb *tp = intotcpcb(inp); struct socket *so; struct sockbuf *sb; struct mbuf *tls_data; @@ -851,9 +850,9 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) ("%s: payload length mismatch", __func__)); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { - CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", - __func__, tid, len, inp->inp_flags); + if (tp->t_flags & TF_DISCONNECTED) { + CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), t_flags 0x%x", + __func__, tid, len, tp->t_flags); INP_WUNLOCK(inp); m_freem(m); return (0); @@ -862,7 +861,6 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) pdu_length = G_CPL_RX_TLS_CMP_PDULENGTH(be32toh(cpl->pdulength_length)); so = inp_inpcbtosocket(inp); - tp = intotcpcb(inp); #ifdef VERBOSE_TRACES CTR6(KTR_CXGBE, "%s: tid %u PDU len %d len %d seq %u, rcv_nxt %u", diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c index 8dfffd465345..950608053be7 100644 --- a/sys/dev/cxgbe/tom/t4_tom.c +++ b/sys/dev/cxgbe/tom/t4_tom.c @@ -1830,7 +1830,7 @@ live_tid_failure_cleanup(struct 
adapter *sc, struct toepcb *toep, u_int status) INP_WLOCK(inp); tp = intotcpcb(inp); toep->flags |= TPF_ABORT_SHUTDOWN; - if ((inp->inp_flags & INP_DROPPED) == 0) { + if ((tp->t_flags & TF_DISCONNECTED) == 0) { struct socket *so = inp->inp_socket; if (so != NULL) @@ -2283,8 +2283,8 @@ find_offload_adapter_cb(struct adapter *sc, void *arg) struct find_offload_adapter_data *fa = arg; struct socket *so = fa->so; struct tom_data *td = sc->tom_softc; - struct tcpcb *tp; - struct inpcb *inp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); /* Non-TCP were filtered out earlier. */ MPASS(so->so_proto->pr_protocol == IPPROTO_TCP); @@ -2295,10 +2295,8 @@ find_offload_adapter_cb(struct adapter *sc, void *arg) if (td == NULL) return; /* TOE not enabled on this adapter. */ - inp = sotoinpcb(so); INP_WLOCK(inp); - if ((inp->inp_flags & INP_DROPPED) == 0) { - tp = intotcpcb(inp); + if ((tp->t_flags & TF_DISCONNECTED) == 0) { if (tp->t_flags & TF_TOE && tp->tod == &td->tod) fa->sc = sc; /* Found. 
*/ } diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c index 35009ad77722..0112cf8b6941 100644 --- a/sys/kern/uipc_ktls.c +++ b/sys/kern/uipc_ktls.c @@ -870,21 +870,15 @@ ktls_clone_session(struct ktls_session *tls, int direction) static int ktls_try_toe(struct socket *so, struct ktls_session *tls, int direction) { - struct inpcb *inp; - struct tcpcb *tp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); int error; - inp = so->so_pcb; INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { - INP_WUNLOCK(inp); - return (ECONNRESET); - } - if (inp->inp_socket == NULL) { + if (tp->t_flags & TF_DISCONNECTED) { INP_WUNLOCK(inp); return (ECONNRESET); } - tp = intotcpcb(inp); if (!(tp->t_flags & TF_TOE)) { INP_WUNLOCK(inp); return (EOPNOTSUPP); @@ -923,19 +917,14 @@ ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_session *tls, bool force, union if_snd_tag_alloc_params params; struct ifnet *ifp; struct nhop_object *nh; - struct tcpcb *tp; + struct tcpcb *tp = intotcpcb(inp); int error; INP_RLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { INP_RUNLOCK(inp); return (ECONNRESET); } - if (inp->inp_socket == NULL) { - INP_RUNLOCK(inp); - return (ECONNRESET); - } - tp = intotcpcb(inp); /* * Check administrative controls on ifnet TLS to determine if @@ -1027,11 +1016,7 @@ ktls_alloc_rcv_tag(struct inpcb *inp, struct ktls_session *tls, return (ENXIO); INP_RLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { - INP_RUNLOCK(inp); - return (ECONNRESET); - } - if (inp->inp_socket == NULL) { + if (intotcpcb(inp)->t_flags & TF_DISCONNECTED) { INP_RUNLOCK(inp); return (ECONNRESET); } @@ -1506,23 +1491,15 @@ ktls_get_rx_mode(struct socket *so, int *modep) int ktls_get_rx_sequence(struct inpcb *inp, uint32_t *tcpseq, uint64_t *tlsseq) { - struct socket *so; - struct tcpcb *tp; + struct socket *so = inp->inp_socket; + struct tcpcb *tp = intotcpcb(inp); INP_RLOCK(inp); - so = inp->inp_socket; - if (__predict_false(so == NULL)) 
{ - INP_RUNLOCK(inp); - return (EINVAL); - } - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { INP_RUNLOCK(inp); return (ECONNRESET); } - tp = intotcpcb(inp); - MPASS(tp != NULL); - SOCKBUF_LOCK(&so->so_rcv); *tcpseq = tp->rcv_nxt - so->so_rcv.sb_tlscc; *tlsseq = so->so_rcv.sb_tls_seqno; @@ -1697,7 +1674,7 @@ ktls_reset_receive_tag(void *context, int pending) ifp = NULL; INP_RLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (intotcpcb(inp)->t_flags & TF_DISCONNECTED) { INP_RUNLOCK(inp); goto out; } @@ -1818,8 +1795,8 @@ ktls_reset_send_tag(void *context, int pending) } else { NET_EPOCH_ENTER(et); INP_WLOCK(inp); - if (!(inp->inp_flags & INP_DROPPED)) { - tp = intotcpcb(inp); + tp = intotcpcb(inp); + if (!(tp->t_flags & TF_DISCONNECTED)) { CURVNET_SET(inp->inp_vnet); tp = tcp_drop(tp, ECONNABORTED); CURVNET_RESTORE(); @@ -2461,26 +2438,19 @@ ktls_resync_ifnet(struct socket *so, uint32_t tls_len, uint64_t tls_rcd_num) { union if_snd_tag_modify_params params; struct m_snd_tag *mst; - struct inpcb *inp; - struct tcpcb *tp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); mst = so->so_rcv.sb_tls_info->snd_tag; if (__predict_false(mst == NULL)) return (EINVAL); - inp = sotoinpcb(so); - if (__predict_false(inp == NULL)) - return (EINVAL); - INP_RLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { INP_RUNLOCK(inp); return (ECONNRESET); } - tp = intotcpcb(inp); - MPASS(tp != NULL); - /* Get the TCP sequence number of the next valid TLS header. 
*/ SOCKBUF_LOCK(&so->so_rcv); params.tls_rx.tls_hdr_tcp_sn = @@ -2500,12 +2470,11 @@ ktls_drop(struct socket *so, int error) { struct epoch_tracker et; struct inpcb *inp = sotoinpcb(so); - struct tcpcb *tp; + struct tcpcb *tp = intotcpcb(inp); NET_EPOCH_ENTER(et); INP_WLOCK(inp); - if (!(inp->inp_flags & INP_DROPPED)) { - tp = intotcpcb(inp); + if (!(tp->t_flags & TF_DISCONNECTED)) { CURVNET_SET(inp->inp_vnet); tp = tcp_drop(tp, error); CURVNET_RESTORE(); @@ -3372,7 +3341,8 @@ ktls_disable_ifnet_help(void *context, int pending __unused) INP_WLOCK(inp); so = inp->inp_socket; MPASS(so != NULL); - if (inp->inp_flags & INP_DROPPED) { + tp = intotcpcb(inp); + if (tp->t_flags & TF_DISCONNECTED) { goto out; } @@ -3383,8 +3353,7 @@ ktls_disable_ifnet_help(void *context, int pending __unused) if (err == 0) { counter_u64_add(ktls_ifnet_disable_ok, 1); /* ktls_set_tx_mode() drops inp wlock, so recheck flags */ - if ((inp->inp_flags & INP_DROPPED) == 0 && - (tp = intotcpcb(inp)) != NULL && + if ((tp->t_flags & TF_DISCONNECTED) == 0 && tp->t_fb->tfb_hwtls_change != NULL) (*tp->t_fb->tfb_hwtls_change)(tp, 0); } else { diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 8bc90de239c2..f1053f6abe03 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -692,6 +692,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) */ inp->inp_route.ro_flags = RT_LLE_CACHE; refcount_init(&inp->inp_refcount, 1); /* Reference from socket. 
*/ + inp->inp_flags |= INP_UNCONNECTED; INP_WLOCK(inp); INP_HASH_WLOCK(pcbinfo); pcbinfo->ipi_count++; @@ -1158,14 +1159,14 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr_in *sin, struct ucred *cred) lport = inp->inp_lport; MPASS(!in_nullhost(inp->inp_laddr) || inp->inp_lport != 0 || - !(inp->inp_flags & INP_INHASHLIST)); + (inp->inp_flags & INP_UNCONNECTED)); inp->inp_faddr = faddr; inp->inp_fport = sin->sin_port; inp->inp_laddr = laddr; inp->inp_lport = lport; - if ((inp->inp_flags & INP_INHASHLIST) == 0) { + if (inp->inp_flags & INP_UNCONNECTED) { error = in_pcbinshash(inp); MPASS(error == 0); } else @@ -1426,11 +1427,15 @@ in_pcbdisconnect(struct inpcb *inp) KASSERT(inp->inp_smr == SMR_SEQ_INVALID, ("%s: inp %p was already disconnected", __func__, inp)); + if (inp->inp_flags & INP_UNCONNECTED) + return; + INP_HASH_WLOCK(inp->inp_pcbinfo); in_pcbremhash(inp); CK_LIST_INSERT_HEAD(&inp->inp_pcbinfo->ipi_list_unconn, inp, inp_unconn_list); INP_HASH_WUNLOCK(inp->inp_pcbinfo); + inp->inp_flags |= INP_UNCONNECTED; if ((inp->inp_socket->so_proto->pr_flags & PR_CONNREQUIRED) == 0) { /* See the comment in in_pcbinshash(). */ @@ -1538,11 +1543,11 @@ inp_smr_lock(struct inpcb *inp, const inp_lookup_t lock) { /* - * in_pcblookup() family of functions ignore not only freed entries, - * that may be found due to lockless access to the hash, but dropped - * entries, too. + * in_pcblookup() family of functions shall ignore not onlu pcbs that + * had been freed that may be found due to lockless access to the hash, + * but also pcbs that were removed from the hash, but are still around. */ - return (_inp_smr_lock(inp, lock, INP_FREED | INP_DROPPED)); + return (_inp_smr_lock(inp, lock, INP_FREED | INP_UNCONNECTED)); } /* @@ -1837,10 +1842,10 @@ in_pcbfree(struct inpcb *inp) * lock, thus in_pcbremhash() should be the first action. 
*/ INP_HASH_WLOCK(pcbinfo); - if (inp->inp_flags & INP_INHASHLIST) - in_pcbremhash(inp); - else + if (inp->inp_flags & INP_UNCONNECTED) CK_LIST_REMOVE(inp, inp_unconn_list); + else + in_pcbremhash(inp); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; pcbinfo->ipi_count--; INP_HASH_WUNLOCK(pcbinfo); @@ -1901,36 +1906,6 @@ inpcb_fini(void *mem, int size) INP_LOCK_DESTROY(inp); } -/* - * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and - * port reservation, and preventing it from being returned by inpcb lookups. - * - * It is used by TCP to mark an inpcb as unused and avoid future packet - * delivery or event notification when a socket remains open but TCP has - * closed. This might occur as a result of a shutdown()-initiated TCP close - * or a RST on the wire, and allows the port binding to be reused while still - * maintaining the invariant that so_pcb always points to a valid inpcb until - * in_pcbdetach(). - * - * XXXRW: Possibly in_pcbdrop() should also prevent future notifications by - * in_pcbpurgeif0()? - */ -void -in_pcbdrop(struct inpcb *inp) -{ - - INP_WLOCK_ASSERT(inp); - - inp->inp_flags |= INP_DROPPED; - if (inp->inp_flags & INP_INHASHLIST) { - INP_HASH_WLOCK(inp->inp_pcbinfo); - in_pcbremhash(inp); - CK_LIST_INSERT_HEAD(&inp->inp_pcbinfo->ipi_list_unconn, inp, - inp_unconn_list); - INP_HASH_WUNLOCK(inp->inp_pcbinfo); - } -} - #ifdef INET /* * Common routines to return the socket addresses associated with inpcbs. 
@@ -2691,8 +2666,7 @@ in_pcbinshash(struct inpcb *inp) INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); - KASSERT((inp->inp_flags & INP_INHASHLIST) == 0, - ("in_pcbinshash: INP_INHASHLIST")); + MPASS(inp->inp_flags & INP_UNCONNECTED); #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { @@ -2751,7 +2725,7 @@ in_pcbinshash(struct inpcb *inp) _in_pcbinshash_wild(pcbhash, inp); } CK_LIST_INSERT_HEAD(pcbporthash, inp, inp_portlist); - inp->inp_flags |= INP_INHASHLIST; + inp->inp_flags &= ~INP_UNCONNECTED; return (0); } @@ -2762,7 +2736,7 @@ in_pcbremhash(struct inpcb *inp) INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); - MPASS(inp->inp_flags & INP_INHASHLIST); + MPASS(!(inp->inp_flags & INP_UNCONNECTED)); if ((inp->inp_flags & INP_INLBGROUP) != 0) in_pcbremlbgrouphash(inp); @@ -2781,7 +2755,6 @@ in_pcbremhash(struct inpcb *inp) CK_LIST_REMOVE(inp, inp_hash_exact); } CK_LIST_REMOVE(inp, inp_portlist); - inp->inp_flags &= ~INP_INHASHLIST; } /* @@ -2800,8 +2773,7 @@ in_pcbrehash(struct inpcb *inp) INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); - KASSERT(inp->inp_flags & INP_INHASHLIST, - ("%s: !INP_INHASHLIST", __func__)); + MPASS(!(inp->inp_flags & INP_UNCONNECTED)); KASSERT(inp->inp_smr == SMR_SEQ_INVALID, ("%s: inp was disconnected", __func__)); @@ -3040,7 +3012,13 @@ sysctl_setsockopt(SYSCTL_HANDLER_ARGS, struct inpcbinfo *pcbinfo, } while ((inp = inp_next(&inpi)) != NULL) if (inp->inp_gencnt == params->sop_id) { - if (inp->inp_flags & INP_DROPPED) { + /* + * XXXGL + * 1) the inp_next() that ignores INP_UNCONNECTED needs + * to be generally supported. + * 2) Why do we ECONNRESET instead of continueing? + */ + if (inp->inp_flags & INP_UNCONNECTED) { INP_WUNLOCK(inp); return (ECONNRESET); } @@ -3269,7 +3247,7 @@ in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp, * down, allocating a new send tag is not allowed. Else send * tags may leak. 
*/ - if (*st != NULL || (inp->inp_flags & INP_DROPPED) != 0) + if (*st != NULL || (inp->inp_flags & INP_UNCONNECTED)) return (EINVAL); error = m_snd_tag_alloc(ifp, ¶ms, st); diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 592d951c018f..d34c88941c7f 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -181,7 +181,7 @@ struct xinpgen { #define INP_RECVTTL 0x00000400 /* receive incoming IP TTL */ #define INP_DONTFRAG 0x00000800 /* don't fragment packet */ #define INP_BINDANY 0x00001000 /* allow bind to any address */ -#define INP_INHASHLIST 0x00002000 /* in_pcbinshash() has been called */ +/* available 0x00002000 */ #define INP_RECVTOS 0x00004000 /* receive incoming IP TOS */ #define IN6P_IPV6_V6ONLY 0x00008000 /* restrict AF_INET6 socket for v6 */ #define IN6P_PKTINFO 0x00010000 /* receive IP6 dst and I/F */ @@ -194,7 +194,7 @@ struct xinpgen { #define IN6P_AUTOFLOWLABEL 0x00800000 /* attach flowlabel automatically */ /* INP_INLBGROUP 0x01000000 private to in_pcb.c */ #define INP_ONESBCAST 0x02000000 /* send all-ones broadcast */ -#define INP_DROPPED 0x04000000 /* protocol drop flag */ +/* INP_UNCONNECTED 0x04000000 private to in_pcb.c/in6_pcb.c */ #define INP_SOCKREF 0x08000000 /* strong socket reference */ #define INP_RESERVED_0 0x10000000 /* reserved field */ #define INP_BOUNDFIB 0x20000000 /* Bound to a specific FIB. 
*/ @@ -213,10 +213,10 @@ struct xinpgen { "\1INP_RECVOPTS\2INP_RECVRETOPTS\3INP_RECVDSTADDR\4INP_HDRINCL" \ "\5INP_HIGHPORT\6INP_LOWPORT\7INP_ANONPORT\10INP_RECVIF" \ "\11INP_MTUDISC\12INP_FREED\13INP_RECVTTL\14INP_DONTFRAG" \ - "\15INP_BINDANY\16INP_INHASHLIST\17INP_RECVTOS\20IN6P_IPV6_V6ONLY" \ + "\15INP_BINDANY\17INP_RECVTOS\20IN6P_IPV6_V6ONLY" \ "\21IN6P_PKTINFO\22IN6P_HOPLIMIT\23IN6P_HOPOPTS\24IN6P_DSTOPTS" \ "\25IN6P_RTHDR\26IN6P_RTHDRDSTOPTS\27IN6P_TCLASS\30IN6P_AUTOFLOWLABEL" \ - "\31INP_INLBGROUP\32INP_ONESBCAST\33INP_DROPPED\34INP_SOCKREF" \ + "\31INP_INLBGROUP\32INP_ONESBCAST\33INP_UNCONNECTED\34INP_SOCKREF" \ "\35INP_RESERVED_0\36INP_BOUNDFIB\37IN6P_RFC2292\40IN6P_MTU" /* @@ -650,7 +650,6 @@ int in_pcbbind_setup(struct inpcb *, struct sockaddr_in *, in_addr_t *, u_short *, int, struct ucred *); int in_pcbconnect(struct inpcb *, struct sockaddr_in *, struct ucred *); void in_pcbdisconnect(struct inpcb *); -void in_pcbdrop(struct inpcb *); void in_pcbfree(struct inpcb *); int in_pcbladdr(const struct inpcb *, struct in_addr *, struct in_addr *, struct ucred *); diff --git a/sys/netinet/in_pcb_var.h b/sys/netinet/in_pcb_var.h index 7a5c489f26d7..1f46e1bd8f3d 100644 --- a/sys/netinet/in_pcb_var.h +++ b/sys/netinet/in_pcb_var.h @@ -41,6 +41,8 @@ * Definitions shared between netinet/in_pcb.c and netinet6/in6_pcb.c */ +#define INP_UNCONNECTED 0x04000000 /* Not inserted into hashes. 
*/ + VNET_DECLARE(uint32_t, in_pcbhashseed); #define V_in_pcbhashseed VNET(in_pcbhashseed) diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c index 67b028d7603b..8dfbe126d46a 100644 --- a/sys/netinet/tcp_hpts.c +++ b/sys/netinet/tcp_hpts.c @@ -510,7 +510,7 @@ tcp_hpts_insert_internal(struct tcpcb *tp, struct tcp_hpts_entry *hpts) INP_WLOCK_ASSERT(inp); HPTS_MTX_ASSERT(hpts); MPASS(hpts->p_cpu == tp->t_hpts_cpu); - MPASS(!(inp->inp_flags & INP_DROPPED)); + MPASS(!(tp->t_flags & TF_DISCONNECTED)); hptsh = &hpts->p_hptss[tp->t_hpts_slot]; @@ -615,8 +615,10 @@ __tcp_hpts_remove(struct tcp_hptsi *pace, struct tcpcb *tp) * tcp_hptsi() moves inpcb to detached tailq * tcp_hpts_remove() marks as IHPTS_MOVING, slot = -1 * tcp_hpts_insert() sets slot to a meaningful value - * tcp_hpts_remove() again (we are here!), then in_pcbdrop() - * tcp_hptsi() finds pcb with meaningful slot and INP_DROPPED + * The connection is terminated with the final call to + tcp_hpts_remove() again (we are here!) and we fail to call + tcp_hpts_release() since it is IHPTS_MOVING. Set slot to -1 + to delegate the release to the owner of the detached tailq. 
*/ tp->t_hpts_slot = -1; } @@ -828,7 +830,7 @@ __tcp_hpts_insert(struct tcp_hptsi *pace, struct tcpcb *tp, uint32_t usecs, bool need_wakeup = false; INP_WLOCK_ASSERT(tptoinpcb(tp)); - MPASS(!(tptoinpcb(tp)->inp_flags & INP_DROPPED)); + MPASS(!(tp->t_flags & TF_DISCONNECTED)); MPASS(!(tp->t_in_hpts == IHPTS_ONQUEUE)); /* @@ -1292,7 +1294,7 @@ again: } MPASS(tp->t_in_hpts == IHPTS_ONQUEUE); - MPASS(!(inp->inp_flags & INP_DROPPED)); + MPASS(!(tp->t_flags & TF_DISCONNECTED)); KASSERT(runningslot == tp->t_hpts_slot, ("Hpts:%p inp:%p slot mis-aligned %u vs %u", hpts, inp, runningslot, tp->t_hpts_slot)); diff --git a/sys/netinet/tcp_hpts_test.c b/sys/netinet/tcp_hpts_test.c index 61082adc9063..ea088f5c71cf 100644 --- a/sys/netinet/tcp_hpts_test.c +++ b/sys/netinet/tcp_hpts_test.c @@ -175,7 +175,6 @@ dump_tcpcb(struct tcpcb *tp) /* Input PCB fields that HPTS uses */ KTEST_LOG(ctx, " inp_flags: 0x%x", inp->inp_flags); - KTEST_LOG(ctx, " INP_DROPPED: %s", (inp->inp_flags & INP_DROPPED) ? "YES" : "NO"); KTEST_LOG(ctx, " inp_flowid: 0x%x", inp->inp_flowid); KTEST_LOG(ctx, " inp_flowtype: %u", inp->inp_flowtype); KTEST_LOG(ctx, " inp_numa_domain: %d", inp->inp_numa_domain); @@ -585,7 +584,7 @@ KTEST_FUNC(tcpcb_initialization) KTEST_EQUAL(tp->t_lro_cpu, 0); KTEST_VERIFY(tp->t_hpts_cpu < pace->rp_num_hptss); KTEST_EQUAL(tp->t_inpcb.inp_refcount, 1); - KTEST_VERIFY(!(tp->t_inpcb.inp_flags & INP_DROPPED)); + KTEST_VERIFY(!(tp->t_flags & TF_DISCONNECTED)); test_hpts_free_tcpcb(tp); tcp_hptsi_stop(pace); diff --git a/sys/netinet/tcp_log_buf.c b/sys/netinet/tcp_log_buf.c index 4505171d94d0..3e5955e5db4e 100644 --- a/sys/netinet/tcp_log_buf.c +++ b/sys/netinet/tcp_log_buf.c @@ -517,12 +517,12 @@ tcp_log_remove_id_node(struct inpcb *inp, struct tcpcb *tp, } #define RECHECK_INP_CLEAN(cleanup) do { \ - if (inp->inp_flags & INP_DROPPED) { \ + tp = intotcpcb(inp); \ + if (tp->t_flags & TF_DISCONNECTED) { \ rv = ECONNRESET; \ cleanup; \ goto done; \ } \ - tp = intotcpcb(inp); \ } while (0) 
#define RECHECK_INP() RECHECK_INP_CLEAN(/* noop */) @@ -2254,10 +2254,9 @@ tcp_log_getlogbuf(struct sockopt *sopt, struct tcpcb *tp) if (error) { /* Restore list */ + tp = intotcpcb(inp); INP_WLOCK(inp); - if ((inp->inp_flags & INP_DROPPED) == 0) { - tp = intotcpcb(inp); - + if ((tp->t_flags & TF_DISCONNECTED) == 0) { /* Merge the two lists. */ STAILQ_CONCAT(&log_tailq, &tp->t_logs); tp->t_logs = log_tailq; @@ -2428,14 +2427,14 @@ tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force) * may end up dropping some entries. That seems like a * small price to pay for safety. */ - if (inp->inp_flags & INP_DROPPED) { + tp = intotcpcb(inp); + if (tp->t_flags & TF_DISCONNECTED) { free(entry, M_TCPLOGDEV); #ifdef TCPLOG_DEBUG_COUNTERS counter_u64_add(tcp_log_que_fail2, 1); #endif return (ECONNRESET); } - tp = intotcpcb(inp); if (tp->t_lognum == 0) { free(entry, M_TCPLOGDEV); return (0); @@ -2871,14 +2870,14 @@ tcp_log_sendfile(struct socket *so, off_t offset, size_t nbytes, int flags) /* quick check to see if logging is enabled for this connection */ tp = intotcpcb(inp); - if ((inp->inp_flags & INP_DROPPED) || + if ((tp->t_flags & TF_DISCONNECTED) || (tp->_t_logstate == TCP_LOG_STATE_OFF)) { return; } INP_WLOCK(inp); /* double check log state now that we have the lock */ - if (inp->inp_flags & INP_DROPPED) + if (tp->t_flags & TF_DISCONNECTED) goto done; if (tcp_bblogging_on(tp)) { struct timeval tv; diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 23085f67d5f9..8d88c4f734f6 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -220,6 +220,7 @@ tcp_default_output(struct tcpcb *tp) NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(inp); + MPASS(!(tp->t_flags & TF_DISCONNECTED)); #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index 10383bc0801e..6697b6ecc7db 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -14217,7 +14217,7 @@ 
bbr_set_sockopt(struct tcpcb *tp, struct sockopt *sopt) if (error) return (error); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { INP_WUNLOCK(inp); return (ECONNRESET); } diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 2e3fcc7a9762..346468fe9a48 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -14750,8 +14750,8 @@ rack_init(struct tcpcb *tp, void **ptr) */ rack_convert_rtts(tp); rack_log_hystart_event(rack, rack->r_ctl.roundends, 20); - if ((tptoinpcb(tp)->inp_flags & INP_DROPPED) == 0) { - /* We do not start any timers on DROPPED connections */ + if ((tp->t_flags & TF_DISCONNECTED) == 0) { + /* We do not start any timers on disconnected connections */ if (tp->t_fb->tfb_chg_query == NULL) { rack_start_hpts_timer(rack, tp, tcp_get_usecs(NULL), 0, 0, 0); } else { diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 43c430708926..443af020848f 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -2551,10 +2551,11 @@ tcp_close(struct tcpcb *tp) tcp_timer_stop(tp); if (tp->t_fb->tfb_tcp_timer_stop_all != NULL) tp->t_fb->tfb_tcp_timer_stop_all(tp); - in_pcbdrop(inp); + in_pcbdisconnect(inp); TCPSTAT_INC(tcps_closed); if (tp->t_state != TCPS_CLOSED) tcp_state_change(tp, TCPS_CLOSED); + tp->t_flags |= TF_DISCONNECTED; KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL")); tcp_free_sackholes(tp); soisdisconnected(so); diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index ba5c90c91e43..430e98910743 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -126,10 +126,7 @@ tcp_twstart(struct tcpcb *tp) NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(inp); - - /* A dropped inp should never transition to TIME_WAIT state. 
*/ - KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("tcp_twstart: " - "(inp->inp_flags & INP_DROPPED) != 0")); + MPASS(!(tp->t_flags & TF_DISCONNECTED)); tcp_state_change(tp, TCPS_TIME_WAIT); tcp_free_sackholes(tp); diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 07c436a1f2e0..ce13f1a9cefe 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -205,9 +205,9 @@ tcp_usr_detach(struct socket *so) tp = intotcpcb(inp); - KASSERT(inp->inp_flags & INP_DROPPED || + KASSERT(tp->t_flags & TF_DISCONNECTED || tp->t_state < TCPS_SYN_SENT, - ("%s: inp %p not dropped or embryonic", __func__, inp)); + ("%s: inp %p not disconnected or embryonic", __func__, inp)); tcp_discardcb(tp); in_pcbfree(inp); @@ -220,19 +220,16 @@ tcp_usr_detach(struct socket *so) static int tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { - int error = 0; - struct inpcb *inp; - struct tcpcb *tp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); struct sockaddr_in *sinp; + int error = 0; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL")); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { INP_WUNLOCK(inp); return (EINVAL); } - tp = intotcpcb(inp); sinp = (struct sockaddr_in *)nam; if (nam->sa_family != AF_INET) { @@ -276,20 +273,17 @@ out: static int tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { - int error = 0; - struct inpcb *inp; - struct tcpcb *tp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); struct sockaddr_in6 *sin6; + int error = 0; u_char vflagsav; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL")); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { INP_WUNLOCK(inp); return (EINVAL); } - tp = intotcpcb(inp); vflagsav = inp->inp_vflag; @@ -355,19 +349,16 @@ out: static int 
tcp_usr_listen(struct socket *so, int backlog, struct thread *td) { - struct inpcb *inp; - struct tcpcb *tp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); int error = 0; bool already_listening; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { INP_WUNLOCK(inp); return (EINVAL); } - tp = intotcpcb(inp); SOCK_LOCK(so); already_listening = SOLISTENING(so); @@ -414,20 +405,17 @@ out: static int tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) { - struct inpcb *inp; - struct tcpcb *tp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); u_char vflagsav; int error = 0; bool already_listening; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { INP_WUNLOCK(inp); return (EINVAL); } - tp = intotcpcb(inp); vflagsav = inp->inp_vflag; @@ -488,19 +476,16 @@ static int tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct epoch_tracker et; - int error = 0; - struct inpcb *inp; - struct tcpcb *tp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); struct sockaddr_in *sinp; + int error = 0; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL")); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { INP_WUNLOCK(inp); return (ECONNREFUSED); } - tp = intotcpcb(inp); sinp = (struct sockaddr_in *)nam; if (nam->sa_family != AF_INET) { @@ -556,21 +541,18 @@ static int tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct epoch_tracker et; - int error = 0; - struct inpcb *inp; - struct tcpcb *tp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); struct 
sockaddr_in6 *sin6; + int error = 0; u_int8_t incflagsav; u_char vflagsav; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL")); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { INP_WUNLOCK(inp); return (ECONNREFUSED); } - tp = intotcpcb(inp); vflagsav = inp->inp_vflag; incflagsav = inp->inp_inc.inc_flags; @@ -725,18 +707,15 @@ out: static int tcp_usr_accept(struct socket *so, struct sockaddr *sa) { - struct inpcb *inp; - struct tcpcb *tp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); int error = 0; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL")); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { INP_WUNLOCK(inp); return (ECONNABORTED); } - tp = intotcpcb(inp); if (so->so_state & SS_ISDISCONNECTED) error = ECONNABORTED; @@ -759,18 +738,15 @@ tcp_usr_accept(struct socket *so, struct sockaddr *sa) static int tcp6_usr_accept(struct socket *so, struct sockaddr *sa) { - struct inpcb *inp; - struct tcpcb *tp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); int error = 0; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { INP_WUNLOCK(inp); return (ECONNABORTED); } - tp = intotcpcb(inp); if (so->so_state & SS_ISDISCONNECTED) { error = ECONNABORTED; @@ -842,7 +818,7 @@ tcp_usr_shutdown(struct socket *so, enum shutdown_how how) * return ECONNRESEST for SHUT_RD as well? 
*/ INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { INP_WUNLOCK(inp); return (ECONNRESET); } @@ -868,18 +844,16 @@ static int tcp_usr_rcvd(struct socket *so, int flags) { struct epoch_tracker et; - struct inpcb *inp; - struct tcpcb *tp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); int outrv = 0, error = 0; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL")); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { + /* XXXGL: how could this happen?! */ INP_WUNLOCK(inp); return (ECONNRESET); } - tp = intotcpcb(inp); NET_EPOCH_ENTER(et); /* @@ -917,9 +891,8 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct epoch_tracker et; - int error = 0; - struct inpcb *inp; - struct tcpcb *tp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); #ifdef INET #ifdef INET6 struct sockaddr_in sin; @@ -930,20 +903,18 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr_in6 *sin6; int isipv6; #endif + int error = 0; u_int8_t incflagsav; u_char vflagsav; bool restoreflags; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { if (m != NULL && (flags & PRUS_NOTREADY) == 0) m_freem(m); INP_WUNLOCK(inp); return (ECONNRESET); } - tp = intotcpcb(inp); vflagsav = inp->inp_vflag; incflagsav = inp->inp_inc.inc_flags; @@ -1121,8 +1092,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, if (tp->t_fbyte_out && tp->t_fbyte_in) tp->t_flags2 |= TF2_FBYTES_COMPLETE; } - if (!(inp->inp_flags & INP_DROPPED) && - !(flags & PRUS_NOTREADY)) { + if (!(flags & PRUS_NOTREADY)) { if (flags & PRUS_MORETOCOME) tp->t_flags |= TF_MORETOCOME; error = tcp_output_nodrop(tp); @@ -1232,18 +1202,16 @@ 
static int tcp_usr_ready(struct socket *so, struct mbuf *m, int count) { struct epoch_tracker et; - struct inpcb *inp; - struct tcpcb *tp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); int error; - inp = sotoinpcb(so); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { INP_WUNLOCK(inp); mb_free_notready(m, count); return (ECONNRESET); } - tp = intotcpcb(inp); SOCK_SENDBUF_LOCK(so); error = sbready(&so->so_snd, m, count); @@ -1265,30 +1233,23 @@ tcp_usr_ready(struct socket *so, struct mbuf *m, int count) static void tcp_usr_abort(struct socket *so) { - struct inpcb *inp; - struct tcpcb *tp; struct epoch_tracker et; - - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL")); - - NET_EPOCH_ENTER(et); - INP_WLOCK(inp); - KASSERT(inp->inp_socket != NULL, - ("tcp_usr_abort: inp_socket == NULL")); + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); /* * If we still have full TCP state, and we're not dropped, drop. */ - if (!(inp->inp_flags & INP_DROPPED)) { - tp = intotcpcb(inp); + NET_EPOCH_ENTER(et); + INP_WLOCK(inp); + if (!(tp->t_flags & TF_DISCONNECTED)) { tp = tcp_drop(tp, ECONNABORTED); if (tp == NULL) goto dropped; tcp_bblog_pru(tp, PRU_ABORT, 0); TCP_PROBE2(debug__user, tp, PRU_ABORT); } - if (!(inp->inp_flags & INP_DROPPED)) { + if (!(tp->t_flags & TF_DISCONNECTED)) { soref(so); inp->inp_flags |= INP_SOCKREF; } @@ -1303,24 +1264,17 @@ dropped: static void tcp_usr_close(struct socket *so) { - struct inpcb *inp; - struct tcpcb *tp; struct epoch_tracker et; - - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL")); - - NET_EPOCH_ENTER(et); - INP_WLOCK(inp); - KASSERT(inp->inp_socket != NULL, - ("tcp_usr_close: inp_socket == NULL")); + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); /* * If we are still connected and we're not dropped, initiate * a disconnect. 
*/ - if (!(inp->inp_flags & INP_DROPPED)) { - tp = intotcpcb(inp); + NET_EPOCH_ENTER(et); + INP_WLOCK(inp); + if (!(tp->t_flags & TF_DISCONNECTED)) { if (tp->t_state != TCPS_TIME_WAIT) { tp->t_flags |= TF_CLOSED; tcp_disconnect(tp); @@ -1328,7 +1282,7 @@ tcp_usr_close(struct socket *so) TCP_PROBE2(debug__user, tp, PRU_CLOSE); } } - if (!(inp->inp_flags & INP_DROPPED)) { + if (!(tp->t_flags & TF_DISCONNECTED)) { soref(so); inp->inp_flags |= INP_SOCKREF; } @@ -1360,18 +1314,16 @@ tcp_pru_options_support(struct tcpcb *tp, int flags) static int tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) { + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); int error = 0; - struct inpcb *inp; - struct tcpcb *tp; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL")); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { + /* XXXGL: how could this happen?! */ INP_WUNLOCK(inp); return (ECONNRESET); } - tp = intotcpcb(inp); error = tcp_pru_options_support(tp, PRUS_OOB); if (error) { @@ -1650,15 +1602,16 @@ tcp_fill_info(const struct tcpcb *tp, struct tcp_info *ti) * socket option arguments. When it re-acquires the lock after the copy, it * has to revalidate that the connection is still valid for the socket * option. 
+ * XXXGL: review if this is really needed */ #define INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) do { \ INP_WLOCK(inp); \ - if (inp->inp_flags & INP_DROPPED) { \ + tp = intotcpcb(inp); \ + if (tp->t_flags & TF_DISCONNECTED) { \ INP_WUNLOCK(inp); \ cleanup; \ return (ECONNRESET); \ } \ - tp = intotcpcb(inp); \ } while(0) #define INP_WLOCK_RECHECK(inp) INP_WLOCK_RECHECK_CLEANUP((inp), /* noop */) @@ -1671,8 +1624,8 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt) MPASS(sopt->sopt_dir == SOPT_SET); INP_WLOCK_ASSERT(inp); - KASSERT((inp->inp_flags & INP_DROPPED) == 0, - ("inp_flags == %x", inp->inp_flags)); + KASSERT((tp->t_flags & TF_DISCONNECTED) == 0, + ("tp_flags == %x", tp->t_flags)); KASSERT(so != NULL, ("inp_socket == NULL")); if (sopt->sopt_level != IPPROTO_TCP) { @@ -1839,8 +1792,8 @@ tcp_ctloutput_get(struct inpcb *inp, struct sockopt *sopt) MPASS(sopt->sopt_dir == SOPT_GET); INP_WLOCK_ASSERT(inp); - KASSERT((inp->inp_flags & INP_DROPPED) == 0, - ("inp_flags == %x", inp->inp_flags)); + KASSERT((tp->t_flags & TF_DISCONNECTED) == 0, + ("tp_flags == %x", tp->t_flags)); KASSERT(so != NULL, ("inp_socket == NULL")); if (sopt->sopt_level != IPPROTO_TCP) { @@ -1883,13 +1836,11 @@ tcp_ctloutput_get(struct inpcb *inp, struct sockopt *sopt) int tcp_ctloutput(struct socket *so, struct sockopt *sopt) { - struct inpcb *inp; - - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL")); + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { INP_WUNLOCK(inp); return (ECONNRESET); } @@ -1917,7 +1868,7 @@ tcp_set_cc_mod(struct inpcb *inp, struct sockopt *sopt) { struct cc_algo *algo; void *ptr = NULL; - struct tcpcb *tp; + struct tcpcb *tp = intotcpcb(inp); struct cc_var cc_mem; char buf[TCP_CA_NAME_MAX]; size_t mem_sz; @@ -1967,7 +1918,7 @@ no_mem_needed: */ memset(&cc_mem, 0, sizeof(cc_mem)); INP_WLOCK(inp); - if 
(inp->inp_flags & INP_DROPPED) { + if (__predict_false(tp->t_flags & TF_DISCONNECTED)) { INP_WUNLOCK(inp); if (ptr) free(ptr, M_CC_MEM); @@ -1977,7 +1928,6 @@ no_mem_needed: CC_LIST_RUNLOCK(); return (ECONNRESET); } - tp = intotcpcb(inp); if (ptr != NULL) memset(ptr, 0, mem_sz); cc_mem.tp = tp; @@ -2043,8 +1993,8 @@ tcp_default_ctloutput(struct tcpcb *tp, struct sockopt *sopt) size_t len; INP_WLOCK_ASSERT(inp); - KASSERT((inp->inp_flags & INP_DROPPED) == 0, - ("inp_flags == %x", inp->inp_flags)); + KASSERT((tp->t_flags & TF_DISCONNECTED) == 0, + ("tp_flags == %x", tp->t_flags)); KASSERT(inp->inp_socket != NULL, ("inp_socket == NULL")); switch (sopt->sopt_level) { @@ -2673,7 +2623,7 @@ tcp_disconnect(struct tcpcb *tp) soisdisconnecting(so); sbflush(&so->so_rcv); tcp_usrclosed(tp); - if (!(inp->inp_flags & INP_DROPPED)) + if (!(tp->t_flags & TF_DISCONNECTED)) /* Ignore stack's drop request, we already at it. */ (void)tcp_output_nodrop(tp); } diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 95c4e4c52ba0..987bb98c19af 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -789,7 +789,7 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack) #define TF_TSO 0x01000000 /* TSO enabled on this connection */ #define TF_TOE 0x02000000 /* this connection is offloaded */ #define TF_CLOSED 0x04000000 /* close(2) called on socket */ -#define TF_UNUSED 0x08000000 /* was TF_SENTSYN */ +#define TF_DISCONNECTED 0x08000000 /* went through tcp_close() */ #define TF_LRD 0x10000000 /* Lost Retransmission Detection */ #define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */ #define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */ diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c index 1c4e89069a4b..38171f3439be 100644 --- a/sys/netinet/toecore.c +++ b/sys/netinet/toecore.c @@ -212,16 +212,15 @@ static void toe_listen_start(struct inpcb *inp, void *arg) { struct toedev *t, *tod; - struct tcpcb *tp; + struct tcpcb *tp = intotcpcb(inp); 
INP_WLOCK_ASSERT(inp); KASSERT(inp->inp_pcbinfo == &V_tcbinfo, ("%s: inp is not a TCP inp", __func__)); - if (inp->inp_flags & INP_DROPPED) + if (tp->t_flags & TF_DISCONNECTED) return; - tp = intotcpcb(inp); if (tp->t_state != TCPS_LISTEN) return; @@ -510,13 +509,12 @@ toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa, void toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err) { + struct tcpcb *tp = intotcpcb(inp); NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(inp); - if (!(inp->inp_flags & INP_DROPPED)) { - struct tcpcb *tp = intotcpcb(inp); - + if (!(tp->t_flags & TF_DISCONNECTED)) { KASSERT(tp->t_flags & TF_TOE, ("%s: tp %p not offloaded.", __func__, tp)); diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index bc21c31bf446..09a62a53e054 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -489,12 +489,11 @@ in6_pcbconnect(struct inpcb *inp, struct sockaddr_in6 *sin6, struct ucred *cred, inp->inp_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); - if ((inp->inp_flags & INP_INHASHLIST) != 0) { - in_pcbrehash(inp); - } else { + if (inp->inp_flags & INP_UNCONNECTED) { error = in_pcbinshash(inp); MPASS(error == 0); - } + } else + in_pcbrehash(inp); return (0); } @@ -509,6 +508,7 @@ in6_pcbdisconnect(struct inpcb *inp) INP_HASH_WLOCK(inp->inp_pcbinfo); in_pcbremhash(inp); + inp->inp_flags |= INP_UNCONNECTED; CK_LIST_INSERT_HEAD(&inp->inp_pcbinfo->ipi_list_unconn, inp, inp_unconn_list); INP_HASH_WUNLOCK(inp->inp_pcbinfo); diff --git a/sys/netipsec/xform_tcp.c b/sys/netipsec/xform_tcp.c index d3d4d6c4d734..a87b27048dd7 100644 --- a/sys/netipsec/xform_tcp.c +++ b/sys/netipsec/xform_tcp.c @@ -76,7 +76,7 @@ static int tcp_ipsec_pcbctl(struct inpcb *inp, struct sockopt *sopt) { - struct tcpcb *tp; + struct tcpcb *tp = intotcpcb(inp); int error, optval; if (sopt->sopt_name != TCP_MD5SIG) { @@ -85,11 +85,10 @@ tcp_ipsec_pcbctl(struct inpcb *inp, struct sockopt *sopt) if (sopt->sopt_dir == SOPT_GET) { 
INP_RLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { INP_RUNLOCK(inp); return (ECONNRESET); } - tp = intotcpcb(inp); optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; INP_RUNLOCK(inp); @@ -103,11 +102,10 @@ tcp_ipsec_pcbctl(struct inpcb *inp, struct sockopt *sopt) /* INP_WLOCK_RECHECK */ INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { + if (tp->t_flags & TF_DISCONNECTED) { INP_WUNLOCK(inp); return (ECONNRESET); } - tp = intotcpcb(inp); if (optval > 0) tp->t_flags |= TF_SIGNATURE; else |
