Diffstat (limited to 'sys/dev/cxgbe/tom')
-rw-r--r--   sys/dev/cxgbe/tom/t4_connect.c   |  43
-rw-r--r--   sys/dev/cxgbe/tom/t4_cpl_io.c    | 498
-rw-r--r--   sys/dev/cxgbe/tom/t4_ddp.c       |  22
-rw-r--r--   sys/dev/cxgbe/tom/t4_listen.c    |   7
-rw-r--r--   sys/dev/cxgbe/tom/t4_tls.c       | 357
-rw-r--r--   sys/dev/cxgbe/tom/t4_tls.h       |   1
-rw-r--r--   sys/dev/cxgbe/tom/t4_tom.c       | 149
-rw-r--r--   sys/dev/cxgbe/tom/t4_tom.h       |  22
-rw-r--r--   sys/dev/cxgbe/tom/t4_tom_l2t.c   |   2
9 files changed, 934 insertions(+), 167 deletions(-)
diff --git a/sys/dev/cxgbe/tom/t4_connect.c b/sys/dev/cxgbe/tom/t4_connect.c
index 99e4c222996d..c236ee060bc2 100644
--- a/sys/dev/cxgbe/tom/t4_connect.c
+++ b/sys/dev/cxgbe/tom/t4_connect.c
@@ -89,6 +89,12 @@ do_act_establish(struct sge_iq *iq, const struct rss_header *rss,
 	INP_WLOCK(inp);
 	toep->tid = tid;
 	insert_tid(sc, tid, toep, inp->inp_vflag & INP_IPV6 ? 2 : 1);
+	if (sc->params.tid_qid_sel_mask != 0) {
+		update_tid_qid_sel(toep->vi, &toep->params, tid);
+		toep->ofld_txq = &sc->sge.ofld_txq[toep->params.txq_idx];
+		toep->ctrlq = &sc->sge.ctrlq[toep->params.ctrlq_idx];
+	}
+
 	if (inp->inp_flags & INP_DROPPED) {
 		/* socket closed by the kernel before hw told us it connected */
@@ -205,7 +211,7 @@ static inline int
 act_open_cpl_size(struct adapter *sc, int isipv6)
 {
 	int idx;
-	static const int sz_table[3][2] = {
+	static const int sz_table[4][2] = {
 		{
 			sizeof (struct cpl_act_open_req),
 			sizeof (struct cpl_act_open_req6)
@@ -218,10 +224,14 @@ act_open_cpl_size(struct adapter *sc, int isipv6)
 			sizeof (struct cpl_t6_act_open_req),
 			sizeof (struct cpl_t6_act_open_req6)
 		},
+		{
+			sizeof (struct cpl_t7_act_open_req),
+			sizeof (struct cpl_t7_act_open_req6)
+		},
 	};
 
 	MPASS(chip_id(sc) >= CHELSIO_T4);
-	idx = min(chip_id(sc) - CHELSIO_T4, 2);
+	idx = min(chip_id(sc) - CHELSIO_T4, 3);
 	return (sz_table[idx][!!isipv6]);
 }
@@ -255,6 +265,7 @@ t4_connect(struct toedev *tod, struct socket *so, struct nhop_object *nh,
 	struct offload_settings settings;
 	struct epoch_tracker et;
 	uint16_t vid = 0xfff, pcp = 0;
+	uint64_t ntuple;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
@@ -308,10 +319,12 @@ t4_connect(struct toedev *tod, struct socket *so, struct nhop_object *nh,
 	qid_atid = V_TID_QID(toep->ofld_rxq->iq.abs_id) |
 	    V_TID_TID(toep->tid) | V_TID_COOKIE(CPL_COOKIE_TOM);
+	ntuple = select_ntuple(vi, toep->l2te);
 	if (isipv6) {
 		struct cpl_act_open_req6 *cpl = wrtod(wr);
 		struct cpl_t5_act_open_req6 *cpl5 = (void *)cpl;
 		struct cpl_t6_act_open_req6 *cpl6 = (void *)cpl;
+		struct cpl_t7_act_open_req6 *cpl7 = (void *)cpl;
 
 		if ((inp->inp_vflag & INP_IPV6) == 0)
 			DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP);
@@ -323,18 +336,23 @@ t4_connect(struct toedev *tod, struct socket *so, struct nhop_object *nh,
 		switch (chip_id(sc)) {
 		case CHELSIO_T4:
 			INIT_TP_WR(cpl, 0);
-			cpl->params = select_ntuple(vi, toep->l2te);
+			cpl->params = htobe32((uint32_t)ntuple);
 			break;
 		case CHELSIO_T5:
 			INIT_TP_WR(cpl5, 0);
 			cpl5->iss = htobe32(tp->iss);
-			cpl5->params = select_ntuple(vi, toep->l2te);
+			cpl5->params = htobe64(V_FILTER_TUPLE(ntuple));
 			break;
 		case CHELSIO_T6:
-		default:
 			INIT_TP_WR(cpl6, 0);
 			cpl6->iss = htobe32(tp->iss);
-			cpl6->params = select_ntuple(vi, toep->l2te);
+			cpl6->params = htobe64(V_FILTER_TUPLE(ntuple));
+			break;
+		case CHELSIO_T7:
+		default:
+			INIT_TP_WR(cpl7, 0);
+			cpl7->iss = htobe32(tp->iss);
+			cpl7->params = htobe64(V_T7_FILTER_TUPLE(ntuple));
 			break;
 		}
 		OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
 		    qid_atid));
@@ -356,23 +374,28 @@ t4_connect(struct toedev *tod, struct socket *so, struct nhop_object *nh,
 		struct cpl_act_open_req *cpl = wrtod(wr);
 		struct cpl_t5_act_open_req *cpl5 = (void *)cpl;
 		struct cpl_t6_act_open_req *cpl6 = (void *)cpl;
+		struct cpl_t7_act_open_req *cpl7 = (void *)cpl;
 
 		switch (chip_id(sc)) {
 		case CHELSIO_T4:
 			INIT_TP_WR(cpl, 0);
-			cpl->params = select_ntuple(vi, toep->l2te);
+			cpl->params = htobe32((uint32_t)ntuple);
 			break;
 		case CHELSIO_T5:
 			INIT_TP_WR(cpl5, 0);
 			cpl5->iss = htobe32(tp->iss);
-			cpl5->params = select_ntuple(vi, toep->l2te);
+			cpl5->params = htobe64(V_FILTER_TUPLE(ntuple));
 			break;
 		case CHELSIO_T6:
-		default:
 			INIT_TP_WR(cpl6, 0);
 			cpl6->iss = htobe32(tp->iss);
-			cpl6->params = select_ntuple(vi, toep->l2te);
+			cpl6->params = htobe64(V_FILTER_TUPLE(ntuple));
 			break;
+		case CHELSIO_T7:
+		default:
+			INIT_TP_WR(cpl7, 0);
+			cpl7->iss = htobe32(tp->iss);
+			cpl7->params = htobe64(V_T7_FILTER_TUPLE(ntuple));
 		}
 		OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
 		    qid_atid));
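[Editor's sketch] The sz_table change above grows the per-chip dispatch table by one row and clamps the index so chips newer than T7 reuse the newest request layout. A minimal standalone illustration of that clamp-and-index pattern; the sizes here are made up and are not the real CPL sizes:

```c
#include <stdio.h>

enum chip { CHELSIO_T4, CHELSIO_T5, CHELSIO_T6, CHELSIO_T7 };

/* Hypothetical sizes standing in for sizeof(struct cpl_*_act_open_req[6]). */
static const int sz_table[4][2] = {
	{ 32, 44 },	/* T4 */
	{ 40, 52 },	/* T5 */
	{ 40, 52 },	/* T6 */
	{ 48, 60 },	/* T7 */
};

static int
act_open_size(enum chip id, int isipv6)
{
	int idx = id - CHELSIO_T4;

	if (idx > 3)
		idx = 3;	/* future chips fall back to the T7 layout */
	return (sz_table[idx][!!isipv6]);
}

int
main(void)
{
	printf("T7 IPv6 request: %d bytes\n", act_open_size(CHELSIO_T7, 1));
	return (0);
}
```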
diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 7a6b1cbdd736..5c39ae5fa8f3 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -66,6 +66,7 @@
 #include <vm/vm_page.h>
 
 #include <dev/iscsi/iscsi_proto.h>
+#include <dev/nvmf/nvmf_proto.h>
 
 #include "common/common.h"
 #include "common/t4_msg.h"
@@ -127,8 +128,9 @@ send_flowc_wr(struct toepcb *toep, struct tcpcb *tp)
 
 	paramidx = 0;
 	FLOWC_PARAM(PFNVFN, pfvf);
-	FLOWC_PARAM(CH, pi->tx_chan);
-	FLOWC_PARAM(PORT, pi->tx_chan);
+	/* Firmware expects hw port and will translate to channel itself. */
+	FLOWC_PARAM(CH, pi->hw_port);
+	FLOWC_PARAM(PORT, pi->hw_port);
 	FLOWC_PARAM(IQID, toep->ofld_rxq->iq.abs_id);
 	FLOWC_PARAM(SNDBUF, toep->params.sndbuf);
 	if (tp) {
@@ -148,6 +150,8 @@ send_flowc_wr(struct toepcb *toep, struct tcpcb *tp)
 
 	KASSERT(paramidx == nparams, ("nparams mismatch"));
 
+	KASSERT(howmany(flowclen, 16) <= MAX_OFLD_TX_SDESC_CREDITS,
+	    ("%s: tx_credits %u too large", __func__, howmany(flowclen, 16)));
 	txsd->tx_credits = howmany(flowclen, 16);
 	txsd->plen = 0;
 	KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
@@ -215,6 +219,8 @@ update_tx_rate_limit(struct adapter *sc, struct toepcb *toep, u_int Bps)
 	else
 		flowc->mnemval[0].val = htobe32(tc_idx);
 
+	KASSERT(flowclen16 <= MAX_OFLD_TX_SDESC_CREDITS,
+	    ("%s: tx_credits %u too large", __func__, flowclen16));
 	txsd->tx_credits = flowclen16;
 	txsd->plen = 0;
 	toep->tx_credits -= txsd->tx_credits;
@@ -490,6 +496,12 @@ t4_close_conn(struct adapter *sc, struct toepcb *toep)
 #define MIN_ISO_TX_CREDITS  (howmany(sizeof(struct cpl_tx_data_iso), 16))
 #define MIN_TX_CREDITS(iso)						\
 	(MIN_OFLD_TX_CREDITS + ((iso) ? MIN_ISO_TX_CREDITS : 0))
+#define MIN_OFLD_TX_V2_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_v2_wr) + 1, 16))
+#define MIN_TX_V2_CREDITS(iso)						\
+	(MIN_OFLD_TX_V2_CREDITS + ((iso) ? MIN_ISO_TX_CREDITS : 0))
+
+_Static_assert(MAX_OFLD_TX_CREDITS <= MAX_OFLD_TX_SDESC_CREDITS,
+    "MAX_OFLD_TX_SDESC_CREDITS too small");
 
 /* Maximum amount of immediate data we could stuff in a WR */
 static inline int
@@ -534,6 +546,46 @@ max_dsgl_nsegs(int tx_credits, int iso)
 	return (nseg);
 }
 
+/* Maximum amount of immediate data we could stuff in a WR */
+static inline int
+max_imm_payload_v2(int tx_credits, int iso)
+{
+	const int iso_cpl_size = iso ? sizeof(struct cpl_tx_data_iso) : 0;
+
+	KASSERT(tx_credits >= 0 &&
+	    tx_credits <= MAX_OFLD_TX_CREDITS,
+	    ("%s: %d credits", __func__, tx_credits));
+
+	if (tx_credits < MIN_TX_V2_CREDITS(iso))
+		return (0);
+
+	return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_v2_wr) -
+	    iso_cpl_size);
+}
+
+/* Maximum number of SGL entries we could stuff in a WR */
+static inline int
+max_dsgl_nsegs_v2(int tx_credits, int iso, int imm_payload)
+{
+	int nseg = 1;	/* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */
+	int sge_pair_credits = tx_credits - MIN_TX_V2_CREDITS(iso);
+
+	KASSERT(tx_credits >= 0 &&
+	    tx_credits <= MAX_OFLD_TX_CREDITS,
+	    ("%s: %d credits", __func__, tx_credits));
+
+	if (tx_credits < MIN_TX_V2_CREDITS(iso) ||
+	    sge_pair_credits <= howmany(imm_payload, 16))
+		return (0);
+	sge_pair_credits -= howmany(imm_payload, 16);
+
+	nseg += 2 * (sge_pair_credits * 16 / 24);
+	if ((sge_pair_credits * 16) % 24 == 16)
+		nseg++;
+
+	return (nseg);
+}
+
 static inline void
 write_tx_wr(void *dst, struct toepcb *toep, int fw_wr_opcode,
     unsigned int immdlen, unsigned int plen, uint8_t credits, int shove,
@@ -561,6 +613,35 @@ write_tx_wr(void *dst, struct toepcb *toep, int fw_wr_opcode,
 	}
 }
 
+static inline void
+write_tx_v2_wr(void *dst, struct toepcb *toep, int fw_wr_opcode,
+    unsigned int immdlen, unsigned int plen, uint8_t credits, int shove,
+    int ulp_submode)
+{
+	struct fw_ofld_tx_data_v2_wr *txwr = dst;
+	uint32_t flags;
+
+	memset(txwr, 0, sizeof(*txwr));
+	txwr->op_to_immdlen = htobe32(V_WR_OP(fw_wr_opcode) |
+	    V_FW_WR_IMMDLEN(immdlen));
+	txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) |
+	    V_FW_WR_LEN16(credits));
+	txwr->plen = htobe32(plen);
+	flags = V_TX_ULP_MODE(ULP_MODE_NVMET) | V_TX_ULP_SUBMODE(ulp_submode) |
+	    V_TX_URG(0) | V_TX_SHOVE(shove);
+
+	if (toep->params.tx_align > 0) {
+		if (plen < 2 * toep->params.emss)
+			flags |= F_FW_OFLD_TX_DATA_WR_LSODISABLE;
+		else
+			flags |= F_FW_OFLD_TX_DATA_WR_ALIGNPLD |
+			    (toep->params.nagle == 0 ? 0 :
+			    F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE);
+	}
+
+	txwr->lsodisable_to_flags = htobe32(flags);
+}
+
 /*
  * Generate a DSGL from a starting mbuf.  The total number of segments and the
  * maximum segments in any one mbuf are provided.
@@ -612,6 +693,48 @@ write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n)
 	    __func__, nsegs, start, stop));
 }
 
+bool
+t4_push_raw_wr(struct adapter *sc, struct toepcb *toep, struct mbuf *m)
+{
+#ifdef INVARIANTS
+	struct inpcb *inp = toep->inp;
+#endif
+	struct wrqe *wr;
+	struct ofld_tx_sdesc *txsd;
+	u_int credits, plen;
+
+	INP_WLOCK_ASSERT(inp);
+	MPASS(mbuf_raw_wr(m));
+	plen = m->m_pkthdr.len;
+	credits = howmany(plen, 16);
+	if (credits > toep->tx_credits)
+		return (false);
+
+	wr = alloc_wrqe(roundup2(plen, 16), &toep->ofld_txq->wrq);
+	if (wr == NULL)
+		return (false);
+
+	m_copydata(m, 0, plen, wrtod(wr));
+	m_freem(m);
+
+	toep->tx_credits -= credits;
+	if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
+		toep->flags |= TPF_TX_SUSPENDED;
+
+	KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
+	KASSERT(credits <= MAX_OFLD_TX_SDESC_CREDITS,
+	    ("%s: tx_credits %u too large", __func__, credits));
+	txsd = &toep->txsd[toep->txsd_pidx];
+	txsd->plen = 0;
+	txsd->tx_credits = credits;
+	if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
+		toep->txsd_pidx = 0;
+	toep->txsd_avail--;
+
+	t4_wrq_tx(sc, wr);
+	return (true);
+}
+
 /*
  * Max number of SGL entries an offload tx work request can have.  This is 41
  * (1 + 40) for a full 512B work request.
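[Editor's sketch] The v2 helpers above split a work request's 16-byte credits between the fw_ofld_tx_data_v2_wr header, the immediate PDU header, and SGL space, where each 24-byte ulp_tx_sge_pair describes two segments and 16 leftover bytes describe one. A runnable sketch of the same arithmetic, assuming a 32-byte v2 WR header (the real size comes from the firmware header files):

```c
#include <stdio.h>

/*
 * Sketch of the v2 SGL capacity math.  Assumptions: 16 B per credit,
 * a 32 B fw_ofld_tx_data_v2_wr, and a 16 B ulptx_sgl leader that
 * already describes one segment.
 */
static int
dsgl_capacity(int tx_credits, int imm_hdr_len)
{
	const int min_v2_credits = (32 + 1 + 15) / 16;	/* howmany(wr+1, 16) */
	int pair_credits = tx_credits - min_v2_credits;
	int nseg = 1;

	pair_credits -= (imm_hdr_len + 15) / 16;	/* immediate header */
	if (pair_credits <= 0)
		return (0);
	nseg += 2 * (pair_credits * 16 / 24);		/* full sge pairs */
	if ((pair_credits * 16) % 24 == 16)
		nseg++;					/* half a pair left */
	return (nseg);
}

int
main(void)
{
	/* 32 credits (a 512 B WR) and a 24 B PDU header -> 37 segments. */
	printf("%d segments\n", dsgl_capacity(32, 24));
	return (0);
}
```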
@@ -644,6 +767,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
 	struct tcpcb *tp = intotcpcb(inp);
 	struct socket *so = inp->inp_socket;
 	struct sockbuf *sb = &so->so_snd;
+	struct mbufq *pduq = &toep->ulp_pduq;
 	int tx_credits, shove, compl, sowwakeup;
 	struct ofld_tx_sdesc *txsd;
 	bool nomap_mbuf_seen;
@@ -688,6 +812,19 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
 		max_imm = max_imm_payload(tx_credits, 0);
 		max_nsegs = max_dsgl_nsegs(tx_credits, 0);
 
+		if (__predict_false((sndptr = mbufq_first(pduq)) != NULL)) {
+			if (!t4_push_raw_wr(sc, toep, sndptr)) {
+				toep->flags |= TPF_TX_SUSPENDED;
+				return;
+			}
+
+			m = mbufq_dequeue(pduq);
+			MPASS(m == sndptr);
+
+			txsd = &toep->txsd[toep->txsd_pidx];
+			continue;
+		}
+
 		SOCKBUF_LOCK(sb);
 		sowwakeup = drop;
 		if (drop) {
@@ -705,6 +842,8 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
 			if ((m->m_flags & M_NOTREADY) != 0)
 				break;
+			if (plen + m->m_len > MAX_OFLD_TX_SDESC_PLEN)
+				break;
 			if (m->m_flags & M_EXTPG) {
 #ifdef KERN_TLS
 				if (m->m_epg_tls != NULL) {
@@ -870,6 +1009,8 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
 			toep->flags |= TPF_TX_SUSPENDED;
 
 		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
+		KASSERT(plen <= MAX_OFLD_TX_SDESC_PLEN,
+		    ("%s: plen %u too large", __func__, plen));
 		txsd->plen = plen;
 		txsd->tx_credits = credits;
 		txsd++;
@@ -914,8 +1055,8 @@ rqdrop_locked(struct mbufq *q, int plen)
 #define	ULP_ISO	G_TX_ULP_SUBMODE(F_FW_ISCSI_TX_DATA_WR_ULPSUBMODE_ISO)
 
 static void
-write_tx_data_iso(void *dst, u_int ulp_submode, uint8_t flags, uint16_t mss,
-    int len, int npdu)
+write_iscsi_tx_data_iso(void *dst, u_int ulp_submode, uint8_t flags,
+    uint16_t mss, int len, int npdu)
 {
 	struct cpl_tx_data_iso *cpl;
 	unsigned int burst_size;
@@ -1079,7 +1220,7 @@ write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr)
 		    adjusted_plen, credits, shove, ulp_submode | ULP_ISO);
 		cpl_iso = (struct cpl_tx_data_iso *)(txwr + 1);
 		MPASS(plen == sndptr->m_pkthdr.len);
-		write_tx_data_iso(cpl_iso, ulp_submode,
+		write_iscsi_tx_data_iso(cpl_iso, ulp_submode,
 		    mbuf_iscsi_iso_flags(sndptr), iso_mss, plen, npdu);
 		p = cpl_iso + 1;
 	} else {
@@ -1115,21 +1256,269 @@ write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr)
 	return (wr);
 }
 
+static void
+write_nvme_tx_data_iso(void *dst, u_int ulp_submode, u_int iso_type,
+    uint16_t mss, int len, int npdu, int pdo)
+{
+	struct cpl_t7_tx_data_iso *cpl;
+	unsigned int burst_size;
+
+	/*
+	 * TODO: Need to figure out how the LAST_PDU and SUCCESS flags
+	 * are handled.
+	 *
+	 * - Does len need padding bytes?  (If so, does padding need
+	 *   to be in DSGL input?)
+	 *
+	 * - burst always 0?
+	 */
+	burst_size = 0;
+
+	cpl = (struct cpl_t7_tx_data_iso *)dst;
+	cpl->op_to_scsi = htonl(V_CPL_T7_TX_DATA_ISO_OPCODE(CPL_TX_DATA_ISO) |
+	    V_CPL_T7_TX_DATA_ISO_FIRST(1) |
+	    V_CPL_T7_TX_DATA_ISO_LAST(1) |
+	    V_CPL_T7_TX_DATA_ISO_CPLHDRLEN(0) |
+	    V_CPL_T7_TX_DATA_ISO_HDRCRC(!!(ulp_submode & ULP_CRC_HEADER)) |
+	    V_CPL_T7_TX_DATA_ISO_PLDCRC(!!(ulp_submode & ULP_CRC_DATA)) |
+	    V_CPL_T7_TX_DATA_ISO_IMMEDIATE(0) |
+	    V_CPL_T7_TX_DATA_ISO_SCSI(iso_type));
+
+	cpl->nvme_tcp_pkd = F_CPL_T7_TX_DATA_ISO_NVME_TCP;
+	cpl->ahs = 0;
+	cpl->mpdu = htons(DIV_ROUND_UP(mss, 4));
+	cpl->burst = htonl(DIV_ROUND_UP(burst_size, 4));
+	cpl->size = htonl(len);
+	cpl->num_pi_bytes_seglen_offset = htonl(0);
+	cpl->datasn_offset = htonl(0);
+	cpl->buffer_offset = htonl(0);
+	cpl->pdo_pkd = pdo;
+}
+
+static struct wrqe *
+write_nvme_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr)
+{
+	struct mbuf *m;
+	const struct nvme_tcp_common_pdu_hdr *hdr;
+	struct fw_v2_nvmet_tx_data_wr *txwr;
+	struct cpl_tx_data_iso *cpl_iso;
+	void *p;
+	struct wrqe *wr;
+	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
+	u_int adjusted_plen, imm_data, ulp_submode;
+	struct inpcb *inp = toep->inp;
+	struct tcpcb *tp = intotcpcb(inp);
+	int tx_credits, shove, npdu, wr_len;
+	uint16_t iso_mss;
+	bool iso, nomap_mbuf_seen;
+
+	M_ASSERTPKTHDR(sndptr);
+
+	tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
+	if (mbuf_raw_wr(sndptr)) {
+		plen = sndptr->m_pkthdr.len;
+		KASSERT(plen <= SGE_MAX_WR_LEN,
+		    ("raw WR len %u is greater than max WR len", plen));
+		if (plen > tx_credits * 16)
+			return (NULL);
+
+		wr = alloc_wrqe(roundup2(plen, 16), &toep->ofld_txq->wrq);
+		if (__predict_false(wr == NULL))
+			return (NULL);
+
+		m_copydata(sndptr, 0, plen, wrtod(wr));
+		return (wr);
+	}
+
+	/*
+	 * The first mbuf is the PDU header that is always sent as
+	 * immediate data.
+	 */
+	imm_data = sndptr->m_len;
+
+	iso = mbuf_iscsi_iso(sndptr);
+	max_imm = max_imm_payload_v2(tx_credits, iso);
+
+	/*
+	 * Not enough credits for the PDU header.
+	 */
+	if (imm_data > max_imm)
+		return (NULL);
+
+	max_nsegs = max_dsgl_nsegs_v2(tx_credits, iso, imm_data);
+	iso_mss = mbuf_iscsi_iso_mss(sndptr);
+
+	plen = imm_data;
+	nsegs = 0;
+	max_nsegs_1mbuf = 0;	/* max # of SGL segments in any one mbuf */
+	nomap_mbuf_seen = false;
+	for (m = sndptr->m_next; m != NULL; m = m->m_next) {
+		int n;
+
+		if (m->m_flags & M_EXTPG)
+			n = sglist_count_mbuf_epg(m, mtod(m, vm_offset_t),
+			    m->m_len);
+		else
+			n = sglist_count(mtod(m, void *), m->m_len);
+
+		nsegs += n;
+		plen += m->m_len;
+
+		/*
+		 * This mbuf would send us _over_ the nsegs limit.
+		 * Suspend tx because the PDU can't be sent out.
+		 */
+		if ((nomap_mbuf_seen || plen > max_imm) && nsegs > max_nsegs)
+			return (NULL);
+
+		if (m->m_flags & M_EXTPG)
+			nomap_mbuf_seen = true;
+		if (max_nsegs_1mbuf < n)
+			max_nsegs_1mbuf = n;
+	}
+
+	if (__predict_false(toep->flags & TPF_FIN_SENT))
+		panic("%s: excess tx.", __func__);
+
+	/*
+	 * We have a PDU to send.  All of it goes out in one WR so 'm'
+	 * is NULL.  A PDU's length is always a multiple of 4.
+	 */
+	MPASS(m == NULL);
+	MPASS((plen & 3) == 0);
+	MPASS(sndptr->m_pkthdr.len == plen);
+
+	shove = !(tp->t_flags & TF_MORETOCOME);
+
+	/*
+	 * plen doesn't include header digests, padding, and data
+	 * digests which are generated and inserted in the right
+	 * places by the TOE, but they do occupy TCP sequence space
+	 * and need to be accounted for.
+	 *
+	 * To determine the overhead, check the PDU header in sndptr.
+	 * Note that only certain PDU types can use digests and
+	 * padding, and PDO accounts for all but the data digests for
+	 * those PDUs.
+	 */
+	MPASS((sndptr->m_flags & M_EXTPG) == 0);
+	ulp_submode = mbuf_ulp_submode(sndptr);
+	hdr = mtod(sndptr, const void *);
+	switch (hdr->pdu_type) {
+	case NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
+	case NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
+		MPASS(ulp_submode == 0);
+		MPASS(!iso);
+		break;
+	case NVME_TCP_PDU_TYPE_CAPSULE_RESP:
+	case NVME_TCP_PDU_TYPE_R2T:
+		MPASS((ulp_submode & ULP_CRC_DATA) == 0);
+		/* FALLTHROUGH */
+	case NVME_TCP_PDU_TYPE_CAPSULE_CMD:
+		MPASS(!iso);
+		break;
+	case NVME_TCP_PDU_TYPE_H2C_DATA:
+	case NVME_TCP_PDU_TYPE_C2H_DATA:
+		if (le32toh(hdr->plen) + ((ulp_submode & ULP_CRC_DATA) != 0 ?
+		    sizeof(uint32_t) : 0) == plen)
+			MPASS(!iso);
+		break;
+	default:
+		__assert_unreachable();
+	}
+
+	if (iso) {
+		npdu = howmany(plen - hdr->hlen, iso_mss);
+		adjusted_plen = hdr->pdo * npdu + (plen - hdr->hlen);
+		if ((ulp_submode & ULP_CRC_DATA) != 0)
+			adjusted_plen += npdu * sizeof(uint32_t);
+	} else {
+		npdu = 1;
+		adjusted_plen = le32toh(hdr->plen);
+	}
+	wr_len = sizeof(*txwr);
+	if (iso)
+		wr_len += sizeof(struct cpl_tx_data_iso);
+	if (plen <= max_imm && !nomap_mbuf_seen) {
+		/* Immediate data tx for full PDU */
+		imm_data = plen;
+		wr_len += plen;
+		nsegs = 0;
+	} else {
+		/* DSGL tx for PDU data */
+		wr_len += roundup2(imm_data, 16);
+		wr_len += sizeof(struct ulptx_sgl) +
+		    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
+	}
+
+	wr = alloc_wrqe(roundup2(wr_len, 16), &toep->ofld_txq->wrq);
+	if (wr == NULL) {
+		/* XXX: how will we recover from this? */
+		return (NULL);
+	}
+	txwr = wrtod(wr);
+	credits = howmany(wr->wr_len, 16);
+
+	if (iso) {
+		write_tx_v2_wr(txwr, toep, FW_V2_NVMET_TX_DATA_WR,
+		    imm_data + sizeof(struct cpl_tx_data_iso),
+		    adjusted_plen, credits, shove, ulp_submode | ULP_ISO);
+		cpl_iso = (struct cpl_tx_data_iso *)(txwr + 1);
+		MPASS(plen == sndptr->m_pkthdr.len);
+		write_nvme_tx_data_iso(cpl_iso, ulp_submode,
+		    (hdr->pdu_type & 0x1) == 0 ? 1 : 2, iso_mss, plen, npdu,
+		    hdr->pdo);
+		p = cpl_iso + 1;
+	} else {
+		write_tx_v2_wr(txwr, toep, FW_V2_NVMET_TX_DATA_WR, imm_data,
+		    adjusted_plen, credits, shove, ulp_submode);
+		p = txwr + 1;
+	}
+
+	/* PDU header (and immediate data payload). */
+	m_copydata(sndptr, 0, imm_data, p);
+	if (nsegs != 0) {
+		p = roundup2((char *)p + imm_data, 16);
+		write_tx_sgl(p, sndptr->m_next, NULL, nsegs, max_nsegs_1mbuf);
+		if (wr_len & 0xf) {
+			uint64_t *pad = (uint64_t *)((uintptr_t)txwr + wr_len);
+			*pad = 0;
+		}
+	}
+
+	KASSERT(toep->tx_credits >= credits,
+	    ("%s: not enough credits: credits %u "
+	    "toep->tx_credits %u tx_credits %u nsegs %u "
+	    "max_nsegs %u iso %d", __func__, credits,
+	    toep->tx_credits, tx_credits, nsegs, max_nsegs, iso));
+
+	tp->snd_nxt += adjusted_plen;
+	tp->snd_max += adjusted_plen;
+
+	counter_u64_add(toep->ofld_txq->tx_nvme_pdus, npdu);
+	counter_u64_add(toep->ofld_txq->tx_nvme_octets, plen);
+	if (iso)
+		counter_u64_add(toep->ofld_txq->tx_nvme_iso_wrs, 1);
+
+	return (wr);
+}
+
 void
 t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop)
 {
 	struct mbuf *sndptr, *m;
 	struct fw_wr_hdr *wrhdr;
 	struct wrqe *wr;
-	u_int plen, credits;
+	u_int plen, credits, mode;
 	struct inpcb *inp = toep->inp;
 	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
 	struct mbufq *pduq = &toep->ulp_pduq;
 
 	INP_WLOCK_ASSERT(inp);
+	mode = ulp_mode(toep);
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
-	KASSERT(ulp_mode(toep) == ULP_MODE_ISCSI,
+	KASSERT(mode == ULP_MODE_ISCSI || mode == ULP_MODE_NVMET,
 	    ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep));
 
 	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
@@ -1162,7 +1551,7 @@ t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop)
 			if (sbu > 0) {
 				/*
 				 * The data transmitted before the
-				 * tid's ULP mode changed to ISCSI is
+				 * tid's ULP mode changed to ISCSI/NVMET is
 				 * still in so_snd.  Incoming credits
 				 * should account for so_snd first.
 				 */
@@ -1175,7 +1564,10 @@ t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop)
 	}
 
 	while ((sndptr = mbufq_first(pduq)) != NULL) {
-		wr = write_iscsi_mbuf_wr(toep, sndptr);
+		if (mode == ULP_MODE_ISCSI)
+			wr = write_iscsi_mbuf_wr(toep, sndptr);
+		else
+			wr = write_nvme_mbuf_wr(toep, sndptr);
 		if (wr == NULL) {
 			toep->flags |= TPF_TX_SUSPENDED;
 			return;
@@ -1211,6 +1603,8 @@ t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop)
 			toep->flags |= TPF_TX_SUSPENDED;
 
 		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
+		KASSERT(plen <= MAX_OFLD_TX_SDESC_PLEN,
+		    ("%s: plen %u too large", __func__, plen));
 		txsd->plen = plen;
 		txsd->tx_credits = credits;
 		txsd++;
@@ -1232,7 +1626,8 @@ static inline void
 t4_push_data(struct adapter *sc, struct toepcb *toep, int drop)
 {
 
-	if (ulp_mode(toep) == ULP_MODE_ISCSI)
+	if (ulp_mode(toep) == ULP_MODE_ISCSI ||
+	    ulp_mode(toep) == ULP_MODE_NVMET)
 		t4_push_pdus(sc, toep, drop);
 	else if (toep->flags & TPF_KTLS)
 		t4_push_ktls(sc, toep, drop);
@@ -1240,6 +1635,35 @@ t4_push_data(struct adapter *sc, struct toepcb *toep, int drop)
 		t4_push_frames(sc, toep, drop);
 }
 
+void
+t4_raw_wr_tx(struct adapter *sc, struct toepcb *toep, struct mbuf *m)
+{
+#ifdef INVARIANTS
+	struct inpcb *inp = toep->inp;
+#endif
+
+	INP_WLOCK_ASSERT(inp);
+
+	/*
+	 * If there are other raw WRs enqueued, enqueue to preserve
+	 * FIFO ordering.
+	 */
+	if (!mbufq_empty(&toep->ulp_pduq)) {
+		mbufq_enqueue(&toep->ulp_pduq, m);
+		return;
+	}
+
+	/*
+	 * Cannot call t4_push_data here as that will lock so_snd and
+	 * some callers of this run in rx handlers with so_rcv locked.
+	 * Instead, just try to transmit this WR.
+	 */
+	if (!t4_push_raw_wr(sc, toep, m)) {
+		mbufq_enqueue(&toep->ulp_pduq, m);
+		toep->flags |= TPF_TX_SUSPENDED;
+	}
+}
+
 int
 t4_tod_output(struct toedev *tod, struct tcpcb *tp)
 {
@@ -1363,7 +1787,8 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 	socantrcvmore(so);
 	if (ulp_mode(toep) == ULP_MODE_RDMA ||
-	    (ulp_mode(toep) == ULP_MODE_ISCSI && chip_id(sc) >= CHELSIO_T6)) {
+	    (ulp_mode(toep) == ULP_MODE_ISCSI && chip_id(sc) >= CHELSIO_T6) ||
+	    ulp_mode(toep) == ULP_MODE_NVMET) {
 		/*
 		 * There might be data received via DDP before the FIN
 		 * not reported to the driver.  Just assume the
@@ -1909,7 +2334,8 @@ do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 		SOCKBUF_LOCK(sb);
 		sbu = sbused(sb);
-		if (ulp_mode(toep) == ULP_MODE_ISCSI) {
+		if (ulp_mode(toep) == ULP_MODE_ISCSI ||
+		    ulp_mode(toep) == ULP_MODE_NVMET) {
 			if (__predict_false(sbu > 0)) {
 				/*
 				 * The data transmitted before the
@@ -1941,35 +2367,55 @@
 }
 
 void
-t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, struct toepcb *toep,
+write_set_tcb_field(struct adapter *sc, void *dst, struct toepcb *toep,
     uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie)
 {
-	struct wrqe *wr;
-	struct cpl_set_tcb_field *req;
-	struct ofld_tx_sdesc *txsd;
+	struct cpl_set_tcb_field *req = dst;
 
 	MPASS((cookie & ~M_COOKIE) == 0);
 	if (reply) {
 		MPASS(cookie != CPL_COOKIE_RESERVED);
 	}
 
-	wr = alloc_wrqe(sizeof(*req), wrq);
+	INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid);
+	if (reply == 0) {
+		req->reply_ctrl = htobe16(F_NO_REPLY);
+	} else {
+		const int qid = toep->ofld_rxq->iq.abs_id;
+
+		if (chip_id(sc) >= CHELSIO_T7) {
+			req->reply_ctrl = htobe16(V_T7_QUEUENO(qid) |
+			    V_T7_REPLY_CHAN(0) | V_NO_REPLY(0));
+		} else {
+			req->reply_ctrl = htobe16(V_QUEUENO(qid) |
+			    V_REPLY_CHAN(0) | V_NO_REPLY(0));
+		}
+	}
+	req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie));
+	req->mask = htobe64(mask);
+	req->val = htobe64(val);
+}
+
+void
+t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, struct toepcb *toep,
+    uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie)
+{
+	struct wrqe *wr;
+	struct ofld_tx_sdesc *txsd;
+	const u_int len = sizeof(struct cpl_set_tcb_field);
+
+	wr = alloc_wrqe(len, wrq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
-	req = wrtod(wr);
+	write_set_tcb_field(sc, wrtod(wr), toep, word, mask, val, reply,
+	    cookie);
 
-	INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid);
-	req->reply_ctrl = htobe16(V_QUEUENO(toep->ofld_rxq->iq.abs_id));
-	if (reply == 0)
-		req->reply_ctrl |= htobe16(F_NO_REPLY);
-	req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie));
-	req->mask = htobe64(mask);
-	req->val = htobe64(val);
 	if (wrq->eq.type == EQ_OFLD) {
 		txsd = &toep->txsd[toep->txsd_pidx];
-		txsd->tx_credits = howmany(sizeof(*req), 16);
+		_Static_assert(howmany(len, 16) <= MAX_OFLD_TX_SDESC_CREDITS,
+		    "MAX_OFLD_TX_SDESC_CREDITS too small");
+		txsd->tx_credits = howmany(len, 16);
 		txsd->plen = 0;
 		KASSERT(toep->tx_credits >= txsd->tx_credits &&
 		    toep->txsd_avail > 0,
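[Editor's sketch] write_set_tcb_field() above separates building a CPL_SET_TCB_FIELD from queuing it, so callers can embed the CPL in a raw-WR mbuf and hand it to t4_raw_wr_tx(), which preserves FIFO order with any PDUs already queued. A sketch of that calling pattern, mirroring t4_set_tls_tcb_field() in the t4_tls.c hunks later in this diff (this assumes the driver's headers and elides real error handling):

```c
/* Quiesce RX for a tid via the raw-WR path; illustrative only. */
static void
quiesce_rx_example(struct adapter *sc, struct toepcb *toep)
{
	struct mbuf *m;

	m = alloc_raw_wr_mbuf(sizeof(struct cpl_set_tcb_field));
	if (m == NULL)
		return;		/* real code must handle allocation failure */
	write_set_tcb_field(sc, mtod(m, void *), toep, W_TCB_T_FLAGS,
	    V_TF_RX_QUIESCE(1), V_TF_RX_QUIESCE(1), 1, CPL_COOKIE_TOM);
	t4_raw_wr_tx(sc, toep, m);	/* keeps FIFO order with queued PDUs */
}
```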
diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c
index 2fee8fa91dac..35fb1061d867 100644
--- a/sys/dev/cxgbe/tom/t4_ddp.c
+++ b/sys/dev/cxgbe/tom/t4_ddp.c
@@ -1655,7 +1655,10 @@ t4_write_page_pods_for_ps(struct adapter *sc, struct sge_wrq *wrq, int tid,
 
 		INIT_ULPTX_WR(ulpmc, len, 0, 0);
 		ulpmc->cmd = cmd;
-		ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
+		if (chip_id(sc) >= CHELSIO_T7)
+			ulpmc->dlen = htobe32(V_T7_ULP_MEMIO_DATA_LEN(chunk >> 5));
+		else
+			ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk >> 5));
 		ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
 		ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));
 
@@ -1785,7 +1788,7 @@ t4_write_page_pods_for_rcvbuf(struct adapter *sc, struct sge_wrq *wrq, int tid,
 	return (0);
 }
 
-static struct mbuf *
+struct mbuf *
 alloc_raw_wr_mbuf(int len)
 {
 	struct mbuf *m;
@@ -1842,7 +1845,10 @@ t4_write_page_pods_for_bio(struct adapter *sc, struct toepcb *toep,
 		ulpmc = mtod(m, struct ulp_mem_io *);
 		INIT_ULPTX_WR(ulpmc, len, 0, toep->tid);
 		ulpmc->cmd = cmd;
-		ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
+		if (chip_id(sc) >= CHELSIO_T7)
+			ulpmc->dlen = htobe32(V_T7_ULP_MEMIO_DATA_LEN(chunk >> 5));
+		else
+			ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk >> 5));
 		ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
 		ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));
 
@@ -1922,7 +1928,10 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep,
 
 		INIT_ULPTX_WR(ulpmc, len, 0, toep->tid);
 		ulpmc->cmd = cmd;
-		ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
+		if (chip_id(sc) >= CHELSIO_T7)
+			ulpmc->dlen = htobe32(V_T7_ULP_MEMIO_DATA_LEN(chunk >> 5));
+		else
+			ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk >> 5));
 		ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
 		ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));
 
@@ -2013,7 +2022,10 @@ t4_write_page_pods_for_sgl(struct adapter *sc, struct toepcb *toep,
 
 		INIT_ULPTX_WR(ulpmc, len, 0, toep->tid);
 		ulpmc->cmd = cmd;
-		ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
+		if (chip_id(sc) >= CHELSIO_T7)
+			ulpmc->dlen = htobe32(V_T7_ULP_MEMIO_DATA_LEN(chunk >> 5));
+		else
+			ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk >> 5));
 		ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
 		ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));
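[Editor's sketch] All four page-pod writers in t4_ddp.c repeat the same T7 conditional for the ulp_mem_io dlen field. A hypothetical helper showing the shape of that per-chip encoding; this function is not part of the driver and only compiles against the driver's headers:

```c
/* Encode a ULP_MEMIO data length (chunk is bytes; hw wants 32 B units). */
static inline uint32_t
ulp_memio_dlen(struct adapter *sc, u_int chunk)
{
	return (chip_id(sc) >= CHELSIO_T7 ?
	    htobe32(V_T7_ULP_MEMIO_DATA_LEN(chunk >> 5)) :
	    htobe32(V_ULP_MEMIO_DATA_LEN(chunk >> 5)));
}
```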
diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c
index 06c495dcafc3..b879f6883f25 100644
--- a/sys/dev/cxgbe/tom/t4_listen.c
+++ b/sys/dev/cxgbe/tom/t4_listen.c
@@ -508,10 +508,11 @@ send_flowc_wr_synqe(struct adapter *sc, struct synq_entry *synqe)
 	    V_FW_WR_FLOWID(synqe->tid));
 	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
 	flowc->mnemval[0].val = htobe32(pfvf);
+	/* Firmware expects hw port and will translate to channel itself. */
 	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
-	flowc->mnemval[1].val = htobe32(pi->tx_chan);
+	flowc->mnemval[1].val = htobe32(pi->hw_port);
 	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
-	flowc->mnemval[2].val = htobe32(pi->tx_chan);
+	flowc->mnemval[2].val = htobe32(pi->hw_port);
 	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
 	flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id);
 	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
@@ -1507,6 +1508,8 @@ found:
 
 	init_conn_params(vi, &settings, &inc, so, &cpl->tcpopt, e->idx,
 	    &synqe->params);
+	if (sc->params.tid_qid_sel_mask != 0)
+		update_tid_qid_sel(vi, &synqe->params, tid);
 
 	/*
 	 * If all goes well t4_syncache_respond will get called during
diff --git a/sys/dev/cxgbe/tom/t4_tls.c b/sys/dev/cxgbe/tom/t4_tls.c
index 27c16b9988ae..bbcc1c88c3db 100644
--- a/sys/dev/cxgbe/tom/t4_tls.c
+++ b/sys/dev/cxgbe/tom/t4_tls.c
@@ -61,11 +61,21 @@
 static void
 t4_set_tls_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask,
-    uint64_t val)
+    uint64_t val, int reply, int cookie)
 {
 	struct adapter *sc = td_adapter(toep->td);
+	struct mbuf *m;
+
+	m = alloc_raw_wr_mbuf(sizeof(struct cpl_set_tcb_field));
+	if (m == NULL) {
+		/* XXX */
+		panic("%s: out of memory", __func__);
+	}
 
-	t4_set_tcb_field(sc, &toep->ofld_txq->wrq, toep, word, mask, val, 0, 0);
+	write_set_tcb_field(sc, mtod(m, void *), toep, word, mask, val, reply,
+	    cookie);
+
+	t4_raw_wr_tx(sc, toep, m);
 }
 
 /* TLS and DTLS common routines */
@@ -88,10 +98,9 @@ tls_tx_key(struct toepcb *toep)
 static void
 t4_set_rx_quiesce(struct toepcb *toep)
 {
-	struct adapter *sc = td_adapter(toep->td);
 
-	t4_set_tcb_field(sc, &toep->ofld_txq->wrq, toep, W_TCB_T_FLAGS,
-	    V_TF_RX_QUIESCE(1), V_TF_RX_QUIESCE(1), 1, CPL_COOKIE_TOM);
+	t4_set_tls_tcb_field(toep, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1),
+	    V_TF_RX_QUIESCE(1), 1, CPL_COOKIE_TOM);
 }
 
 /* Clear TF_RX_QUIESCE to re-enable receive. */
@@ -99,7 +108,7 @@ static void
 t4_clear_rx_quiesce(struct toepcb *toep)
 {
 
-	t4_set_tls_tcb_field(toep, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1), 0);
+	t4_set_tls_tcb_field(toep, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1), 0, 0, 0);
 }
 
 /* TLS/DTLS content type for CPL SFO */
@@ -145,16 +154,15 @@ get_tp_plen_max(struct ktls_session *tls)
 	return (tls->params.max_frame_len <= 8192 ? plen : FC_TP_PLEN_MAX);
 }
 
-/* Send request to get the key-id */
+/* Send request to save the key in on-card memory. */
 static int
 tls_program_key_id(struct toepcb *toep, struct ktls_session *tls,
     int direction)
 {
 	struct tls_ofld_info *tls_ofld = &toep->tls;
 	struct adapter *sc = td_adapter(toep->td);
-	struct ofld_tx_sdesc *txsd;
 	int keyid;
-	struct wrqe *wr;
+	struct mbuf *m;
 	struct tls_key_req *kwr;
 	struct tls_keyctx *kctx;
@@ -173,12 +181,12 @@ tls_program_key_id(struct toepcb *toep, struct ktls_session *tls,
 		return (ENOSPC);
 	}
 
-	wr = alloc_wrqe(TLS_KEY_WR_SZ, &toep->ofld_txq->wrq);
-	if (wr == NULL) {
+	m = alloc_raw_wr_mbuf(TLS_KEY_WR_SZ);
+	if (m == NULL) {
 		t4_free_tls_keyid(sc, keyid);
 		return (ENOMEM);
 	}
-	kwr = wrtod(wr);
+	kwr = mtod(m, struct tls_key_req *);
 	memset(kwr, 0, TLS_KEY_WR_SZ);
 
 	t4_write_tlskey_wr(tls, direction, toep->tid, F_FW_WR_COMPL, keyid,
@@ -190,15 +198,7 @@ tls_program_key_id(struct toepcb *toep, struct ktls_session *tls,
 		tls_ofld->rx_key_addr = keyid;
 	t4_tls_key_ctx(tls, direction, kctx);
 
-	txsd = &toep->txsd[toep->txsd_pidx];
-	txsd->tx_credits = DIV_ROUND_UP(TLS_KEY_WR_SZ, 16);
-	txsd->plen = 0;
-	toep->tx_credits -= txsd->tx_credits;
-	if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
-		toep->txsd_pidx = 0;
-	toep->txsd_avail--;
-
-	t4_wrq_tx(sc, wr);
+	t4_raw_wr_tx(sc, toep, m);
 
 	return (0);
 }
@@ -207,7 +207,7 @@ int
 tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction)
 {
 	struct adapter *sc = td_adapter(toep->td);
-	int error, explicit_iv_size, mac_first;
+	int error, iv_size, mac_first;
 
 	if (!can_tls_offload(sc))
 		return (EINVAL);
@@ -228,6 +228,21 @@ tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction)
 		}
 	}
 
+	/* TLS 1.1 through TLS 1.3 are currently supported. */
+	if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE ||
+	    tls->params.tls_vminor < TLS_MINOR_VER_ONE ||
+	    tls->params.tls_vminor > TLS_MINOR_VER_THREE) {
+		return (EPROTONOSUPPORT);
+	}
+
+	/* TLS 1.3 is only supported on T7+. */
+	if (tls->params.tls_vminor == TLS_MINOR_VER_THREE) {
+		if (is_t6(sc)) {
+			return (EPROTONOSUPPORT);
+		}
+	}
+
+	/* Sanity check values in *tls. */
 	switch (tls->params.cipher_algorithm) {
 	case CRYPTO_AES_CBC:
 		/* XXX: Explicitly ignore any provided IV. */
@@ -247,13 +262,10 @@ tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction)
 		default:
 			return (EPROTONOSUPPORT);
 		}
-		explicit_iv_size = AES_BLOCK_LEN;
+		iv_size = AES_BLOCK_LEN;
 		mac_first = 1;
 		break;
 	case CRYPTO_AES_NIST_GCM_16:
-		if (tls->params.iv_len != SALT_SIZE) {
-			return (EINVAL);
-		}
 		switch (tls->params.cipher_key_len) {
 		case 128 / 8:
 		case 192 / 8:
@@ -262,20 +274,19 @@ tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction)
 		default:
 			return (EINVAL);
 		}
-		explicit_iv_size = 8;
+
+		/*
+		 * The IV size for TLS 1.2 is the explicit IV in the
+		 * record header.  For TLS 1.3 it is the size of the
+		 * sequence number.
+		 */
+		iv_size = 8;
 		mac_first = 0;
 		break;
 	default:
 		return (EPROTONOSUPPORT);
 	}
 
-	/* Only TLS 1.1 and TLS 1.2 are currently supported. */
-	if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE ||
-	    tls->params.tls_vminor < TLS_MINOR_VER_ONE ||
-	    tls->params.tls_vminor > TLS_MINOR_VER_TWO) {
-		return (EPROTONOSUPPORT);
-	}
-
 	/* Bail if we already have a key. */
 	if (direction == KTLS_TX) {
 		if (toep->tls.tx_key_addr != -1)
 			return (EBUSY);
@@ -289,6 +300,7 @@ tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction)
 	if (error)
 		return (error);
 
+	toep->tls.tls13 = tls->params.tls_vminor == TLS_MINOR_VER_THREE;
 	if (direction == KTLS_TX) {
 		toep->tls.scmd0.seqno_numivs =
 		    (V_SCMD_SEQ_NO_CTRL(3) |
@@ -298,14 +310,14 @@ tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction)
 		     V_SCMD_CIPH_MODE(t4_tls_cipher_mode(tls)) |
 		     V_SCMD_AUTH_MODE(t4_tls_auth_mode(tls)) |
 		     V_SCMD_HMAC_CTRL(t4_tls_hmac_ctrl(tls)) |
-		     V_SCMD_IV_SIZE(explicit_iv_size / 2));
+		     V_SCMD_IV_SIZE(iv_size / 2));
 
 		toep->tls.scmd0.ivgen_hdrlen =
 		    (V_SCMD_IV_GEN_CTRL(1) |
 		     V_SCMD_KEY_CTX_INLINE(0) |
 		     V_SCMD_TLS_FRAG_ENABLE(1));
 
-		toep->tls.iv_len = explicit_iv_size;
+		toep->tls.iv_len = iv_size;
 		toep->tls.frag_size = tls->params.max_frame_len;
 		toep->tls.fcplenmax = get_tp_plen_max(tls);
 		toep->tls.expn_per_ulp = tls->params.tls_hlen +
@@ -352,7 +364,8 @@ tls_uninit_toep(struct toepcb *toep)
 
 static void
 write_tlstx_wr(struct fw_tlstx_data_wr *txwr, struct toepcb *toep,
-    unsigned int plen, unsigned int expn, uint8_t credits, int shove)
+    unsigned int plen, unsigned int expn, uint8_t credits, int shove,
+    int num_ivs)
 {
 	struct tls_ofld_info *tls_ofld = &toep->tls;
 	unsigned int len = plen + expn;
@@ -365,7 +378,7 @@ write_tlstx_wr(struct fw_tlstx_data_wr *txwr, struct toepcb *toep,
 	txwr->plen = htobe32(len);
 	txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(ULP_MODE_TLS) |
 	    V_TX_URG(0) | /* F_T6_TX_FORCE | */ V_TX_SHOVE(shove));
-	txwr->ctxloc_to_exp = htobe32(V_FW_TLSTX_DATA_WR_NUMIVS(1) |
+	txwr->ctxloc_to_exp = htobe32(V_FW_TLSTX_DATA_WR_NUMIVS(num_ivs) |
 	    V_FW_TLSTX_DATA_WR_EXP(expn) |
 	    V_FW_TLSTX_DATA_WR_CTXLOC(TLS_SFO_WR_CONTEXTLOC_DDR) |
 	    V_FW_TLSTX_DATA_WR_IVDSGL(0) |
@@ -381,20 +394,20 @@
 static void
 write_tlstx_cpl(struct cpl_tx_tls_sfo *cpl, struct toepcb *toep,
-    struct tls_hdr *tls_hdr, unsigned int plen, uint64_t seqno)
+    struct tls_hdr *tls_hdr, unsigned int plen, uint8_t rec_type,
+    uint64_t seqno)
 {
 	struct tls_ofld_info *tls_ofld = &toep->tls;
 	int data_type, seglen;
 
 	seglen = plen;
-	data_type = tls_content_type(tls_hdr->type);
+	data_type = tls_content_type(rec_type);
 	cpl->op_to_seg_len = htobe32(V_CPL_TX_TLS_SFO_OPCODE(CPL_TX_TLS_SFO) |
 	    V_CPL_TX_TLS_SFO_DATA_TYPE(data_type) |
 	    V_CPL_TX_TLS_SFO_CPL_LEN(2) | V_CPL_TX_TLS_SFO_SEG_LEN(seglen));
 	cpl->pld_len = htobe32(plen);
 	if (data_type == CPL_TX_TLS_SFO_TYPE_CUSTOM)
-		cpl->type_protover = htobe32(
-		    V_CPL_TX_TLS_SFO_TYPE(tls_hdr->type));
+		cpl->type_protover = htobe32(V_CPL_TX_TLS_SFO_TYPE(rec_type));
 	cpl->seqno_numivs = htobe32(tls_ofld->scmd0.seqno_numivs |
 	    V_SCMD_NUM_IVS(1));
 	cpl->ivgen_hdrlen = htobe32(tls_ofld->scmd0.ivgen_hdrlen);
@@ -494,9 +507,11 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
 	struct tcpcb *tp = intotcpcb(inp);
 	struct socket *so = inp->inp_socket;
 	struct sockbuf *sb = &so->so_snd;
+	struct mbufq *pduq = &toep->ulp_pduq;
 	int tls_size, tx_credits, shove, sowwakeup;
 	struct ofld_tx_sdesc *txsd;
 	char *buf;
+	bool tls13;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
@@ -532,10 +547,23 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
 		return;
 	}
 
+	tls13 = toep->tls.tls13;
 	txsd = &toep->txsd[toep->txsd_pidx];
 	for (;;) {
 		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
 
+		if (__predict_false((m = mbufq_first(pduq)) != NULL)) {
+			if (!t4_push_raw_wr(sc, toep, m)) {
+				toep->flags |= TPF_TX_SUSPENDED;
+				return;
+			}
+
+			(void)mbufq_dequeue(pduq);
+
+			txsd = &toep->txsd[toep->txsd_pidx];
+			continue;
+		}
+
 		SOCKBUF_LOCK(sb);
 		sowwakeup = drop;
 		if (drop) {
@@ -586,9 +614,11 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
 		    sizeof(struct cpl_tx_tls_sfo) +
 		    sizeof(struct ulptx_idata) + sizeof(struct ulptx_sc_memrd);
 
-		/* Explicit IVs for AES-CBC and AES-GCM are <= 16. */
-		MPASS(toep->tls.iv_len <= AES_BLOCK_LEN);
-		wr_len += AES_BLOCK_LEN;
+		if (!tls13) {
+			/* Explicit IVs for AES-CBC and AES-GCM are <= 16. */
+			MPASS(toep->tls.iv_len <= AES_BLOCK_LEN);
+			wr_len += AES_BLOCK_LEN;
+		}
 
 		/* Account for SGL in work request length. */
 		nsegs = count_ext_pgs_segs(m);
@@ -658,8 +688,10 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
 		expn_size = m->m_epg_hdrlen + m->m_epg_trllen;
 		tls_size = m->m_len - expn_size;
 
-		write_tlstx_wr(txwr, toep, tls_size, expn_size, credits, shove);
-		write_tlstx_cpl(cpl, toep, thdr, tls_size, m->m_epg_seqno);
+		write_tlstx_wr(txwr, toep, tls_size, expn_size, credits, shove,
+		    tls13 ? 0 : 1);
+		write_tlstx_cpl(cpl, toep, thdr, tls_size,
+		    tls13 ? m->m_epg_record_type : thdr->type, m->m_epg_seqno);
 
 		idata = (struct ulptx_idata *)(cpl + 1);
 		idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP));
@@ -670,10 +702,12 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
 		    V_ULPTX_LEN16(toep->tls.tx_key_info_size >> 4));
 		memrd->addr = htobe32(toep->tls.tx_key_addr >> 5);
 
-		/* Copy IV. */
 		buf = (char *)(memrd + 1);
-		memcpy(buf, thdr + 1, toep->tls.iv_len);
-		buf += AES_BLOCK_LEN;
+		if (!tls13) {
+			/* Copy IV. */
+			memcpy(buf, thdr + 1, toep->tls.iv_len);
+			buf += AES_BLOCK_LEN;
+		}
 
 		write_ktlstx_sgl(buf, m, nsegs);
 
@@ -694,6 +728,8 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
 			toep->flags |= TPF_TX_SUSPENDED;
 
 		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
+		KASSERT(m->m_len <= MAX_OFLD_TX_SDESC_PLEN,
+		    ("%s: plen %u too large", __func__, m->m_len));
 		txsd->plen = m->m_len;
 		txsd->tx_credits = credits;
 		txsd++;
@@ -793,8 +829,8 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 	struct sockbuf *sb;
 	struct mbuf *tls_data;
 	struct tls_get_record *tgr;
-	struct mbuf *control;
-	int pdu_length, trailer_len;
+	struct mbuf *control, *n;
+	int pdu_length, resid, trailer_len;
 #if defined(KTR) || defined(INVARIANTS)
 	int len;
 #endif
@@ -842,7 +878,9 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 
 	/*
 	 * The payload of this CPL is the TLS header followed by
-	 * additional fields.
+	 * additional fields.  For TLS 1.3 the type field holds the
+	 * inner record type and the length field has been updated to
+	 * strip the inner record type, padding, and MAC.
 	 */
 	KASSERT(m->m_len >= sizeof(*tls_hdr_pkt),
 	    ("%s: payload too small", __func__));
@@ -854,7 +892,14 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 		    ("%s: sequence mismatch", __func__));
 	}
 
-	/* Report decryption errors as EBADMSG. */
+	/*
+	 * Report decryption errors as EBADMSG.
+	 *
+	 * XXX: To support rekeying for TLS 1.3 this will eventually
+	 * have to be updated to recrypt the data with the old key and
+	 * then decrypt with the new key.  Punt for now as KTLS
+	 * doesn't yet support rekeying.
+	 */
 	if ((tls_hdr_pkt->res_to_mac_error & M_TLSRX_HDR_PKT_ERROR) != 0) {
 		CTR4(KTR_CXGBE, "%s: tid %u TLS error %#x ddp_vld %#x",
 		    __func__, toep->tid, tls_hdr_pkt->res_to_mac_error,
@@ -872,6 +917,33 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 		return (0);
 	}
 
+	/* For TLS 1.3 trim the header and trailer. */
+	if (toep->tls.tls13) {
+		KASSERT(tls_data != NULL, ("%s: TLS 1.3 record without data",
+		    __func__));
+		MPASS(tls_data->m_pkthdr.len == pdu_length);
+		m_adj(tls_data, sizeof(struct tls_record_layer));
+		if (tls_data->m_pkthdr.len > be16toh(tls_hdr_pkt->length))
+			tls_data->m_pkthdr.len = be16toh(tls_hdr_pkt->length);
+		resid = tls_data->m_pkthdr.len;
+		if (resid == 0) {
+			m_freem(tls_data);
+			tls_data = NULL;
+		} else {
+			for (n = tls_data;; n = n->m_next) {
+				if (n->m_len < resid) {
+					resid -= n->m_len;
+					continue;
+				}
+
+				n->m_len = resid;
+				m_freem(n->m_next);
+				n->m_next = NULL;
+				break;
+			}
+		}
+	}
+
 	/* Handle data received after the socket is closed. */
 	sb = &so->so_rcv;
 	SOCKBUF_LOCK(sb);
@@ -1076,33 +1148,60 @@ out:
 }
 
 /*
- * Send a work request setting multiple TCB fields to enable
- * ULP_MODE_TLS.
+ * Send a work request setting one or more TCB fields to partially or
+ * fully enable ULP_MODE_TLS.
+ *
+ * - If resid == 0, the socket buffer ends at a record boundary
+ *   (either empty or contains one or more complete records).  Switch
+ *   to ULP_MODE_TLS (if not already) and enable TLS decryption.
+ *
+ * - If resid != 0, the socket buffer contains a partial record.  In
+ *   this case, switch to ULP_MODE_TLS partially and configure the TCB
+ *   to pass along the remaining resid bytes undecrypted.  Once they
+ *   arrive, this is called again with resid == 0 and enables TLS
+ *   decryption.
 */
 static void
-tls_update_tcb(struct adapter *sc, struct toepcb *toep, uint64_t seqno)
+tls_update_tcb(struct adapter *sc, struct toepcb *toep, uint64_t seqno,
+    size_t resid)
 {
-	struct wrqe *wr;
+	struct mbuf *m;
 	struct work_request_hdr *wrh;
 	struct ulp_txpkt *ulpmc;
 	int fields, key_offset, len;
 
-	KASSERT(ulp_mode(toep) == ULP_MODE_NONE,
-	    ("%s: tid %d already ULP_MODE_TLS", __func__, toep->tid));
+	/*
+	 * If we are already in ULP_MODE_TLS, then we should now be at
+	 * a record boundary and ready to finish enabling TLS RX.
+	 */
+	KASSERT(resid == 0 || ulp_mode(toep) == ULP_MODE_NONE,
+	    ("%s: tid %d needs %zu more data but already ULP_MODE_TLS",
+	    __func__, toep->tid, resid));
 
 	fields = 0;
+	if (ulp_mode(toep) == ULP_MODE_NONE) {
+		/* 2 writes for the overlay region */
+		fields += 2;
+	}
 
-	/* 2 writes for the overlay region */
-	fields += 2;
+	if (resid == 0) {
+		/* W_TCB_TLS_SEQ */
+		fields++;
 
-	/* W_TCB_TLS_SEQ */
-	fields++;
+		/* W_TCB_ULP_RAW */
+		fields++;
+	} else {
+		/* W_TCB_PDU_LEN */
+		fields++;
 
-	/* W_TCB_ULP_RAW */
-	fields++;
+		/* W_TCB_ULP_RAW */
+		fields++;
+	}
 
-	/* W_TCB_ULP_TYPE */
-	fields ++;
+	if (ulp_mode(toep) == ULP_MODE_NONE) {
+		/* W_TCB_ULP_TYPE */
+		fields ++;
+	}
 
 	/* W_TCB_T_FLAGS */
 	fields++;
@@ -1111,59 +1210,94 @@ tls_update_tcb(struct adapter *sc, struct toepcb *toep, uint64_t seqno,
 	KASSERT(len <= SGE_MAX_WR_LEN,
 	    ("%s: WR with %d TCB field updates too large", __func__, fields));
 
-	wr = alloc_wrqe(len, toep->ctrlq);
-	if (wr == NULL) {
+	m = alloc_raw_wr_mbuf(len);
+	if (m == NULL) {
 		/* XXX */
 		panic("%s: out of memory", __func__);
 	}
 
-	wrh = wrtod(wr);
-	INIT_ULPTX_WRH(wrh, len, 1, 0);	/* atomic */
+	wrh = mtod(m, struct work_request_hdr *);
+	INIT_ULPTX_WRH(wrh, len, 1, toep->tid);	/* atomic */
 	ulpmc = (struct ulp_txpkt *)(wrh + 1);
 
-	/*
-	 * Clear the TLS overlay region: 1023:832.
-	 *
-	 * Words 26/27 are always set to zero.  Words 28/29
-	 * contain seqno and are set when enabling TLS
-	 * decryption.  Word 30 is zero and Word 31 contains
-	 * the keyid.
-	 */
-	ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, 26,
-	    0xffffffffffffffff, 0);
+	if (ulp_mode(toep) == ULP_MODE_NONE) {
+		/*
+		 * Clear the TLS overlay region: 1023:832.
+		 *
+		 * Words 26/27 are always set to zero.  Words 28/29
+		 * contain seqno and are set when enabling TLS
+		 * decryption.  Word 30 is zero and Word 31 contains
+		 * the keyid.
+		 */
+		ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, 26,
+		    0xffffffffffffffff, 0);
 
-	/*
-	 * RX key tags are an index into the key portion of MA
-	 * memory stored as an offset from the base address in
-	 * units of 64 bytes.
-	 */
-	key_offset = toep->tls.rx_key_addr - sc->vres.key.start;
-	ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, 30,
-	    0xffffffffffffffff,
-	    (uint64_t)V_TCB_RX_TLS_KEY_TAG(key_offset / 64) << 32);
-
-	CTR3(KTR_CXGBE, "%s: tid %d enable TLS seqno %lu", __func__,
-	    toep->tid, seqno);
-	ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_TLS_SEQ,
-	    V_TCB_TLS_SEQ(M_TCB_TLS_SEQ), V_TCB_TLS_SEQ(seqno));
-	ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_ULP_RAW,
-	    V_TCB_ULP_RAW(M_TCB_ULP_RAW),
-	    V_TCB_ULP_RAW((V_TF_TLS_KEY_SIZE(3) | V_TF_TLS_CONTROL(1) |
-	    V_TF_TLS_ACTIVE(1) | V_TF_TLS_ENABLE(1))));
-
-	toep->flags &= ~TPF_TLS_STARTING;
-	toep->flags |= TPF_TLS_RECEIVE;
-
-	/* Set the ULP mode to ULP_MODE_TLS. */
-	toep->params.ulp_mode = ULP_MODE_TLS;
-	ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_ULP_TYPE,
-	    V_TCB_ULP_TYPE(M_TCB_ULP_TYPE), V_TCB_ULP_TYPE(ULP_MODE_TLS));
+		/*
+		 * RX key tags are an index into the key portion of MA
+		 * memory stored as an offset from the base address in
+		 * units of 64 bytes.
+		 */
+		key_offset = toep->tls.rx_key_addr - sc->vres.key.start;
+		ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, 30,
+		    0xffffffffffffffff,
+		    (uint64_t)V_TCB_RX_TLS_KEY_TAG(key_offset / 64) << 32);
+	}
+
+	if (resid == 0) {
+		/*
+		 * The socket buffer is empty or only contains
+		 * complete TLS records: Set the sequence number and
+		 * enable TLS decryption.
+		 */
+		CTR3(KTR_CXGBE, "%s: tid %d enable TLS seqno %lu", __func__,
+		    toep->tid, seqno);
+		ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid,
+		    W_TCB_RX_TLS_SEQ, V_TCB_RX_TLS_SEQ(M_TCB_RX_TLS_SEQ),
+		    V_TCB_RX_TLS_SEQ(seqno));
+		ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid,
+		    W_TCB_ULP_RAW, V_TCB_ULP_RAW(M_TCB_ULP_RAW),
+		    V_TCB_ULP_RAW((V_TF_TLS_KEY_SIZE(3) | V_TF_TLS_CONTROL(1) |
+		    V_TF_TLS_ACTIVE(1) | V_TF_TLS_ENABLE(1))));
+
+		toep->flags &= ~TPF_TLS_STARTING;
+		toep->flags |= TPF_TLS_RECEIVE;
+	} else {
+		/*
+		 * The socket buffer ends with a partial record with a
+		 * full header and needs at least 6 bytes.
+		 *
+		 * Set PDU length.  This is treating the 'resid' bytes
+		 * as a TLS PDU, so the first 5 bytes are a fake
+		 * header and the rest are the PDU length.
+		 */
+		ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid,
+		    W_TCB_PDU_LEN, V_TCB_PDU_LEN(M_TCB_PDU_LEN),
+		    V_TCB_PDU_LEN(resid - sizeof(struct tls_hdr)));
+		CTR3(KTR_CXGBE, "%s: tid %d setting PDU_LEN to %zu",
+		    __func__, toep->tid, resid - sizeof(struct tls_hdr));
+
+		/* Clear all bits in ULP_RAW except for ENABLE. */
+		ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid,
+		    W_TCB_ULP_RAW, V_TCB_ULP_RAW(M_TCB_ULP_RAW),
+		    V_TCB_ULP_RAW(V_TF_TLS_ENABLE(1)));
+
+		/* Wait for 'resid' bytes to be delivered as CPL_RX_DATA. */
+		toep->tls.rx_resid = resid;
+	}
+
+	if (ulp_mode(toep) == ULP_MODE_NONE) {
+		/* Set the ULP mode to ULP_MODE_TLS. */
+		toep->params.ulp_mode = ULP_MODE_TLS;
+		ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid,
+		    W_TCB_ULP_TYPE, V_TCB_ULP_TYPE(M_TCB_ULP_TYPE),
+		    V_TCB_ULP_TYPE(ULP_MODE_TLS));
+	}
 
 	/* Clear TF_RX_QUIESCE. */
 	ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_T_FLAGS,
 	    V_TF_RX_QUIESCE(1), 0);
 
-	t4_wrq_tx(sc, wr);
+	t4_raw_wr_tx(sc, toep, m);
 }
 
 /*
@@ -1190,7 +1324,8 @@ tls_check_rx_sockbuf(struct adapter *sc, struct toepcb *toep,
 	 * size of a TLS record, re-enable receive and pause again once
 	 * we get more data to try again.
 	 */
-	if (!have_header || resid != 0) {
+	if (!have_header || (resid != 0 && (resid < sizeof(struct tls_hdr) ||
+	    is_t6(sc)))) {
 		CTR(KTR_CXGBE, "%s: tid %d waiting for more data", __func__,
 		    toep->tid);
 		toep->flags &= ~TPF_TLS_RX_QUIESCED;
@@ -1198,7 +1333,7 @@ tls_check_rx_sockbuf(struct adapter *sc, struct toepcb *toep,
 		return;
 	}
 
-	tls_update_tcb(sc, toep, seqno);
+	tls_update_tcb(sc, toep, seqno, resid);
 }
 
 void
diff --git a/sys/dev/cxgbe/tom/t4_tls.h b/sys/dev/cxgbe/tom/t4_tls.h
index 753a30890fdc..6faf946e9e3c 100644
--- a/sys/dev/cxgbe/tom/t4_tls.h
+++ b/sys/dev/cxgbe/tom/t4_tls.h
@@ -74,6 +74,7 @@ struct tls_ofld_info {
 	unsigned short adjusted_plen;
 	unsigned short expn_per_ulp;
 	unsigned short pdus_per_ulp;
+	bool tls13;
 	struct tls_scmd scmd0;
 	u_int iv_len;
 	unsigned int tx_key_info_size;
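[Editor's sketch] The TLS 1.3 receive path in the t4_tls.c hunks above strips the outer record header with m_adj() and then walks the mbuf chain to cut off the inner record type, padding, and MAC past the plaintext length. The same walk as a standalone sketch; this assumes the kernel mbuf API and is for illustration only:

```c
/* Truncate an mbuf chain to 'len' bytes, as do_rx_tls_cmp() does. */
static void
mchain_trim_tail(struct mbuf *top, int len)
{
	struct mbuf *n;

	top->m_pkthdr.len = len;
	for (n = top;; n = n->m_next) {
		if (n->m_len < len) {
			len -= n->m_len;
			continue;
		}

		n->m_len = len;		/* last mbuf keeps the remainder */
		m_freem(n->m_next);	/* drop trailer-only mbufs */
		n->m_next = NULL;
		break;
	}
}
```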
diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c
index 9b09facd05a7..8dfffd465345 100644
--- a/sys/dev/cxgbe/tom/t4_tom.c
+++ b/sys/dev/cxgbe/tom/t4_tom.c
@@ -182,7 +182,7 @@ init_toepcb(struct vi_info *vi, struct toepcb *toep)
 	}
 	toep->ofld_txq = &sc->sge.ofld_txq[cp->txq_idx];
 	toep->ofld_rxq = &sc->sge.ofld_rxq[cp->rxq_idx];
-	toep->ctrlq = &sc->sge.ctrlq[pi->port_id];
+	toep->ctrlq = &sc->sge.ctrlq[cp->ctrlq_idx];
 
 	tls_init_toep(toep);
 	MPASS(ulp_mode(toep) != ULP_MODE_TCPDDP);
@@ -494,8 +494,15 @@ send_get_tcb(struct adapter *sc, u_int tid)
 	bzero(cpl, sizeof(*cpl));
 	INIT_TP_WR(cpl, tid);
 	OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_GET_TCB, tid));
-	cpl->reply_ctrl = htobe16(V_REPLY_CHAN(0) |
-	    V_QUEUENO(sc->sge.ofld_rxq[0].iq.cntxt_id));
+	if (chip_id(sc) >= CHELSIO_T7) {
+		cpl->reply_ctrl =
+		    htobe16(V_T7_QUEUENO(sc->sge.ofld_rxq[0].iq.cntxt_id) |
+		    V_T7_REPLY_CHAN(0) | V_NO_REPLY(0));
+	} else {
+		cpl->reply_ctrl =
+		    htobe16(V_QUEUENO(sc->sge.ofld_rxq[0].iq.cntxt_id) |
+		    V_REPLY_CHAN(0) | V_NO_REPLY(0));
+	}
 	cpl->cookie = 0xff;
 	commit_wrq_wr(&sc->sge.ctrlq[0], cpl, &cookie);
 
@@ -882,6 +889,8 @@ send_mss_flowc_wr(struct adapter *sc, struct toepcb *toep)
 	flowc->mnemval[0].val = htobe32(toep->params.emss);
 
 	txsd = &toep->txsd[toep->txsd_pidx];
+	_Static_assert(flowclen16 <= MAX_OFLD_TX_SDESC_CREDITS,
+	    "MAX_OFLD_TX_SDESC_CREDITS too small");
 	txsd->tx_credits = flowclen16;
 	txsd->plen = 0;
 	toep->tx_credits -= txsd->tx_credits;
@@ -1219,7 +1228,7 @@ select_ntuple(struct vi_info *vi, struct l2t_entry *e)
 		ntuple |= (uint64_t)(F_FT_VLAN_VLD | e->vlan) <<
 		    tp->vlan_shift;
 
 	if (tp->port_shift >= 0)
-		ntuple |= (uint64_t)e->lport << tp->port_shift;
+		ntuple |= (uint64_t)e->hw_port << tp->port_shift;
 
 	if (tp->protocol_shift >= 0)
 		ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift;
@@ -1230,10 +1239,7 @@ select_ntuple(struct vi_info *vi, struct l2t_entry *e)
 		    tp->vnic_shift;
 	}
 
-	if (is_t4(sc))
-		return (htobe32((uint32_t)ntuple));
-	else
-		return (htobe64(V_FILTER_TUPLE(ntuple)));
+	return (ntuple);
 }
 
 /*
@@ -1324,6 +1330,9 @@ init_conn_params(struct vi_info *vi , struct offload_settings *s,
 	 */
 	cp->mtu_idx = find_best_mtu_idx(sc, inc, s);
 
+	/* Control queue. */
+	cp->ctrlq_idx = vi->pi->port_id;
+
 	/* Tx queue for this connection. */
 	if (s->txq == QUEUE_RANDOM)
 		q_idx = arc4random();
@@ -1436,6 +1445,32 @@ init_conn_params(struct vi_info *vi , struct offload_settings *s,
 	cp->emss = 0;
 }
 
+void
+update_tid_qid_sel(struct vi_info *vi, struct conn_params *cp, int tid)
+{
+	struct adapter *sc = vi->adapter;
+	const int mask = sc->params.tid_qid_sel_mask;
+	struct sge_ofld_txq *ofld_txq = &sc->sge.ofld_txq[cp->txq_idx];
+	uint32_t ngroup;
+	int g, nqpg;
+
+	cp->ctrlq_idx = ofld_txq_group(tid, mask);
+	CTR(KTR_CXGBE, "tid %u is on core %u", tid, cp->ctrlq_idx);
+	if ((ofld_txq->wrq.eq.cntxt_id & mask) == (tid & mask))
+		return;
+
+	ngroup = 1 << bitcount32(mask);
+	MPASS(vi->nofldtxq % ngroup == 0);
+	g = ofld_txq_group(tid, mask);
+	nqpg = vi->nofldtxq / ngroup;
+	cp->txq_idx = vi->first_ofld_txq + g * nqpg + arc4random() % nqpg;
+#ifdef INVARIANTS
+	MPASS(cp->txq_idx < vi->first_ofld_txq + vi->nofldtxq);
+	ofld_txq = &sc->sge.ofld_txq[cp->txq_idx];
+	MPASS((ofld_txq->wrq.eq.cntxt_id & mask) == (tid & mask));
+#endif
+}
+
 int
 negative_advice(int status)
 {
@@ -1955,8 +1990,10 @@ t4_tom_deactivate(struct adapter *sc)
 	if (td == NULL)
 		return (0);	/* XXX. KASSERT? */
 
-	if (uld_active(sc, ULD_IWARP) || uld_active(sc, ULD_ISCSI))
-		return (EBUSY);	/* both iWARP and iSCSI rely on the TOE. */
+	/* These ULDs rely on the TOE. */
+	if (uld_active(sc, ULD_IWARP) || uld_active(sc, ULD_ISCSI) ||
+	    uld_active(sc, ULD_NVME))
+		return (EBUSY);
 
 	if (sc->offload_map != 0) {
 		for_each_port(sc, i) {
@@ -2231,6 +2268,98 @@ t4_aio_queue_tom(struct socket *so, struct kaiocb *job)
 	return (0);
 }
 
+/*
+ * Request/response structure used to find out the adapter offloading
+ * a socket.
+ */
+struct find_offload_adapter_data {
+	struct socket *so;
+	struct adapter *sc;	/* result */
+};
+
+static void
+find_offload_adapter_cb(struct adapter *sc, void *arg)
+{
+	struct find_offload_adapter_data *fa = arg;
+	struct socket *so = fa->so;
+	struct tom_data *td = sc->tom_softc;
+	struct tcpcb *tp;
+	struct inpcb *inp;
+
+	/* Non-TCP were filtered out earlier. */
+	MPASS(so->so_proto->pr_protocol == IPPROTO_TCP);
+
+	if (fa->sc != NULL)
+		return;	/* Found already. */
+
+	if (td == NULL)
+		return;	/* TOE not enabled on this adapter. */
+
+	inp = sotoinpcb(so);
+	INP_WLOCK(inp);
+	if ((inp->inp_flags & INP_DROPPED) == 0) {
+		tp = intotcpcb(inp);
+		if (tp->t_flags & TF_TOE && tp->tod == &td->tod)
+			fa->sc = sc;	/* Found. */
+	}
+	INP_WUNLOCK(inp);
+}
+
+struct adapter *
+find_offload_adapter(struct socket *so)
+{
+	struct find_offload_adapter_data fa;
+
+	fa.sc = NULL;
+	fa.so = so;
+	t4_iterate(find_offload_adapter_cb, &fa);
+	return (fa.sc);
+}
+
+void
+send_txdataplen_max_flowc_wr(struct adapter *sc, struct toepcb *toep,
+    int maxlen)
+{
+	struct wrqe *wr;
+	struct fw_flowc_wr *flowc;
+	const u_int nparams = 1;
+	u_int flowclen;
+	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
+
+	CTR(KTR_CXGBE, "%s: tid %u maxlen=%d", __func__, toep->tid, maxlen);
+
+	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
+
+	wr = alloc_wrqe(roundup2(flowclen, 16), &toep->ofld_txq->wrq);
+	if (wr == NULL) {
+		/* XXX */
+		panic("%s: allocation failure.", __func__);
+	}
+	flowc = wrtod(wr);
+	memset(flowc, 0, wr->wr_len);
+
+	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
+	    V_FW_FLOWC_WR_NPARAMS(nparams));
+	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
+	    V_FW_WR_FLOWID(toep->tid));
+
+	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_TXDATAPLEN_MAX;
+	flowc->mnemval[0].val = htobe32(maxlen);
+
+	KASSERT(howmany(flowclen, 16) <= MAX_OFLD_TX_SDESC_CREDITS,
+	    ("%s: tx_credits %u too large", __func__, howmany(flowclen, 16)));
+	txsd->tx_credits = howmany(flowclen, 16);
+	txsd->plen = 0;
+	KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
+	    ("%s: not enough credits (%d)", __func__, toep->tx_credits));
+	toep->tx_credits -= txsd->tx_credits;
+	if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
+		toep->txsd_pidx = 0;
+	toep->txsd_avail--;
+
+	t4_wrq_tx(sc, wr);
+}
+
 static int
 t4_tom_mod_load(void)
 {
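[Editor's sketch] update_tid_qid_sel() above keeps a connection on an offload tx queue whose context ID agrees with the tid in the low bits selected by tid_qid_sel_mask. A runnable example of the group math, assuming ofld_txq_group() reduces to (tid & mask), a mask of 0x3, and 8 offload tx queues per vi:

```c
#include <stdio.h>

int
main(void)
{
	const unsigned mask = 0x3;
	const unsigned nofldtxq = 8;
	const unsigned ngroup = 1u << __builtin_popcount(mask);	/* 4 */
	const unsigned nqpg = nofldtxq / ngroup;		/* 2 per group */
	unsigned tid = 0x1236;
	unsigned g = tid & mask;				/* group 2 */

	/* Any queue in [g*nqpg, g*nqpg+nqpg) keeps (cntxt_id & mask) == g. */
	printf("tid %#x -> group %u, queues %u..%u\n",
	    tid, g, g * nqpg, g * nqpg + nqpg - 1);
	return (0);
}
```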
diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h
index 6295a3484b9f..c8c2d432b8f1 100644
--- a/sys/dev/cxgbe/tom/t4_tom.h
+++ b/sys/dev/cxgbe/tom/t4_tom.h
@@ -113,6 +113,7 @@ struct conn_params {
 	int8_t mtu_idx;
 	int8_t ulp_mode;
 	int8_t tx_align;
+	int8_t ctrlq_idx;	/* ctrlq = &sc->sge.ctrlq[ctrlq_idx] */
 	int16_t txq_idx;	/* ofld_txq = &sc->sge.ofld_txq[txq_idx] */
 	int16_t rxq_idx;	/* ofld_rxq = &sc->sge.ofld_rxq[rxq_idx] */
 	int16_t l2t_idx;
@@ -122,10 +123,13 @@ struct conn_params {
 };
 
 struct ofld_tx_sdesc {
-	uint32_t plen;		/* payload length */
-	uint8_t tx_credits;	/* firmware tx credits (unit is 16B) */
+	uint32_t plen : 26;		/* payload length */
+	uint32_t tx_credits : 6;	/* firmware tx credits (unit is 16B) */
 };
 
+#define	MAX_OFLD_TX_SDESC_PLEN		((1u << 26) - 1)
+#define	MAX_OFLD_TX_SDESC_CREDITS	((1u << 6) - 1)
+
 struct ppod_region {
 	u_int pr_start;
 	u_int pr_len;
@@ -474,11 +478,14 @@ int select_rcv_wscale(void);
 void init_conn_params(struct vi_info *, struct offload_settings *,
     struct in_conninfo *, struct socket *, const struct tcp_options *,
     int16_t, struct conn_params *cp);
+void update_tid_qid_sel(struct vi_info *, struct conn_params *, int);
 __be64 calc_options0(struct vi_info *, struct conn_params *);
 __be32 calc_options2(struct vi_info *, struct conn_params *);
 uint64_t select_ntuple(struct vi_info *, struct l2t_entry *);
 int negative_advice(int);
 int add_tid_to_history(struct adapter *, u_int);
+struct adapter *find_offload_adapter(struct socket *);
+void send_txdataplen_max_flowc_wr(struct adapter *, struct toepcb *, int);
 void t4_pcb_detach(struct toedev *, struct tcpcb *);
 
 /* t4_connect.c */
@@ -526,6 +533,10 @@ int t4_send_rst(struct toedev *, struct tcpcb *);
 void t4_set_tcb_field(struct adapter *, struct sge_wrq *, struct toepcb *,
     uint16_t, uint64_t, uint64_t, int, int);
 void t4_push_pdus(struct adapter *, struct toepcb *, int);
+bool t4_push_raw_wr(struct adapter *, struct toepcb *, struct mbuf *);
+void t4_raw_wr_tx(struct adapter *, struct toepcb *, struct mbuf *);
+void write_set_tcb_field(struct adapter *, void *, struct toepcb *, uint16_t,
+    uint64_t, uint64_t, int, int);
 
 /* t4_ddp.c */
 int t4_init_ppod_region(struct ppod_region *, struct t4_range *, u_int,
@@ -551,6 +562,7 @@ int t4_aio_queue_ddp(struct socket *, struct kaiocb *);
 int t4_enable_ddp_rcv(struct socket *, struct toepcb *);
 void t4_ddp_mod_load(void);
 void t4_ddp_mod_unload(void);
+struct mbuf *alloc_raw_wr_mbuf(int);
 void ddp_assert_empty(struct toepcb *);
 void ddp_uninit_toep(struct toepcb *);
 void ddp_queue_toep(struct toepcb *);
@@ -574,4 +586,10 @@ int tls_tx_key(struct toepcb *);
 void tls_uninit_toep(struct toepcb *);
 int tls_alloc_ktls(struct toepcb *, struct ktls_session *, int);
 
+/* t4_tpt.c */
+uint32_t t4_pblpool_alloc(struct adapter *, int);
+void t4_pblpool_free(struct adapter *, uint32_t, int);
+int t4_pblpool_create(struct adapter *);
+void t4_pblpool_destroy(struct adapter *);
+
 #endif
diff --git a/sys/dev/cxgbe/tom/t4_tom_l2t.c b/sys/dev/cxgbe/tom/t4_tom_l2t.c
index 3fd0d5ca41d4..e245c2b6fd5b 100644
--- a/sys/dev/cxgbe/tom/t4_tom_l2t.c
+++ b/sys/dev/cxgbe/tom/t4_tom_l2t.c
@@ -403,7 +403,7 @@ t4_l2t_get(struct port_info *pi, if_t ifp, struct sockaddr *sa)
 	l2_store(sa, e);
 	e->ifp = ifp;
 	e->hash = hash;
-	e->lport = pi->lport;
+	e->hw_port = pi->hw_port;
 	e->wrq = &sc->sge.ctrlq[pi->port_id];
 	e->iqid = sc->sge.ofld_rxq[pi->vi[0].first_ofld_rxq].iq.abs_id;
 	atomic_store_rel_int(&e->refcnt, 1);
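[Editor's sketch] The ofld_tx_sdesc change in t4_tom.h packs plen and tx_credits into a single 32-bit word, which is what the new MAX_OFLD_TX_SDESC_* bounds and the KASSERTs added throughout this diff protect. A runnable illustration of the packing and its limits:

```c
#include <assert.h>
#include <stdint.h>

struct ofld_tx_sdesc {
	uint32_t plen : 26;		/* payload length, <= 64 MB - 1 */
	uint32_t tx_credits : 6;	/* 16 B units, <= 63 */
};

#define	MAX_OFLD_TX_SDESC_PLEN		((1u << 26) - 1)
#define	MAX_OFLD_TX_SDESC_CREDITS	((1u << 6) - 1)

int
main(void)
{
	struct ofld_tx_sdesc txsd = {
		.plen = MAX_OFLD_TX_SDESC_PLEN,
		.tx_credits = MAX_OFLD_TX_SDESC_CREDITS,
	};

	assert(sizeof(txsd) == 4);	/* was 8 with the separate fields */
	assert(txsd.plen == (1u << 26) - 1);
	assert(txsd.tx_credits == 63);
	return (0);
}
```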
