author     Navdeep Parhar <np@FreeBSD.org>  2019-04-22 17:48:10 +0000
committer  Navdeep Parhar <np@FreeBSD.org>  2019-04-22 17:48:10 +0000
commit     61e02298cea3bee31604640e53da1567271dfa57 (patch)
tree       9f0b45edebc7a5dd6d94ede78bc4d9a34de0183e
parent     687c09042ffb9c9a18d1b673cfbd3eeae2aca25d (diff)
cxgbe/t4_tom: Add a "TCB history" feature that samples hardware state
for a tid and maintains a running history of some interesting events.
Service TCP_INFO queries from the history when the tid is being tracked
there.
Notes:
    svn path=/head/; revision=346570
-rw-r--r--  sys/dev/cxgbe/common/t4_msg.h |   3
-rw-r--r--  sys/dev/cxgbe/tom/t4_tom.c    | 434
-rw-r--r--  sys/dev/cxgbe/tom/t4_tom.h    |  31
3 files changed, 422 insertions, 46 deletions
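
As context for the diff below: the history is consumed through the standard
TCP_INFO socket option, which t4_tcp_info() now fills from either the cached
TCB history or a fresh memory-window read of the TCB. The following userland
sketch is not part of the commit; it is a minimal, hypothetical consumer that
shows where a few of the fields populated by fill_tcp_info_from_tcb()
(tcpi_toe_tid, tcpi_rtt, tcpi_snd_cwnd, tcpi_snd_wnd) end up.

/*
 * Hypothetical userland consumer (not part of this commit): query TCP_INFO
 * on a connected socket and print a few of the fields the TOE driver
 * refines from the TCB.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>

static void
print_offloaded_tcp_info(int fd)
{
	struct tcp_info ti;
	socklen_t len = sizeof(ti);

	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) == -1) {
		perror("getsockopt(TCP_INFO)");
		return;
	}
	printf("tid %u: rtt %u us, rttvar %u us, cwnd %u, snd_wnd %u, "
	    "ssthresh %u\n", ti.tcpi_toe_tid, ti.tcpi_rtt, ti.tcpi_rttvar,
	    ti.tcpi_snd_cwnd, ti.tcpi_snd_wnd, ti.tcpi_snd_ssthresh);
}

With this commit, a tid that has been added to the history via
add_tid_to_history() gets these values from the driver's periodic samples
instead of a synchronous read of the hardware TCB.
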
diff --git a/sys/dev/cxgbe/common/t4_msg.h b/sys/dev/cxgbe/common/t4_msg.h
index f616ab91fbac..2a402228d691 100644
--- a/sys/dev/cxgbe/common/t4_msg.h
+++ b/sys/dev/cxgbe/common/t4_msg.h
@@ -923,7 +923,8 @@ struct cpl_get_tcb {
WR_HDR;
union opcode_tid ot;
__be16 reply_ctrl;
- __be16 cookie;
+ __u8 rsvd;
+ __u8 cookie;
};
/* cpl_get_tcb.reply_ctrl fields */
diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c
index f4f0534d9190..cc055edd3130 100644
--- a/sys/dev/cxgbe/tom/t4_tom.c
+++ b/sys/dev/cxgbe/tom/t4_tom.c
@@ -386,55 +386,352 @@ t4_ctloutput(struct toedev *tod, struct tcpcb *tp, int dir, int name)
}
}
-static inline int
-get_tcb_bit(u_char *tcb, int bit)
+static inline uint64_t
+get_tcb_tflags(const uint64_t *tcb)
{
- int ix, shift;
- ix = 127 - (bit >> 3);
- shift = bit & 0x7;
+ return ((be64toh(tcb[14]) << 32) | (be64toh(tcb[15]) >> 32));
+}
- return ((tcb[ix] >> shift) & 1);
+static inline uint32_t
+get_tcb_field(const uint64_t *tcb, u_int word, uint32_t mask, u_int shift)
+{
+#define LAST_WORD ((TCB_SIZE / 4) - 1)
+ uint64_t t1, t2;
+ int flit_idx;
+
+ MPASS(mask != 0);
+ MPASS(word <= LAST_WORD);
+ MPASS(shift < 32);
+
+ flit_idx = (LAST_WORD - word) / 2;
+ if (word & 0x1)
+ shift += 32;
+ t1 = be64toh(tcb[flit_idx]) >> shift;
+ t2 = 0;
+ if (fls(mask) > 64 - shift) {
+ /*
+ * Will spill over into the next logical flit, which is the flit
+ * before this one. The flit_idx before this one must be valid.
+ */
+ MPASS(flit_idx > 0);
+ t2 = be64toh(tcb[flit_idx - 1]) << (64 - shift);
+ }
+ return ((t2 | t1) & mask);
+#undef LAST_WORD
}
+#define GET_TCB_FIELD(tcb, F) \
+ get_tcb_field(tcb, W_TCB_##F, M_TCB_##F, S_TCB_##F)
-static inline uint64_t
-get_tcb_bits(u_char *tcb, int hi, int lo)
+/*
+ * Issues a CPL_GET_TCB to read the entire TCB for the tid.
+ */
+static int
+send_get_tcb(struct adapter *sc, u_int tid)
{
- uint64_t rc = 0;
+ struct cpl_get_tcb *cpl;
+ struct wrq_cookie cookie;
- while (hi >= lo) {
- rc = (rc << 1) | get_tcb_bit(tcb, hi);
- --hi;
- }
+ MPASS(tid < sc->tids.ntids);
+
+ cpl = start_wrq_wr(&sc->sge.ctrlq[0], howmany(sizeof(*cpl), 16),
+ &cookie);
+ if (__predict_false(cpl == NULL))
+ return (ENOMEM);
+ bzero(cpl, sizeof(*cpl));
+ INIT_TP_WR(cpl, tid);
+ OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_GET_TCB, tid));
+ cpl->reply_ctrl = htobe16(V_REPLY_CHAN(0) |
+ V_QUEUENO(sc->sge.ofld_rxq[0].iq.cntxt_id));
+ cpl->cookie = 0xff;
+ commit_wrq_wr(&sc->sge.ctrlq[0], cpl, &cookie);
+
+ return (0);
+}
+
+static struct tcb_histent *
+alloc_tcb_histent(struct adapter *sc, u_int tid, int flags)
+{
+ struct tcb_histent *te;
+
+ MPASS(flags == M_NOWAIT || flags == M_WAITOK);
+
+ te = malloc(sizeof(*te), M_CXGBE, M_ZERO | flags);
+ if (te == NULL)
+ return (NULL);
+ mtx_init(&te->te_lock, "TCB entry", NULL, MTX_DEF);
+ callout_init_mtx(&te->te_callout, &te->te_lock, 0);
+ te->te_adapter = sc;
+ te->te_tid = tid;
+
+ return (te);
+}
+static void
+free_tcb_histent(struct tcb_histent *te)
+{
+
+ mtx_destroy(&te->te_lock);
+ free(te, M_CXGBE);
+}
+
+/*
+ * Start tracking the tid in the TCB history.
+ */
+int
+add_tid_to_history(struct adapter *sc, u_int tid)
+{
+ struct tcb_histent *te = NULL;
+ struct tom_data *td = sc->tom_softc;
+ int rc;
+
+ MPASS(tid < sc->tids.ntids);
+
+ if (td->tcb_history == NULL)
+ return (ENXIO);
+
+ rw_wlock(&td->tcb_history_lock);
+ if (td->tcb_history[tid] != NULL) {
+ rc = EEXIST;
+ goto done;
+ }
+ te = alloc_tcb_histent(sc, tid, M_NOWAIT);
+ if (te == NULL) {
+ rc = ENOMEM;
+ goto done;
+ }
+ mtx_lock(&te->te_lock);
+ rc = send_get_tcb(sc, tid);
+ if (rc == 0) {
+ te->te_flags |= TE_RPL_PENDING;
+ td->tcb_history[tid] = te;
+ } else {
+ free(te, M_CXGBE);
+ }
+ mtx_unlock(&te->te_lock);
+done:
+ rw_wunlock(&td->tcb_history_lock);
return (rc);
}
+static void
+remove_tcb_histent(struct tcb_histent *te)
+{
+ struct adapter *sc = te->te_adapter;
+ struct tom_data *td = sc->tom_softc;
+
+ rw_assert(&td->tcb_history_lock, RA_WLOCKED);
+ mtx_assert(&te->te_lock, MA_OWNED);
+ MPASS(td->tcb_history[te->te_tid] == te);
+
+ td->tcb_history[te->te_tid] = NULL;
+ free_tcb_histent(te);
+ rw_wunlock(&td->tcb_history_lock);
+}
+
+static inline struct tcb_histent *
+lookup_tcb_histent(struct adapter *sc, u_int tid, bool addrem)
+{
+ struct tcb_histent *te;
+ struct tom_data *td = sc->tom_softc;
+
+ MPASS(tid < sc->tids.ntids);
+
+ if (addrem)
+ rw_wlock(&td->tcb_history_lock);
+ else
+ rw_rlock(&td->tcb_history_lock);
+ te = td->tcb_history[tid];
+ if (te != NULL) {
+ mtx_lock(&te->te_lock);
+ return (te); /* with both locks held */
+ }
+ if (addrem)
+ rw_wunlock(&td->tcb_history_lock);
+ else
+ rw_runlock(&td->tcb_history_lock);
+
+ return (te);
+}
+
+static inline void
+release_tcb_histent(struct tcb_histent *te)
+{
+ struct adapter *sc = te->te_adapter;
+ struct tom_data *td = sc->tom_softc;
+
+ mtx_assert(&te->te_lock, MA_OWNED);
+ mtx_unlock(&te->te_lock);
+ rw_assert(&td->tcb_history_lock, RA_RLOCKED);
+ rw_runlock(&td->tcb_history_lock);
+}
+
+static void
+request_tcb(void *arg)
+{
+ struct tcb_histent *te = arg;
+
+ mtx_assert(&te->te_lock, MA_OWNED);
+
+ /* No one else is supposed to update the histent. */
+ MPASS(!(te->te_flags & TE_RPL_PENDING));
+ if (send_get_tcb(te->te_adapter, te->te_tid) == 0)
+ te->te_flags |= TE_RPL_PENDING;
+ else
+ callout_schedule(&te->te_callout, hz / 100);
+}
+
+static void
+update_tcb_histent(struct tcb_histent *te, const uint64_t *tcb)
+{
+ struct tom_data *td = te->te_adapter->tom_softc;
+ uint64_t tflags = get_tcb_tflags(tcb);
+ uint8_t sample = 0;
+
+ if (GET_TCB_FIELD(tcb, SND_MAX_RAW) != GET_TCB_FIELD(tcb, SND_UNA_RAW)) {
+ if (GET_TCB_FIELD(tcb, T_RXTSHIFT) != 0)
+ sample |= TS_RTO;
+ if (GET_TCB_FIELD(tcb, T_DUPACKS) != 0)
+ sample |= TS_DUPACKS;
+ if (GET_TCB_FIELD(tcb, T_DUPACKS) >= td->dupack_threshold)
+ sample |= TS_FASTREXMT;
+ }
+
+ if (GET_TCB_FIELD(tcb, SND_MAX_RAW) != 0) {
+ uint32_t snd_wnd;
+
+ sample |= TS_SND_BACKLOGGED; /* for whatever reason. */
+
+ snd_wnd = GET_TCB_FIELD(tcb, RCV_ADV);
+ if (tflags & V_TF_RECV_SCALE(1))
+ snd_wnd <<= GET_TCB_FIELD(tcb, RCV_SCALE);
+ if (GET_TCB_FIELD(tcb, SND_CWND) < snd_wnd)
+ sample |= TS_CWND_LIMITED; /* maybe due to CWND */
+ }
+
+ if (tflags & V_TF_CCTRL_ECN(1)) {
+
+ /*
+ * CE marker on incoming IP hdr, echoing ECE back in the TCP
+ * hdr. Indicates congestion somewhere on the way from the peer
+ * to this node.
+ */
+ if (tflags & V_TF_CCTRL_ECE(1))
+ sample |= TS_ECN_ECE;
+
+ /*
+ * ECE seen and CWR sent (or about to be sent). Might indicate
+ * congestion on the way to the peer. This node is reducing its
+ * congestion window in response.
+ */
+ if (tflags & (V_TF_CCTRL_CWR(1) | V_TF_CCTRL_RFR(1)))
+ sample |= TS_ECN_CWR;
+ }
+
+ te->te_sample[te->te_pidx] = sample;
+ if (++te->te_pidx == nitems(te->te_sample))
+ te->te_pidx = 0;
+ memcpy(te->te_tcb, tcb, TCB_SIZE);
+ te->te_flags |= TE_ACTIVE;
+}
+
+static int
+do_get_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
+{
+ struct adapter *sc = iq->adapter;
+ const struct cpl_get_tcb_rpl *cpl = mtod(m, const void *);
+ const uint64_t *tcb = (const uint64_t *)(const void *)(cpl + 1);
+ struct tcb_histent *te;
+ const u_int tid = GET_TID(cpl);
+ bool remove;
+
+ remove = GET_TCB_FIELD(tcb, T_STATE) == TCPS_CLOSED;
+ te = lookup_tcb_histent(sc, tid, remove);
+ if (te == NULL) {
+ /* Not in the history. Who issued the GET_TCB for this? */
+ device_printf(sc->dev, "tcb %u: flags 0x%016jx, state %u, "
+ "srtt %u, sscale %u, rscale %u, cookie 0x%x\n", tid,
+ (uintmax_t)get_tcb_tflags(tcb), GET_TCB_FIELD(tcb, T_STATE),
+ GET_TCB_FIELD(tcb, T_SRTT), GET_TCB_FIELD(tcb, SND_SCALE),
+ GET_TCB_FIELD(tcb, RCV_SCALE), cpl->cookie);
+ goto done;
+ }
+
+ MPASS(te->te_flags & TE_RPL_PENDING);
+ te->te_flags &= ~TE_RPL_PENDING;
+ if (remove) {
+ remove_tcb_histent(te);
+ } else {
+ update_tcb_histent(te, tcb);
+ callout_reset(&te->te_callout, hz / 10, request_tcb, te);
+ release_tcb_histent(te);
+ }
+done:
+ m_freem(m);
+ return (0);
+}
+
+static void
+fill_tcp_info_from_tcb(struct adapter *sc, uint64_t *tcb, struct tcp_info *ti)
+{
+ uint32_t v;
+
+ ti->tcpi_state = GET_TCB_FIELD(tcb, T_STATE);
+
+ v = GET_TCB_FIELD(tcb, T_SRTT);
+ ti->tcpi_rtt = tcp_ticks_to_us(sc, v);
+
+ v = GET_TCB_FIELD(tcb, T_RTTVAR);
+ ti->tcpi_rttvar = tcp_ticks_to_us(sc, v);
+
+ ti->tcpi_snd_ssthresh = GET_TCB_FIELD(tcb, SND_SSTHRESH);
+ ti->tcpi_snd_cwnd = GET_TCB_FIELD(tcb, SND_CWND);
+ ti->tcpi_rcv_nxt = GET_TCB_FIELD(tcb, RCV_NXT);
+
+ v = GET_TCB_FIELD(tcb, TX_MAX);
+ ti->tcpi_snd_nxt = v - GET_TCB_FIELD(tcb, SND_NXT_RAW);
+
+ /* Receive window being advertised by us. */
+ ti->tcpi_rcv_wscale = GET_TCB_FIELD(tcb, SND_SCALE); /* Yes, SND. */
+ ti->tcpi_rcv_space = GET_TCB_FIELD(tcb, RCV_WND);
+
+ /* Send window */
+ ti->tcpi_snd_wscale = GET_TCB_FIELD(tcb, RCV_SCALE); /* Yes, RCV. */
+ ti->tcpi_snd_wnd = GET_TCB_FIELD(tcb, RCV_ADV);
+ if (get_tcb_tflags(tcb) & V_TF_RECV_SCALE(1))
+ ti->tcpi_snd_wnd <<= ti->tcpi_snd_wscale;
+ else
+ ti->tcpi_snd_wscale = 0;
+
+}
+
+static void
+fill_tcp_info_from_history(struct adapter *sc, struct tcb_histent *te,
+ struct tcp_info *ti)
+{
+
+ fill_tcp_info_from_tcb(sc, te->te_tcb, ti);
+}
+
/*
- * Called by the kernel to allow the TOE driver to "refine" values filled up in
- * the tcp_info for an offloaded connection.
+ * Reads the TCB for the given tid using a memory window and copies it to 'buf'
+ * in the same format as CPL_GET_TCB_RPL.
*/
static void
-t4_tcp_info(struct toedev *tod, struct tcpcb *tp, struct tcp_info *ti)
+read_tcb_using_memwin(struct adapter *sc, u_int tid, uint64_t *buf)
{
int i, j, k, rc;
- struct adapter *sc = tod->tod_softc;
- struct toepcb *toep = tp->t_toe;
- uint32_t addr, v;
- uint32_t buf[TCB_SIZE / sizeof(uint32_t)];
+ uint32_t addr;
u_char *tcb, tmp;
- INP_WLOCK_ASSERT(tp->t_inpcb);
- MPASS(ti != NULL);
-
- ti->tcpi_toe_tid = toep->tid;
+ MPASS(tid < sc->tids.ntids);
- addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + toep->tid * TCB_SIZE;
- rc = read_via_memwin(sc, 2, addr, &buf[0], TCB_SIZE);
+ addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + tid * TCB_SIZE;
+ rc = read_via_memwin(sc, 2, addr, (uint32_t *)buf, TCB_SIZE);
if (rc != 0)
return;
- tcb = (u_char *)&buf[0];
+ tcb = (u_char *)buf;
for (i = 0, j = TCB_SIZE - 16; i < j; i += 16, j -= 16) {
for (k = 0; k < 16; k++) {
tmp = tcb[i + k];
@@ -442,28 +739,42 @@ t4_tcp_info(struct toedev *tod, struct tcpcb *tp, struct tcp_info *ti)
tcb[j + k] = tmp;
}
}
+}
- ti->tcpi_state = get_tcb_bits(tcb, 115, 112);
-
- v = get_tcb_bits(tcb, 271, 256);
- ti->tcpi_rtt = tcp_ticks_to_us(sc, v);
-
- v = get_tcb_bits(tcb, 287, 272);
- ti->tcpi_rttvar = tcp_ticks_to_us(sc, v);
-
- ti->tcpi_snd_ssthresh = get_tcb_bits(tcb, 487, 460);
- ti->tcpi_snd_cwnd = get_tcb_bits(tcb, 459, 432);
- ti->tcpi_rcv_nxt = get_tcb_bits(tcb, 553, 522);
+static void
+fill_tcp_info(struct adapter *sc, u_int tid, struct tcp_info *ti)
+{
+ uint64_t tcb[TCB_SIZE / sizeof(uint64_t)];
+ struct tcb_histent *te;
+
+ ti->tcpi_toe_tid = tid;
+ te = lookup_tcb_histent(sc, tid, false);
+ if (te != NULL) {
+ fill_tcp_info_from_history(sc, te, ti);
+ release_tcb_histent(te);
+ } else {
+ if (!(sc->debug_flags & DF_DISABLE_TCB_CACHE)) {
+ /* XXX: tell firmware to flush TCB cache. */
+ }
+ read_tcb_using_memwin(sc, tid, tcb);
+ fill_tcp_info_from_tcb(sc, tcb, ti);
+ }
+}
- ti->tcpi_snd_nxt = get_tcb_bits(tcb, 319, 288) -
- get_tcb_bits(tcb, 375, 348);
+/*
+ * Called by the kernel to allow the TOE driver to "refine" values filled up in
+ * the tcp_info for an offloaded connection.
+ */
+static void
+t4_tcp_info(struct toedev *tod, struct tcpcb *tp, struct tcp_info *ti)
+{
+ struct adapter *sc = tod->tod_softc;
+ struct toepcb *toep = tp->t_toe;
- /* Receive window being advertised by us. */
- ti->tcpi_rcv_space = get_tcb_bits(tcb, 581, 554);
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ MPASS(ti != NULL);
- /* Send window ceiling. */
- v = get_tcb_bits(tcb, 159, 144) << get_tcb_bits(tcb, 131, 128);
- ti->tcpi_snd_wnd = min(v, ti->tcpi_snd_cwnd);
+ fill_tcp_info(sc, toep->tid, ti);
}
/*
@@ -807,6 +1118,35 @@ failed:
return (rc);
}
+static inline void
+alloc_tcb_history(struct adapter *sc, struct tom_data *td)
+{
+
+ if (sc->tids.ntids == 0 || sc->tids.ntids > 1024)
+ return;
+ rw_init(&td->tcb_history_lock, "TCB history");
+ td->tcb_history = malloc(sc->tids.ntids * sizeof(*td->tcb_history),
+ M_CXGBE, M_ZERO | M_NOWAIT);
+ td->dupack_threshold = G_DUPACKTHRESH(t4_read_reg(sc, A_TP_PARA_REG0));
+}
+
+static inline void
+free_tcb_history(struct adapter *sc, struct tom_data *td)
+{
+#ifdef INVARIANTS
+ int i;
+
+ if (td->tcb_history != NULL) {
+ for (i = 0; i < sc->tids.ntids; i++) {
+ MPASS(td->tcb_history[i] == NULL);
+ }
+ }
+#endif
+ free(td->tcb_history, M_CXGBE);
+ if (rw_initialized(&td->tcb_history_lock))
+ rw_destroy(&td->tcb_history_lock);
+}
+
static void
free_tom_data(struct adapter *sc, struct tom_data *td)
{
@@ -830,6 +1170,7 @@ free_tom_data(struct adapter *sc, struct tom_data *td)
if (mtx_initialized(&td->toep_list_lock))
mtx_destroy(&td->toep_list_lock);
+ free_tcb_history(sc, td);
free_tid_tabs(&sc->tids);
free(td, M_CXGBE);
}
@@ -1097,6 +1438,8 @@ t4_tom_activate(struct adapter *sc)
t4_set_reg_field(sc, A_ULP_RX_TDDP_TAGMASK,
V_TDDPTAGMASK(M_TDDPTAGMASK), td->pr.pr_tag_mask);
+ alloc_tcb_history(sc, td);
+
/* toedev ops */
tod = &td->tod;
init_toedev(tod);
@@ -1214,6 +1557,7 @@ t4_tom_mod_load(void)
struct protosw *tcp_protosw, *tcp6_protosw;
/* CPL handlers */
+ t4_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);
t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl2,
CPL_COOKIE_TOM);
t4_init_connect_cpl_handlers();
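
A note on the get_tcb_field() arithmetic added near the top of this file: the
128-byte TCB arrives as 16 big-endian 64-bit flits, hardware word N lives in
flit (31 - N) / 2 with odd words in the upper 32 bits, and a field may spill
into the preceding flit. The standalone sketch below is not part of the
commit; it mimics that extraction in userland, and the field position it uses
(word 3, bit 16, 4-bit mask) is only an illustrative guess based on the
removed get_tcb_bits(tcb, 115, 112) lookup for T_STATE.

/*
 * Illustrative only: userland re-implementation of the flit arithmetic in
 * get_tcb_field().  The planted field position is an assumption for the
 * demo, not a statement about the real TCB layout.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define	EX_TCB_SIZE	128			/* bytes */
#define	EX_LAST_WORD	((EX_TCB_SIZE / 4) - 1)	/* 31 */

static uint64_t
ex_be64dec(const uint8_t *p)
{
	uint64_t v = 0;

	for (int i = 0; i < 8; i++)
		v = (v << 8) | p[i];
	return (v);
}

static uint32_t
ex_get_tcb_field(const uint8_t *tcb, unsigned int word, uint32_t mask,
    unsigned int shift)
{
	unsigned int flit_idx = (EX_LAST_WORD - word) / 2;
	uint64_t t1, t2 = 0;

	if (word & 1)
		shift += 32;	/* odd words sit in the upper 32 bits */
	t1 = ex_be64dec(tcb + flit_idx * 8) >> shift;
	if (64 - shift < 32 && (mask >> (64 - shift)) != 0) {
		/* Field spills over into the previous flit. */
		t2 = ex_be64dec(tcb + (flit_idx - 1) * 8) << (64 - shift);
	}
	return ((t2 | t1) & mask);
}

int
main(void)
{
	uint8_t tcb[EX_TCB_SIZE];

	memset(tcb, 0, sizeof(tcb));
	/* Word 3 lives in flit 14; bits 48..51 are the low nibble of byte 1. */
	tcb[14 * 8 + 1] = 0x04;
	printf("field value: %u\n", ex_get_tcb_field(tcb, 3, 0xf, 16));
	return (0);
}
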
diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h
index 9a9346f3932d..f32091755b95 100644
--- a/sys/dev/cxgbe/tom/t4_tom.h
+++ b/sys/dev/cxgbe/tom/t4_tom.h
@@ -33,6 +33,7 @@
#ifndef __T4_TOM_H__
#define __T4_TOM_H__
#include <sys/vmem.h>
+#include "common/t4_hw.h"
#include "tom/t4_tls.h"
#define LISTEN_HASH_SIZE 32
@@ -254,6 +255,31 @@ struct listen_ctx {
struct clip_entry *ce;
};
+/* tcb_histent flags */
+#define TE_RPL_PENDING 1
+#define TE_ACTIVE 2
+
+/* bits in one 8b tcb_histent sample. */
+#define TS_RTO (1 << 0)
+#define TS_DUPACKS (1 << 1)
+#define TS_FASTREXMT (1 << 2)
+#define TS_SND_BACKLOGGED (1 << 3)
+#define TS_CWND_LIMITED (1 << 4)
+#define TS_ECN_ECE (1 << 5)
+#define TS_ECN_CWR (1 << 6)
+#define TS_RESERVED (1 << 7) /* Unused. */
+
+struct tcb_histent {
+ struct mtx te_lock;
+ struct callout te_callout;
+ uint64_t te_tcb[TCB_SIZE / sizeof(uint64_t)];
+ struct adapter *te_adapter;
+ u_int te_flags;
+ u_int te_tid;
+ uint8_t te_pidx;
+ uint8_t te_sample[100];
+};
+
struct tom_data {
struct toedev tod;
@@ -268,6 +294,10 @@ struct tom_data {
struct ppod_region pr;
+ struct rwlock tcb_history_lock __aligned(CACHE_LINE_SIZE);
+ struct tcb_histent **tcb_history;
+ int dupack_threshold;
+
/* WRs that will not be sent to the chip because L2 resolution failed */
struct mtx unsent_wr_lock;
STAILQ_HEAD(, wrqe) unsent_wr_list;
@@ -326,6 +356,7 @@ int select_ulp_mode(struct socket *, struct adapter *,
struct offload_settings *);
void set_ulp_mode(struct toepcb *, int);
int negative_advice(int);
+int add_tid_to_history(struct adapter *, u_int);
/* t4_connect.c */
void t4_init_connect_cpl_handlers(void);
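
Each entry of te_sample[] packs the TS_* bits defined above into one byte per
hardware poll. Purely as a hypothetical illustration (not part of the commit),
a consumer with access to those samples could decode one of them like this:

/*
 * Hypothetical decoder (not part of this commit) for one 8-bit TCB history
 * sample.  The TS_* values mirror the definitions added to t4_tom.h above.
 */
#include <stdint.h>
#include <stdio.h>

#define	TS_RTO			(1 << 0)
#define	TS_DUPACKS		(1 << 1)
#define	TS_FASTREXMT		(1 << 2)
#define	TS_SND_BACKLOGGED	(1 << 3)
#define	TS_CWND_LIMITED		(1 << 4)
#define	TS_ECN_ECE		(1 << 5)
#define	TS_ECN_CWR		(1 << 6)

static void
print_tcb_sample(uint8_t sample)
{
	printf("sample 0x%02x:%s%s%s%s%s%s%s\n", sample,
	    (sample & TS_RTO) ? " rto" : "",
	    (sample & TS_DUPACKS) ? " dupacks" : "",
	    (sample & TS_FASTREXMT) ? " fastrexmt" : "",
	    (sample & TS_SND_BACKLOGGED) ? " snd-backlogged" : "",
	    (sample & TS_CWND_LIMITED) ? " cwnd-limited" : "",
	    (sample & TS_ECN_ECE) ? " ecn-ece" : "",
	    (sample & TS_ECN_CWR) ? " ecn-cwr" : "");
}

int
main(void)
{
	print_tcb_sample(TS_DUPACKS | TS_CWND_LIMITED);
	return (0);
}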