aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Navdeep Parhar <np@FreeBSD.org> 2022-02-04 21:16:35 +0000
committer Navdeep Parhar <np@FreeBSD.org> 2022-03-02 22:08:33 +0000
commit de6c7392d0efc52ffa875757bbfe24af0c23a360 (patch)
tree d56b324d5c8262da41439ba1dd22dcca8e997069
parent 109eb045bdcdaab238bcb688ca4895817236a004 (diff)
cxgbe(4): Changes to the fatal error handler.
* New error_flags that can be used from the error ithread and elsewhere
  without a synch_op.
* Stop the adapter immediately in t4_fatal_err but defer most of the rest
  of the handling to a task. The task is allowed to sleep, unlike the
  ithread. Remove async_event_task as it is no longer needed.
* Dump the devlog, CIMLA, and PCIE_FW exactly once on any fatal error
  involving the firmware or the CIM block. While here, dump some
  additional info (see dump_cim_regs) for these errors.
* If both reset_on_fatal_err and panic_on_fatal_err are set then attempt
  a reset first and do not panic the system if it is successful.

Sponsored by: Chelsio Communications

(cherry picked from commit e9e7bc8250548fc6f91e2b3b9c30a865c5edfb60)
-rw-r--r-- sys/dev/cxgbe/adapter.h 21
-rw-r--r-- sys/dev/cxgbe/common/common.h 3
-rw-r--r-- sys/dev/cxgbe/common/t4_hw.c 55
-rw-r--r-- sys/dev/cxgbe/t4_main.c 185
-rw-r--r-- sys/dev/cxgbe/t4_sge.c 5
-rw-r--r-- sys/riscv/conf/NOTES 5
6 files changed, 176 insertions, 98 deletions
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index 1c2e52802060..6537dce62c43 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -154,18 +154,21 @@ enum {
};
enum {
- /* adapter flags */
+ /* adapter flags. synch_op or adapter_lock. */
FULL_INIT_DONE = (1 << 0),
FW_OK = (1 << 1),
CHK_MBOX_ACCESS = (1 << 2),
MASTER_PF = (1 << 3),
- /* 1 << 4 is unused, was ADAP_SYSCTL_CTX */
- ADAP_ERR = (1 << 5),
BUF_PACKING_OK = (1 << 6),
IS_VF = (1 << 7),
KERN_TLS_ON = (1 << 8), /* HW is configured for KERN_TLS */
CXGBE_BUSY = (1 << 9),
- HW_OFF_LIMITS = (1 << 10), /* off limits to all except reset_thread */
+
+ /* adapter error_flags. reg_lock for HW_OFF_LIMITS, atomics for the rest. */
+ ADAP_STOPPED = (1 << 0), /* Adapter has been stopped. */
+ ADAP_FATAL_ERR = (1 << 1), /* Encountered a fatal error. */
+ HW_OFF_LIMITS = (1 << 2), /* off limits to all except reset_thread */
+ ADAP_CIM_ERR = (1 << 3), /* Error was related to FW/CIM. */
/* port flags */
HAS_TRACEQ = (1 << 3),
@@ -906,7 +909,6 @@ struct adapter {
int nrawf;
struct taskqueue *tq[MAX_NCHAN]; /* General purpose taskqueues */
- struct task async_event_task;
struct port_info *port[MAX_NPORTS];
uint8_t chan_map[MAX_NCHAN]; /* channel -> port */
@@ -937,6 +939,7 @@ struct adapter {
int active_ulds; /* ULDs activated on this adapter */
int flags;
int debug_flags;
+ int error_flags; /* Used by error handler and live reset. */
char ifp_lockname[16];
struct mtx ifp_lock;
@@ -993,6 +996,7 @@ struct adapter {
struct mtx tc_lock;
struct task tc_task;
+ struct task fatal_error_task;
struct task reset_task;
const void *reset_thread;
int num_resets;
@@ -1091,7 +1095,9 @@ forwarding_intr_to_fwq(struct adapter *sc)
static inline bool
hw_off_limits(struct adapter *sc)
{
- return (__predict_false(sc->flags & HW_OFF_LIMITS));
+ int off_limits = atomic_load_int(&sc->error_flags) & HW_OFF_LIMITS;
+
+ return (__predict_false(off_limits != 0));
}
static inline uint32_t
@@ -1288,8 +1294,7 @@ void free_atid(struct adapter *, int);
void release_tid(struct adapter *, int, struct sge_wrq *);
int cxgbe_media_change(struct ifnet *);
void cxgbe_media_status(struct ifnet *, struct ifmediareq *);
-bool t4_os_dump_cimla(struct adapter *, int, bool);
-void t4_os_dump_devlog(struct adapter *);
+void t4_os_cim_err(struct adapter *);
#ifdef KERN_TLS
/* t4_kern_tls.c */
diff --git a/sys/dev/cxgbe/common/common.h b/sys/dev/cxgbe/common/common.h
index bee4f58f693c..7151c53d3b0f 100644
--- a/sys/dev/cxgbe/common/common.h
+++ b/sys/dev/cxgbe/common/common.h
@@ -582,6 +582,7 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
int size, void *rpl, bool sleep_ok, int timeout);
int t4_wr_mbox_meat(struct adapter *adap, int mbox, const void *cmd, int size,
void *rpl, bool sleep_ok);
+void t4_report_fw_error(struct adapter *adap);
static inline int t4_wr_mbox_timeout(struct adapter *adap, int mbox,
const void *cmd, int size, void *rpl,
@@ -617,7 +618,7 @@ struct fw_filter_wr;
void t4_intr_enable(struct adapter *adapter);
void t4_intr_disable(struct adapter *adapter);
void t4_intr_clear(struct adapter *adapter);
-int t4_slow_intr_handler(struct adapter *adapter, bool verbose);
+bool t4_slow_intr_handler(struct adapter *adapter, bool verbose);
int t4_hash_mac_addr(const u8 *addr);
int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port,
diff --git a/sys/dev/cxgbe/common/t4_hw.c b/sys/dev/cxgbe/common/t4_hw.c
index c7e4d48e855f..c32dab915ee6 100644
--- a/sys/dev/cxgbe/common/t4_hw.c
+++ b/sys/dev/cxgbe/common/t4_hw.c
@@ -196,7 +196,7 @@ u32 t4_hw_pci_read_cfg4(adapter_t *adap, int reg)
* If the firmware has indicated an error, print out the reason for
* the firmware error.
*/
-static void t4_report_fw_error(struct adapter *adap)
+void t4_report_fw_error(struct adapter *adap)
{
static const char *const reason[] = {
"Crash", /* PCIE_FW_EVAL_CRASH */
@@ -212,11 +212,8 @@ static void t4_report_fw_error(struct adapter *adap)
pcie_fw = t4_read_reg(adap, A_PCIE_FW);
if (pcie_fw & F_PCIE_FW_ERR) {
- adap->flags &= ~FW_OK;
CH_ERR(adap, "firmware reports adapter error: %s (0x%08x)\n",
reason[G_PCIE_FW_EVAL(pcie_fw)], pcie_fw);
- if (pcie_fw != 0xffffffff)
- t4_os_dump_devlog(adap);
}
}
@@ -374,6 +371,12 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
/*
* Attempt to gain access to the mailbox.
*/
+ pcie_fw = 0;
+ if (!(adap->flags & IS_VF)) {
+ pcie_fw = t4_read_reg(adap, A_PCIE_FW);
+ if (pcie_fw & F_PCIE_FW_ERR)
+ goto failed;
+ }
for (i = 0; i < 4; i++) {
ctl = t4_read_reg(adap, ctl_reg);
v = G_MBOWNER(ctl);
@@ -385,7 +388,11 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
* If we were unable to gain access, report the error to our caller.
*/
if (v != X_MBOWNER_PL) {
- t4_report_fw_error(adap);
+ if (!(adap->flags & IS_VF)) {
+ pcie_fw = t4_read_reg(adap, A_PCIE_FW);
+ if (pcie_fw & F_PCIE_FW_ERR)
+ goto failed;
+ }
ret = (v == X_MBOWNER_FW) ? -EBUSY : -ETIMEDOUT;
return ret;
}
@@ -436,7 +443,6 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
* Loop waiting for the reply; bail out if we time out or the firmware
* reports an error.
*/
- pcie_fw = 0;
for (i = 0; i < timeout; i += ms) {
if (!(adap->flags & IS_VF)) {
pcie_fw = t4_read_reg(adap, A_PCIE_FW);
@@ -494,15 +500,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
*(const u8 *)cmd, mbox, pcie_fw);
CH_DUMP_MBOX(adap, mbox, 0, "cmdsent", cmd_rpl, true);
CH_DUMP_MBOX(adap, mbox, data_reg, "current", NULL, true);
-
- if (pcie_fw & F_PCIE_FW_ERR) {
- ret = -ENXIO;
- t4_report_fw_error(adap);
- } else {
- ret = -ETIMEDOUT;
- t4_os_dump_devlog(adap);
- }
-
+failed:
+ adap->flags &= ~FW_OK;
+ ret = pcie_fw & F_PCIE_FW_ERR ? -ENXIO : -ETIMEDOUT;
t4_fatal_err(adap, true);
return ret;
}
@@ -4464,10 +4464,6 @@ static bool sge_intr_handler(struct adapter *adap, int arg, bool verbose)
*/
static bool cim_intr_handler(struct adapter *adap, int arg, bool verbose)
{
- static const struct intr_action cim_host_intr_actions[] = {
- { F_TIMER0INT, 0, t4_os_dump_cimla },
- { 0 },
- };
static const struct intr_details cim_host_intr_details[] = {
/* T6+ */
{ F_PCIE2CIMINTFPARERR, "CIM IBQ PCIe interface parity error" },
@@ -4513,7 +4509,7 @@ static bool cim_intr_handler(struct adapter *adap, int arg, bool verbose)
.fatal = 0x007fffe6,
.flags = NONFATAL_IF_DISABLED,
.details = cim_host_intr_details,
- .actions = cim_host_intr_actions,
+ .actions = NULL,
};
static const struct intr_details cim_host_upacc_intr_details[] = {
{ F_EEPROMWRINT, "CIM EEPROM came out of busy state" },
@@ -4578,10 +4574,6 @@ static bool cim_intr_handler(struct adapter *adap, int arg, bool verbose)
u32 val, fw_err;
bool fatal;
- fw_err = t4_read_reg(adap, A_PCIE_FW);
- if (fw_err & F_PCIE_FW_ERR)
- t4_report_fw_error(adap);
-
/*
* When the Firmware detects an internal error which normally wouldn't
* raise a Host Interrupt, it forces a CIM Timer0 interrupt in order
@@ -4589,16 +4581,19 @@ static bool cim_intr_handler(struct adapter *adap, int arg, bool verbose)
* Timer0 interrupt and don't see a Firmware Crash, ignore the Timer0
* interrupt.
*/
+ fw_err = t4_read_reg(adap, A_PCIE_FW);
val = t4_read_reg(adap, A_CIM_HOST_INT_CAUSE);
if (val & F_TIMER0INT && (!(fw_err & F_PCIE_FW_ERR) ||
G_PCIE_FW_EVAL(fw_err) != PCIE_FW_EVAL_CRASH)) {
t4_write_reg(adap, A_CIM_HOST_INT_CAUSE, F_TIMER0INT);
}
- fatal = false;
+ fatal = (fw_err & F_PCIE_FW_ERR) != 0;
fatal |= t4_handle_intr(adap, &cim_host_intr_info, 0, verbose);
fatal |= t4_handle_intr(adap, &cim_host_upacc_intr_info, 0, verbose);
fatal |= t4_handle_intr(adap, &cim_pf_host_intr_info, 0, verbose);
+ if (fatal)
+ t4_os_cim_err(adap);
return (fatal);
}
@@ -5297,7 +5292,7 @@ static bool plpl_intr_handler(struct adapter *adap, int arg, bool verbose)
* The designation 'slow' is because it involves register reads, while
* data interrupts typically don't involve any MMIOs.
*/
-int t4_slow_intr_handler(struct adapter *adap, bool verbose)
+bool t4_slow_intr_handler(struct adapter *adap, bool verbose)
{
static const struct intr_details pl_intr_details[] = {
{ F_MC1, "MC1" },
@@ -5376,7 +5371,6 @@ int t4_slow_intr_handler(struct adapter *adap, bool verbose)
.details = pl_intr_details,
.actions = pl_intr_action,
};
- bool fatal;
u32 perr;
perr = t4_read_reg(adap, pl_perr_cause.cause_reg);
@@ -5387,11 +5381,8 @@ int t4_slow_intr_handler(struct adapter *adap, bool verbose)
if (verbose)
perr |= t4_read_reg(adap, pl_intr_info.enable_reg);
}
- fatal = t4_handle_intr(adap, &pl_intr_info, perr, verbose);
- if (fatal)
- t4_fatal_err(adap, false);
- return (0);
+ return (t4_handle_intr(adap, &pl_intr_info, perr, verbose));
}
#define PF_INTR_MASK (F_PFSW | F_PFCIM)
@@ -7521,8 +7512,6 @@ retry:
if (ret != FW_SUCCESS) {
if ((ret == -EBUSY || ret == -ETIMEDOUT) && retries-- > 0)
goto retry;
- if (t4_read_reg(adap, A_PCIE_FW) & F_PCIE_FW_ERR)
- t4_report_fw_error(adap);
return ret;
}
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index b6045dd41674..8a1fb7349373 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -859,7 +859,7 @@ static int hold_clip_addr(struct adapter *, struct t4_clip_addr *);
static int release_clip_addr(struct adapter *, struct t4_clip_addr *);
#ifdef TCP_OFFLOAD
static int toe_capability(struct vi_info *, bool);
-static void t4_async_event(void *, int);
+static void t4_async_event(struct adapter *);
#endif
#ifdef KERN_TLS
static int ktls_capability(struct adapter *, bool);
@@ -869,7 +869,11 @@ static int notify_siblings(device_t, int);
static uint64_t vi_get_counter(struct ifnet *, ift_counter);
static uint64_t cxgbe_get_counter(struct ifnet *, ift_counter);
static void enable_vxlan_rx(struct adapter *);
-static void reset_adapter(void *, int);
+static void reset_adapter_task(void *, int);
+static void fatal_error_task(void *, int);
+static void dump_devlog(struct adapter *);
+static void dump_cim_regs(struct adapter *);
+static void dump_cimla(struct adapter *);
struct {
uint16_t device;
@@ -1173,13 +1177,10 @@ t4_attach(device_t dev)
callout_init(&sc->ktls_tick, 1);
-#ifdef TCP_OFFLOAD
- TASK_INIT(&sc->async_event_task, 0, t4_async_event, sc);
-#endif
-
refcount_init(&sc->vxlan_refcount, 0);
- TASK_INIT(&sc->reset_task, 0, reset_adapter, sc);
+ TASK_INIT(&sc->reset_task, 0, reset_adapter_task, sc);
+ TASK_INIT(&sc->fatal_error_task, 0, fatal_error_task, sc);
sc->ctrlq_oid = SYSCTL_ADD_NODE(&sc->ctx,
SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO, "ctrlq",
@@ -1715,10 +1716,6 @@ t4_detach_common(device_t dev)
}
}
-#ifdef TCP_OFFLOAD
- taskqueue_drain(taskqueue_thread, &sc->async_event_task);
-#endif
-
for (i = 0; i < sc->intr_count; i++)
t4_free_irq(sc, &sc->irq[i]);
@@ -1868,6 +1865,14 @@ ok_to_reset(struct adapter *sc)
return (true);
}
+static inline int
+stop_adapter(struct adapter *sc)
+{
+ if (atomic_testandset_int(&sc->error_flags, ilog2(ADAP_STOPPED)))
+ return (1); /* Already stopped. */
+ return (t4_shutdown_adapter(sc));
+}
+
static int
t4_suspend(device_t dev)
{
@@ -1903,7 +1908,7 @@ t4_suspend(device_t dev)
}
/* No more DMA or interrupts. */
- t4_shutdown_adapter(sc);
+ stop_adapter(sc);
/* Quiesce all activity. */
for_each_port(sc, i) {
@@ -1979,12 +1984,11 @@ t4_suspend(device_t dev)
/* Mark the adapter totally off limits. */
mtx_lock(&sc->reg_lock);
- sc->flags |= HW_OFF_LIMITS;
+ atomic_set_int(&sc->error_flags, HW_OFF_LIMITS);
sc->flags &= ~(FW_OK | MASTER_PF);
sc->reset_thread = NULL;
mtx_unlock(&sc->reg_lock);
- sc->num_resets++;
CH_ALERT(sc, "suspend completed.\n");
done:
end_synchronized_op(sc, 0);
@@ -2171,6 +2175,9 @@ t4_resume(device_t dev)
goto done;
}
+ /* Note that HW_OFF_LIMITS is cleared a bit later. */
+ atomic_clear_int(&sc->error_flags, ADAP_FATAL_ERR | ADAP_STOPPED);
+
/* Restore memory window. */
setup_memwin(sc);
@@ -2179,7 +2186,7 @@ t4_resume(device_t dev)
CH_ALERT(sc, "recovery mode on resume.\n");
rc = 0;
mtx_lock(&sc->reg_lock);
- sc->flags &= ~HW_OFF_LIMITS;
+ atomic_clear_int(&sc->error_flags, HW_OFF_LIMITS);
mtx_unlock(&sc->reg_lock);
goto done;
}
@@ -2248,7 +2255,7 @@ t4_resume(device_t dev)
* this thread is still in the middle of a synchronized_op.
*/
mtx_lock(&sc->reg_lock);
- sc->flags &= ~HW_OFF_LIMITS;
+ atomic_clear_int(&sc->error_flags, HW_OFF_LIMITS);
mtx_unlock(&sc->reg_lock);
if (sc->flags & FULL_INIT_DONE) {
@@ -2363,17 +2370,16 @@ t4_reset_post(device_t dev, device_t child)
return (0);
}
-static void
-reset_adapter(void *arg, int pending)
+static int
+reset_adapter(struct adapter *sc)
{
- struct adapter *sc = arg;
- int rc;
+ int rc, oldinc, error_flags;
CH_ALERT(sc, "reset requested.\n");
rc = begin_synchronized_op(sc, NULL, SLEEP_OK, "t4rst1");
if (rc != 0)
- return;
+ return (EBUSY);
if (hw_off_limits(sc)) {
CH_ERR(sc, "adapter is suspended, use resume (not reset).\n");
@@ -2389,17 +2395,41 @@ reset_adapter(void *arg, int pending)
}
done:
+ oldinc = sc->incarnation;
end_synchronized_op(sc, 0);
if (rc != 0)
- return; /* Error logged already. */
+ return (rc); /* Error logged already. */
+ atomic_add_int(&sc->num_resets, 1);
mtx_lock(&Giant);
rc = BUS_RESET_CHILD(device_get_parent(sc->dev), sc->dev, 0);
mtx_unlock(&Giant);
if (rc != 0)
CH_ERR(sc, "bus_reset_child failed: %d.\n", rc);
- else
- CH_ALERT(sc, "bus_reset_child succeeded.\n");
+ else {
+ rc = begin_synchronized_op(sc, NULL, SLEEP_OK, "t4rst2");
+ if (rc != 0)
+ return (EBUSY);
+ error_flags = atomic_load_int(&sc->error_flags);
+ if (sc->incarnation > oldinc && error_flags == 0) {
+ CH_ALERT(sc, "bus_reset_child succeeded.\n");
+ } else {
+ CH_ERR(sc, "adapter did not reset properly, flags "
+ "0x%08x, error_flags 0x%08x.\n", sc->flags,
+ error_flags);
+ rc = ENXIO;
+ }
+ end_synchronized_op(sc, 0);
+ }
+
+ return (rc);
+}
+
+static void
+reset_adapter_task(void *arg, int pending)
+{
+ /* XXX: t4_async_event here? */
+ reset_adapter(arg);
}
static int
@@ -3548,36 +3578,64 @@ delayed_panic(void *arg)
panic("%s: panic on fatal error", device_get_nameunit(sc->dev));
}
-void
-t4_fatal_err(struct adapter *sc, bool fw_error)
+static void
+fatal_error_task(void *arg, int pending)
{
+ struct adapter *sc = arg;
+ int rc;
- t4_shutdown_adapter(sc);
- log(LOG_ALERT, "%s: encountered fatal error, adapter stopped.\n",
- device_get_nameunit(sc->dev));
- if (fw_error) {
- if (sc->flags & CHK_MBOX_ACCESS)
- ASSERT_SYNCHRONIZED_OP(sc);
- sc->flags |= ADAP_ERR;
- } else {
- ADAPTER_LOCK(sc);
- sc->flags |= ADAP_ERR;
- ADAPTER_UNLOCK(sc);
- }
#ifdef TCP_OFFLOAD
- taskqueue_enqueue(taskqueue_thread, &sc->async_event_task);
+ t4_async_event(sc);
#endif
+ if (atomic_testandclear_int(&sc->error_flags, ilog2(ADAP_CIM_ERR))) {
+ dump_cim_regs(sc);
+ dump_cimla(sc);
+ dump_devlog(sc);
+ }
+
+ if (t4_reset_on_fatal_err) {
+ CH_ALERT(sc, "resetting on fatal error.\n");
+ rc = reset_adapter(sc);
+ if (rc == 0 && t4_panic_on_fatal_err) {
+ CH_ALERT(sc, "reset was successful, "
+ "system will NOT panic.\n");
+ return;
+ }
+ }
if (t4_panic_on_fatal_err) {
CH_ALERT(sc, "panicking on fatal error (after 30s).\n");
callout_reset(&fatal_callout, hz * 30, delayed_panic, sc);
- } else if (t4_reset_on_fatal_err) {
- CH_ALERT(sc, "resetting on fatal error.\n");
- taskqueue_enqueue(reset_tq, &sc->reset_task);
}
}
void
+t4_fatal_err(struct adapter *sc, bool fw_error)
+{
+ const bool verbose = (sc->debug_flags & DF_VERBOSE_SLOWINTR) != 0;
+
+ stop_adapter(sc);
+ if (atomic_testandset_int(&sc->error_flags, ilog2(ADAP_FATAL_ERR)))
+ return;
+ if (fw_error) {
+ /*
+ * We are here because of a firmware error/timeout and not
+ * because of a hardware interrupt. It is possible (although
+ * not very likely) that an error interrupt was also raised but
+ * this thread ran first and inhibited t4_intr_err. We walk the
+ * main INT_CAUSE registers here to make sure we haven't missed
+ * anything interesting.
+ */
+ t4_slow_intr_handler(sc, verbose);
+ atomic_set_int(&sc->error_flags, ADAP_CIM_ERR);
+ }
+ t4_report_fw_error(sc);
+ log(LOG_ALERT, "%s: encountered fatal error, adapter stopped (%d).\n",
+ device_get_nameunit(sc->dev), fw_error);
+ taskqueue_enqueue(reset_tq, &sc->fatal_error_task);
+}
+
+void
t4_add_adapter(struct adapter *sc)
{
sx_xlock(&t4_list_lock);
@@ -8976,24 +9034,44 @@ sysctl_cim_la(SYSCTL_HANDLER_ARGS)
return (rc);
}
-bool
-t4_os_dump_cimla(struct adapter *sc, int arg, bool verbose)
+static void
+dump_cim_regs(struct adapter *sc)
+{
+ log(LOG_DEBUG, "%s: CIM debug regs %08x %08x %08x %08x %08x\n",
+ device_get_nameunit(sc->dev),
+ t4_read_reg(sc, A_EDC_H_BIST_USER_WDATA0),
+ t4_read_reg(sc, A_EDC_H_BIST_USER_WDATA1),
+ t4_read_reg(sc, A_EDC_H_BIST_USER_WDATA2),
+ t4_read_reg(sc, A_EDC_H_BIST_DATA_PATTERN),
+ t4_read_reg(sc, A_EDC_H_BIST_STATUS_RDATA));
+}
+
+static void
+dump_cimla(struct adapter *sc)
{
struct sbuf sb;
int rc;
- if (sbuf_new(&sb, NULL, 4096, SBUF_AUTOEXTEND) != &sb)
- return (false);
+ if (sbuf_new(&sb, NULL, 4096, SBUF_AUTOEXTEND) != &sb) {
+ log(LOG_DEBUG, "%s: failed to generate CIM LA dump.\n",
+ device_get_nameunit(sc->dev));
+ return;
+ }
rc = sbuf_cim_la(sc, &sb, M_NOWAIT);
if (rc == 0) {
rc = sbuf_finish(&sb);
if (rc == 0) {
- log(LOG_DEBUG, "%s: CIM LA dump follows.\n%s",
+ log(LOG_DEBUG, "%s: CIM LA dump follows.\n%s\n",
device_get_nameunit(sc->dev), sbuf_data(&sb));
}
}
sbuf_delete(&sb);
- return (false);
+}
+
+void
+t4_os_cim_err(struct adapter *sc)
+{
+ atomic_set_int(&sc->error_flags, ADAP_CIM_ERR);
}
static int
@@ -9409,8 +9487,8 @@ sysctl_devlog(SYSCTL_HANDLER_ARGS)
return (rc);
}
-void
-t4_os_dump_devlog(struct adapter *sc)
+static void
+dump_devlog(struct adapter *sc)
{
int rc;
struct sbuf sb;
@@ -11067,14 +11145,14 @@ sysctl_reset(SYSCTL_HANDLER_ARGS)
u_int val;
int rc;
- val = sc->num_resets;
+ val = atomic_load_int(&sc->num_resets);
rc = sysctl_handle_int(oidp, &val, 0, req);
if (rc != 0 || req->newptr == NULL)
return (rc);
if (val == 0) {
/* Zero out the counter that tracks reset. */
- sc->num_resets = 0;
+ atomic_store_int(&sc->num_resets, 0);
return (0);
}
@@ -12539,10 +12617,9 @@ t4_deactivate_uld(struct adapter *sc, int id)
}
static void
-t4_async_event(void *arg, int n)
+t4_async_event(struct adapter *sc)
{
struct uld_info *ui;
- struct adapter *sc = (struct adapter *)arg;
if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4async") != 0)
return;
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index 131fb617e102..1c2102f4f06e 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -1303,7 +1303,7 @@ t4_intr_err(void *arg)
uint32_t v;
const bool verbose = (sc->debug_flags & DF_VERBOSE_SLOWINTR) != 0;
- if (sc->flags & ADAP_ERR)
+ if (atomic_load_int(&sc->error_flags) & ADAP_FATAL_ERR)
return;
v = t4_read_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE));
@@ -1312,7 +1312,8 @@ t4_intr_err(void *arg)
t4_write_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE), v);
}
- t4_slow_intr_handler(sc, verbose);
+ if (t4_slow_intr_handler(sc, verbose))
+ t4_fatal_err(sc, false);
}
/*
diff --git a/sys/riscv/conf/NOTES b/sys/riscv/conf/NOTES
index 1e4b8af1840e..01ae4c672ec6 100644
--- a/sys/riscv/conf/NOTES
+++ b/sys/riscv/conf/NOTES
@@ -99,3 +99,8 @@ nodevice snd_cmi
# Don't yet have hwpmc(4)
nodevice hwpmc
nooptions HWPMC_HOOKS
+
+# riscv doesn't yet have atomic_testandset_int and atomic_testandclear_int.
+nodevice ccr
+nodevice cxgbe
+nodevice cxgbev