diff options
| author | Sreekanth Reddy <sreekanth.reddy@broadcom.com> | 2026-04-13 06:28:08 +0000 |
|---|---|---|
| committer | Sumit Saxena <ssaxena@FreeBSD.org> | 2026-04-14 09:13:34 +0000 |
| commit | d2b96f654a672f6059c5c623c276dcd76841ed12 (patch) | |
| tree | 0a850a8ce7deedba6887430bdd33bf9e2a0919a2 | |
| parent | 54f5d20492d231b5c2ddc6f1d94dbffa1707d820 (diff) | |
iflib: Fix panic observed while doing sysctl -a with if_bnxt unload
The following kernel panic was observed when running sysctl -a
while the if_bnxt driver was being unloaded:
Fatal trap 9: general protection fault while in kernel mode
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe02a7569940
vpanic() at vpanic+0x136/frame 0xfffffe02a7569a70
panic() at panic+0x43/frame 0xfffffe02a7569ad0
trap_fatal() at trap_fatal+0x68/frame 0xfffffe02a7569af0
calltrap() at calltrap+0x8/frame 0xfffffe02a7569af0
trap 0x9, rip = 0xffffffff80c0b411, rsp = 0xfffffe02a7569bc0, rbp = 0xfffffe02a7569be0 ---
sysctl_handle_counter_u64() at sysctl_handle_counter_u64+0x61/frame 0xfffffe02a7569be0
sysctl_root_handler_locked() at sysctl_root_handler_locked+0x9c/frame 0xfffffe02a7569c30
sysctl_root() at sysctl_root+0x22f/frame 0xfffffe02a7569cb0
userland_sysctl() at userland_sysctl+0x196/frame 0xfffffe02a7569d50
sys___sysctl() at sys___sysctl+0x65/frame 0xfffffe02a7569e00
amd64_syscall() at amd64_syscall+0x169/frame 0xfffffe02a7569f30
fast_syscall_common() at fast_syscall_common+0xf8/frame 0xfffffe02a7569f30
Root Cause:
iflib adds per-device sysctl nodes under the device tree using the device
sysctl context. Some of those nodes are counter sysctls that point at fields
inside txq->ift_br. When the if_bnxt driver is unloaded, iflib_device_deregister
runs and calls iflib_tx_structures_free, which frees each txq's ift_br. The device
sysctl tree is only freed when the device is destroyed. If sysctl -a runs during
unload, it can still traverse the device tree and call sysctl_handle_counter_u64
for those nodes. The handler does counter_u64_fetch(*(counter_u64_t *)arg1).
By then arg1 can point into freed memory, which leads to a use-after-free kernel panic.
Fix:
iflib now uses its own sysctl context for all iflib-related nodes
instead of the device's context, and the iflib sysctl context is now
removed before any queue/ring memory is freed.
MFC after: 2 weeks
Reviewed by: gallatin, ssaxena, #iflib
Differential Revision: https://reviews.freebsd.org/D55981
| -rw-r--r-- | sys/net/iflib.c | 45 |
1 files changed, 24 insertions, 21 deletions
diff --git a/sys/net/iflib.c b/sys/net/iflib.c index f9d0b1af0f83..186c41d9f839 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -190,6 +190,7 @@ struct iflib_ctx { struct ifmedia ifc_media; struct ifmedia *ifc_mediap; + struct sysctl_ctx_list ifc_sysctl_ctx; struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; @@ -5293,6 +5294,8 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_queues: + sysctl_ctx_free(&ctx->ifc_sysctl_ctx); + ctx->ifc_sysctl_node = NULL; taskqueue_free(ctx->ifc_tq); iflib_tqg_detach(ctx); iflib_tx_structures_free(ctx); @@ -5332,6 +5335,9 @@ iflib_device_deregister(if_ctx_t ctx) if_t ifp = ctx->ifc_ifp; device_t dev = ctx->ifc_dev; + sysctl_ctx_free(&ctx->ifc_sysctl_ctx); + ctx->ifc_sysctl_node = NULL; + /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev, "Vlan in use, detach first\n"); @@ -6787,62 +6793,61 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child, *oid_list; - struct sysctl_ctx_list *ctx_list; struct sysctl_oid *node; - ctx_list = device_get_sysctl_ctx(dev); + sysctl_ctx_init(&ctx->ifc_sysctl_ctx); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); - ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, + ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(&ctx->ifc_sysctl_ctx, child, OID_AUTO, "iflib", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IFLIB fields"); oid_list = SYSCTL_CHILDREN(node); - SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", + SYSCTL_ADD_CONST_STRING(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, "driver version"); - SYSCTL_ADD_BOOL(ctx_list, oid_list, OID_AUTO, "simple_tx", + SYSCTL_ADD_BOOL(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "simple_tx", CTLFLAG_RDTUN, &ctx->ifc_sysctl_simple_tx, 0, "use simple tx ring"); 
- SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", + SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", + SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_nrxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, "# of rxqs to use, 0 => use default #"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", + SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); - SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", + SYSCTL_ADD_INT(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, "disable MSI-X (default 0)"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", + SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "rx_budget", CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, "set the RX budget"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate", + SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "tx_abdicate", CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0, "cause TX to abdicate instead of running to completion"); ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED; - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset", + SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "core_offset", CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0, "offset to start using cores at"); - SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx", + SYSCTL_ADD_U8(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "separate_txrx", CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0, "use separate cores for TX and RX"); - SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "use_logical_cores", + SYSCTL_ADD_U8(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "use_logical_cores", 
CTLFLAG_RDTUN, &ctx->ifc_sysctl_use_logical_cores, 0, "try to make use of logical cores for TX and RX"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "use_extra_msix_vectors", + SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "use_extra_msix_vectors", CTLFLAG_RDTUN, &ctx->ifc_sysctl_extra_msix_vectors, 0, "attempt to reserve the given number of extra MSI-X vectors during driver load for the creation of additional interfaces later"); - SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "allocated_msix_vectors", + SYSCTL_ADD_INT(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "allocated_msix_vectors", CTLFLAG_RDTUN, &ctx->ifc_softc_ctx.isc_vectors, 0, "total # of MSI-X vectors allocated by driver"); /* XXX change for per-queue sizes */ - SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", + SYSCTL_ADD_PROC(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_ntxds", CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx, IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", "list of # of TX descriptors to use, 0 = use default #"); - SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", + SYSCTL_ADD_PROC(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_nrxds", CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of RX descriptors to use, 0 = use default #"); @@ -6853,9 +6858,8 @@ iflib_add_device_sysctl_post(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; - device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child; - struct sysctl_ctx_list *ctx_list; + struct sysctl_ctx_list *ctx_list = &ctx->ifc_sysctl_ctx; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; @@ -6864,7 +6868,6 @@ iflib_add_device_sysctl_post(if_ctx_t ctx) char *qfmt; struct sysctl_oid *queue_node, *fl_node, *node; struct sysctl_oid_list *queue_list, *fl_list; - ctx_list = device_get_sysctl_ctx(dev); node = ctx->ifc_sysctl_node; child = SYSCTL_CHILDREN(node); |
