diff options
Diffstat (limited to 'sys')
409 files changed, 6487 insertions, 2553 deletions
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index 81427b5b18b6..d5a1adb2dc65 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -26,7 +26,7 @@ makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support options SCHED_ULE # ULE scheduler options NUMA # Non-Uniform Memory Architecture support options PREEMPTION # Enable kernel thread preemption -options BLOAT_KERNEL_WITH_EXTERR +options EXTERR_STRINGS options VIMAGE # Subsystem virtualization, e.g. VNET options INET # InterNETworking options INET6 # IPv6 communications protocols @@ -287,9 +287,9 @@ device wlan # 802.11 support options IEEE80211_DEBUG # enable debug msgs options IEEE80211_SUPPORT_MESH # enable 802.11s draft support device wlan_wep # 802.11 WEP support +device wlan_tkip # 802.11 TKIP support device wlan_ccmp # 802.11 CCMP support device wlan_gcmp # 802.11 GCMP support -device wlan_tkip # 802.11 TKIP support device wlan_amrr # AMRR transmit rate control algorithm device ath # Atheros CardBus/PCI NICs device ath_hal # Atheros CardBus/PCI chip support diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 0b3daed4f69e..e35119af8572 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -46,6 +46,7 @@ enum vm_suspend_how { VM_SUSPEND_POWEROFF, VM_SUSPEND_HALT, VM_SUSPEND_TRIPLEFAULT, + VM_SUSPEND_DESTROY, VM_SUSPEND_LAST }; diff --git a/sys/arm/arm/generic_timer.c b/sys/arm/arm/generic_timer.c index a8c779dcba6d..97976408c943 100644 --- a/sys/arm/arm/generic_timer.c +++ b/sys/arm/arm/generic_timer.c @@ -882,32 +882,32 @@ DELAY(int usec) TSEXIT(); } -static bool +static cpu_feat_en wfxt_check(const struct cpu_feat *feat __unused, u_int midr __unused) { uint64_t id_aa64isar2; if (!get_kernel_reg(ID_AA64ISAR2_EL1, &id_aa64isar2)) - return (false); - return (ID_AA64ISAR2_WFxT_VAL(id_aa64isar2) != ID_AA64ISAR2_WFxT_NONE); + return (FEAT_ALWAYS_DISABLE); + if (ID_AA64ISAR2_WFxT_VAL(id_aa64isar2) >= ID_AA64ISAR2_WFxT_NONE) + return (FEAT_DEFAULT_ENABLE); + + return (FEAT_ALWAYS_DISABLE); } -static void +static bool wfxt_enable(const struct cpu_feat *feat __unused, cpu_feat_errata errata_status __unused, u_int *errata_list __unused, u_int errata_count __unused) { /* will be called if wfxt_check returns true */ enable_wfxt = true; + return (true); } -static struct cpu_feat feat_wfxt = { - .feat_name = "FEAT_WFXT", - .feat_check = wfxt_check, - .feat_enable = wfxt_enable, - .feat_flags = CPU_FEAT_AFTER_DEV | CPU_FEAT_SYSTEM, -}; -DATA_SET(cpu_feat_set, feat_wfxt); +CPU_FEAT(feat_wfxt, "WFE and WFI instructions with timeout", + wfxt_check, NULL, wfxt_enable, + CPU_FEAT_AFTER_DEV | CPU_FEAT_SYSTEM); #endif static uint32_t diff --git a/sys/arm/conf/TEGRA124 b/sys/arm/conf/TEGRA124 index ad5532427eda..ff23e63f77bd 100644 --- a/sys/arm/conf/TEGRA124 +++ b/sys/arm/conf/TEGRA124 @@ -107,9 +107,9 @@ device ums # USB mouse # Wireless NIC cards #device wlan # 802.11 support #device wlan_wep # 802.11 WEP support +#device wlan_tkip # 802.11 TKIP support #device wlan_ccmp # 802.11 CCMP support #device wlan_gcmp # 802.11 GCMP support -#device wlan_tkip # 802.11 TKIP support #device wlan_amrr # AMRR transmit rate control algorithm # PCI diff --git a/sys/arm64/arm64/cpu_feat.c b/sys/arm64/arm64/cpu_feat.c index cc262394913d..986d5079e980 100644 --- a/sys/arm64/arm64/cpu_feat.c +++ b/sys/arm64/arm64/cpu_feat.c @@ -32,16 +32,21 @@ #include <machine/cpu.h> #include <machine/cpu_feat.h> +SYSCTL_NODE(_hw, OID_AUTO, feat, CTLFLAG_RD, 0, "CPU features/errata"); + /* TODO: Make this a list if we ever grow a callback other than smccc_errata */ static cpu_feat_errata_check_fn cpu_feat_check_cb = NULL; void enable_cpu_feat(uint32_t stage) { + char tunable[32]; struct cpu_feat **featp, *feat; uint32_t midr; u_int errata_count, *errata_list; cpu_feat_errata errata_status; + cpu_feat_en check_status; + bool val; MPASS((stage & ~CPU_FEAT_STAGE_MASK) == 0); @@ -58,9 +63,26 @@ enable_cpu_feat(uint32_t stage) PCPU_GET(cpuid) != 0) continue; - if (feat->feat_check != NULL && !feat->feat_check(feat, midr)) + if (feat->feat_check != NULL) + continue; + + check_status = feat->feat_check(feat, midr); + /* Ignore features that are not present */ + if (check_status == FEAT_ALWAYS_DISABLE) continue; + snprintf(tunable, sizeof(tunable), "hw.feat.%s", + feat->feat_name); + if (TUNABLE_BOOL_FETCH(tunable, &val)) { + /* Is the feature disabled by the tunable? */ + if (!val) + continue; + /* If enabled by the tunable then enable it */ + } else if (check_status == FEAT_DEFAULT_DISABLE) { + /* No tunable set and disabled by default */ + continue; + } + /* * Check if the feature has any errata that may need a * workaround applied (or it is to install the workaround for @@ -97,8 +119,9 @@ enable_cpu_feat(uint32_t stage) /* Shouldn't be possible */ MPASS(errata_status != ERRATA_UNKNOWN); - feat->feat_enable(feat, errata_status, errata_list, - errata_count); + if (feat->feat_enable(feat, errata_status, errata_list, + errata_count)) + feat->feat_enabled = true; } } diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c index 5c81c6cdce3d..8f8a934ad520 100644 --- a/sys/arm64/arm64/elf32_machdep.c +++ b/sys/arm64/arm64/elf32_machdep.c @@ -225,7 +225,7 @@ freebsd32_fetch_syscall_args(struct thread *td) sa->args[i] = ap[i]; if (narg > nap) { if (narg - nap > nitems(args)) - panic("Too many system call arguiments"); + panic("Too many system call arguments"); error = copyin((void *)td->td_frame->tf_x[13], args, (narg - nap) * sizeof(int)); if (error != 0) diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c index bcacea43ad2f..a001be200518 100644 --- a/sys/arm64/arm64/identcpu.c +++ b/sys/arm64/arm64/identcpu.c @@ -2272,37 +2272,25 @@ static const struct mrs_user_reg user_regs[] = { static bool user_ctr_has_neoverse_n1_1542419(uint32_t midr, uint64_t ctr) { - /* Skip non-Neoverse-N1 */ - if (!CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_ARM, - CPU_PART_NEOVERSE_N1, 0, 0)) - return (false); - - switch (CPU_VAR(midr)) { - default: - break; - case 4: - /* Fixed in r4p1 */ - if (CPU_REV(midr) > 0) - break; - /* FALLTHROUGH */ - case 3: - /* If DIC is enabled (coherent icache) then we are affected */ - return (CTR_DIC_VAL(ctr) != 0); - } - - return (false); + /* + * Neoverse-N1 erratum 1542419 + * Present in r3p0 - r4p0 + * Fixed in r4p1 + */ + return (midr_check_var_part_range(midr, CPU_IMPL_ARM, + CPU_PART_NEOVERSE_N1, 3, 0, 4, 0) && CTR_DIC_VAL(ctr) != 0); } -static bool -user_ctr_check(const struct cpu_feat *feat __unused, u_int midr __unused) +static cpu_feat_en +user_ctr_check(const struct cpu_feat *feat __unused, u_int midr) { if (emulate_ctr) - return (true); + return (FEAT_DEFAULT_ENABLE); if (user_ctr_has_neoverse_n1_1542419(midr, READ_SPECIALREG(ctr_el0))) - return (true); + return (FEAT_DEFAULT_ENABLE); - return (false); + return (FEAT_ALWAYS_DISABLE); } static bool @@ -2320,7 +2308,7 @@ user_ctr_has_errata(const struct cpu_feat *feat __unused, u_int midr, return (false); } -static void +static bool user_ctr_enable(const struct cpu_feat *feat __unused, cpu_feat_errata errata_status, u_int *errata_list, u_int errata_count) { @@ -2356,16 +2344,13 @@ user_ctr_enable(const struct cpu_feat *feat __unused, WRITE_SPECIALREG(sctlr_el1, READ_SPECIALREG(sctlr_el1) & ~SCTLR_UCT); isb(); + + return (true); } -static struct cpu_feat user_ctr = { - .feat_name = "Trap CTR_EL0", - .feat_check = user_ctr_check, - .feat_has_errata = user_ctr_has_errata, - .feat_enable = user_ctr_enable, - .feat_flags = CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU, -}; -DATA_SET(cpu_feat_set, user_ctr); +CPU_FEAT(trap_ctr, "Trap CTR_EL0", + user_ctr_check, user_ctr_has_errata, user_ctr_enable, + CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU); static bool user_ctr_handler(uint64_t esr, struct trapframe *frame) diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c index 53856dd90cae..47c701e8588c 100644 --- a/sys/arm64/arm64/machdep.c +++ b/sys/arm64/arm64/machdep.c @@ -173,16 +173,19 @@ SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL); SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL); #endif -static bool +static cpu_feat_en pan_check(const struct cpu_feat *feat __unused, u_int midr __unused) { uint64_t id_aa64mfr1; id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1); - return (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE); + if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) == ID_AA64MMFR1_PAN_NONE) + return (FEAT_ALWAYS_DISABLE); + + return (FEAT_DEFAULT_ENABLE); } -static void +static bool pan_enable(const struct cpu_feat *feat __unused, cpu_feat_errata errata_status __unused, u_int *errata_list __unused, u_int errata_count __unused) @@ -200,15 +203,13 @@ pan_enable(const struct cpu_feat *feat __unused, ".arch_extension pan \n" "msr pan, #1 \n" ".arch_extension nopan \n"); + + return (true); } -static struct cpu_feat feat_pan = { - .feat_name = "FEAT_PAN", - .feat_check = pan_check, - .feat_enable = pan_enable, - .feat_flags = CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU, -}; -DATA_SET(cpu_feat_set, feat_pan); +CPU_FEAT(feat_pan, "Privileged access never", + pan_check, NULL, pan_enable, + CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU); bool has_hyp(void) @@ -857,7 +858,7 @@ initarm(struct arm64_bootparams *abp) cninit(); set_ttbr0(abp->kern_ttbr0); - cpu_tlb_flushID(); + pmap_s1_invalidate_all_kernel(); if (!valid) panic("Invalid bus configuration: %s", diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index ec89c4573799..8a4395aa1c89 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -190,6 +190,8 @@ pt_entry_t __read_mostly pmap_gp_attr; #define PMAP_SAN_PTE_BITS (ATTR_AF | ATTR_S1_XN | pmap_sh_attr | \ ATTR_KERN_GP | ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | ATTR_S1_AP(ATTR_S1_AP_RW)) +static bool __read_mostly pmap_multiple_tlbi = false; + struct pmap_large_md_page { struct rwlock pv_lock; struct md_page pv_page; @@ -1297,7 +1299,7 @@ pmap_bootstrap_dmap(vm_size_t kernlen) } } - cpu_tlb_flushID(); + pmap_s1_invalidate_all_kernel(); bs_state.dmap_valid = true; @@ -1399,7 +1401,7 @@ pmap_bootstrap(void) /* And the l3 tables for the early devmap */ pmap_bootstrap_l3(VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE)); - cpu_tlb_flushID(); + pmap_s1_invalidate_all_kernel(); #define alloc_pages(var, np) \ (var) = bs_state.freemempos; \ @@ -1656,14 +1658,17 @@ pmap_init_pv_table(void) } } -static bool +static cpu_feat_en pmap_dbm_check(const struct cpu_feat *feat __unused, u_int midr __unused) { uint64_t id_aa64mmfr1; id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1); - return (ID_AA64MMFR1_HAFDBS_VAL(id_aa64mmfr1) >= - ID_AA64MMFR1_HAFDBS_AF_DBS); + if (ID_AA64MMFR1_HAFDBS_VAL(id_aa64mmfr1) >= + ID_AA64MMFR1_HAFDBS_AF_DBS) + return (FEAT_DEFAULT_ENABLE); + + return (FEAT_ALWAYS_DISABLE); } static bool @@ -1671,8 +1676,8 @@ pmap_dbm_has_errata(const struct cpu_feat *feat __unused, u_int midr, u_int **errata_list, u_int *errata_count) { /* Disable on Cortex-A55 for erratum 1024718 - all revisions */ - if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_ARM, - CPU_PART_CORTEX_A55, 0, 0)) { + if (CPU_IMPL(midr) == CPU_IMPL_ARM && + CPU_PART(midr) == CPU_PART_CORTEX_A55) { static u_int errata_id = 1024718; *errata_list = &errata_id; @@ -1681,21 +1686,19 @@ pmap_dbm_has_errata(const struct cpu_feat *feat __unused, u_int midr, } /* Disable on Cortex-A510 for erratum 2051678 - r0p0 to r0p2 */ - if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK | CPU_VAR_MASK, - CPU_IMPL_ARM, CPU_PART_CORTEX_A510, 0, 0)) { - if (CPU_REV(PCPU_GET(midr)) < 3) { - static u_int errata_id = 2051678; + if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A510, + 0, 0, 0, 2)) { + static u_int errata_id = 2051678; - *errata_list = &errata_id; - *errata_count = 1; - return (true); - } + *errata_list = &errata_id; + *errata_count = 1; + return (true); } return (false); } -static void +static bool pmap_dbm_enable(const struct cpu_feat *feat __unused, cpu_feat_errata errata_status, u_int *errata_list __unused, u_int errata_count) @@ -1704,7 +1707,7 @@ pmap_dbm_enable(const struct cpu_feat *feat __unused, /* Skip if there is an erratum affecting DBM */ if (errata_status != ERRATA_NONE) - return; + return (false); tcr = READ_SPECIALREG(tcr_el1) | TCR_HD; WRITE_SPECIALREG(tcr_el1, tcr); @@ -1714,16 +1717,58 @@ pmap_dbm_enable(const struct cpu_feat *feat __unused, __asm __volatile("tlbi vmalle1"); dsb(nsh); isb(); + + return (true); } -static struct cpu_feat feat_dbm = { - .feat_name = "FEAT_HAFDBS (DBM)", - .feat_check = pmap_dbm_check, - .feat_has_errata = pmap_dbm_has_errata, - .feat_enable = pmap_dbm_enable, - .feat_flags = CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU, -}; -DATA_SET(cpu_feat_set, feat_dbm); +CPU_FEAT(feat_hafdbs, "Hardware management of the Access flag and dirty state", + pmap_dbm_check, pmap_dbm_has_errata, pmap_dbm_enable, + CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU); + +static cpu_feat_en +pmap_multiple_tlbi_check(const struct cpu_feat *feat __unused, u_int midr) +{ + /* + * Cortex-A55 erratum 2441007 (Cat B rare) + * Present in all revisions + */ + if (CPU_IMPL(midr) == CPU_IMPL_ARM && + CPU_PART(midr) == CPU_PART_CORTEX_A55) + return (FEAT_DEFAULT_DISABLE); + + /* + * Cortex-A76 erratum 1286807 (Cat B rare) + * Present in r0p0 - r3p0 + * Fixed in r3p1 + */ + if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A76, + 0, 0, 3, 0)) + return (FEAT_DEFAULT_DISABLE); + + /* + * Cortex-A510 erratum 2441009 (Cat B rare) + * Present in r0p0 - r1p1 + * Fixed in r1p2 + */ + if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A510, + 0, 0, 1, 1)) + return (FEAT_DEFAULT_DISABLE); + + return (FEAT_ALWAYS_DISABLE); +} + +static bool +pmap_multiple_tlbi_enable(const struct cpu_feat *feat __unused, + cpu_feat_errata errata_status, u_int *errata_list __unused, + u_int errata_count __unused) +{ + pmap_multiple_tlbi = true; + return (true); +} + +CPU_FEAT(errata_multi_tlbi, "Multiple TLBI errata", + pmap_multiple_tlbi_check, NULL, pmap_multiple_tlbi_enable, + CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU); /* * Initialize the pmap module. @@ -1878,9 +1923,17 @@ pmap_s1_invalidate_page(pmap_t pmap, vm_offset_t va, bool final_only) r = TLBI_VA(va); if (pmap == kernel_pmap) { pmap_s1_invalidate_kernel(r, final_only); + if (pmap_multiple_tlbi) { + dsb(ish); + pmap_s1_invalidate_kernel(r, final_only); + } } else { r |= ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)); pmap_s1_invalidate_user(r, final_only); + if (pmap_multiple_tlbi) { + dsb(ish); + pmap_s1_invalidate_user(r, final_only); + } } dsb(ish); isb(); @@ -1922,12 +1975,24 @@ pmap_s1_invalidate_strided(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, end = TLBI_VA(eva); for (r = start; r < end; r += TLBI_VA(stride)) pmap_s1_invalidate_kernel(r, final_only); + + if (pmap_multiple_tlbi) { + dsb(ish); + for (r = start; r < end; r += TLBI_VA(stride)) + pmap_s1_invalidate_kernel(r, final_only); + } } else { start = end = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)); start |= TLBI_VA(sva); end |= TLBI_VA(eva); for (r = start; r < end; r += TLBI_VA(stride)) pmap_s1_invalidate_user(r, final_only); + + if (pmap_multiple_tlbi) { + dsb(ish); + for (r = start; r < end; r += TLBI_VA(stride)) + pmap_s1_invalidate_user(r, final_only); + } } dsb(ish); isb(); @@ -1963,6 +2028,19 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pmap_s2_invalidate_range(pmap, sva, eva, final_only); } +void +pmap_s1_invalidate_all_kernel(void) +{ + dsb(ishst); + __asm __volatile("tlbi vmalle1is"); + dsb(ish); + if (pmap_multiple_tlbi) { + __asm __volatile("tlbi vmalle1is"); + dsb(ish); + } + isb(); +} + /* * Invalidates all cached intermediate- and final-level TLB entries for the * given virtual address space. @@ -1977,9 +2055,17 @@ pmap_s1_invalidate_all(pmap_t pmap) dsb(ishst); if (pmap == kernel_pmap) { __asm __volatile("tlbi vmalle1is"); + if (pmap_multiple_tlbi) { + dsb(ish); + __asm __volatile("tlbi vmalle1is"); + } } else { r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)); __asm __volatile("tlbi aside1is, %0" : : "r" (r)); + if (pmap_multiple_tlbi) { + dsb(ish); + __asm __volatile("tlbi aside1is, %0" : : "r" (r)); + } } dsb(ish); isb(); @@ -7967,7 +8053,7 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size) pa += L2_SIZE; } if ((old_l2e & ATTR_DESCR_VALID) != 0) - pmap_s1_invalidate_all(kernel_pmap); + pmap_s1_invalidate_all_kernel(); else { /* * Because the old entries were invalid and the new @@ -8058,7 +8144,7 @@ pmap_unmapbios(void *p, vm_size_t size) } } if (preinit_map) { - pmap_s1_invalidate_all(kernel_pmap); + pmap_s1_invalidate_all_kernel(); return; } diff --git a/sys/arm64/arm64/ptrauth.c b/sys/arm64/arm64/ptrauth.c index dbe0c69b8d60..fdab5414e24c 100644 --- a/sys/arm64/arm64/ptrauth.c +++ b/sys/arm64/arm64/ptrauth.c @@ -82,7 +82,7 @@ ptrauth_disable(void) return (false); } -static bool +static cpu_feat_en ptrauth_check(const struct cpu_feat *feat __unused, u_int midr __unused) { uint64_t isar; @@ -116,14 +116,14 @@ ptrauth_check(const struct cpu_feat *feat __unused, u_int midr __unused) if (get_kernel_reg(ID_AA64ISAR1_EL1, &isar)) { if (ID_AA64ISAR1_APA_VAL(isar) > 0 || ID_AA64ISAR1_API_VAL(isar) > 0) { - return (true); + return (FEAT_DEFAULT_ENABLE); } } /* The QARMA3 algorithm is reported in ID_AA64ISAR2_EL1. */ if (get_kernel_reg(ID_AA64ISAR2_EL1, &isar)) { if (ID_AA64ISAR2_APA3_VAL(isar) > 0) { - return (true); + return (FEAT_DEFAULT_ENABLE); } } @@ -138,10 +138,10 @@ out: ID_AA64ISAR1_GPI_MASK, 0); update_special_reg(ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_MASK, 0); - return (false); + return (FEAT_ALWAYS_DISABLE); } -static void +static bool ptrauth_enable(const struct cpu_feat *feat __unused, cpu_feat_errata errata_status __unused, u_int *errata_list __unused, u_int errata_count __unused) @@ -153,16 +153,13 @@ ptrauth_enable(const struct cpu_feat *feat __unused, elf64_addr_mask_14.code |= PAC_ADDR_MASK_14; elf64_addr_mask_14.data |= PAC_ADDR_MASK_14; #endif -} + return (true); +} -static struct cpu_feat feat_pauth = { - .feat_name = "FEAT_PAuth", - .feat_check = ptrauth_check, - .feat_enable = ptrauth_enable, - .feat_flags = CPU_FEAT_EARLY_BOOT | CPU_FEAT_SYSTEM, -}; -DATA_SET(cpu_feat_set, feat_pauth); +CPU_FEAT(feat_pauth, "Pointer Authentication", + ptrauth_check, NULL, ptrauth_enable, + CPU_FEAT_EARLY_BOOT | CPU_FEAT_SYSTEM); /* Copy the keys when forking a new process */ void diff --git a/sys/arm64/arm64/trap.c b/sys/arm64/arm64/trap.c index bed58095201a..75c9b5f87892 100644 --- a/sys/arm64/arm64/trap.c +++ b/sys/arm64/arm64/trap.c @@ -246,6 +246,7 @@ external_abort(struct thread *td, struct trapframe *frame, uint64_t esr, print_registers(frame); print_gp_register("far", far); + printf(" esr: 0x%.16lx\n", esr); panic("Unhandled external data abort"); } diff --git a/sys/arm64/conf/std.arm64 b/sys/arm64/conf/std.arm64 index c83e98c17a33..a0568466cfaf 100644 --- a/sys/arm64/conf/std.arm64 +++ b/sys/arm64/conf/std.arm64 @@ -7,6 +7,7 @@ makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support options SCHED_ULE # ULE scheduler options NUMA # Non-Uniform Memory Architecture support options PREEMPTION # Enable kernel thread preemption +options EXTERR_STRINGS options VIMAGE # Subsystem virtualization, e.g. VNET options INET # InterNETworking options INET6 # IPv6 communications protocols diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h index 59cda36f275e..07a783138f42 100644 --- a/sys/arm64/include/cpu.h +++ b/sys/arm64/include/cpu.h @@ -193,8 +193,30 @@ (((mask) & PCPU_GET(midr)) == \ ((mask) & CPU_ID_RAW((impl), (part), (var), (rev)))) -#define CPU_MATCH_RAW(mask, devid) \ - (((mask) & PCPU_GET(midr)) == ((mask) & (devid))) +#if !defined(__ASSEMBLER__) +static inline bool +midr_check_var_part_range(u_int midr, u_int impl, u_int part, u_int var_low, + u_int part_low, u_int var_high, u_int part_high) +{ + /* Check for the correct part */ + if (CPU_IMPL(midr) != impl || CPU_PART(midr) != part) + return (false); + + /* Check if the variant is between var_low and var_high inclusive */ + if (CPU_VAR(midr) < var_low || CPU_VAR(midr) > var_high) + return (false); + + /* If the variant is the low value, check if the part is high enough */ + if (CPU_VAR(midr) == var_low && CPU_PART(midr) < part_low) + return (false); + + /* If the variant is the high value, check if the part is low enough */ + if (CPU_VAR(midr) == var_high && CPU_PART(midr) > part_high) + return (false); + + return (true); +} +#endif /* * Chip-specific errata. This defines are intended to be diff --git a/sys/arm64/include/cpu_feat.h b/sys/arm64/include/cpu_feat.h index 9fe6a9dd95d9..6a554b6baedf 100644 --- a/sys/arm64/include/cpu_feat.h +++ b/sys/arm64/include/cpu_feat.h @@ -29,6 +29,7 @@ #define _MACHINE_CPU_FEAT_H_ #include <sys/linker_set.h> +#include <sys/sysctl.h> typedef enum { ERRATA_UNKNOWN, /* Unknown erratum */ @@ -39,6 +40,31 @@ typedef enum { /* kernel component. */ } cpu_feat_errata; +typedef enum { + /* + * Don't implement the feature or erratum wrokarount, + * e.g. the feature is not implemented or erratum is + * for another CPU. + */ + FEAT_ALWAYS_DISABLE, + + /* + * Disable by default, but allow the user to enable, + * e.g. For a rare erratum with a workaround, Arm + * Category B (rare) or similar. + */ + FEAT_DEFAULT_DISABLE, + + /* + * Enabled by default, bit allow the user to disable, + * e.g. For a common erratum with a workaround, Arm + * Category A or B or similar. + */ + FEAT_DEFAULT_ENABLE, + + /* We could add FEAT_ALWAYS_ENABLE if a need was found. */ +} cpu_feat_en; + #define CPU_FEAT_STAGE_MASK 0x00000001 #define CPU_FEAT_EARLY_BOOT 0x00000000 #define CPU_FEAT_AFTER_DEV 0x00000001 @@ -49,10 +75,10 @@ typedef enum { struct cpu_feat; -typedef bool (cpu_feat_check)(const struct cpu_feat *, u_int); +typedef cpu_feat_en (cpu_feat_check)(const struct cpu_feat *, u_int); typedef bool (cpu_feat_has_errata)(const struct cpu_feat *, u_int, u_int **, u_int *); -typedef void (cpu_feat_enable)(const struct cpu_feat *, cpu_feat_errata, +typedef bool (cpu_feat_enable)(const struct cpu_feat *, cpu_feat_errata, u_int *, u_int); struct cpu_feat { @@ -61,9 +87,25 @@ struct cpu_feat { cpu_feat_has_errata *feat_has_errata; cpu_feat_enable *feat_enable; uint32_t feat_flags; + bool feat_enabled; }; SET_DECLARE(cpu_feat_set, struct cpu_feat); +SYSCTL_DECL(_hw_feat); + +#define CPU_FEAT(name, descr, check, has_errata, enable, flags) \ +static struct cpu_feat name = { \ + .feat_name = #name, \ + .feat_check = check, \ + .feat_has_errata = has_errata, \ + .feat_enable = enable, \ + .feat_flags = flags, \ + .feat_enabled = false, \ +}; \ +DATA_SET(cpu_feat_set, name); \ +SYSCTL_BOOL(_hw_feat, OID_AUTO, name, CTLFLAG_RD, &name.feat_enabled, \ + 0, descr) + /* * Allow drivers to mark an erratum as worked around, e.g. the Errata * Management ABI may know the workaround isn't needed on a given system. diff --git a/sys/arm64/include/pmap.h b/sys/arm64/include/pmap.h index 0f23f200f0f6..406b6e2c5e0a 100644 --- a/sys/arm64/include/pmap.h +++ b/sys/arm64/include/pmap.h @@ -69,6 +69,7 @@ struct md_page { TAILQ_HEAD(,pv_entry) pv_list; int pv_gen; vm_memattr_t pv_memattr; + uint8_t pv_reserve[3]; }; enum pmap_stage { @@ -174,6 +175,8 @@ int pmap_fault(pmap_t, uint64_t, uint64_t); struct pcb *pmap_switch(struct thread *); +void pmap_s1_invalidate_all_kernel(void); + extern void (*pmap_clean_stage2_tlbi)(void); extern void (*pmap_stage2_invalidate_range)(uint64_t, vm_offset_t, vm_offset_t, bool); diff --git a/sys/arm64/include/proc.h b/sys/arm64/include/proc.h index 184743d4cc80..b40990e89385 100644 --- a/sys/arm64/include/proc.h +++ b/sys/arm64/include/proc.h @@ -75,6 +75,7 @@ struct mdthread { struct mdproc { uint64_t md_tcr; /* TCR_EL1 fields to update */ + uint64_t md_reserved[2]; }; #endif /* !LOCORE */ diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h index 73b5b4a09591..e839b5dd92c9 100644 --- a/sys/arm64/include/vmm.h +++ b/sys/arm64/include/vmm.h @@ -42,6 +42,7 @@ enum vm_suspend_how { VM_SUSPEND_RESET, VM_SUSPEND_POWEROFF, VM_SUSPEND_HALT, + VM_SUSPEND_DESTROY, VM_SUSPEND_LAST }; diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c index 3082d2941221..1dcefa1489e9 100644 --- a/sys/arm64/vmm/vmm.c +++ b/sys/arm64/vmm/vmm.c @@ -1342,8 +1342,14 @@ vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) static int vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) { + struct vm *vm; + + vm = vcpu->vm; vcpu_lock(vcpu); while (1) { + if (vm->suspend) + break; + if (vgic_has_pending_irq(vcpu->cookie)) break; diff --git a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c index 7192df200ae2..8078f3f6d4b1 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c +++ b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c @@ -7761,7 +7761,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, uintptr_t *memref = (uintptr_t *)(uintptr_t) val; if (!DTRACE_INSCRATCHPTR(&mstate, - (uintptr_t)memref, 2 * sizeof(uintptr_t))) { + (uintptr_t) memref, + sizeof (uintptr_t) + sizeof (size_t))) { *flags |= CPU_DTRACE_BADADDR; continue; } @@ -7773,21 +7774,21 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, * Check if the size exceeds the allocated * buffer size. */ - if (size + sizeof(uintptr_t) > dp->dtdo_rtype.dtdt_size) { + if (size + sizeof (size_t) > + dp->dtdo_rtype.dtdt_size) { /* Flag a drop! */ *flags |= CPU_DTRACE_DROP; continue; } /* Store the size in the buffer first. */ - DTRACE_STORE(uintptr_t, tomax, - valoffs, size); + DTRACE_STORE(size_t, tomax, valoffs, size); /* * Offset the buffer address to the start * of the data. */ - valoffs += sizeof(uintptr_t); + valoffs += sizeof(size_t); /* * Reset to the memory address rather than diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h index 90cd21a80923..54063150eef9 100644 --- a/sys/compat/freebsd32/freebsd32_syscall.h +++ b/sys/compat/freebsd32/freebsd32_syscall.h @@ -515,4 +515,6 @@ #define FREEBSD32_SYS_inotify_rm_watch 594 #define FREEBSD32_SYS_getgroups 595 #define FREEBSD32_SYS_setgroups 596 -#define FREEBSD32_SYS_MAXSYSCALL 597 +#define FREEBSD32_SYS_jail_attach_jd 597 +#define FREEBSD32_SYS_jail_remove_jd 598 +#define FREEBSD32_SYS_MAXSYSCALL 599 diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c index f0f8d26554b5..f7cc4c284e4d 100644 --- a/sys/compat/freebsd32/freebsd32_syscalls.c +++ b/sys/compat/freebsd32/freebsd32_syscalls.c @@ -602,4 +602,6 @@ const char *freebsd32_syscallnames[] = { "inotify_rm_watch", /* 594 = inotify_rm_watch */ "getgroups", /* 595 = getgroups */ "setgroups", /* 596 = setgroups */ + "jail_attach_jd", /* 597 = jail_attach_jd */ + "jail_remove_jd", /* 598 = jail_remove_jd */ }; diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c index 12f1a346c3e9..18f809ef04e3 100644 --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -664,4 +664,6 @@ struct sysent freebsd32_sysent[] = { { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */ { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */ + { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */ + { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */ }; diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c index e471c5148021..29a5497e9efa 100644 --- a/sys/compat/freebsd32/freebsd32_systrace_args.c +++ b/sys/compat/freebsd32/freebsd32_systrace_args.c @@ -3413,6 +3413,20 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 2; break; } + /* jail_attach_jd */ + case 597: { + struct jail_attach_jd_args *p = params; + iarg[a++] = p->fd; /* int */ + *n_args = 1; + break; + } + /* jail_remove_jd */ + case 598: { + struct jail_remove_jd_args *p = params; + iarg[a++] = p->fd; /* int */ + *n_args = 1; + break; + } default: *n_args = 0; break; @@ -9222,6 +9236,26 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* jail_attach_jd */ + case 597: + switch (ndx) { + case 0: + p = "int"; + break; + default: + break; + }; + break; + /* jail_remove_jd */ + case 598: + switch (ndx) { + case 0: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -11130,6 +11164,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* jail_attach_jd */ + case 597: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* jail_remove_jd */ + case 598: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/compat/lindebugfs/lindebugfs.c b/sys/compat/lindebugfs/lindebugfs.c index 50f9377ffec3..8cddc6f390bc 100644 --- a/sys/compat/lindebugfs/lindebugfs.c +++ b/sys/compat/lindebugfs/lindebugfs.c @@ -206,7 +206,7 @@ debugfs_create_file(const char *name, umode_t mode, pnode = debugfs_root; flags = fops->write ? PFS_RDWR : PFS_RD; - dnode->d_pfs_node = pfs_create_file(pnode, name, debugfs_fill, + pfs_create_file(pnode, &dnode->d_pfs_node, name, debugfs_fill, debugfs_attr, NULL, debugfs_destroy, flags | PFS_NOWAIT); if (dnode->d_pfs_node == NULL) { free(dm, M_DFSINT); @@ -283,7 +283,8 @@ debugfs_create_dir(const char *name, struct dentry *parent) else pnode = debugfs_root; - dnode->d_pfs_node = pfs_create_dir(pnode, name, debugfs_attr, NULL, debugfs_destroy, PFS_RD | PFS_NOWAIT); + pfs_create_dir(pnode, &dnode->d_pfs_node, name, debugfs_attr, NULL, + debugfs_destroy, PFS_RD | PFS_NOWAIT); if (dnode->d_pfs_node == NULL) { free(dm, M_DFSINT); return (NULL); @@ -316,7 +317,8 @@ debugfs_create_symlink(const char *name, struct dentry *parent, else pnode = debugfs_root; - dnode->d_pfs_node = pfs_create_link(pnode, name, &debugfs_fill_data, NULL, NULL, NULL, PFS_NOWAIT); + pfs_create_link(pnode, &dnode->d_pfs_node, name, &debugfs_fill_data, + NULL, NULL, NULL, PFS_NOWAIT); if (dnode->d_pfs_node == NULL) goto fail; dnode->d_pfs_node->pn_data = dm; diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c index 1c6d64d6b8bc..95b212be1306 100644 --- a/sys/compat/linprocfs/linprocfs.c +++ b/sys/compat/linprocfs/linprocfs.c @@ -2320,165 +2320,165 @@ linprocfs_init(PFS_INIT_ARGS) root = pi->pi_root; /* /proc/... */ - pfs_create_file(root, "cmdline", &linprocfs_docmdline, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(root, "cpuinfo", &linprocfs_docpuinfo, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(root, "devices", &linprocfs_dodevices, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(root, "filesystems", &linprocfs_dofilesystems, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(root, "loadavg", &linprocfs_doloadavg, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(root, "meminfo", &linprocfs_domeminfo, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(root, "modules", &linprocfs_domodules, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(root, "mounts", &linprocfs_domtab, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(root, "mtab", &linprocfs_domtab, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(root, "partitions", &linprocfs_dopartitions, - NULL, NULL, NULL, PFS_RD); - pfs_create_link(root, "self", &procfs_docurproc, - NULL, NULL, NULL, 0); - pfs_create_file(root, "stat", &linprocfs_dostat, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(root, "swaps", &linprocfs_doswaps, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(root, "uptime", &linprocfs_douptime, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(root, "version", &linprocfs_doversion, + pfs_create_file(root, NULL, "cmdline", &linprocfs_docmdline, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(root, NULL, "cpuinfo", &linprocfs_docpuinfo, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(root, NULL, "devices", &linprocfs_dodevices, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(root, NULL, "filesystems", &linprocfs_dofilesystems, NULL, NULL, NULL, PFS_RD); + pfs_create_file(root, NULL, "loadavg", &linprocfs_doloadavg, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(root, NULL, "meminfo", &linprocfs_domeminfo, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(root, NULL, "modules", &linprocfs_domodules, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(root, NULL, "mounts", &linprocfs_domtab, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(root, NULL, "mtab", &linprocfs_domtab, NULL, NULL, NULL, + PFS_RD); + pfs_create_file(root, NULL, "partitions", &linprocfs_dopartitions, NULL, + NULL, NULL, PFS_RD); + pfs_create_link(root, NULL, "self", &procfs_docurproc, NULL, NULL, NULL, + 0); + pfs_create_file(root, NULL, "stat", &linprocfs_dostat, NULL, NULL, NULL, + PFS_RD); + pfs_create_file(root, NULL, "swaps", &linprocfs_doswaps, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(root, NULL, "uptime", &linprocfs_douptime, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(root, NULL, "version", &linprocfs_doversion, NULL, NULL, + NULL, PFS_RD); /* /proc/bus/... */ - dir = pfs_create_dir(root, "bus", NULL, NULL, NULL, 0); - dir = pfs_create_dir(dir, "pci", NULL, NULL, NULL, 0); - dir = pfs_create_dir(dir, "devices", NULL, NULL, NULL, 0); + pfs_create_dir(root, &dir, "bus", NULL, NULL, NULL, 0); + pfs_create_dir(dir, &dir, "pci", NULL, NULL, NULL, 0); + pfs_create_dir(dir, &dir, "devices", NULL, NULL, NULL, 0); /* /proc/net/... */ - dir = pfs_create_dir(root, "net", NULL, NULL, NULL, 0); - pfs_create_file(dir, "dev", &linprocfs_donetdev, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "route", &linprocfs_donetroute, - NULL, NULL, NULL, PFS_RD); + pfs_create_dir(root, &dir, "net", NULL, NULL, NULL, 0); + pfs_create_file(dir, NULL, "dev", &linprocfs_donetdev, NULL, NULL, NULL, + PFS_RD); + pfs_create_file(dir, NULL, "route", &linprocfs_donetroute, NULL, NULL, + NULL, PFS_RD); /* /proc/<pid>/... */ - dir = pfs_create_dir(root, "pid", NULL, NULL, NULL, PFS_PROCDEP); - pfs_create_file(dir, "cmdline", &linprocfs_doproccmdline, - NULL, NULL, NULL, PFS_RD); - pfs_create_link(dir, "cwd", &linprocfs_doproccwd, - NULL, NULL, NULL, 0); - pfs_create_file(dir, "environ", &linprocfs_doprocenviron, - NULL, &procfs_candebug, NULL, PFS_RD); - pfs_create_link(dir, "exe", &procfs_doprocfile, - NULL, &procfs_notsystem, NULL, 0); - pfs_create_file(dir, "maps", &linprocfs_doprocmaps, - NULL, NULL, NULL, PFS_RD | PFS_AUTODRAIN); - pfs_create_file(dir, "mem", &linprocfs_doprocmem, - procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR | PFS_RAW); - pfs_create_file(dir, "mountinfo", &linprocfs_doprocmountinfo, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "mounts", &linprocfs_domtab, + pfs_create_dir(root, &dir, "pid", NULL, NULL, NULL, PFS_PROCDEP); + pfs_create_file(dir, NULL, "cmdline", &linprocfs_doproccmdline, NULL, + NULL, NULL, PFS_RD); + pfs_create_link(dir, NULL, "cwd", &linprocfs_doproccwd, NULL, NULL, + NULL, 0); + pfs_create_file(dir, NULL, "environ", &linprocfs_doprocenviron, NULL, + &procfs_candebug, NULL, PFS_RD); + pfs_create_link(dir, NULL, "exe", &procfs_doprocfile, NULL, + &procfs_notsystem, NULL, 0); + pfs_create_file(dir, NULL, "maps", &linprocfs_doprocmaps, NULL, NULL, + NULL, PFS_RD | PFS_AUTODRAIN); + pfs_create_file(dir, NULL, "mem", &linprocfs_doprocmem, procfs_attr_rw, + &procfs_candebug, NULL, PFS_RDWR | PFS_RAW); + pfs_create_file(dir, NULL, "mountinfo", &linprocfs_doprocmountinfo, NULL, NULL, NULL, PFS_RD); - pfs_create_link(dir, "root", &linprocfs_doprocroot, - NULL, NULL, NULL, 0); - pfs_create_file(dir, "stat", &linprocfs_doprocstat, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "statm", &linprocfs_doprocstatm, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "status", &linprocfs_doprocstatus, - NULL, NULL, NULL, PFS_RD); - pfs_create_link(dir, "fd", &linprocfs_dofdescfs, - NULL, NULL, NULL, 0); - pfs_create_file(dir, "auxv", &linprocfs_doauxv, - NULL, &procfs_candebug, NULL, PFS_RD|PFS_RAWRD); - pfs_create_file(dir, "limits", &linprocfs_doproclimits, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "oom_score_adj", &linprocfs_do_oom_score_adj, + pfs_create_file(dir, NULL, "mounts", &linprocfs_domtab, NULL, NULL, + NULL, PFS_RD); + pfs_create_link(dir, NULL, "root", &linprocfs_doprocroot, NULL, NULL, + NULL, 0); + pfs_create_file(dir, NULL, "stat", &linprocfs_doprocstat, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "statm", &linprocfs_doprocstatm, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "status", &linprocfs_doprocstatus, NULL, + NULL, NULL, PFS_RD); + pfs_create_link(dir, NULL, "fd", &linprocfs_dofdescfs, NULL, NULL, NULL, + 0); + pfs_create_file(dir, NULL, "auxv", &linprocfs_doauxv, NULL, + &procfs_candebug, NULL, PFS_RD | PFS_RAWRD); + pfs_create_file(dir, NULL, "limits", &linprocfs_doproclimits, NULL, + NULL, NULL, PFS_RD); + pfs_create_file(dir, NULL, "oom_score_adj", &linprocfs_do_oom_score_adj, procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR); /* /proc/<pid>/task/... */ - dir = pfs_create_dir(dir, "task", linprocfs_dotaskattr, NULL, NULL, 0); - pfs_create_file(dir, ".dummy", &linprocfs_dotaskdummy, - NULL, NULL, NULL, PFS_RD); + pfs_create_dir(dir, &dir, "task", linprocfs_dotaskattr, NULL, NULL, 0); + pfs_create_file(dir, NULL, ".dummy", &linprocfs_dotaskdummy, NULL, NULL, + NULL, PFS_RD); /* /proc/scsi/... */ - dir = pfs_create_dir(root, "scsi", NULL, NULL, NULL, 0); - pfs_create_file(dir, "device_info", &linprocfs_doscsidevinfo, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "scsi", &linprocfs_doscsiscsi, + pfs_create_dir(root, &dir, "scsi", NULL, NULL, NULL, 0); + pfs_create_file(dir, NULL, "device_info", &linprocfs_doscsidevinfo, NULL, NULL, NULL, PFS_RD); + pfs_create_file(dir, NULL, "scsi", &linprocfs_doscsiscsi, NULL, NULL, + NULL, PFS_RD); /* /proc/sys/... */ - sys = pfs_create_dir(root, "sys", NULL, NULL, NULL, 0); + pfs_create_dir(root, &sys, "sys", NULL, NULL, NULL, 0); /* /proc/sys/kernel/... */ - dir = pfs_create_dir(sys, "kernel", NULL, NULL, NULL, 0); - pfs_create_file(dir, "osrelease", &linprocfs_doosrelease, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "ostype", &linprocfs_doostype, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "version", &linprocfs_doosbuild, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "msgmax", &linprocfs_domsgmax, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "msgmni", &linprocfs_domsgmni, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "msgmnb", &linprocfs_domsgmnb, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "ngroups_max", &linprocfs_dongroups_max, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "pid_max", &linprocfs_dopid_max, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "sem", &linprocfs_dosem, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "shmall", &linprocfs_doshmall, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "shmmax", &linprocfs_doshmmax, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "shmmni", &linprocfs_doshmmni, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "tainted", &linprocfs_dotainted, + pfs_create_dir(sys, &dir, "kernel", NULL, NULL, NULL, 0); + pfs_create_file(dir, NULL, "osrelease", &linprocfs_doosrelease, NULL, + NULL, NULL, PFS_RD); + pfs_create_file(dir, NULL, "ostype", &linprocfs_doostype, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "version", &linprocfs_doosbuild, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "msgmax", &linprocfs_domsgmax, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "msgmni", &linprocfs_domsgmni, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "msgmnb", &linprocfs_domsgmnb, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "ngroups_max", &linprocfs_dongroups_max, NULL, NULL, NULL, PFS_RD); + pfs_create_file(dir, NULL, "pid_max", &linprocfs_dopid_max, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "sem", &linprocfs_dosem, NULL, NULL, NULL, + PFS_RD); + pfs_create_file(dir, NULL, "shmall", &linprocfs_doshmall, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "shmmax", &linprocfs_doshmmax, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "shmmni", &linprocfs_doshmmni, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "tainted", &linprocfs_dotainted, NULL, NULL, + NULL, PFS_RD); /* /proc/sys/kernel/random/... */ - dir = pfs_create_dir(dir, "random", NULL, NULL, NULL, 0); - pfs_create_file(dir, "uuid", &linprocfs_douuid, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "boot_id", &linprocfs_doboot_id, - NULL, NULL, NULL, PFS_RD); + pfs_create_dir(dir, &dir, "random", NULL, NULL, NULL, 0); + pfs_create_file(dir, NULL, "uuid", &linprocfs_douuid, NULL, NULL, NULL, + PFS_RD); + pfs_create_file(dir, NULL, "boot_id", &linprocfs_doboot_id, NULL, NULL, + NULL, PFS_RD); /* /proc/sys/vm/.... */ - dir = pfs_create_dir(sys, "vm", NULL, NULL, NULL, 0); - pfs_create_file(dir, "min_free_kbytes", &linprocfs_dominfree, + pfs_create_dir(sys, &dir, "vm", NULL, NULL, NULL, 0); + pfs_create_file(dir, NULL, "min_free_kbytes", &linprocfs_dominfree, NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "max_map_count", &linprocfs_domax_map_cnt, + pfs_create_file(dir, NULL, "max_map_count", &linprocfs_domax_map_cnt, NULL, NULL, NULL, PFS_RD); /* /proc/sysvipc/... */ - dir = pfs_create_dir(root, "sysvipc", NULL, NULL, NULL, 0); - pfs_create_file(dir, "msg", &linprocfs_dosysvipc_msg, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "sem", &linprocfs_dosysvipc_sem, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "shm", &linprocfs_dosysvipc_shm, - NULL, NULL, NULL, PFS_RD); + pfs_create_dir(root, &dir, "sysvipc", NULL, NULL, NULL, 0); + pfs_create_file(dir, NULL, "msg", &linprocfs_dosysvipc_msg, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "sem", &linprocfs_dosysvipc_sem, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "shm", &linprocfs_dosysvipc_shm, NULL, NULL, + NULL, PFS_RD); /* /proc/sys/fs/... */ - dir = pfs_create_dir(sys, "fs", NULL, NULL, NULL, 0); + pfs_create_dir(sys, &dir, "fs", NULL, NULL, NULL, 0); /* /proc/sys/fs/mqueue/... */ - dir = pfs_create_dir(dir, "mqueue", NULL, NULL, NULL, 0); - pfs_create_file(dir, "msg_default", &linprocfs_domqueue_msg_default, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "msgsize_default", &linprocfs_domqueue_msgsize_default, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "msg_max", &linprocfs_domqueue_msg_max, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "msgsize_max", &linprocfs_domqueue_msgsize_max, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "queues_max", &linprocfs_domqueue_queues_max, + pfs_create_dir(dir, &dir, "mqueue", NULL, NULL, NULL, 0); + pfs_create_file(dir, NULL, "msg_default", + &linprocfs_domqueue_msg_default, NULL, NULL, NULL, PFS_RD); + pfs_create_file(dir, NULL, "msgsize_default", + &linprocfs_domqueue_msgsize_default, NULL, NULL, NULL, PFS_RD); + pfs_create_file(dir, NULL, "msg_max", &linprocfs_domqueue_msg_max, NULL, + NULL, NULL, PFS_RD); + pfs_create_file(dir, NULL, "msgsize_max", + &linprocfs_domqueue_msgsize_max, NULL, NULL, NULL, PFS_RD); + pfs_create_file(dir, NULL, "queues_max", &linprocfs_domqueue_queues_max, NULL, NULL, NULL, PFS_RD); return (0); diff --git a/sys/compat/linsysfs/linsysfs.c b/sys/compat/linsysfs/linsysfs.c index 7f70221b420d..5a41c5193415 100644 --- a/sys/compat/linsysfs/linsysfs.c +++ b/sys/compat/linsysfs/linsysfs.c @@ -267,6 +267,8 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi, struct pci_devinfo *dinfo; char *device, *host, *new_path, *devname; + children = NULL; + device = host = NULL; new_path = path; devname = malloc(16, M_TEMP, M_WAITOK); @@ -292,39 +294,43 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi, dinfo->cfg.func); strcat(new_path, "/"); strcat(new_path, device); - dir = pfs_create_dir(dir, device, + error = pfs_create_dir(dir, &dir, device, NULL, NULL, NULL, 0); - cur_file = pfs_create_file(dir, "vendor", + if (error != 0) + goto out; + pfs_create_dir(dir, &dir, device, NULL, NULL, + NULL, 0); + pfs_create_file(dir, &cur_file, "vendor", &linsysfs_fill_vendor, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dev; - cur_file = pfs_create_file(dir, "device", + pfs_create_file(dir, &cur_file, "device", &linsysfs_fill_device, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dev; - cur_file = pfs_create_file(dir, + pfs_create_file(dir, &cur_file, "subsystem_vendor", &linsysfs_fill_subvendor, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dev; - cur_file = pfs_create_file(dir, + pfs_create_file(dir, &cur_file, "subsystem_device", &linsysfs_fill_subdevice, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dev; - cur_file = pfs_create_file(dir, "revision", + pfs_create_file(dir, &cur_file, "revision", &linsysfs_fill_revid, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dev; - cur_file = pfs_create_file(dir, "config", + pfs_create_file(dir, &cur_file, "config", &linsysfs_fill_config, NULL, NULL, NULL, PFS_RD); cur_file->pn_data = (void*)dev; - cur_file = pfs_create_file(dir, "uevent", - &linsysfs_fill_uevent_pci, NULL, NULL, - NULL, PFS_RD); + pfs_create_file(dir, &cur_file, "uevent", + &linsysfs_fill_uevent_pci, NULL, NULL, NULL, + PFS_RD); cur_file->pn_data = (void*)dev; - cur_file = pfs_create_link(dir, "subsystem", + pfs_create_link(dir, &cur_file, "subsystem", &linsysfs_fill_data, NULL, NULL, NULL, 0); /* libdrm just checks that the link ends in "/pci" */ cur_file->pn_data = "/sys/bus/pci"; @@ -334,34 +340,32 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi, sprintf(host, "host%d", host_number++); strcat(new_path, "/"); strcat(new_path, host); - pfs_create_dir(dir, host, - NULL, NULL, NULL, 0); + pfs_create_dir(dir, NULL, host, NULL, + NULL, NULL, 0); scsi_host = malloc(sizeof( struct scsi_host_queue), - M_DEVBUF, M_NOWAIT); + M_DEVBUF, M_WAITOK); scsi_host->path = malloc( strlen(new_path) + 1, - M_DEVBUF, M_NOWAIT); + M_DEVBUF, M_WAITOK); scsi_host->path[0] = '\000'; bcopy(new_path, scsi_host->path, strlen(new_path) + 1); scsi_host->name = "unknown"; - sub_dir = pfs_create_dir(scsi, host, + pfs_create_dir(scsi, &sub_dir, host, NULL, NULL, NULL, 0); - pfs_create_link(sub_dir, "device", - &linsysfs_link_scsi_host, - NULL, NULL, NULL, 0); - pfs_create_file(sub_dir, "proc_name", - &linsysfs_scsiname, + pfs_create_link(sub_dir, NULL, "device", + &linsysfs_link_scsi_host, NULL, + NULL, NULL, 0); + pfs_create_file(sub_dir, NULL, + "proc_name", &linsysfs_scsiname, NULL, NULL, NULL, PFS_RD); scsi_host->name = linux_driver_get_name_dev(dev); TAILQ_INSERT_TAIL(&scsi_host_q, scsi_host, scsi_host_next); } - free(device, M_TEMP); - free(host, M_TEMP); } } @@ -374,26 +378,27 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi, device_get_unit(dev) >= 0) { dinfo = device_get_ivars(parent); if (dinfo != NULL && dinfo->cfg.baseclass == PCIC_DISPLAY) { - pfs_create_dir(dir, "drm", NULL, NULL, NULL, 0); + pfs_create_dir(dir, NULL, "drm", NULL, NULL, + NULL, 0); sprintf(devname, "226:%d", device_get_unit(dev)); - sub_dir = pfs_create_dir(chardev, - devname, NULL, NULL, NULL, 0); - cur_file = pfs_create_link(sub_dir, - "device", &linsysfs_fill_vgapci, NULL, - NULL, NULL, PFS_RD); + pfs_create_dir(chardev, &sub_dir, devname, NULL, + NULL, NULL, 0); + pfs_create_link(sub_dir, &cur_file, "device", + &linsysfs_fill_vgapci, NULL, NULL, NULL, + PFS_RD); cur_file->pn_data = (void*)dir; - cur_file = pfs_create_file(sub_dir, - "uevent", &linsysfs_fill_uevent_drm, NULL, - NULL, NULL, PFS_RD); + pfs_create_file(sub_dir, &cur_file, "uevent", + &linsysfs_fill_uevent_drm, NULL, NULL, NULL, + PFS_RD); cur_file->pn_data = (void*)dev; sprintf(devname, "card%d", device_get_unit(dev)); - sub_dir = pfs_create_dir(drm, - devname, NULL, NULL, NULL, 0); - cur_file = pfs_create_link(sub_dir, - "device", &linsysfs_fill_vgapci, NULL, - NULL, NULL, PFS_RD); + pfs_create_dir(drm, &sub_dir, devname, NULL, + NULL, NULL, 0); + pfs_create_link(sub_dir, &cur_file, "device", + &linsysfs_fill_vgapci, NULL, NULL, NULL, + PFS_RD); cur_file->pn_data = (void*)dir; } } @@ -401,17 +406,37 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi, error = device_get_children(dev, &children, &nchildren); if (error == 0) { - for (i = 0; i < nchildren; i++) - if (children[i]) - linsysfs_run_bus(children[i], dir, scsi, + for (i = 0; i < nchildren; i++) { + if (children[i]) { + error = linsysfs_run_bus(children[i], dir, scsi, chardev, drm, new_path, prefix); - free(children, M_TEMP); + if (error != 0) { + printf( + "linsysfs_run_bus: %s omitted from sysfs tree, error %d\n", + device_get_nameunit(children[i]), + error); + } + } + } + + /* + * We override the error to avoid cascading failures; the + * innermost device that failed in a tree is probably the most + * significant one for diagnostics, its parents would be noise. + */ + error = 0; } + +out: + free(host, M_TEMP); + free(device, M_TEMP); + if (children != NULL) + free(children, M_TEMP); if (new_path != path) free(new_path, M_TEMP); free(devname, M_TEMP); - return (1); + return (error); } /* @@ -455,10 +480,10 @@ linsysfs_listcpus(struct pfs_node *dir) for (i = 0; i < mp_ncpus; ++i) { /* /sys/devices/system/cpu/cpuX */ sprintf(name, "cpu%d", i); - cpu = pfs_create_dir(dir, name, NULL, NULL, NULL, 0); + pfs_create_dir(dir, &cpu, name, NULL, NULL, NULL, 0); - pfs_create_file(cpu, "online", &linsysfs_cpuxonline, - NULL, NULL, NULL, PFS_RD); + pfs_create_file(cpu, NULL, "online", &linsysfs_cpuxonline, NULL, + NULL, NULL, PFS_RD); } free(name, M_TEMP); } @@ -485,52 +510,56 @@ linsysfs_init(PFS_INIT_ARGS) root = pi->pi_root; /* /sys/bus/... */ - dir = pfs_create_dir(root, "bus", NULL, NULL, NULL, 0); + pfs_create_dir(root, &dir, "bus", NULL, NULL, NULL, 0); /* /sys/class/... */ - class = pfs_create_dir(root, "class", NULL, NULL, NULL, 0); - scsi = pfs_create_dir(class, "scsi_host", NULL, NULL, NULL, 0); - drm = pfs_create_dir(class, "drm", NULL, NULL, NULL, 0); - pfs_create_dir(class, "power_supply", NULL, NULL, NULL, 0); + pfs_create_dir(root, &class, "class", NULL, NULL, NULL, 0); + pfs_create_dir(class, &scsi, "scsi_host", NULL, NULL, NULL, 0); + pfs_create_dir(class, &drm, "drm", NULL, NULL, NULL, 0); + pfs_create_dir(class, NULL, "power_supply", NULL, NULL, NULL, 0); /* /sys/class/net/.. */ - net = pfs_create_dir(class, "net", NULL, NULL, NULL, 0); + pfs_create_dir(class, &net, "net", NULL, NULL, NULL, 0); /* /sys/dev/... */ - devdir = pfs_create_dir(root, "dev", NULL, NULL, NULL, 0); - chardev = pfs_create_dir(devdir, "char", NULL, NULL, NULL, 0); + pfs_create_dir(root, &devdir, "dev", NULL, NULL, NULL, 0); + pfs_create_dir(devdir, &chardev, "char", NULL, NULL, NULL, 0); /* /sys/devices/... */ - dir = pfs_create_dir(root, "devices", NULL, NULL, NULL, 0); - pci = pfs_create_dir(dir, "pci0000:00", NULL, NULL, NULL, 0); + pfs_create_dir(root, &dir, "devices", NULL, NULL, NULL, 0); + pfs_create_dir(dir, &pci, "pci0000:00", NULL, NULL, NULL, 0); devclass = devclass_find("root"); if (devclass == NULL) { return (0); } + /* + * This assumes that the root node is unlikely to error out in + * linsysfs_run_bus, which may or may not be true. + */ dev = devclass_get_device(devclass, 0); linsysfs_run_bus(dev, pci, scsi, chardev, drm, "/pci0000:00", "0000"); /* /sys/devices/system */ - sys = pfs_create_dir(dir, "system", NULL, NULL, NULL, 0); + pfs_create_dir(dir, &sys, "system", NULL, NULL, NULL, 0); /* /sys/devices/system/cpu */ - cpu = pfs_create_dir(sys, "cpu", NULL, NULL, NULL, 0); + pfs_create_dir(sys, &cpu, "cpu", NULL, NULL, NULL, 0); - pfs_create_file(cpu, "online", &linsysfs_cpuonline, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(cpu, "possible", &linsysfs_cpuonline, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(cpu, "present", &linsysfs_cpuonline, - NULL, NULL, NULL, PFS_RD); + pfs_create_file(cpu, NULL, "online", &linsysfs_cpuonline, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(cpu, NULL, "possible", &linsysfs_cpuonline, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(cpu, NULL, "present", &linsysfs_cpuonline, NULL, NULL, + NULL, PFS_RD); linsysfs_listcpus(cpu); /* /sys/kernel */ - kernel = pfs_create_dir(root, "kernel", NULL, NULL, NULL, 0); + pfs_create_dir(root, &kernel, "kernel", NULL, NULL, NULL, 0); /* /sys/kernel/debug, mountpoint for lindebugfs. */ - pfs_create_dir(kernel, "debug", NULL, NULL, NULL, 0); + pfs_create_dir(kernel, NULL, "debug", NULL, NULL, NULL, 0); linsysfs_net_init(); diff --git a/sys/compat/linsysfs/linsysfs_net.c b/sys/compat/linsysfs/linsysfs_net.c index 73602b0132a4..751dbb5b3713 100644 --- a/sys/compat/linsysfs/linsysfs_net.c +++ b/sys/compat/linsysfs/linsysfs_net.c @@ -237,22 +237,22 @@ linsysfs_net_addif(if_t ifp, void *arg) nic = pfs_find_node(dir, ifname); if (nic == NULL) { - nic = pfs_create_dir(dir, ifname, NULL, linsysfs_if_visible, + pfs_create_dir(dir, &nic, ifname, NULL, linsysfs_if_visible, NULL, 0); - pfs_create_file(nic, "address", &linsysfs_if_addr, + pfs_create_file(nic, NULL, "address", &linsysfs_if_addr, NULL, + NULL, NULL, PFS_RD); + pfs_create_file(nic, NULL, "addr_len", &linsysfs_if_addrlen, NULL, NULL, NULL, PFS_RD); - pfs_create_file(nic, "addr_len", &linsysfs_if_addrlen, + pfs_create_file(nic, NULL, "flags", &linsysfs_if_flags, NULL, + NULL, NULL, PFS_RD); + pfs_create_file(nic, NULL, "ifindex", &linsysfs_if_ifindex, NULL, NULL, NULL, PFS_RD); - pfs_create_file(nic, "flags", &linsysfs_if_flags, + pfs_create_file(nic, NULL, "mtu", &linsysfs_if_mtu, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(nic, NULL, "tx_queue_len", &linsysfs_if_txq_len, NULL, NULL, NULL, PFS_RD); - pfs_create_file(nic, "ifindex", &linsysfs_if_ifindex, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(nic, "mtu", &linsysfs_if_mtu, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(nic, "tx_queue_len", &linsysfs_if_txq_len, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(nic, "type", &linsysfs_if_type, - NULL, NULL, NULL, PFS_RD); + pfs_create_file(nic, NULL, "type", &linsysfs_if_type, NULL, + NULL, NULL, PFS_RD); } /* * There is a small window between registering the if_arrival diff --git a/sys/compat/linuxkpi/common/include/acpi/acpi.h b/sys/compat/linuxkpi/common/include/acpi/acpi.h index 016c7ede0f6e..9bb435591daa 100644 --- a/sys/compat/linuxkpi/common/include/acpi/acpi.h +++ b/sys/compat/linuxkpi/common/include/acpi/acpi.h @@ -131,7 +131,7 @@ acpi_format_exception(ACPI_STATUS Exception) } static inline ACPI_STATUS -acpi_get_handle(ACPI_HANDLE Parent, ACPI_STRING Pathname, +acpi_get_handle(ACPI_HANDLE Parent, const char *Pathname, ACPI_HANDLE *RetHandle) { return (AcpiGetHandle(Parent, Pathname, RetHandle)); diff --git a/sys/compat/linuxkpi/common/include/kunit/static_stub.h b/sys/compat/linuxkpi/common/include/kunit/static_stub.h new file mode 100644 index 000000000000..9d425d46dbb0 --- /dev/null +++ b/sys/compat/linuxkpi/common/include/kunit/static_stub.h @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2025 The FreeBSD Foundation + * + * This software was developed by Björn Zeeb under sponsorship from + * the FreeBSD Foundation. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef _LINUXKPI_KUNIT_STATIC_STUB_H +#define _LINUXKPI_KUNIT_STATIC_STUB_H + +#define KUNIT_STATIC_STUB_REDIRECT(_fn, ...) do { } while(0) + +#endif /* _LINUXKPI_KUNIT_STATIC_STUB_H */ diff --git a/sys/compat/linuxkpi/common/include/linux/cleanup.h b/sys/compat/linuxkpi/common/include/linux/cleanup.h index 01f234f0cbe7..5bb146f082ed 100644 --- a/sys/compat/linuxkpi/common/include/linux/cleanup.h +++ b/sys/compat/linuxkpi/common/include/linux/cleanup.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2024 The FreeBSD Foundation + * Copyright (c) 2024-2025 The FreeBSD Foundation * * This software was developed by Björn Zeeb under sponsorship from * the FreeBSD Foundation. @@ -43,4 +43,51 @@ guard_ ## _n ## _t guard_ ## _n ## _ ## __COUNTER__ \ __cleanup(guard_ ## _n ## _destroy) = guard_ ## _n ## _create +#define DEFINE_FREE(_n, _t, _f) \ + static inline void \ + __free_ ## _n(void *p) \ + { \ + _t _T; \ + \ + _T = *(_t *)p; \ + _f; \ + } + +#define __free(_n) __cleanup(__free_##_n) + +/* + * Given this is a _0 version it should likely be broken up into parts. + * But we have no idead what a _1, _2, ... version would do different + * until we see a call. + * This is used for a not-real-type (rcu). We use a bool to "simulate" + * the lock held. Also _T still special, may not always be used, so tag + * with __unused (or better the LinuxKPI __maybe_unused). + */ +#define DEFINE_LOCK_GUARD_0(_n, _lock, _unlock, ...) \ + \ + typedef struct { \ + bool lock; \ + __VA_ARGS__; \ + } guard_ ## _n ## _t; \ + \ + static inline void \ + guard_ ## _n ## _destroy(guard_ ## _n ## _t *_T) \ + { \ + if (_T->lock) { \ + _unlock; \ + } \ + } \ + \ + static inline guard_ ## _n ## _t \ + guard_ ## _n ## _create(void) \ + { \ + guard_ ## _n ## _t _tmp; \ + guard_ ## _n ## _t *_T __maybe_unused; \ + \ + _tmp.lock = true; \ + _T = &_tmp; \ + _lock; \ + return (_tmp); \ + } + #endif /* _LINUXKPI_LINUX_CLEANUP_H */ diff --git a/sys/compat/linuxkpi/common/include/linux/compiler.h b/sys/compat/linuxkpi/common/include/linux/compiler.h index fb5ad3bf4fe4..948396144ad6 100644 --- a/sys/compat/linuxkpi/common/include/linux/compiler.h +++ b/sys/compat/linuxkpi/common/include/linux/compiler.h @@ -130,4 +130,10 @@ #define is_signed_type(t) ((t)-1 < (t)1) #define is_unsigned_type(t) ((t)-1 > (t)1) +#if __has_builtin(__builtin_dynamic_object_size) +#define __struct_size(_s) __builtin_dynamic_object_size(_s, 0) +#else +#define __struct_size(_s) __builtin_object_size(_s, 0) +#endif + #endif /* _LINUXKPI_LINUX_COMPILER_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/device.h b/sys/compat/linuxkpi/common/include/linux/device.h index 2556b0c45e49..7dd6340746d2 100644 --- a/sys/compat/linuxkpi/common/include/linux/device.h +++ b/sys/compat/linuxkpi/common/include/linux/device.h @@ -4,7 +4,7 @@ * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. * All rights reserved. - * Copyright (c) 2021-2022 The FreeBSD Foundation + * Copyright (c) 2021-2025 The FreeBSD Foundation * * Portions of this software were developed by Björn Zeeb * under sponsorship from the FreeBSD Foundation. @@ -284,7 +284,8 @@ int lkpi_devres_destroy(struct device *, void(*release)(struct device *, void *) void lkpi_devres_release_free_list(struct device *); void lkpi_devres_unlink(struct device *, void *); void lkpi_devm_kmalloc_release(struct device *, void *); -#define devm_kfree(_d, _p) lkpi_devm_kmalloc_release(_d, _p) +void lkpi_devm_kfree(struct device *, const void *); +#define devm_kfree(_d, _p) lkpi_devm_kfree(_d, _p) static inline const char * dev_driver_string(const struct device *dev) diff --git a/sys/compat/linuxkpi/common/include/linux/ieee80211.h b/sys/compat/linuxkpi/common/include/linux/ieee80211.h index b9161c586d07..17041bb03ce8 100644 --- a/sys/compat/linuxkpi/common/include/linux/ieee80211.h +++ b/sys/compat/linuxkpi/common/include/linux/ieee80211.h @@ -408,6 +408,14 @@ enum ieee80211_sta_state { IEEE80211_STA_AUTHORIZED = 4, /* 802.1x */ }; +enum ieee80211_sta_rx_bandwidth { + IEEE80211_STA_RX_BW_20 = 0, + IEEE80211_STA_RX_BW_40, + IEEE80211_STA_RX_BW_80, + IEEE80211_STA_RX_BW_160, + IEEE80211_STA_RX_BW_320, +}; + enum ieee80211_tx_info_flags { /* XXX TODO .. right shift numbers - not sure where that came from? */ IEEE80211_TX_CTL_AMPDU = BIT(0), @@ -524,24 +532,24 @@ struct ieee80211_mgmt { uint16_t beacon_int; uint16_t capab_info; uint8_t variable[0]; - } beacon; + } __packed beacon; /* 9.3.3.5 Association Request frame format */ struct { uint16_t capab_info; uint16_t listen_interval; uint8_t variable[0]; - } assoc_req; + } __packed assoc_req; /* 9.3.3.10 Probe Request frame format */ struct { uint8_t variable[0]; - } probe_req; + } __packed probe_req; /* 9.3.3.11 Probe Response frame format */ struct { uint64_t timestamp; uint16_t beacon_int; uint16_t capab_info; uint8_t variable[0]; - } probe_resp; + } __packed probe_resp; /* 9.3.3.14 Action frame format */ struct { /* 9.4.1.11 Action field */ @@ -557,7 +565,7 @@ struct ieee80211_mgmt { uint8_t tpc_elem_length; uint8_t tpc_elem_tx_power; uint8_t tpc_elem_link_margin; - } tpc_report; + } __packed tpc_report; /* 9.6.8.33 Fine Timing Measurement frame format */ struct { uint8_t dialog_token; @@ -567,7 +575,7 @@ struct ieee80211_mgmt { uint16_t tod_error; uint16_t toa_error; uint8_t variable[0]; - } ftm; + } __packed ftm; /* 802.11-2016, 9.6.5.2 ADDBA Request frame format */ struct { uint8_t action_code; @@ -577,16 +585,16 @@ struct ieee80211_mgmt { uint16_t start_seq_num; /* Optional follows... */ uint8_t variable[0]; - } addba_req; + } __packed addba_req; /* XXX */ struct { uint8_t dialog_token; - } wnm_timing_msr; + } __packed wnm_timing_msr; } u; - } action; + } __packed action; DECLARE_FLEX_ARRAY(uint8_t, body); } u; -}; +} __packed __aligned(2); struct ieee80211_cts { /* net80211::ieee80211_frame_cts */ __le16 frame_control; diff --git a/sys/compat/linuxkpi/common/include/linux/math.h b/sys/compat/linuxkpi/common/include/linux/math.h index 5a348a57747b..1d50e011f66d 100644 --- a/sys/compat/linuxkpi/common/include/linux/math.h +++ b/sys/compat/linuxkpi/common/include/linux/math.h @@ -56,7 +56,7 @@ __ret; \ }) -#if defined(LINUXKPI_VERSION) && LINUXKPI_VERSION >= 60600 +#if !defined(LINUXKPI_VERSION) || (LINUXKPI_VERSION >= 60600) #define abs_diff(x, y) ({ \ __typeof(x) _x = (x); \ __typeof(y) _y = (y); \ diff --git a/sys/compat/linuxkpi/common/include/linux/math64.h b/sys/compat/linuxkpi/common/include/linux/math64.h index a216d350570f..25ca9da1b622 100644 --- a/sys/compat/linuxkpi/common/include/linux/math64.h +++ b/sys/compat/linuxkpi/common/include/linux/math64.h @@ -98,6 +98,12 @@ div64_u64_round_up(uint64_t dividend, uint64_t divisor) return ((dividend + divisor - 1) / divisor); } +static inline uint64_t +roundup_u64(uint64_t x1, uint32_t x2) +{ + return (div_u64(x1 + x2 - 1, x2) * x2); +} + #define DIV64_U64_ROUND_UP(...) \ div64_u64_round_up(__VA_ARGS__) diff --git a/sys/compat/linuxkpi/common/include/linux/overflow.h b/sys/compat/linuxkpi/common/include/linux/overflow.h index 9ba9b9500f11..e811037b8ecc 100644 --- a/sys/compat/linuxkpi/common/include/linux/overflow.h +++ b/sys/compat/linuxkpi/common/include/linux/overflow.h @@ -33,8 +33,10 @@ * credit to Christian Biere. */ #define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) -#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) -#define type_min(T) ((T)((T)-type_max(T)-(T)1)) +#define __type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) +#define type_max(t) __type_max(typeof(t)) +#define __type_min(T) ((T)((T)-type_max(T)-(T)1)) +#define type_min(t) __type_min(typeof(t)) /* * Avoids triggering -Wtype-limits compilation warning, @@ -59,46 +61,123 @@ static inline bool __must_check __must_check_overflow(bool overflow) * @b: second addend * @d: pointer to store sum * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted addition, but is not considered - * "safe for use" on a non-zero return value, which indicates that the - * sum has overflowed or been truncated. + * *@d holds the results of the attempted addition, regardless of whether + * wrap-around occurred. */ #define check_add_overflow(a, b, d) \ __must_check_overflow(__builtin_add_overflow(a, b, d)) /** + * wrapping_add() - Intentionally perform a wrapping addition + * @type: type for result of calculation + * @a: first addend + * @b: second addend + * + * Return the potentially wrapped-around addition without + * tripping any wrap-around sanitizers that may be enabled. + */ +#define wrapping_add(type, a, b) \ + ({ \ + type __val; \ + __builtin_add_overflow(a, b, &__val); \ + __val; \ + }) + +/** + * wrapping_assign_add() - Intentionally perform a wrapping increment assignment + * @var: variable to be incremented + * @offset: amount to add + * + * Increments @var by @offset with wrap-around. Returns the resulting + * value of @var. Will not trip any wrap-around sanitizers. + * + * Returns the new value of @var. + */ +#define wrapping_assign_add(var, offset) \ + ({ \ + typeof(var) *__ptr = &(var); \ + *__ptr = wrapping_add(typeof(var), *__ptr, offset); \ + }) + +/** * check_sub_overflow() - Calculate subtraction with overflow checking * @a: minuend; value to subtract from * @b: subtrahend; value to subtract from @a * @d: pointer to store difference * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted subtraction, but is not considered - * "safe for use" on a non-zero return value, which indicates that the - * difference has underflowed or been truncated. + * *@d holds the results of the attempted subtraction, regardless of whether + * wrap-around occurred. */ #define check_sub_overflow(a, b, d) \ __must_check_overflow(__builtin_sub_overflow(a, b, d)) /** + * wrapping_sub() - Intentionally perform a wrapping subtraction + * @type: type for result of calculation + * @a: minuend; value to subtract from + * @b: subtrahend; value to subtract from @a + * + * Return the potentially wrapped-around subtraction without + * tripping any wrap-around sanitizers that may be enabled. + */ +#define wrapping_sub(type, a, b) \ + ({ \ + type __val; \ + __builtin_sub_overflow(a, b, &__val); \ + __val; \ + }) + +/** + * wrapping_assign_sub() - Intentionally perform a wrapping decrement assign + * @var: variable to be decremented + * @offset: amount to subtract + * + * Decrements @var by @offset with wrap-around. Returns the resulting + * value of @var. Will not trip any wrap-around sanitizers. + * + * Returns the new value of @var. + */ +#define wrapping_assign_sub(var, offset) \ + ({ \ + typeof(var) *__ptr = &(var); \ + *__ptr = wrapping_sub(typeof(var), *__ptr, offset); \ + }) + +/** * check_mul_overflow() - Calculate multiplication with overflow checking * @a: first factor * @b: second factor * @d: pointer to store product * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted multiplication, but is not - * considered "safe for use" on a non-zero return value, which indicates - * that the product has overflowed or been truncated. + * *@d holds the results of the attempted multiplication, regardless of whether + * wrap-around occurred. */ #define check_mul_overflow(a, b, d) \ __must_check_overflow(__builtin_mul_overflow(a, b, d)) /** + * wrapping_mul() - Intentionally perform a wrapping multiplication + * @type: type for result of calculation + * @a: first factor + * @b: second factor + * + * Return the potentially wrapped-around multiplication without + * tripping any wrap-around sanitizers that may be enabled. + */ +#define wrapping_mul(type, a, b) \ + ({ \ + type __val; \ + __builtin_mul_overflow(a, b, &__val); \ + __val; \ + }) + +/** * check_shl_overflow() - Calculate a left-shifted value and check overflow * @a: Value to be shifted * @s: How many bits left to shift @@ -122,7 +201,7 @@ static inline bool __must_check __must_check_overflow(bool overflow) typeof(a) _a = a; \ typeof(s) _s = s; \ typeof(d) _d = d; \ - u64 _a_full = _a; \ + unsigned long long _a_full = _a; \ unsigned int _to_shift = \ is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \ *_d = (_a_full << _to_shift); \ @@ -132,10 +211,10 @@ static inline bool __must_check __must_check_overflow(bool overflow) #define __overflows_type_constexpr(x, T) ( \ is_unsigned_type(typeof(x)) ? \ - (x) > type_max(typeof(T)) : \ + (x) > type_max(T) : \ is_unsigned_type(typeof(T)) ? \ - (x) < 0 || (x) > type_max(typeof(T)) : \ - (x) < type_min(typeof(T)) || (x) > type_max(typeof(T))) + (x) < 0 || (x) > type_max(T) : \ + (x) < type_min(T) || (x) > type_max(T)) #define __overflows_type(x, T) ({ \ typeof(T) v = 0; \ @@ -312,27 +391,40 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) struct_size((type *)NULL, member, count) /** - * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. - * Enables caller macro to pass (different) initializer. + * __DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass arbitrary trailing expressions * * @type: structure type name, including "struct" keyword. * @name: Name for a variable to define. * @member: Name of the array member. * @count: Number of elements in the array; must be compile-time const. - * @initializer: initializer expression (could be empty for no init). + * @trailer: Trailing expressions for attributes and/or initializers. */ -#define _DEFINE_FLEX(type, name, member, count, initializer) \ +#define __DEFINE_FLEX(type, name, member, count, trailer...) \ _Static_assert(__builtin_constant_p(count), \ "onstack flex array members require compile-time const count"); \ union { \ u8 bytes[struct_size_t(type, member, count)]; \ type obj; \ - } name##_u initializer; \ + } name##_u trailer; \ type *name = (type *)&name##_u /** - * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing - * flexible array member. + * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass (different) initializer. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * @initializer: Initializer expression (e.g., pass `= { }` at minimum). + */ +#define _DEFINE_FLEX(type, name, member, count, initializer...) \ + __DEFINE_FLEX(type, name, member, count, = { .obj initializer }) + +/** + * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing + * flexible array member, when it does not have a __counted_by annotation. * * @type: structure type name, including "struct" keyword. * @name: Name for a variable to define. @@ -342,8 +434,42 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Define a zeroed, on-stack, instance of @type structure with a trailing * flexible array member. * Use __struct_size(@name) to get compile-time size of it afterwards. + * Use __member_size(@name->member) to get compile-time size of @name members. + * Use STACK_FLEX_ARRAY_SIZE(@name, @member) to get compile-time number of + * elements in array @member. + */ +#define DEFINE_RAW_FLEX(type, name, member, count) \ + __DEFINE_FLEX(type, name, member, count, = { }) + +/** + * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing + * flexible array member. + * + * @TYPE: structure type name, including "struct" keyword. + * @NAME: Name for a variable to define. + * @MEMBER: Name of the array member. + * @COUNTER: Name of the __counted_by member. + * @COUNT: Number of elements in the array; must be compile-time const. + * + * Define a zeroed, on-stack, instance of @TYPE structure with a trailing + * flexible array member. + * Use __struct_size(@NAME) to get compile-time size of it afterwards. + * Use __member_size(@NAME->member) to get compile-time size of @NAME members. + * Use STACK_FLEX_ARRAY_SIZE(@name, @member) to get compile-time number of + * elements in array @member. + */ +#define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \ + _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .COUNTER = COUNT, }) + +/** + * STACK_FLEX_ARRAY_SIZE() - helper macro for DEFINE_FLEX() family. + * Returns the number of elements in @array. + * + * @name: Name for a variable defined in DEFINE_RAW_FLEX()/DEFINE_FLEX(). + * @array: Name of the array member. */ -#define DEFINE_FLEX(type, name, member, count) \ - _DEFINE_FLEX(type, name, member, count, = {}) +#define STACK_FLEX_ARRAY_SIZE(name, array) \ + (__member_size((name)->array) / sizeof(*(name)->array) + \ + __must_be_array((name)->array)) #endif /* _LINUXKPI_LINUX_OVERFLOW_H */ diff --git a/sys/compat/linuxkpi/common/include/linux/pci.h b/sys/compat/linuxkpi/common/include/linux/pci.h index 3fd4191b9917..d891d0df3546 100644 --- a/sys/compat/linuxkpi/common/include/linux/pci.h +++ b/sys/compat/linuxkpi/common/include/linux/pci.h @@ -355,7 +355,6 @@ struct pci_dev { TAILQ_HEAD(, pci_mmio_region) mmio; }; -int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name); int pci_alloc_irq_vectors(struct pci_dev *pdev, int minv, int maxv, unsigned int flags); bool pci_device_is_present(struct pci_dev *pdev); @@ -365,10 +364,13 @@ void __iomem **linuxkpi_pcim_iomap_table(struct pci_dev *pdev); void *linuxkpi_pci_iomap_range(struct pci_dev *, int, unsigned long, unsigned long); void *linuxkpi_pci_iomap(struct pci_dev *, int, unsigned long); +void *linuxkpi_pcim_iomap(struct pci_dev *, int, unsigned long); void linuxkpi_pci_iounmap(struct pci_dev *pdev, void *res); int linuxkpi_pcim_iomap_regions(struct pci_dev *pdev, uint32_t mask, const char *name); +int linuxkpi_pci_request_region(struct pci_dev *, int, const char *); int linuxkpi_pci_request_regions(struct pci_dev *pdev, const char *res_name); +int linuxkpi_pcim_request_all_regions(struct pci_dev *, const char *); void linuxkpi_pci_release_region(struct pci_dev *pdev, int bar); void linuxkpi_pci_release_regions(struct pci_dev *pdev); int linuxkpi_pci_enable_msix(struct pci_dev *pdev, struct msix_entry *entries, @@ -561,12 +563,16 @@ done: return (pdev->bus->self); } +#define pci_request_region(pdev, bar, res_name) \ + linuxkpi_pci_request_region(pdev, bar, res_name) #define pci_release_region(pdev, bar) \ linuxkpi_pci_release_region(pdev, bar) -#define pci_release_regions(pdev) \ - linuxkpi_pci_release_regions(pdev) #define pci_request_regions(pdev, res_name) \ linuxkpi_pci_request_regions(pdev, res_name) +#define pci_release_regions(pdev) \ + linuxkpi_pci_release_regions(pdev) +#define pcim_request_all_regions(pdev, name) \ + linuxkpi_pcim_request_all_regions(pdev, name) static inline void lkpi_pci_disable_msix(struct pci_dev *pdev) @@ -803,6 +809,8 @@ static inline void pci_disable_sriov(struct pci_dev *dev) linuxkpi_pci_iomap_range(pdev, mmio_bar, mmio_off, mmio_size) #define pci_iomap(pdev, mmio_bar, mmio_size) \ linuxkpi_pci_iomap(pdev, mmio_bar, mmio_size) +#define pcim_iomap(pdev, bar, maxlen) \ + linuxkpi_pcim_iomap(pdev, bar, maxlen) #define pci_iounmap(pdev, res) \ linuxkpi_pci_iounmap(pdev, res) @@ -1445,6 +1453,9 @@ linuxkpi_pci_get_device(uint32_t vendor, uint32_t device, struct pci_dev *odev) return (lkpi_pci_get_device(vendor, device, odev)); } +#define for_each_pci_dev(_pdev) \ + while ((_pdev = linuxkpi_pci_get_device(PCI_ANY_ID, PCI_ANY_ID, _pdev)) != NULL) + /* This is a FreeBSD extension so we can use bus_*(). */ static inline void linuxkpi_pcim_want_to_use_bus_functions(struct pci_dev *pdev) diff --git a/sys/compat/linuxkpi/common/include/linux/rcupdate.h b/sys/compat/linuxkpi/common/include/linux/rcupdate.h index 85d766c8dbc9..4aceb7296cd6 100644 --- a/sys/compat/linuxkpi/common/include/linux/rcupdate.h +++ b/sys/compat/linuxkpi/common/include/linux/rcupdate.h @@ -1,7 +1,7 @@ /*- * Copyright (c) 2016-2017 Mellanox Technologies, Ltd. * All rights reserved. - * Copyright (c) 2024 The FreeBSD Foundation + * Copyright (c) 2024-2025 The FreeBSD Foundation * * Portions of this software were developed by Björn Zeeb * under sponsorship from the FreeBSD Foundation. @@ -35,6 +35,7 @@ #include <linux/compiler.h> #include <linux/types.h> #include <linux/kernel.h> +#include <linux/cleanup.h> #include <machine/atomic.h> @@ -162,4 +163,6 @@ void linux_synchronize_rcu(unsigned type); #define init_rcu_head_on_stack(...) #define destroy_rcu_head_on_stack(...) +DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock()) + #endif /* _LINUXKPI_LINUX_RCUPDATE_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/skbuff.h b/sys/compat/linuxkpi/common/include/linux/skbuff.h index c8ad90281e34..6e41c368a8b8 100644 --- a/sys/compat/linuxkpi/common/include/linux/skbuff.h +++ b/sys/compat/linuxkpi/common/include/linux/skbuff.h @@ -1,6 +1,6 @@ /*- * Copyright (c) 2020-2025 The FreeBSD Foundation - * Copyright (c) 2021-2023 Bjoern A. Zeeb + * Copyright (c) 2021-2025 Bjoern A. Zeeb * * This software was developed by Björn Zeeb under sponsorship from * the FreeBSD Foundation. @@ -47,13 +47,11 @@ #include <linux/ktime.h> #include <linux/compiler.h> -#include "opt_wlan.h" - -/* Currently this is only used for wlan so we can depend on that. */ -#if defined(IEEE80211_DEBUG) && !defined(SKB_DEBUG) -#define SKB_DEBUG -#endif - +/* + * At least the net/intel-irdma-kmod port pulls this header in; likely through + * if_ether.h (see PR289268). This means we no longer can rely on + * IEEE80211_DEBUG (opt_wlan.h) to automatically set SKB_DEBUG. + */ /* #define SKB_DEBUG */ #ifdef SKB_DEBUG @@ -120,7 +118,7 @@ enum sk_checksum_flags { CHECKSUM_NONE = 0x00, CHECKSUM_UNNECESSARY = 0x01, CHECKSUM_PARTIAL = 0x02, - CHECKSUM_COMPLETE = 0x04, + CHECKSUM_COMPLETE = 0x03, }; struct skb_frag { @@ -170,7 +168,7 @@ struct sk_buff { }; }; uint16_t protocol; - uint8_t ip_summed; + uint8_t ip_summed; /* 2 bit only. */ /* uint8_t */ /* "Scratch" area for layers to store metadata. */ diff --git a/sys/compat/linuxkpi/common/include/linux/slab.h b/sys/compat/linuxkpi/common/include/linux/slab.h index 47e3d133eb6c..0e649e1e3c4a 100644 --- a/sys/compat/linuxkpi/common/include/linux/slab.h +++ b/sys/compat/linuxkpi/common/include/linux/slab.h @@ -40,8 +40,10 @@ #include <linux/compat.h> #include <linux/types.h> #include <linux/gfp.h> +#include <linux/err.h> #include <linux/llist.h> #include <linux/overflow.h> +#include <linux/cleanup.h> MALLOC_DECLARE(M_KMALLOC); @@ -153,6 +155,8 @@ kfree(const void *ptr) lkpi_kfree(ptr); } +DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T)) + /* * Other k*alloc() funtions using the above as underlying allocator. */ diff --git a/sys/compat/linuxkpi/common/include/linux/string_helpers.h b/sys/compat/linuxkpi/common/include/linux/string_helpers.h index 1bdff2730361..2c6fe0b1708d 100644 --- a/sys/compat/linuxkpi/common/include/linux/string_helpers.h +++ b/sys/compat/linuxkpi/common/include/linux/string_helpers.h @@ -66,4 +66,6 @@ str_enable_disable(bool value) return "disable"; } +#define str_disable_enable(_v) str_enable_disable(!(_v)) + #endif diff --git a/sys/compat/linuxkpi/common/include/linux/timer.h b/sys/compat/linuxkpi/common/include/linux/timer.h index a635f0faea59..d48939e28a02 100644 --- a/sys/compat/linuxkpi/common/include/linux/timer.h +++ b/sys/compat/linuxkpi/common/include/linux/timer.h @@ -49,8 +49,13 @@ extern unsigned long linux_timer_hz_mask; #define TIMER_IRQSAFE 0x0001 +#if defined(LINUXKPI_VERSION) && (LINUXKPI_VERSION < 61600) #define from_timer(var, arg, field) \ container_of(arg, typeof(*(var)), field) +#else +#define timer_container_of(var, arg, field) \ + container_of(arg, typeof(*(var)), field) +#endif #define timer_setup(timer, func, flags) do { \ CTASSERT(((flags) & ~TIMER_IRQSAFE) == 0); \ @@ -79,11 +84,23 @@ extern unsigned long linux_timer_hz_mask; extern int mod_timer(struct timer_list *, unsigned long); extern void add_timer(struct timer_list *); extern void add_timer_on(struct timer_list *, int cpu); -extern int del_timer(struct timer_list *); -extern int del_timer_sync(struct timer_list *); + +extern int timer_delete(struct timer_list *); extern int timer_delete_sync(struct timer_list *); extern int timer_shutdown_sync(struct timer_list *); +static inline int +del_timer(struct timer_list *tl) +{ + return (timer_delete(tl)); +} + +static inline int +del_timer_sync(struct timer_list *tl) +{ + return (timer_delete_sync(tl)); +} + #define timer_pending(timer) callout_pending(&(timer)->callout) #define round_jiffies(j) \ ((unsigned long)(((j) + linux_timer_hz_mask) & ~linux_timer_hz_mask)) diff --git a/sys/compat/linuxkpi/common/include/net/mac80211.h b/sys/compat/linuxkpi/common/include/net/mac80211.h index 0106e6648bd4..8de03410c6b6 100644 --- a/sys/compat/linuxkpi/common/include/net/mac80211.h +++ b/sys/compat/linuxkpi/common/include/net/mac80211.h @@ -737,7 +737,7 @@ struct ieee80211_link_sta { struct ieee80211_he_6ghz_capa he_6ghz_capa; struct ieee80211_sta_eht_cap eht_cap; uint8_t rx_nss; - enum ieee80211_sta_rx_bw bandwidth; + enum ieee80211_sta_rx_bandwidth bandwidth; enum ieee80211_smps_mode smps_mode; struct ieee80211_sta_agg agg; struct ieee80211_sta_txpwr txpwr; diff --git a/sys/compat/linuxkpi/common/src/linux_80211.c b/sys/compat/linuxkpi/common/src/linux_80211.c index e248588dd275..d00734001a59 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211.c +++ b/sys/compat/linuxkpi/common/src/linux_80211.c @@ -77,6 +77,8 @@ #include <linux/rculist.h> #include "linux_80211.h" +/* #define LKPI_80211_USE_SCANLIST */ +/* #define LKPI_80211_BGSCAN */ #define LKPI_80211_WME #define LKPI_80211_HW_CRYPTO #define LKPI_80211_HT @@ -103,6 +105,10 @@ SYSCTL_DECL(_compat_linuxkpi); SYSCTL_NODE(_compat_linuxkpi, OID_AUTO, 80211, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "LinuxKPI 802.11 compatibility layer"); +static bool lkpi_order_scanlist = false; +SYSCTL_BOOL(_compat_linuxkpi_80211, OID_AUTO, order_scanlist, CTLFLAG_RW, + &lkpi_order_scanlist, 0, "Enable LinuxKPI 802.11 scan list shuffeling"); + #if defined(LKPI_80211_HW_CRYPTO) static bool lkpi_hwcrypto = false; SYSCTL_BOOL(_compat_linuxkpi_80211, OID_AUTO, hw_crypto, CTLFLAG_RDTUN, @@ -167,6 +173,7 @@ const struct cfg80211_ops linuxkpi_mac80211cfgops = { static struct lkpi_sta *lkpi_find_lsta_by_ni(struct lkpi_vif *, struct ieee80211_node *); #endif +static void lkpi_sw_scan_task(void *, int); static void lkpi_80211_txq_tx_one(struct lkpi_sta *, struct mbuf *); static void lkpi_80211_txq_task(void *, int); static void lkpi_80211_lhw_rxq_task(void *, int); @@ -394,7 +401,7 @@ lkpi_80211_dump_stas(SYSCTL_HANDLER_ARGS) return (0); } -static enum ieee80211_sta_rx_bw +static enum ieee80211_sta_rx_bandwidth lkpi_cw_to_rx_bw(enum nl80211_chan_width cw) { switch (cw) { @@ -418,7 +425,7 @@ lkpi_cw_to_rx_bw(enum nl80211_chan_width cw) } static enum nl80211_chan_width -lkpi_rx_bw_to_cw(enum ieee80211_sta_rx_bw rx_bw) +lkpi_rx_bw_to_cw(enum ieee80211_sta_rx_bandwidth rx_bw) { switch (rx_bw) { case IEEE80211_STA_RX_BW_20: @@ -439,7 +446,7 @@ lkpi_sync_chanctx_cw_from_rx_bw(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { struct ieee80211_chanctx_conf *chanctx_conf; - enum ieee80211_sta_rx_bw old_bw; + enum ieee80211_sta_rx_bandwidth old_bw; uint32_t changed; chanctx_conf = rcu_dereference_protected(vif->bss_conf.chanctx_conf, @@ -544,7 +551,7 @@ static void lkpi_sta_sync_vht_from_ni(struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_node *ni) { - enum ieee80211_sta_rx_bw bw; + enum ieee80211_sta_rx_bandwidth bw; uint32_t width; int rx_nss; uint16_t rx_mcs_map; @@ -955,6 +962,30 @@ lkpi_nl80211_band_to_net80211_band(enum nl80211_band band) return (0x00); } +#ifdef LINUXKPI_DEBUG_80211 +static const char * +lkpi_nl80211_band_name(enum nl80211_band band) +{ + switch (band) { + case NL80211_BAND_2GHZ: + return "2Ghz"; + break; + case NL80211_BAND_5GHZ: + return "5Ghz"; + break; + case NL80211_BAND_60GHZ: + return "60Ghz"; + break; + case NL80211_BAND_6GHZ: + return "6Ghz"; + break; + default: + panic("%s: unsupported band %u\n", __func__, band); + break; + } +} +#endif + #if 0 static enum ieee80211_ac_numbers lkpi_ac_net_to_l80211(int ac) @@ -1319,6 +1350,7 @@ lkpi_iv_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k) lhw = ic->ic_softc; hw = LHW_TO_HW(lhw); lvif = VAP_TO_LVIF(vap); + vif = LVIF_TO_VIF(lvif); /* * Make sure we do not make it here without going through @@ -1326,6 +1358,23 @@ lkpi_iv_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k) */ lockdep_assert_wiphy(hw->wiphy); + /* + * While we are assoc we may still send packets. We cannot delete the + * keys as otherwise packets could go out unencrypted. Some firmware + * does not like this and will fire an assert. + * net80211 needs to drive this better but given we want the disassoc + * frame out and have to unlock we are open to a race currently. + * This check should prevent problems. + * How to test: run 800Mbit/s UDP traffic and during that restart your + * supplicant. You want to survive that. + */ + if (vif->cfg.assoc) { + if (linuxkpi_debug_80211 & D80211_TRACE_HW_CRYPTO) + ic_printf(ic, "%d %lu %s: vif still assoc; not deleting keys\n", + curthread->td_tid, jiffies, __func__); + return (0); + } + if (IEEE80211_KEY_UNDEFINED(k)) { ic_printf(ic, "%s: vap %p key %p is undefined: %p %u\n", __func__, vap, k, k->wk_cipher, k->wk_keyix); @@ -1370,7 +1419,6 @@ lkpi_iv_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k) kc->keyidx, kc->hw_key_idx, kc->flags, IEEE80211_KEY_FLAG_BITS); #endif - vif = LVIF_TO_VIF(lvif); error = lkpi_80211_mo_set_key(hw, DISABLE_KEY, vif, sta, kc); if (error != 0) { ic_printf(ic, "%d %lu %s: set_key cmd %d(%s) for sta %6D failed: %d\n", @@ -1842,13 +1890,31 @@ lkpi_update_dtim_tsf(struct ieee80211_vif *vif, struct ieee80211_node *ni, vif->bss_conf.beacon_int = 16; bss_changed |= BSS_CHANGED_BEACON_INT; } - if (vif->bss_conf.dtim_period != vap->iv_dtim_period && - vap->iv_dtim_period > 0) { - vif->bss_conf.dtim_period = vap->iv_dtim_period; + + /* + * lkpi_iv_sta_recv_mgmt() will directly call into this function. + * iwlwifi(4) in iwl_mvm_bss_info_changed_station_common() will + * stop seesion protection the moment it sees + * BSS_CHANGED_BEACON_INFO (with the expectations that it was + * "a beacon from the associated AP"). It will also update + * the beacon filter in that case. This is the only place + * we set the BSS_CHANGED_BEACON_INFO on the non-teardown + * path so make sure we only do run this check once we are + * assoc. (*iv_recv_mgmt)() will be called before we enter + * here so the ni will be updates with information from the + * beacon via net80211::sta_recv_mgmt(). We also need to + * make sure we do not do it on every beacon we still may + * get so only do if something changed. vif->bss_conf.dtim_period + * should be 0 as we start up (we also reset it on teardown). + */ + if (vif->cfg.assoc && + vif->bss_conf.dtim_period != ni->ni_dtim_period && + ni->ni_dtim_period > 0) { + vif->bss_conf.dtim_period = ni->ni_dtim_period; bss_changed |= BSS_CHANGED_BEACON_INFO; } - vif->bss_conf.sync_dtim_count = vap->iv_dtim_count; + vif->bss_conf.sync_dtim_count = ni->ni_dtim_count; vif->bss_conf.sync_tsf = le64toh(ni->ni_tstamp.tsf); /* vif->bss_conf.sync_device_ts = set in linuxkpi_ieee80211_rx. */ @@ -1876,6 +1942,8 @@ lkpi_stop_hw_scan(struct lkpi_hw *lhw, struct ieee80211_vif *vif) int error; bool cancel; + TRACE_SCAN(lhw->ic, "scan_flags %b", lhw->scan_flags, LKPI_LHW_SCAN_BITS); + LKPI_80211_LHW_SCAN_LOCK(lhw); cancel = (lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0; LKPI_80211_LHW_SCAN_UNLOCK(lhw); @@ -2798,6 +2866,14 @@ _lkpi_sta_assoc_to_down(struct ieee80211vap *vap, enum ieee80211_state nstate, i bss_changed = 0; bss_changed |= lkpi_disassoc(sta, vif, lhw); +#ifdef LKPI_80211_HW_CRYPTO + /* + * In theory we remove keys here but there must not exist any for this + * state change until we clean them up again into small steps and no + * code duplication. + */ +#endif + lkpi_lsta_dump(lsta, ni, __func__, __LINE__); /* Adjust sta and change state (from NONE) to NOTEXIST. */ @@ -3333,6 +3409,16 @@ lkpi_sta_run_to_init(struct ieee80211vap *vap, enum ieee80211_state nstate, int #ifdef LKPI_80211_HW_CRYPTO if (lkpi_hwcrypto) { + /* + * In theory we only need to do this if we changed assoc. + * If we were not assoc, there should be no keys and we + * should not be here. + */ +#ifdef notyet + KASSERT((bss_changed & BSS_CHANGED_ASSOC) != 0, ("%s: " + "trying to remove keys but were not assoc: %#010jx, lvif %p\n", + __func__, (uintmax_t)bss_changed, lvif)); +#endif error = lkpi_sta_del_keys(hw, vif, lsta); if (error != 0) { ic_printf(vap->iv_ic, "%s:%d: lkpi_sta_del_keys " @@ -3394,6 +3480,9 @@ lkpi_sta_run_to_init(struct ieee80211vap *vap, enum ieee80211_state nstate, int * 4) call unassign_vif_chanctx * 5) call lkpi_hw_conf_idle * 6) call remove_chanctx + * + * Note: vif->driver_flags & IEEE80211_VIF_REMOVE_AP_AFTER_DISASSOC + * might change this. */ bss_changed |= lkpi_disassoc(sta, vif, lhw); @@ -3545,7 +3634,7 @@ lkpi_iv_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) vif = LVIF_TO_VIF(lvif); /* No need to replicate this in most state handlers. */ - if (ostate == IEEE80211_S_SCAN && nstate != IEEE80211_S_SCAN) + if (nstate > IEEE80211_S_SCAN) lkpi_stop_hw_scan(lhw, vif); s = sta_state_fsm; @@ -3739,6 +3828,7 @@ lkpi_iv_sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, enum ieee80211_bss_changed bss_changed; lvif = VAP_TO_LVIF(ni->ni_vap); + vif = LVIF_TO_VIF(lvif); lvif->iv_recv_mgmt(ni, m0, subtype, rxs, rssi, nf); @@ -3746,13 +3836,18 @@ lkpi_iv_sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, case IEEE80211_FC0_SUBTYPE_PROBE_RESP: break; case IEEE80211_FC0_SUBTYPE_BEACON: - lvif->beacons++; + /* + * Only count beacons when assoc. SCAN has its own logging. + * This is for connection/beacon loss/session protection almost + * over debugging when trying to get into a stable RUN state. + */ + if (vif->cfg.assoc) + lvif->beacons++; break; default: return; } - vif = LVIF_TO_VIF(lvif); lhw = ni->ni_ic->ic_softc; hw = LHW_TO_HW(lhw); @@ -3824,6 +3919,7 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], lvif = malloc(len, M_80211_VAP, M_WAITOK | M_ZERO); mtx_init(&lvif->mtx, "lvif", NULL, MTX_DEF); + TASK_INIT(&lvif->sw_scan_task, 0, lkpi_sw_scan_task, lvif); INIT_LIST_HEAD(&lvif->lsta_list); lvif->lvif_bss = NULL; refcount_init(&lvif->nt_unlocked, 0); @@ -3987,13 +4083,9 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], * Modern chipset/fw/drv will do A-MPDU in drv/fw and fail * to do so if they cannot do the crypto too. */ - if (!lkpi_hwcrypto && ieee80211_hw_check(hw, AMPDU_AGGREGATION)) + if (!lkpi_hwcrypto && IEEE80211_CONF_AMPDU_OFFLOAD(ic)) vap->iv_flags_ht &= ~IEEE80211_FHT_AMPDU_RX; #endif -#if defined(LKPI_80211_HT) - /* 20250125-BZ Keep A-MPDU TX cleared until we sorted out AddBA for all drivers. */ - vap->iv_flags_ht &= ~IEEE80211_FHT_AMPDU_TX; -#endif if (hw->max_listen_interval == 0) hw->max_listen_interval = 7 * (ic->ic_lintval / ic->ic_bintval); @@ -4062,6 +4154,8 @@ lkpi_ic_vap_delete(struct ieee80211vap *vap) /* Clear up per-VIF/VAP sysctls. */ sysctl_ctx_free(&lvif->sysctl_ctx); + ieee80211_draintask(ic, &lvif->sw_scan_task); + LKPI_80211_LHW_LVIF_LOCK(lhw); TAILQ_REMOVE(&lhw->lvif_head, lvif, lvif_entry); LKPI_80211_LHW_LVIF_UNLOCK(lhw); @@ -4303,6 +4397,113 @@ lkpi_scan_ies_add(uint8_t *p, struct ieee80211_scan_ies *scan_ies, } static void +lkpi_enable_hw_scan(struct lkpi_hw *lhw) +{ + + if (lhw->ops->hw_scan) { + /* + * Advertise full-offload scanning. + * + * Not limiting to SINGLE_SCAN_ON_ALL_BANDS here as otherwise + * we essentially disable hw_scan for all drivers not setting + * the flag. + */ + lhw->ic->ic_flags_ext |= IEEE80211_FEXT_SCAN_OFFLOAD; + lhw->scan_flags |= LKPI_LHW_SCAN_HW; + } +} + +#ifndef LKPI_80211_USE_SCANLIST +static const uint32_t chan_pri[] = { + 5180, 5500, 5745, + 5260, 5580, 5660, 5825, + 5220, 5300, 5540, 5620, 5700, 5785, 5865, + 2437, 2412, 2422, 2462, 2472, 2432, 2452 +}; + +static int +lkpi_scan_chan_list_idx(const struct linuxkpi_ieee80211_channel *lc) +{ + int i; + + for (i = 0; i < nitems(chan_pri); i++) { + if (lc->center_freq == chan_pri[i]) + return (i); + } + + return (-1); +} + +static int +lkpi_scan_chan_list_comp(const struct linuxkpi_ieee80211_channel *lc1, + const struct linuxkpi_ieee80211_channel *lc2) +{ + int idx1, idx2; + + /* Find index in list. */ + idx1 = lkpi_scan_chan_list_idx(lc1); + idx2 = lkpi_scan_chan_list_idx(lc2); + + if (idx1 == -1 && idx2 != -1) + return (1); + if (idx1 != -1 && idx2 == -1) + return (-1); + + /* Neither on the list, use center_freq. */ + if (idx1 == -1 && idx2 == -1) + return (lc1->center_freq - lc2->center_freq); + + /* Whichever is first in the list. */ + return (idx1 - idx2); +} + +static void +lkpi_scan_chan_list_resort(struct linuxkpi_ieee80211_channel **cpp, size_t nchan) +{ + struct linuxkpi_ieee80211_channel *lc, *nc; + size_t i, j; + int rc; + + for (i = (nchan - 1); i > 0; i--) { + for (j = i; j > 0 ; j--) { + lc = *(cpp + j); + nc = *(cpp + j - 1); + rc = lkpi_scan_chan_list_comp(lc, nc); + if (rc < 0) { + *(cpp + j) = nc; + *(cpp + j - 1) = lc; + } + } + } +} + +static bool +lkpi_scan_chan(struct linuxkpi_ieee80211_channel *c, + struct ieee80211com *ic, bool log) +{ + + if ((c->flags & IEEE80211_CHAN_DISABLED) != 0) { + if (log) + TRACE_SCAN(ic, "Skipping disabled chan " + "on band %s [%#x/%u/%#x]", + lkpi_nl80211_band_name(c->band), c->hw_value, + c->center_freq, c->flags); + return (false); + } + if (isclr(ic->ic_chan_active, ieee80211_mhz2ieee(c->center_freq, + lkpi_nl80211_band_to_net80211_band(c->band)))) { + if (log) + TRACE_SCAN(ic, "Skipping !active chan " + "on band %s [%#x/%u/%#x]", + lkpi_nl80211_band_name(c->band), c->hw_value, + c->center_freq, c->flags); + return (false); + } + return (true); +} +#endif + +static void lkpi_ic_scan_start(struct ieee80211com *ic) { struct lkpi_hw *lhw; @@ -4315,35 +4516,52 @@ lkpi_ic_scan_start(struct ieee80211com *ic) bool is_hw_scan; lhw = ic->ic_softc; + ss = ic->ic_scan; + vap = ss->ss_vap; + TRACE_SCAN(ic, "scan_flags %b", lhw->scan_flags, LKPI_LHW_SCAN_BITS); + LKPI_80211_LHW_SCAN_LOCK(lhw); if ((lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0) { /* A scan is still running. */ LKPI_80211_LHW_SCAN_UNLOCK(lhw); + TRACE_SCAN(ic, "Trying to start new scan while still running; " + "cancelling new net80211 scan; scan_flags %b", + lhw->scan_flags, LKPI_LHW_SCAN_BITS); + ieee80211_cancel_scan(vap); return; } is_hw_scan = (lhw->scan_flags & LKPI_LHW_SCAN_HW) != 0; LKPI_80211_LHW_SCAN_UNLOCK(lhw); - ss = ic->ic_scan; - vap = ss->ss_vap; +#if 0 if (vap->iv_state != IEEE80211_S_SCAN) { - IMPROVE("We need to be able to scan if not in S_SCAN"); + TODO("We need to be able to scan if not in S_SCAN"); + TRACE_SCAN(ic, "scan_flags %b iv_state %d", + lhw->scan_flags, LKPI_LHW_SCAN_BITS, vap->iv_state); + ieee80211_cancel_scan(vap); return; } +#endif hw = LHW_TO_HW(lhw); if (!is_hw_scan) { /* If hw_scan is cleared clear FEXT_SCAN_OFFLOAD too. */ vap->iv_flags_ext &= ~IEEE80211_FEXT_SCAN_OFFLOAD; -sw_scan: + lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); if (vap->iv_state == IEEE80211_S_SCAN) lkpi_hw_conf_idle(hw, false); + LKPI_80211_LHW_SCAN_LOCK(lhw); + lhw->scan_flags |= LKPI_LHW_SCAN_RUNNING; + LKPI_80211_LHW_SCAN_UNLOCK(lhw); + lkpi_update_mcast_filter(ic); + TRACE_SCAN(vap->iv_ic, "Starting SW_SCAN: scan_flags %b", + lhw->scan_flags, LKPI_LHW_SCAN_BITS); lkpi_80211_mo_sw_scan_start(hw, vif, vif->addr); /* net80211::scan_start() handled PS for us. */ IMPROVE(); @@ -4358,6 +4576,9 @@ sw_scan: struct cfg80211_scan_6ghz_params *s6gp; size_t chan_len, nchan, ssids_len, s6ghzlen; int band, i, ssid_count, common_ie_len; +#ifndef LKPI_80211_USE_SCANLIST + int n; +#endif uint32_t band_mask; uint8_t *ie, *ieend; bool running; @@ -4369,7 +4590,8 @@ sw_scan: band_mask = 0; nchan = 0; if (ieee80211_hw_check(hw, SINGLE_SCAN_ON_ALL_BANDS)) { -#if 0 /* Avoid net80211 scan lists until it has proper scan offload support. */ +#ifdef LKPI_80211_USE_SCANLIST + /* Avoid net80211 scan lists until it has proper scan offload support. */ for (i = ss->ss_next; i < ss->ss_last; i++) { nchan++; band = lkpi_net80211_chan_to_nl80211_band( @@ -4387,8 +4609,17 @@ sw_scan: continue; } if (hw->wiphy->bands[band] != NULL) { - nchan += hw->wiphy->bands[band]->n_channels; + struct linuxkpi_ieee80211_channel *channels; + int n; + band_mask |= (1 << band); + + channels = hw->wiphy->bands[band]->channels; + n = hw->wiphy->bands[band]->n_channels; + for (i = 0; i < n; i++) { + if (lkpi_scan_chan(&channels[i], ic, true)) + nchan++; + } } } #endif @@ -4427,11 +4658,32 @@ sw_scan: /* hw_req->req.wdev */ hw_req->req.wiphy = hw->wiphy; hw_req->req.no_cck = false; /* XXX */ -#if 0 - /* This seems to pessimise default scanning behaviour. */ - hw_req->req.duration_mandatory = TICKS_2_USEC(ss->ss_mindwell); - hw_req->req.duration = TICKS_2_USEC(ss->ss_maxdwell); -#endif + + /* + * In general setting duration[_mandatory] seems to pessimise + * default scanning behaviour. We only use it for BGSCANnig + * to keep the dwell times small. + * Setting duration_mandatory makes this the maximum dwell + * time (otherwise may be shorter). Duration is in TU. + */ + if ((ic->ic_flags_ext & IEEE80211_FEXT_BGSCAN) != 0) { + unsigned long dwell; + + if ((ic->ic_caps & IEEE80211_C_BGSCAN) == 0 || + (vap->iv_flags & IEEE80211_F_BGSCAN) == 0) + ic_printf(ic, "BGSCAN despite off: %b, %b, %b\n", + ic->ic_flags_ext, IEEE80211_FEXT_BITS, + vap->iv_flags, IEEE80211_F_BITS, + ic->ic_caps, IEEE80211_C_BITS); + + dwell = ss->ss_mindwell; + if (dwell == 0) + dwell = msecs_to_ticks(20); + + hw_req->req.duration_mandatory = true; + hw_req->req.duration = TICKS_2_USEC(dwell) / 1024; + } + #ifdef __notyet__ hw_req->req.flags |= NL80211_SCAN_FLAG_RANDOM_ADDR; memcpy(hw_req->req.mac_addr, xxx, IEEE80211_ADDR_LEN); @@ -4442,11 +4694,12 @@ sw_scan: hw_req->req.n_channels = nchan; cpp = (struct linuxkpi_ieee80211_channel **)(hw_req + 1); lc = (struct linuxkpi_ieee80211_channel *)(cpp + nchan); +#ifdef LKPI_80211_USE_SCANLIST for (i = 0; i < nchan; i++) { *(cpp + i) = (struct linuxkpi_ieee80211_channel *)(lc + i); } -#if 0 /* Avoid net80211 scan lists until it has proper scan offload support. */ + /* Avoid net80211 scan lists until it has proper scan offload support. */ for (i = 0; i < nchan; i++) { struct ieee80211_channel *c; @@ -4459,7 +4712,9 @@ sw_scan: lc++; } #else - for (band = 0; band < NUM_NL80211_BANDS; band++) { + /* Add bands in reverse order for scanning. */ + n = 0; + for (band = NUM_NL80211_BANDS - 1; band >= 0; band--) { struct ieee80211_supported_band *supband; struct linuxkpi_ieee80211_channel *channels; @@ -4474,11 +4729,30 @@ sw_scan: channels = supband->channels; for (i = 0; i < supband->n_channels; i++) { - *lc = channels[i]; - lc++; + if (lkpi_scan_chan(&channels[i], ic, false)) + *(cpp + n++) = &channels[i]; } } + if (lkpi_order_scanlist) + lkpi_scan_chan_list_resort(cpp, nchan); + + if ((linuxkpi_debug_80211 & D80211_SCAN) != 0) { + printf("%s:%d: %s SCAN Channel List (nchan=%zu): ", + __func__, __LINE__, ic->ic_name, nchan); + for (i = 0; i < nchan; i++) { + struct linuxkpi_ieee80211_channel *xc; + + xc = *(cpp + i); + printf(" %d(%d)", + ieee80211_mhz2ieee(xc->center_freq, + lkpi_nl80211_band_to_net80211_band( + xc->band)), + xc->center_freq); + } + printf("\n"); + } #endif + hw_req->req.n_ssids = ssid_count; if (hw_req->req.n_ssids > 0) { ssids = (struct cfg80211_ssid *)lc; @@ -4505,6 +4779,7 @@ sw_scan: ieend = lkpi_scan_ies_add(ie, &hw_req->ies, band_mask, vap, hw); hw_req->req.ie = ie; hw_req->req.ie_len = ieend - ie; + hw_req->req.scan_start = jiffies; lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); @@ -4522,13 +4797,30 @@ sw_scan: LKPI_80211_LHW_SCAN_UNLOCK(lhw); if (running) { free(hw_req, M_LKPI80211); + TRACE_SCAN(ic, "Trying to start new scan while still " + "running (2); cancelling new net80211 scan; " + "scan_flags %b", + lhw->scan_flags, LKPI_LHW_SCAN_BITS); + ieee80211_cancel_scan(vap); return; } lkpi_update_mcast_filter(ic); + TRACE_SCAN(ic, "Starting HW_SCAN: scan_flags %b, " + "ie_len %d, n_ssids %d, n_chan %d, common_ie_len %d [%d, %d]", + lhw->scan_flags, LKPI_LHW_SCAN_BITS, hw_req->req.ie_len, + hw_req->req.n_ssids, hw_req->req.n_channels, + hw_req->ies.common_ie_len, + hw_req->ies.len[NL80211_BAND_2GHZ], + hw_req->ies.len[NL80211_BAND_5GHZ]); error = lkpi_80211_mo_hw_scan(hw, vif, hw_req); if (error != 0) { + bool scan_done; + int e; + + TRACE_SCAN(ic, "hw_scan failed; scan_flags %b, error %d", + lhw->scan_flags, LKPI_LHW_SCAN_BITS, error); ieee80211_cancel_scan(vap); /* @@ -4545,14 +4837,35 @@ sw_scan: * So we cannot rely on that behaviour and have to check * and balance between both code paths. */ + e = 0; + scan_done = true; LKPI_80211_LHW_SCAN_LOCK(lhw); if ((lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0) { + free(lhw->hw_req, M_LKPI80211); lhw->hw_req = NULL; + /* + * The ieee80211_cancel_scan() above runs in a + * taskq and it may take ages for the previous + * scan to clear; starting a new one right away + * we run into the problem that the old one is + * still active. + */ + e = msleep(lhw, &lhw->scan_mtx, 0, "lhwscanstop", hz); + scan_done = (lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0; + + /* + * Now we can clear running if no one else did. + */ lhw->scan_flags &= ~LKPI_LHW_SCAN_RUNNING; } LKPI_80211_LHW_SCAN_UNLOCK(lhw); lkpi_update_mcast_filter(ic); + if (!scan_done) { + ic_printf(ic, "ERROR: %s: timeout/error to wait " + "for ieee80211_cancel_scan: %d\n", __func__, e); + return; + } /* * XXX-SIGH magic number. @@ -4560,24 +4873,15 @@ sw_scan: * not possible. Fall back to sw scan in that case. */ if (error == 1) { - LKPI_80211_LHW_SCAN_LOCK(lhw); - lhw->scan_flags &= ~LKPI_LHW_SCAN_HW; - LKPI_80211_LHW_SCAN_UNLOCK(lhw); /* - * XXX If we clear this now and later a driver - * thinks it * can do a hw_scan again, we will - * currently not re-enable it? + * We need to put this into some defered context + * the net80211 scan may not be done yet + * (ic_flags & IEEE80211_F_SCAN) and we cannot + * wait here; if we do scan_curchan_task always + * runs after our timeout to finalize the scan. */ - vap->iv_flags_ext &= ~IEEE80211_FEXT_SCAN_OFFLOAD; - ieee80211_start_scan(vap, - IEEE80211_SCAN_ACTIVE | - IEEE80211_SCAN_NOPICK | - IEEE80211_SCAN_ONCE, - IEEE80211_SCAN_FOREVER, - ss->ss_mindwell ? ss->ss_mindwell : msecs_to_ticks(20), - ss->ss_maxdwell ? ss->ss_maxdwell : msecs_to_ticks(200), - vap->iv_des_nssid, vap->iv_des_ssid); - goto sw_scan; + ieee80211_runtask(ic, &lvif->sw_scan_task); + return; } ic_printf(ic, "ERROR: %s: hw_scan returned %d\n", @@ -4587,12 +4891,50 @@ sw_scan: } static void +lkpi_sw_scan_task(void *arg, int pending __unused) +{ + struct lkpi_hw *lhw; + struct lkpi_vif *lvif; + struct ieee80211vap *vap; + struct ieee80211_scan_state *ss; + + lvif = arg; + vap = LVIF_TO_VAP(lvif); + lhw = vap->iv_ic->ic_softc; + ss = vap->iv_ic->ic_scan; + + LKPI_80211_LHW_SCAN_LOCK(lhw); + /* + * We will re-enable this at scan_end calling lkpi_enable_hw_scan(). + * IEEE80211_FEXT_SCAN_OFFLOAD will be cleared by lkpi_ic_scan_start. + */ + lhw->scan_flags &= ~LKPI_LHW_SCAN_HW; + LKPI_80211_LHW_SCAN_UNLOCK(lhw); + + TRACE_SCAN(vap->iv_ic, "Triggering SW_SCAN: pending %d, scan_flags %b", + pending, lhw->scan_flags, LKPI_LHW_SCAN_BITS); + + /* + * This will call ic_scan_start() and we will get into the right path + * unless other scans started in between. + */ + ieee80211_start_scan(vap, + IEEE80211_SCAN_ONCE, + msecs_to_ticks(10000), /* 10000 ms (=~ 50 chan * 200 ms) */ + ss->ss_mindwell ? ss->ss_mindwell : msecs_to_ticks(20), + ss->ss_maxdwell ? ss->ss_maxdwell : msecs_to_ticks(200), + vap->iv_des_nssid, vap->iv_des_ssid); +} + +static void lkpi_ic_scan_end(struct ieee80211com *ic) { struct lkpi_hw *lhw; bool is_hw_scan; lhw = ic->ic_softc; + TRACE_SCAN(ic, "scan_flags %b", lhw->scan_flags, LKPI_LHW_SCAN_BITS); + LKPI_80211_LHW_SCAN_LOCK(lhw); if ((lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) == 0) { LKPI_80211_LHW_SCAN_UNLOCK(lhw); @@ -4621,6 +4963,16 @@ lkpi_ic_scan_end(struct ieee80211com *ic) if (vap->iv_state == IEEE80211_S_SCAN) lkpi_hw_conf_idle(hw, true); } + + /* + * In case we disabled the hw_scan in lkpi_ic_scan_start() and + * switched to swscan, re-enable hw_scan if available. + */ + lkpi_enable_hw_scan(lhw); + + LKPI_80211_LHW_SCAN_LOCK(lhw); + wakeup(lhw); + LKPI_80211_LHW_SCAN_UNLOCK(lhw); } static void @@ -4631,6 +4983,10 @@ lkpi_ic_scan_curchan(struct ieee80211_scan_state *ss, bool is_hw_scan; lhw = ss->ss_ic->ic_softc; + TRACE_SCAN(ss->ss_ic, "scan_flags %b chan %d maxdwell %lu", + lhw->scan_flags, LKPI_LHW_SCAN_BITS, + ss->ss_ic->ic_curchan->ic_ieee, maxdwell); + LKPI_80211_LHW_SCAN_LOCK(lhw); is_hw_scan = (lhw->scan_flags & LKPI_LHW_SCAN_HW) != 0; LKPI_80211_LHW_SCAN_UNLOCK(lhw); @@ -4645,6 +5001,10 @@ lkpi_ic_scan_mindwell(struct ieee80211_scan_state *ss) bool is_hw_scan; lhw = ss->ss_ic->ic_softc; + TRACE_SCAN(ss->ss_ic, "scan_flags %b chan %d mindwell %lu", + lhw->scan_flags, LKPI_LHW_SCAN_BITS, + ss->ss_ic->ic_curchan->ic_ieee, ss->ss_mindwell); + LKPI_80211_LHW_SCAN_LOCK(lhw); is_hw_scan = (lhw->scan_flags & LKPI_LHW_SCAN_HW) != 0; LKPI_80211_LHW_SCAN_UNLOCK(lhw); @@ -6042,6 +6402,7 @@ linuxkpi_ieee80211_alloc_hw(size_t priv_len, const struct ieee80211_ops *ops) LKPI_80211_LHW_SCAN_LOCK_INIT(lhw); LKPI_80211_LHW_TXQ_LOCK_INIT(lhw); + spin_lock_init(&lhw->txq_lock); sx_init_flags(&lhw->lvif_sx, "lhw-lvif", SX_RECURSE | SX_DUPOK); LKPI_80211_LHW_MC_LOCK_INIT(lhw); TAILQ_INIT(&lhw->lvif_head); @@ -6147,6 +6508,7 @@ linuxkpi_ieee80211_iffree(struct ieee80211_hw *hw) LKPI_80211_LHW_MC_UNLOCK(lhw); /* Cleanup more of lhw here or in wiphy_free()? */ + spin_lock_destroy(&lhw->txq_lock); LKPI_80211_LHW_TXQ_LOCK_DESTROY(lhw); LKPI_80211_LHW_SCAN_LOCK_DESTROY(lhw); sx_destroy(&lhw->lvif_sx); @@ -6255,26 +6617,26 @@ linuxkpi_ieee80211_ifattach(struct ieee80211_hw *hw) IEEE80211_C_SHSLOT | /* short slot time supported */ IEEE80211_C_SHPREAMBLE /* short preamble supported */ ; -#if 0 - /* Scanning is a different kind of beast to re-work. */ - ic->ic_caps |= IEEE80211_C_BGSCAN; + +#ifdef LKPI_80211_BGSCAN + if (lhw->ops->hw_scan) + ic->ic_caps |= IEEE80211_C_BGSCAN; #endif - if (lhw->ops->hw_scan) { - /* - * Advertise full-offload scanning. - * - * Not limiting to SINGLE_SCAN_ON_ALL_BANDS here as otherwise - * we essentially disable hw_scan for all drivers not setting - * the flag. - */ - ic->ic_flags_ext |= IEEE80211_FEXT_SCAN_OFFLOAD; - lhw->scan_flags |= LKPI_LHW_SCAN_HW; - } + + lkpi_enable_hw_scan(lhw); /* Does HW support Fragmentation offload? */ if (ieee80211_hw_check(hw, SUPPORTS_TX_FRAG)) ic->ic_flags_ext |= IEEE80211_FEXT_FRAG_OFFLOAD; + /* Does HW support full AMPDU[-TX] offload? */ + if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) + ic->ic_flags_ext |= IEEE80211_FEXT_AMPDU_OFFLOAD; +#ifdef __notyet__ + if (ieee80211_hw_check(hw, TX_AMSDU)) + if (ieee80211_hw_check(hw, SUPPORTS_AMSDU_IN_AMPDU)) +#endif + /* * The wiphy variables report bitmasks of avail antennas. * (*get_antenna) get the current bitmask sets which can be @@ -6726,13 +7088,19 @@ linuxkpi_ieee80211_scan_completed(struct ieee80211_hw *hw, ic = lhw->ic; ss = ic->ic_scan; + TRACE_SCAN(ic, "scan_flags %b info { %ju, %6D, aborted %d }", + lhw->scan_flags, LKPI_LHW_SCAN_BITS, + (uintmax_t)info->scan_start_tsf, info->tsf_bssid, ":", + info->aborted); + ieee80211_scan_done(ss->ss_vap); LKPI_80211_LHW_SCAN_LOCK(lhw); free(lhw->hw_req, M_LKPI80211); lhw->hw_req = NULL; lhw->scan_flags &= ~LKPI_LHW_SCAN_RUNNING; - wakeup(lhw); + /* The wakeup(lhw) will be called from lkpi_ic_scan_end(). */ + /* wakeup(lhw); */ LKPI_80211_LHW_SCAN_UNLOCK(lhw); return; @@ -7002,6 +7370,63 @@ lkpi_convert_rx_status(struct ieee80211_hw *hw, struct lkpi_sta *lsta, } } +#ifdef LINUXKPI_DEBUG_80211 +static void +lkpi_rx_log_beacon(struct mbuf *m, struct lkpi_hw *lhw, + struct ieee80211_rx_status *rx_status) +{ + struct ieee80211_mgmt *f; + uint8_t *e; + char ssid[IEEE80211_NWID_LEN * 4 + 1]; + + memset(ssid, '\0', sizeof(ssid)); + + f = mtod(m, struct ieee80211_mgmt *); + e = f->u.beacon.variable; + /* + * Usually SSID is right after the fixed part and for debugging we will + * be fine should we miss it if it is not. + */ + while ((e - (uint8_t *)f) < m->m_len) { + if (*e == IEEE80211_ELEMID_SSID) + break; + e += (2 + *(e + 1)); + } + if (*e == IEEE80211_ELEMID_SSID) { + int i, len; + char *p; + + p = ssid; + len = m->m_len - ((e + 2) - (uint8_t *)f); + if (len > *(e + 1)) + len = *(e + 1); + e += 2; + for (i = 0; i < len; i++) { + /* Printable character? */ + if (*e >= 0x20 && *e < 0x7f) { + *p++ = *e++; + } else { + snprintf(p, 5, "%#04x", *e++); + p += 4; + } + } + *p = '\0'; + } + + /* We print skb, skb->data, m as we are seeing 'ghost beacons'. */ + TRACE_SCAN_BEACON(lhw->ic, "Beacon: scan_flags %b, band %s freq %u chan %-4d " + "len %d { %#06x %#06x %6D %6D %6D %#06x %ju %u %#06x SSID '%s' }", + lhw->scan_flags, LKPI_LHW_SCAN_BITS, + lkpi_nl80211_band_name(rx_status->band), rx_status->freq, + linuxkpi_ieee80211_frequency_to_channel(rx_status->freq, 0), + m->m_pkthdr.len, f->frame_control, f->duration_id, + f->da, ":", f->sa, ":", f->bssid, ":", f->seq_ctrl, + (uintmax_t)le64_to_cpu(f->u.beacon.timestamp), + le16_to_cpu(f->u.beacon.beacon_int), + le16_to_cpu(f->u.beacon.capab_info), ssid); +} +#endif + /* For %list see comment towards the end of the function. */ void linuxkpi_ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, @@ -7058,7 +7483,15 @@ linuxkpi_ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, is_beacon = ieee80211_is_beacon(hdr->frame_control); #ifdef LINUXKPI_DEBUG_80211 - if (is_beacon && (linuxkpi_debug_80211 & D80211_TRACE_RX_BEACONS) == 0) + /* + * We use the mbuf here as otherwise the variable part might + * be in skb frags. + */ + if (is_beacon && ((linuxkpi_debug_80211 & D80211_SCAN_BEACON) != 0)) + lkpi_rx_log_beacon(m, lhw, rx_status); + + if (is_beacon && (linuxkpi_debug_80211 & D80211_TRACE_RX_BEACONS) == 0 && + (linuxkpi_debug_80211 & D80211_SCAN_BEACON) == 0) goto no_trace_beacons; if (linuxkpi_debug_80211 & D80211_TRACE_RX) @@ -7073,7 +7506,8 @@ linuxkpi_ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, hexdump(mtod(m, const void *), m->m_len, "RX (raw) ", 0); /* Implement a dump_rxcb() !!! */ - if (linuxkpi_debug_80211 & D80211_TRACE_RX) + if ((linuxkpi_debug_80211 & D80211_TRACE_RX) != 0 || + (linuxkpi_debug_80211 & D80211_SCAN_BEACON) != 0) printf("TRACE-RX: %s: RXCB: %ju %ju %u, %b, %u, %#0x, %#0x, " "%u band %u, %u { %d %d %d %d }, %d, %#x %#x %#x %#x %u %u %u\n", __func__, @@ -7380,7 +7814,7 @@ lkpi_wiphy_delayed_work_timer(struct timer_list *tl) { struct wiphy_delayed_work *wdwk; - wdwk = from_timer(wdwk, tl, timer); + wdwk = timer_container_of(wdwk, tl, timer); wiphy_work_queue(wdwk->wiphy, &wdwk->work); } @@ -8124,21 +8558,30 @@ lkpi_ieee80211_wake_queues_locked(struct ieee80211_hw *hw) void linuxkpi_ieee80211_wake_queues(struct ieee80211_hw *hw) { - wiphy_lock(hw->wiphy); + struct lkpi_hw *lhw; + unsigned long flags; + + lhw = HW_TO_LHW(hw); + + spin_lock_irqsave(&lhw->txq_lock, flags); lkpi_ieee80211_wake_queues_locked(hw); - wiphy_unlock(hw->wiphy); + spin_unlock_irqrestore(&lhw->txq_lock, flags); } void linuxkpi_ieee80211_wake_queue(struct ieee80211_hw *hw, int qnum) { + struct lkpi_hw *lhw; + unsigned long flags; KASSERT(qnum < hw->queues, ("%s: qnum %d >= hw->queues %d, hw %p\n", __func__, qnum, hw->queues, hw)); - wiphy_lock(hw->wiphy); + lhw = HW_TO_LHW(hw); + + spin_lock_irqsave(&lhw->txq_lock, flags); lkpi_ieee80211_wake_queues(hw, qnum); - wiphy_unlock(hw->wiphy); + spin_unlock_irqrestore(&lhw->txq_lock, flags); } /* This is just hardware queues. */ diff --git a/sys/compat/linuxkpi/common/src/linux_80211.h b/sys/compat/linuxkpi/common/src/linux_80211.h index eaf6d804af4c..0dfcd7646c34 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211.h +++ b/sys/compat/linuxkpi/common/src/linux_80211.h @@ -59,6 +59,8 @@ #define D80211_IMPROVE_TXQ 0x00000004 #define D80211_TRACE 0x00000010 #define D80211_TRACEOK 0x00000020 +#define D80211_SCAN 0x00000040 +#define D80211_SCAN_BEACON 0x00000080 #define D80211_TRACE_TX 0x00000100 #define D80211_TRACE_TX_DUMP 0x00000200 #define D80211_TRACE_RX 0x00001000 @@ -75,6 +77,20 @@ #define D80211_TRACE_MODE_HE 0x04000000 #define D80211_TRACE_MODE_EHT 0x08000000 +#ifdef LINUXKPI_DEBUG_80211 +#define TRACE_SCAN(ic, fmt, ...) \ + if (linuxkpi_debug_80211 & D80211_SCAN) \ + printf("%s:%d: %s SCAN " fmt "\n", \ + __func__, __LINE__, ic->ic_name, ##__VA_ARGS__) +#define TRACE_SCAN_BEACON(ic, fmt, ...) \ + if (linuxkpi_debug_80211 & D80211_SCAN_BEACON) \ + printf("%s:%d: %s SCAN " fmt "\n", \ + __func__, __LINE__, ic->ic_name, ##__VA_ARGS__) +#else +#define TRACE_SCAN(...) do {} while (0) +#define TRACE_SCAN_BEACON(...) do {} while (0) +#endif + #define IMPROVE_TXQ(...) \ if (linuxkpi_debug_80211 & D80211_IMPROVE_TXQ) \ printf("%s:%d: XXX LKPI80211 IMPROVE_TXQ\n", __func__, __LINE__) @@ -191,6 +207,7 @@ struct lkpi_vif { struct mbuf *, int, const struct ieee80211_rx_stats *, int, int); + struct task sw_scan_task; struct list_head lsta_list; @@ -236,6 +253,7 @@ struct lkpi_hw { /* name it mac80211_sc? */ struct mtx txq_mtx; uint32_t txq_generation[IEEE80211_NUM_ACS]; TAILQ_HEAD(, lkpi_txq) scheduled_txqs[IEEE80211_NUM_ACS]; + spinlock_t txq_lock; /* Deferred RX path. */ struct task rxq_task; @@ -298,6 +316,9 @@ struct lkpi_hw { /* name it mac80211_sc? */ #define LHW_TO_HW(_lhw) (&(_lhw)->hw) #define HW_TO_LHW(_hw) container_of(_hw, struct lkpi_hw, hw) +#define LKPI_LHW_SCAN_BITS \ + "\010\1RUNING\2HW" + struct lkpi_chanctx { struct list_head entry; diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c index dcdec0dfcc78..458744a9fec6 100644 --- a/sys/compat/linuxkpi/common/src/linux_compat.c +++ b/sys/compat/linuxkpi/common/src/linux_compat.c @@ -2120,7 +2120,7 @@ add_timer_on(struct timer_list *timer, int cpu) } int -del_timer(struct timer_list *timer) +timer_delete(struct timer_list *timer) { if (callout_stop(&(timer)->callout) == -1) @@ -2129,7 +2129,7 @@ del_timer(struct timer_list *timer) } int -del_timer_sync(struct timer_list *timer) +timer_delete_sync(struct timer_list *timer) { if (callout_drain(&(timer)->callout) == -1) @@ -2138,13 +2138,6 @@ del_timer_sync(struct timer_list *timer) } int -timer_delete_sync(struct timer_list *timer) -{ - - return (del_timer_sync(timer)); -} - -int timer_shutdown_sync(struct timer_list *timer) { diff --git a/sys/compat/linuxkpi/common/src/linux_devres.c b/sys/compat/linuxkpi/common/src/linux_devres.c index 84f03ba0dd7d..23c91cb5ab2f 100644 --- a/sys/compat/linuxkpi/common/src/linux_devres.c +++ b/sys/compat/linuxkpi/common/src/linux_devres.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2020-2021 The FreeBSD Foundation + * Copyright (c) 2020-2025 The FreeBSD Foundation * * This software was developed by Bj\xc3\xb6rn Zeeb under sponsorship from * the FreeBSD Foundation. @@ -223,6 +223,30 @@ lkpi_devm_kmalloc_release(struct device *dev __unused, void *p __unused) /* Nothing to do. Freed with the devres. */ } +static int +lkpi_devm_kmalloc_match(struct device *dev __unused, void *p, void *mp) +{ + return (p == mp); +} + +void +lkpi_devm_kfree(struct device *dev, const void *p) +{ + void *mp; + int error; + + if (p == NULL) + return; + + /* I assume Linux simply casts the const away... */ + mp = __DECONST(void *, p); + error = lkpi_devres_destroy(dev, lkpi_devm_kmalloc_release, + lkpi_devm_kmalloc_match, mp); + if (error != 0) + dev_warn(dev, "%s: lkpi_devres_destroy failed with %d\n", + __func__, error); +} + struct devres_action { void *data; void (*action)(void *); diff --git a/sys/compat/linuxkpi/common/src/linux_pci.c b/sys/compat/linuxkpi/common/src/linux_pci.c index d5bbbea1eb2c..43fd6ad28ac4 100644 --- a/sys/compat/linuxkpi/common/src/linux_pci.c +++ b/sys/compat/linuxkpi/common/src/linux_pci.c @@ -111,6 +111,9 @@ static device_method_t pci_methods[] = { DEVMETHOD(pci_iov_uninit, linux_pci_iov_uninit), DEVMETHOD(pci_iov_add_vf, linux_pci_iov_add_vf), + /* Bus interface. */ + DEVMETHOD(bus_add_child, bus_generic_add_child), + /* backlight interface */ DEVMETHOD(backlight_update_status, linux_backlight_update_status), DEVMETHOD(backlight_get_status, linux_backlight_get_status), @@ -145,6 +148,23 @@ struct linux_dma_priv { #define DMA_PRIV_LOCK(priv) mtx_lock(&(priv)->lock) #define DMA_PRIV_UNLOCK(priv) mtx_unlock(&(priv)->lock) +static void +lkpi_set_pcim_iomap_devres(struct pcim_iomap_devres *dr, int bar, + void *res) +{ + dr->mmio_table[bar] = (void *)rman_get_bushandle(res); + dr->res_table[bar] = res; +} + +static bool +lkpi_pci_bar_id_valid(int bar) +{ + if (bar < 0 || bar > PCIR_MAX_BAR_0) + return (false); + + return (true); +} + static int linux_pdev_dma_uninit(struct pci_dev *pdev) { @@ -289,12 +309,18 @@ lkpi_pci_get_device(uint32_t vendor, uint32_t device, struct pci_dev *odev) { struct pci_dev *pdev, *found; - KASSERT(odev == NULL, ("%s: odev argument not yet supported\n", __func__)); - found = NULL; spin_lock(&pci_lock); list_for_each_entry(pdev, &pci_devices, links) { - if (pdev->vendor == vendor && pdev->device == device) { + /* Walk until we find odev. */ + if (odev != NULL) { + if (pdev == odev) + odev = NULL; + continue; + } + + if ((pdev->vendor == vendor || vendor == PCI_ANY_ID) && + (pdev->device == device || device == PCI_ANY_ID)) { found = pdev; break; } @@ -757,6 +783,9 @@ _lkpi_pci_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen __unused) struct pci_mmio_region *mmio, *p; int type; + if (!lkpi_pci_bar_id_valid(bar)) + return (NULL); + type = pci_resource_type(pdev, bar); if (type < 0) { device_printf(pdev->dev.bsddev, "%s: bar %d type %d\n", @@ -797,6 +826,9 @@ linuxkpi_pci_iomap_range(struct pci_dev *pdev, int bar, { struct resource *res; + if (!lkpi_pci_bar_id_valid(bar)) + return (NULL); + res = _lkpi_pci_iomap(pdev, bar, maxlen); if (res == NULL) return (NULL); @@ -810,9 +842,41 @@ linuxkpi_pci_iomap_range(struct pci_dev *pdev, int bar, void * linuxkpi_pci_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen) { + if (!lkpi_pci_bar_id_valid(bar)) + return (NULL); + return (linuxkpi_pci_iomap_range(pdev, bar, 0, maxlen)); } +void * +linuxkpi_pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen) +{ + struct pcim_iomap_devres *dr; + void *res; + + if (!lkpi_pci_bar_id_valid(bar)) + return (NULL); + + dr = lkpi_pcim_iomap_devres_find(pdev); + if (dr == NULL) + return (NULL); + + if (dr->res_table[bar] != NULL) + return (dr->res_table[bar]); + + res = linuxkpi_pci_iomap(pdev, bar, maxlen); + if (res == NULL) { + /* + * Do not free the devres in case there were + * other valid mappings before already. + */ + return (NULL); + } + lkpi_set_pcim_iomap_devres(dr, bar, res); + + return (res); +} + void linuxkpi_pci_iounmap(struct pci_dev *pdev, void *res) { @@ -864,8 +928,7 @@ linuxkpi_pcim_iomap_regions(struct pci_dev *pdev, uint32_t mask, const char *nam res = _lkpi_pci_iomap(pdev, bar, 0); if (res == NULL) goto err; - dr->mmio_table[bar] = (void *)rman_get_bushandle(res); - dr->res_table[bar] = res; + lkpi_set_pcim_iomap_devres(dr, bar, res); mappings |= (1 << bar); } @@ -1099,8 +1162,9 @@ pci_resource_len(struct pci_dev *pdev, int bar) return (rle->count); } -int -pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) +static int +lkpi_pci_request_region(struct pci_dev *pdev, int bar, const char *res_name, + bool managed) { struct resource *res; struct pci_devres *dr; @@ -1108,9 +1172,20 @@ pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) int rid; int type; + if (!lkpi_pci_bar_id_valid(bar)) + return (-EINVAL); + + /* + * If the bar is not valid, return success without adding the BAR; + * otherwise linuxkpi_pcim_request_all_regions() will error. + */ + if (pci_resource_len(pdev, bar) == 0) + return (0); + /* Likewise if it is neither IO nor MEM, nothing to do for us. */ type = pci_resource_type(pdev, bar); if (type < 0) - return (-ENODEV); + return (0); + rid = PCIR_BAR(bar); res = bus_alloc_resource_any(pdev->dev.bsddev, type, &rid, RF_ACTIVE|RF_SHAREABLE); @@ -1123,11 +1198,16 @@ pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) /* * It seems there is an implicit devres tracking on these if the device - * is managed; otherwise the resources are not automatiaclly freed on - * FreeBSD/LinuxKPI tough they should be/are expected to be by Linux - * drivers. + * is managed (lkpi_pci_devres_find() case); otherwise the resources are + * not automatically freed on FreeBSD/LinuxKPI though they should be/are + * expected to be by Linux drivers. + * Otherwise if we are called from a pcim-function with the managed + * argument set, we need to track devres independent of pdev->managed. */ - dr = lkpi_pci_devres_find(pdev); + if (managed) + dr = lkpi_pci_devres_get_alloc(pdev); + else + dr = lkpi_pci_devres_find(pdev); if (dr != NULL) { dr->region_mask |= (1 << bar); dr->region_table[bar] = res; @@ -1144,6 +1224,12 @@ pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) } int +linuxkpi_pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) +{ + return (lkpi_pci_request_region(pdev, bar, res_name, false)); +} + +int linuxkpi_pci_request_regions(struct pci_dev *pdev, const char *res_name) { int error; @@ -1159,6 +1245,24 @@ linuxkpi_pci_request_regions(struct pci_dev *pdev, const char *res_name) return (0); } +int +linuxkpi_pcim_request_all_regions(struct pci_dev *pdev, const char *res_name) +{ + int bar, error; + + for (bar = 0; bar <= PCIR_MAX_BAR_0; bar++) { + error = lkpi_pci_request_region(pdev, bar, res_name, true); + if (error != 0) { + device_printf(pdev->dev.bsddev, "%s: bar %d res_name '%s': " + "lkpi_pci_request_region returned %d\n", __func__, + bar, res_name, error); + pci_release_regions(pdev); + return (error); + } + } + return (0); +} + void linuxkpi_pci_release_region(struct pci_dev *pdev, int bar) { diff --git a/sys/compat/linuxkpi/dummy/include/kunit/skbuff.h b/sys/compat/linuxkpi/dummy/include/kunit/skbuff.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/sys/compat/linuxkpi/dummy/include/kunit/skbuff.h diff --git a/sys/compat/linuxkpi/dummy/include/kunit/test-bug.h b/sys/compat/linuxkpi/dummy/include/kunit/test-bug.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/sys/compat/linuxkpi/dummy/include/kunit/test-bug.h diff --git a/sys/compat/linuxkpi/dummy/include/kunit/test.h b/sys/compat/linuxkpi/dummy/include/kunit/test.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/sys/compat/linuxkpi/dummy/include/kunit/test.h diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 92e98aa57ebf..c7a8862fb906 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -888,13 +888,13 @@ options IEEE80211_DEBUG_REFCNT options IEEE80211_SUPPORT_MESH #enable 802.11s D3.0 support options IEEE80211_SUPPORT_TDMA #enable TDMA support -# The `wlan_wep', `wlan_tkip', and `wlan_ccmp' devices provide -# support for WEP, TKIP, AES-CCMP and AES-GCMP crypto protocols optionally -# used with 802.11 devices that depend on the `wlan' module. +# The `wlan_wep', `wlan_tkip', `wlan_ccmp', and `wlan_gcmp' devices provide +# support for WEP, TKIP, AES-CCMP and AES-GCMP crypto protocols optionally used +# with 802.11 devices that depend on the `wlan' module. device wlan_wep +device wlan_tkip device wlan_ccmp device wlan_gcmp -device wlan_tkip # The `wlan_xauth' device provides support for external (i.e. user-mode) # authenticators for use with 802.11 drivers that use the `wlan' @@ -1249,7 +1249,7 @@ options MAC options MAC_BIBA options MAC_BSDEXTENDED options MAC_DDB -options MAC_DO +options MAC_DO options MAC_IFOFF options MAC_IPACL options MAC_LOMAC diff --git a/sys/conf/files b/sys/conf/files index d89813c70355..9661bafea8f9 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3808,6 +3808,7 @@ kern/kern_hhook.c standard kern/kern_idle.c standard kern/kern_intr.c standard kern/kern_jail.c standard +kern/kern_jaildesc.c standard kern/kern_jailmeta.c standard kern/kern_kcov.c optional kcov \ compile-with "${NOSAN_C} ${MSAN_CFLAGS}" diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk index 1fcfd6467e7f..0251486247da 100644 --- a/sys/conf/kern.pre.mk +++ b/sys/conf/kern.pre.mk @@ -8,7 +8,11 @@ # the rest of /usr/src, but they still always process SRCCONF even though # the normal mechanisms to prevent that (compiling out of tree) won't # work. To ensure they do work, we have to duplicate thee few lines here. +.if exists(${SRCTOP}/src.conf) +SRCCONF?= ${SRCTOP}/src.conf +.else SRCCONF?= /etc/src.conf +.endif .if (exists(${SRCCONF}) || ${SRCCONF} != "/etc/src.conf") && !target(_srcconf_included_) .include "${SRCCONF}" _srcconf_included_: diff --git a/sys/conf/newvers.sh b/sys/conf/newvers.sh index 8b60da95741e..145377c1e75e 100644 --- a/sys/conf/newvers.sh +++ b/sys/conf/newvers.sh @@ -50,8 +50,8 @@ # TYPE="FreeBSD" -REVISION="15.0" -BRANCH="PRERELEASE" +REVISION="16.0" +BRANCH="CURRENT" if [ -n "${BRANCH_OVERRIDE}" ]; then BRANCH=${BRANCH_OVERRIDE} fi diff --git a/sys/conf/options b/sys/conf/options index 4009ba2b4843..66f7f2ee2d7e 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -53,7 +53,7 @@ DDB_CAPTURE_MAXBUFSIZE opt_ddb.h DDB_CTF opt_ddb.h DDB_NUMSYM opt_ddb.h EARLY_PRINTF opt_global.h -BLOAT_KERNEL_WITH_EXTERR opt_global.h +EXTERR_STRINGS opt_global.h FULL_BUF_TRACKING opt_global.h GDB KDB opt_global.h diff --git a/sys/contrib/dev/acpica/components/executer/extrace.c b/sys/contrib/dev/acpica/components/executer/extrace.c index d54d4908ca65..b48a5fcb289b 100644 --- a/sys/contrib/dev/acpica/components/executer/extrace.c +++ b/sys/contrib/dev/acpica/components/executer/extrace.c @@ -301,7 +301,7 @@ AcpiExTraceArgs(ACPI_OPERAND_OBJECT **Params, UINT32 Count) switch (obj_desc->Common.Type) { case ACPI_TYPE_INTEGER: - ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, "%lx", obj_desc->Integer.Value)); + ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, "%jx", (uintmax_t)obj_desc->Integer.Value)); break; case ACPI_TYPE_STRING: diff --git a/sys/contrib/dev/rtw88/main.c b/sys/contrib/dev/rtw88/main.c index 021d076808e0..963b73f35350 100644 --- a/sys/contrib/dev/rtw88/main.c +++ b/sys/contrib/dev/rtw88/main.c @@ -57,6 +57,62 @@ module_param_named(support_vht, rtw_vht_support, bool, 0644); MODULE_PARM_DESC(support_vht, "Set to Y to enable VHT support"); #endif +#if defined(__FreeBSD__) +/* Macros based on rtw89::core.c. */ +#define RTW88_DEF_CHAN(_freq, _hw_val, _flags, _band) \ + { .center_freq = _freq, .hw_value = _hw_val, .flags = _flags, .band = _band, } +#define RTW88_DEF_CHAN_2G(_freq, _hw_val) \ + RTW88_DEF_CHAN(_freq, _hw_val, 0, NL80211_BAND_2GHZ) +#define RTW88_DEF_CHAN_5G(_freq, _hw_val) \ + RTW88_DEF_CHAN(_freq, _hw_val, 0, NL80211_BAND_5GHZ) +#define RTW88_DEF_CHAN_5G_NO_HT40MINUS(_freq, _hw_val) \ + RTW88_DEF_CHAN(_freq, _hw_val, IEEE80211_CHAN_NO_HT40MINUS, NL80211_BAND_5GHZ) + +static struct ieee80211_channel rtw_channeltable_2g[] = { + RTW88_DEF_CHAN_2G(2412, 1), + RTW88_DEF_CHAN_2G(2417, 2), + RTW88_DEF_CHAN_2G(2422, 3), + RTW88_DEF_CHAN_2G(2427, 4), + RTW88_DEF_CHAN_2G(2432, 5), + RTW88_DEF_CHAN_2G(2437, 6), + RTW88_DEF_CHAN_2G(2442, 7), + RTW88_DEF_CHAN_2G(2447, 8), + RTW88_DEF_CHAN_2G(2452, 9), + RTW88_DEF_CHAN_2G(2457, 10), + RTW88_DEF_CHAN_2G(2462, 11), + RTW88_DEF_CHAN_2G(2467, 12), + RTW88_DEF_CHAN_2G(2472, 13), + RTW88_DEF_CHAN_2G(2484, 14), +}; + +static struct ieee80211_channel rtw_channeltable_5g[] = { + RTW88_DEF_CHAN_5G(5180, 36), + RTW88_DEF_CHAN_5G(5200, 40), + RTW88_DEF_CHAN_5G(5220, 44), + RTW88_DEF_CHAN_5G(5240, 48), + RTW88_DEF_CHAN_5G(5260, 52), + RTW88_DEF_CHAN_5G(5280, 56), + RTW88_DEF_CHAN_5G(5300, 60), + RTW88_DEF_CHAN_5G(5320, 64), + RTW88_DEF_CHAN_5G(5500, 100), + RTW88_DEF_CHAN_5G(5520, 104), + RTW88_DEF_CHAN_5G(5540, 108), + RTW88_DEF_CHAN_5G(5560, 112), + RTW88_DEF_CHAN_5G(5580, 116), + RTW88_DEF_CHAN_5G(5600, 120), + RTW88_DEF_CHAN_5G(5620, 124), + RTW88_DEF_CHAN_5G(5640, 128), + RTW88_DEF_CHAN_5G(5660, 132), + RTW88_DEF_CHAN_5G(5680, 136), + RTW88_DEF_CHAN_5G(5700, 140), + RTW88_DEF_CHAN_5G(5720, 144), + RTW88_DEF_CHAN_5G(5745, 149), + RTW88_DEF_CHAN_5G(5765, 153), + RTW88_DEF_CHAN_5G(5785, 157), + RTW88_DEF_CHAN_5G(5805, 161), + RTW88_DEF_CHAN_5G_NO_HT40MINUS(5825, 165), +}; +#elif deifned(__linux__) static struct ieee80211_channel rtw_channeltable_2g[] = { {.center_freq = 2412, .hw_value = 1,}, {.center_freq = 2417, .hw_value = 2,}, @@ -102,6 +158,7 @@ static struct ieee80211_channel rtw_channeltable_5g[] = { {.center_freq = 5825, .hw_value = 165, .flags = IEEE80211_CHAN_NO_HT40MINUS}, }; +#endif static struct ieee80211_rate rtw_ratetable[] = { {.bitrate = 10, .hw_value = 0x00,}, diff --git a/sys/contrib/dev/rtw89/fw.c b/sys/contrib/dev/rtw89/fw.c index e360f27c2ade..b4c0f864bc75 100644 --- a/sys/contrib/dev/rtw89/fw.c +++ b/sys/contrib/dev/rtw89/fw.c @@ -908,11 +908,7 @@ int rtw89_build_phy_tbl_from_elm(struct rtw89_dev *rtwdev, case RTW89_FW_ELEMENT_ID_RADIO_B: case RTW89_FW_ELEMENT_ID_RADIO_C: case RTW89_FW_ELEMENT_ID_RADIO_D: -#if defined(__linux__) rf_path = arg.rf_path; -#elif defined(__FreeBSD__) - rf_path = __DECONST(enum rtw89_rf_path, arg.rf_path); -#endif idx = elm->u.reg2.idx; elm_info->rf_radio[idx] = tbl; diff --git a/sys/contrib/libnv/nvlist.c b/sys/contrib/libnv/nvlist.c index 41edc72322c3..73226ee51a78 100644 --- a/sys/contrib/libnv/nvlist.c +++ b/sys/contrib/libnv/nvlist.c @@ -478,7 +478,7 @@ nvlist_dump_error_check(const nvlist_t *nvl, int fd, int level) void nvlist_dump(const nvlist_t *nvl, int fd) { - const nvlist_t *tmpnvl; + const nvlist_t *tmpnvl, *top; nvpair_t *nvp, *tmpnvp; void *cookie; int level; @@ -487,6 +487,7 @@ nvlist_dump(const nvlist_t *nvl, int fd) if (nvlist_dump_error_check(nvl, fd, level)) return; + top = nvl; nvp = nvlist_first_nvpair(nvl); while (nvp != NULL) { dprintf(fd, "%*s%s (%s):", level * 4, "", nvpair_name(nvp), @@ -645,6 +646,8 @@ nvlist_dump(const nvlist_t *nvl, int fd) while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) { do { + if (nvl == top) + return; cookie = NULL; if (nvlist_in_array(nvl)) dprintf(fd, "%*s,\n", level * 4, ""); @@ -847,7 +850,7 @@ nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep) { unsigned char *buf, *ptr; size_t left, size; - const nvlist_t *tmpnvl; + const nvlist_t *tmpnvl, *top; nvpair_t *nvp, *tmpnvp; void *cookie; @@ -868,6 +871,7 @@ nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep) ptr = nvlist_pack_header(nvl, ptr, &left); + top = nvl; nvp = nvlist_first_nvpair(nvl); while (nvp != NULL) { NVPAIR_ASSERT(nvp); @@ -958,6 +962,8 @@ nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep) goto fail; while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) { do { + if (nvl == top) + goto out; cookie = NULL; if (nvlist_in_array(nvl)) { ptr = nvpair_pack_nvlist_array_next(ptr, diff --git a/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py b/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py index b49255e8381d..08021aabcb61 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py +++ b/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py @@ -65,7 +65,7 @@ if __name__ == '__main__': # check last (HEAD) commit message last_commit_message_raw = subprocess.run([ - 'git', 'show', '-s', '--format=%B', 'HEAD' + 'git', 'show', '-s', '--format=%B', head ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) for line in last_commit_message_raw.stdout.decode().splitlines(): diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml index 4ebb80af1f03..a5dbfc099c90 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml @@ -44,7 +44,7 @@ jobs: os_selection="$FULL_OS" fi - if [ ${{ github.event.inputs.fedora_kernel_ver }} != "" ] ; then + if ${{ github.event.inputs.fedora_kernel_ver != '' }}; then # They specified a custom kernel version for Fedora. Use only # Fedora runners. os_json=$(echo ${os_selection} | jq -c '[.[] | select(startswith("fedora"))]') @@ -53,9 +53,8 @@ jobs: os_json=$(echo ${os_selection} | jq -c) fi - echo $os_json - echo "os=$os_json" >> $GITHUB_OUTPUT - echo "ci_type=$ci_type" >> $GITHUB_OUTPUT + echo "os=$os_json" | tee -a $GITHUB_OUTPUT + echo "ci_type=$ci_type" | tee -a $GITHUB_OUTPUT qemu-vm: name: qemu-x86 @@ -78,7 +77,7 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} - name: Setup QEMU - timeout-minutes: 10 + timeout-minutes: 15 run: .github/workflows/scripts/qemu-1-setup.sh - name: Start build machine diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META index 42f65290e4e3..5704b5c6de8a 100644 --- a/sys/contrib/openzfs/META +++ b/sys/contrib/openzfs/META @@ -1,8 +1,8 @@ Meta: 1 Name: zfs Branch: 1.0 -Version: 2.4.0 -Release: rc1 +Version: 2.4.99 +Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS diff --git a/sys/contrib/openzfs/Makefile.am b/sys/contrib/openzfs/Makefile.am index 5f09d170e730..30f78e490b78 100644 --- a/sys/contrib/openzfs/Makefile.am +++ b/sys/contrib/openzfs/Makefile.am @@ -1,6 +1,7 @@ CLEANFILES = dist_noinst_DATA = INSTALL_DATA_HOOKS = +INSTALL_EXEC_HOOKS = ALL_LOCAL = CLEAN_LOCAL = CHECKS = shellcheck checkbashisms @@ -71,6 +72,9 @@ all: gitrev PHONY += install-data-hook $(INSTALL_DATA_HOOKS) install-data-hook: $(INSTALL_DATA_HOOKS) +PHONY += install-exec-hook $(INSTALL_EXEC_HOOKS) +install-exec-hook: $(INSTALL_EXEC_HOOKS) + PHONY += maintainer-clean-local maintainer-clean-local: -$(RM) $(GITREV) diff --git a/sys/contrib/openzfs/cmd/Makefile.am b/sys/contrib/openzfs/cmd/Makefile.am index 96040976e53e..e79bfae2b10f 100644 --- a/sys/contrib/openzfs/cmd/Makefile.am +++ b/sys/contrib/openzfs/cmd/Makefile.am @@ -107,8 +107,12 @@ $(call SUBST,dbufstat,%D%/) $(call SUBST,zilstat,%D%/) arc_summary: %D%/arc_summary $(AM_V_at)cp $< $@ -endif +cmd-rename-install-exec-hook: + $(LN_S) -f arcstat $(DESTDIR)$(bindir)/zarcstat + $(LN_S) -f arc_summary $(DESTDIR)$(bindir)/zarcsummary +INSTALL_EXEC_HOOKS += cmd-rename-install-exec-hook +endif PHONY += cmd cmd: $(bin_SCRIPTS) $(bin_PROGRAMS) $(sbin_SCRIPTS) $(sbin_PROGRAMS) $(dist_bin_SCRIPTS) $(zfsexec_PROGRAMS) $(mounthelper_PROGRAMS) diff --git a/sys/contrib/openzfs/cmd/arc_summary b/sys/contrib/openzfs/cmd/arc_summary index e60c6b64e8a1..9538dd599cb7 100755 --- a/sys/contrib/openzfs/cmd/arc_summary +++ b/sys/contrib/openzfs/cmd/arc_summary @@ -1021,6 +1021,13 @@ def main(): treated separately because they come with their own call. """ + # notify user for upcoming renaming in 2.4.0 + abs_path = os.path.abspath(sys.argv[0].strip()) + script_name = os.path.basename(abs_path) + if script_name != "zarcsummary": + sys.stderr.write("Note: this script will be renamed to zarcsummary in ") + sys.stderr.write("zfs 2.4.0. Please migrate ASAP.\n") + kstats = get_kstats() if ARGS.graph: diff --git a/sys/contrib/openzfs/cmd/arcstat.in b/sys/contrib/openzfs/cmd/arcstat.in index 6f9abb39c3fb..e153eddb36cf 100755 --- a/sys/contrib/openzfs/cmd/arcstat.in +++ b/sys/contrib/openzfs/cmd/arcstat.in @@ -56,6 +56,7 @@ import time import getopt import re import copy +import os from signal import signal, SIGINT, SIGWINCH, SIG_DFL @@ -766,6 +767,14 @@ def calculate(): def main(): + + # notify user for upcoming renaming in 2.4.0 + abs_path = os.path.abspath(sys.argv[0].strip()) + script_name = os.path.basename(abs_path) + if script_name != "zarcstat": + sys.stderr.write("Note: this script will be renamed to zarcstat in ") + sys.stderr.write("zfs 2.4.0. Please migrate ASAP.\n") + global sint global count global hdr_intr diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index adaa5cd10961..134c258a1e32 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -2635,7 +2635,7 @@ print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb, if (BP_GET_LEVEL(bp) != zb->zb_level) { (void) printf(" (ERROR: Block pointer level " "(%llu) does not match bookmark level (%lld))", - BP_GET_LEVEL(bp), (u_longlong_t)zb->zb_level); + BP_GET_LEVEL(bp), (longlong_t)zb->zb_level); corruption_found = B_TRUE; } } diff --git a/sys/contrib/openzfs/cmd/zhack.c b/sys/contrib/openzfs/cmd/zhack.c index 2bd3051dce7b..536532a6762d 100644 --- a/sys/contrib/openzfs/cmd/zhack.c +++ b/sys/contrib/openzfs/cmd/zhack.c @@ -363,10 +363,12 @@ feature_incr_sync(void *arg, dmu_tx_t *tx) zfeature_info_t *feature = arg; uint64_t refcount; + mutex_enter(&spa->spa_feat_stats_lock); VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); feature_sync(spa, feature, refcount + 1, tx); spa_history_log_internal(spa, "zhack feature incr", tx, "name=%s", feature->fi_guid); + mutex_exit(&spa->spa_feat_stats_lock); } static void @@ -376,10 +378,12 @@ feature_decr_sync(void *arg, dmu_tx_t *tx) zfeature_info_t *feature = arg; uint64_t refcount; + mutex_enter(&spa->spa_feat_stats_lock); VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); feature_sync(spa, feature, refcount - 1, tx); spa_history_log_internal(spa, "zhack feature decr", tx, "name=%s", feature->fi_guid); + mutex_exit(&spa->spa_feat_stats_lock); } static void diff --git a/sys/contrib/openzfs/cmd/zpool/Makefile.am b/sys/contrib/openzfs/cmd/zpool/Makefile.am index 2f962408e5a3..5bb6d8160b18 100644 --- a/sys/contrib/openzfs/cmd/zpool/Makefile.am +++ b/sys/contrib/openzfs/cmd/zpool/Makefile.am @@ -148,6 +148,7 @@ dist_zpoolcompat_DATA = \ %D%/compatibility.d/openzfs-2.1-linux \ %D%/compatibility.d/openzfs-2.2 \ %D%/compatibility.d/openzfs-2.3 \ + %D%/compatibility.d/openzfs-2.4 \ %D%/compatibility.d/openzfsonosx-1.7.0 \ %D%/compatibility.d/openzfsonosx-1.8.1 \ %D%/compatibility.d/openzfsonosx-1.9.3 \ @@ -187,7 +188,9 @@ zpoolcompatlinks = \ "openzfs-2.2 openzfs-2.2-linux" \ "openzfs-2.2 openzfs-2.2-freebsd" \ "openzfs-2.3 openzfs-2.3-linux" \ - "openzfs-2.3 openzfs-2.3-freebsd" + "openzfs-2.3 openzfs-2.3-freebsd" \ + "openzfs-2.4 openzfs-2.4-linux" \ + "openzfs-2.4 openzfs-2.4-freebsd" zpoolconfdir = $(sysconfdir)/zfs/zpool.d INSTALL_DATA_HOOKS += zpool-install-data-hook diff --git a/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4 b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4 new file mode 100644 index 000000000000..3fbd91014c95 --- /dev/null +++ b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4 @@ -0,0 +1,48 @@ +# Features supported by OpenZFS 2.4 on Linux and FreeBSD +allocation_classes +async_destroy +blake3 +block_cloning +block_cloning_endian +bookmark_v2 +bookmark_written +bookmarks +device_rebuild +device_removal +draid +dynamic_gang_header +edonr +embedded_data +empty_bpobj +enabled_txg +encryption +extensible_dataset +fast_dedup +filesystem_limits +head_errlog +hole_birth +large_blocks +large_dnode +large_microzap +livelist +log_spacemap +longname +lz4_compress +multi_vdev_crash_dump +obsolete_counts +physical_rewrite +project_quota +raidz_expansion +redacted_datasets +redaction_bookmarks +redaction_list_spill +resilver_defer +sha512 +skein +spacemap_histogram +spacemap_v2 +userobj_accounting +vdev_zaps_v2 +zilsaxattr +zpool_checkpoint +zstd_compress diff --git a/sys/contrib/openzfs/cmd/zstream/Makefile.am b/sys/contrib/openzfs/cmd/zstream/Makefile.am index be3539fe905d..80ef1ea7ca11 100644 --- a/sys/contrib/openzfs/cmd/zstream/Makefile.am +++ b/sys/contrib/openzfs/cmd/zstream/Makefile.am @@ -18,6 +18,7 @@ zstream_LDADD = \ libzpool.la \ libnvpair.la -PHONY += install-exec-hook -install-exec-hook: +cmd-zstream-install-exec-hook: cd $(DESTDIR)$(sbindir) && $(LN_S) -f zstream zstreamdump + +INSTALL_EXEC_HOOKS += cmd-zstream-install-exec-hook diff --git a/sys/contrib/openzfs/config/always-arch.m4 b/sys/contrib/openzfs/config/always-arch.m4 index 9f413eeddf95..1ee6099ca8b2 100644 --- a/sys/contrib/openzfs/config/always-arch.m4 +++ b/sys/contrib/openzfs/config/always-arch.m4 @@ -39,3 +39,20 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_ARCH], [ AM_CONDITIONAL([TARGET_CPU_SPARC64], test $TARGET_CPU = sparc64) AM_CONDITIONAL([TARGET_CPU_ARM], test $TARGET_CPU = arm) ]) +dnl # +dnl # Check for conflicting environment variables +dnl # +dnl # If ARCH env variable is set up, then kernel Makefile in the /usr/src/kernel +dnl # can misbehave during the zfs ./configure test of the module compilation. +AC_DEFUN([ZFS_AC_CONFIG_CHECK_ARCH_VAR], [ + AC_MSG_CHECKING([for conflicting environment variables]) + if test -n "$ARCH"; then + AC_MSG_RESULT([warning]) + AC_MSG_WARN(m4_normalize([ARCH environment variable is set to "$ARCH". + This can cause build kernel modules support check failure. + Please unset it.])) + else + AC_MSG_RESULT([done]) + fi +]) + diff --git a/sys/contrib/openzfs/config/always-compiler-options.m4 b/sys/contrib/openzfs/config/always-compiler-options.m4 index 6383b12506ee..37fa079e0f4c 100644 --- a/sys/contrib/openzfs/config/always-compiler-options.m4 +++ b/sys/contrib/openzfs/config/always-compiler-options.m4 @@ -156,6 +156,34 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH], [ ]) dnl # +dnl # Check if kernel cc supports -Wno-format-zero-length option. +dnl # +AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_FORMAT_ZERO_LENGTH], [ + saved_cc="$CC" + AS_IF( + [ test -n "$KERNEL_CC" ], [ CC="$KERNEL_CC" ], + [ test -n "$KERNEL_LLVM" ], [ CC="clang" ], + [ CC="gcc" ] + ) + AC_MSG_CHECKING([whether $CC supports -Wno-format-zero-length]) + + saved_flags="$CFLAGS" + CFLAGS="$CFLAGS -Werror -Wno-format-zero-length" + + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ + KERNEL_NO_FORMAT_ZERO_LENGTH=-Wno-format-zero-length + AC_MSG_RESULT([yes]) + ], [ + KERNEL_NO_FORMAT_ZERO_LENGTH= + AC_MSG_RESULT([no]) + ]) + + CC="$saved_cc" + CFLAGS="$saved_flags" + AC_SUBST([KERNEL_NO_FORMAT_ZERO_LENGTH]) +]) + +dnl # dnl # Check if cc supports -Wno-clobbered option. dnl # dnl # We actually invoke it with the -Wclobbered option @@ -231,20 +259,17 @@ dnl # dnl # Check if kernel cc supports -Winfinite-recursion option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_INFINITE_RECURSION], [ - AC_MSG_CHECKING([whether $KERNEL_CC supports -Winfinite-recursion]) - saved_cc="$CC" + AS_IF( + [ test -n "$KERNEL_CC" ], [ CC="$KERNEL_CC" ], + [ test -n "$KERNEL_LLVM" ], [ CC="clang" ], + [ CC="gcc" ] + ) + AC_MSG_CHECKING([whether $CC supports -Winfinite-recursion]) + saved_flags="$CFLAGS" - CC="gcc" CFLAGS="$CFLAGS -Werror -Winfinite-recursion" - AS_IF([ test -n "$KERNEL_CC" ], [ - CC="$KERNEL_CC" - ]) - AS_IF([ test -n "$KERNEL_LLVM" ], [ - CC="clang" - ]) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ KERNEL_INFINITE_RECURSION=-Winfinite-recursion AC_DEFINE([HAVE_KERNEL_INFINITE_RECURSION], 1, @@ -329,20 +354,17 @@ dnl # dnl # Check if kernel cc supports -fno-ipa-sra option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA], [ - AC_MSG_CHECKING([whether $KERNEL_CC supports -fno-ipa-sra]) - saved_cc="$CC" + AS_IF( + [ test -n "$KERNEL_CC" ], [ CC="$KERNEL_CC" ], + [ test -n "$KERNEL_LLVM" ], [ CC="clang" ], + [ CC="gcc" ] + ) + AC_MSG_CHECKING([whether $CC supports -fno-ipa-sra]) + saved_flags="$CFLAGS" - CC="gcc" CFLAGS="$CFLAGS -Werror -fno-ipa-sra" - AS_IF([ test -n "$KERNEL_CC" ], [ - CC="$KERNEL_CC" - ]) - AS_IF([ test -n "$KERNEL_LLVM" ], [ - CC="clang" - ]) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ KERNEL_NO_IPA_SRA=-fno-ipa-sra AC_MSG_RESULT([yes]) diff --git a/sys/contrib/openzfs/config/kernel-blkdev.m4 b/sys/contrib/openzfs/config/kernel-blkdev.m4 index 83190c6fbe3f..02011bf39fb2 100644 --- a/sys/contrib/openzfs/config/kernel-blkdev.m4 +++ b/sys/contrib/openzfs/config/kernel-blkdev.m4 @@ -29,9 +29,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG], [ const char *path = "path"; fmode_t mode = 0; void *holder = NULL; - struct blk_holder_ops h; - bdev = blkdev_get_by_path(path, mode, holder, &h); + bdev = blkdev_get_by_path(path, mode, holder, NULL); ]) ]) @@ -48,9 +47,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_OPEN_BY_PATH], [ const char *path = "path"; fmode_t mode = 0; void *holder = NULL; - struct blk_holder_ops h; - bdh = bdev_open_by_path(path, mode, holder, &h); + bdh = bdev_open_by_path(path, mode, holder, NULL); ]) ]) @@ -68,9 +66,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_FILE_OPEN_BY_PATH], [ const char *path = "path"; fmode_t mode = 0; void *holder = NULL; - struct blk_holder_ops h; - file = bdev_file_open_by_path(path, mode, holder, &h); + file = bdev_file_open_by_path(path, mode, holder, NULL); ]) ]) diff --git a/sys/contrib/openzfs/config/kernel-dentry-operations.m4 b/sys/contrib/openzfs/config/kernel-dentry-operations.m4 index aa5a9f2aff39..6d87ad0e0710 100644 --- a/sys/contrib/openzfs/config/kernel-dentry-operations.m4 +++ b/sys/contrib/openzfs/config/kernel-dentry-operations.m4 @@ -24,6 +24,9 @@ dnl # dnl # 2.6.38 API change dnl # Added d_set_d_op() helper function. dnl # +dnl # 6.17 API change +dnl # d_set_d_op() removed. No direct replacement. +dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_D_SET_D_OP], [ ZFS_LINUX_TEST_SRC([d_set_d_op], [ #include <linux/dcache.h> @@ -34,22 +37,21 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_D_SET_D_OP], [ AC_DEFUN([ZFS_AC_KERNEL_D_SET_D_OP], [ AC_MSG_CHECKING([whether d_set_d_op() is available]) - ZFS_LINUX_TEST_RESULT_SYMBOL([d_set_d_op], - [d_set_d_op], [fs/dcache.c], [ + ZFS_LINUX_TEST_RESULT([d_set_d_op], [ AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_D_SET_D_OP, 1, + [Define if d_set_d_op() is available]) ], [ - ZFS_LINUX_TEST_ERROR([d_set_d_op]) + AC_MSG_RESULT(no) ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY], [ ZFS_AC_KERNEL_SRC_D_OBTAIN_ALIAS ZFS_AC_KERNEL_SRC_D_SET_D_OP - ZFS_AC_KERNEL_SRC_S_D_OP ]) AC_DEFUN([ZFS_AC_KERNEL_DENTRY], [ ZFS_AC_KERNEL_D_OBTAIN_ALIAS ZFS_AC_KERNEL_D_SET_D_OP - ZFS_AC_KERNEL_S_D_OP ]) diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4 index e3e7625db7d8..35819e4d68c5 100644 --- a/sys/contrib/openzfs/config/kernel.m4 +++ b/sys/contrib/openzfs/config/kernel.m4 @@ -70,6 +70,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_COMMIT_METADATA ZFS_AC_KERNEL_SRC_SETATTR_PREPARE ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED + ZFS_AC_KERNEL_SRC_DENTRY ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SRC_SECURITY_INODE ZFS_AC_KERNEL_SRC_FST_MOUNT @@ -188,6 +189,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_COMMIT_METADATA ZFS_AC_KERNEL_SETATTR_PREPARE ZFS_AC_KERNEL_INSERT_INODE_LOCKED + ZFS_AC_KERNEL_DENTRY ZFS_AC_KERNEL_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SECURITY_INODE ZFS_AC_KERNEL_FST_MOUNT diff --git a/sys/contrib/openzfs/config/user-statx.m4 b/sys/contrib/openzfs/config/user-statx.m4 index 0315f93e0c20..1ba74a40e9b8 100644 --- a/sys/contrib/openzfs/config/user-statx.m4 +++ b/sys/contrib/openzfs/config/user-statx.m4 @@ -2,7 +2,7 @@ dnl # dnl # Check for statx() function and STATX_MNT_ID availability dnl # AC_DEFUN([ZFS_AC_CONFIG_USER_STATX], [ - AC_CHECK_HEADERS([linux/stat.h], + AC_CHECK_HEADERS([sys/stat.h], [have_stat_headers=yes], [have_stat_headers=no]) @@ -14,7 +14,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_STATX], [ AC_MSG_CHECKING([for STATX_MNT_ID]) AC_COMPILE_IFELSE([ AC_LANG_PROGRAM([[ - #include <linux/stat.h> + #include <sys/stat.h> ]], [[ struct statx stx; int mask = STATX_MNT_ID; @@ -29,6 +29,6 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_STATX], [ ]) ]) ], [ - AC_MSG_WARN([linux/stat.h not found; skipping statx support]) + AC_MSG_WARN([sys/stat.h not found; skipping statx support]) ]) ]) dnl end AC_DEFUN diff --git a/sys/contrib/openzfs/config/zfs-build.m4 b/sys/contrib/openzfs/config/zfs-build.m4 index 7cf1b02d8757..adf6576f3193 100644 --- a/sys/contrib/openzfs/config/zfs-build.m4 +++ b/sys/contrib/openzfs/config/zfs-build.m4 @@ -256,6 +256,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [ ZFS_AC_CONFIG_ALWAYS_CC_FRAME_LARGER_THAN ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_TRUNCATION ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH + ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_FORMAT_ZERO_LENGTH ZFS_AC_CONFIG_ALWAYS_CC_FORMAT_OVERFLOW ZFS_AC_CONFIG_ALWAYS_CC_NO_OMIT_FRAME_POINTER ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA @@ -265,6 +266,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [ ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD ZFS_AC_CONFIG_ALWAYS_SYSTEM ZFS_AC_CONFIG_ALWAYS_ARCH + ZFS_AC_CONFIG_CHECK_ARCH_VAR ZFS_AC_CONFIG_ALWAYS_PYTHON ZFS_AC_CONFIG_ALWAYS_PYZFS ZFS_AC_CONFIG_ALWAYS_SED diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install index 37284a78ad18..2362c83dfa3f 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install +++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install @@ -37,7 +37,9 @@ usr/lib/zfs-linux/zpool.d/ usr/lib/zfs-linux/zpool_influxdb usr/lib/zfs-linux/zfs_prepare_disk usr/sbin/arc_summary +usr/sbin/zarcsummary usr/sbin/arcstat +usr/sbin/zarcstat usr/sbin/dbufstat usr/sbin/zilstat usr/share/zfs/compatibility.d/ diff --git a/sys/contrib/openzfs/contrib/debian/rules.in b/sys/contrib/openzfs/contrib/debian/rules.in index 2b0568938b25..966e34bf9dc6 100755 --- a/sys/contrib/openzfs/contrib/debian/rules.in +++ b/sys/contrib/openzfs/contrib/debian/rules.in @@ -82,7 +82,9 @@ override_dh_auto_install: # https://www.debian.org/doc/debian-policy/ch-files.html#s-scripts mkdir -p '$(CURDIR)/debian/tmp/usr/sbin/' mv '$(CURDIR)/debian/tmp/usr/bin/arc_summary' '$(CURDIR)/debian/tmp/usr/sbin/arc_summary' + mv '$(CURDIR)/debian/tmp/usr/bin/zarcsummary' '$(CURDIR)/debian/tmp/usr/sbin/zarcsummary' mv '$(CURDIR)/debian/tmp/usr/bin/arcstat' '$(CURDIR)/debian/tmp/usr/sbin/arcstat' + mv '$(CURDIR)/debian/tmp/usr/bin/zarcstat' '$(CURDIR)/debian/tmp/usr/sbin/zarcstat' mv '$(CURDIR)/debian/tmp/usr/bin/dbufstat' '$(CURDIR)/debian/tmp/usr/sbin/dbufstat' mv '$(CURDIR)/debian/tmp/usr/bin/zilstat' '$(CURDIR)/debian/tmp/usr/sbin/zilstat' diff --git a/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in b/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in index 4776087d9a76..db9bf0e20274 100644 --- a/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in +++ b/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in @@ -8,3 +8,12 @@ fi . /usr/share/initramfs-tools/hook-functions copy_exec /usr/share/initramfs-tools/zfsunlock /usr/bin/zfsunlock + +if [ -f /etc/initramfs-tools/etc/motd ]; then + copy_file text /etc/initramfs-tools/etc/motd /etc/motd +else + tmpf=$(mktemp) + echo "If you use zfs encrypted root filesystems, you can use \`zfsunlock\` to manually unlock it" > "$tmpf" + copy_file text "$tmpf" /etc/motd + rm -f "$tmpf" +fi diff --git a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c index a0bc172c6f44..88698dedabbc 100644 --- a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c +++ b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c @@ -391,7 +391,11 @@ static int zfs_key_config_load(pam_handle_t *pamh, zfs_key_config_t *config, int argc, const char **argv) { +#if defined(__FreeBSD__) + config->homes_prefix = strdup("zroot/home"); +#else config->homes_prefix = strdup("rpool/home"); +#endif if (config->homes_prefix == NULL) { pam_syslog(pamh, LOG_ERR, "strdup failure"); return (PAM_SERVICE_ERR); diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h index 16e8a319a5f8..152e5a606f0e 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h @@ -61,32 +61,6 @@ #endif /* - * 2.6.30 API change, - * The const keyword was added to the 'struct dentry_operations' in - * the dentry structure. To handle this we define an appropriate - * dentry_operations_t typedef which can be used. - */ -typedef const struct dentry_operations dentry_operations_t; - -/* - * 2.6.38 API addition, - * Added d_clear_d_op() helper function which clears some flags and the - * registered dentry->d_op table. This is required because d_set_d_op() - * issues a warning when the dentry operations table is already set. - * For the .zfs control directory to work properly we must be able to - * override the default operations table and register custom .d_automount - * and .d_revalidate callbacks. - */ -static inline void -d_clear_d_op(struct dentry *dentry) -{ - dentry->d_op = NULL; - dentry->d_flags &= ~( - DCACHE_OP_HASH | DCACHE_OP_COMPARE | - DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE); -} - -/* * Walk and invalidate all dentry aliases of an inode * unless it's a mountpoint */ diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h b/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h index 087389b57b34..ad2815e46394 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h @@ -25,6 +25,6 @@ #ifndef _SPL_STAT_H #define _SPL_STAT_H -#include <linux/stat.h> +#include <sys/stat.h> #endif /* SPL_STAT_H */ diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h index 353805fcb969..a8acb83b4c2f 100644 --- a/sys/contrib/openzfs/include/sys/zio.h +++ b/sys/contrib/openzfs/include/sys/zio.h @@ -82,7 +82,8 @@ gbh_nblkptrs(uint64_t size) { static inline zio_eck_t * gbh_eck(zio_gbh_phys_t *gbh, uint64_t size) { ASSERT(IS_P2ALIGNED(size, sizeof (blkptr_t))); - return ((zio_eck_t *)((uintptr_t)gbh + (size_t)size - sizeof (zio_eck_t))); + return ((zio_eck_t *)((uintptr_t)gbh + (size_t)size - + sizeof (zio_eck_t))); } static inline blkptr_t * diff --git a/sys/contrib/openzfs/include/sys/zvol.h b/sys/contrib/openzfs/include/sys/zvol.h index cdc9dba2a28d..5791246e99e4 100644 --- a/sys/contrib/openzfs/include/sys/zvol.h +++ b/sys/contrib/openzfs/include/sys/zvol.h @@ -53,7 +53,7 @@ extern int zvol_set_volsize(const char *, uint64_t); extern int zvol_set_volthreading(const char *, boolean_t); extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t); extern int zvol_set_ro(const char *, boolean_t); -extern zvol_state_handle_t *zvol_suspend(const char *); +extern int zvol_suspend(const char *, zvol_state_handle_t **); extern int zvol_resume(zvol_state_handle_t *); extern void *zvol_tag(zvol_state_handle_t *); diff --git a/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h b/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h index a605af962a6d..13cc0b46ac93 100644 --- a/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h +++ b/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h @@ -33,7 +33,7 @@ #ifdef HAVE_STATX #include <fcntl.h> -#include <linux/stat.h> +#include <sys/stat.h> #endif /* diff --git a/sys/contrib/openzfs/man/man1/arcstat.1 b/sys/contrib/openzfs/man/man1/arcstat.1 index f2474fbb701f..288b98d57a11 100644 --- a/sys/contrib/openzfs/man/man1/arcstat.1 +++ b/sys/contrib/openzfs/man/man1/arcstat.1 @@ -13,13 +13,15 @@ .\" Copyright (c) 2015 by Delphix. All rights reserved. .\" Copyright (c) 2020 by AJ Jordan. All rights reserved. .\" -.Dd December 23, 2022 +.Dd September 19, 2024 .Dt ARCSTAT 1 .Os . .Sh NAME .Nm arcstat .Nd report ZFS ARC and L2ARC statistics +.Sh NOTICE +It will be renamed to zarcstat in zfs 2.4.0. Please migrate ASAP. .Sh SYNOPSIS .Nm .Op Fl havxp diff --git a/sys/contrib/openzfs/man/man1/cstyle.1 b/sys/contrib/openzfs/man/man1/cstyle.1 index 241c82edd5a8..8f29129ce175 100644 --- a/sys/contrib/openzfs/man/man1/cstyle.1 +++ b/sys/contrib/openzfs/man/man1/cstyle.1 @@ -21,7 +21,7 @@ .\" .\" CDDL HEADER END .\" -.Dd May 26, 2021 +.Dd April 4, 2022 .Dt CSTYLE 1 .Os . diff --git a/sys/contrib/openzfs/man/man1/zhack.1 b/sys/contrib/openzfs/man/man1/zhack.1 index f58c0527649b..743bd53b731c 100644 --- a/sys/contrib/openzfs/man/man1/zhack.1 +++ b/sys/contrib/openzfs/man/man1/zhack.1 @@ -23,7 +23,7 @@ .\" .\" lint-ok: WARNING: sections out of conventional order: Sh SYNOPSIS .\" -.Dd May 26, 2021 +.Dd May 3, 2023 .Dt ZHACK 1 .Os . diff --git a/sys/contrib/openzfs/man/man1/ztest.1 b/sys/contrib/openzfs/man/man1/ztest.1 index febbb62b1664..ae857bfea29c 100644 --- a/sys/contrib/openzfs/man/man1/ztest.1 +++ b/sys/contrib/openzfs/man/man1/ztest.1 @@ -24,7 +24,7 @@ .\" reserved. .\" Copyright (c) 2017, Intel Corporation. .\" -.Dd May 26, 2021 +.Dd July 12, 2025 .Dt ZTEST 1 .Os . diff --git a/sys/contrib/openzfs/man/man4/spl.4 b/sys/contrib/openzfs/man/man4/spl.4 index 683f8e2b631f..61dfe42e463d 100644 --- a/sys/contrib/openzfs/man/man4/spl.4 +++ b/sys/contrib/openzfs/man/man4/spl.4 @@ -15,7 +15,7 @@ .\" .\" Copyright 2013 Turbo Fredriksson <turbo@bayour.com>. All rights reserved. .\" -.Dd August 24, 2020 +.Dd May 7, 2025 .Dt SPL 4 .Os . diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4 index 5c7958667f92..e865d6a79c5a 100644 --- a/sys/contrib/openzfs/man/man4/zfs.4 +++ b/sys/contrib/openzfs/man/man4/zfs.4 @@ -17,7 +17,7 @@ .\" own identifying information: .\" Portions Copyright [yyyy] [name of copyright owner] .\" -.Dd May 29, 2025 +.Dd August 14, 2025 .Dt ZFS 4 .Os . diff --git a/sys/contrib/openzfs/man/man5/vdev_id.conf.5 b/sys/contrib/openzfs/man/man5/vdev_id.conf.5 index d2f817631c15..299a23720201 100644 --- a/sys/contrib/openzfs/man/man5/vdev_id.conf.5 +++ b/sys/contrib/openzfs/man/man5/vdev_id.conf.5 @@ -9,7 +9,7 @@ .\" source. A copy of the CDDL is also available via the Internet at .\" http://www.illumos.org/license/CDDL. .\" -.Dd May 26, 2021 +.Dd October 8, 2024 .Dt VDEV_ID.CONF 5 .Os . diff --git a/sys/contrib/openzfs/man/man7/dracut.zfs.7 b/sys/contrib/openzfs/man/man7/dracut.zfs.7 index fb5da553af6e..3d051d4d3343 100644 --- a/sys/contrib/openzfs/man/man7/dracut.zfs.7 +++ b/sys/contrib/openzfs/man/man7/dracut.zfs.7 @@ -1,7 +1,7 @@ .\" SPDX-License-Identifier: CDDL-1.0 .\" SPDX-License-Identifier: 0BSD .\" -.Dd March 28, 2023 +.Dd July 13, 2024 .Dt DRACUT.ZFS 7 .Os . diff --git a/sys/contrib/openzfs/man/man7/vdevprops.7 b/sys/contrib/openzfs/man/man7/vdevprops.7 index acabe6b6613a..61e60d950416 100644 --- a/sys/contrib/openzfs/man/man7/vdevprops.7 +++ b/sys/contrib/openzfs/man/man7/vdevprops.7 @@ -21,7 +21,7 @@ .\" .\" Copyright (c) 2021 Klara, Inc. .\" -.Dd October 30, 2022 +.Dd July 23, 2024 .Dt VDEVPROPS 7 .Os . diff --git a/sys/contrib/openzfs/man/man7/zfsconcepts.7 b/sys/contrib/openzfs/man/man7/zfsconcepts.7 index 5c736e53670d..bb2178d85bcd 100644 --- a/sys/contrib/openzfs/man/man7/zfsconcepts.7 +++ b/sys/contrib/openzfs/man/man7/zfsconcepts.7 @@ -31,7 +31,7 @@ .\" Copyright 2019 Joyent, Inc. .\" Copyright 2023 Klara, Inc. .\" -.Dd October 6, 2023 +.Dd October 2, 2024 .Dt ZFSCONCEPTS 7 .Os . diff --git a/sys/contrib/openzfs/man/man7/zfsprops.7 b/sys/contrib/openzfs/man/man7/zfsprops.7 index ac3152cb5d51..0930771c9fce 100644 --- a/sys/contrib/openzfs/man/man7/zfsprops.7 +++ b/sys/contrib/openzfs/man/man7/zfsprops.7 @@ -39,7 +39,7 @@ .\" Copyright (c) 2019, Kjeld Schouten-Lebbing .\" Copyright (c) 2022 Hewlett Packard Enterprise Development LP. .\" -.Dd June 29, 2024 +.Dd August 6, 2025 .Dt ZFSPROPS 7 .Os . diff --git a/sys/contrib/openzfs/man/man7/zpool-features.7 b/sys/contrib/openzfs/man/man7/zpool-features.7 index 10dfd1f92936..b4404a6eb58d 100644 --- a/sys/contrib/openzfs/man/man7/zpool-features.7 +++ b/sys/contrib/openzfs/man/man7/zpool-features.7 @@ -19,7 +19,7 @@ .\" Copyright (c) 2019, Allan Jude .\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org> .\" -.Dd October 2, 2024 +.Dd July 23, 2025 .Dt ZPOOL-FEATURES 7 .Os . diff --git a/sys/contrib/openzfs/man/man7/zpoolconcepts.7 b/sys/contrib/openzfs/man/man7/zpoolconcepts.7 index dafe3bffc453..b9c8926d835d 100644 --- a/sys/contrib/openzfs/man/man7/zpoolconcepts.7 +++ b/sys/contrib/openzfs/man/man7/zpoolconcepts.7 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd April 7, 2023 +.Dd August 6, 2025 .Dt ZPOOLCONCEPTS 7 .Os . diff --git a/sys/contrib/openzfs/man/man7/zpoolprops.7 b/sys/contrib/openzfs/man/man7/zpoolprops.7 index 5d84753193ee..d3b4c2376943 100644 --- a/sys/contrib/openzfs/man/man7/zpoolprops.7 +++ b/sys/contrib/openzfs/man/man7/zpoolprops.7 @@ -29,7 +29,7 @@ .\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org> .\" Copyright (c) 2023, Klara Inc. .\" -.Dd November 18, 2024 +.Dd December 4, 2024 .Dt ZPOOLPROPS 7 .Os . diff --git a/sys/contrib/openzfs/man/man8/zdb.8 b/sys/contrib/openzfs/man/man8/zdb.8 index 0a5b6af73fdb..e00544e4a5a4 100644 --- a/sys/contrib/openzfs/man/man8/zdb.8 +++ b/sys/contrib/openzfs/man/man8/zdb.8 @@ -15,7 +15,7 @@ .\" Copyright (c) 2017 Lawrence Livermore National Security, LLC. .\" Copyright (c) 2017 Intel Corporation. .\" -.Dd April 23, 2025 +.Dd August 12, 2025 .Dt ZDB 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zed.8.in b/sys/contrib/openzfs/man/man8/zed.8.in index c90a1834403b..eda377aafc1e 100644 --- a/sys/contrib/openzfs/man/man8/zed.8.in +++ b/sys/contrib/openzfs/man/man8/zed.8.in @@ -13,7 +13,7 @@ .\" .\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049) .\" -.Dd May 26, 2021 +.Dd August 22, 2022 .Dt ZED 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-allow.8 b/sys/contrib/openzfs/man/man8/zfs-allow.8 index 5a8e80bf6a43..b154aebd92aa 100644 --- a/sys/contrib/openzfs/man/man8/zfs-allow.8 +++ b/sys/contrib/openzfs/man/man8/zfs-allow.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd March 13, 2025 .Dt ZFS-ALLOW 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-bookmark.8 b/sys/contrib/openzfs/man/man8/zfs-bookmark.8 index 083ff46d241b..5a0933820020 100644 --- a/sys/contrib/openzfs/man/man8/zfs-bookmark.8 +++ b/sys/contrib/openzfs/man/man8/zfs-bookmark.8 @@ -31,7 +31,7 @@ .\" Copyright 2019 Joyent, Inc. .\" Copyright (c) 2019, 2020 by Christian Schwarz. All Rights Reserved. .\" -.Dd May 12, 2022 +.Dd July 11, 2022 .Dt ZFS-BOOKMARK 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-clone.8 b/sys/contrib/openzfs/man/man8/zfs-clone.8 index cd412815f5fe..9609cf2ce36a 100644 --- a/sys/contrib/openzfs/man/man8/zfs-clone.8 +++ b/sys/contrib/openzfs/man/man8/zfs-clone.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZFS-CLONE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-create.8 b/sys/contrib/openzfs/man/man8/zfs-create.8 index 91878056cc7d..58bde5799240 100644 --- a/sys/contrib/openzfs/man/man8/zfs-create.8 +++ b/sys/contrib/openzfs/man/man8/zfs-create.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd June 2, 2023 .Dt ZFS-CREATE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-destroy.8 b/sys/contrib/openzfs/man/man8/zfs-destroy.8 index 38359be02430..6a6791f7a44e 100644 --- a/sys/contrib/openzfs/man/man8/zfs-destroy.8 +++ b/sys/contrib/openzfs/man/man8/zfs-destroy.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd February 5, 2025 .Dt ZFS-DESTROY 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-diff.8 b/sys/contrib/openzfs/man/man8/zfs-diff.8 index d4c48f4109be..5b94ea524666 100644 --- a/sys/contrib/openzfs/man/man8/zfs-diff.8 +++ b/sys/contrib/openzfs/man/man8/zfs-diff.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZFS-DIFF 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-hold.8 b/sys/contrib/openzfs/man/man8/zfs-hold.8 index 0c88937f0dc8..a877e428f88b 100644 --- a/sys/contrib/openzfs/man/man8/zfs-hold.8 +++ b/sys/contrib/openzfs/man/man8/zfs-hold.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd June 30, 2019 +.Dd November 8, 2022 .Dt ZFS-HOLD 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-jail.8 b/sys/contrib/openzfs/man/man8/zfs-jail.8 index 53499a279d05..569f5f57eab4 100644 --- a/sys/contrib/openzfs/man/man8/zfs-jail.8 +++ b/sys/contrib/openzfs/man/man8/zfs-jail.8 @@ -37,7 +37,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd May 27, 2021 +.Dd July 11, 2022 .Dt ZFS-JAIL 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-list.8 b/sys/contrib/openzfs/man/man8/zfs-list.8 index 677d8292e207..42eff94f9762 100644 --- a/sys/contrib/openzfs/man/man8/zfs-list.8 +++ b/sys/contrib/openzfs/man/man8/zfs-list.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd February 8, 2024 +.Dd August 25, 2025 .Dt ZFS-LIST 8 .Os . @@ -50,27 +50,25 @@ .Oo Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Oc Ns … . .Sh DESCRIPTION -If specified, you can list property information by the absolute pathname or the -relative pathname. -By default, all file systems and volumes are displayed. +By default, all file systems and volumes are displayed, with the following +fields: +.Sy name , Sy used , Sy available , Sy referenced , Sy mountpoint . Snapshots are displayed if the .Sy listsnapshots pool property is .Sy on .Po the default is .Sy off -.Pc , +.Pc or if the .Fl t Sy snapshot or .Fl t Sy all options are specified. -The following fields are displayed: -.Sy name , Sy used , Sy available , Sy referenced , Sy mountpoint . .Bl -tag -width "-H" .It Fl H Used for scripting mode. -Do not print headers and separate fields by a single tab instead of arbitrary +Do not print headers, and separate fields by a single tab instead of arbitrary white space. .It Fl j , -json Op Ar --json-int Print the output in JSON format. @@ -87,7 +85,7 @@ of will display only the dataset and its direct children. .It Fl o Ar property A comma-separated list of properties to display. -The property must be: +Each property must be: .Bl -bullet -compact .It One of the properties described in the @@ -125,30 +123,41 @@ section of or the value .Sy name to sort by the dataset name. -Multiple properties can be specified at one time using multiple +Multiple properties can be specified to operate together using multiple .Fl s -property options. +or +.Fl S +options. Multiple .Fl s -options are evaluated from left to right in decreasing order of importance. -The following is a list of sorting criteria: +and +.Fl S +options are evaluated from left to right to supply sort keys in +decreasing order of priority. +Property types operate as follows: .Bl -bullet -compact .It Numeric types sort in numeric order. .It String types sort in alphabetical order. .It -Types inappropriate for a row sort that row to the literal bottom, regardless of -the specified ordering. +Types inappropriate for a row sort that row to the literal bottom, +regardless of the specified ordering. .El .Pp -If no sorting options are specified the existing behavior of -.Nm zfs Cm list -is preserved. +If no sort columns are specified, or if two lines of output would sort +equally across all specified columns, then datasets and bookmarks are +sorted by name, whereas snapshots are sorted first by the name of their +dataset and then by the time of their creation. +When no sort columns are specified but snapshots are listed, this +default behavior causes snapshots to be grouped under their datasets in +chronological order by creation time. .It Fl S Ar property Same as .Fl s , -but sorts by property in descending order. +but sorts by +.Ar property +in descending order. .It Fl t Ar type A comma-separated list of types to display, where .Ar type diff --git a/sys/contrib/openzfs/man/man8/zfs-load-key.8 b/sys/contrib/openzfs/man/man8/zfs-load-key.8 index 7838c46d9e77..3a11cea99fd6 100644 --- a/sys/contrib/openzfs/man/man8/zfs-load-key.8 +++ b/sys/contrib/openzfs/man/man8/zfs-load-key.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd January 13, 2020 +.Dd July 11, 2022 .Dt ZFS-LOAD-KEY 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in b/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in index ea470247daac..9e44ea30c636 100644 --- a/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in +++ b/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in @@ -23,7 +23,7 @@ .\" OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION .\" WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. .\" -.Dd May 31, 2021 +.Dd November 30, 2021 .Dt ZFS-MOUNT-GENERATOR 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-mount.8 b/sys/contrib/openzfs/man/man8/zfs-mount.8 index 9fca6fffd5bb..2689b6dc345b 100644 --- a/sys/contrib/openzfs/man/man8/zfs-mount.8 +++ b/sys/contrib/openzfs/man/man8/zfs-mount.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd February 16, 2019 +.Dd October 12, 2024 .Dt ZFS-MOUNT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-project.8 b/sys/contrib/openzfs/man/man8/zfs-project.8 index 36547680f53e..4ebfdf6ffe4f 100644 --- a/sys/contrib/openzfs/man/man8/zfs-project.8 +++ b/sys/contrib/openzfs/man/man8/zfs-project.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd May 27, 2021 +.Dd July 11, 2022 .Dt ZFS-PROJECT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-promote.8 b/sys/contrib/openzfs/man/man8/zfs-promote.8 index 767045812607..435a7a5d0144 100644 --- a/sys/contrib/openzfs/man/man8/zfs-promote.8 +++ b/sys/contrib/openzfs/man/man8/zfs-promote.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZFS-PROMOTE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-rename.8 b/sys/contrib/openzfs/man/man8/zfs-rename.8 index 4cf192c0682b..8fedc67469e6 100644 --- a/sys/contrib/openzfs/man/man8/zfs-rename.8 +++ b/sys/contrib/openzfs/man/man8/zfs-rename.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZFS-RENAME 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-rewrite.8 b/sys/contrib/openzfs/man/man8/zfs-rewrite.8 index a3a037f3794a..ca5340c7e5eb 100644 --- a/sys/contrib/openzfs/man/man8/zfs-rewrite.8 +++ b/sys/contrib/openzfs/man/man8/zfs-rewrite.8 @@ -21,7 +21,7 @@ .\" .\" Copyright (c) 2025 iXsystems, Inc. .\" -.Dd May 6, 2025 +.Dd July 23, 2025 .Dt ZFS-REWRITE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-send.8 b/sys/contrib/openzfs/man/man8/zfs-send.8 index f7c6b840303c..6c5f6b94afd5 100644 --- a/sys/contrib/openzfs/man/man8/zfs-send.8 +++ b/sys/contrib/openzfs/man/man8/zfs-send.8 @@ -31,7 +31,7 @@ .\" Copyright 2019 Joyent, Inc. .\" Copyright (c) 2024, Klara, Inc. .\" -.Dd October 2, 2024 +.Dd August 29, 2025 .Dt ZFS-SEND 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-set.8 b/sys/contrib/openzfs/man/man8/zfs-set.8 index 67f4d6eba171..08daf09d05f8 100644 --- a/sys/contrib/openzfs/man/man8/zfs-set.8 +++ b/sys/contrib/openzfs/man/man8/zfs-set.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd April 20, 2024 +.Dd October 12, 2024 .Dt ZFS-SET 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-share.8 b/sys/contrib/openzfs/man/man8/zfs-share.8 index f7a09a189182..e9c32a44b0c7 100644 --- a/sys/contrib/openzfs/man/man8/zfs-share.8 +++ b/sys/contrib/openzfs/man/man8/zfs-share.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd May 17, 2021 +.Dd July 11, 2022 .Dt ZFS-SHARE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-snapshot.8 b/sys/contrib/openzfs/man/man8/zfs-snapshot.8 index 3ddd1273c8e8..8f4b2c335f09 100644 --- a/sys/contrib/openzfs/man/man8/zfs-snapshot.8 +++ b/sys/contrib/openzfs/man/man8/zfs-snapshot.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZFS-SNAPSHOT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-upgrade.8 b/sys/contrib/openzfs/man/man8/zfs-upgrade.8 index bac74e37aef9..a5ce2b760da4 100644 --- a/sys/contrib/openzfs/man/man8/zfs-upgrade.8 +++ b/sys/contrib/openzfs/man/man8/zfs-upgrade.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd June 30, 2019 +.Dd July 11, 2022 .Dt ZFS-UPGRADE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-userspace.8 b/sys/contrib/openzfs/man/man8/zfs-userspace.8 index d7a4d18e83b1..c255d911740d 100644 --- a/sys/contrib/openzfs/man/man8/zfs-userspace.8 +++ b/sys/contrib/openzfs/man/man8/zfs-userspace.8 @@ -30,7 +30,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd June 30, 2019 +.Dd July 11, 2022 .Dt ZFS-USERSPACE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-wait.8 b/sys/contrib/openzfs/man/man8/zfs-wait.8 index 554a67455c60..e5c60010d2f9 100644 --- a/sys/contrib/openzfs/man/man8/zfs-wait.8 +++ b/sys/contrib/openzfs/man/man8/zfs-wait.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd May 31, 2021 +.Dd July 11, 2022 .Dt ZFS-WAIT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs-zone.8 b/sys/contrib/openzfs/man/man8/zfs-zone.8 index 7ad0ac89463c..a56a304e82b2 100644 --- a/sys/contrib/openzfs/man/man8/zfs-zone.8 +++ b/sys/contrib/openzfs/man/man8/zfs-zone.8 @@ -38,7 +38,7 @@ .\" Copyright 2019 Joyent, Inc. .\" Copyright 2021 Klara, Inc. .\" -.Dd June 3, 2022 +.Dd July 11, 2022 .Dt ZFS-ZONE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs.8 b/sys/contrib/openzfs/man/man8/zfs.8 index e16a3a82b672..b7566a727469 100644 --- a/sys/contrib/openzfs/man/man8/zfs.8 +++ b/sys/contrib/openzfs/man/man8/zfs.8 @@ -37,7 +37,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd April 18, 2025 +.Dd May 12, 2025 .Dt ZFS 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8 b/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8 index eef0ce68f17b..465e336d170c 100644 --- a/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8 +++ b/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8 @@ -21,7 +21,7 @@ .\" .\" Copyright (c) 2020 by Delphix. All rights reserved. .\" -.Dd April 17, 2020 +.Dd July 11, 2022 .Dt ZFS_IDS_TO_PATH 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zgenhostid.8 b/sys/contrib/openzfs/man/man8/zgenhostid.8 index 2b5b4fc18216..ff564880f97d 100644 --- a/sys/contrib/openzfs/man/man8/zgenhostid.8 +++ b/sys/contrib/openzfs/man/man8/zgenhostid.8 @@ -21,7 +21,7 @@ .\" .\" Copyright (c) 2017 by Lawrence Livermore National Security, LLC. .\" -.Dd May 26, 2021 +.Dd July 11, 2022 .Dt ZGENHOSTID 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-attach.8 b/sys/contrib/openzfs/man/man8/zpool-attach.8 index 51d876767666..f120350a5190 100644 --- a/sys/contrib/openzfs/man/man8/zpool-attach.8 +++ b/sys/contrib/openzfs/man/man8/zpool-attach.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd June 28, 2023 +.Dd November 8, 2023 .Dt ZPOOL-ATTACH 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-checkpoint.8 b/sys/contrib/openzfs/man/man8/zpool-checkpoint.8 index d97d10d5df6e..b654f669cfa2 100644 --- a/sys/contrib/openzfs/man/man8/zpool-checkpoint.8 +++ b/sys/contrib/openzfs/man/man8/zpool-checkpoint.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd May 27, 2021 +.Dd July 11, 2022 .Dt ZPOOL-CHECKPOINT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-clear.8 b/sys/contrib/openzfs/man/man8/zpool-clear.8 index 19cd4be36408..70cd8325bd0e 100644 --- a/sys/contrib/openzfs/man/man8/zpool-clear.8 +++ b/sys/contrib/openzfs/man/man8/zpool-clear.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd May 27, 2021 +.Dd April 29, 2024 .Dt ZPOOL-CLEAR 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-create.8 b/sys/contrib/openzfs/man/man8/zpool-create.8 index 490c67629a20..a36ae260a158 100644 --- a/sys/contrib/openzfs/man/man8/zpool-create.8 +++ b/sys/contrib/openzfs/man/man8/zpool-create.8 @@ -28,7 +28,7 @@ .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org> .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZPOOL-CREATE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-destroy.8 b/sys/contrib/openzfs/man/man8/zpool-destroy.8 index f49f29804ad7..82f3f3e203d6 100644 --- a/sys/contrib/openzfs/man/man8/zpool-destroy.8 +++ b/sys/contrib/openzfs/man/man8/zpool-destroy.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZPOOL-DESTROY 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-detach.8 b/sys/contrib/openzfs/man/man8/zpool-detach.8 index ae02dbc2d5b8..79a44310110d 100644 --- a/sys/contrib/openzfs/man/man8/zpool-detach.8 +++ b/sys/contrib/openzfs/man/man8/zpool-detach.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd August 9, 2019 +.Dd July 11, 2022 .Dt ZPOOL-DETACH 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-export.8 b/sys/contrib/openzfs/man/man8/zpool-export.8 index 171a7541c6d2..02495c088f94 100644 --- a/sys/contrib/openzfs/man/man8/zpool-export.8 +++ b/sys/contrib/openzfs/man/man8/zpool-export.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZPOOL-EXPORT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-get.8 b/sys/contrib/openzfs/man/man8/zpool-get.8 index 1d6d1f08afa6..bfe1bae7619f 100644 --- a/sys/contrib/openzfs/man/man8/zpool-get.8 +++ b/sys/contrib/openzfs/man/man8/zpool-get.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd August 9, 2019 +.Dd October 12, 2024 .Dt ZPOOL-GET 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-history.8 b/sys/contrib/openzfs/man/man8/zpool-history.8 index f15086eabc47..f02168951ff2 100644 --- a/sys/contrib/openzfs/man/man8/zpool-history.8 +++ b/sys/contrib/openzfs/man/man8/zpool-history.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd August 9, 2019 +.Dd July 11, 2022 .Dt ZPOOL-HISTORY 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-import.8 b/sys/contrib/openzfs/man/man8/zpool-import.8 index 9076f5c34929..c6d5f222b6b2 100644 --- a/sys/contrib/openzfs/man/man8/zpool-import.8 +++ b/sys/contrib/openzfs/man/man8/zpool-import.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZPOOL-IMPORT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-initialize.8 b/sys/contrib/openzfs/man/man8/zpool-initialize.8 index 39579a58010e..5299a897cb97 100644 --- a/sys/contrib/openzfs/man/man8/zpool-initialize.8 +++ b/sys/contrib/openzfs/man/man8/zpool-initialize.8 @@ -28,7 +28,7 @@ .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" Copyright (c) 2025 Hewlett Packard Enterprise Development LP. .\" -.Dd May 27, 2021 +.Dd July 30, 2025 .Dt ZPOOL-INITIALIZE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-iostat.8 b/sys/contrib/openzfs/man/man8/zpool-iostat.8 index d8c21d0cfc6c..5dd9c9d55e20 100644 --- a/sys/contrib/openzfs/man/man8/zpool-iostat.8 +++ b/sys/contrib/openzfs/man/man8/zpool-iostat.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd March 16, 2022 +.Dd January 29, 2024 .Dt ZPOOL-IOSTAT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-labelclear.8 b/sys/contrib/openzfs/man/man8/zpool-labelclear.8 index ba3d1509aa75..b807acaaede3 100644 --- a/sys/contrib/openzfs/man/man8/zpool-labelclear.8 +++ b/sys/contrib/openzfs/man/man8/zpool-labelclear.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd May 31, 2021 +.Dd July 11, 2022 .Dt ZPOOL-LABELCLEAR 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-list.8 b/sys/contrib/openzfs/man/man8/zpool-list.8 index b720e203c1c9..106399941f98 100644 --- a/sys/contrib/openzfs/man/man8/zpool-list.8 +++ b/sys/contrib/openzfs/man/man8/zpool-list.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd March 16, 2022 +.Dd October 12, 2024 .Dt ZPOOL-LIST 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-offline.8 b/sys/contrib/openzfs/man/man8/zpool-offline.8 index 49b1f34ad5d5..388c7634acce 100644 --- a/sys/contrib/openzfs/man/man8/zpool-offline.8 +++ b/sys/contrib/openzfs/man/man8/zpool-offline.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd August 9, 2019 +.Dd December 21, 2023 .Dt ZPOOL-OFFLINE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-reguid.8 b/sys/contrib/openzfs/man/man8/zpool-reguid.8 index 77101fc07326..b98c88e320de 100644 --- a/sys/contrib/openzfs/man/man8/zpool-reguid.8 +++ b/sys/contrib/openzfs/man/man8/zpool-reguid.8 @@ -29,7 +29,7 @@ .\" Copyright (c) 2024, Klara Inc. .\" Copyright (c) 2024, Mateusz Piotrowski .\" -.Dd June 21, 2023 +.Dd August 26, 2024 .Dt ZPOOL-REGUID 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-remove.8 b/sys/contrib/openzfs/man/man8/zpool-remove.8 index d10a92e49bbe..4d5fc431d332 100644 --- a/sys/contrib/openzfs/man/man8/zpool-remove.8 +++ b/sys/contrib/openzfs/man/man8/zpool-remove.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd March 16, 2022 +.Dd November 19, 2024 .Dt ZPOOL-REMOVE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-reopen.8 b/sys/contrib/openzfs/man/man8/zpool-reopen.8 index 594cff3d16d8..c4e10f0a546e 100644 --- a/sys/contrib/openzfs/man/man8/zpool-reopen.8 +++ b/sys/contrib/openzfs/man/man8/zpool-reopen.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd June 2, 2021 +.Dd July 11, 2022 .Dt ZPOOL-REOPEN 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-replace.8 b/sys/contrib/openzfs/man/man8/zpool-replace.8 index 9f3156eeb3ef..651af13b19b8 100644 --- a/sys/contrib/openzfs/man/man8/zpool-replace.8 +++ b/sys/contrib/openzfs/man/man8/zpool-replace.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd May 29, 2021 +.Dd July 11, 2022 .Dt ZPOOL-REPLACE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-resilver.8 b/sys/contrib/openzfs/man/man8/zpool-resilver.8 index 2161d77f62ed..59c4be5db209 100644 --- a/sys/contrib/openzfs/man/man8/zpool-resilver.8 +++ b/sys/contrib/openzfs/man/man8/zpool-resilver.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd May 27, 2021 +.Dd July 11, 2022 .Dt ZPOOL-RESILVER 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-scrub.8 b/sys/contrib/openzfs/man/man8/zpool-scrub.8 index 0ecf8bd3851f..cf7ead5788bf 100644 --- a/sys/contrib/openzfs/man/man8/zpool-scrub.8 +++ b/sys/contrib/openzfs/man/man8/zpool-scrub.8 @@ -28,7 +28,7 @@ .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" Copyright (c) 2025 Hewlett Packard Enterprise Development LP. .\" -.Dd December 11, 2024 +.Dd August 6, 2025 .Dt ZPOOL-SCRUB 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-split.8 b/sys/contrib/openzfs/man/man8/zpool-split.8 index a67c865cf30c..ee4c6384cf23 100644 --- a/sys/contrib/openzfs/man/man8/zpool-split.8 +++ b/sys/contrib/openzfs/man/man8/zpool-split.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd June 2, 2021 +.Dd July 11, 2022 .Dt ZPOOL-SPLIT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-status.8 b/sys/contrib/openzfs/man/man8/zpool-status.8 index a7f3e088043b..108a1067b384 100644 --- a/sys/contrib/openzfs/man/man8/zpool-status.8 +++ b/sys/contrib/openzfs/man/man8/zpool-status.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd February 14, 2024 +.Dd May 20, 2025 .Dt ZPOOL-STATUS 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-sync.8 b/sys/contrib/openzfs/man/man8/zpool-sync.8 index 8f438f363e83..d1dc05d0c202 100644 --- a/sys/contrib/openzfs/man/man8/zpool-sync.8 +++ b/sys/contrib/openzfs/man/man8/zpool-sync.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd August 9, 2019 +.Dd July 11, 2022 .Dt ZPOOL-SYNC 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-trim.8 b/sys/contrib/openzfs/man/man8/zpool-trim.8 index 18723e1be0d2..c4e849019789 100644 --- a/sys/contrib/openzfs/man/man8/zpool-trim.8 +++ b/sys/contrib/openzfs/man/man8/zpool-trim.8 @@ -28,7 +28,7 @@ .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" Copyright (c) 2025 Hewlett Packard Enterprise Development LP. .\" -.Dd May 27, 2021 +.Dd July 30, 2025 .Dt ZPOOL-TRIM 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-upgrade.8 b/sys/contrib/openzfs/man/man8/zpool-upgrade.8 index 20632ae4bba0..cf69060da5ce 100644 --- a/sys/contrib/openzfs/man/man8/zpool-upgrade.8 +++ b/sys/contrib/openzfs/man/man8/zpool-upgrade.8 @@ -28,7 +28,7 @@ .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org> .\" -.Dd March 16, 2022 +.Dd July 11, 2022 .Dt ZPOOL-UPGRADE 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool-wait.8 b/sys/contrib/openzfs/man/man8/zpool-wait.8 index 0ffb4badfb7b..28a51d29a913 100644 --- a/sys/contrib/openzfs/man/man8/zpool-wait.8 +++ b/sys/contrib/openzfs/man/man8/zpool-wait.8 @@ -28,7 +28,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd May 27, 2021 +.Dd January 29, 2024 .Dt ZPOOL-WAIT 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zpool.8 b/sys/contrib/openzfs/man/man8/zpool.8 index b96944050594..3bfef780b298 100644 --- a/sys/contrib/openzfs/man/man8/zpool.8 +++ b/sys/contrib/openzfs/man/man8/zpool.8 @@ -27,7 +27,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd February 14, 2024 +.Dd November 19, 2024 .Dt ZPOOL 8 .Os . diff --git a/sys/contrib/openzfs/man/man8/zstream.8 b/sys/contrib/openzfs/man/man8/zstream.8 index 03a8479c9e6a..5b3d063bc4a5 100644 --- a/sys/contrib/openzfs/man/man8/zstream.8 +++ b/sys/contrib/openzfs/man/man8/zstream.8 @@ -21,7 +21,7 @@ .\" .\" Copyright (c) 2020 by Delphix. All rights reserved. .\" -.Dd October 4, 2022 +.Dd November 10, 2022 .Dt ZSTREAM 8 .Os . diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in index 362d2295e091..58a80dc4402c 100644 --- a/sys/contrib/openzfs/module/Kbuild.in +++ b/sys/contrib/openzfs/module/Kbuild.in @@ -4,7 +4,7 @@ ZFS_MODULE_CFLAGS += -std=gnu99 -Wno-declaration-after-statement ZFS_MODULE_CFLAGS += -Wmissing-prototypes -ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @NO_FORMAT_ZERO_LENGTH@ +ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @KERNEL_NO_FORMAT_ZERO_LENGTH@ ifneq ($(KBUILD_EXTMOD),) zfs_include = @abs_top_srcdir@/include diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c index 6d3bcca9f995..dcb0a391dda4 100644 --- a/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c +++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c @@ -38,11 +38,14 @@ kfpu_begin(); E(s, d, b); kfpu_end(); \ } +#if defined(__x86_64) || defined(__aarch64__) || defined(__arm__) || \ + defined(__PPC64__) /* some implementation is always okay */ static inline boolean_t sha2_is_supported(void) { return (B_TRUE); } +#endif #if defined(__x86_64) diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c index 2efd9fcf4c99..a85a71a83df4 100644 --- a/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c +++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c @@ -38,11 +38,14 @@ kfpu_begin(); E(s, d, b); kfpu_end(); \ } +#if defined(__x86_64) || defined(__aarch64__) || defined(__arm__) || \ + defined(__aarch64__) || defined(__arm__) || defined(__PPC64__) /* some implementation is always okay */ static inline boolean_t sha2_is_supported(void) { return (B_TRUE); } +#endif #if defined(__x86_64) diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c index ace2360c032d..393bfaa65ff5 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c @@ -188,11 +188,6 @@ param_set_arc_max(SYSCTL_HANDLER_ARGS) return (0); } -SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max, - CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, - NULL, 0, param_set_arc_max, "LU", - "Maximum ARC size in bytes (LEGACY)"); - int param_set_arc_min(SYSCTL_HANDLER_ARGS) { @@ -217,11 +212,6 @@ param_set_arc_min(SYSCTL_HANDLER_ARGS) return (0); } -SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min, - CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, - NULL, 0, param_set_arc_min, "LU", - "Minimum ARC size in bytes (LEGACY)"); - extern uint_t zfs_arc_free_target; int @@ -245,16 +235,6 @@ param_set_arc_free_target(SYSCTL_HANDLER_ARGS) return (0); } -/* - * NOTE: This sysctl is CTLFLAG_RW not CTLFLAG_RWTUN due to its dependency on - * pagedaemon initialization. - */ -SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target, - CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, - NULL, 0, param_set_arc_free_target, "IU", - "Desired number of free pages below which ARC triggers reclaim" - " (LEGACY)"); - int param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS) { @@ -273,187 +253,6 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS) return (0); } -SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift, - CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, - NULL, 0, param_set_arc_no_grow_shift, "I", - "log2(fraction of ARC which must be free to allow growing) (LEGACY)"); - -extern uint64_t l2arc_write_max; - -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max, - CTLFLAG_RWTUN, &l2arc_write_max, 0, - "Max write bytes per interval (LEGACY)"); - -extern uint64_t l2arc_write_boost; - -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost, - CTLFLAG_RWTUN, &l2arc_write_boost, 0, - "Extra write bytes during device warmup (LEGACY)"); - -extern uint64_t l2arc_headroom; - -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom, - CTLFLAG_RWTUN, &l2arc_headroom, 0, - "Number of max device writes to precache (LEGACY)"); - -extern uint64_t l2arc_headroom_boost; - -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom_boost, - CTLFLAG_RWTUN, &l2arc_headroom_boost, 0, - "Compressed l2arc_headroom multiplier (LEGACY)"); - -extern uint64_t l2arc_feed_secs; - -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs, - CTLFLAG_RWTUN, &l2arc_feed_secs, 0, - "Seconds between L2ARC writing (LEGACY)"); - -extern uint64_t l2arc_feed_min_ms; - -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms, - CTLFLAG_RWTUN, &l2arc_feed_min_ms, 0, - "Min feed interval in milliseconds (LEGACY)"); - -extern int l2arc_noprefetch; - -SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch, - CTLFLAG_RWTUN, &l2arc_noprefetch, 0, - "Skip caching prefetched buffers (LEGACY)"); - -extern int l2arc_feed_again; - -SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again, - CTLFLAG_RWTUN, &l2arc_feed_again, 0, - "Turbo L2ARC warmup (LEGACY)"); - -extern int l2arc_norw; - -SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw, - CTLFLAG_RWTUN, &l2arc_norw, 0, - "No reads during writes (LEGACY)"); - -static int -param_get_arc_state_size(SYSCTL_HANDLER_ARGS) -{ - arc_state_t *state = (arc_state_t *)arg1; - int64_t val; - - val = zfs_refcount_count(&state->arcs_size[ARC_BUFC_DATA]) + - zfs_refcount_count(&state->arcs_size[ARC_BUFC_METADATA]); - return (sysctl_handle_64(oidp, &val, 0, req)); -} - -extern arc_state_t ARC_anon; - -SYSCTL_PROC(_vfs_zfs, OID_AUTO, anon_size, - CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, - &ARC_anon, 0, param_get_arc_state_size, "Q", - "size of anonymous state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD, - &ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, - "size of evictable metadata in anonymous state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD, - &ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0, - "size of evictable data in anonymous state"); - -extern arc_state_t ARC_mru; - -SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_size, - CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, - &ARC_mru, 0, param_get_arc_state_size, "Q", - "size of mru state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD, - &ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, - "size of evictable metadata in mru state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD, - &ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0, - "size of evictable data in mru state"); - -extern arc_state_t ARC_mru_ghost; - -SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_ghost_size, - CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, - &ARC_mru_ghost, 0, param_get_arc_state_size, "Q", - "size of mru ghost state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD, - &ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, - "size of evictable metadata in mru ghost state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD, - &ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0, - "size of evictable data in mru ghost state"); - -extern arc_state_t ARC_mfu; - -SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_size, - CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, - &ARC_mfu, 0, param_get_arc_state_size, "Q", - "size of mfu state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD, - &ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, - "size of evictable metadata in mfu state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD, - &ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0, - "size of evictable data in mfu state"); - -extern arc_state_t ARC_mfu_ghost; - -SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_ghost_size, - CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, - &ARC_mfu_ghost, 0, param_get_arc_state_size, "Q", - "size of mfu ghost state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD, - &ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, - "size of evictable metadata in mfu ghost state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD, - &ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0, - "size of evictable data in mfu ghost state"); - -extern arc_state_t ARC_uncached; - -SYSCTL_PROC(_vfs_zfs, OID_AUTO, uncached_size, - CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, - &ARC_uncached, 0, param_get_arc_state_size, "Q", - "size of uncached state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_metadata_esize, CTLFLAG_RD, - &ARC_uncached.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, - "size of evictable metadata in uncached state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_data_esize, CTLFLAG_RD, - &ARC_uncached.arcs_esize[ARC_BUFC_DATA].rc_count, 0, - "size of evictable data in uncached state"); - -extern arc_state_t ARC_l2c_only; - -SYSCTL_PROC(_vfs_zfs, OID_AUTO, l2c_only_size, - CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, - &ARC_l2c_only, 0, param_get_arc_state_size, "Q", - "size of l2c_only state"); - -/* dbuf.c */ - -/* dmu.c */ - -/* dmu_zfetch.c */ - -SYSCTL_NODE(_vfs_zfs, OID_AUTO, zfetch, CTLFLAG_RW, 0, "ZFS ZFETCH (LEGACY)"); - -extern uint32_t zfetch_max_distance; - -SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance, - CTLFLAG_RWTUN, &zfetch_max_distance, 0, - "Max bytes to prefetch per stream (LEGACY)"); - -extern uint32_t zfetch_max_idistance; - -SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance, - CTLFLAG_RWTUN, &zfetch_max_idistance, 0, - "Max bytes to prefetch indirects for per stream (LEGACY)"); - -/* dsl_pool.c */ - -/* dnode.c */ - -/* dsl_scan.c */ - /* metaslab.c */ int @@ -514,19 +313,6 @@ SYSCTL_UINT(_vfs_zfs, OID_AUTO, condense_pct, "Condense on-disk spacemap when it is more than this many percents" " of in-memory counterpart"); -extern uint_t zfs_remove_max_segment; - -SYSCTL_UINT(_vfs_zfs, OID_AUTO, remove_max_segment, - CTLFLAG_RWTUN, &zfs_remove_max_segment, 0, - "Largest contiguous segment ZFS will attempt to allocate when removing" - " a device"); - -extern int zfs_removal_suspend_progress; - -SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress, - CTLFLAG_RWTUN, &zfs_removal_suspend_progress, 0, - "Ensures certain actions can happen while in the middle of a removal"); - /* * Minimum size which forces the dynamic allocator to change * it's allocation strategy. Once the space map cannot satisfy @@ -749,12 +535,6 @@ param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS) return (0); } -SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift, - CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, - &zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift), - param_set_min_auto_ashift, "IU", - "Min ashift used when creating new top-level vdev. (LEGACY)"); - int param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS) { @@ -774,13 +554,6 @@ param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS) return (0); } -SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift, - CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, - &zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift), - param_set_max_auto_ashift, "IU", - "Max ashift used when optimizing for logical -> physical sector size on" - " new top-level vdevs. (LEGACY)"); - /* * Since the DTL space map of a vdev is not expected to have a lot of * entries, we default its block size to 4K. @@ -802,23 +575,6 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz, CTLFLAG_RDTUN, &zfs_vdev_standard_sm_blksz, 0, "Block size for standard space map. Power of 2 greater than 4096."); -extern int vdev_validate_skip; - -SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip, - CTLFLAG_RDTUN, &vdev_validate_skip, 0, - "Enable to bypass vdev_validate()."); - -/* vdev_mirror.c */ - -/* vdev_queue.c */ - -extern uint_t zfs_vdev_max_active; - -SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, - CTLFLAG_RWTUN, &zfs_vdev_max_active, 0, - "The maximum number of I/Os of all types active for each device." - " (LEGACY)"); - /* zio.c */ SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata, diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c index 174141a5deab..120d97510c9e 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c @@ -61,6 +61,7 @@ #include <sys/fs/zfs.h> #include <sys/dmu.h> #include <sys/dmu_objset.h> +#include <sys/dsl_dataset.h> #include <sys/spa.h> #include <sys/txg.h> #include <sys/dbuf.h> @@ -5729,6 +5730,9 @@ zfs_freebsd_pathconf(struct vop_pathconf_args *ap) { ulong_t val; int error; +#ifdef _PC_CLONE_BLKSIZE + zfsvfs_t *zfsvfs; +#endif error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); @@ -5775,6 +5779,21 @@ zfs_freebsd_pathconf(struct vop_pathconf_args *ap) *ap->a_retval = 1; return (0); #endif +#ifdef _PC_CLONE_BLKSIZE + case _PC_CLONE_BLKSIZE: + zfsvfs = (zfsvfs_t *)ap->a_vp->v_mount->mnt_data; + if (zfs_bclone_enabled && + spa_feature_is_enabled(dmu_objset_spa(zfsvfs->z_os), + SPA_FEATURE_BLOCK_CLONING)) + *ap->a_retval = dsl_dataset_feature_is_active( + zfsvfs->z_os->os_dsl_dataset, + SPA_FEATURE_LARGE_BLOCKS) ? + SPA_MAXBLOCKSIZE : + SPA_OLD_MAXBLOCKSIZE; + else + *ap->a_retval = 0; + return (0); +#endif default: return (vop_stdpathconf(ap)); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c index 48dae79a2373..81ac26cb0c93 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c @@ -202,7 +202,7 @@ zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags) return (!!dentry->d_inode); } -static dentry_operations_t zpl_dops_snapdirs = { +static const struct dentry_operations zpl_dops_snapdirs = { /* * Auto mounting of snapshots is only supported for 2.6.37 and * newer kernels. Prior to this kernel the ops->follow_link() @@ -215,6 +215,51 @@ static dentry_operations_t zpl_dops_snapdirs = { .d_revalidate = zpl_snapdir_revalidate, }; +/* + * For the .zfs control directory to work properly we must be able to override + * the default operations table and register custom .d_automount and + * .d_revalidate callbacks. + */ +static void +set_snapdir_dentry_ops(struct dentry *dentry, unsigned int extraflags) { + static const unsigned int op_flags = + DCACHE_OP_HASH | DCACHE_OP_COMPARE | + DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE | + DCACHE_OP_PRUNE | DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_REAL; + +#ifdef HAVE_D_SET_D_OP + /* + * d_set_d_op() will set the DCACHE_OP_ flags according to what it + * finds in the passed dentry_operations, so we don't have to. + * + * We clear the flags and the old op table before calling d_set_d_op() + * because issues a warning when the dentry operations table is already + * set. + */ + dentry->d_op = NULL; + dentry->d_flags &= ~op_flags; + d_set_d_op(dentry, &zpl_dops_snapdirs); + dentry->d_flags |= extraflags; +#else + /* + * Since 6.17 there's no exported way to modify dentry ops, so we have + * to reach in and do it ourselves. This should be safe for our very + * narrow use case, which is to create or splice in an entry to give + * access to a snapshot. + * + * We need to set the op flags directly. We hardcode + * DCACHE_OP_REVALIDATE because that's the only operation we have; if + * we ever extend zpl_dops_snapdirs we will need to update the op flags + * to match. + */ + spin_lock(&dentry->d_lock); + dentry->d_op = &zpl_dops_snapdirs; + dentry->d_flags &= ~op_flags; + dentry->d_flags |= DCACHE_OP_REVALIDATE | extraflags; + spin_unlock(&dentry->d_lock); +#endif +} + static struct dentry * zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags) @@ -236,10 +281,7 @@ zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry, return (ERR_PTR(error)); ASSERT(error == 0 || ip == NULL); - d_clear_d_op(dentry); - d_set_d_op(dentry, &zpl_dops_snapdirs); - dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; - + set_snapdir_dentry_ops(dentry, DCACHE_NEED_AUTOMOUNT); return (d_splice_alias(ip, dentry)); } @@ -373,8 +415,7 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0); if (error == 0) { - d_clear_d_op(dentry); - d_set_d_op(dentry, &zpl_dops_snapdirs); + set_snapdir_dentry_ops(dentry, 0); d_instantiate(dentry, ip); } diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index df41e3b49204..bd6dc8edd8ca 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -486,13 +486,13 @@ static taskq_t *arc_flush_taskq; static uint_t zfs_arc_evict_threads = 0; /* The 7 states: */ -arc_state_t ARC_anon; -arc_state_t ARC_mru; -arc_state_t ARC_mru_ghost; -arc_state_t ARC_mfu; -arc_state_t ARC_mfu_ghost; -arc_state_t ARC_l2c_only; -arc_state_t ARC_uncached; +static arc_state_t ARC_anon; +/* */ arc_state_t ARC_mru; +static arc_state_t ARC_mru_ghost; +/* */ arc_state_t ARC_mfu; +static arc_state_t ARC_mfu_ghost; +static arc_state_t ARC_l2c_only; +static arc_state_t ARC_uncached; arc_stats_t arc_stats = { { "hits", KSTAT_DATA_UINT64 }, @@ -832,15 +832,15 @@ typedef struct arc_async_flush { #define L2ARC_FEED_TYPES 4 /* L2ARC Performance Tunables */ -uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */ -uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */ -uint64_t l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */ -uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST; -uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ -uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */ -int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ -int l2arc_feed_again = B_TRUE; /* turbo warmup */ -int l2arc_norw = B_FALSE; /* no reads during writes */ +static uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */ +static uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */ +static uint64_t l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */ +static uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST; +static uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ +static uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */ +static int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ +static int l2arc_feed_again = B_TRUE; /* turbo warmup */ +static int l2arc_norw = B_FALSE; /* no reads during writes */ static uint_t l2arc_meta_percent = 33; /* limit on headers size */ /* diff --git a/sys/contrib/openzfs/module/zfs/ddt.c b/sys/contrib/openzfs/module/zfs/ddt.c index d6658375f810..0dc9adc7fd4f 100644 --- a/sys/contrib/openzfs/module/zfs/ddt.c +++ b/sys/contrib/openzfs/module/zfs/ddt.c @@ -1701,9 +1701,11 @@ ddt_load(spa_t *spa) } } - error = ddt_log_load(ddt); - if (error != 0 && error != ENOENT) - return (error); + if (ddt->ddt_flags & DDT_FLAG_LOG) { + error = ddt_log_load(ddt); + if (error != 0 && error != ENOENT) + return (error); + } DDT_KSTAT_SET(ddt, dds_log_active_entries, avl_numnodes(&ddt->ddt_log_active->ddl_tree)); diff --git a/sys/contrib/openzfs/module/zfs/ddt_log.c b/sys/contrib/openzfs/module/zfs/ddt_log.c index 3d30e244c1f7..c9217cef4f7d 100644 --- a/sys/contrib/openzfs/module/zfs/ddt_log.c +++ b/sys/contrib/openzfs/module/zfs/ddt_log.c @@ -176,11 +176,13 @@ ddt_log_update_stats(ddt_t *ddt) * that's reasonable to expect anyway. */ dmu_object_info_t doi; - uint64_t nblocks; - dmu_object_info(ddt->ddt_os, ddt->ddt_log_active->ddl_object, &doi); - nblocks = doi.doi_physical_blocks_512; - dmu_object_info(ddt->ddt_os, ddt->ddt_log_flushing->ddl_object, &doi); - nblocks += doi.doi_physical_blocks_512; + uint64_t nblocks = 0; + if (dmu_object_info(ddt->ddt_os, ddt->ddt_log_active->ddl_object, + &doi) == 0) + nblocks += doi.doi_physical_blocks_512; + if (dmu_object_info(ddt->ddt_os, ddt->ddt_log_flushing->ddl_object, + &doi) == 0) + nblocks += doi.doi_physical_blocks_512; ddt_object_t *ddo = &ddt->ddt_log_stats; ddo->ddo_count = diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c index 51165d0bf723..3d3a9c713568 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c +++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c @@ -57,19 +57,19 @@ static unsigned int zfetch_max_sec_reap = 2; /* min bytes to prefetch per stream (default 2MB) */ static unsigned int zfetch_min_distance = 2 * 1024 * 1024; /* max bytes to prefetch per stream (default 8MB) */ -unsigned int zfetch_max_distance = 8 * 1024 * 1024; +static unsigned int zfetch_max_distance = 8 * 1024 * 1024; #else /* min bytes to prefetch per stream (default 4MB) */ static unsigned int zfetch_min_distance = 4 * 1024 * 1024; /* max bytes to prefetch per stream (default 64MB) */ -unsigned int zfetch_max_distance = 64 * 1024 * 1024; +static unsigned int zfetch_max_distance = 64 * 1024 * 1024; #endif /* max bytes to prefetch indirects for per stream (default 128MB) */ -unsigned int zfetch_max_idistance = 128 * 1024 * 1024; +static unsigned int zfetch_max_idistance = 128 * 1024 * 1024; /* max request reorder distance within a stream (default 16MB) */ -unsigned int zfetch_max_reorder = 16 * 1024 * 1024; +static unsigned int zfetch_max_reorder = 16 * 1024 * 1024; /* Max log2 fraction of holes in a stream */ -unsigned int zfetch_hole_shift = 2; +static unsigned int zfetch_hole_shift = 2; typedef struct zfetch_stats { kstat_named_t zfetchstat_hits; diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c index 9cf35e379000..ed04ce0c86eb 100644 --- a/sys/contrib/openzfs/module/zfs/vdev.c +++ b/sys/contrib/openzfs/module/zfs/vdev.c @@ -100,7 +100,7 @@ static uint_t zfs_vdev_default_ms_shift = 29; /* upper limit for metaslab size (16G) */ static uint_t zfs_vdev_max_ms_shift = 34; -int vdev_validate_skip = B_FALSE; +static int vdev_validate_skip = B_FALSE; /* * Since the DTL space map of a vdev is not expected to have a lot of diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c index c12713b107bf..e69e5598939e 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_queue.c +++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c @@ -122,7 +122,7 @@ * The maximum number of i/os active to each device. Ideally, this will be >= * the sum of each queue's max_active. */ -uint_t zfs_vdev_max_active = 1000; +static uint_t zfs_vdev_max_active = 1000; /* * Per-queue limits on the number of i/os active to each device. If the diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c index 2f7a739da241..2ce0121324ad 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_removal.c +++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c @@ -105,7 +105,7 @@ static const uint_t zfs_remove_max_copy_bytes = 64 * 1024 * 1024; * * See also the accessor function spa_remove_max_segment(). */ -uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE; +static uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE; /* * Ignore hard IO errors during device removal. When set if a device @@ -137,7 +137,7 @@ uint_t vdev_removal_max_span = 32 * 1024; * This is used by the test suite so that it can ensure that certain * actions happen while in the middle of a removal. */ -int zfs_removal_suspend_progress = 0; +static int zfs_removal_suspend_progress = 0; #define VDEV_REMOVAL_ZAP_OBJS "lzap" diff --git a/sys/contrib/openzfs/module/zfs/zfeature.c b/sys/contrib/openzfs/module/zfs/zfeature.c index 0816ea134bf3..4cf9e0dbb405 100644 --- a/sys/contrib/openzfs/module/zfs/zfeature.c +++ b/sys/contrib/openzfs/module/zfs/zfeature.c @@ -308,6 +308,7 @@ feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount, ASSERT(VALID_FEATURE_OR_NONE(feature->fi_feature)); uint64_t zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ? spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj; + ASSERT(MUTEX_HELD(&spa->spa_feat_stats_lock)); VERIFY0(zap_update(spa->spa_meta_objset, zapobj, feature->fi_guid, sizeof (uint64_t), 1, &refcount, tx)); @@ -360,7 +361,9 @@ feature_enable_sync(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx) feature->fi_guid, 1, strlen(feature->fi_desc) + 1, feature->fi_desc, tx)); + mutex_enter(&spa->spa_feat_stats_lock); feature_sync(spa, feature, initial_refcount, tx); + mutex_exit(&spa->spa_feat_stats_lock); if (spa_feature_is_enabled(spa, SPA_FEATURE_ENABLED_TXG)) { uint64_t enabling_txg = dmu_tx_get_txg(tx); @@ -416,6 +419,7 @@ feature_do_action(spa_t *spa, spa_feature_t fid, feature_action_t action, ASSERT(dmu_tx_is_syncing(tx)); ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES); + mutex_enter(&spa->spa_feat_stats_lock); VERIFY3U(feature_get_refcount(spa, feature, &refcount), !=, ENOTSUP); switch (action) { @@ -433,6 +437,7 @@ feature_do_action(spa_t *spa, spa_feature_t fid, feature_action_t action, } feature_sync(spa, feature, refcount, tx); + mutex_exit(&spa->spa_feat_stats_lock); } void diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c index 121b966b9864..76c9d4ccd51f 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c +++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c @@ -4726,7 +4726,7 @@ zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) error = error ? error : resume_err; } zfs_vfs_rele(zfsvfs); - } else if ((zv = zvol_suspend(fsname)) != NULL) { + } else if (zvol_suspend(fsname, &zv) == 0) { error = dsl_dataset_rollback(fsname, target, zvol_tag(zv), outnvl); zvol_resume(zv); @@ -5448,7 +5448,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, const char *origin, } error = error ? error : end_err; zfs_vfs_rele(zfsvfs); - } else if ((zv = zvol_suspend(tofs)) != NULL) { + } else if (zvol_suspend(tofs, &zv) == 0) { error = dmu_recv_end(&drc, zvol_tag(zv)); zvol_resume(zv); } else { diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c index 2fd3e1c37045..faced0db7e9e 100644 --- a/sys/contrib/openzfs/module/zfs/zvol.c +++ b/sys/contrib/openzfs/module/zfs/zvol.c @@ -1145,20 +1145,34 @@ zvol_tag(zvol_state_t *zv) /* * Suspend the zvol for recv and rollback. */ -zvol_state_t * -zvol_suspend(const char *name) +int +zvol_suspend(const char *name, zvol_state_t **zvp) { zvol_state_t *zv; zv = zvol_find_by_name(name, RW_WRITER); if (zv == NULL) - return (NULL); + return (SET_ERROR(ENOENT)); /* block all I/O, release in zvol_resume. */ ASSERT(MUTEX_HELD(&zv->zv_state_lock)); ASSERT(RW_WRITE_HELD(&zv->zv_suspend_lock)); + /* + * If it's being removed, unlock and return error. It doesn't make any + * sense to try to suspend a zvol being removed, but being here also + * means that zvol_remove_minors_impl() is about to call zvol_remove() + * and then destroy the zvol_state_t, so returning a pointer to it for + * the caller to mess with would be a disaster anyway. + */ + if (zv->zv_flags & ZVOL_REMOVING) { + mutex_exit(&zv->zv_state_lock); + rw_exit(&zv->zv_suspend_lock); + /* NB: Returning EIO here to match zfsvfs_teardown() */ + return (SET_ERROR(EIO)); + } + atomic_inc(&zv->zv_suspend_ref); if (zv->zv_open_count > 0) @@ -1171,7 +1185,8 @@ zvol_suspend(const char *name) mutex_exit(&zv->zv_state_lock); /* zv_suspend_lock is released in zvol_resume() */ - return (zv); + *zvp = zv; + return (0); } int diff --git a/sys/contrib/openzfs/rpm/generic/zfs.spec.in b/sys/contrib/openzfs/rpm/generic/zfs.spec.in index 1ce668e7b86d..edcfdd2d7136 100644 --- a/sys/contrib/openzfs/rpm/generic/zfs.spec.in +++ b/sys/contrib/openzfs/rpm/generic/zfs.spec.in @@ -509,7 +509,9 @@ systemctl --system daemon-reload >/dev/null || true %{_bindir}/zvol_wait # Optional Python 3 scripts %{_bindir}/arc_summary +%{_bindir}/zarcsummary %{_bindir}/arcstat +%{_bindir}/zarcstat %{_bindir}/dbufstat %{_bindir}/zilstat # Man pages diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run index f3d56acffde0..ba367fad402b 100644 --- a/sys/contrib/openzfs/tests/runfiles/linux.run +++ b/sys/contrib/openzfs/tests/runfiles/linux.run @@ -161,7 +161,7 @@ tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval', tags = ['functional', 'mmp'] [tests/functional/mount:Linux] -tests = ['umount_unlinked_drain'] +tests = ['umount_unlinked_drain', 'mount_loopback'] tags = ['functional', 'mount'] [tests/functional/pam:Linux] diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg index 884a99d785bc..580281b30d7e 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg +++ b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg @@ -100,6 +100,7 @@ export SYSTEM_FILES_COMMON='awk uniq vmstat wc + which xargs xxh128sum' @@ -146,6 +147,7 @@ export SYSTEM_FILES_LINUX='attr lscpu lsmod lsscsi + mkfs.xfs mkswap modprobe mountpoint diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg index e273c9f85c28..f2d7ceac0cbb 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg +++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg @@ -73,8 +73,8 @@ OVERRIDE_ESTIMATE_RECORDSIZE send.override_estimate_recordsize zfs_override_esti PREFETCH_DISABLE prefetch.disable zfs_prefetch_disable RAIDZ_EXPAND_MAX_REFLOW_BYTES vdev.expand_max_reflow_bytes raidz_expand_max_reflow_bytes REBUILD_SCRUB_ENABLED rebuild_scrub_enabled zfs_rebuild_scrub_enabled -REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress -REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment +REMOVAL_SUSPEND_PROGRESS vdev.removal_suspend_progress zfs_removal_suspend_progress +REMOVE_MAX_SEGMENT vdev.remove_max_segment zfs_remove_max_segment RESILVER_MIN_TIME_MS resilver_min_time_ms zfs_resilver_min_time_ms RESILVER_DEFER_PERCENT resilver_defer_percent zfs_resilver_defer_percent SCAN_LEGACY scan_legacy zfs_scan_legacy diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am index 41e7b45ef4ec..94db292c9518 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am @@ -1706,6 +1706,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/mmp/setup.ksh \ functional/mount/cleanup.ksh \ functional/mount/setup.ksh \ + functional/mount/mount_loopback.ksh \ functional/mount/umount_001.ksh \ functional/mount/umountall_001.ksh \ functional/mount/umount_unlinked_drain.ksh \ diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh index 1b3310edb98b..45b041503e22 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh @@ -67,7 +67,7 @@ log_must zpool create -f ${TESTPOOL} raidz${PARITY} ${disks[1..$((VDEV_CNT - 1)) # Add some data to the pool log_must zfs create $TESTPOOL/fs MNTPOINT="$(get_prop mountpoint $TESTPOOL/fs)" -log_must fill_fs $MNTPOINT $PARITY 200 32768 1000 Z +log_must fill_fs $MNTPOINT $PARITY 200 32768 100 R sync_pool $TESTPOOL # Replace the last child vdev to form a replacing vdev diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mount/mount_loopback.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mount/mount_loopback.ksh new file mode 100755 index 000000000000..86adef7ea032 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mount/mount_loopback.ksh @@ -0,0 +1,111 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# Copyright (c) 2025 by Lawrence Livermore National Security, LLC. + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify that we can make an xfs filesystem on a ZFS-backed loopback device. +# +# See: +# https://github.com/openzfs/zfs/pull/17298 +# https://github.com/openzfs/zfs/issues/17277 +# +# STRATEGY: +# 1. Make a pool +# 2. Make a file on the pool or create zvol +# 3. Mount the file/zvol behind a loopback device +# 4. Create & mount an xfs filesystem on the loopback device + +function cleanup +{ + if [ -d $TEST_BASE_DIR/mnt ] ; then + umount $TEST_BASE_DIR/mnt + log_must rmdir $TEST_BASE_DIR/mnt + fi + if [ -n "$DEV" ] ; then + log_must losetup -d $DEV + fi + destroy_pool $TESTPOOL2 + log_must rm -f $TEST_BASE_DIR/file1 +} + +if [ ! -x "$(which mkfs.xfs)" ] ; then + log_unsupported "No mkfs.xfs binary" +fi + +if [ ! -d /lib/modules/$(uname -r)/kernel/fs/xfs ] && \ + ! grep -qE '\sxfs$' /proc/filesystems ; then + log_unsupported "No XFS kernel support" +fi + +log_assert "Make an xfs filesystem on a ZFS-backed loopback device" +log_onexit cleanup + +# fio options +export NUMJOBS=2 +export RUNTIME=3 +export PERF_RANDSEED=1234 +export PERF_COMPPERCENT=66 +export PERF_COMPCHUNK=0 +export BLOCKSIZE=128K +export SYNC_TYPE=0 +export FILE_SIZE=$(( 1024 * 1024 )) + +function do_test +{ + imgfile=$1 + log_note "Running test on $imgfile" + log_must losetup -f $imgfile + DEV=$(losetup --associated $imgfile | grep -Eo '^/dev/loop[0-9]+') + log_must mkfs.xfs $DEV + mkdir $TEST_BASE_DIR/mnt + log_must mount $DEV $TEST_BASE_DIR/mnt + export DIRECTORY=$TEST_BASE_DIR/mnt + + for d in 0 1 ; do + # fio options + export DIRECT=$d + log_must fio $FIO_SCRIPTS/mkfiles.fio + log_must fio $FIO_SCRIPTS/random_reads.fio + done + log_must umount $TEST_BASE_DIR/mnt + log_must rmdir $TEST_BASE_DIR/mnt + log_must losetup -d $DEV + DEV="" +} + +log_must truncate -s 1G $TEST_BASE_DIR/file1 +log_must zpool create $TESTPOOL2 $TEST_BASE_DIR/file1 +log_must truncate -s 512M /$TESTPOOL2/img +do_test /$TESTPOOL2/img +log_must rm /$TESTPOOL2/img +log_must zfs create -V 512M $TESTPOOL2/vol + +blkdev="$ZVOL_DEVDIR/$TESTPOOL2/vol" +block_device_wait $blkdev +do_test $blkdev + +log_pass "Verified xfs filesystem on a ZFS-backed loopback device" diff --git a/sys/ddb/db_ps.c b/sys/ddb/db_ps.c index 733c440f5ee3..a26cf8161294 100644 --- a/sys/ddb/db_ps.c +++ b/sys/ddb/db_ps.c @@ -459,12 +459,11 @@ DB_SHOW_COMMAND(proc, db_show_proc) db_printf("??? (%#x)\n", p->p_state); } if (p->p_ucred != NULL) { - db_printf(" uid: %d gids: ", p->p_ucred->cr_uid); - for (i = 0; i < p->p_ucred->cr_ngroups; i++) { - db_printf("%d", p->p_ucred->cr_groups[i]); - if (i < (p->p_ucred->cr_ngroups - 1)) - db_printf(", "); - } + db_printf(" uid: %d gid: %d supp gids: ", + p->p_ucred->cr_uid, p->p_ucred->cr_gid); + for (i = 0; i < p->p_ucred->cr_ngroups; i++) + db_printf(i == 0 ? "%d" : ", %d", + p->p_ucred->cr_groups[i]); db_printf("\n"); } if (p->p_pptr != NULL) diff --git a/sys/dev/ahci/ahci_pci.c b/sys/dev/ahci/ahci_pci.c index f29d803e99a8..82f56fc0d19e 100644 --- a/sys/dev/ahci/ahci_pci.c +++ b/sys/dev/ahci/ahci_pci.c @@ -195,6 +195,7 @@ static const struct { {0x1f3f8086, 0x00, "Intel Avoton (RAID)", 0}, {0x23a38086, 0x00, "Intel Coleto Creek", 0}, {0x31e38086, 0x00, "Intel Gemini Lake", 0}, + {0x4b638086, 0x00, "Intel Elkhart Lake", 0}, {0x5ae38086, 0x00, "Intel Apollo Lake", 0}, {0x7ae28086, 0x00, "Intel Alder Lake", 0}, {0x8c028086, 0x00, "Intel Lynx Point", 0}, diff --git a/sys/dev/ath/ath_rate/sample/sample.c b/sys/dev/ath/ath_rate/sample/sample.c index 291d1ec64ed7..79bf08678249 100644 --- a/sys/dev/ath/ath_rate/sample/sample.c +++ b/sys/dev/ath/ath_rate/sample/sample.c @@ -179,7 +179,7 @@ ath_rate_sample_find_min_pktlength(struct ath_softc *sc, const struct txschedule *sched = &sn->sched[rix0]; int max_pkt_length = 65530; // ATH_AGGR_MAXSIZE // Note: this may not be true in all cases; need to check? - int is_ht40 = (an->an_node.ni_chw == IEEE80211_STA_RX_BW_40); + int is_ht40 = (an->an_node.ni_chw == NET80211_STA_RX_BW_40); // Note: not great, but good enough.. int idx = is_ht40 ? MCS_HT40 : MCS_HT20; @@ -979,7 +979,7 @@ update_stats(struct ath_softc *sc, struct ath_node *an, const int size_bin = size_to_bin(frame_size); const int size = bin_to_size(size_bin); int tt; - int is_ht40 = (an->an_node.ni_chw == IEEE80211_STA_RX_BW_40); + int is_ht40 = (an->an_node.ni_chw == NET80211_STA_RX_BW_40); int pct; if (!IS_RATE_DEFINED(sn, rix0)) @@ -1365,7 +1365,7 @@ ath_rate_ctl_reset(struct ath_softc *sc, struct ieee80211_node *ni) continue; printf(" %d %s/%d", dot11rate(rt, rix), dot11rate_label(rt, rix), calc_usecs_unicast_packet(sc, 1600, rix, 0,0, - (ni->ni_chw == IEEE80211_STA_RX_BW_40))); + (ni->ni_chw == NET80211_STA_RX_BW_40))); } printf("\n"); } @@ -1396,7 +1396,7 @@ ath_rate_ctl_reset(struct ath_softc *sc, struct ieee80211_node *ni) sn->stats[y][rix].perfect_tx_time = calc_usecs_unicast_packet(sc, size, rix, 0, 0, - (ni->ni_chw == IEEE80211_STA_RX_BW_40)); + (ni->ni_chw == NET80211_STA_RX_BW_40)); sn->stats[y][rix].average_tx_time = sn->stats[y][rix].perfect_tx_time; } diff --git a/sys/dev/ath/if_ath_tx_ht.c b/sys/dev/ath/if_ath_tx_ht.c index e7ee029fecf0..f42058bacb0d 100644 --- a/sys/dev/ath/if_ath_tx_ht.c +++ b/sys/dev/ath/if_ath_tx_ht.c @@ -283,7 +283,7 @@ ath_tx_rate_fill_rcflags(struct ath_softc *sc, struct ath_buf *bf) if (IS_HT_RATE(rate)) { rc[i].flags |= ATH_RC_HT_FLAG; - if (ni->ni_chw == IEEE80211_STA_RX_BW_40) + if (ni->ni_chw == NET80211_STA_RX_BW_40) rc[i].flags |= ATH_RC_CW40_FLAG; /* @@ -295,13 +295,13 @@ ath_tx_rate_fill_rcflags(struct ath_softc *sc, struct ath_buf *bf) * and doesn't return the fractional part, so * we are always "out" by some amount. */ - if (ni->ni_chw == IEEE80211_STA_RX_BW_40 && + if (ni->ni_chw == NET80211_STA_RX_BW_40 && ieee80211_ht_check_tx_shortgi_40(ni) && (bf->bf_flags & ATH_BUF_TOA_PROBE) == 0) { rc[i].flags |= ATH_RC_SGI_FLAG; } - if (ni->ni_chw == IEEE80211_STA_RX_BW_20 && + if (ni->ni_chw == NET80211_STA_RX_BW_20 && ieee80211_ht_check_tx_shortgi_20(ni) && (bf->bf_flags & ATH_BUF_TOA_PROBE) == 0) { rc[i].flags |= ATH_RC_SGI_FLAG; diff --git a/sys/dev/axgbe/if_axgbe_pci.c b/sys/dev/axgbe/if_axgbe_pci.c index 290156ff11ca..6bc4bd33e162 100644 --- a/sys/dev/axgbe/if_axgbe_pci.c +++ b/sys/dev/axgbe/if_axgbe_pci.c @@ -2415,7 +2415,8 @@ axgbe_if_get_counter(if_ctx_t ctx, ift_counter cnt) case IFCOUNTER_OPACKETS: return (pstats->txframecount_gb); case IFCOUNTER_OERRORS: - return (pstats->txframecount_gb - pstats->txframecount_g); + return (if_get_counter_default(ifp, cnt) + + pstats->txframecount_gb - pstats->txframecount_g); case IFCOUNTER_IBYTES: return (pstats->rxoctetcount_gb); case IFCOUNTER_OBYTES: diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 9e91250cb61c..9756a6945384 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -9016,7 +9016,7 @@ sysctl_loadavg(SYSCTL_HANDLER_ARGS) rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4lavg"); if (rc) return (rc); - if (hw_all_ok(sc)) + if (!hw_all_ok(sc)) rc = ENXIO; else { param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | diff --git a/sys/dev/cyapa/cyapa.c b/sys/dev/cyapa/cyapa.c index 50fa4faa560a..ed755f992949 100644 --- a/sys/dev/cyapa/cyapa.c +++ b/sys/dev/cyapa/cyapa.c @@ -761,42 +761,60 @@ again: /* * Generate report */ - c0 = 0; - if (delta_x < 0) - c0 |= 0x10; - if (delta_y < 0) - c0 |= 0x20; - c0 |= 0x08; - if (but & CYAPA_FNGR_LEFT) - c0 |= 0x01; - if (but & CYAPA_FNGR_MIDDLE) - c0 |= 0x04; - if (but & CYAPA_FNGR_RIGHT) - c0 |= 0x02; - - fifo_write_char(sc, &sc->rfifo, c0); - fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_x); - fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_y); - switch(sc->zenabled) { - case 1: - /* Z axis all 8 bits */ - fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_z); - break; - case 2: - /* - * Z axis low 4 bits + 4th button and 5th button - * (high 2 bits must be left 0). Auto-scale - * delta_z to fit to avoid a wrong-direction - * overflow (don't try to retain the remainder). - */ - while (delta_z > 7 || delta_z < -8) - delta_z >>= 1; - c0 = (uint8_t)delta_z & 0x0F; + if (sc->mode.level == 1) { + c0 = MOUSE_SYS_SYNC; + if (but & CYAPA_FNGR_LEFT) + c0 |= MOUSE_SYS_BUTTON1UP; + if (but & CYAPA_FNGR_MIDDLE) + c0 |= MOUSE_SYS_BUTTON2UP; + if (but & CYAPA_FNGR_RIGHT) + c0 |= MOUSE_SYS_BUTTON3UP; fifo_write_char(sc, &sc->rfifo, c0); - break; - default: - /* basic PS/2 */ - break; + fifo_write_char(sc, &sc->rfifo, delta_x >> 1); + fifo_write_char(sc, &sc->rfifo, delta_y >> 1); + fifo_write_char(sc, &sc->rfifo, delta_x - (delta_x >> 1)); + fifo_write_char(sc, &sc->rfifo, delta_y - (delta_y >> 1)); + fifo_write_char(sc, &sc->rfifo, delta_z >> 1); + fifo_write_char(sc, &sc->rfifo, delta_z - (delta_z >> 1)); + fifo_write_char(sc, &sc->rfifo, MOUSE_SYS_EXTBUTTONS); + } else { + c0 = 0; + if (delta_x < 0) + c0 |= 0x10; + if (delta_y < 0) + c0 |= 0x20; + c0 |= 0x08; + if (but & CYAPA_FNGR_LEFT) + c0 |= 0x01; + if (but & CYAPA_FNGR_MIDDLE) + c0 |= 0x04; + if (but & CYAPA_FNGR_RIGHT) + c0 |= 0x02; + + fifo_write_char(sc, &sc->rfifo, c0); + fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_x); + fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_y); + switch(sc->zenabled) { + case 1: + /* Z axis all 8 bits */ + fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_z); + break; + case 2: + /* + * Z axis low 4 bits + 4th button and 5th button + * (high 2 bits must be left 0). Auto-scale + * delta_z to fit to avoid a wrong-direction + * overflow (don't try to retain the remainder). + */ + while (delta_z > 7 || delta_z < -8) + delta_z >>= 1; + c0 = (uint8_t)delta_z & 0x0F; + fifo_write_char(sc, &sc->rfifo, c0); + break; + default: + /* basic PS/2 */ + break; + } } cyapa_notify(sc); } @@ -1205,6 +1223,11 @@ cyapaioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread ((mousemode_t *)data)->packetsize = MOUSE_PS2_PACKETSIZE; break; + case 1: + ((mousemode_t *)data)->protocol = MOUSE_PROTO_SYSMOUSE; + ((mousemode_t *)data)->packetsize = + MOUSE_SYS_PACKETSIZE; + break; case 2: ((mousemode_t *)data)->protocol = MOUSE_PROTO_PS2; ((mousemode_t *)data)->packetsize = @@ -1223,7 +1246,7 @@ cyapaioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread error = EINVAL; break; } - sc->mode.level = *(int *)data ? 2 : 0; + sc->mode.level = *(int *)data; sc->zenabled = sc->mode.level ? 1 : 0; break; diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 9c5ae2806f75..60959fe679b8 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -4782,8 +4782,8 @@ em_if_get_counter(if_ctx_t ctx, ift_counter cnt) sc->stats.ruc + sc->stats.roc + sc->stats.mpc + sc->stats.cexterr); case IFCOUNTER_OERRORS: - return (sc->stats.ecol + sc->stats.latecol + - sc->watchdog_events); + return (if_get_counter_default(ifp, cnt) + + sc->stats.ecol + sc->stats.latecol + sc->watchdog_events); default: return (if_get_counter_default(ifp, cnt)); } diff --git a/sys/dev/enetc/if_enetc.c b/sys/dev/enetc/if_enetc.c index 3a5d6ec23282..808397b229a7 100644 --- a/sys/dev/enetc/if_enetc.c +++ b/sys/dev/enetc/if_enetc.c @@ -1343,7 +1343,8 @@ enetc_get_counter(if_ctx_t ctx, ift_counter cnt) case IFCOUNTER_IERRORS: return (ENETC_PORT_RD8(sc, ENETC_PM0_RERR)); case IFCOUNTER_OERRORS: - return (ENETC_PORT_RD8(sc, ENETC_PM0_TERR)); + return (if_get_counter_default(ifp, cnt) + + ENETC_PORT_RD8(sc, ENETC_PM0_TERR)); default: return (if_get_counter_default(ifp, cnt)); } diff --git a/sys/dev/gpio/gpio_if.m b/sys/dev/gpio/gpio_if.m index 5501b2b5c0e7..0b6988ceba79 100644 --- a/sys/dev/gpio/gpio_if.m +++ b/sys/dev/gpio/gpio_if.m @@ -62,6 +62,22 @@ CODE { return (0); } + + static int + gpio_default_get_pin_list(device_t dev, uint32_t *pin_list) + { + uint32_t maxpin; + int err; + + err = GPIO_PIN_MAX(dev, &maxpin); + if (err != 0) + return (ENXIO); + + for (int i = 0; i <= maxpin; i++) + pin_list[i] = i; + + return (0); + } }; HEADER { @@ -185,3 +201,13 @@ METHOD int pin_config_32 { uint32_t num_pins; uint32_t *pin_flags; } DEFAULT gpio_default_nosupport; + +# +# Get the controller's pin numbers. pin_list is expected to be an array with at +# least GPIO_PIN_MAX() elements. Populates pin_list from 0 to GPIO_PIN_MAX() by +# default. +# +METHOD int get_pin_list { + device_t dev; + uint32_t *pin_list; +} DEFAULT gpio_default_get_pin_list; diff --git a/sys/dev/gpio/gpiobus.c b/sys/dev/gpio/gpiobus.c index 5f1f6532a79b..698b5e5fdd01 100644 --- a/sys/dev/gpio/gpiobus.c +++ b/sys/dev/gpio/gpiobus.c @@ -319,10 +319,6 @@ gpiobus_add_bus(device_t dev) busdev = device_add_child(dev, "gpiobus", DEVICE_UNIT_ANY); if (busdev == NULL) return (NULL); - if (device_add_child(dev, "gpioc", DEVICE_UNIT_ANY) == NULL) { - device_delete_child(dev, busdev); - return (NULL); - } #ifdef FDT ofw_gpiobus_register_provider(dev); #endif @@ -372,6 +368,37 @@ gpiobus_init_softc(device_t dev) } int +gpiobus_add_gpioc(device_t dev) +{ + struct gpiobus_ivar *devi; + struct gpiobus_softc *sc; + device_t gpioc; + int err; + + gpioc = BUS_ADD_CHILD(dev, 0, "gpioc", DEVICE_UNIT_ANY); + if (gpioc == NULL) + return (ENXIO); + + sc = device_get_softc(dev); + devi = device_get_ivars(gpioc); + + devi->npins = sc->sc_npins; + err = gpiobus_alloc_ivars(devi); + if (err != 0) { + device_delete_child(dev, gpioc); + return (err); + } + + err = GPIO_GET_PIN_LIST(sc->sc_dev, devi->pins); + if (err != 0) { + device_delete_child(dev, gpioc); + gpiobus_free_ivars(devi); + } + + return (err); +} + +int gpiobus_alloc_ivars(struct gpiobus_ivar *devi) { @@ -562,6 +589,10 @@ gpiobus_attach(device_t dev) if (err != 0) return (err); + err = gpiobus_add_gpioc(dev); + if (err != 0) + return (err); + /* * Get parent's pins and mark them as unmapped */ @@ -961,7 +992,7 @@ gpiobus_pin_getflags(device_t dev, device_t child, uint32_t pin, if (pin >= devi->npins) return (EINVAL); - return GPIO_PIN_GETFLAGS(sc->sc_dev, devi->pins[pin], flags); + return (GPIO_PIN_GETFLAGS(sc->sc_dev, devi->pins[pin], flags)); } static int @@ -974,7 +1005,7 @@ gpiobus_pin_getcaps(device_t dev, device_t child, uint32_t pin, if (pin >= devi->npins) return (EINVAL); - return GPIO_PIN_GETCAPS(sc->sc_dev, devi->pins[pin], caps); + return (GPIO_PIN_GETCAPS(sc->sc_dev, devi->pins[pin], caps)); } static int @@ -987,7 +1018,7 @@ gpiobus_pin_set(device_t dev, device_t child, uint32_t pin, if (pin >= devi->npins) return (EINVAL); - return GPIO_PIN_SET(sc->sc_dev, devi->pins[pin], value); + return (GPIO_PIN_SET(sc->sc_dev, devi->pins[pin], value)); } static int @@ -1000,7 +1031,7 @@ gpiobus_pin_get(device_t dev, device_t child, uint32_t pin, if (pin >= devi->npins) return (EINVAL); - return GPIO_PIN_GET(sc->sc_dev, devi->pins[pin], value); + return (GPIO_PIN_GET(sc->sc_dev, devi->pins[pin], value)); } static int @@ -1012,7 +1043,57 @@ gpiobus_pin_toggle(device_t dev, device_t child, uint32_t pin) if (pin >= devi->npins) return (EINVAL); - return GPIO_PIN_TOGGLE(sc->sc_dev, devi->pins[pin]); + return (GPIO_PIN_TOGGLE(sc->sc_dev, devi->pins[pin])); +} + +/* + * Verify that a child has all the pins they are requesting + * to access in their ivars. + */ +static bool +gpiobus_pin_verify_32(struct gpiobus_ivar *devi, uint32_t first_pin, + uint32_t num_pins) +{ + if (first_pin + num_pins > devi->npins) + return (false); + + /* Make sure the pins are consecutive. */ + for (uint32_t pin = first_pin; pin < first_pin + num_pins - 1; pin++) { + if (devi->pins[pin] + 1 != devi->pins[pin + 1]) + return (false); + } + + return (true); +} + +static int +gpiobus_pin_access_32(device_t dev, device_t child, uint32_t first_pin, + uint32_t clear_pins, uint32_t change_pins, uint32_t *orig_pins) +{ + struct gpiobus_softc *sc = GPIOBUS_SOFTC(dev); + struct gpiobus_ivar *devi = GPIOBUS_IVAR(child); + + if (!gpiobus_pin_verify_32(devi, first_pin, 32)) + return (EINVAL); + + return (GPIO_PIN_ACCESS_32(sc->sc_dev, devi->pins[first_pin], + clear_pins, change_pins, orig_pins)); +} + +static int +gpiobus_pin_config_32(device_t dev, device_t child, uint32_t first_pin, + uint32_t num_pins, uint32_t *pin_flags) +{ + struct gpiobus_softc *sc = GPIOBUS_SOFTC(dev); + struct gpiobus_ivar *devi = GPIOBUS_IVAR(child); + + if (num_pins > 32) + return (EINVAL); + if (!gpiobus_pin_verify_32(devi, first_pin, num_pins)) + return (EINVAL); + + return (GPIO_PIN_CONFIG_32(sc->sc_dev, + devi->pins[first_pin], num_pins, pin_flags)); } static int @@ -1093,6 +1174,8 @@ static device_method_t gpiobus_methods[] = { DEVMETHOD(gpiobus_pin_get, gpiobus_pin_get), DEVMETHOD(gpiobus_pin_set, gpiobus_pin_set), DEVMETHOD(gpiobus_pin_toggle, gpiobus_pin_toggle), + DEVMETHOD(gpiobus_pin_access_32,gpiobus_pin_access_32), + DEVMETHOD(gpiobus_pin_config_32,gpiobus_pin_config_32), DEVMETHOD(gpiobus_pin_getname, gpiobus_pin_getname), DEVMETHOD(gpiobus_pin_setname, gpiobus_pin_setname), diff --git a/sys/dev/gpio/gpiobus_if.m b/sys/dev/gpio/gpiobus_if.m index 8bf29839ef4e..890738c4e809 100644 --- a/sys/dev/gpio/gpiobus_if.m +++ b/sys/dev/gpio/gpiobus_if.m @@ -107,6 +107,36 @@ METHOD int pin_setflags { }; # +# Simultaneously read and/or change up to 32 adjacent pins. +# If the device cannot change the pins simultaneously, returns EOPNOTSUPP. +# +# More details about using this interface can be found in sys/gpio.h +# +METHOD int pin_access_32 { + device_t dev; + device_t child; + uint32_t first_pin; + uint32_t clear_pins; + uint32_t change_pins; + uint32_t *orig_pins; +}; + +# +# Simultaneously configure up to 32 adjacent pins. +# This is intended to change the configuration of all the pins simultaneously, +# but unlike pin_access_32, this will not fail if the hardware can't do so. +# +# More details about using this interface can be found in sys/gpio.h +# +METHOD int pin_config_32 { + device_t dev; + device_t child; + uint32_t first_pin; + uint32_t num_pins; + uint32_t *pin_flags; +}; + +# # Get the pin name # METHOD int pin_getname { diff --git a/sys/dev/gpio/gpiobus_internal.h b/sys/dev/gpio/gpiobus_internal.h index c198e5f79989..58f862343403 100644 --- a/sys/dev/gpio/gpiobus_internal.h +++ b/sys/dev/gpio/gpiobus_internal.h @@ -44,6 +44,7 @@ int gpiobus_acquire_pin(device_t, uint32_t); void gpiobus_release_pin(device_t, uint32_t); int gpiobus_child_location(device_t, device_t, struct sbuf *); device_t gpiobus_add_child_common(device_t, u_int, const char *, int, size_t); +int gpiobus_add_gpioc(device_t); extern driver_t gpiobus_driver; #endif diff --git a/sys/dev/gpio/gpioc.c b/sys/dev/gpio/gpioc.c index 87fed38ebe3e..5a60f939dc78 100644 --- a/sys/dev/gpio/gpioc.c +++ b/sys/dev/gpio/gpioc.c @@ -45,7 +45,6 @@ #include <dev/gpio/gpiobusvar.h> -#include "gpio_if.h" #include "gpiobus_if.h" #undef GPIOC_DEBUG @@ -59,7 +58,7 @@ struct gpioc_softc { device_t sc_dev; /* gpiocX dev */ - device_t sc_pdev; /* gpioX dev */ + device_t sc_pdev; /* gpiobusX dev */ struct cdev *sc_ctl_dev; /* controller device */ int sc_unit; int sc_npins; @@ -69,6 +68,7 @@ struct gpioc_softc { struct gpioc_pin_intr { struct gpioc_softc *sc; gpio_pin_t pin; + uint32_t intr_mode; bool config_locked; int intr_rid; struct resource *intr_res; @@ -112,8 +112,10 @@ struct gpioc_pin_event { static MALLOC_DEFINE(M_GPIOC, "gpioc", "gpioc device data"); -static int gpioc_allocate_pin_intr(struct gpioc_pin_intr*, uint32_t); -static int gpioc_release_pin_intr(struct gpioc_pin_intr*); +static int gpioc_allocate_pin_intr(struct gpioc_softc*, + struct gpioc_pin_intr*, uint32_t, uint32_t); +static int gpioc_release_pin_intr(struct gpioc_softc*, + struct gpioc_pin_intr*); static int gpioc_attach_priv_pin(struct gpioc_cdevpriv*, struct gpioc_pin_intr*); static int gpioc_detach_priv_pin(struct gpioc_cdevpriv*, @@ -191,27 +193,36 @@ number_of_events(struct gpioc_cdevpriv *priv) } static int -gpioc_allocate_pin_intr(struct gpioc_pin_intr *intr_conf, uint32_t flags) +gpioc_allocate_pin_intr(struct gpioc_softc *sc, + struct gpioc_pin_intr *intr_conf, uint32_t pin, uint32_t flags) { int err; intr_conf->config_locked = true; mtx_unlock(&intr_conf->mtx); - intr_conf->intr_res = gpio_alloc_intr_resource(intr_conf->pin->dev, + MPASS(intr_conf->pin == NULL); + err = gpio_pin_get_by_bus_pinnum(sc->sc_pdev, pin, &intr_conf->pin); + if (err != 0) + goto error_exit; + + intr_conf->intr_res = gpio_alloc_intr_resource(sc->sc_dev, &intr_conf->intr_rid, RF_ACTIVE, intr_conf->pin, flags); if (intr_conf->intr_res == NULL) { err = ENXIO; - goto error_exit; + goto error_pin; } - err = bus_setup_intr(intr_conf->pin->dev, intr_conf->intr_res, + err = bus_setup_intr(sc->sc_dev, intr_conf->intr_res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, gpioc_interrupt_handler, intr_conf, &intr_conf->intr_cookie); - if (err != 0) - goto error_exit; + if (err != 0) { + bus_release_resource(sc->sc_dev, intr_conf->intr_res); + intr_conf->intr_res = NULL; + goto error_pin; + } - intr_conf->pin->flags = flags; + intr_conf->intr_mode = flags; error_exit: mtx_lock(&intr_conf->mtx); @@ -219,10 +230,15 @@ error_exit: wakeup(&intr_conf->config_locked); return (err); + +error_pin: + gpio_pin_release(intr_conf->pin); + intr_conf->pin = NULL; + goto error_exit; } static int -gpioc_release_pin_intr(struct gpioc_pin_intr *intr_conf) +gpioc_release_pin_intr(struct gpioc_softc *sc, struct gpioc_pin_intr *intr_conf) { int err; @@ -230,8 +246,8 @@ gpioc_release_pin_intr(struct gpioc_pin_intr *intr_conf) mtx_unlock(&intr_conf->mtx); if (intr_conf->intr_cookie != NULL) { - err = bus_teardown_intr(intr_conf->pin->dev, - intr_conf->intr_res, intr_conf->intr_cookie); + err = bus_teardown_intr(sc->sc_dev, intr_conf->intr_res, + intr_conf->intr_cookie); if (err != 0) goto error_exit; else @@ -239,7 +255,7 @@ gpioc_release_pin_intr(struct gpioc_pin_intr *intr_conf) } if (intr_conf->intr_res != NULL) { - err = bus_release_resource(intr_conf->pin->dev, SYS_RES_IRQ, + err = bus_release_resource(sc->sc_dev, SYS_RES_IRQ, intr_conf->intr_rid, intr_conf->intr_res); if (err != 0) goto error_exit; @@ -249,7 +265,10 @@ gpioc_release_pin_intr(struct gpioc_pin_intr *intr_conf) } } - intr_conf->pin->flags = 0; + gpio_pin_release(intr_conf->pin); + intr_conf->pin = NULL; + + intr_conf->intr_mode = 0; err = 0; error_exit: @@ -386,7 +405,7 @@ gpioc_get_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv, struct gpioc_privs *priv_link; uint32_t flags; - flags = intr_conf->pin->flags; + flags = intr_conf->intr_mode; if (flags == 0) return (0); @@ -411,7 +430,7 @@ gpioc_set_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv, int res; res = 0; - if (intr_conf->pin->flags == 0 && flags == 0) { + if (intr_conf->intr_mode == 0 && flags == 0) { /* No interrupt configured and none requested: Do nothing. */ return (0); } @@ -419,17 +438,17 @@ gpioc_set_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv, while (intr_conf->config_locked == true) mtx_sleep(&intr_conf->config_locked, &intr_conf->mtx, 0, "gpicfg", 0); - if (intr_conf->pin->flags == 0 && flags != 0) { + if (intr_conf->intr_mode == 0 && flags != 0) { /* * No interrupt is configured, but one is requested: Allocate * and setup interrupt on the according pin. */ - res = gpioc_allocate_pin_intr(intr_conf, flags); + res = gpioc_allocate_pin_intr(sc, intr_conf, pin, flags); if (res == 0) res = gpioc_attach_priv_pin(priv, intr_conf); if (res == EEXIST) res = 0; - } else if (intr_conf->pin->flags == flags) { + } else if (intr_conf->intr_mode == flags) { /* * Same interrupt requested as already configured: Attach the * cdevpriv to the corresponding pin. @@ -437,14 +456,14 @@ gpioc_set_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv, res = gpioc_attach_priv_pin(priv, intr_conf); if (res == EEXIST) res = 0; - } else if (intr_conf->pin->flags != 0 && flags == 0) { + } else if (intr_conf->intr_mode != 0 && flags == 0) { /* * Interrupt configured, but none requested: Teardown and * release the pin when no other cdevpriv is attached. Otherwise * just detach pin and cdevpriv from each other. */ if (gpioc_intr_reconfig_allowed(priv, intr_conf)) { - res = gpioc_release_pin_intr(intr_conf); + res = gpioc_release_pin_intr(sc, intr_conf); } if (res == 0) res = gpioc_detach_priv_pin(priv, intr_conf); @@ -456,9 +475,10 @@ gpioc_set_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv, if (!gpioc_intr_reconfig_allowed(priv, intr_conf)) res = EBUSY; else { - res = gpioc_release_pin_intr(intr_conf); + res = gpioc_release_pin_intr(sc, intr_conf); if (res == 0) - res = gpioc_allocate_pin_intr(intr_conf, flags); + res = gpioc_allocate_pin_intr(sc, intr_conf, + pin, flags); if (res == 0) res = gpioc_attach_priv_pin(priv, intr_conf); if (res == EEXIST) @@ -475,18 +495,16 @@ gpioc_interrupt_handler(void *arg) { struct gpioc_pin_intr *intr_conf; struct gpioc_privs *privs; - struct gpioc_softc *sc; sbintime_t evtime; - uint32_t pin_state; + bool pin_state; intr_conf = arg; - sc = intr_conf->sc; /* Capture time and pin state first. */ evtime = sbinuptime(); - if (intr_conf->pin->flags & GPIO_INTR_EDGE_BOTH) - GPIO_PIN_GET(sc->sc_pdev, intr_conf->pin->pin, &pin_state); - else if (intr_conf->pin->flags & GPIO_INTR_EDGE_RISING) + if (intr_conf->intr_mode & GPIO_INTR_EDGE_BOTH) + gpio_pin_is_active(intr_conf->pin, &pin_state); + else if (intr_conf->intr_mode & GPIO_INTR_EDGE_RISING) pin_state = true; else pin_state = false; @@ -575,18 +593,11 @@ gpioc_attach(device_t dev) sc->sc_pdev = device_get_parent(dev); sc->sc_unit = device_get_unit(dev); - err = GPIO_PIN_MAX(sc->sc_pdev, &sc->sc_npins); - sc->sc_npins++; /* Number of pins is one more than max pin number. */ - if (err != 0) - return (err); + sc->sc_npins = gpiobus_get_npins(dev); sc->sc_pin_intr = malloc(sizeof(struct gpioc_pin_intr) * sc->sc_npins, M_GPIOC, M_WAITOK | M_ZERO); for (int i = 0; i < sc->sc_npins; i++) { - sc->sc_pin_intr[i].pin = malloc(sizeof(struct gpiobus_pin), - M_GPIOC, M_WAITOK | M_ZERO); sc->sc_pin_intr[i].sc = sc; - sc->sc_pin_intr[i].pin->pin = i; - sc->sc_pin_intr[i].pin->dev = sc->sc_pdev; mtx_init(&sc->sc_pin_intr[i].mtx, "gpioc pin", NULL, MTX_DEF); SLIST_INIT(&sc->sc_pin_intr[i].privs); } @@ -610,20 +621,16 @@ static int gpioc_detach(device_t dev) { struct gpioc_softc *sc = device_get_softc(dev); - int err; if (sc->sc_ctl_dev) destroy_dev(sc->sc_ctl_dev); for (int i = 0; i < sc->sc_npins; i++) { mtx_destroy(&sc->sc_pin_intr[i].mtx); - free(sc->sc_pin_intr[i].pin, M_GPIOC); + MPASS(sc->sc_pin_intr[i].pin == NULL); } free(sc->sc_pin_intr, M_GPIOC); - if ((err = bus_generic_detach(dev)) != 0) - return (err); - return (0); } @@ -655,7 +662,7 @@ gpioc_cdevpriv_dtor(void *data) KASSERT(consistency == 1, ("inconsistent links between pin config and cdevpriv")); if (gpioc_intr_reconfig_allowed(priv, pin_link->pin)) { - gpioc_release_pin_intr(pin_link->pin); + gpioc_release_pin_intr(priv->sc, pin_link->pin); } mtx_unlock(&pin_link->pin->mtx); SLIST_REMOVE(&priv->pins, pin_link, gpioc_pins, next); @@ -778,7 +785,6 @@ static int gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag, struct thread *td) { - device_t bus; int max_pin, res; struct gpioc_softc *sc = cdev->si_drv1; struct gpioc_cdevpriv *priv; @@ -789,30 +795,32 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag, struct gpio_event_config *evcfg; uint32_t caps, intrflags; - bus = GPIO_GET_BUS(sc->sc_pdev); - if (bus == NULL) - return (EINVAL); switch (cmd) { case GPIOMAXPIN: - max_pin = -1; - res = GPIO_PIN_MAX(sc->sc_pdev, &max_pin); + res = 0; + max_pin = sc->sc_npins - 1; bcopy(&max_pin, arg, sizeof(max_pin)); break; case GPIOGETCONFIG: bcopy(arg, &pin, sizeof(pin)); dprintf("get config pin %d\n", pin.gp_pin); - res = GPIO_PIN_GETFLAGS(sc->sc_pdev, pin.gp_pin, + res = GPIOBUS_PIN_GETFLAGS(sc->sc_pdev, sc->sc_dev, pin.gp_pin, &pin.gp_flags); /* Fail early */ - if (res) + if (res != 0) break; res = devfs_get_cdevpriv((void **)&priv); - if (res) + if (res != 0) break; pin.gp_flags |= gpioc_get_intr_config(sc, priv, pin.gp_pin); - GPIO_PIN_GETCAPS(sc->sc_pdev, pin.gp_pin, &pin.gp_caps); - GPIOBUS_PIN_GETNAME(bus, pin.gp_pin, pin.gp_name); + res = GPIOBUS_PIN_GETCAPS(sc->sc_pdev, sc->sc_dev, pin.gp_pin, + &pin.gp_caps); + if (res != 0) + break; + res = GPIOBUS_PIN_GETNAME(sc->sc_pdev, pin.gp_pin, pin.gp_name); + if (res != 0) + break; bcopy(&pin, arg, sizeof(pin)); break; case GPIOSETCONFIG: @@ -821,7 +829,8 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag, res = devfs_get_cdevpriv((void **)&priv); if (res != 0) break; - res = GPIO_PIN_GETCAPS(sc->sc_pdev, pin.gp_pin, &caps); + res = GPIOBUS_PIN_GETCAPS(sc->sc_pdev, sc->sc_dev, + pin.gp_pin, &caps); if (res != 0) break; res = gpio_check_flags(caps, pin.gp_flags); @@ -847,8 +856,8 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag, } if (res != 0) break; - res = GPIO_PIN_SETFLAGS(sc->sc_pdev, pin.gp_pin, - (pin.gp_flags & ~GPIO_INTR_MASK)); + res = GPIOBUS_PIN_SETFLAGS(sc->sc_pdev, sc->sc_dev, pin.gp_pin, + pin.gp_flags & ~GPIO_INTR_MASK); if (res != 0) break; res = gpioc_set_intr_config(sc, priv, pin.gp_pin, @@ -856,40 +865,43 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag, break; case GPIOGET: bcopy(arg, &req, sizeof(req)); - res = GPIO_PIN_GET(sc->sc_pdev, req.gp_pin, + res = GPIOBUS_PIN_GET(sc->sc_pdev, sc->sc_dev, req.gp_pin, &req.gp_value); - dprintf("read pin %d -> %d\n", + if (res != 0) + break; + dprintf("read pin %d -> %d\n", req.gp_pin, req.gp_value); bcopy(&req, arg, sizeof(req)); break; case GPIOSET: bcopy(arg, &req, sizeof(req)); - res = GPIO_PIN_SET(sc->sc_pdev, req.gp_pin, + res = GPIOBUS_PIN_SET(sc->sc_pdev, sc->sc_dev, req.gp_pin, req.gp_value); - dprintf("write pin %d -> %d\n", + dprintf("write pin %d -> %d\n", req.gp_pin, req.gp_value); break; case GPIOTOGGLE: bcopy(arg, &req, sizeof(req)); - dprintf("toggle pin %d\n", + dprintf("toggle pin %d\n", req.gp_pin); - res = GPIO_PIN_TOGGLE(sc->sc_pdev, req.gp_pin); + res = GPIOBUS_PIN_TOGGLE(sc->sc_pdev, sc->sc_dev, req.gp_pin); break; case GPIOSETNAME: bcopy(arg, &pin, sizeof(pin)); dprintf("set name on pin %d\n", pin.gp_pin); - res = GPIOBUS_PIN_SETNAME(bus, pin.gp_pin, + res = GPIOBUS_PIN_SETNAME(sc->sc_pdev, pin.gp_pin, pin.gp_name); break; case GPIOACCESS32: a32 = (struct gpio_access_32 *)arg; - res = GPIO_PIN_ACCESS_32(sc->sc_pdev, a32->first_pin, - a32->clear_pins, a32->change_pins, &a32->orig_pins); + res = GPIOBUS_PIN_ACCESS_32(sc->sc_pdev, sc->sc_dev, + a32->first_pin, a32->clear_pins, a32->change_pins, + &a32->orig_pins); break; case GPIOCONFIG32: c32 = (struct gpio_config_32 *)arg; - res = GPIO_PIN_CONFIG_32(sc->sc_pdev, c32->first_pin, - c32->num_pins, c32->pin_flags); + res = GPIOBUS_PIN_CONFIG_32(sc->sc_pdev, sc->sc_dev, + c32->first_pin, c32->num_pins, c32->pin_flags); break; case GPIOCONFIGEVENTS: evcfg = (struct gpio_event_config *)arg; @@ -1050,9 +1062,6 @@ static device_method_t gpioc_methods[] = { DEVMETHOD(device_probe, gpioc_probe), DEVMETHOD(device_attach, gpioc_attach), DEVMETHOD(device_detach, gpioc_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), DEVMETHOD_END }; @@ -1063,5 +1072,5 @@ driver_t gpioc_driver = { sizeof(struct gpioc_softc) }; -DRIVER_MODULE(gpioc, gpio, gpioc_driver, 0, 0); +DRIVER_MODULE(gpioc, gpiobus, gpioc_driver, 0, 0); MODULE_VERSION(gpioc, 1); diff --git a/sys/dev/gpio/gpioled.c b/sys/dev/gpio/gpioled.c index ba53cb733971..71af5741b2fe 100644 --- a/sys/dev/gpio/gpioled.c +++ b/sys/dev/gpio/gpioled.c @@ -55,13 +55,13 @@ device_get_nameunit((_sc)->sc_dev), "gpioled", MTX_DEF) #define GPIOLED_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_mtx) -struct gpioled_softc +struct gpioled_softc { device_t sc_dev; device_t sc_busdev; struct mtx sc_mtx; struct cdev *sc_leddev; - int sc_invert; + int sc_softinvert; }; static void gpioled_control(void *, int); @@ -69,20 +69,19 @@ static int gpioled_probe(device_t); static int gpioled_attach(device_t); static int gpioled_detach(device_t); -static void +static void gpioled_control(void *priv, int onoff) { struct gpioled_softc *sc; sc = (struct gpioled_softc *)priv; + if (onoff == -1) /* Keep the current state. */ + return; + if (sc->sc_softinvert) + onoff = !onoff; GPIOLED_LOCK(sc); - if (GPIOBUS_PIN_SETFLAGS(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN, - GPIO_PIN_OUTPUT) == 0) { - if (sc->sc_invert) - onoff = !onoff; - GPIOBUS_PIN_SET(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN, - onoff ? GPIO_PIN_HIGH : GPIO_PIN_LOW); - } + GPIOBUS_PIN_SET(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN, + onoff ? GPIO_PIN_HIGH : GPIO_PIN_LOW); GPIOLED_UNLOCK(sc); } @@ -95,26 +94,101 @@ gpioled_probe(device_t dev) } static int +gpioled_inv(device_t dev, uint32_t *pin_flags) +{ + struct gpioled_softc *sc; + int invert; + uint32_t pin_caps; + + sc = device_get_softc(dev); + + if (resource_int_value(device_get_name(dev), + device_get_unit(dev), "invert", &invert)) + invert = 0; + + if (GPIOBUS_PIN_GETCAPS(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN, + &pin_caps) != 0) { + if (bootverbose) + device_printf(sc->sc_dev, "unable to get pin caps\n"); + return (-1); + } + if (pin_caps & GPIO_PIN_INVOUT) + *pin_flags &= ~GPIO_PIN_INVOUT; + sc->sc_softinvert = 0; + if (invert) { + const char *invmode; + + if (resource_string_value(device_get_name(dev), + device_get_unit(dev), "invmode", &invmode)) + invmode = NULL; + + if (invmode) { + if (!strcmp(invmode, "sw")) + sc->sc_softinvert = 1; + else if (!strcmp(invmode, "hw")) { + if (pin_caps & GPIO_PIN_INVOUT) + *pin_flags |= GPIO_PIN_INVOUT; + else { + device_printf(sc->sc_dev, "hardware pin inversion not supported\n"); + return (-1); + } + } else { + if (strcmp(invmode, "auto") != 0) + device_printf(sc->sc_dev, "invalid pin inversion mode\n"); + invmode = NULL; + } + } + /* + * auto inversion mode: use hardware support if available, else fallback to + * software emulation. + */ + if (invmode == NULL) { + if (pin_caps & GPIO_PIN_INVOUT) + *pin_flags |= GPIO_PIN_INVOUT; + else + sc->sc_softinvert = 1; + } + } + MPASS(!invert || + (((*pin_flags & GPIO_PIN_INVOUT) != 0) && !sc->sc_softinvert) || + (((*pin_flags & GPIO_PIN_INVOUT) == 0) && sc->sc_softinvert)); + return (invert); +} + +static int gpioled_attach(device_t dev) { struct gpioled_softc *sc; int state; const char *name; + uint32_t pin_flags; + int invert; sc = device_get_softc(dev); sc->sc_dev = dev; sc->sc_busdev = device_get_parent(dev); GPIOLED_LOCK_INIT(sc); - state = 0; - - if (resource_string_value(device_get_name(dev), + if (resource_string_value(device_get_name(dev), device_get_unit(dev), "name", &name)) name = NULL; - resource_int_value(device_get_name(dev), - device_get_unit(dev), "invert", &sc->sc_invert); - resource_int_value(device_get_name(dev), - device_get_unit(dev), "state", &state); + + if (resource_int_value(device_get_name(dev), + device_get_unit(dev), "state", &state)) + state = 0; + + pin_flags = GPIO_PIN_OUTPUT; + invert = gpioled_inv(dev, &pin_flags); + if (invert < 0) + return (ENXIO); + device_printf(sc->sc_dev, "state %d invert %s\n", + state, (invert ? (sc->sc_softinvert ? "sw" : "hw") : "no")); + if (GPIOBUS_PIN_SETFLAGS(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN, + pin_flags) != 0) { + if (bootverbose) + device_printf(sc->sc_dev, "unable to set pin flags, %#x\n", pin_flags); + return (ENXIO); + } sc->sc_leddev = led_create_state(gpioled_control, sc, name ? name : device_get_nameunit(dev), state); diff --git a/sys/dev/gpio/ofw_gpiobus.c b/sys/dev/gpio/ofw_gpiobus.c index b12b78fac18c..da1bfbc268b8 100644 --- a/sys/dev/gpio/ofw_gpiobus.c +++ b/sys/dev/gpio/ofw_gpiobus.c @@ -426,6 +426,9 @@ ofw_gpiobus_attach(device_t dev) err = gpiobus_init_softc(dev); if (err != 0) return (err); + err = gpiobus_add_gpioc(dev); + if (err != 0) + return (err); bus_identify_children(dev); bus_enumerate_hinted_children(dev); /* diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c index 9b85c989dc96..a6a6ae68996c 100644 --- a/sys/dev/hwpmc/hwpmc_mod.c +++ b/sys/dev/hwpmc/hwpmc_mod.c @@ -210,7 +210,7 @@ static int pmc_attach_one_process(struct proc *p, struct pmc *pm); static bool pmc_can_allocate_row(int ri, enum pmc_mode mode); static bool pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu); -static int pmc_can_attach(struct pmc *pm, struct proc *p); +static bool pmc_can_attach(struct pmc *pm, struct proc *p); static void pmc_capture_user_callchain(int cpu, int soft, struct trapframe *tf); static void pmc_cleanup(void); @@ -1029,19 +1029,19 @@ pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp) * Check if PMC 'pm' may be attached to target process 't'. */ -static int +static bool pmc_can_attach(struct pmc *pm, struct proc *t) { struct proc *o; /* pmc owner */ struct ucred *oc, *tc; /* owner, target credentials */ - int decline_attach, i; + bool decline_attach; /* * A PMC's owner can always attach that PMC to itself. */ if ((o = pm->pm_owner->po_owner) == t) - return 0; + return (true); PROC_LOCK(o); oc = o->p_ucred; @@ -1066,18 +1066,17 @@ pmc_can_attach(struct pmc *pm, struct proc *t) * Every one of the target's group ids, must be in the owner's * group list. */ - for (i = 0; !decline_attach && i < tc->cr_ngroups; i++) + for (int i = 0; !decline_attach && i < tc->cr_ngroups; i++) decline_attach = !groupmember(tc->cr_groups[i], oc); - - /* check the read and saved gids too */ - if (decline_attach == 0) - decline_attach = !groupmember(tc->cr_rgid, oc) || + if (!decline_attach) + decline_attach = !groupmember(tc->cr_gid, oc) || + !groupmember(tc->cr_rgid, oc) || !groupmember(tc->cr_svgid, oc); crfree(tc); crfree(oc); - return !decline_attach; + return (!decline_attach); } /* @@ -1412,7 +1411,7 @@ pmc_process_exec(struct thread *td, struct pmckern_procexec *pk) */ for (ri = 0; ri < md->pmd_npmc; ri++) { if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { - if (pmc_can_attach(pm, td->td_proc) != 0) { + if (pmc_can_attach(pm, td->td_proc)) { pmc_detach_one_process(td->td_proc, pm, PMC_FLAG_NONE); } diff --git a/sys/dev/hwt/hwt_ioctl.c b/sys/dev/hwt/hwt_ioctl.c index 592db4931bb4..184c7e72f986 100644 --- a/sys/dev/hwt/hwt_ioctl.c +++ b/sys/dev/hwt/hwt_ioctl.c @@ -112,12 +112,11 @@ hwt_priv_check(struct proc *o, struct proc *t) error = EPERM; goto done; } - - /* Check the read and saved GIDs too. */ - if (!groupmember(tc->cr_rgid, oc) || + if (!groupmember(tc->cr_gid, oc) || + !groupmember(tc->cr_rgid, oc) || !groupmember(tc->cr_svgid, oc)) { - error = EPERM; - goto done; + error = EPERM; + goto done; } done: diff --git a/sys/dev/ice/ice_lib.c b/sys/dev/ice/ice_lib.c index 442111e5ffaf..8b6349f686eb 100644 --- a/sys/dev/ice/ice_lib.c +++ b/sys/dev/ice/ice_lib.c @@ -7818,7 +7818,8 @@ ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter) case IFCOUNTER_OPACKETS: return (es->tx_unicast + es->tx_multicast + es->tx_broadcast); case IFCOUNTER_OERRORS: - return (es->tx_errors); + return (if_get_counter_default(vsi->sc->ifp, counter) + + es->tx_errors); case IFCOUNTER_COLLISIONS: return (0); case IFCOUNTER_IBYTES: @@ -7832,7 +7833,8 @@ ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter) case IFCOUNTER_IQDROPS: return (es->rx_discards); case IFCOUNTER_OQDROPS: - return (hs->tx_dropped_link_down); + return (if_get_counter_default(vsi->sc->ifp, counter) + + hs->tx_dropped_link_down); case IFCOUNTER_NOPROTO: return (es->rx_unknown_protocol); default: diff --git a/sys/dev/ichsmb/ichsmb_pci.c b/sys/dev/ichsmb/ichsmb_pci.c index 728bb942d503..e4d87fe1fed2 100644 --- a/sys/dev/ichsmb/ichsmb_pci.c +++ b/sys/dev/ichsmb/ichsmb_pci.c @@ -107,6 +107,7 @@ #define ID_COMETLAKE2 0x06a3 #define ID_TIGERLAKE 0xa0a3 #define ID_TIGERLAKE2 0x43a3 +#define ID_ELKHARTLAKE 0x4b23 #define ID_GEMINILAKE 0x31d4 #define ID_CEDARFORK 0x18df #define ID_ICELAKE 0x34a3 @@ -206,6 +207,8 @@ static const struct pci_device_table ichsmb_devices[] = { PCI_DESCR("Intel Tiger Lake SMBus controller") }, { PCI_DEV(PCI_VENDOR_INTEL, ID_TIGERLAKE2), PCI_DESCR("Intel Tiger Lake SMBus controller") }, + { PCI_DEV(PCI_VENDOR_INTEL, ID_ELKHARTLAKE), + PCI_DESCR("Intel Elkhart Lake SMBus controller") }, { PCI_DEV(PCI_VENDOR_INTEL, ID_GEMINILAKE), PCI_DESCR("Intel Gemini Lake SMBus controller") }, { PCI_DEV(PCI_VENDOR_INTEL, ID_CEDARFORK), diff --git a/sys/dev/igc/if_igc.c b/sys/dev/igc/if_igc.c index a1ae35c7aa43..f199a128c783 100644 --- a/sys/dev/igc/if_igc.c +++ b/sys/dev/igc/if_igc.c @@ -2599,8 +2599,8 @@ igc_if_get_counter(if_ctx_t ctx, ift_counter cnt) sc->stats.ruc + sc->stats.roc + sc->stats.mpc + sc->stats.htdpmc); case IFCOUNTER_OERRORS: - return (sc->stats.ecol + sc->stats.latecol + - sc->watchdog_events); + return (if_get_counter_default(ifp, cnt) + + sc->stats.ecol + sc->stats.latecol + sc->watchdog_events); default: return (if_get_counter_default(ifp, cnt)); } diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c index 6856b0551dde..668ccf056463 100644 --- a/sys/dev/iommu/busdma_iommu.c +++ b/sys/dev/iommu/busdma_iommu.c @@ -114,8 +114,8 @@ iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func) * domain, and must collectively be assigned to use either IOMMU or * bounce mapping. */ -device_t -iommu_get_requester(device_t dev, uint16_t *rid) +int +iommu_get_requester(device_t dev, device_t *requesterp, uint16_t *rid) { devclass_t pci_class; device_t l, pci, pcib, pcip, pcibp, requester; @@ -129,7 +129,8 @@ iommu_get_requester(device_t dev, uint16_t *rid) pci = device_get_parent(dev); if (pci == NULL || device_get_devclass(pci) != pci_class) { *rid = 0; /* XXXKIB: Could be ACPI HID */ - return (requester); + *requesterp = NULL; + return (ENOTTY); } *rid = pci_get_rid(dev); @@ -141,16 +142,39 @@ iommu_get_requester(device_t dev, uint16_t *rid) */ for (;;) { pci = device_get_parent(l); - KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent " - "for %s", device_get_name(dev), device_get_name(l))); - KASSERT(device_get_devclass(pci) == pci_class, - ("iommu_get_requester(%s): non-pci parent %s for %s", - device_get_name(dev), device_get_name(pci), - device_get_name(l))); + if (pci == NULL) { + if (bootverbose) { + printf( + "iommu_get_requester(%s): NULL parent for %s\n", + device_get_name(dev), device_get_name(l)); + } + *rid = 0; + *requesterp = NULL; + return (ENXIO); + } + if (device_get_devclass(pci) != pci_class) { + if (bootverbose) { + printf( + "iommu_get_requester(%s): non-pci parent %s for %s\n", + device_get_name(dev), device_get_name(pci), + device_get_name(l)); + } + *rid = 0; + *requesterp = NULL; + return (ENXIO); + } pcib = device_get_parent(pci); - KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge " - "for %s", device_get_name(dev), device_get_name(pci))); + if (pcib == NULL) { + if (bootverbose) { + printf( + "iommu_get_requester(%s): NULL bridge for %s\n", + device_get_name(dev), device_get_name(pci)); + } + *rid = 0; + *requesterp = NULL; + return (ENXIO); + } /* * The parent of our "bridge" isn't another PCI bus, @@ -229,7 +253,8 @@ iommu_get_requester(device_t dev, uint16_t *rid) } } } - return (requester); + *requesterp = requester; + return (0); } struct iommu_ctx * @@ -237,10 +262,13 @@ iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr) { device_t requester; struct iommu_ctx *ctx; + int error; bool disabled; uint16_t rid; - requester = iommu_get_requester(dev, &rid); + error = iommu_get_requester(dev, &requester, &rid); + if (error != 0) + return (NULL); /* * If the user requested the IOMMU disabled for the device, we diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h index b1858f0df9f7..55044042c5d2 100644 --- a/sys/dev/iommu/iommu.h +++ b/sys/dev/iommu/iommu.h @@ -170,7 +170,7 @@ void iommu_domain_unload(struct iommu_domain *domain, void iommu_unit_pre_instantiate_ctx(struct iommu_unit *iommu); struct iommu_ctx *iommu_instantiate_ctx(struct iommu_unit *iommu, device_t dev, bool rmrr); -device_t iommu_get_requester(device_t dev, uint16_t *rid); +int iommu_get_requester(device_t dev, device_t *requester, uint16_t *rid); int iommu_init_busdma(struct iommu_unit *unit); void iommu_fini_busdma(struct iommu_unit *unit); diff --git a/sys/dev/irdma/irdma_cm.c b/sys/dev/irdma/irdma_cm.c index 450fae662dd8..d4d4f328fb43 100644 --- a/sys/dev/irdma/irdma_cm.c +++ b/sys/dev/irdma/irdma_cm.c @@ -1316,7 +1316,7 @@ irdma_cm_timer_tick(struct timer_list *t) struct irdma_timer_entry *send_entry, *close_entry; struct list_head *list_core_temp; struct list_head *list_node; - struct irdma_cm_core *cm_core = from_timer(cm_core, t, tcp_timer); + struct irdma_cm_core *cm_core = timer_container_of(cm_core, t, tcp_timer); struct irdma_sc_vsi *vsi; u32 settimer = 0; unsigned long timetosend; diff --git a/sys/dev/irdma/irdma_utils.c b/sys/dev/irdma/irdma_utils.c index 5fc37022981f..038f1980082b 100644 --- a/sys/dev/irdma/irdma_utils.c +++ b/sys/dev/irdma/irdma_utils.c @@ -876,7 +876,7 @@ irdma_terminate_done(struct irdma_sc_qp *qp, int timeout_occurred) static void irdma_terminate_timeout(struct timer_list *t) { - struct irdma_qp *iwqp = from_timer(iwqp, t, terminate_timer); + struct irdma_qp *iwqp = timer_container_of(iwqp, t, terminate_timer); struct irdma_sc_qp *qp = &iwqp->sc_qp; irdma_terminate_done(qp, 1); @@ -1528,7 +1528,7 @@ static void irdma_hw_stats_timeout(struct timer_list *t) { struct irdma_vsi_pestat *pf_devstat = - from_timer(pf_devstat, t, stats_timer); + timer_container_of(pf_devstat, t, stats_timer); struct irdma_sc_vsi *sc_vsi = pf_devstat->vsi; if (sc_vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index 73c0fd1ab16f..6d08bd49bc04 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ -184,6 +184,7 @@ static int ixgbe_if_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); static void ixgbe_if_queues_free(if_ctx_t); static void ixgbe_if_timer(if_ctx_t, uint16_t); +static const char *ixgbe_link_speed_to_str(u32 link_speed); static void ixgbe_if_update_admin_status(if_ctx_t); static void ixgbe_if_vlan_register(if_ctx_t, u16); static void ixgbe_if_vlan_unregister(if_ctx_t, u16); @@ -1349,8 +1350,6 @@ ixgbe_if_get_counter(if_ctx_t ctx, ift_counter cnt) return (0); case IFCOUNTER_IQDROPS: return (sc->iqdrops); - case IFCOUNTER_OQDROPS: - return (0); case IFCOUNTER_IERRORS: return (sc->ierrors); default: @@ -4027,6 +4026,33 @@ ixgbe_if_stop(if_ctx_t ctx) } /* ixgbe_if_stop */ /************************************************************************ + * ixgbe_link_speed_to_str - Convert link speed to string + * + * Helper function to convert link speed constants to human-readable + * string representations in conventional Gbps or Mbps. + ************************************************************************/ +static const char * +ixgbe_link_speed_to_str(u32 link_speed) +{ + switch (link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + return "10 Gbps"; + case IXGBE_LINK_SPEED_5GB_FULL: + return "5 Gbps"; + case IXGBE_LINK_SPEED_2_5GB_FULL: + return "2.5 Gbps"; + case IXGBE_LINK_SPEED_1GB_FULL: + return "1 Gbps"; + case IXGBE_LINK_SPEED_100_FULL: + return "100 Mbps"; + case IXGBE_LINK_SPEED_10_FULL: + return "10 Mbps"; + default: + return "Unknown"; + } +} /* ixgbe_link_speed_to_str */ + +/************************************************************************ * ixgbe_update_link_status - Update OS on link state * * Note: Only updates the OS on the cached link state. @@ -4042,9 +4068,9 @@ ixgbe_if_update_admin_status(if_ctx_t ctx) if (sc->link_up) { if (sc->link_active == false) { if (bootverbose) - device_printf(dev, "Link is up %d Gbps %s \n", - ((sc->link_speed == 128) ? 10 : 1), - "Full Duplex"); + device_printf(dev, + "Link is up %s Full Duplex\n", + ixgbe_link_speed_to_str(sc->link_speed)); sc->link_active = true; /* Update any Flow Control changes */ ixgbe_fc_enable(&sc->hw); diff --git a/sys/dev/ixgbe/ixgbe_e610.c b/sys/dev/ixgbe/ixgbe_e610.c index 95c6dca416c6..18c4612446e0 100644 --- a/sys/dev/ixgbe/ixgbe_e610.c +++ b/sys/dev/ixgbe/ixgbe_e610.c @@ -1400,40 +1400,6 @@ s32 ixgbe_aci_set_link_restart_an(struct ixgbe_hw *hw, bool ena_link) } /** - * ixgbe_is_media_cage_present - check if media cage is present - * @hw: pointer to the HW struct - * - * Identify presence of media cage using the ACI command (0x06E0). - * - * Return: true if media cage is present, else false. If no cage, then - * media type is backplane or BASE-T. - */ -static bool ixgbe_is_media_cage_present(struct ixgbe_hw *hw) -{ - struct ixgbe_aci_cmd_get_link_topo *cmd; - struct ixgbe_aci_desc desc; - - cmd = &desc.params.get_link_topo; - - ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_link_topo); - - cmd->addr.topo_params.node_type_ctx = - (IXGBE_ACI_LINK_TOPO_NODE_CTX_PORT << - IXGBE_ACI_LINK_TOPO_NODE_CTX_S); - - /* set node type */ - cmd->addr.topo_params.node_type_ctx |= - (IXGBE_ACI_LINK_TOPO_NODE_TYPE_M & - IXGBE_ACI_LINK_TOPO_NODE_TYPE_CAGE); - - /* Node type cage can be used to determine if cage is present. If AQC - * returns error (ENOENT), then no cage present. If no cage present then - * connection type is backplane or BASE-T. - */ - return ixgbe_aci_get_netlist_node(hw, cmd, NULL, NULL); -} - -/** * ixgbe_get_media_type_from_phy_type - Gets media type based on phy type * @hw: pointer to the HW struct * diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c index 43c3af056b67..261f76055901 100644 --- a/sys/dev/ixl/if_ixl.c +++ b/sys/dev/ixl/if_ixl.c @@ -1785,7 +1785,7 @@ ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt) case IFCOUNTER_OPACKETS: return (vsi->opackets); case IFCOUNTER_OERRORS: - return (vsi->oerrors); + return (if_get_counter_default(ifp, cnt) + vsi->oerrors); case IFCOUNTER_COLLISIONS: /* Collisions are by standard impossible in 40G/10G Ethernet */ return (0); @@ -1800,7 +1800,7 @@ ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt) case IFCOUNTER_IQDROPS: return (vsi->iqdrops); case IFCOUNTER_OQDROPS: - return (vsi->oqdrops); + return (if_get_counter_default(ifp, cnt) + vsi->oqdrops); case IFCOUNTER_NOPROTO: return (vsi->noproto); default: diff --git a/sys/dev/mpr/mpr.c b/sys/dev/mpr/mpr.c index d1c572e40669..262d6b58b705 100644 --- a/sys/dev/mpr/mpr.c +++ b/sys/dev/mpr/mpr.c @@ -1729,6 +1729,7 @@ mpr_get_tunables(struct mpr_softc *sc) sc->enable_ssu = MPR_SSU_ENABLE_SSD_DISABLE_HDD; sc->spinup_wait_time = DEFAULT_SPINUP_WAIT; sc->use_phynum = 1; + sc->encl_min_slots = 0; sc->max_reqframes = MPR_REQ_FRAMES; sc->max_prireqframes = MPR_PRI_REQ_FRAMES; sc->max_replyframes = MPR_REPLY_FRAMES; @@ -1748,6 +1749,7 @@ mpr_get_tunables(struct mpr_softc *sc) TUNABLE_INT_FETCH("hw.mpr.enable_ssu", &sc->enable_ssu); TUNABLE_INT_FETCH("hw.mpr.spinup_wait_time", &sc->spinup_wait_time); TUNABLE_INT_FETCH("hw.mpr.use_phy_num", &sc->use_phynum); + TUNABLE_INT_FETCH("hw.mpr.encl_min_slots", &sc->encl_min_slots); TUNABLE_INT_FETCH("hw.mpr.max_reqframes", &sc->max_reqframes); TUNABLE_INT_FETCH("hw.mpr.max_prireqframes", &sc->max_prireqframes); TUNABLE_INT_FETCH("hw.mpr.max_replyframes", &sc->max_replyframes); @@ -1797,6 +1799,10 @@ mpr_get_tunables(struct mpr_softc *sc) device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->use_phynum); + snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.encl_min_slots", + device_get_unit(sc->mpr_dev)); + TUNABLE_INT_FETCH(tmpstr, &sc->encl_min_slots); + snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_reqframes", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_reqframes); @@ -1951,6 +1957,10 @@ mpr_setup_sysctl(struct mpr_softc *sc) SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "prp_page_alloc_fail", CTLFLAG_RD, &sc->prp_page_alloc_fail, "PRP page allocation failures"); + + SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), + OID_AUTO, "encl_min_slots", CTLFLAG_RW, &sc->encl_min_slots, 0, + "force enclosure minimum slots"); } static struct mpr_debug_string { diff --git a/sys/dev/mpr/mpr_mapping.c b/sys/dev/mpr/mpr_mapping.c index f9a9ac1c53d0..38aa4dfc7ef2 100644 --- a/sys/dev/mpr/mpr_mapping.c +++ b/sys/dev/mpr/mpr_mapping.c @@ -2785,6 +2785,8 @@ mpr_mapping_enclosure_dev_status_change_event(struct mpr_softc *sc, * DPM, if it's being used. */ if (enc_idx != MPR_ENCTABLE_BAD_IDX) { + u16 new_num_slots; + et_entry = &sc->enclosure_table[enc_idx]; if (et_entry->init_complete && !et_entry->missing_count) { @@ -2796,6 +2798,17 @@ mpr_mapping_enclosure_dev_status_change_event(struct mpr_softc *sc, et_entry->enc_handle = le16toh(event_data-> EnclosureHandle); et_entry->start_slot = le16toh(event_data->StartSlot); + new_num_slots = le16toh(event_data->NumSlots); + if (new_num_slots < sc->encl_min_slots) { + mpr_dprint(sc, MPR_MAPPING, "%s: Enclosure %d num_slots %d, overriding with %d.\n", + __func__, enc_idx, new_num_slots, sc->encl_min_slots); + new_num_slots = sc->encl_min_slots; + } + if (et_entry->num_slots != new_num_slots) { + mpr_dprint(sc, MPR_MAPPING, "%s: Enclosure %d old num_slots %d, new %d.\n", + __func__, enc_idx, et_entry->num_slots, sc->encl_min_slots); + et_entry->num_slots = new_num_slots; + } saved_phy_bits = et_entry->phy_bits; et_entry->phy_bits |= le32toh(event_data->PhyBits); if (saved_phy_bits != et_entry->phy_bits) @@ -2858,6 +2871,11 @@ mpr_mapping_enclosure_dev_status_change_event(struct mpr_softc *sc, et_entry->start_index = MPR_MAPTABLE_BAD_IDX; et_entry->dpm_entry_num = MPR_DPM_BAD_IDX; et_entry->num_slots = le16toh(event_data->NumSlots); + if (et_entry->num_slots < sc->encl_min_slots) { + mpr_dprint(sc, MPR_ERROR | MPR_MAPPING, "%s: Enclosure %d num_slots is %d, overriding with %d.\n", + __func__, enc_idx, et_entry->num_slots, sc->encl_min_slots); + et_entry->num_slots = sc->encl_min_slots; + } et_entry->start_slot = le16toh(event_data->StartSlot); et_entry->phy_bits = le32toh(event_data->PhyBits); } diff --git a/sys/dev/mpr/mprvar.h b/sys/dev/mpr/mprvar.h index 0f1743f4266e..93f3fbffe079 100644 --- a/sys/dev/mpr/mprvar.h +++ b/sys/dev/mpr/mprvar.h @@ -366,6 +366,7 @@ struct mpr_softc { int spinup_wait_time; int use_phynum; int dump_reqs_alltypes; + int encl_min_slots; uint64_t chain_alloc_fail; uint64_t prp_page_alloc_fail; struct sysctl_ctx_list sysctl_ctx; diff --git a/sys/dev/mwl/if_mwl.c b/sys/dev/mwl/if_mwl.c index 0e2eb0b2d8fe..c885968dfe15 100644 --- a/sys/dev/mwl/if_mwl.c +++ b/sys/dev/mwl/if_mwl.c @@ -4017,7 +4017,7 @@ mkpeerinfo(MWL_HAL_PEERINFO *pi, const struct ieee80211_node *ni) pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_SHORTGI40; if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) == 0) pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_SHORTGI20; - if (ni->ni_chw != IEEE80211_STA_RX_BW_40) + if (ni->ni_chw != NET80211_STA_RX_BW_40) pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_CHWIDTH40; } return pi; diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c index 84f365024f13..ead91f0d01fe 100644 --- a/sys/dev/nvme/nvme.c +++ b/sys/dev/nvme/nvme.c @@ -295,7 +295,6 @@ nvme_register_consumer(nvme_cons_ns_fn_t ns_fn, nvme_cons_ctrlr_fn_t ctrlr_fn, void nvme_unregister_consumer(struct nvme_consumer *consumer) { - consumer->id = INVALID_CONSUMER_ID; } diff --git a/sys/dev/nvme/nvme_ahci.c b/sys/dev/nvme/nvme_ahci.c index 888207a454f7..b06661226d34 100644 --- a/sys/dev/nvme/nvme_ahci.c +++ b/sys/dev/nvme/nvme_ahci.c @@ -124,6 +124,5 @@ bad: static int nvme_ahci_detach(device_t dev) { - return (nvme_detach(dev)); } diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c index fd7f00ced14b..3a1894bf754d 100644 --- a/sys/dev/nvme/nvme_ctrlr.c +++ b/sys/dev/nvme/nvme_ctrlr.c @@ -41,6 +41,9 @@ #include <sys/endian.h> #include <sys/stdarg.h> #include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_extern.h> +#include <vm/vm_map.h> #include "nvme_private.h" #include "nvme_linux.h" @@ -597,7 +600,6 @@ nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr) static bool is_log_page_id_valid(uint8_t page_id) { - switch (page_id) { case NVME_LOG_ERROR: case NVME_LOG_HEALTH_INFORMATION: @@ -653,7 +655,6 @@ static void nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr, uint8_t state) { - if (state & NVME_CRIT_WARN_ST_AVAILABLE_SPARE) nvme_printf(ctrlr, "SMART WARNING: available spare space below threshold\n"); @@ -781,7 +782,6 @@ nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr) static void nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr) { - ctrlr->int_coal_time = 0; TUNABLE_INT_FETCH("hw.nvme.int_coal_time", &ctrlr->int_coal_time); @@ -1268,6 +1268,34 @@ nvme_ctrlr_shared_handler(void *arg) nvme_mmio_write_4(ctrlr, intmc, 1); } +#define NVME_MAX_PAGES (int)(1024 / sizeof(vm_page_t)) + +static int +nvme_user_ioctl_req(vm_offset_t addr, size_t len, bool is_read, + vm_page_t *upages, int max_pages, int *npagesp, struct nvme_request **req, + nvme_cb_fn_t cb_fn, void *cb_arg) +{ + vm_prot_t prot = VM_PROT_READ; + int err; + + if (is_read) + prot |= VM_PROT_WRITE; /* Device will write to host memory */ + err = vm_fault_hold_pages(&curproc->p_vmspace->vm_map, + addr, len, prot, upages, max_pages, npagesp); + if (err != 0) + return (err); + *req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg); + (*req)->payload = memdesc_vmpages(upages, len, addr & PAGE_MASK); + (*req)->payload_valid = true; + return (0); +} + +static void +nvme_user_ioctl_free(vm_page_t *pages, int npage) +{ + vm_page_unhold_pages(pages, npage); +} + static void nvme_pt_done(void *arg, const struct nvme_completion *cpl) { @@ -1290,30 +1318,28 @@ nvme_pt_done(void *arg, const struct nvme_completion *cpl) int nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr, - struct nvme_pt_command *pt, uint32_t nsid, int is_user_buffer, + struct nvme_pt_command *pt, uint32_t nsid, int is_user, int is_admin_cmd) { - struct nvme_request *req; - struct mtx *mtx; - struct buf *buf = NULL; - int ret = 0; + struct nvme_request *req; + struct mtx *mtx; + int ret = 0; + int npages = 0; + vm_page_t upages[NVME_MAX_PAGES]; if (pt->len > 0) { if (pt->len > ctrlr->max_xfer_size) { - nvme_printf(ctrlr, "pt->len (%d) " - "exceeds max_xfer_size (%d)\n", pt->len, - ctrlr->max_xfer_size); - return EIO; + nvme_printf(ctrlr, + "len (%d) exceeds max_xfer_size (%d)\n", + pt->len, ctrlr->max_xfer_size); + return (EIO); } - if (is_user_buffer) { - buf = uma_zalloc(pbuf_zone, M_WAITOK); - buf->b_iocmd = pt->is_read ? BIO_READ : BIO_WRITE; - if (vmapbuf(buf, pt->buf, pt->len, 1) < 0) { - ret = EFAULT; - goto err; - } - req = nvme_allocate_request_vaddr(buf->b_data, pt->len, - M_WAITOK, nvme_pt_done, pt); + if (is_user) { + ret = nvme_user_ioctl_req((vm_offset_t)pt->buf, pt->len, + pt->is_read, upages, nitems(upages), &npages, &req, + nvme_pt_done, pt); + if (ret != 0) + return (ret); } else req = nvme_allocate_request_vaddr(pt->buf, pt->len, M_WAITOK, nvme_pt_done, pt); @@ -1347,11 +1373,8 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr, mtx_sleep(pt, mtx, PRIBIO, "nvme_pt", 0); mtx_unlock(mtx); - if (buf != NULL) { - vunmapbuf(buf); -err: - uma_zfree(pbuf_zone, buf); - } + if (npages > 0) + nvme_user_ioctl_free(upages, npages); return (ret); } @@ -1377,8 +1400,9 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr, { struct nvme_request *req; struct mtx *mtx; - struct buf *buf = NULL; int ret = 0; + int npages = 0; + vm_page_t upages[NVME_MAX_PAGES]; /* * We don't support metadata. @@ -1389,28 +1413,16 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr, if (npc->data_len > 0 && npc->addr != 0) { if (npc->data_len > ctrlr->max_xfer_size) { nvme_printf(ctrlr, - "npc->data_len (%d) exceeds max_xfer_size (%d)\n", + "data_len (%d) exceeds max_xfer_size (%d)\n", npc->data_len, ctrlr->max_xfer_size); return (EIO); } - /* - * We only support data out or data in commands, but not both at - * once. However, there's some comands with lower bit cleared - * that are really read commands, so we should filter & 3 == 0, - * but don't. - */ - if ((npc->opcode & 0x3) == 3) - return (EINVAL); if (is_user) { - buf = uma_zalloc(pbuf_zone, M_WAITOK); - buf->b_iocmd = npc->opcode & 1 ? BIO_WRITE : BIO_READ; - if (vmapbuf(buf, (void *)(uintptr_t)npc->addr, - npc->data_len, 1) < 0) { - ret = EFAULT; - goto err; - } - req = nvme_allocate_request_vaddr(buf->b_data, - npc->data_len, M_WAITOK, nvme_npc_done, npc); + ret = nvme_user_ioctl_req(npc->addr, npc->data_len, + npc->opcode & 0x1, upages, nitems(upages), &npages, + &req, nvme_npc_done, npc); + if (ret != 0) + return (ret); } else req = nvme_allocate_request_vaddr( (void *)(uintptr_t)npc->addr, npc->data_len, @@ -1420,8 +1432,8 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr, req->cmd.opc = npc->opcode; req->cmd.fuse = npc->flags; - req->cmd.rsvd2 = htole16(npc->cdw2); - req->cmd.rsvd3 = htole16(npc->cdw3); + req->cmd.rsvd2 = htole32(npc->cdw2); + req->cmd.rsvd3 = htole32(npc->cdw3); req->cmd.cdw10 = htole32(npc->cdw10); req->cmd.cdw11 = htole32(npc->cdw11); req->cmd.cdw12 = htole32(npc->cdw12); @@ -1445,11 +1457,8 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr, mtx_sleep(npc, mtx, PRIBIO, "nvme_npc", 0); mtx_unlock(mtx); - if (buf != NULL) { - vunmapbuf(buf); -err: - uma_zfree(pbuf_zone, buf); - } + if (npages > 0) + nvme_user_ioctl_free(upages, npages); return (ret); } @@ -1776,7 +1785,6 @@ void nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr, struct nvme_request *req) { - nvme_qpair_submit_request(&ctrlr->adminq, req); } @@ -1793,14 +1801,12 @@ nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr, device_t nvme_ctrlr_get_device(struct nvme_controller *ctrlr) { - return (ctrlr->dev); } const struct nvme_controller_data * nvme_ctrlr_get_data(struct nvme_controller *ctrlr) { - return (&ctrlr->cdata); } @@ -1853,7 +1859,6 @@ nvme_ctrlr_suspend(struct nvme_controller *ctrlr) int nvme_ctrlr_resume(struct nvme_controller *ctrlr) { - /* * Can't touch failed controllers, so nothing to do to resume. */ diff --git a/sys/dev/nvme/nvme_ctrlr_cmd.c b/sys/dev/nvme/nvme_ctrlr_cmd.c index 993a7718356d..5a44ed425acb 100644 --- a/sys/dev/nvme/nvme_ctrlr_cmd.c +++ b/sys/dev/nvme/nvme_ctrlr_cmd.c @@ -281,7 +281,6 @@ nvme_ctrlr_cmd_get_error_page(struct nvme_controller *ctrlr, struct nvme_error_information_entry *payload, uint32_t num_entries, nvme_cb_fn_t cb_fn, void *cb_arg) { - KASSERT(num_entries > 0, ("%s called with num_entries==0\n", __func__)); /* Controller's error log page entries is 0-based. */ @@ -302,7 +301,6 @@ nvme_ctrlr_cmd_get_health_information_page(struct nvme_controller *ctrlr, uint32_t nsid, struct nvme_health_information_page *payload, nvme_cb_fn_t cb_fn, void *cb_arg) { - nvme_ctrlr_cmd_get_log_page(ctrlr, NVME_LOG_HEALTH_INFORMATION, nsid, payload, sizeof(*payload), cb_fn, cb_arg); } @@ -311,7 +309,6 @@ void nvme_ctrlr_cmd_get_firmware_page(struct nvme_controller *ctrlr, struct nvme_firmware_page *payload, nvme_cb_fn_t cb_fn, void *cb_arg) { - nvme_ctrlr_cmd_get_log_page(ctrlr, NVME_LOG_FIRMWARE_SLOT, NVME_GLOBAL_NAMESPACE_TAG, payload, sizeof(*payload), cb_fn, cb_arg); diff --git a/sys/dev/nvme/nvme_ns.c b/sys/dev/nvme/nvme_ns.c index 3f29382fe42f..e84d2066930e 100644 --- a/sys/dev/nvme/nvme_ns.c +++ b/sys/dev/nvme/nvme_ns.c @@ -129,7 +129,6 @@ static int nvme_ns_close(struct cdev *dev __unused, int flags, int fmt __unused, struct thread *td) { - return (0); } @@ -231,7 +230,6 @@ nvme_ns_get_model_number(struct nvme_namespace *ns) const struct nvme_namespace_data * nvme_ns_get_data(struct nvme_namespace *ns) { - return (&ns->data); } @@ -631,7 +629,6 @@ nvme_ns_construct(struct nvme_namespace *ns, uint32_t id, void nvme_ns_destruct(struct nvme_namespace *ns) { - if (ns->cdev != NULL) { if (ns->cdev->si_drv2 != NULL) destroy_dev(ns->cdev->si_drv2); diff --git a/sys/dev/nvme/nvme_pci.c b/sys/dev/nvme/nvme_pci.c index 29b49b7df403..c07a68d2f0dc 100644 --- a/sys/dev/nvme/nvme_pci.c +++ b/sys/dev/nvme/nvme_pci.c @@ -151,7 +151,6 @@ nvme_pci_probe (device_t device) static int nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr) { - ctrlr->resource_id = PCIR_BAR(0); ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY, diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 36f00fedc48e..52f9e12f8f9a 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -459,8 +459,7 @@ int nvme_detach(device_t dev); * vast majority of these without waiting for a tick plus scheduling delays. Since * these are on startup, this drastically reduces startup time. */ -static __inline -void +static __inline void nvme_completion_poll(struct nvme_completion_poll_status *status) { int timeout = ticks + 10 * hz; diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c index bd8626e32209..4f2c44da3b4f 100644 --- a/sys/dev/nvme/nvme_qpair.c +++ b/sys/dev/nvme/nvme_qpair.c @@ -793,7 +793,6 @@ nvme_admin_qpair_destroy(struct nvme_qpair *qpair) void nvme_io_qpair_destroy(struct nvme_qpair *qpair) { - nvme_qpair_destroy(qpair); } @@ -1202,7 +1201,6 @@ _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req) void nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req) { - mtx_lock(&qpair->lock); _nvme_qpair_submit_request(qpair, req); mtx_unlock(&qpair->lock); @@ -1226,7 +1224,6 @@ nvme_qpair_enable(struct nvme_qpair *qpair) void nvme_qpair_reset(struct nvme_qpair *qpair) { - qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0; /* diff --git a/sys/dev/nvme/nvme_sim.c b/sys/dev/nvme/nvme_sim.c index 4974bb718222..a06774a64761 100644 --- a/sys/dev/nvme/nvme_sim.c +++ b/sys/dev/nvme/nvme_sim.c @@ -301,7 +301,6 @@ nvme_sim_action(struct cam_sim *sim, union ccb *ccb) static void nvme_sim_poll(struct cam_sim *sim) { - nvme_ctrlr_poll(sim2ctrlr(sim)); } diff --git a/sys/dev/nvme/nvme_sysctl.c b/sys/dev/nvme/nvme_sysctl.c index a5a44721f9f9..50d19e730a16 100644 --- a/sys/dev/nvme/nvme_sysctl.c +++ b/sys/dev/nvme/nvme_sysctl.c @@ -153,7 +153,6 @@ nvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS) static void nvme_qpair_reset_stats(struct nvme_qpair *qpair) { - /* * Reset the values. Due to sanity checks in * nvme_qpair_process_completions, we reset the number of interrupt diff --git a/sys/dev/nvme/nvme_util.c b/sys/dev/nvme/nvme_util.c index 0a07653a7378..cb0ba729ac96 100644 --- a/sys/dev/nvme/nvme_util.c +++ b/sys/dev/nvme/nvme_util.c @@ -208,31 +208,33 @@ nvme_opcode_sbuf(bool admin, uint8_t opc, struct sbuf *sb) if (s == NULL) sbuf_printf(sb, "%s (%02x)", type, opc); else - sbuf_printf(sb, "%s", s); + sbuf_printf(sb, "%s (%02x)", s, opc); } void nvme_sc_sbuf(const struct nvme_completion *cpl, struct sbuf *sb) { const char *s, *type; - uint16_t status; + uint16_t status, sc, sct; status = le16toh(cpl->status); - switch (NVME_STATUS_GET_SCT(status)) { + sc = NVME_STATUS_GET_SC(status); + sct = NVME_STATUS_GET_SCT(status); + switch (sct) { case NVME_SCT_GENERIC: - s = generic_status[NVME_STATUS_GET_SC(status)]; + s = generic_status[sc]; type = "GENERIC"; break; case NVME_SCT_COMMAND_SPECIFIC: - s = command_specific_status[NVME_STATUS_GET_SC(status)]; + s = command_specific_status[sc]; type = "COMMAND SPECIFIC"; break; case NVME_SCT_MEDIA_ERROR: - s = media_error_status[NVME_STATUS_GET_SC(status)]; + s = media_error_status[sc]; type = "MEDIA ERROR"; break; case NVME_SCT_PATH_RELATED: - s = path_related_status[NVME_STATUS_GET_SC(status)]; + s = path_related_status[sc]; type = "PATH RELATED"; break; case NVME_SCT_VENDOR_SPECIFIC: @@ -246,12 +248,11 @@ nvme_sc_sbuf(const struct nvme_completion *cpl, struct sbuf *sb) } if (type == NULL) - sbuf_printf(sb, "RESERVED (%02x/%02x)", - NVME_STATUS_GET_SCT(status), NVME_STATUS_GET_SC(status)); + sbuf_printf(sb, "RESERVED (%02x/%02x)", sct, sc); else if (s == NULL) - sbuf_printf(sb, "%s (%02x)", type, NVME_STATUS_GET_SC(status)); + sbuf_printf(sb, "%s (%02x/%02x)", type, sct, sc); else - sbuf_printf(sb, "%s", s); + sbuf_printf(sb, "%s (%02x/%02x)", s, sct, sc); } void diff --git a/sys/dev/pci/pci_user.c b/sys/dev/pci/pci_user.c index f68b5b7e71ff..9768030995e7 100644 --- a/sys/dev/pci/pci_user.c +++ b/sys/dev/pci/pci_user.c @@ -79,6 +79,9 @@ struct pci_conf32 { u_int8_t pc_revid; /* chip revision ID */ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */ u_int32_t pd_unit; /* device unit number */ + int pd_numa_domain; /* device NUMA domain */ + u_int32_t pc_reported_len;/* length of PCI data reported */ + char pc_spare[64]; /* space for future fields */ }; struct pci_match_conf32 { @@ -502,11 +505,58 @@ pci_conf_match_freebsd6_32(struct pci_match_conf_freebsd6_32 *matches, int num_m #endif /* COMPAT_FREEBSD32 */ #endif /* !PRE7_COMPAT */ +#ifdef COMPAT_FREEBSD14 +struct pci_conf_freebsd14 { + struct pcisel pc_sel; /* domain+bus+slot+function */ + u_int8_t pc_hdr; /* PCI header type */ + u_int16_t pc_subvendor; /* card vendor ID */ + u_int16_t pc_subdevice; /* card device ID, assigned by + card vendor */ + u_int16_t pc_vendor; /* chip vendor ID */ + u_int16_t pc_device; /* chip device ID, assigned by + chip vendor */ + u_int8_t pc_class; /* chip PCI class */ + u_int8_t pc_subclass; /* chip PCI subclass */ + u_int8_t pc_progif; /* chip PCI programming interface */ + u_int8_t pc_revid; /* chip revision ID */ + char pd_name[PCI_MAXNAMELEN + 1]; /* device name */ + u_long pd_unit; /* device unit number */ +}; +#define PCIOCGETCONF_FREEBSD14 _IOWR('p', 5, struct pci_conf_io) + +#ifdef COMPAT_FREEBSD32 +struct pci_conf_freebsd14_32 { + struct pcisel pc_sel; /* domain+bus+slot+function */ + u_int8_t pc_hdr; /* PCI header type */ + u_int16_t pc_subvendor; /* card vendor ID */ + u_int16_t pc_subdevice; /* card device ID, assigned by + card vendor */ + u_int16_t pc_vendor; /* chip vendor ID */ + u_int16_t pc_device; /* chip device ID, assigned by + chip vendor */ + u_int8_t pc_class; /* chip PCI class */ + u_int8_t pc_subclass; /* chip PCI subclass */ + u_int8_t pc_progif; /* chip PCI programming interface */ + u_int8_t pc_revid; /* chip revision ID */ + char pd_name[PCI_MAXNAMELEN + 1]; /* device name */ + u_int32_t pd_unit; /* device unit number */ +}; +#define PCIOCGETCONF_FREEBSD14_32 \ + _IOC_NEWTYPE(PCIOCGETCONF_FREEBSD14, struct pci_conf_io32) +#endif /* COMPAT_FREEBSD32 */ +#endif /* COMPAT_FREEBSD14 */ + union pci_conf_union { struct pci_conf pc; #ifdef COMPAT_FREEBSD32 struct pci_conf32 pc32; #endif +#ifdef COMPAT_FREEBSD14 + struct pci_conf_freebsd14 pc14; +#ifdef COMPAT_FREEBSD32 + struct pci_conf_freebsd14_32 pc14_32; +#endif +#endif #ifdef PRE7_COMPAT struct pci_conf_freebsd6 pco; #ifdef COMPAT_FREEBSD32 @@ -522,10 +572,16 @@ pci_conf_match(u_long cmd, struct pci_match_conf *matches, int num_matches, switch (cmd) { case PCIOCGETCONF: +#ifdef COMPAT_FREEBSD14 + case PCIOCGETCONF_FREEBSD14: +#endif return (pci_conf_match_native( (struct pci_match_conf *)matches, num_matches, match_buf)); #ifdef COMPAT_FREEBSD32 case PCIOCGETCONF32: +#ifdef COMPAT_FREEBSD14 + case PCIOCGETCONF_FREEBSD14_32: +#endif return (pci_conf_match32((struct pci_match_conf32 *)matches, num_matches, match_buf)); #endif @@ -645,9 +701,15 @@ pci_match_conf_size(u_long cmd) switch (cmd) { case PCIOCGETCONF: +#ifdef COMPAT_FREEBSD14 + case PCIOCGETCONF_FREEBSD14: +#endif return (sizeof(struct pci_match_conf)); #ifdef COMPAT_FREEBSD32 case PCIOCGETCONF32: +#ifdef COMPAT_FREEBSD14 + case PCIOCGETCONF_FREEBSD14_32: +#endif return (sizeof(struct pci_match_conf32)); #endif #ifdef PRE7_COMPAT @@ -675,6 +737,14 @@ pci_conf_size(u_long cmd) case PCIOCGETCONF32: return (sizeof(struct pci_conf32)); #endif +#ifdef COMPAT_FREEBSD14 + case PCIOCGETCONF_FREEBSD14: + return (sizeof(struct pci_conf_freebsd14)); +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF_FREEBSD14_32: + return (sizeof(struct pci_conf_freebsd14_32)); +#endif +#endif #ifdef PRE7_COMPAT case PCIOCGETCONF_FREEBSD6: return (sizeof(struct pci_conf_freebsd6)); @@ -698,6 +768,9 @@ pci_conf_io_init(struct pci_conf_io *cio, caddr_t data, u_long cmd) switch (cmd) { case PCIOCGETCONF: +#ifdef COMPAT_FREEBSD14 + case PCIOCGETCONF_FREEBSD14: +#endif #ifdef PRE7_COMPAT case PCIOCGETCONF_FREEBSD6: #endif @@ -706,6 +779,9 @@ pci_conf_io_init(struct pci_conf_io *cio, caddr_t data, u_long cmd) #ifdef COMPAT_FREEBSD32 case PCIOCGETCONF32: +#ifdef COMPAT_FREEBSD14 + case PCIOCGETCONF_FREEBSD14_32: +#endif #ifdef PRE7_COMPAT case PCIOCGETCONF_FREEBSD6_32: #endif @@ -739,6 +815,9 @@ pci_conf_io_update_data(const struct pci_conf_io *cio, caddr_t data, switch (cmd) { case PCIOCGETCONF: +#ifdef COMPAT_FREEBSD14 + case PCIOCGETCONF_FREEBSD14: +#endif #ifdef PRE7_COMPAT case PCIOCGETCONF_FREEBSD6: #endif @@ -751,6 +830,9 @@ pci_conf_io_update_data(const struct pci_conf_io *cio, caddr_t data, #ifdef COMPAT_FREEBSD32 case PCIOCGETCONF32: +#ifdef COMPAT_FREEBSD14 + case PCIOCGETCONF_FREEBSD14_32: +#endif #ifdef PRE7_COMPAT case PCIOCGETCONF_FREEBSD6_32: #endif @@ -781,8 +863,17 @@ pci_conf_for_copyout(const struct pci_conf *pcp, union pci_conf_union *pcup, pcup->pc = *pcp; return; +#ifdef COMPAT_FREEBSD14 + case PCIOCGETCONF_FREEBSD14: + memcpy(&pcup->pc14, pcp, sizeof(pcup->pc14)); + return; +#endif + #ifdef COMPAT_FREEBSD32 case PCIOCGETCONF32: +#ifdef COMPAT_FREEBSD14 + case PCIOCGETCONF_FREEBSD14_32: +#endif pcup->pc32.pc_sel = pcp->pc_sel; pcup->pc32.pc_hdr = pcp->pc_hdr; pcup->pc32.pc_subvendor = pcp->pc_subvendor; @@ -796,8 +887,13 @@ pci_conf_for_copyout(const struct pci_conf *pcp, union pci_conf_union *pcup, strlcpy(pcup->pc32.pd_name, pcp->pd_name, sizeof(pcup->pc32.pd_name)); pcup->pc32.pd_unit = (uint32_t)pcp->pd_unit; + if (cmd == PCIOCGETCONF32) { + pcup->pc32.pd_numa_domain = pcp->pd_numa_domain; + pcup->pc32.pc_reported_len = + (uint32_t)offsetof(struct pci_conf32, pc_spare); + } return; -#endif +#endif /* COMPAT_FREEBSD32 */ #ifdef PRE7_COMPAT #ifdef COMPAT_FREEBSD32 @@ -1024,7 +1120,7 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t struct pci_map *pm; struct pci_bar_mmap *pbm; size_t confsz, iolen; - int error, ionum, i, num_patterns; + int domain, error, ionum, i, num_patterns; union pci_conf_union pcu; #ifdef PRE7_COMPAT struct pci_io iodata; @@ -1044,6 +1140,12 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t #ifdef COMPAT_FREEBSD32 case PCIOCGETCONF32: #endif +#ifdef COMPAT_FREEBSD14 + case PCIOCGETCONF_FREEBSD14: +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF_FREEBSD14_32: +#endif +#endif #ifdef PRE7_COMPAT case PCIOCGETCONF_FREEBSD6: #ifdef COMPAT_FREEBSD32 @@ -1069,6 +1171,12 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t #ifdef COMPAT_FREEBSD32 case PCIOCGETCONF32: #endif +#ifdef COMPAT_FREEBSD14 + case PCIOCGETCONF_FREEBSD14: +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF_FREEBSD14_32: +#endif +#endif #ifdef PRE7_COMPAT case PCIOCGETCONF_FREEBSD6: #ifdef COMPAT_FREEBSD32 @@ -1201,6 +1309,12 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t dinfo->conf.pd_unit = 0; } + if (dinfo->cfg.dev != NULL && + bus_get_domain(dinfo->cfg.dev, &domain) == 0) + dinfo->conf.pd_numa_domain = domain; + else + dinfo->conf.pd_numa_domain = 0; + if (pattern_buf == NULL || pci_conf_match(cmd, pattern_buf, num_patterns, &dinfo->conf) == 0) { @@ -1217,6 +1331,9 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t break; } + dinfo->conf.pc_reported_len = + offsetof(struct pci_conf, pc_spare); + pci_conf_for_copyout(&dinfo->conf, &pcu, cmd); error = copyout(&pcu, (caddr_t)cio->matches + diff --git a/sys/dev/qat/qat_common/adf_gen4_timer.c b/sys/dev/qat/qat_common/adf_gen4_timer.c index 96b65cdff181..2c74d09418e5 100644 --- a/sys/dev/qat/qat_common/adf_gen4_timer.c +++ b/sys/dev/qat/qat_common/adf_gen4_timer.c @@ -57,7 +57,7 @@ end: static void timer_handler(struct timer_list *tl) { - struct adf_int_timer *int_timer = from_timer(int_timer, tl, timer); + struct adf_int_timer *int_timer = timer_container_of(int_timer, tl, timer); struct adf_accel_dev *accel_dev = int_timer->accel_dev; struct adf_hb_timer_data *hb_timer_data = NULL; u64 timeout_val = adf_get_next_timeout(int_timer->timeout_val); diff --git a/sys/dev/qlnx/qlnxe/ecore_dev.c b/sys/dev/qlnx/qlnxe/ecore_dev.c index 6187ecdbc446..389a95a4164c 100644 --- a/sys/dev/qlnx/qlnxe/ecore_dev.c +++ b/sys/dev/qlnx/qlnxe/ecore_dev.c @@ -5268,7 +5268,7 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn, } DP_VERBOSE(p_hwfn, ECORE_MSG_LINK, - "Read default link: Speed 0x%08x, Adv. Speed 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x EEE: %02x [%08x usec]\n", + "Read default link: Speed %u Mb/sec, Adv. Speeds 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x EEE: %02x [%u usec]\n", link->speed.forced_speed, link->speed.advertised_speeds, link->speed.autoneg, link->pause.autoneg, p_caps->default_eee, p_caps->eee_lpi_timer); @@ -6860,7 +6860,7 @@ int __ecore_configure_pf_max_bandwidth(struct ecore_hwfn *p_hwfn, p_hwfn->qm_info.pf_rl); DP_VERBOSE(p_hwfn, ECORE_MSG_LINK, - "Configured MAX bandwidth to be %08x Mb/sec\n", + "Configured MAX bandwidth to be %u Mb/sec\n", p_link->speed); return rc; @@ -6918,7 +6918,7 @@ int __ecore_configure_pf_min_bandwidth(struct ecore_hwfn *p_hwfn, rc = ecore_init_pf_wfq(p_hwfn, p_ptt, p_hwfn->rel_pf_id, min_bw); DP_VERBOSE(p_hwfn, ECORE_MSG_LINK, - "Configured MIN bandwidth to be %d Mb/sec\n", + "Configured MIN bandwidth to be %u Mb/sec\n", p_link->min_pf_rate); return rc; diff --git a/sys/dev/qlnx/qlnxe/ecore_mcp.c b/sys/dev/qlnx/qlnxe/ecore_mcp.c index ab14b1eb5186..6d1e5fe24d06 100644 --- a/sys/dev/qlnx/qlnxe/ecore_mcp.c +++ b/sys/dev/qlnx/qlnxe/ecore_mcp.c @@ -1638,7 +1638,7 @@ enum _ecore_status_t ecore_mcp_set_link(struct ecore_hwfn *p_hwfn, if (b_up) DP_VERBOSE(p_hwfn, ECORE_MSG_LINK, - "Configuring Link: Speed 0x%08x, Pause 0x%08x, adv_speed 0x%08x, loopback 0x%08x\n", + "Configuring Link: Speed %u Mb/sec, Pause 0x%08x, adv_speed 0x%08x, loopback 0x%08x\n", phy_cfg.speed, phy_cfg.pause, phy_cfg.adv_speed, phy_cfg.loopback_mode); else diff --git a/sys/dev/qlnx/qlnxe/qlnx_def.h b/sys/dev/qlnx/qlnxe/qlnx_def.h index 4342bba89587..796845f3f8c6 100644 --- a/sys/dev/qlnx/qlnxe/qlnx_def.h +++ b/sys/dev/qlnx/qlnxe/qlnx_def.h @@ -696,22 +696,6 @@ extern int qlnx_alloc_mem_sb(qlnx_host_t *ha, struct ecore_sb_info *sb_info, * Some OS specific stuff */ -#if (defined IFM_100G_SR4) -#define QLNX_IFM_100G_SR4 IFM_100G_SR4 -#define QLNX_IFM_100G_LR4 IFM_100G_LR4 -#define QLNX_IFM_100G_CR4 IFM_100G_CR4 -#else -#define QLNX_IFM_100G_SR4 IFM_UNKNOWN -#define QLNX_IFM_100G_LR4 IFM_UNKNOWN -#endif /* #if (defined IFM_100G_SR4) */ - -#if (defined IFM_25G_SR) -#define QLNX_IFM_25G_SR IFM_25G_SR -#define QLNX_IFM_25G_CR IFM_25G_CR -#else -#define QLNX_IFM_25G_SR IFM_UNKNOWN -#define QLNX_IFM_25G_CR IFM_UNKNOWN -#endif /* #if (defined IFM_25G_SR) */ #define QLNX_INC_IERRORS(ifp) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1) #define QLNX_INC_IQDROPS(ifp) if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1) diff --git a/sys/dev/qlnx/qlnxe/qlnx_os.c b/sys/dev/qlnx/qlnxe/qlnx_os.c index 4ad190374f87..9963f472c615 100644 --- a/sys/dev/qlnx/qlnxe/qlnx_os.c +++ b/sys/dev/qlnx/qlnxe/qlnx_os.c @@ -2375,18 +2375,15 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha) ifmedia_add(&ha->media, (IFM_ETHER | IFM_40G_CR4), 0, NULL); } else if ((device_id == QLOGIC_PCI_DEVICE_ID_1656) || (device_id == QLOGIC_PCI_DEVICE_ID_8070)) { - ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_25G_SR), 0, NULL); - ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_25G_CR), 0, NULL); + ifmedia_add(&ha->media, (IFM_ETHER | IFM_25G_SR), 0, NULL); + ifmedia_add(&ha->media, (IFM_ETHER | IFM_25G_CR), 0, NULL); } else if (device_id == QLOGIC_PCI_DEVICE_ID_1654) { ifmedia_add(&ha->media, (IFM_ETHER | IFM_50G_KR2), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_50G_CR2), 0, NULL); } else if (device_id == QLOGIC_PCI_DEVICE_ID_1644) { - ifmedia_add(&ha->media, - (IFM_ETHER | QLNX_IFM_100G_LR4), 0, NULL); - ifmedia_add(&ha->media, - (IFM_ETHER | QLNX_IFM_100G_SR4), 0, NULL); - ifmedia_add(&ha->media, - (IFM_ETHER | QLNX_IFM_100G_CR4), 0, NULL); + ifmedia_add(&ha->media, (IFM_ETHER | IFM_100G_LR4), 0, NULL); + ifmedia_add(&ha->media, (IFM_ETHER | IFM_100G_SR4), 0, NULL); + ifmedia_add(&ha->media, (IFM_ETHER | IFM_100G_CR4), 0, NULL); } ifmedia_add(&ha->media, (IFM_ETHER | IFM_FDX), 0, NULL); @@ -2724,7 +2721,9 @@ qlnx_ioctl(if_t ifp, u_long cmd, caddr_t data) case SIOCSIFMEDIA: case SIOCGIFMEDIA: - QL_DPRINT4(ha, "SIOCSIFMEDIA/SIOCGIFMEDIA (0x%lx)\n", cmd); + case SIOCGIFXMEDIA: + QL_DPRINT4(ha, + "SIOCSIFMEDIA/SIOCGIFMEDIA/SIOCGIFXMEDIA (0x%lx)\n", cmd); ret = ifmedia_ioctl(ifp, ifr, &ha->media, cmd); break; @@ -3808,11 +3807,11 @@ qlnx_get_optics(qlnx_host_t *ha, struct qlnx_link_output *if_link) case MEDIA_MODULE_FIBER: case MEDIA_UNSPECIFIED: if (if_link->speed == (100 * 1000)) - ifm_type = QLNX_IFM_100G_SR4; + ifm_type = IFM_100G_SR4; else if (if_link->speed == (40 * 1000)) ifm_type = IFM_40G_SR4; else if (if_link->speed == (25 * 1000)) - ifm_type = QLNX_IFM_25G_SR; + ifm_type = IFM_25G_SR; else if (if_link->speed == (10 * 1000)) ifm_type = (IFM_10G_LR | IFM_10G_SR); else if (if_link->speed == (1 * 1000)) @@ -3822,11 +3821,11 @@ qlnx_get_optics(qlnx_host_t *ha, struct qlnx_link_output *if_link) case MEDIA_DA_TWINAX: if (if_link->speed == (100 * 1000)) - ifm_type = QLNX_IFM_100G_CR4; + ifm_type = IFM_100G_CR4; else if (if_link->speed == (40 * 1000)) ifm_type = IFM_40G_CR4; else if (if_link->speed == (25 * 1000)) - ifm_type = QLNX_IFM_25G_CR; + ifm_type = IFM_25G_CR; else if (if_link->speed == (10 * 1000)) ifm_type = IFM_10G_TWINAX; diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c index 84ec174bd08e..2d7af254c52c 100644 --- a/sys/dev/random/random_harvestq.c +++ b/sys/dev/random/random_harvestq.c @@ -103,8 +103,10 @@ static const char *random_source_descr[ENTROPYSOURCE]; volatile int random_kthread_control; -/* Allow the sysadmin to select the broad category of - * entropy types to harvest. +/* + * Allow the sysadmin to select the broad category of entropy types to harvest. + * + * Updates are synchronized by the harvest mutex. */ __read_frequently u_int hc_source_mask; @@ -278,8 +280,15 @@ random_sources_feed(void) epoch_enter_preempt(rs_epoch, &et); CK_LIST_FOREACH(rrs, &source_list, rrs_entries) { for (i = 0; i < npools; i++) { + if (rrs->rrs_source->rs_read == NULL) { + /* Source pushes entropy asynchronously. */ + continue; + } n = rrs->rrs_source->rs_read(entropy, sizeof(entropy)); - KASSERT((n <= sizeof(entropy)), ("%s: rs_read returned too much data (%u > %zu)", __func__, n, sizeof(entropy))); + KASSERT((n <= sizeof(entropy)), + ("%s: rs_read returned too much data (%u > %zu)", + __func__, n, sizeof(entropy))); + /* * Sometimes the HW entropy source doesn't have anything * ready for us. This isn't necessarily untrustworthy. @@ -334,7 +343,17 @@ copy_event(uint32_t dst[static HARVESTSIZE + 1], { memset(dst, 0, sizeof(uint32_t) * (HARVESTSIZE + 1)); memcpy(dst, event->he_entropy, event->he_size); - dst[HARVESTSIZE] = event->he_somecounter; + if (event->he_source <= RANDOM_ENVIRONMENTAL_END) { + /* + * For pure entropy sources the timestamp counter is generally + * quite determinstic since samples are taken at regular + * intervals, so does not contribute much to the entropy. To + * make health tests more effective, exclude it from the sample, + * since it might otherwise defeat the health tests in a + * scenario where the source is stuck. + */ + dst[HARVESTSIZE] = event->he_somecounter; + } } static void @@ -464,11 +483,12 @@ SYSCTL_BOOL(_kern_random, OID_AUTO, nist_healthtest_enabled, "Enable NIST SP 800-90B health tests for noise sources"); static void -random_healthtest_init(enum random_entropy_source source) +random_healthtest_init(enum random_entropy_source source, int min_entropy) { struct health_test_softc *ht; ht = &healthtest[source]; + memset(ht, 0, sizeof(*ht)); KASSERT(ht->ht_state == INIT, ("%s: health test state is %d for source %d", __func__, ht->ht_state, source)); @@ -485,20 +505,62 @@ random_healthtest_init(enum random_entropy_source source) } /* - * Set cutoff values for the two tests, assuming that each sample has - * min-entropy of 1 bit and allowing for an error rate of 1 in 2^{34}. - * With a sample rate of RANDOM_KTHREAD_HZ, we expect to see an false - * positive once in ~54.5 years. + * Set cutoff values for the two tests, given a min-entropy estimate for + * the source and allowing for an error rate of 1 in 2^{34}. With a + * min-entropy estimate of 1 bit and a sample rate of RANDOM_KTHREAD_HZ, + * we expect to see an false positive once in ~54.5 years. * * The RCT limit comes from the formula in section 4.4.1. * - * The APT cutoff is calculated using the formula in section 4.4.2 + * The APT cutoffs are calculated using the formula in section 4.4.2 * footnote 10 with the number of Bernoulli trials changed from W to * W-1, since the test as written counts the number of samples equal to - * the first sample in the window, and thus tests W-1 samples. + * the first sample in the window, and thus tests W-1 samples. We + * provide cutoffs for estimates up to sizeof(uint32_t)*HARVESTSIZE*8 + * bits. */ - ht->ht_rct_limit = 35; - ht->ht_apt_cutoff = 330; + const int apt_cutoffs[] = { + [1] = 329, + [2] = 195, + [3] = 118, + [4] = 73, + [5] = 48, + [6] = 33, + [7] = 23, + [8] = 17, + [9] = 13, + [10] = 11, + [11] = 9, + [12] = 8, + [13] = 7, + [14] = 6, + [15] = 5, + [16] = 5, + [17 ... 19] = 4, + [20 ... 25] = 3, + [26 ... 42] = 2, + [43 ... 64] = 1, + }; + const int error_rate = 34; + + if (min_entropy == 0) { + /* + * For environmental sources, the main source of entropy is the + * associated timecounter value. Since these sources can be + * influenced by unprivileged users, we conservatively use a + * min-entropy estimate of 1 bit per sample. For "pure" + * sources, we assume 8 bits per sample, as such sources provide + * a variable amount of data per read and in particular might + * only provide a single byte at a time. + */ + min_entropy = source >= RANDOM_PURE_START ? 8 : 1; + } else if (min_entropy < 0 || min_entropy >= nitems(apt_cutoffs)) { + panic("invalid min_entropy %d for %s", min_entropy, + random_source_descr[source]); + } + + ht->ht_rct_limit = 1 + howmany(error_rate, min_entropy); + ht->ht_apt_cutoff = apt_cutoffs[min_entropy]; } static int @@ -533,9 +595,9 @@ random_check_uint_harvestmask(SYSCTL_HANDLER_ARGS) _RANDOM_HARVEST_ETHER_OFF | _RANDOM_HARVEST_UMA_OFF; int error; - u_int value, orig_value; + u_int value; - orig_value = value = hc_source_mask; + value = atomic_load_int(&hc_source_mask); error = sysctl_handle_int(oidp, &value, 0, req); if (error != 0 || req->newptr == NULL) return (error); @@ -546,12 +608,14 @@ random_check_uint_harvestmask(SYSCTL_HANDLER_ARGS) /* * Disallow userspace modification of pure entropy sources. */ + RANDOM_HARVEST_LOCK(); hc_source_mask = (value & ~user_immutable_mask) | - (orig_value & user_immutable_mask); + (hc_source_mask & user_immutable_mask); + RANDOM_HARVEST_UNLOCK(); return (0); } SYSCTL_PROC(_kern_random_harvest, OID_AUTO, mask, - CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, NULL, 0, + CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, random_check_uint_harvestmask, "IU", "Entropy harvesting mask"); @@ -563,9 +627,16 @@ random_print_harvestmask(SYSCTL_HANDLER_ARGS) error = sysctl_wire_old_buffer(req, 0); if (error == 0) { + u_int mask; + sbuf_new_for_sysctl(&sbuf, NULL, 128, req); - for (i = ENTROPYSOURCE - 1; i >= 0; i--) - sbuf_cat(&sbuf, (hc_source_mask & (1 << i)) ? "1" : "0"); + mask = atomic_load_int(&hc_source_mask); + for (i = ENTROPYSOURCE - 1; i >= 0; i--) { + bool present; + + present = (mask & (1u << i)) != 0; + sbuf_cat(&sbuf, present ? "1" : "0"); + } error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); } @@ -619,16 +690,21 @@ random_print_harvestmask_symbolic(SYSCTL_HANDLER_ARGS) first = true; error = sysctl_wire_old_buffer(req, 0); if (error == 0) { + u_int mask; + sbuf_new_for_sysctl(&sbuf, NULL, 128, req); + mask = atomic_load_int(&hc_source_mask); for (i = ENTROPYSOURCE - 1; i >= 0; i--) { - if (i >= RANDOM_PURE_START && - (hc_source_mask & (1 << i)) == 0) + bool present; + + present = (mask & (1u << i)) != 0; + if (i >= RANDOM_PURE_START && !present) continue; if (!first) sbuf_cat(&sbuf, ","); - sbuf_cat(&sbuf, !(hc_source_mask & (1 << i)) ? "[" : ""); + sbuf_cat(&sbuf, !present ? "[" : ""); sbuf_cat(&sbuf, random_source_descr[i]); - sbuf_cat(&sbuf, !(hc_source_mask & (1 << i)) ? "]" : ""); + sbuf_cat(&sbuf, !present ? "]" : ""); first = false; } error = sbuf_finish(&sbuf); @@ -652,8 +728,8 @@ random_harvestq_init(void *unused __unused) RANDOM_HARVEST_INIT_LOCK(); harvest_context.hc_active_buf = 0; - for (int i = 0; i < ENTROPYSOURCE; i++) - random_healthtest_init(i); + for (int i = RANDOM_START; i <= RANDOM_ENVIRONMENTAL_END; i++) + random_healthtest_init(i, 0); } SYSINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, random_harvestq_init, NULL); @@ -835,20 +911,6 @@ random_harvest_direct_(const void *entropy, u_int size, enum random_entropy_sour } void -random_harvest_register_source(enum random_entropy_source source) -{ - - hc_source_mask |= (1 << source); -} - -void -random_harvest_deregister_source(enum random_entropy_source source) -{ - - hc_source_mask &= ~(1 << source); -} - -void random_source_register(const struct random_source *rsource) { struct random_sources *rrs; @@ -858,11 +920,12 @@ random_source_register(const struct random_source *rsource) rrs = malloc(sizeof(*rrs), M_ENTROPY, M_WAITOK); rrs->rrs_source = rsource; - random_harvest_register_source(rsource->rs_source); - printf("random: registering fast source %s\n", rsource->rs_ident); + random_healthtest_init(rsource->rs_source, rsource->rs_min_entropy); + RANDOM_HARVEST_LOCK(); + hc_source_mask |= (1 << rsource->rs_source); CK_LIST_INSERT_HEAD(&source_list, rrs, rrs_entries); RANDOM_HARVEST_UNLOCK(); } @@ -874,9 +937,8 @@ random_source_deregister(const struct random_source *rsource) KASSERT(rsource != NULL, ("invalid input to %s", __func__)); - random_harvest_deregister_source(rsource->rs_source); - RANDOM_HARVEST_LOCK(); + hc_source_mask &= ~(1 << rsource->rs_source); CK_LIST_FOREACH(rrs, &source_list, rrs_entries) if (rrs->rrs_source == rsource) { CK_LIST_REMOVE(rrs, rrs_entries); diff --git a/sys/dev/random/randomdev.h b/sys/dev/random/randomdev.h index 6d742447ea8b..a6ca66c7d92e 100644 --- a/sys/dev/random/randomdev.h +++ b/sys/dev/random/randomdev.h @@ -52,7 +52,9 @@ random_check_uint_##name(SYSCTL_HANDLER_ARGS) \ } #endif /* SYSCTL_DECL */ +#ifdef MALLOC_DECLARE MALLOC_DECLARE(M_ENTROPY); +#endif extern bool random_bypass_before_seeding; extern bool read_random_bypassed_before_seeding; @@ -101,6 +103,7 @@ struct random_source { const char *rs_ident; enum random_entropy_source rs_source; random_source_read_t *rs_read; + int rs_min_entropy; }; void random_source_register(const struct random_source *); diff --git a/sys/dev/re/if_re.c b/sys/dev/re/if_re.c index 091ab2db72ec..67864c2de388 100644 --- a/sys/dev/re/if_re.c +++ b/sys/dev/re/if_re.c @@ -3558,6 +3558,7 @@ re_ioctl(if_t ifp, u_long command, caddr_t data) static void re_watchdog(struct rl_softc *sc) { + struct epoch_tracker et; if_t ifp; RL_LOCK_ASSERT(sc); @@ -3578,7 +3579,9 @@ re_watchdog(struct rl_softc *sc) if_printf(ifp, "watchdog timeout\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + NET_EPOCH_ENTER(et); re_rxeof(sc, NULL); + NET_EPOCH_EXIT(et); if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); re_init_locked(sc); if (!if_sendq_empty(ifp)) diff --git a/sys/dev/rtwn/if_rtwn.c b/sys/dev/rtwn/if_rtwn.c index 7a547e13cafa..25287f222270 100644 --- a/sys/dev/rtwn/if_rtwn.c +++ b/sys/dev/rtwn/if_rtwn.c @@ -268,6 +268,9 @@ rtwn_attach(struct rtwn_softc *sc) ic->ic_flags_ext |= IEEE80211_FEXT_WATCHDOG; #endif + /* Enable seqno offload */ + ic->ic_flags_ext |= IEEE80211_FEXT_SEQNO_OFFLOAD; + /* Adjust capabilities. */ rtwn_adj_devcaps(sc); diff --git a/sys/dev/rtwn/if_rtwn_tx.c b/sys/dev/rtwn/if_rtwn_tx.c index 2c9c246dfbb4..fa7f35f2de83 100644 --- a/sys/dev/rtwn/if_rtwn_tx.c +++ b/sys/dev/rtwn/if_rtwn_tx.c @@ -183,6 +183,10 @@ rtwn_tx_data(struct rtwn_softc *sc, struct ieee80211_node *ni, } } + /* seqno allocate, only if AMPDU isn't running */ + if ((m->m_flags & M_AMPDU_MPDU) == 0) + ieee80211_output_seqno_assign(ni, -1, m); + cipher = IEEE80211_CIPHER_NONE; if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { k = ieee80211_crypto_encap(ni, m); @@ -229,6 +233,10 @@ rtwn_tx_raw(struct rtwn_softc *sc, struct ieee80211_node *ni, uint8_t type; u_int cipher; + /* seqno allocate, only if AMPDU isn't running */ + if ((m->m_flags & M_AMPDU_MPDU) == 0) + ieee80211_output_seqno_assign(ni, -1, m); + /* Encrypt the frame if need be. */ cipher = IEEE80211_CIPHER_NONE; if (params->ibp_flags & IEEE80211_BPF_CRYPTO) { diff --git a/sys/dev/rtwn/rtl8192c/r92c_tx.c b/sys/dev/rtwn/rtl8192c/r92c_tx.c index 6b013de0c536..ba2f60bd9295 100644 --- a/sys/dev/rtwn/rtl8192c/r92c_tx.c +++ b/sys/dev/rtwn/rtl8192c/r92c_tx.c @@ -452,11 +452,10 @@ r92c_fill_tx_desc(struct rtwn_softc *sc, struct ieee80211_node *ni, } else { uint16_t seqno; - if (m->m_flags & M_AMPDU_MPDU) { - seqno = ni->ni_txseqs[tid] % IEEE80211_SEQ_RANGE; - ni->ni_txseqs[tid]++; - } else - seqno = M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE; + if (m->m_flags & M_AMPDU_MPDU) + ieee80211_output_seqno_assign(ni, -1, m); + + seqno = M_SEQNO_GET(m); /* Set sequence number. */ txd->txdseq = htole16(seqno); @@ -511,7 +510,7 @@ r92c_fill_tx_desc_raw(struct rtwn_softc *sc, struct ieee80211_node *ni, rtwn_r92c_tx_setup_hwseq(sc, txd); } else { /* Set sequence number. */ - txd->txdseq |= htole16(M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE); + txd->txdseq |= htole16(M_SEQNO_GET(m)); } } diff --git a/sys/dev/rtwn/rtl8812a/r12a_tx.c b/sys/dev/rtwn/rtl8812a/r12a_tx.c index acb238316559..6a7af0a9b674 100644 --- a/sys/dev/rtwn/rtl8812a/r12a_tx.c +++ b/sys/dev/rtwn/rtl8812a/r12a_tx.c @@ -101,12 +101,12 @@ r12a_tx_set_vht_bw(struct rtwn_softc *sc, void *buf, struct ieee80211_node *ni) prim_chan = r12a_get_primary_channel(sc, ni->ni_chan); - if (ieee80211_vht_check_tx_bw(ni, IEEE80211_STA_RX_BW_80)) { + if (ieee80211_vht_check_tx_bw(ni, NET80211_STA_RX_BW_80)) { txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_BW, R12A_TXDW5_DATA_BW80)); txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_PRIM_CHAN, prim_chan)); - } else if (ieee80211_vht_check_tx_bw(ni, IEEE80211_STA_RX_BW_40)) { + } else if (ieee80211_vht_check_tx_bw(ni, NET80211_STA_RX_BW_40)) { txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_BW, R12A_TXDW5_DATA_BW40)); txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_PRIM_CHAN, @@ -433,12 +433,9 @@ r12a_fill_tx_desc(struct rtwn_softc *sc, struct ieee80211_node *ni, } else { uint16_t seqno; - if (m->m_flags & M_AMPDU_MPDU) { - seqno = ni->ni_txseqs[tid]; - ni->ni_txseqs[tid]++; - } else - seqno = M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE; - + if (m->m_flags & M_AMPDU_MPDU) + ieee80211_output_seqno_assign(ni, -1, m); + seqno = M_SEQNO_GET(m); /* Set sequence number. */ txd->txdw9 |= htole32(SM(R12A_TXDW9_SEQ, seqno)); } @@ -493,8 +490,7 @@ r12a_fill_tx_desc_raw(struct rtwn_softc *sc, struct ieee80211_node *ni, txd->txdw3 |= htole32(SM(R12A_TXDW3_SEQ_SEL, uvp->id)); } else { /* Set sequence number. */ - txd->txdw9 |= htole32(SM(R12A_TXDW9_SEQ, - M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE)); + txd->txdw9 |= htole32(SM(R12A_TXDW9_SEQ, M_SEQNO_GET(m))); } } diff --git a/sys/dev/sound/pci/hda/hdac.c b/sys/dev/sound/pci/hda/hdac.c index 900578b73de4..80028063bb0d 100644 --- a/sys/dev/sound/pci/hda/hdac.c +++ b/sys/dev/sound/pci/hda/hdac.c @@ -133,6 +133,7 @@ static const struct { { HDA_INTEL_PCH, "Intel Ibex Peak", 0, 0 }, { HDA_INTEL_PCH2, "Intel Ibex Peak", 0, 0 }, { HDA_INTEL_ELLK, "Intel Elkhart Lake", 0, 0 }, + { HDA_INTEL_ELLK2, "Intel Elkhart Lake", 0, 0 }, { HDA_INTEL_JLK2, "Intel Jasper Lake", 0, 0 }, { HDA_INTEL_BXTNP, "Intel Broxton-P", 0, 0 }, { HDA_INTEL_SCH, "Intel SCH", 0, 0 }, @@ -1773,17 +1774,17 @@ hdac_detach(device_t dev) struct hdac_softc *sc = device_get_softc(dev); int i, error; + callout_drain(&sc->poll_callout); + hdac_irq_free(sc); + taskqueue_drain(taskqueue_thread, &sc->unsolq_task); + error = bus_generic_detach(dev); if (error != 0) return (error); hdac_lock(sc); - callout_stop(&sc->poll_callout); hdac_reset(sc, false); hdac_unlock(sc); - callout_drain(&sc->poll_callout); - taskqueue_drain(taskqueue_thread, &sc->unsolq_task); - hdac_irq_free(sc); for (i = 0; i < sc->num_ss; i++) hdac_dma_free(sc, &sc->streams[i].bdl); @@ -2206,4 +2207,4 @@ static driver_t hdac_driver = { sizeof(struct hdac_softc), }; -DRIVER_MODULE(snd_hda, pci, hdac_driver, NULL, NULL); +DRIVER_MODULE_ORDERED(snd_hda, pci, hdac_driver, NULL, NULL, SI_ORDER_ANY); diff --git a/sys/dev/sound/pci/hda/hdac.h b/sys/dev/sound/pci/hda/hdac.h index 223434a214b1..09a17f702019 100644 --- a/sys/dev/sound/pci/hda/hdac.h +++ b/sys/dev/sound/pci/hda/hdac.h @@ -66,6 +66,7 @@ #define HDA_INTEL_PCH HDA_MODEL_CONSTRUCT(INTEL, 0x3b56) #define HDA_INTEL_PCH2 HDA_MODEL_CONSTRUCT(INTEL, 0x3b57) #define HDA_INTEL_ELLK HDA_MODEL_CONSTRUCT(INTEL, 0x4b55) +#define HDA_INTEL_ELLK2 HDA_MODEL_CONSTRUCT(INTEL, 0x4b58) #define HDA_INTEL_JLK2 HDA_MODEL_CONSTRUCT(INTEL, 0x4dc8) #define HDA_INTEL_BXTNP HDA_MODEL_CONSTRUCT(INTEL, 0x5a98) #define HDA_INTEL_MACBOOKPRO92 HDA_MODEL_CONSTRUCT(INTEL, 0x7270) diff --git a/sys/dev/tpm/tpm20.c b/sys/dev/tpm/tpm20.c index 876dd0bcc40d..067e7ccae8f9 100644 --- a/sys/dev/tpm/tpm20.c +++ b/sys/dev/tpm/tpm20.c @@ -25,8 +25,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include <sys/cdefs.h> #include <sys/random.h> +#include <dev/random/randomdev.h> #include "tpm20.h" @@ -184,6 +184,13 @@ tpm20_ioctl(struct cdev *dev, u_long cmd, caddr_t data, return (ENOTTY); } +#ifdef TPM_HARVEST +static const struct random_source random_tpm = { + .rs_ident = "TPM", + .rs_source = RANDOM_PURE_TPM, +}; +#endif + int tpm20_init(struct tpm_sc *sc) { @@ -206,7 +213,7 @@ tpm20_init(struct tpm_sc *sc) tpm20_release(sc); #ifdef TPM_HARVEST - random_harvest_register_source(RANDOM_PURE_TPM); + random_source_register(&random_tpm); TIMEOUT_TASK_INIT(taskqueue_thread, &sc->harvest_task, 0, tpm20_harvest, sc); taskqueue_enqueue_timeout(taskqueue_thread, &sc->harvest_task, 0); @@ -223,7 +230,7 @@ tpm20_release(struct tpm_sc *sc) #ifdef TPM_HARVEST if (device_is_attached(sc->dev)) taskqueue_drain_timeout(taskqueue_thread, &sc->harvest_task); - random_harvest_deregister_source(RANDOM_PURE_TPM); + random_source_deregister(&random_tpm); #endif if (sc->buf != NULL) diff --git a/sys/dev/tpm/tpm_tis_core.c b/sys/dev/tpm/tpm_tis_core.c index d8421f8156c9..4159de4daf3b 100644 --- a/sys/dev/tpm/tpm_tis_core.c +++ b/sys/dev/tpm/tpm_tis_core.c @@ -97,6 +97,7 @@ tpmtis_attach(device_t dev) { struct tpm_sc *sc; int result; + int poll = 0; sc = device_get_softc(dev); sc->dev = dev; @@ -105,6 +106,12 @@ tpmtis_attach(device_t dev) sx_init(&sc->dev_lock, "TPM driver lock"); sc->buf = malloc(TPM_BUFSIZE, M_TPM20, M_WAITOK); + resource_int_value("tpm", device_get_unit(dev), "use_polling", &poll); + if (poll != 0) { + device_printf(dev, "Using poll method to get TPM operation status \n"); + goto skip_irq; + } + sc->irq_rid = 0; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irq_rid, RF_ACTIVE | RF_SHAREABLE); diff --git a/sys/dev/ufshci/ufshci.h b/sys/dev/ufshci/ufshci.h index b96d82ff836e..b055d2d2d769 100644 --- a/sys/dev/ufshci/ufshci.h +++ b/sys/dev/ufshci/ufshci.h @@ -716,6 +716,42 @@ struct ufshci_device_descriptor { _Static_assert(sizeof(struct ufshci_device_descriptor) == 89, "bad size for ufshci_device_descriptor"); +/* Defines the bit field of dExtendedUfsFeaturesSupport. */ +enum ufshci_desc_wb_ext_ufs_feature { + UFSHCI_DESC_EXT_UFS_FEATURE_FFU = (1 << 0), + UFSHCI_DESC_EXT_UFS_FEATURE_PSA = (1 << 1), + UFSHCI_DESC_EXT_UFS_FEATURE_DEV_LIFE_SPAN = (1 << 2), + UFSHCI_DESC_EXT_UFS_FEATURE_REFRESH_OP = (1 << 3), + UFSHCI_DESC_EXT_UFS_FEATURE_TOO_HIGH_TEMP = (1 << 4), + UFSHCI_DESC_EXT_UFS_FEATURE_TOO_LOW_TEMP = (1 << 5), + UFSHCI_DESC_EXT_UFS_FEATURE_EXT_TEMP = (1 << 6), + UFSHCI_DESC_EXT_UFS_FEATURE_HPB_SUPPORT = (1 << 7), + UFSHCI_DESC_EXT_UFS_FEATURE_WRITE_BOOSTER = (1 << 8), + UFSHCI_DESC_EXT_UFS_FEATURE_PERF_THROTTLING = (1 << 9), + UFSHCI_DESC_EXT_UFS_FEATURE_ADVANCED_RPMB = (1 << 10), + UFSHCI_DESC_EXT_UFS_FEATURE_ZONED_UFS_EXTENSION = (1 << 11), + UFSHCI_DESC_EXT_UFS_FEATURE_DEV_LEVEL_EXCEPTION = (1 << 12), + UFSHCI_DESC_EXT_UFS_FEATURE_HID = (1 << 13), + UFSHCI_DESC_EXT_UFS_FEATURE_BARRIER = (1 << 14), + UFSHCI_DESC_EXT_UFS_FEATURE_CLEAR_ERROR_HISTORY = (1 << 15), + UFSHCI_DESC_EXT_UFS_FEATURE_EXT_IID = (1 << 16), + UFSHCI_DESC_EXT_UFS_FEATURE_FBO = (1 << 17), + UFSHCI_DESC_EXT_UFS_FEATURE_FAST_RECOVERY_MODE = (1 << 18), + UFSHCI_DESC_EXT_UFS_FEATURE_RPMB_VENDOR_CMD = (1 << 19), +}; + +/* Defines the bit field of bWriteBoosterBufferType. */ +enum ufshci_desc_wb_buffer_type { + UFSHCI_DESC_WB_BUF_TYPE_LU_DEDICATED = 0x00, + UFSHCI_DESC_WB_BUF_TYPE_SINGLE_SHARED = 0x01, +}; + +/* Defines the bit field of bWriteBoosterBufferPreserveUserSpaceEn. */ +enum ufshci_desc_user_space_config { + UFSHCI_DESC_WB_BUF_USER_SPACE_REDUCTION = 0x00, + UFSHCI_DESC_WB_BUF_PRESERVE_USER_SPACE = 0x01, +}; + /* * UFS Spec 4.1, section 14.1.5.3 "Configuration Descriptor" * ConfigurationDescriptor use big-endian byte ordering. @@ -1014,4 +1050,37 @@ enum ufshci_attributes { UFSHCI_ATTR_B_REFRESH_METHOD = 0x2f, }; +/* bAvailableWriteBoosterBufferSize codes (UFS WriteBooster abailable buffer + * left %) */ +enum ufshci_wb_available_buffer_Size { + UFSHCI_ATTR_WB_AVAILABLE_0 = 0x00, /* 0% buffer remains */ + UFSHCI_ATTR_WB_AVAILABLE_10 = 0x01, /* 10% buffer remains */ + UFSHCI_ATTR_WB_AVAILABLE_20 = 0x02, /* 20% buffer remains */ + UFSHCI_ATTR_WB_AVAILABLE_30 = 0x03, /* 30% buffer remains */ + UFSHCI_ATTR_WB_AVAILABLE_40 = 0x04, /* 40% buffer remains */ + UFSHCI_ATTR_WB_AVAILABLE_50 = 0x05, /* 50% buffer remains */ + UFSHCI_ATTR_WB_AVAILABLE_60 = 0x06, /* 60% buffer remains */ + UFSHCI_ATTR_WB_AVAILABLE_70 = 0x07, /* 70% buffer remains */ + UFSHCI_ATTR_WB_AVAILABLE_80 = 0x08, /* 80% buffer remains */ + UFSHCI_ATTR_WB_AVAILABLE_90 = 0x09, /* 90% buffer remains */ + UFSHCI_ATTR_WB_AVAILABLE_100 = 0x0A, /* 100% buffer remains */ +}; + +/* bWriteBoosterBufferLifeTimeEst codes (UFS WriteBooster buffer life %) */ +enum ufshci_wb_lifetime { + UFSHCI_ATTR_WB_LIFE_DISABLED = 0x00, /* Info not available */ + UFSHCI_ATTR_WB_LIFE_0_10 = 0x01, /* 0%–10% used */ + UFSHCI_ATTR_WB_LIFE_10_20 = 0x02, /* 10%–20% used */ + UFSHCI_ATTR_WB_LIFE_20_30 = 0x03, /* 20%–30% used */ + UFSHCI_ATTR_WB_LIFE_30_40 = 0x04, /* 30%–40% used */ + UFSHCI_ATTR_WB_LIFE_40_50 = 0x05, /* 40%–50% used */ + UFSHCI_ATTR_WB_LIFE_50_60 = 0x06, /* 50%–60% used */ + UFSHCI_ATTR_WB_LIFE_60_70 = 0x07, /* 60%–70% used */ + UFSHCI_ATTR_WB_LIFE_70_80 = 0x08, /* 70%–80% used */ + UFSHCI_ATTR_WB_LIFE_80_90 = 0x09, /* 80%–90% used */ + UFSHCI_ATTR_WB_LIFE_90_100 = 0x0A, /* 90%–100% used */ + UFSHCI_ATTR_WB_LIFE_EXCEEDED = + 0x0B, /* Exceeded estimated life (treat as WB disabled) */ +}; + #endif /* __UFSHCI_H__ */ diff --git a/sys/dev/ufshci/ufshci_ctrlr.c b/sys/dev/ufshci/ufshci_ctrlr.c index 37bd32665b2b..36be94b8b8b7 100644 --- a/sys/dev/ufshci/ufshci_ctrlr.c +++ b/sys/dev/ufshci/ufshci_ctrlr.c @@ -61,7 +61,7 @@ ufshci_ctrlr_enable_host_ctrlr(struct ufshci_controller *ctrlr) int ufshci_ctrlr_construct(struct ufshci_controller *ctrlr, device_t dev) { - uint32_t ver, cap, hcs, ie; + uint32_t ver, cap, hcs, ie, ahit; uint32_t timeout_period, retry_count; int error; @@ -127,6 +127,13 @@ ufshci_ctrlr_construct(struct ufshci_controller *ctrlr, device_t dev) if (error) return (error); + /* Read the UECPA register to clear */ + ufshci_mmio_read_4(ctrlr, uecpa); + + /* Diable Auto-hibernate */ + ahit = 0; + ufshci_mmio_write_4(ctrlr, ahit, ahit); + /* * The device_present(UFSHCI_HCS_REG_DP) bit becomes true if the host * controller has successfully received a Link Startup UIC command @@ -139,6 +146,16 @@ ufshci_ctrlr_construct(struct ufshci_controller *ctrlr, device_t dev) return (ENXIO); } + /* Allocate and initialize UTP Task Management Request List. */ + error = ufshci_utmr_req_queue_construct(ctrlr); + if (error) + return (error); + + /* Allocate and initialize UTP Transfer Request List or SQ/CQ. */ + error = ufshci_utr_req_queue_construct(ctrlr); + if (error) + return (error); + /* Enable additional interrupts by programming the IE register. */ ie = ufshci_mmio_read_4(ctrlr, ie); ie |= UFSHCIM(UFSHCI_IE_REG_UTRCE); /* UTR Completion */ @@ -153,19 +170,12 @@ ufshci_ctrlr_construct(struct ufshci_controller *ctrlr, device_t dev) /* TODO: Initialize interrupt Aggregation Control Register (UTRIACR) */ - /* Allocate and initialize UTP Task Management Request List. */ - error = ufshci_utmr_req_queue_construct(ctrlr); - if (error) - return (error); - - /* Allocate and initialize UTP Transfer Request List or SQ/CQ. */ - error = ufshci_utr_req_queue_construct(ctrlr); - if (error) - return (error); - /* TODO: Separate IO and Admin slot */ - /* max_hw_pend_io is the number of slots in the transfer_req_queue */ - ctrlr->max_hw_pend_io = ctrlr->transfer_req_queue.num_entries; + /* + * max_hw_pend_io is the number of slots in the transfer_req_queue. + * Reduce num_entries by one to reserve an admin slot. + */ + ctrlr->max_hw_pend_io = ctrlr->transfer_req_queue.num_entries - 1; return (0); } @@ -342,18 +352,19 @@ ufshci_ctrlr_start(struct ufshci_controller *ctrlr) return; } - /* Read Controller Descriptor (Device, Geometry)*/ + /* Read Controller Descriptor (Device, Geometry) */ if (ufshci_dev_get_descriptor(ctrlr) != 0) { ufshci_ctrlr_fail(ctrlr, false); return; } - /* TODO: Configure Write Protect */ + if (ufshci_dev_config_write_booster(ctrlr)) { + ufshci_ctrlr_fail(ctrlr, false); + return; + } /* TODO: Configure Background Operations */ - /* TODO: Configure Write Booster */ - if (ufshci_sim_attach(ctrlr) != 0) { ufshci_ctrlr_fail(ctrlr, false); return; diff --git a/sys/dev/ufshci/ufshci_dev.c b/sys/dev/ufshci/ufshci_dev.c index a0e32914e2aa..dd196b1d638b 100644 --- a/sys/dev/ufshci/ufshci_dev.c +++ b/sys/dev/ufshci/ufshci_dev.c @@ -60,6 +60,14 @@ ufshci_dev_read_geometry_descriptor(struct ufshci_controller *ctrlr, } static int +ufshci_dev_read_unit_descriptor(struct ufshci_controller *ctrlr, uint8_t lun, + struct ufshci_unit_descriptor *desc) +{ + return (ufshci_dev_read_descriptor(ctrlr, UFSHCI_DESC_TYPE_UNIT, lun, 0, + desc, sizeof(struct ufshci_unit_descriptor))); +} + +static int ufshci_dev_read_flag(struct ufshci_controller *ctrlr, enum ufshci_flags flag_type, uint8_t *flag) { @@ -114,6 +122,61 @@ ufshci_dev_set_flag(struct ufshci_controller *ctrlr, } static int +ufshci_dev_clear_flag(struct ufshci_controller *ctrlr, + enum ufshci_flags flag_type) +{ + struct ufshci_completion_poll_status status; + struct ufshci_query_param param; + + param.function = UFSHCI_QUERY_FUNC_STANDARD_WRITE_REQUEST; + param.opcode = UFSHCI_QUERY_OPCODE_CLEAR_FLAG; + param.type = flag_type; + param.index = 0; + param.selector = 0; + param.value = 0; + + status.done = 0; + ufshci_ctrlr_cmd_send_query_request(ctrlr, ufshci_completion_poll_cb, + &status, param); + ufshci_completion_poll(&status); + if (status.error) { + ufshci_printf(ctrlr, "ufshci_dev_clear_flag failed!\n"); + return (ENXIO); + } + + return (0); +} + +static int +ufshci_dev_read_attribute(struct ufshci_controller *ctrlr, + enum ufshci_attributes attr_type, uint8_t index, uint8_t selector, + uint64_t *value) +{ + struct ufshci_completion_poll_status status; + struct ufshci_query_param param; + + param.function = UFSHCI_QUERY_FUNC_STANDARD_READ_REQUEST; + param.opcode = UFSHCI_QUERY_OPCODE_READ_ATTRIBUTE; + param.type = attr_type; + param.index = index; + param.selector = selector; + param.value = 0; + + status.done = 0; + ufshci_ctrlr_cmd_send_query_request(ctrlr, ufshci_completion_poll_cb, + &status, param); + ufshci_completion_poll(&status); + if (status.error) { + ufshci_printf(ctrlr, "ufshci_dev_read_attribute failed!\n"); + return (ENXIO); + } + + *value = status.cpl.response_upiu.query_response_upiu.value_64; + + return (0); +} + +static int ufshci_dev_write_attribute(struct ufshci_controller *ctrlr, enum ufshci_attributes attr_type, uint8_t index, uint8_t selector, uint64_t value) @@ -270,7 +333,7 @@ ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr) */ const uint32_t fast_mode = 1; const uint32_t rx_bit_shift = 4; - const uint32_t power_mode = (fast_mode << rx_bit_shift) | fast_mode; + uint32_t power_mode, peer_granularity; /* Update lanes with available TX/RX lanes */ if (ufshci_uic_send_dme_get(ctrlr, PA_AvailTxDataLanes, @@ -295,6 +358,20 @@ ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr) ctrlr->rx_lanes)) return (ENXIO); + if (ctrlr->quirks & UFSHCI_QUIRK_CHANGE_LANE_AND_GEAR_SEPARATELY) { + /* Before changing gears, first change the number of lanes. */ + if (ufshci_uic_send_dme_get(ctrlr, PA_PWRMode, &power_mode)) + return (ENXIO); + if (ufshci_uic_send_dme_set(ctrlr, PA_PWRMode, power_mode)) + return (ENXIO); + + /* Wait for power mode changed. */ + if (ufshci_uic_power_mode_ready(ctrlr)) { + ufshci_reg_dump(ctrlr); + return (ENXIO); + } + } + /* Set HS-GEAR to max gear */ ctrlr->hs_gear = ctrlr->max_rx_hs_gear; if (ufshci_uic_send_dme_set(ctrlr, PA_TxGear, ctrlr->hs_gear)) @@ -346,6 +423,7 @@ ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr) return (ENXIO); /* Set TX/RX PWRMode */ + power_mode = (fast_mode << rx_bit_shift) | fast_mode; if (ufshci_uic_send_dme_set(ctrlr, PA_PWRMode, power_mode)) return (ENXIO); @@ -366,7 +444,8 @@ ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr) pause_sbt("ufshci", ustosbt(1250), 0, C_PREL(1)); /* Test with dme_peer_get to make sure there are no errors. */ - if (ufshci_uic_send_dme_peer_get(ctrlr, PA_Granularity, NULL)) + if (ufshci_uic_send_dme_peer_get(ctrlr, PA_Granularity, + &peer_granularity)) return (ENXIO); } @@ -398,7 +477,7 @@ ufshci_dev_get_descriptor(struct ufshci_controller *ctrlr) return (error); ver = be16toh(device->dev_desc.wSpecVersion); - ufshci_printf(ctrlr, "UFS device spec version %u.%u%u\n", + ufshci_printf(ctrlr, "UFS device spec version %u.%u.%u\n", UFSHCIV(UFSHCI_VER_REG_MJR, ver), UFSHCIV(UFSHCI_VER_REG_MNR, ver), UFSHCIV(UFSHCI_VER_REG_VS, ver)); ufshci_printf(ctrlr, "%u enabled LUNs found\n", @@ -426,3 +505,273 @@ ufshci_dev_get_descriptor(struct ufshci_controller *ctrlr) return (0); } + +static int +ufshci_dev_enable_write_booster(struct ufshci_controller *ctrlr) +{ + struct ufshci_device *dev = &ctrlr->ufs_dev; + int error; + + /* Enable WriteBooster */ + error = ufshci_dev_set_flag(ctrlr, UFSHCI_FLAG_F_WRITE_BOOSTER_EN); + if (error) { + ufshci_printf(ctrlr, "Failed to enable WriteBooster\n"); + return (error); + } + dev->is_wb_enabled = true; + + /* Enable WriteBooster buffer flush during hibernate */ + error = ufshci_dev_set_flag(ctrlr, + UFSHCI_FLAG_F_WB_BUFFER_FLUSH_DURING_HIBERNATE); + if (error) { + ufshci_printf(ctrlr, + "Failed to enable WriteBooster buffer flush during hibernate\n"); + return (error); + } + + /* Enable WriteBooster buffer flush */ + error = ufshci_dev_set_flag(ctrlr, UFSHCI_FLAG_F_WB_BUFFER_FLUSH_EN); + if (error) { + ufshci_printf(ctrlr, + "Failed to enable WriteBooster buffer flush\n"); + return (error); + } + dev->is_wb_flush_enabled = true; + + return (0); +} + +static int +ufshci_dev_disable_write_booster(struct ufshci_controller *ctrlr) +{ + struct ufshci_device *dev = &ctrlr->ufs_dev; + int error; + + /* Disable WriteBooster buffer flush */ + error = ufshci_dev_clear_flag(ctrlr, UFSHCI_FLAG_F_WB_BUFFER_FLUSH_EN); + if (error) { + ufshci_printf(ctrlr, + "Failed to disable WriteBooster buffer flush\n"); + return (error); + } + dev->is_wb_flush_enabled = false; + + /* Disable WriteBooster buffer flush during hibernate */ + error = ufshci_dev_clear_flag(ctrlr, + UFSHCI_FLAG_F_WB_BUFFER_FLUSH_DURING_HIBERNATE); + if (error) { + ufshci_printf(ctrlr, + "Failed to disable WriteBooster buffer flush during hibernate\n"); + return (error); + } + + /* Disable WriteBooster */ + error = ufshci_dev_clear_flag(ctrlr, UFSHCI_FLAG_F_WRITE_BOOSTER_EN); + if (error) { + ufshci_printf(ctrlr, "Failed to disable WriteBooster\n"); + return (error); + } + dev->is_wb_enabled = false; + + return (0); +} + +static int +ufshci_dev_is_write_booster_buffer_life_time_left( + struct ufshci_controller *ctrlr, bool *is_life_time_left) +{ + struct ufshci_device *dev = &ctrlr->ufs_dev; + uint8_t buffer_lun; + uint64_t life_time; + uint32_t error; + + if (dev->wb_buffer_type == UFSHCI_DESC_WB_BUF_TYPE_LU_DEDICATED) + buffer_lun = dev->wb_dedicated_lu; + else + buffer_lun = 0; + + error = ufshci_dev_read_attribute(ctrlr, + UFSHCI_ATTR_B_WB_BUFFER_LIFE_TIME_EST, buffer_lun, 0, &life_time); + if (error) + return (error); + + *is_life_time_left = (life_time != UFSHCI_ATTR_WB_LIFE_EXCEEDED); + + return (0); +} + +/* + * This function is not yet in use. It will be used when suspend/resume is + * implemented. + */ +static __unused int +ufshci_dev_need_write_booster_buffer_flush(struct ufshci_controller *ctrlr, + bool *need_flush) +{ + struct ufshci_device *dev = &ctrlr->ufs_dev; + bool is_life_time_left = false; + uint64_t available_buffer_size, current_buffer_size; + uint8_t buffer_lun; + uint32_t error; + + *need_flush = false; + + if (!dev->is_wb_enabled) + return (0); + + error = ufshci_dev_is_write_booster_buffer_life_time_left(ctrlr, + &is_life_time_left); + if (error) + return (error); + + if (!is_life_time_left) + return (ufshci_dev_disable_write_booster(ctrlr)); + + if (dev->wb_buffer_type == UFSHCI_DESC_WB_BUF_TYPE_LU_DEDICATED) + buffer_lun = dev->wb_dedicated_lu; + else + buffer_lun = 0; + + error = ufshci_dev_read_attribute(ctrlr, + UFSHCI_ATTR_B_AVAILABLE_WB_BUFFER_SIZE, buffer_lun, 0, + &available_buffer_size); + if (error) + return (error); + + switch (dev->wb_user_space_config_option) { + case UFSHCI_DESC_WB_BUF_USER_SPACE_REDUCTION: + *need_flush = (available_buffer_size <= + UFSHCI_ATTR_WB_AVAILABLE_10); + break; + case UFSHCI_DESC_WB_BUF_PRESERVE_USER_SPACE: + /* + * In PRESERVE USER SPACE mode, flush should be performed when + * the current buffer is greater than 0 and the available buffer + * below write_booster_flush_threshold is left. + */ + error = ufshci_dev_read_attribute(ctrlr, + UFSHCI_ATTR_D_CURRENT_WB_BUFFER_SIZE, buffer_lun, 0, + ¤t_buffer_size); + if (error) + return (error); + + if (current_buffer_size == 0) + return (0); + + *need_flush = (available_buffer_size < + dev->write_booster_flush_threshold); + break; + default: + ufshci_printf(ctrlr, + "Invalid bWriteBoosterBufferPreserveUserSpaceEn value"); + return (EINVAL); + } + + /* + * TODO: Need to handle WRITEBOOSTER_FLUSH_NEEDED exception case from + * wExceptionEventStatus attribute. + */ + + return (0); +} + +int +ufshci_dev_config_write_booster(struct ufshci_controller *ctrlr) +{ + struct ufshci_device *dev = &ctrlr->ufs_dev; + uint32_t extended_ufs_feature_support; + uint32_t alloc_units; + struct ufshci_unit_descriptor unit_desc; + uint8_t lun; + bool is_life_time_left; + uint32_t mega_byte = 1024 * 1024; + uint32_t error = 0; + + extended_ufs_feature_support = be32toh( + dev->dev_desc.dExtendedUfsFeaturesSupport); + if (!(extended_ufs_feature_support & + UFSHCI_DESC_EXT_UFS_FEATURE_WRITE_BOOSTER)) { + /* This device does not support Write Booster */ + return (0); + } + + if (ufshci_dev_enable_write_booster(ctrlr)) + return (0); + + /* Get WriteBooster buffer parameters */ + dev->wb_buffer_type = dev->dev_desc.bWriteBoosterBufferType; + dev->wb_user_space_config_option = + dev->dev_desc.bWriteBoosterBufferPreserveUserSpaceEn; + + /* + * Find the size of the write buffer. + * With LU-dedicated (00h), the WriteBooster buffer is assigned + * exclusively to one chosen LU (not one-per-LU), whereas Shared (01h) + * uses a single device-wide buffer shared by multiple LUs. + */ + if (dev->wb_buffer_type == UFSHCI_DESC_WB_BUF_TYPE_SINGLE_SHARED) { + alloc_units = be32toh( + dev->dev_desc.dNumSharedWriteBoosterBufferAllocUnits); + ufshci_printf(ctrlr, + "WriteBooster buffer type = Shared, alloc_units=%d\n", + alloc_units); + } else if (dev->wb_buffer_type == + UFSHCI_DESC_WB_BUF_TYPE_LU_DEDICATED) { + ufshci_printf(ctrlr, "WriteBooster buffer type = Dedicated\n"); + for (lun = 0; lun < ctrlr->max_lun_count; lun++) { + /* Find a dedicated buffer using a unit descriptor */ + if (ufshci_dev_read_unit_descriptor(ctrlr, lun, + &unit_desc)) + continue; + + alloc_units = be32toh( + unit_desc.dLUNumWriteBoosterBufferAllocUnits); + if (alloc_units) { + dev->wb_dedicated_lu = lun; + break; + } + } + } else { + ufshci_printf(ctrlr, + "Not supported WriteBooster buffer type: 0x%x\n", + dev->wb_buffer_type); + goto out; + } + + if (alloc_units == 0) { + ufshci_printf(ctrlr, "The WriteBooster buffer size is zero\n"); + goto out; + } + + dev->wb_buffer_size_mb = alloc_units * + dev->geo_desc.bAllocationUnitSize * + (be32toh(dev->geo_desc.dSegmentSize)) / + (mega_byte / UFSHCI_SECTOR_SIZE); + + /* Set to flush when 40% of the available buffer size remains */ + dev->write_booster_flush_threshold = UFSHCI_ATTR_WB_AVAILABLE_40; + + /* + * Check if WriteBooster Buffer lifetime is left. + * WriteBooster Buffer lifetime — percent of life used based on P/E + * cycles. If "preserve user space" is enabled, writes to normal user + * space also consume WB life since the area is shared. + */ + error = ufshci_dev_is_write_booster_buffer_life_time_left(ctrlr, + &is_life_time_left); + if (error) + goto out; + + if (!is_life_time_left) { + ufshci_printf(ctrlr, + "There is no WriteBooster buffer life time left.\n"); + goto out; + } + + ufshci_printf(ctrlr, "WriteBooster Enabled\n"); + return (0); +out: + ufshci_dev_disable_write_booster(ctrlr); + return (error); +} + diff --git a/sys/dev/ufshci/ufshci_pci.c b/sys/dev/ufshci/ufshci_pci.c index 65a69ee0b518..d64b7526f713 100644 --- a/sys/dev/ufshci/ufshci_pci.c +++ b/sys/dev/ufshci/ufshci_pci.c @@ -53,7 +53,8 @@ static struct _pcsid { { 0x98fa8086, "Intel Lakefield UFS Host Controller", UFSHCI_REF_CLK_19_2MHz, UFSHCI_QUIRK_LONG_PEER_PA_TACTIVATE | - UFSHCI_QUIRK_WAIT_AFTER_POWER_MODE_CHANGE }, + UFSHCI_QUIRK_WAIT_AFTER_POWER_MODE_CHANGE | + UFSHCI_QUIRK_CHANGE_LANE_AND_GEAR_SEPARATELY }, { 0x54ff8086, "Intel UFS Host Controller", UFSHCI_REF_CLK_19_2MHz }, { 0x00000000, NULL } }; diff --git a/sys/dev/ufshci/ufshci_private.h b/sys/dev/ufshci/ufshci_private.h index 1a2742ae2e80..2e033f84c373 100644 --- a/sys/dev/ufshci/ufshci_private.h +++ b/sys/dev/ufshci/ufshci_private.h @@ -46,6 +46,8 @@ MALLOC_DECLARE(M_UFSHCI); #define UFSHCI_UTR_ENTRIES (32) #define UFSHCI_UTRM_ENTRIES (8) +#define UFSHCI_SECTOR_SIZE (512) + struct ufshci_controller; struct ufshci_completion_poll_status { @@ -214,6 +216,15 @@ struct ufshci_device { struct ufshci_geometry_descriptor geo_desc; uint32_t unipro_version; + + /* WriteBooster */ + bool is_wb_enabled; + bool is_wb_flush_enabled; + uint32_t wb_buffer_type; + uint32_t wb_buffer_size_mb; + uint32_t wb_user_space_config_option; + uint8_t wb_dedicated_lu; + uint32_t write_booster_flush_threshold; }; /* @@ -229,7 +240,8 @@ struct ufshci_controller { 2 /* Need an additional 200 ms of PA_TActivate */ #define UFSHCI_QUIRK_WAIT_AFTER_POWER_MODE_CHANGE \ 4 /* Need to wait 1250us after power mode change */ - +#define UFSHCI_QUIRK_CHANGE_LANE_AND_GEAR_SEPARATELY \ + 8 /* Need to change the number of lanes before changing HS-GEAR. */ uint32_t ref_clk; struct cam_sim *ufshci_sim; @@ -356,6 +368,7 @@ int ufshci_dev_init_unipro(struct ufshci_controller *ctrlr); int ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr); int ufshci_dev_init_ufs_power_mode(struct ufshci_controller *ctrlr); int ufshci_dev_get_descriptor(struct ufshci_controller *ctrlr); +int ufshci_dev_config_write_booster(struct ufshci_controller *ctrlr); /* Controller Command */ void ufshci_ctrlr_cmd_send_task_mgmt_request(struct ufshci_controller *ctrlr, diff --git a/sys/dev/ufshci/ufshci_reg.h b/sys/dev/ufshci/ufshci_reg.h index 6c9b3e2c8c04..6d5768505102 100644 --- a/sys/dev/ufshci/ufshci_reg.h +++ b/sys/dev/ufshci/ufshci_reg.h @@ -274,7 +274,7 @@ struct ufshci_registers { #define UFSHCI_HCS_REG_UTMRLRDY_MASK (0x1) #define UFSHCI_HCS_REG_UCRDY_SHIFT (3) #define UFSHCI_HCS_REG_UCRDY_MASK (0x1) -#define UFSHCI_HCS_REG_UPMCRS_SHIFT (7) +#define UFSHCI_HCS_REG_UPMCRS_SHIFT (8) #define UFSHCI_HCS_REG_UPMCRS_MASK (0x7) #define UFSHCI_HCS_REG_UTPEC_SHIFT (12) #define UFSHCI_HCS_REG_UTPEC_MASK (0xF) diff --git a/sys/dev/ufshci/ufshci_sysctl.c b/sys/dev/ufshci/ufshci_sysctl.c index 5e5069f12e5f..56bc06b13f3c 100644 --- a/sys/dev/ufshci/ufshci_sysctl.c +++ b/sys/dev/ufshci/ufshci_sysctl.c @@ -152,6 +152,7 @@ ufshci_sysctl_initialize_ctrlr(struct ufshci_controller *ctrlr) struct sysctl_ctx_list *ctrlr_ctx; struct sysctl_oid *ctrlr_tree, *que_tree, *ioq_tree; struct sysctl_oid_list *ctrlr_list, *ioq_list; + struct ufshci_device *dev = &ctrlr->ufs_dev; #define QUEUE_NAME_LENGTH 16 char queue_name[QUEUE_NAME_LENGTH]; int i; @@ -177,6 +178,25 @@ ufshci_sysctl_initialize_ctrlr(struct ufshci_controller *ctrlr) SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "cap", CTLFLAG_RD, &ctrlr->cap, 0, "Number of I/O queue pairs"); + SYSCTL_ADD_BOOL(ctrlr_ctx, ctrlr_list, OID_AUTO, "wb_enabled", + CTLFLAG_RD, &dev->is_wb_enabled, 0, "WriteBooster enable/disable"); + + SYSCTL_ADD_BOOL(ctrlr_ctx, ctrlr_list, OID_AUTO, "wb_flush_enabled", + CTLFLAG_RD, &dev->is_wb_flush_enabled, 0, + "WriteBooster flush enable/disable"); + + SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "wb_buffer_type", + CTLFLAG_RD, &dev->wb_buffer_type, 0, "WriteBooster type"); + + SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "wb_buffer_size_mb", + CTLFLAG_RD, &dev->wb_buffer_size_mb, 0, + "WriteBooster buffer size in MB"); + + SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, + "wb_user_space_config_option", CTLFLAG_RD, + &dev->wb_user_space_config_option, 0, + "WriteBooster preserve user space mode"); + SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, "timeout_period", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, &ctrlr->timeout_period, 0, ufshci_sysctl_timeout_period, "IU", diff --git a/sys/dev/ufshci/ufshci_uic_cmd.c b/sys/dev/ufshci/ufshci_uic_cmd.c index 2c5f635dc11e..b9c867ff7065 100644 --- a/sys/dev/ufshci/ufshci_uic_cmd.c +++ b/sys/dev/ufshci/ufshci_uic_cmd.c @@ -14,7 +14,7 @@ int ufshci_uic_power_mode_ready(struct ufshci_controller *ctrlr) { - uint32_t is; + uint32_t is, hcs; int timeout; /* Wait for the IS flag to change */ @@ -40,6 +40,15 @@ ufshci_uic_power_mode_ready(struct ufshci_controller *ctrlr) DELAY(10); } + /* Check HCS power mode change request status */ + hcs = ufshci_mmio_read_4(ctrlr, hcs); + if (UFSHCIV(UFSHCI_HCS_REG_UPMCRS, hcs) != 0x01) { + ufshci_printf(ctrlr, + "Power mode change request status error: 0x%x\n", + UFSHCIV(UFSHCI_HCS_REG_UPMCRS, hcs)); + return (ENXIO); + } + return (0); } @@ -112,6 +121,7 @@ ufshci_uic_send_cmd(struct ufshci_controller *ctrlr, struct ufshci_uic_cmd *uic_cmd, uint32_t *return_value) { int error; + uint32_t config_result_code; mtx_lock(&ctrlr->uic_cmd_lock); @@ -134,6 +144,13 @@ ufshci_uic_send_cmd(struct ufshci_controller *ctrlr, if (error) return (ENXIO); + config_result_code = ufshci_mmio_read_4(ctrlr, ucmdarg2); + if (config_result_code) { + ufshci_printf(ctrlr, + "Failed to send UIC command. (config result code = 0x%x)\n", + config_result_code); + } + if (return_value != NULL) *return_value = ufshci_mmio_read_4(ctrlr, ucmdarg3); diff --git a/sys/dev/usb/controller/xhci.c b/sys/dev/usb/controller/xhci.c index 5be592512196..788b2b718062 100644 --- a/sys/dev/usb/controller/xhci.c +++ b/sys/dev/usb/controller/xhci.c @@ -156,6 +156,7 @@ struct xhci_std_temp { static void xhci_do_poll(struct usb_bus *); static void xhci_device_done(struct usb_xfer *, usb_error_t); +static void xhci_get_xecp(struct xhci_softc *); static void xhci_root_intr(struct xhci_softc *); static void xhci_free_device_ext(struct usb_device *); static struct xhci_endpoint_ext *xhci_get_endpoint_ext(struct usb_device *, @@ -566,6 +567,8 @@ xhci_init(struct xhci_softc *sc, device_t self, uint8_t dma32) device_printf(self, "%d bytes context size, %d-bit DMA\n", sc->sc_ctx_is_64_byte ? 64 : 32, (int)sc->sc_bus.dma_bits); + xhci_get_xecp(sc); + /* enable 64Kbyte control endpoint quirk */ sc->sc_bus.control_ep_quirk = (xhcictlquirk ? 1 : 0); @@ -654,6 +657,88 @@ xhci_uninit(struct xhci_softc *sc) } static void +xhci_get_xecp(struct xhci_softc *sc) +{ + + uint32_t hccp1; + uint32_t eec; + uint32_t eecp; + bool first = true; + + hccp1 = XREAD4(sc, capa, XHCI_HCSPARAMS0); + + if (XHCI_HCS0_XECP(hccp1) == 0) { + device_printf(sc->sc_bus.parent, + "xECP: no capabilities found\n"); + return; + } + + /* + * Parse the xECP Capabilities table and print known caps. + * Implemented, vendor and reserved xECP Capabilities values are + * documented in Table 7.2 of eXtensible Host Controller Interface for + * Universal Serial Bus (xHCI) Rev 1.2b 2023. + */ + device_printf(sc->sc_bus.parent, "xECP capabilities <"); + + eec = -1; + for (eecp = XHCI_HCS0_XECP(hccp1) << 2; + eecp != 0 && XHCI_XECP_NEXT(eec) != 0; + eecp += XHCI_XECP_NEXT(eec) << 2) { + eec = XREAD4(sc, capa, eecp); + + uint8_t xecpid = XHCI_XECP_ID(eec); + + if ((xecpid >= 11 && xecpid <= 16) || + (xecpid >= 19 && xecpid <= 191)) { + if (!first) + printf(","); + printf("RES(%x)", xecpid); + } else if (xecpid > 191) { + if (!first) + printf(","); + printf("VEND(%x)", xecpid); + } else { + if (!first) + printf(","); + switch (xecpid) + { + case XHCI_ID_USB_LEGACY: + printf("LEGACY"); + break; + case XHCI_ID_PROTOCOLS: + printf("PROTO"); + break; + case XHCI_ID_POWER_MGMT: + printf("POWER"); + break; + case XHCI_ID_VIRTUALIZATION: + printf("VIRT"); + break; + case XHCI_ID_MSG_IRQ: + printf("MSG IRQ"); + break; + case XHCI_ID_USB_LOCAL_MEM: + printf("LOCAL MEM"); + break; + case XHCI_ID_USB_DEBUG: + printf("DEBUG"); + break; + case XHCI_ID_EXT_MSI: + printf("EXT MSI"); + break; + case XHCI_ID_USB3_TUN: + printf("TUN"); + break; + + } + } + first = false; + } + printf(">\n"); +} + +static void xhci_set_hw_power_sleep(struct usb_bus *bus, uint32_t state) { struct xhci_softc *sc = XHCI_BUS2SC(bus); diff --git a/sys/dev/usb/controller/xhci_pci.c b/sys/dev/usb/controller/xhci_pci.c index d5cfd228a429..820fb2f738a1 100644 --- a/sys/dev/usb/controller/xhci_pci.c +++ b/sys/dev/usb/controller/xhci_pci.c @@ -178,6 +178,8 @@ xhci_pci_match(device_t self) return ("Intel Tiger Lake-H USB 3.2 controller"); case 0x461e8086: return ("Intel Alder Lake-P Thunderbolt 4 USB controller"); + case 0x4b7d8086: + return ("Intel Elkhart Lake USB 3.1 controller"); case 0x51ed8086: return ("Intel Alder Lake USB 3.2 controller"); case 0x5aa88086: diff --git a/sys/dev/usb/controller/xhcireg.h b/sys/dev/usb/controller/xhcireg.h index 9d0b6e2f4b4b..821897155544 100644 --- a/sys/dev/usb/controller/xhcireg.h +++ b/sys/dev/usb/controller/xhcireg.h @@ -205,6 +205,11 @@ #define XHCI_ID_VIRTUALIZATION 0x0004 #define XHCI_ID_MSG_IRQ 0x0005 #define XHCI_ID_USB_LOCAL_MEM 0x0006 +/* values 7-9 are reserved */ +#define XHCI_ID_USB_DEBUG 0x000a +/* values 11-16 are reserved */ +#define XHCI_ID_EXT_MSI 0x0011 +#define XHCI_ID_USB3_TUN 0x0012 /* XHCI register R/W wrappers */ #define XREAD1(sc, what, a) \ diff --git a/sys/dev/usb/net/if_umb.c b/sys/dev/usb/net/if_umb.c index f640b4224aad..b1082b117259 100644 --- a/sys/dev/usb/net/if_umb.c +++ b/sys/dev/usb/net/if_umb.c @@ -177,9 +177,7 @@ static void umb_ncm_setup(struct umb_softc *, struct usb_config *); static void umb_close_bulkpipes(struct umb_softc *); static int umb_ioctl(if_t , u_long, caddr_t); static void umb_init(void *); -#ifdef DEV_NETMAP static void umb_input(if_t , struct mbuf *); -#endif static int umb_output(if_t , struct mbuf *, const struct sockaddr *, struct route *); static void umb_start(if_t ); @@ -585,9 +583,7 @@ umb_attach_task(struct usb_proc_msg *msg) if_setsoftc(ifp, sc); if_setflags(ifp, IFF_SIMPLEX | IFF_MULTICAST | IFF_POINTOPOINT); if_setioctlfn(ifp, umb_ioctl); -#ifdef DEV_NETMAP if_setinputfn(ifp, umb_input); -#endif if_setoutputfn(ifp, umb_output); if_setstartfn(ifp, umb_start); if_setinitfn(ifp, umb_init); diff --git a/sys/dev/usb/usb_hub.c b/sys/dev/usb/usb_hub.c index e3509862ef54..ee9d8ab0c9bb 100644 --- a/sys/dev/usb/usb_hub.c +++ b/sys/dev/usb/usb_hub.c @@ -954,7 +954,8 @@ done: * packet. This function is called having the "bus_mtx" locked. *------------------------------------------------------------------------*/ void -uhub_root_intr(struct usb_bus *bus, const uint8_t *ptr, uint8_t len) +uhub_root_intr(struct usb_bus *bus, + const uint8_t *ptr __unused, uint8_t len __unused) { USB_BUS_LOCK_ASSERT(bus, MA_OWNED); diff --git a/sys/dev/usb/wlan/if_rsu.c b/sys/dev/usb/wlan/if_rsu.c index 07f7b6f3a708..e976948f6849 100644 --- a/sys/dev/usb/wlan/if_rsu.c +++ b/sys/dev/usb/wlan/if_rsu.c @@ -371,18 +371,16 @@ rsu_update_chw(struct ieee80211com *ic) /* * notification from net80211 that it'd like to do A-MPDU on the given TID. - * - * Note: this actually hangs traffic at the present moment, so don't use it. - * The firmware debug does indiciate it's sending and establishing a TX AMPDU - * session, but then no traffic flows. */ static int rsu_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { -#if 0 struct rsu_softc *sc = ni->ni_ic->ic_softc; struct r92s_add_ba_req req; + RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: called, tid=%d\n", + __func__, tap->txa_tid); + /* Don't enable if it's requested or running */ if (IEEE80211_AMPDU_REQUESTED(tap)) return (0); @@ -397,23 +395,30 @@ rsu_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) return (0); /* Send the firmware command */ - RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: establishing AMPDU TX for TID %d\n", + RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, + "%s: establishing AMPDU TX for TID %d\n", __func__, tap->txa_tid); RSU_LOCK(sc); - if (rsu_fw_cmd(sc, R92S_CMD_ADDBA_REQ, &req, sizeof(req)) != 1) { + if (rsu_fw_cmd(sc, R92S_CMD_ADDBA_REQ, &req, sizeof(req)) != 0) { RSU_UNLOCK(sc); + RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: AMPDU TX cmd failure\n", + __func__); /* Mark failure */ - (void) ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 0); + ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 0); + /* Return 0, we've been driving this ourselves */ return (0); } RSU_UNLOCK(sc); + RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: AMPDU TX cmd success\n", + __func__); + /* Mark success; we don't get any further notifications */ - (void) ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 1); -#endif - /* Return 0, we're driving this ourselves */ + ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 1); + + /* Return 0, we've been driving this ourselves */ return (0); } @@ -563,9 +568,7 @@ rsu_attach(device_t self) /* Enable basic HT */ ic->ic_htcaps = IEEE80211_HTC_HT | -#if 0 IEEE80211_HTC_AMPDU | -#endif IEEE80211_HTC_AMSDU | IEEE80211_HTCAP_MAXAMSDU_3839 | IEEE80211_HTCAP_SMPS_OFF; @@ -576,6 +579,7 @@ rsu_attach(device_t self) ic->ic_rxstream = sc->sc_nrxstream; } ic->ic_flags_ext |= IEEE80211_FEXT_SCAN_OFFLOAD; + ic->ic_flags_ext |= IEEE80211_FEXT_SEQNO_OFFLOAD; rsu_getradiocaps(ic, IEEE80211_CHAN_MAX, &ic->ic_nchans, ic->ic_channels); @@ -1537,6 +1541,10 @@ rsu_key_alloc(struct ieee80211vap *vap, struct ieee80211_key *k, is_checked = 1; k->wk_flags |= IEEE80211_KEY_SWCRYPT; } else + /* + * TODO: should allocate these from the CAM space; + * skipping over the fixed slots and _BC / _BSS. + */ *keyix = R92S_MACID_BSS; } @@ -2166,7 +2174,7 @@ rsu_event_addba_req_report(struct rsu_softc *sc, uint8_t *buf, int len) __func__, ether_sprintf(ba->mac_addr), (int) ba->tid, - (int) le16toh(ba->ssn)); + (int) le16toh(ba->ssn) >> 4); /* XXX do node lookup; this is STA specific */ @@ -2212,6 +2220,11 @@ rsu_rx_event(struct rsu_softc *sc, uint8_t code, uint8_t *buf, int len) if (vap->iv_state == IEEE80211_S_AUTH) rsu_event_join_bss(sc, buf, len); break; + + /* TODO: what about R92S_EVT_ADD_STA? and decoding macid? */ + /* It likely is required for IBSS/AP mode */ + + /* TODO: should I be doing this transition in AP mode? */ case R92S_EVT_DEL_STA: RSU_DPRINTF(sc, RSU_DEBUG_FWCMD | RSU_DEBUG_STATE, "%s: disassociated from %s\n", __func__, @@ -2229,6 +2242,7 @@ rsu_rx_event(struct rsu_softc *sc, uint8_t code, uint8_t *buf, int len) break; case R92S_EVT_FWDBG: buf[60] = '\0'; + /* TODO: some are \n terminated, some aren't, sigh */ RSU_DPRINTF(sc, RSU_DEBUG_FWDBG, "FWDBG: %s\n", (char *)buf); break; case R92S_EVT_ADDBA_REQ_REPORT: @@ -2782,6 +2796,9 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni, if (rate != 0) ridx = rate2ridx(rate); + /* Assign sequence number, A-MPDU or otherwise */ + ieee80211_output_seqno_assign(ni, -1, m0); + if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { k = ieee80211_crypto_encap(ni, m0); if (k == NULL) { @@ -2838,8 +2855,10 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni, SM(R92S_TXDW0_OFFSET, sizeof(*txd)) | R92S_TXDW0_OWN | R92S_TXDW0_FSG | R92S_TXDW0_LSG); + /* TODO: correct macid here? It should be in the node */ txd->txdw1 |= htole32( SM(R92S_TXDW1_MACID, R92S_MACID_BSS) | SM(R92S_TXDW1_QSEL, qid)); + if (!hasqos) txd->txdw1 |= htole32(R92S_TXDW1_NONQOS); if (k != NULL && !(k->wk_flags & IEEE80211_KEY_SWENCRYPT)) { @@ -2860,8 +2879,13 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni, SM(R92S_TXDW1_CIPHER, cipher) | SM(R92S_TXDW1_KEYIDX, k->wk_keyix)); } - /* XXX todo: set AGGEN bit if appropriate? */ - txd->txdw2 |= htole32(R92S_TXDW2_BK); + + /* + * Note: no need to set TXDW2_AGGEN/TXDW2_BK to mark + * A-MPDU and non-AMPDU candidates; the firmware will + * handle this for us. + */ + if (ismcast) txd->txdw2 |= htole32(R92S_TXDW2_BMCAST); @@ -2880,8 +2904,11 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni, } /* - * Firmware will use and increment the sequence number for the - * specified priority. + * Pass in prio here, NOT the sequence number. + * + * The hardware is in theory incrementing sequence numbers + * for us, but I haven't yet figured out exactly when/how + * it's supposed to work. */ txd->txdw3 |= htole32(SM(R92S_TXDW3_SEQ, prio)); @@ -3481,7 +3508,8 @@ rsu_load_firmware(struct rsu_softc *sc) dmem.vcs_mode = R92S_VCS_MODE_RTS_CTS; dmem.turbo_mode = 0; dmem.bw40_en = !! (ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40); - dmem.amsdu2ampdu_en = !! (sc->sc_ht); + /* net80211 handles AMSDUs just fine */ + dmem.amsdu2ampdu_en = 0; dmem.ampdu_en = !! (sc->sc_ht); dmem.agg_offload = !! (sc->sc_ht); dmem.qos_en = 1; diff --git a/sys/dev/usb/wlan/if_rsureg.h b/sys/dev/usb/wlan/if_rsureg.h index fb706a4d9b1a..e2074e1dd2ad 100644 --- a/sys/dev/usb/wlan/if_rsureg.h +++ b/sys/dev/usb/wlan/if_rsureg.h @@ -593,7 +593,14 @@ struct r92s_event_join_bss { struct ndis_wlan_bssid_ex bss; } __packed; -#define R92S_MACID_BSS 5 /* XXX hardcoded somewhere */ +/* + * This is hard-coded in the firmware for a STA mode + * BSS join. If you turn on FWDEBUG, you'll see this + * in the logs: + * + * rsu0: FWDBG: mac id #5: 0000005b, 000fffff, 00000000 + */ +#define R92S_MACID_BSS 5 /* Rx MAC descriptor. */ struct r92s_rx_stat { diff --git a/sys/dev/usb/wlan/if_run.c b/sys/dev/usb/wlan/if_run.c index 97c790dd5b81..147aa4044057 100644 --- a/sys/dev/usb/wlan/if_run.c +++ b/sys/dev/usb/wlan/if_run.c @@ -882,6 +882,7 @@ run_attach(device_t self) ic->ic_flags |= IEEE80211_F_DATAPAD; ic->ic_flags_ext |= IEEE80211_FEXT_SWBMISS; + ic->ic_flags_ext |= IEEE80211_FEXT_SEQNO_OFFLOAD; run_getradiocaps(ic, IEEE80211_CHAN_MAX, &ic->ic_nchans, ic->ic_channels); @@ -3522,6 +3523,9 @@ run_tx(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni) data->ni = ni; data->ridx = ridx; + /* Assign sequence number now, regardless of A-MPDU TX or otherwise (for now) */ + ieee80211_output_seqno_assign(ni, -1, m); + run_set_tx_desc(sc, data); /* @@ -3627,6 +3631,9 @@ run_tx_mgt(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni) data->ni = ni; data->ridx = ridx; + /* Assign sequence number now, regardless of A-MPDU TX or otherwise (for now) */ + ieee80211_output_seqno_assign(ni, -1, m); + run_set_tx_desc(sc, data); RUN_DPRINTF(sc, RUN_DEBUG_XMIT, "sending mgt frame len=%d rate=%d\n", @@ -3771,6 +3778,9 @@ run_tx_param(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni, break; data->ridx = ridx; + /* Assign sequence number now, regardless of A-MPDU TX or otherwise (for now) */ + ieee80211_output_seqno_assign(ni, -1, m); + run_set_tx_desc(sc, data); RUN_DPRINTF(sc, RUN_DEBUG_XMIT, "sending raw frame len=%u rate=%u\n", @@ -6416,6 +6426,10 @@ run_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { /* For now, no A-MPDU TX support in the driver */ + /* + * TODO: maybe we needed to enable seqno generation too? + * What other TX desc bits are missing/needed? + */ return (0); } diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c index ecb3dbb370e5..528ff3372097 100644 --- a/sys/dev/virtio/network/if_vtnet.c +++ b/sys/dev/virtio/network/if_vtnet.c @@ -133,12 +133,14 @@ static int vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *, static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int); static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *); static int vtnet_rxq_new_buf(struct vtnet_rxq *); +#if defined(INET) || defined(INET6) static int vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *, - uint16_t, int, struct virtio_net_hdr *); -static int vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *, - uint16_t, int, struct virtio_net_hdr *); + bool, int, struct virtio_net_hdr *); +static void vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *, + int); static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *, struct virtio_net_hdr *); +#endif static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int); static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *); static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int); @@ -1178,6 +1180,7 @@ vtnet_setup_interface(struct vtnet_softc *sc) if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO) if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0); if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0); + if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0); /* * Capabilities after here are not enabled by default. @@ -1760,164 +1763,165 @@ vtnet_rxq_new_buf(struct vtnet_rxq *rxq) return (error); } +#if defined(INET) || defined(INET6) static int -vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype, - int hoff, struct virtio_net_hdr *hdr) +vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, bool isipv6, + int protocol, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; - int error; - sc = rxq->vtnrx_sc; + /* + * The packet is likely from another VM on the same host or from the + * host that itself performed checksum offloading so Tx/Rx is basically + * a memcpy and the checksum has little value so far. + */ + + KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP, + ("%s: unsupported IP protocol %d", __func__, protocol)); /* - * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does - * not have an analogous CSUM flag. The checksum has been validated, - * but is incomplete (TCP/UDP pseudo header). - * - * The packet is likely from another VM on the same host that itself - * performed checksum offloading so Tx/Rx is basically a memcpy and - * the checksum has little value. - * - * Default to receiving the packet as-is for performance reasons, but - * this can cause issues if the packet is to be forwarded because it - * does not contain a valid checksum. This patch may be helpful: - * https://reviews.freebsd.org/D6611. In the meantime, have the driver - * compute the checksum if requested. - * - * BMV: Need to add an CSUM_PARTIAL flag? + * If the user don't want us to fix it up here by computing the + * checksum, just forward the order to compute the checksum by setting + * the corresponding mbuf flag (e.g., CSUM_TCP). */ + sc = rxq->vtnrx_sc; if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) { - error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr); - return (error); + switch (protocol) { + case IPPROTO_TCP: + m->m_pkthdr.csum_flags |= + (isipv6 ? CSUM_TCP_IPV6 : CSUM_TCP); + break; + case IPPROTO_UDP: + m->m_pkthdr.csum_flags |= + (isipv6 ? CSUM_UDP_IPV6 : CSUM_UDP); + break; + } + m->m_pkthdr.csum_data = hdr->csum_offset; + return (0); } /* * Compute the checksum in the driver so the packet will contain a * valid checksum. The checksum is at csum_offset from csum_start. */ - switch (etype) { -#if defined(INET) || defined(INET6) - case ETHERTYPE_IP: - case ETHERTYPE_IPV6: { - int csum_off, csum_end; - uint16_t csum; - - csum_off = hdr->csum_start + hdr->csum_offset; - csum_end = csum_off + sizeof(uint16_t); + int csum_off, csum_end; + uint16_t csum; - /* Assume checksum will be in the first mbuf. */ - if (m->m_len < csum_end || m->m_pkthdr.len < csum_end) - return (1); + csum_off = hdr->csum_start + hdr->csum_offset; + csum_end = csum_off + sizeof(uint16_t); - /* - * Like in_delayed_cksum()/in6_delayed_cksum(), compute the - * checksum and write it at the specified offset. We could - * try to verify the packet: csum_start should probably - * correspond to the start of the TCP/UDP header. - * - * BMV: Need to properly handle UDP with zero checksum. Is - * the IPv4 header checksum implicitly validated? - */ - csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start); - *(uint16_t *)(mtodo(m, csum_off)) = csum; - m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - m->m_pkthdr.csum_data = 0xFFFF; - break; - } -#endif - default: - sc->vtnet_stats.rx_csum_bad_ethtype++; + /* Assume checksum will be in the first mbuf. */ + if (m->m_len < csum_end || m->m_pkthdr.len < csum_end) { + sc->vtnet_stats.rx_csum_bad_offset++; return (1); } + /* + * Like in_delayed_cksum()/in6_delayed_cksum(), compute the + * checksum and write it at the specified offset. We could + * try to verify the packet: csum_start should probably + * correspond to the start of the TCP/UDP header. + * + * BMV: Need to properly handle UDP with zero checksum. Is + * the IPv4 header checksum implicitly validated? + */ + csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start); + *(uint16_t *)(mtodo(m, csum_off)) = csum; + m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + m->m_pkthdr.csum_data = 0xFFFF; + return (0); } +static void +vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m, int protocol) +{ + KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP, + ("%s: unsupported IP protocol %d", __func__, protocol)); + + m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + m->m_pkthdr.csum_data = 0xFFFF; +} + static int -vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m, - uint16_t etype, int hoff, struct virtio_net_hdr *hdr __unused) +vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m, + struct virtio_net_hdr *hdr) { -#if 0 + const struct ether_header *eh; struct vtnet_softc *sc; -#endif - int protocol; + int hoff, protocol; + uint16_t etype; + bool isipv6; + + KASSERT(hdr->flags & + (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID), + ("%s: missing checksum offloading flag %x", __func__, hdr->flags)); + + eh = mtod(m, const struct ether_header *); + etype = ntohs(eh->ether_type); + if (etype == ETHERTYPE_VLAN) { + /* TODO BMV: Handle QinQ. */ + const struct ether_vlan_header *evh = + mtod(m, const struct ether_vlan_header *); + etype = ntohs(evh->evl_proto); + hoff = sizeof(struct ether_vlan_header); + } else + hoff = sizeof(struct ether_header); -#if 0 sc = rxq->vtnrx_sc; -#endif + /* Check whether ethernet type is IP or IPv6, and get protocol. */ switch (etype) { #if defined(INET) case ETHERTYPE_IP: - if (__predict_false(m->m_len < hoff + sizeof(struct ip))) - protocol = IPPROTO_DONE; - else { + if (__predict_false(m->m_len < hoff + sizeof(struct ip))) { + sc->vtnet_stats.rx_csum_inaccessible_ipproto++; + return (1); + } else { struct ip *ip = (struct ip *)(m->m_data + hoff); protocol = ip->ip_p; } + isipv6 = false; break; #endif #if defined(INET6) case ETHERTYPE_IPV6: if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr)) - || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0) - protocol = IPPROTO_DONE; + || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0) { + sc->vtnet_stats.rx_csum_inaccessible_ipproto++; + return (1); + } + isipv6 = true; break; #endif default: - protocol = IPPROTO_DONE; - break; + sc->vtnet_stats.rx_csum_bad_ethtype++; + return (1); } + /* Check whether protocol is TCP or UDP. */ switch (protocol) { case IPPROTO_TCP: case IPPROTO_UDP: - m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - m->m_pkthdr.csum_data = 0xFFFF; break; default: /* * FreeBSD does not support checksum offloading of this - * protocol. Let the stack re-verify the checksum later - * if the protocol is supported. + * protocol here. */ -#if 0 - if_printf(sc->vtnet_ifp, - "%s: checksum offload of unsupported protocol " - "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n", - __func__, etype, protocol, hdr->csum_start, - hdr->csum_offset); -#endif - break; + sc->vtnet_stats.rx_csum_bad_ipproto++; + return (1); } - return (0); -} - -static int -vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m, - struct virtio_net_hdr *hdr) -{ - const struct ether_header *eh; - int hoff; - uint16_t etype; - - eh = mtod(m, const struct ether_header *); - etype = ntohs(eh->ether_type); - if (etype == ETHERTYPE_VLAN) { - /* TODO BMV: Handle QinQ. */ - const struct ether_vlan_header *evh = - mtod(m, const struct ether_vlan_header *); - etype = ntohs(evh->evl_proto); - hoff = sizeof(struct ether_vlan_header); - } else - hoff = sizeof(struct ether_header); - if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) - return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr)); + return (vtnet_rxq_csum_needs_csum(rxq, m, isipv6, protocol, + hdr)); else /* VIRTIO_NET_HDR_F_DATA_VALID */ - return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr)); + vtnet_rxq_csum_data_valid(rxq, m, protocol); + + return (0); } +#endif static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs) @@ -2040,10 +2044,15 @@ vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m, if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) { +#if defined(INET) || defined(INET6) if (vtnet_rxq_csum(rxq, m, hdr) == 0) rxq->vtnrx_stats.vrxs_csum++; else rxq->vtnrx_stats.vrxs_csum_failed++; +#else + sc->vtnet_stats.rx_csum_bad_ethtype++; + rxq->vtnrx_stats.vrxs_csum_failed++; +#endif } if (hdr->gso_size != 0) { @@ -2497,6 +2506,10 @@ vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, hdr->csum_start = vtnet_gtoh16(sc, csum_start); hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data); txq->vtntx_stats.vtxs_csum++; + } else if ((flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) && + (proto == IPPROTO_TCP || proto == IPPROTO_UDP) && + (m->m_pkthdr.csum_data == 0xFFFF)) { + hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID; } if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) { @@ -2551,8 +2564,10 @@ vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, error = sglist_append_mbuf(sg, m); if (error) { m = m_defrag(m, M_NOWAIT); - if (m == NULL) + if (m == NULL) { + sc->vtnet_stats.tx_defrag_failed++; goto fail; + } *m_head = m; sc->vtnet_stats.tx_defragged++; @@ -2568,7 +2583,6 @@ vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, return (error); fail: - sc->vtnet_stats.tx_defrag_failed++; m_freem(*m_head); *m_head = NULL; @@ -2609,7 +2623,8 @@ vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags) m->m_flags &= ~M_VLANTAG; } - if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { + if (m->m_pkthdr.csum_flags & + (VTNET_CSUM_ALL_OFFLOAD | CSUM_DATA_VALID)) { m = vtnet_txq_offload(txq, m, hdr); if ((*m_head = m) == NULL) { error = ENOBUFS; @@ -3031,16 +3046,14 @@ vtnet_get_counter(if_t ifp, ift_counter cnt) return (rxaccum.vrxs_iqdrops); case IFCOUNTER_IERRORS: return (rxaccum.vrxs_ierrors); + case IFCOUNTER_IBYTES: + return (rxaccum.vrxs_ibytes); case IFCOUNTER_OPACKETS: return (txaccum.vtxs_opackets); case IFCOUNTER_OBYTES: - if (!VTNET_ALTQ_ENABLED) - return (txaccum.vtxs_obytes); - /* FALLTHROUGH */ + return (txaccum.vtxs_obytes); case IFCOUNTER_OMCASTS: - if (!VTNET_ALTQ_ENABLED) - return (txaccum.vtxs_omcasts); - /* FALLTHROUGH */ + return (txaccum.vtxs_omcasts); default: return (if_get_counter_default(ifp, cnt)); } @@ -3813,9 +3826,9 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) if_printf(ifp, "error setting host MAC filter table\n"); out: - if (promisc != 0 && vtnet_set_promisc(sc, true) != 0) + if (promisc && vtnet_set_promisc(sc, true) != 0) if_printf(ifp, "cannot enable promiscuous mode\n"); - if (allmulti != 0 && vtnet_set_allmulti(sc, true) != 0) + if (allmulti && vtnet_set_allmulti(sc, true) != 0) if_printf(ifp, "cannot enable all-multicast mode\n"); } @@ -4100,21 +4113,29 @@ vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, stats = &rxq->vtnrx_stats; - SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", + CTLFLAG_RD | CTLFLAG_STATS, &stats->vrxs_ipackets, "Receive packets"); - SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", + CTLFLAG_RD | CTLFLAG_STATS, &stats->vrxs_ibytes, "Receive bytes"); - SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", + CTLFLAG_RD | CTLFLAG_STATS, &stats->vrxs_iqdrops, "Receive drops"); - SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", + CTLFLAG_RD | CTLFLAG_STATS, &stats->vrxs_ierrors, "Receive errors"); - SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", + CTLFLAG_RD | CTLFLAG_STATS, &stats->vrxs_csum, "Receive checksum offloaded"); - SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", + CTLFLAG_RD | CTLFLAG_STATS, &stats->vrxs_csum_failed, "Receive checksum offload failed"); - SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", + CTLFLAG_RD | CTLFLAG_STATS, &stats->vrxs_host_lro, "Receive host segmentation offloaded"); - SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", + CTLFLAG_RD | CTLFLAG_STATS, &stats->vrxs_rescheduled, "Receive interrupt handler rescheduled"); } @@ -4135,17 +4156,23 @@ vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, stats = &txq->vtntx_stats; - SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", + CTLFLAG_RD | CTLFLAG_STATS, &stats->vtxs_opackets, "Transmit packets"); - SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", + CTLFLAG_RD | CTLFLAG_STATS, &stats->vtxs_obytes, "Transmit bytes"); - SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", + CTLFLAG_RD | CTLFLAG_STATS, &stats->vtxs_omcasts, "Transmit multicasts"); - SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", + CTLFLAG_RD | CTLFLAG_STATS, &stats->vtxs_csum, "Transmit checksum offloaded"); - SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", + CTLFLAG_RD | CTLFLAG_STATS, &stats->vtxs_tso, "Transmit TCP segmentation offloaded"); - SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", + CTLFLAG_RD | CTLFLAG_STATS, &stats->vtxs_rescheduled, "Transmit interrupt handler rescheduled"); } @@ -4170,6 +4197,102 @@ vtnet_setup_queue_sysctl(struct vtnet_softc *sc) } } +static int +vtnet_sysctl_rx_csum_failed(SYSCTL_HANDLER_ARGS) +{ + struct vtnet_softc *sc = (struct vtnet_softc *)arg1; + struct vtnet_statistics *stats = &sc->vtnet_stats; + struct vtnet_rxq_stats *rxst; + int i; + + stats->rx_csum_failed = 0; + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + rxst = &sc->vtnet_rxqs[i].vtnrx_stats; + stats->rx_csum_failed += rxst->vrxs_csum_failed; + } + return (sysctl_handle_64(oidp, NULL, stats->rx_csum_failed, req)); +} + +static int +vtnet_sysctl_rx_csum_offloaded(SYSCTL_HANDLER_ARGS) +{ + struct vtnet_softc *sc = (struct vtnet_softc *)arg1; + struct vtnet_statistics *stats = &sc->vtnet_stats; + struct vtnet_rxq_stats *rxst; + int i; + + stats->rx_csum_offloaded = 0; + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + rxst = &sc->vtnet_rxqs[i].vtnrx_stats; + stats->rx_csum_offloaded += rxst->vrxs_csum; + } + return (sysctl_handle_64(oidp, NULL, stats->rx_csum_offloaded, req)); +} + +static int +vtnet_sysctl_rx_task_rescheduled(SYSCTL_HANDLER_ARGS) +{ + struct vtnet_softc *sc = (struct vtnet_softc *)arg1; + struct vtnet_statistics *stats = &sc->vtnet_stats; + struct vtnet_rxq_stats *rxst; + int i; + + stats->rx_task_rescheduled = 0; + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + rxst = &sc->vtnet_rxqs[i].vtnrx_stats; + stats->rx_task_rescheduled += rxst->vrxs_rescheduled; + } + return (sysctl_handle_64(oidp, NULL, stats->rx_task_rescheduled, req)); +} + +static int +vtnet_sysctl_tx_csum_offloaded(SYSCTL_HANDLER_ARGS) +{ + struct vtnet_softc *sc = (struct vtnet_softc *)arg1; + struct vtnet_statistics *stats = &sc->vtnet_stats; + struct vtnet_txq_stats *txst; + int i; + + stats->tx_csum_offloaded = 0; + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + txst = &sc->vtnet_txqs[i].vtntx_stats; + stats->tx_csum_offloaded += txst->vtxs_csum; + } + return (sysctl_handle_64(oidp, NULL, stats->tx_csum_offloaded, req)); +} + +static int +vtnet_sysctl_tx_tso_offloaded(SYSCTL_HANDLER_ARGS) +{ + struct vtnet_softc *sc = (struct vtnet_softc *)arg1; + struct vtnet_statistics *stats = &sc->vtnet_stats; + struct vtnet_txq_stats *txst; + int i; + + stats->tx_tso_offloaded = 0; + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + txst = &sc->vtnet_txqs[i].vtntx_stats; + stats->tx_tso_offloaded += txst->vtxs_tso; + } + return (sysctl_handle_64(oidp, NULL, stats->tx_tso_offloaded, req)); +} + +static int +vtnet_sysctl_tx_task_rescheduled(SYSCTL_HANDLER_ARGS) +{ + struct vtnet_softc *sc = (struct vtnet_softc *)arg1; + struct vtnet_statistics *stats = &sc->vtnet_stats; + struct vtnet_txq_stats *txst; + int i; + + stats->tx_task_rescheduled = 0; + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + txst = &sc->vtnet_txqs[i].vtntx_stats; + stats->tx_task_rescheduled += txst->vtxs_rescheduled; + } + return (sysctl_handle_64(oidp, NULL, stats->tx_task_rescheduled, req)); +} + static void vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_softc *sc) @@ -4189,69 +4312,75 @@ vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, stats->tx_task_rescheduled = txaccum.vtxs_rescheduled; SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed", - CTLFLAG_RD, &stats->mbuf_alloc_failed, + CTLFLAG_RD | CTLFLAG_STATS, &stats->mbuf_alloc_failed, "Mbuf cluster allocation failures"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large", - CTLFLAG_RD, &stats->rx_frame_too_large, + CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_frame_too_large, "Received frame larger than the mbuf chain"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", - CTLFLAG_RD, &stats->rx_enq_replacement_failed, + CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_enq_replacement_failed, "Enqueuing the replacement receive mbuf failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", - CTLFLAG_RD, &stats->rx_mergeable_failed, + CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_mergeable_failed, "Mergeable buffers receive failures"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", - CTLFLAG_RD, &stats->rx_csum_bad_ethtype, + CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_ethtype, "Received checksum offloaded buffer with unsupported " "Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", - CTLFLAG_RD, &stats->rx_csum_bad_ipproto, + CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_ipproto, "Received checksum offloaded buffer with incorrect IP protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset", - CTLFLAG_RD, &stats->rx_csum_bad_offset, + CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_offset, "Received checksum offloaded buffer with incorrect offset"); - SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto", - CTLFLAG_RD, &stats->rx_csum_bad_proto, - "Received checksum offloaded buffer with incorrect protocol"); - SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed", - CTLFLAG_RD, &stats->rx_csum_failed, + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_inaccessible_ipproto", + CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_inaccessible_ipproto, + "Received checksum offloaded buffer with inaccessible IP protocol"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_failed", + CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, + sc, 0, vtnet_sysctl_rx_csum_failed, "QU", "Received buffer checksum offload failed"); - SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded", - CTLFLAG_RD, &stats->rx_csum_offloaded, + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_offloaded", + CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, + sc, 0, vtnet_sysctl_rx_csum_offloaded, "QU", "Received buffer checksum offload succeeded"); - SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled", - CTLFLAG_RD, &stats->rx_task_rescheduled, + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_task_rescheduled", + CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, + sc, 0, vtnet_sysctl_rx_task_rescheduled, "QU", "Times the receive interrupt task rescheduled itself"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype", - CTLFLAG_RD, &stats->tx_csum_unknown_ethtype, + CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_csum_unknown_ethtype, "Aborted transmit of checksum offloaded buffer with unknown " "Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch", - CTLFLAG_RD, &stats->tx_csum_proto_mismatch, + CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_csum_proto_mismatch, "Aborted transmit of checksum offloaded buffer because mismatched " "protocols"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", - CTLFLAG_RD, &stats->tx_tso_not_tcp, + CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_tso_not_tcp, "Aborted transmit of TSO buffer with non TCP protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum", - CTLFLAG_RD, &stats->tx_tso_without_csum, + CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_tso_without_csum, "Aborted transmit of TSO buffer without TCP checksum offload"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged", - CTLFLAG_RD, &stats->tx_defragged, + CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_defragged, "Transmit mbufs defragged"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed", - CTLFLAG_RD, &stats->tx_defrag_failed, + CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_defrag_failed, "Aborted transmit of buffer because defrag failed"); - SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded", - CTLFLAG_RD, &stats->tx_csum_offloaded, + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_csum_offloaded", + CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, + sc, 0, vtnet_sysctl_tx_csum_offloaded, "QU", "Offloaded checksum of transmitted buffer"); - SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded", - CTLFLAG_RD, &stats->tx_tso_offloaded, + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_tso_offloaded", + CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, + sc, 0, vtnet_sysctl_tx_tso_offloaded, "QU", "Segmentation offload of transmitted buffer"); - SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled", - CTLFLAG_RD, &stats->tx_task_rescheduled, + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_task_rescheduled", + CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, + sc, 0, vtnet_sysctl_tx_task_rescheduled, "QU", "Times the transmit interrupt task rescheduled itself"); } diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h index 0144b0f3232d..cab7ced639a7 100644 --- a/sys/dev/virtio/network/if_vtnetvar.h +++ b/sys/dev/virtio/network/if_vtnetvar.h @@ -46,7 +46,7 @@ struct vtnet_statistics { uint64_t rx_csum_bad_ethtype; uint64_t rx_csum_bad_ipproto; uint64_t rx_csum_bad_offset; - uint64_t rx_csum_bad_proto; + uint64_t rx_csum_inaccessible_ipproto; uint64_t tx_csum_unknown_ethtype; uint64_t tx_csum_proto_mismatch; uint64_t tx_tso_not_tcp; diff --git a/sys/dev/vmgenc/vmgenc_acpi.c b/sys/dev/vmgenc/vmgenc_acpi.c index 2ad8929dfd34..18519a8e4f22 100644 --- a/sys/dev/vmgenc/vmgenc_acpi.c +++ b/sys/dev/vmgenc/vmgenc_acpi.c @@ -56,6 +56,7 @@ #include <contrib/dev/acpica/include/acpi.h> #include <dev/acpica/acpivar.h> +#include <dev/random/randomdev.h> #include <dev/random/random_harvestq.h> #include <dev/vmgenc/vmgenc_acpi.h> @@ -210,6 +211,11 @@ acpi_GetPackedUINT64(device_t dev, ACPI_HANDLE handle, char *path, } +static const struct random_source random_vmgenid = { + .rs_ident = "VM Generation ID", + .rs_source = RANDOM_PURE_VMGENID, +}; + static int vmgenc_attach(device_t dev) { @@ -234,7 +240,7 @@ vmgenc_attach(device_t dev) memcpy(sc->vmg_cache_guid, __DEVOLATILE(void *, sc->vmg_pguid), sizeof(sc->vmg_cache_guid)); - random_harvest_register_source(RANDOM_PURE_VMGENID); + random_source_register(&random_vmgenid); vmgenc_harvest_all(sc->vmg_cache_guid, sizeof(sc->vmg_cache_guid)); AcpiInstallNotifyHandler(h, ACPI_DEVICE_NOTIFY, vmgenc_notify, dev); diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c index 9f2b009d02ec..460a508a60dc 100644 --- a/sys/dev/vmm/vmm_dev.c +++ b/sys/dev/vmm/vmm_dev.c @@ -901,6 +901,7 @@ vmmdev_lookup_and_destroy(const char *name, struct ucred *cred) sc->cdev = NULL; sx_xunlock(&vmmdev_mtx); + vm_suspend(sc->vm, VM_SUSPEND_DESTROY); destroy_dev(cdev); vmmdev_destroy(sc); diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c index e5fdb395c9f7..862780741ee7 100644 --- a/sys/fs/nfs/nfs_commonport.c +++ b/sys/fs/nfs/nfs_commonport.c @@ -371,8 +371,6 @@ nfsrv_atroot(struct vnode *vp, uint64_t *retp) /* * Set the credentials to refer to root. - * If only the various BSDen could agree on whether cr_gid is a separate - * field or cr_groups[0]... */ void newnfs_setroot(struct ucred *cred) diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index e9f1dc23ddbe..77e71d4153c9 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -1098,9 +1098,10 @@ newnfs_copyincred(struct ucred *cr, struct nfscred *nfscr) KASSERT(cr->cr_ngroups >= 0, ("newnfs_copyincred: negative cr_ngroups")); nfscr->nfsc_uid = cr->cr_uid; - nfscr->nfsc_ngroups = MIN(cr->cr_ngroups, NFS_MAXGRPS + 1); - for (i = 0; i < nfscr->nfsc_ngroups; i++) - nfscr->nfsc_groups[i] = cr->cr_groups[i]; + nfscr->nfsc_ngroups = MIN(cr->cr_ngroups + 1, NFS_MAXGRPS + 1); + nfscr->nfsc_groups[0] = cr->cr_gid; + for (i = 1; i < nfscr->nfsc_ngroups; i++) + nfscr->nfsc_groups[i] = cr->cr_groups[i - 1]; } /* diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index a8b06fdb261b..eee571a04821 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -3474,7 +3474,7 @@ nfs_advlock(struct vop_advlock_args *ap) u_quad_t size; struct nfsmount *nmp; - error = NFSVOPLOCK(vp, LK_SHARED); + error = NFSVOPLOCK(vp, LK_EXCLUSIVE); if (error != 0) return (EBADF); nmp = VFSTONFS(vp->v_mount); @@ -3511,11 +3511,6 @@ nfs_advlock(struct vop_advlock_args *ap) cred = p->p_ucred; else cred = td->td_ucred; - NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); - if (VN_IS_DOOMED(vp)) { - error = EBADF; - goto out; - } /* * If this is unlocking a write locked region, flush and diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index b2966934f9b7..7040c4afb797 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -2607,6 +2607,7 @@ again: * rpc reply */ if (siz == 0) { +ateof: vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); @@ -2648,6 +2649,8 @@ again: ncookies--; } if (cpos >= cend || ncookies == 0) { + if (eofflag != 0) + goto ateof; siz = fullsiz; toff = off; goto again; diff --git a/sys/fs/procfs/procfs.c b/sys/fs/procfs/procfs.c index ab60ba47f322..cd66dd6f8b3b 100644 --- a/sys/fs/procfs/procfs.c +++ b/sys/fs/procfs/procfs.c @@ -156,42 +156,42 @@ procfs_init(PFS_INIT_ARGS) root = pi->pi_root; - pfs_create_link(root, "curproc", procfs_docurproc, - NULL, NULL, NULL, 0); - pfs_create_link(root, "self", procfs_docurproc, - NULL, NULL, NULL, 0); - - dir = pfs_create_dir(root, "pid", - procfs_attr_all_rx, NULL, NULL, PFS_PROCDEP); - pfs_create_file(dir, "cmdline", procfs_doproccmdline, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "dbregs", procfs_doprocdbregs, + pfs_create_link(root, NULL, "curproc", procfs_docurproc, NULL, NULL, + NULL, 0); + pfs_create_link(root, NULL, "self", procfs_docurproc, NULL, NULL, NULL, + 0); + + pfs_create_dir(root, &dir, "pid", procfs_attr_all_rx, NULL, NULL, + PFS_PROCDEP); + pfs_create_file(dir, NULL, "cmdline", procfs_doproccmdline, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "dbregs", procfs_doprocdbregs, procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR | PFS_RAW); - pfs_create_file(dir, "etype", procfs_doproctype, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "fpregs", procfs_doprocfpregs, + pfs_create_file(dir, NULL, "etype", procfs_doproctype, NULL, NULL, NULL, + PFS_RD); + pfs_create_file(dir, NULL, "fpregs", procfs_doprocfpregs, procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR | PFS_RAW); - pfs_create_file(dir, "map", procfs_doprocmap, - NULL, procfs_notsystem, NULL, PFS_RD); - pfs_create_file(dir, "mem", procfs_doprocmem, - procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR | PFS_RAW); - pfs_create_file(dir, "note", procfs_doprocnote, - procfs_attr_w, procfs_candebug, NULL, PFS_WR); - pfs_create_file(dir, "notepg", procfs_doprocnote, - procfs_attr_w, procfs_candebug, NULL, PFS_WR); - pfs_create_file(dir, "regs", procfs_doprocregs, - procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR | PFS_RAW); - pfs_create_file(dir, "rlimit", procfs_doprocrlimit, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "status", procfs_doprocstatus, - NULL, NULL, NULL, PFS_RD); - pfs_create_file(dir, "osrel", procfs_doosrel, - procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR); - - pfs_create_link(dir, "file", procfs_doprocfile, - NULL, procfs_notsystem, NULL, 0); - pfs_create_link(dir, "exe", procfs_doprocfile, - NULL, procfs_notsystem, NULL, 0); + pfs_create_file(dir, NULL, "map", procfs_doprocmap, NULL, + procfs_notsystem, NULL, PFS_RD); + pfs_create_file(dir, NULL, "mem", procfs_doprocmem, procfs_attr_rw, + procfs_candebug, NULL, PFS_RDWR | PFS_RAW); + pfs_create_file(dir, NULL, "note", procfs_doprocnote, procfs_attr_w, + procfs_candebug, NULL, PFS_WR); + pfs_create_file(dir, NULL, "notepg", procfs_doprocnote, procfs_attr_w, + procfs_candebug, NULL, PFS_WR); + pfs_create_file(dir, NULL, "regs", procfs_doprocregs, procfs_attr_rw, + procfs_candebug, NULL, PFS_RDWR | PFS_RAW); + pfs_create_file(dir, NULL, "rlimit", procfs_doprocrlimit, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "status", procfs_doprocstatus, NULL, NULL, + NULL, PFS_RD); + pfs_create_file(dir, NULL, "osrel", procfs_doosrel, procfs_attr_rw, + procfs_candebug, NULL, PFS_RDWR); + + pfs_create_link(dir, NULL, "file", procfs_doprocfile, NULL, + procfs_notsystem, NULL, 0); + pfs_create_link(dir, NULL, "exe", procfs_doprocfile, NULL, + procfs_notsystem, NULL, 0); return (0); } diff --git a/sys/fs/procfs/procfs_status.c b/sys/fs/procfs/procfs_status.c index 38070e0946bb..49c084d02ff8 100644 --- a/sys/fs/procfs/procfs_status.c +++ b/sys/fs/procfs/procfs_status.c @@ -141,13 +141,9 @@ procfs_doprocstatus(PFS_FILL_ARGS) (u_long)cr->cr_uid, (u_long)cr->cr_ruid, (u_long)cr->cr_rgid); - - /* egid (cr->cr_svgid) is equal to cr_ngroups[0] - see also getegid(2) in /sys/kern/kern_prot.c */ - - for (i = 0; i < cr->cr_ngroups; i++) { + sbuf_printf(sb, ",%lu", (u_long)cr->cr_gid); + for (i = 0; i < cr->cr_ngroups; i++) sbuf_printf(sb, ",%lu", (u_long)cr->cr_groups[i]); - } if (jailed(cr)) { mtx_lock(&cr->cr_prison->pr_mtx); diff --git a/sys/fs/pseudofs/pseudofs.c b/sys/fs/pseudofs/pseudofs.c index ef45f96a6192..7a4e67455214 100644 --- a/sys/fs/pseudofs/pseudofs.c +++ b/sys/fs/pseudofs/pseudofs.c @@ -133,7 +133,7 @@ pfs_add_node(struct pfs_node *parent, struct pfs_node *pn) for (iter = parent->pn_nodes; iter != NULL; iter = iter->pn_next) { if (strcmp(pn->pn_name, iter->pn_name) != 0) continue; - printf("pfs_add_node: homonymous siblings: '%s/%s' type %d", + printf("pfs_add_node: homonymous siblings: '%s/%s' type %d\n", parent->pn_name, pn->pn_name, pn->pn_type); /* Do not detach, because we are not yet attached. */ pn->pn_parent = NULL; @@ -234,81 +234,101 @@ pfs_fixup_dir(struct pfs_node *parent) /* * Create a directory */ -struct pfs_node * -pfs_create_dir(struct pfs_node *parent, const char *name, - pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy, - int flags) +int +pfs_create_dir(struct pfs_node *parent, struct pfs_node **opn, + const char *name, pfs_attr_t attr, pfs_vis_t vis, + pfs_destroy_t destroy, int flags) { - struct pfs_node *pn; + struct pfs_node *pdir, *pn; int rc; - pn = pfs_alloc_node_flags(parent->pn_info, name, + /* Preserve in case the caller is reusing the one pointer for both. */ + pdir = parent; + if (opn != NULL) + *opn = NULL; + pn = pfs_alloc_node_flags(pdir->pn_info, name, (flags & PFS_PROCDEP) ? pfstype_procdir : pfstype_dir, flags); if (pn == NULL) - return (NULL); + return (ENOMEM); pn->pn_attr = attr; pn->pn_vis = vis; pn->pn_destroy = destroy; pn->pn_flags = flags; - rc = pfs_add_node(parent, pn); + rc = pfs_add_node(pdir, pn); if (rc == 0) rc = pfs_fixup_dir_flags(pn, flags); if (rc != 0) { pfs_destroy(pn); pn = NULL; + } else if (opn != NULL) { + *opn = pn; } - return (pn); + + return (rc); } /* * Create a file */ -struct pfs_node * -pfs_create_file(struct pfs_node *parent, const char *name, pfs_fill_t fill, - pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy, - int flags) +int +pfs_create_file(struct pfs_node *parent, struct pfs_node **opn, + const char *name, pfs_fill_t fill, pfs_attr_t attr, + pfs_vis_t vis, pfs_destroy_t destroy, int flags) { struct pfs_node *pn; + int rc; + if (opn != NULL) + *opn = NULL; pn = pfs_alloc_node_flags(parent->pn_info, name, pfstype_file, flags); if (pn == NULL) - return (NULL); + return (ENOMEM); + pn->pn_fill = fill; pn->pn_attr = attr; pn->pn_vis = vis; pn->pn_destroy = destroy; pn->pn_flags = flags; - if (pfs_add_node(parent, pn) != 0) { + if ((rc = pfs_add_node(parent, pn)) != 0) { pfs_destroy(pn); pn = NULL; + } else if (opn != NULL) { + *opn = pn; } - return (pn); + + return (rc); } /* * Create a symlink */ -struct pfs_node * -pfs_create_link(struct pfs_node *parent, const char *name, pfs_fill_t fill, - pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy, - int flags) +int +pfs_create_link(struct pfs_node *parent, struct pfs_node **opn, + const char *name, pfs_fill_t fill, pfs_attr_t attr, + pfs_vis_t vis, pfs_destroy_t destroy, int flags) { struct pfs_node *pn; + int rc; + if (opn != NULL) + *opn = NULL; pn = pfs_alloc_node_flags(parent->pn_info, name, pfstype_symlink, flags); if (pn == NULL) - return (NULL); + return (ENOMEM); + pn->pn_fill = fill; pn->pn_attr = attr; pn->pn_vis = vis; pn->pn_destroy = destroy; pn->pn_flags = flags; - if (pfs_add_node(parent, pn) != 0) { + if ((rc = pfs_add_node(parent, pn)) != 0) { pfs_destroy(pn); pn = NULL; + } else if (opn != NULL) { + *opn = pn; } - return (pn); + return (rc); } /* @@ -475,6 +495,7 @@ pfs_init(struct pfs_info *pi, struct vfsconf *vfc) if (error) { pfs_destroy(root); pi->pi_root = NULL; + pfs_fileno_uninit(pi); return (error); } diff --git a/sys/fs/pseudofs/pseudofs.h b/sys/fs/pseudofs/pseudofs.h index c60dd7b339d1..2b08dcad978d 100644 --- a/sys/fs/pseudofs/pseudofs.h +++ b/sys/fs/pseudofs/pseudofs.h @@ -255,17 +255,18 @@ int pfs_uninit (struct pfs_info *pi, struct vfsconf *vfc); /* * Directory structure construction and manipulation */ -struct pfs_node *pfs_create_dir (struct pfs_node *parent, const char *name, - pfs_attr_t attr, pfs_vis_t vis, - pfs_destroy_t destroy, int flags); -struct pfs_node *pfs_create_file(struct pfs_node *parent, const char *name, - pfs_fill_t fill, pfs_attr_t attr, - pfs_vis_t vis, pfs_destroy_t destroy, - int flags); -struct pfs_node *pfs_create_link(struct pfs_node *parent, const char *name, - pfs_fill_t fill, pfs_attr_t attr, +int pfs_create_dir (struct pfs_node *parent, struct pfs_node **opn, + const char *name, pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy, int flags); +int pfs_create_file (struct pfs_node *parent, struct pfs_node **opn, + const char *name, pfs_fill_t fill, + pfs_attr_t attr, pfs_vis_t vis, + pfs_destroy_t destroy, int flags); +int pfs_create_link (struct pfs_node *parent, struct pfs_node **opn, + const char *name, pfs_fill_t fill, + pfs_attr_t attr, pfs_vis_t vis, + pfs_destroy_t destroy, int flags); struct pfs_node *pfs_find_node (struct pfs_node *parent, const char *name); void pfs_purge (struct pfs_node *pn); int pfs_destroy (struct pfs_node *pn); diff --git a/sys/fs/tarfs/tarfs_vnops.c b/sys/fs/tarfs/tarfs_vnops.c index acf18de5ab51..c110107bb210 100644 --- a/sys/fs/tarfs/tarfs_vnops.c +++ b/sys/fs/tarfs/tarfs_vnops.c @@ -334,6 +334,10 @@ tarfs_readdir(struct vop_readdir_args *ap) tnp, tnp->name, uio->uio_offset, uio->uio_resid); if (uio->uio_offset == TARFS_COOKIE_EOF) { + if (eofflag != NULL) { + TARFS_DPF(VNODE, "%s: Setting EOF flag\n", __func__); + *eofflag = 1; + } TARFS_DPF(VNODE, "%s: EOF\n", __func__); return (0); } diff --git a/sys/geom/cache/g_cache.c b/sys/geom/cache/g_cache.c index 9d0b10f4192e..c6b80786ade5 100644 --- a/sys/geom/cache/g_cache.c +++ b/sys/geom/cache/g_cache.c @@ -504,7 +504,7 @@ g_cache_create(struct g_class *mp, struct g_provider *pp, return (NULL); } - gp = g_new_geomf(mp, "%s", md->md_name); + gp = g_new_geom(mp, md->md_name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); sc->sc_type = type; sc->sc_bshift = bshift; @@ -665,7 +665,7 @@ g_cache_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) G_CACHE_DEBUG(3, "Tasting %s.", pp->name); - gp = g_new_geomf(mp, "cache:taste"); + gp = g_new_geom(mp, "cache:taste"); gp->start = g_cache_start; gp->orphan = g_cache_orphan; gp->access = g_cache_access; diff --git a/sys/geom/concat/g_concat.c b/sys/geom/concat/g_concat.c index 2173a84c7acf..fe83b54953cc 100644 --- a/sys/geom/concat/g_concat.c +++ b/sys/geom/concat/g_concat.c @@ -646,7 +646,7 @@ g_concat_create(struct g_class *mp, const struct g_concat_metadata *md, return (NULL); } } - gp = g_new_geomf(mp, "%s", md->md_name); + gp = g_new_geom(mp, md->md_name); sc = malloc(sizeof(*sc), M_CONCAT, M_WAITOK | M_ZERO); gp->start = g_concat_start; gp->spoiled = g_concat_orphan; @@ -753,7 +753,7 @@ g_concat_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) G_CONCAT_DEBUG(3, "Tasting %s.", pp->name); - gp = g_new_geomf(mp, "concat:taste"); + gp = g_new_geom(mp, "concat:taste"); gp->start = g_concat_start; gp->access = g_concat_access; gp->orphan = g_concat_orphan; diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c index 5bd2d465183e..7fca50e7635c 100644 --- a/sys/geom/eli/g_eli.c +++ b/sys/geom/eli/g_eli.c @@ -769,7 +769,7 @@ g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp, g_topology_assert(); - gp = g_new_geomf(mp, "eli:taste"); + gp = g_new_geom(mp, "eli:taste"); gp->start = g_eli_start; gp->access = g_std_access; /* diff --git a/sys/geom/gate/g_gate.c b/sys/geom/gate/g_gate.c index ecdcacff6707..76a4328227dd 100644 --- a/sys/geom/gate/g_gate.c +++ b/sys/geom/gate/g_gate.c @@ -571,7 +571,7 @@ g_gate_create(struct g_gate_ctl_create *ggio) } } - gp = g_new_geomf(&g_gate_class, "%s", name); + gp = g_new_geom(&g_gate_class, name); gp->start = g_gate_start; gp->access = g_gate_access; gp->orphan = g_gate_orphan; diff --git a/sys/geom/geom.h b/sys/geom/geom.h index 908ce86f03a6..50e6627b0157 100644 --- a/sys/geom/geom.h +++ b/sys/geom/geom.h @@ -289,8 +289,9 @@ int g_handleattr_int(struct bio *bp, const char *attribute, int val); int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val); int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val); int g_handleattr_str(struct bio *bp, const char *attribute, const char *str); -struct g_consumer * g_new_consumer(struct g_geom *gp); -struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...) +struct g_consumer *g_new_consumer(struct g_geom *gp); +struct g_geom *g_new_geom(struct g_class *mp, const char *name); +struct g_geom *g_new_geomf(struct g_class *mp, const char *fmt, ...) __printflike(2, 3); struct g_provider * g_new_providerf(struct g_geom *gp, const char *fmt, ...) __printflike(2, 3); diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c index 4a2a850c2eab..27c65f15d5e3 100644 --- a/sys/geom/geom_dev.c +++ b/sys/geom/geom_dev.c @@ -355,7 +355,7 @@ g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused) g_trace(G_T_TOPOLOGY, "dev_taste(%s,%s)", mp->name, pp->name); g_topology_assert(); - gp = g_new_geomf(mp, "%s", pp->name); + gp = g_new_geom(mp, pp->name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->sc_mtx, "g_dev", NULL, MTX_DEF); cp = g_new_consumer(gp); diff --git a/sys/geom/geom_event.c b/sys/geom/geom_event.c index 341233a6ef47..ffd46db55416 100644 --- a/sys/geom/geom_event.c +++ b/sys/geom/geom_event.c @@ -347,6 +347,7 @@ static void g_post_event_ep_va(g_event_t *func, void *arg, int wuflag, struct g_event *ep, va_list ap) { + struct thread *td; void *p; u_int n; @@ -366,8 +367,12 @@ g_post_event_ep_va(g_event_t *func, void *arg, int wuflag, TAILQ_INSERT_TAIL(&g_events, ep, events); mtx_unlock(&g_eventlock); wakeup(&g_wait_event); - curthread->td_pflags |= TDP_GEOM; - ast_sched(curthread, TDA_GEOM); + + td = curthread; + if ((td->td_pflags & TDP_KTHREAD) == 0) { + td->td_pflags |= TDP_GEOM; + ast_sched(td, TDA_GEOM); + } } void diff --git a/sys/geom/geom_slice.c b/sys/geom/geom_slice.c index 0491b0069be4..935293950c37 100644 --- a/sys/geom/geom_slice.c +++ b/sys/geom/geom_slice.c @@ -529,7 +529,7 @@ g_slice_new(struct g_class *mp, u_int slices, struct g_provider *pp, struct g_co g_topology_assert(); vp = (void **)extrap; - gp = g_new_geomf(mp, "%s", pp->name); + gp = g_new_geom(mp, pp->name); gsp = g_slice_alloc(slices, extra); gsp->start = start; gp->softc = gsp; diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c index 1429c84942ed..2a6ce1ab6486 100644 --- a/sys/geom/geom_subr.c +++ b/sys/geom/geom_subr.c @@ -368,20 +368,15 @@ g_retaste(struct g_class *mp) } struct g_geom * -g_new_geomf(struct g_class *mp, const char *fmt, ...) +g_new_geom(struct g_class *mp, const char *name) { + int len; struct g_geom *gp; - va_list ap; - struct sbuf *sb; g_topology_assert(); G_VALID_CLASS(mp); - sb = sbuf_new_auto(); - va_start(ap, fmt); - sbuf_vprintf(sb, fmt, ap); - va_end(ap); - sbuf_finish(sb); - gp = g_malloc(sizeof(*gp) + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); + len = strlen(name); + gp = g_malloc(sizeof(*gp) + len + 1, M_WAITOK | M_ZERO); gp->name = (char *)(gp + 1); gp->class = mp; gp->rank = 1; @@ -389,8 +384,7 @@ g_new_geomf(struct g_class *mp, const char *fmt, ...) LIST_INIT(&gp->provider); LIST_INSERT_HEAD(&mp->geom, gp, geom); TAILQ_INSERT_HEAD(&geoms, gp, geoms); - strcpy(gp->name, sbuf_data(sb)); - sbuf_delete(sb); + memcpy(gp->name, name, len); /* Fill in defaults from class */ gp->start = mp->start; gp->spoiled = mp->spoiled; @@ -404,6 +398,23 @@ g_new_geomf(struct g_class *mp, const char *fmt, ...) return (gp); } +struct g_geom * +g_new_geomf(struct g_class *mp, const char *fmt, ...) +{ + struct g_geom *gp; + va_list ap; + struct sbuf *sb; + + sb = sbuf_new_auto(); + va_start(ap, fmt); + sbuf_vprintf(sb, fmt, ap); + va_end(ap); + sbuf_finish(sb); + gp = g_new_geom(mp, sbuf_data(sb)); + sbuf_delete(sb); + return (gp); +} + void g_destroy_geom(struct g_geom *gp) { diff --git a/sys/geom/journal/g_journal.c b/sys/geom/journal/g_journal.c index 6d9f6239e632..b520194b7d7c 100644 --- a/sys/geom/journal/g_journal.c +++ b/sys/geom/journal/g_journal.c @@ -2477,7 +2477,7 @@ g_journal_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) if (pp->geom->class == mp) return (NULL); - gp = g_new_geomf(mp, "journal:taste"); + gp = g_new_geom(mp, "journal:taste"); /* This orphan function should be never called. */ gp->orphan = g_journal_taste_orphan; cp = g_new_consumer(gp); diff --git a/sys/geom/label/g_label.c b/sys/geom/label/g_label.c index acb17d40914e..faefbd7c2ef6 100644 --- a/sys/geom/label/g_label.c +++ b/sys/geom/label/g_label.c @@ -399,7 +399,7 @@ g_label_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) if (strcmp(pp->geom->class->name, mp->name) == 0) return (NULL); - gp = g_new_geomf(mp, "label:taste"); + gp = g_new_geom(mp, "label:taste"); gp->start = g_label_start_taste; gp->access = g_label_access_taste; gp->orphan = g_label_orphan_taste; diff --git a/sys/geom/linux_lvm/g_linux_lvm.c b/sys/geom/linux_lvm/g_linux_lvm.c index c63318fed729..f333c08f45d9 100644 --- a/sys/geom/linux_lvm/g_linux_lvm.c +++ b/sys/geom/linux_lvm/g_linux_lvm.c @@ -537,7 +537,7 @@ g_llvm_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); - gp = g_new_geomf(mp, "linux_lvm:taste"); + gp = g_new_geom(mp, "linux_lvm:taste"); /* This orphan function should be never called. */ gp->orphan = g_llvm_taste_orphan; cp = g_new_consumer(gp); @@ -557,7 +557,7 @@ g_llvm_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) vg = md.md_vg; if (vg->vg_geom == NULL) { /* new volume group */ - gp = g_new_geomf(mp, "%s", vg->vg_name); + gp = g_new_geom(mp, vg->vg_name); gp->start = g_llvm_start; gp->spoiled = g_llvm_orphan; gp->orphan = g_llvm_orphan; diff --git a/sys/geom/mirror/g_mirror.c b/sys/geom/mirror/g_mirror.c index 25c0490938ef..03902a2f2491 100644 --- a/sys/geom/mirror/g_mirror.c +++ b/sys/geom/mirror/g_mirror.c @@ -3149,7 +3149,7 @@ g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md, /* * Action geom. */ - gp = g_new_geomf(mp, "%s", md->md_name); + gp = g_new_geom(mp, md->md_name); sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO); gp->start = g_mirror_start; gp->orphan = g_mirror_orphan; @@ -3290,7 +3290,7 @@ g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); G_MIRROR_DEBUG(2, "Tasting %s.", pp->name); - gp = g_new_geomf(mp, "mirror:taste"); + gp = g_new_geom(mp, "mirror:taste"); /* * This orphan function should be never called. */ diff --git a/sys/geom/mirror/g_mirror_ctl.c b/sys/geom/mirror/g_mirror_ctl.c index 82bc05a142c0..b31bf098ac4b 100644 --- a/sys/geom/mirror/g_mirror_ctl.c +++ b/sys/geom/mirror/g_mirror_ctl.c @@ -433,7 +433,7 @@ g_mirror_ctl_create(struct gctl_req *req, struct g_class *mp) g_topology_lock(); mediasize = OFF_MAX; sectorsize = 0; - gp = g_new_geomf(mp, "%s", md.md_name); + gp = g_new_geom(mp, md.md_name); gp->orphan = g_mirror_create_orphan; cp = g_new_consumer(gp); for (no = 1; no < *nargs; no++) { diff --git a/sys/geom/mountver/g_mountver.c b/sys/geom/mountver/g_mountver.c index de3a298735d4..c7d55c4734a2 100644 --- a/sys/geom/mountver/g_mountver.c +++ b/sys/geom/mountver/g_mountver.c @@ -291,7 +291,7 @@ g_mountver_create(struct gctl_req *req, struct g_class *mp, struct g_provider *p return (EEXIST); } } - gp = g_new_geomf(mp, "%s", name); + gp = g_new_geom(mp, name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->sc_mtx, "gmountver", NULL, MTX_DEF | MTX_RECURSE); TAILQ_INIT(&sc->sc_queue); diff --git a/sys/geom/multipath/g_multipath.c b/sys/geom/multipath/g_multipath.c index a4935df7eaa1..250a2c60ffee 100644 --- a/sys/geom/multipath/g_multipath.c +++ b/sys/geom/multipath/g_multipath.c @@ -549,7 +549,7 @@ g_multipath_create(struct g_class *mp, struct g_multipath_metadata *md) } } - gp = g_new_geomf(mp, "%s", md->md_name); + gp = g_new_geom(mp, md->md_name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->sc_mtx, "multipath", NULL, MTX_DEF); memcpy(sc->sc_uuid, md->md_uuid, sizeof(sc->sc_uuid)); @@ -821,7 +821,7 @@ g_multipath_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) g_topology_assert(); - gp = g_new_geomf(mp, "multipath:taste"); + gp = g_new_geom(mp, "multipath:taste"); gp->start = g_multipath_start; gp->access = g_multipath_access; gp->orphan = g_multipath_orphan; diff --git a/sys/geom/nop/g_nop.c b/sys/geom/nop/g_nop.c index a32111e3a29a..1fb99f4a0a5b 100644 --- a/sys/geom/nop/g_nop.c +++ b/sys/geom/nop/g_nop.c @@ -416,7 +416,7 @@ g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, return (EEXIST); } } - gp = g_new_geomf(mp, "%s", name); + gp = g_new_geom(mp, name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); sc->sc_offset = offset; sc->sc_explicitsize = explicitsize; diff --git a/sys/geom/part/g_part.c b/sys/geom/part/g_part.c index 88e44b335b29..8a7f67d8a313 100644 --- a/sys/geom/part/g_part.c +++ b/sys/geom/part/g_part.c @@ -998,7 +998,7 @@ g_part_ctl_create(struct gctl_req *req, struct g_part_parms *gpp) } if (null == NULL) - gp = g_new_geomf(&g_part_class, "%s", pp->name); + gp = g_new_geom(&g_part_class, pp->name); gp->softc = kobj_create((kobj_class_t)gpp->gpp_scheme, M_GEOM, M_WAITOK); table = gp->softc; @@ -1979,7 +1979,7 @@ g_part_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) * With that we become part of the topology. Obtain read access * to the provider. */ - gp = g_new_geomf(mp, "%s", pp->name); + gp = g_new_geom(mp, pp->name); cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); diff --git a/sys/geom/raid/g_raid.c b/sys/geom/raid/g_raid.c index a483622d14a5..590f28aaa46c 100644 --- a/sys/geom/raid/g_raid.c +++ b/sys/geom/raid/g_raid.c @@ -1876,7 +1876,7 @@ g_raid_create_node(struct g_class *mp, g_topology_assert(); G_RAID_DEBUG(1, "Creating array %s.", name); - gp = g_new_geomf(mp, "%s", name); + gp = g_new_geom(mp, name); sc = malloc(sizeof(*sc), M_RAID, M_WAITOK | M_ZERO); gp->start = g_raid_start; gp->orphan = g_raid_orphan; @@ -2217,7 +2217,7 @@ g_raid_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) geom = NULL; status = G_RAID_MD_TASTE_FAIL; - gp = g_new_geomf(mp, "raid:taste"); + gp = g_new_geom(mp, "raid:taste"); /* * This orphan function should be never called. */ diff --git a/sys/geom/raid3/g_raid3.c b/sys/geom/raid3/g_raid3.c index c2d05b48d80d..64951bd01deb 100644 --- a/sys/geom/raid3/g_raid3.c +++ b/sys/geom/raid3/g_raid3.c @@ -3164,7 +3164,7 @@ g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md) /* * Action geom. */ - gp = g_new_geomf(mp, "%s", md->md_name); + gp = g_new_geom(mp, md->md_name); sc = malloc(sizeof(*sc), M_RAID3, M_WAITOK | M_ZERO); sc->sc_disks = malloc(sizeof(struct g_raid3_disk) * md->md_all, M_RAID3, M_WAITOK | M_ZERO); @@ -3338,7 +3338,7 @@ g_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); G_RAID3_DEBUG(2, "Tasting %s.", pp->name); - gp = g_new_geomf(mp, "raid3:taste"); + gp = g_new_geom(mp, "raid3:taste"); /* This orphan function should be never called. */ gp->orphan = g_raid3_taste_orphan; cp = g_new_consumer(gp); diff --git a/sys/geom/raid3/g_raid3_ctl.c b/sys/geom/raid3/g_raid3_ctl.c index 824de07e4836..5eafcce917cf 100644 --- a/sys/geom/raid3/g_raid3_ctl.c +++ b/sys/geom/raid3/g_raid3_ctl.c @@ -425,7 +425,7 @@ g_raid3_ctl_insert(struct gctl_req *req, struct g_class *mp) no = gctl_get_paraml(req, "number", sizeof(*no)); else no = NULL; - gp = g_new_geomf(mp, "raid3:insert"); + gp = g_new_geom(mp, "raid3:insert"); gp->orphan = g_raid3_ctl_insert_orphan; cp = g_new_consumer(gp); error = g_attach(cp, pp); diff --git a/sys/geom/shsec/g_shsec.c b/sys/geom/shsec/g_shsec.c index 3ccc23e7eb8b..9da814e5eb34 100644 --- a/sys/geom/shsec/g_shsec.c +++ b/sys/geom/shsec/g_shsec.c @@ -545,7 +545,7 @@ g_shsec_create(struct g_class *mp, const struct g_shsec_metadata *md) return (NULL); } } - gp = g_new_geomf(mp, "%s", md->md_name); + gp = g_new_geom(mp, md->md_name); sc = malloc(sizeof(*sc), M_SHSEC, M_WAITOK | M_ZERO); gp->start = g_shsec_start; gp->spoiled = g_shsec_orphan; @@ -643,7 +643,7 @@ g_shsec_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) G_SHSEC_DEBUG(3, "Tasting %s.", pp->name); - gp = g_new_geomf(mp, "shsec:taste"); + gp = g_new_geom(mp, "shsec:taste"); gp->start = g_shsec_start; gp->access = g_shsec_access; gp->orphan = g_shsec_orphan; diff --git a/sys/geom/stripe/g_stripe.c b/sys/geom/stripe/g_stripe.c index 6f336c18c8e6..ba1953f036d3 100644 --- a/sys/geom/stripe/g_stripe.c +++ b/sys/geom/stripe/g_stripe.c @@ -454,11 +454,9 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_length = length; - if ((bp->bio_flags & BIO_UNMAPPED) != 0) { - bp->bio_ma_n = round_page(bp->bio_ma_offset + - bp->bio_length) / PAGE_SIZE; + if ((bp->bio_flags & BIO_UNMAPPED) != 0) addr = NULL; - } else + else addr = bp->bio_data; cbp->bio_caller2 = sc->sc_disks[no]; @@ -864,7 +862,7 @@ g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, return (NULL); } } - gp = g_new_geomf(mp, "%s", md->md_name); + gp = g_new_geom(mp, md->md_name); sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); gp->start = g_stripe_start; gp->spoiled = g_stripe_orphan; @@ -965,7 +963,7 @@ g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); - gp = g_new_geomf(mp, "stripe:taste"); + gp = g_new_geom(mp, "stripe:taste"); gp->start = g_stripe_start; gp->access = g_stripe_access; gp->orphan = g_stripe_orphan; diff --git a/sys/geom/union/g_union.c b/sys/geom/union/g_union.c index 9734fc1bcfe3..302761597f6f 100644 --- a/sys/geom/union/g_union.c +++ b/sys/geom/union/g_union.c @@ -246,7 +246,7 @@ g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool verbose) return; } } - gp = g_new_geomf(mp, "%s", name); + gp = g_new_geom(mp, name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); rw_init(&sc->sc_rwlock, "gunion"); TAILQ_INIT(&sc->sc_wiplist); @@ -358,6 +358,7 @@ fail2: fail1: g_destroy_consumer(lowercp); g_destroy_provider(newpp); + g_free(sc); g_destroy_geom(gp); } diff --git a/sys/geom/virstor/g_virstor.c b/sys/geom/virstor/g_virstor.c index c7d737493f11..1490ed103329 100644 --- a/sys/geom/virstor/g_virstor.c +++ b/sys/geom/virstor/g_virstor.c @@ -771,7 +771,7 @@ g_virstor_taste(struct g_class *mp, struct g_provider *pp, int flags) LOG_MSG(LVL_DEBUG, "Tasting %s", pp->name); /* We need a dummy geom to attach a consumer to the given provider */ - gp = g_new_geomf(mp, "virstor:taste.helper"); + gp = g_new_geom(mp, "virstor:taste.helper"); gp->start = (void *)invalid_call; /* XXX: hacked up so the */ gp->access = (void *)invalid_call; /* compiler doesn't complain. */ gp->orphan = (void *)invalid_call; /* I really want these to fail. */ @@ -1085,7 +1085,7 @@ create_virstor_geom(struct g_class *mp, struct g_virstor_metadata *md) return (NULL); } } - gp = g_new_geomf(mp, "%s", md->md_name); + gp = g_new_geom(mp, md->md_name); gp->softc = NULL; /* to circumevent races that test softc */ gp->start = g_virstor_start; diff --git a/sys/geom/zero/g_zero.c b/sys/geom/zero/g_zero.c index 91ef0fb1ef95..e9934ba6c784 100644 --- a/sys/geom/zero/g_zero.c +++ b/sys/geom/zero/g_zero.c @@ -102,7 +102,7 @@ g_zero_init(struct g_class *mp) struct g_provider *pp; g_topology_assert(); - gp = g_new_geomf(mp, "gzero"); + gp = g_new_geom(mp, "gzero"); gp->start = g_zero_start; gp->access = g_std_access; gpp = pp = g_new_providerf(gp, "%s", gp->name); diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index 88b8967cd693..ac0cc4ba74e7 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -249,9 +249,9 @@ device wlan # 802.11 support options IEEE80211_DEBUG # enable debug msgs options IEEE80211_SUPPORT_MESH # enable 802.11s draft support device wlan_wep # 802.11 WEP support +device wlan_tkip # 802.11 TKIP support device wlan_ccmp # 802.11 CCMP support device wlan_gcmp # 802.11 GCMP support -device wlan_tkip # 802.11 TKIP support device wlan_amrr # AMRR transmit rate control algorithm device ath # Atheros CardBus/PCI NICs device ath_hal # Atheros CardBus/PCI chip support diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 5a53fac50f2c..1bc2491a1a12 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -2610,11 +2610,13 @@ note_procstat_groups(void *arg, struct sbuf *sb, size_t *sizep) int structsize; p = arg; - size = sizeof(structsize) + p->p_ucred->cr_ngroups * sizeof(gid_t); + size = sizeof(structsize) + + (1 + p->p_ucred->cr_ngroups) * sizeof(gid_t); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(gid_t); sbuf_bcat(sb, &structsize, sizeof(structsize)); + sbuf_bcat(sb, &p->p_ucred->cr_gid, sizeof(gid_t)); sbuf_bcat(sb, p->p_ucred->cr_groups, p->p_ucred->cr_ngroups * sizeof(gid_t)); } diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 36ce44b988be..87ffdb8dbf07 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -145,13 +145,6 @@ FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance"); #endif /* - * This ensures that there is at least one entry so that the sysinit_set - * symbol is not undefined. A sybsystem ID of SI_SUB_DUMMY is never - * executed. - */ -SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL); - -/* * The sysinit linker set compiled into the kernel. These are placed onto the * sysinit list by mi_startup; sysinit_add can add (e.g., from klds) additional * sysinits to the linked list but the linker set here does not change. @@ -296,7 +289,7 @@ mi_startup(void) BOOTTRACE_INIT("sysinit 0x%7x", sip->subsystem); #if defined(VERBOSE_SYSINIT) - if (sip->subsystem > last && verbose_sysinit != 0) { + if (sip->subsystem != last && verbose_sysinit != 0) { verbose = 1; printf("subsystem %x\n", sip->subsystem); } diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index fcd232cde21e..e42e7dcf8b44 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -663,4 +663,6 @@ struct sysent sysent[] = { { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */ { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */ + { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */ + { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */ }; diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index a27ab33b34da..2a833d2eafbe 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -665,20 +665,26 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) } while (atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0); got_set = tmp & ~flg; got_cleared = flg & ~tmp; - tmp = fp->f_flag & FNONBLOCK; - error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); - if (error != 0) - goto revert_f_setfl; - tmp = fp->f_flag & FASYNC; - error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td); - if (error == 0) { - fdrop(fp, td); - break; + if (((got_set | got_cleared) & FNONBLOCK) != 0) { + tmp = fp->f_flag & FNONBLOCK; + error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); + if (error != 0) + goto revert_flags; + } + if (((got_set | got_cleared) & FASYNC) != 0) { + tmp = fp->f_flag & FASYNC; + error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td); + if (error != 0) + goto revert_nonblock; + } + fdrop(fp, td); + break; +revert_nonblock: + if (((got_set | got_cleared) & FNONBLOCK) != 0) { + tmp = ~fp->f_flag & FNONBLOCK; + (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); } - atomic_clear_int(&fp->f_flag, FNONBLOCK); - tmp = 0; - (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); -revert_f_setfl: +revert_flags: do { tmp = flg = fp->f_flag; tmp &= ~FCNTLFLAGS; @@ -5250,6 +5256,8 @@ file_type_to_name(short type) return ("eventfd"); case DTYPE_TIMERFD: return ("timerfd"); + case DTYPE_JAILDESC: + return ("jail"); default: return ("unkn"); } diff --git a/sys/kern/kern_environment.c b/sys/kern/kern_environment.c index 0cb0f566a839..7c0654769581 100644 --- a/sys/kern/kern_environment.c +++ b/sys/kern/kern_environment.c @@ -1098,65 +1098,65 @@ kernenv_next(char *cp) } void -tunable_int_init(void *data) +tunable_int_init(const void *data) { - struct tunable_int *d = (struct tunable_int *)data; + const struct tunable_int *d = data; TUNABLE_INT_FETCH(d->path, d->var); } void -tunable_long_init(void *data) +tunable_long_init(const void *data) { - struct tunable_long *d = (struct tunable_long *)data; + const struct tunable_long *d = data; TUNABLE_LONG_FETCH(d->path, d->var); } void -tunable_ulong_init(void *data) +tunable_ulong_init(const void *data) { - struct tunable_ulong *d = (struct tunable_ulong *)data; + const struct tunable_ulong *d = data; TUNABLE_ULONG_FETCH(d->path, d->var); } void -tunable_int64_init(void *data) +tunable_int64_init(const void *data) { - struct tunable_int64 *d = (struct tunable_int64 *)data; + const struct tunable_int64 *d = data; TUNABLE_INT64_FETCH(d->path, d->var); } void -tunable_uint64_init(void *data) +tunable_uint64_init(const void *data) { - struct tunable_uint64 *d = (struct tunable_uint64 *)data; + const struct tunable_uint64 *d = data; TUNABLE_UINT64_FETCH(d->path, d->var); } void -tunable_quad_init(void *data) +tunable_quad_init(const void *data) { - struct tunable_quad *d = (struct tunable_quad *)data; + const struct tunable_quad *d = data; TUNABLE_QUAD_FETCH(d->path, d->var); } void -tunable_bool_init(void *data) +tunable_bool_init(const void *data) { - struct tunable_bool *d = (struct tunable_bool *)data; + const struct tunable_bool *d = data; TUNABLE_BOOL_FETCH(d->path, d->var); } void -tunable_str_init(void *data) +tunable_str_init(const void *data) { - struct tunable_str *d = (struct tunable_str *)data; + const struct tunable_str *d = data; TUNABLE_STR_FETCH(d->path, d->var, d->size); } diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index eb77a5064113..b5360f3a1055 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -50,6 +50,7 @@ #include <sys/filedesc.h> #include <sys/filio.h> #include <sys/fcntl.h> +#include <sys/jail.h> #include <sys/kthread.h> #include <sys/selinfo.h> #include <sys/queue.h> @@ -163,6 +164,9 @@ static int filt_kqueue(struct knote *kn, long hint); static int filt_procattach(struct knote *kn); static void filt_procdetach(struct knote *kn); static int filt_proc(struct knote *kn, long hint); +static int filt_jailattach(struct knote *kn); +static void filt_jaildetach(struct knote *kn); +static int filt_jail(struct knote *kn, long hint); static int filt_fileattach(struct knote *kn); static void filt_timerexpire(void *knx); static void filt_timerexpire_l(struct knote *kn, bool proc_locked); @@ -195,6 +199,12 @@ static const struct filterops proc_filtops = { .f_detach = filt_procdetach, .f_event = filt_proc, }; +static const struct filterops jail_filtops = { + .f_isfd = 0, + .f_attach = filt_jailattach, + .f_detach = filt_jaildetach, + .f_event = filt_jail, +}; static const struct filterops timer_filtops = { .f_isfd = 0, .f_attach = filt_timerattach, @@ -365,6 +375,7 @@ static struct { [~EVFILT_USER] = { &user_filtops, 1 }, [~EVFILT_SENDFILE] = { &null_filtops }, [~EVFILT_EMPTY] = { &file_filtops, 1 }, + [~EVFILT_JAIL] = { &jail_filtops, 1 }, }; /* @@ -528,7 +539,8 @@ filt_proc(struct knote *kn, long hint) * process forked. Additionally, for each knote attached to the * parent, check whether user wants to track the new process. If so * attach a new knote to it, and immediately report an event with the - * child's pid. + * child's pid. This is also called on jail creation, which is treated + * the same way by jail events. */ void knote_fork(struct knlist *list, int pid) @@ -555,6 +567,8 @@ knote_fork(struct knlist *list, int pid) /* * The same as knote(), activate the event. */ + _Static_assert(NOTE_JAIL_CHILD == NOTE_FORK, + "NOTE_JAIL_CHILD should be the same as NOTE_FORK"); if ((kn->kn_sfflags & NOTE_TRACK) == 0) { if (kn->kn_fop->f_event(kn, NOTE_FORK)) KNOTE_ACTIVATE(kn, 1); @@ -614,6 +628,124 @@ knote_fork(struct knlist *list, int pid) } } +int +filt_jailattach(struct knote *kn) +{ + struct prison *pr; + bool immediate; + + immediate = false; + if (kn->kn_id == 0) { + /* Let jid=0 watch the current prison (including prison0). */ + pr = curthread->td_ucred->cr_prison; + mtx_lock(&pr->pr_mtx); + } else if (kn->kn_flags & (EV_FLAG1 | EV_FLAG2)) { + /* + * The kernel registers prisons before they are valid, + * so prison_find_child will fail. + */ + TAILQ_FOREACH(pr, &allprison, pr_list) { + if (pr->pr_id < kn->kn_id) + continue; + if (pr->pr_id > kn->kn_id) { + pr = NULL; + break; + } + mtx_lock(&pr->pr_mtx); + break; + } + if (pr == NULL) + return (ENOENT); + } else { + sx_slock(&allprison_lock); + pr = prison_find_child(curthread->td_ucred->cr_prison, + kn->kn_id); + sx_sunlock(&allprison_lock); + if (pr == NULL) + return (ENOENT); + if (!prison_isalive(pr)) { + mtx_unlock(&pr->pr_mtx); + return (ENOENT); + } + } + kn->kn_ptr.p_prison = pr; + kn->kn_flags |= EV_CLEAR; + + /* + * Internal flag indicating registration done by kernel for the + * purposes of getting a NOTE_CHILD notification. + */ + if (kn->kn_flags & EV_FLAG2) { + kn->kn_flags &= ~EV_FLAG2; + kn->kn_data = kn->kn_sdata; /* parent id */ + kn->kn_fflags = NOTE_CHILD; + kn->kn_sfflags &= ~NOTE_JAIL_CTRLMASK; + immediate = true; /* Force immediate activation of child note. */ + } + /* + * Internal flag indicating registration done by kernel (for other than + * NOTE_CHILD). + */ + if (kn->kn_flags & EV_FLAG1) { + kn->kn_flags &= ~EV_FLAG1; + } + + knlist_add(pr->pr_klist, kn, 1); + + /* Immediately activate any child notes. */ + if (immediate) + KNOTE_ACTIVATE(kn, 0); + + mtx_unlock(&pr->pr_mtx); + return (0); +} + +void +filt_jaildetach(struct knote *kn) +{ + if (kn->kn_ptr.p_prison != NULL) { + knlist_remove(kn->kn_knlist, kn, 0); + kn->kn_ptr.p_prison = NULL; + } else + kn->kn_status |= KN_DETACHED; +} + +int +filt_jail(struct knote *kn, long hint) +{ + struct prison *pr; + u_int event; + + pr = kn->kn_ptr.p_prison; + if (pr == NULL) /* already activated, from attach filter */ + return (0); + + /* Mask off extra data. */ + event = (u_int)hint & NOTE_JAIL_CTRLMASK; + + /* If the user is interested in this event, record it. */ + if (kn->kn_sfflags & event) + kn->kn_fflags |= event; + + /* Report the attached process id. */ + if (event == NOTE_JAIL_ATTACH) { + if (kn->kn_data != 0) + kn->kn_fflags |= NOTE_JAIL_ATTACH_MULTI; + kn->kn_data = hint & NOTE_JAIL_DATAMASK; + } + + /* Prison is gone, so flag the event as finished. */ + if (event == NOTE_JAIL_REMOVE) { + kn->kn_flags |= EV_EOF | EV_ONESHOT; + kn->kn_ptr.p_prison = NULL; + if (kn->kn_fflags == 0) + kn->kn_flags |= EV_DROP; + return (1); + } + + return (kn->kn_fflags != 0); +} + /* * XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the * interval timer support code. @@ -1597,8 +1729,8 @@ findkn: /* * If possible, find an existing knote to use for this kevent. */ - if (kev->filter == EVFILT_PROC && - (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) { + if ((kev->filter == EVFILT_PROC || kev->filter == EVFILT_JAIL) + && (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) { /* This is an internal creation of a process tracking * note. Don't attempt to coalesce this with an * existing note. @@ -1771,7 +1903,7 @@ kqueue_acquire(struct file *fp, struct kqueue **kqp) kq = fp->f_data; if (fp->f_type != DTYPE_KQUEUE || kq == NULL) - return (EBADF); + return (EINVAL); *kqp = kq; KQ_LOCK(kq); if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) { @@ -2800,6 +2932,7 @@ knote_init(void) knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); ast_register(TDA_KQUEUE, ASTR_ASTF_REQUIRED, 0, ast_kqueue); + prison0.pr_klist = knlist_alloc(&prison0.pr_mtx); } SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL); diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 7c9a15ae18f3..3d18b03119ff 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -39,15 +39,18 @@ #include <sys/kernel.h> #include <sys/systm.h> #include <sys/errno.h> +#include <sys/file.h> #include <sys/sysproto.h> #include <sys/malloc.h> #include <sys/osd.h> #include <sys/priv.h> #include <sys/proc.h> #include <sys/epoch.h> +#include <sys/event.h> #include <sys/taskqueue.h> #include <sys/fcntl.h> #include <sys/jail.h> +#include <sys/jaildesc.h> #include <sys/linker.h> #include <sys/lock.h> #include <sys/mman.h> @@ -154,7 +157,8 @@ static void prison_complete(void *context, int pending); static void prison_deref(struct prison *pr, int flags); static void prison_deref_kill(struct prison *pr, struct prisonlist *freeprison); static int prison_lock_xlock(struct prison *pr, int flags); -static void prison_cleanup(struct prison *pr); +static void prison_cleanup_locked(struct prison *pr); +static void prison_cleanup_unlocked(struct prison *pr); static void prison_free_not_last(struct prison *pr); static void prison_proc_free_not_last(struct prison *pr); static void prison_proc_relink(struct prison *opr, struct prison *npr, @@ -167,6 +171,7 @@ static void prison_racct_attach(struct prison *pr); static void prison_racct_modify(struct prison *pr); static void prison_racct_detach(struct prison *pr); #endif +static void prison_knote(struct prison *pr, long hint); /* Flags for prison_deref */ #define PD_DEREF 0x01 /* Decrement pr_ref */ @@ -985,6 +990,7 @@ prison_ip_cnt(const struct prison *pr, const pr_family_t af) int kern_jail_set(struct thread *td, struct uio *optuio, int flags) { + struct file *jfp_out; struct nameidata nd; #ifdef INET struct prison_ip *ip4; @@ -995,6 +1001,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) struct vfsopt *opt; struct vfsoptlist *opts; struct prison *pr, *deadpr, *dinspr, *inspr, *mypr, *ppr, *tpr; + struct ucred *jdcred; struct vnode *root; char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid; char *g_path, *osrelstr; @@ -1008,7 +1015,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) int created, cuflags, descend, drflags, enforce; int error, errmsg_len, errmsg_pos; int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel; - int deadid, jid, jsys, len, level; + int deadid, jfd_in, jfd_out, jfd_pos, jid, jsys, len, level; int childmax, osreldt, rsnum, slevel; #ifdef INET int ip4s; @@ -1018,22 +1025,32 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) int ip6s; bool redo_ip6; #endif + bool maybe_changed; uint64_t pr_allow, ch_allow, pr_flags, ch_flags; uint64_t pr_allow_diff; unsigned tallow; char numbuf[12]; - error = priv_check(td, PRIV_JAIL_SET); - if (!error && (flags & JAIL_ATTACH)) - error = priv_check(td, PRIV_JAIL_ATTACH); - if (error) - return (error); mypr = td->td_ucred->cr_prison; - if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) + if (((flags & (JAIL_CREATE | JAIL_AT_DESC)) == JAIL_CREATE) && + mypr->pr_childmax == 0) return (EPERM); if (flags & ~JAIL_SET_MASK) return (EINVAL); + if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC)) == + (JAIL_USE_DESC | JAIL_AT_DESC)) + return (EINVAL); + prison_hold(mypr); +#ifdef INET + ip4 = NULL; +#endif +#ifdef INET6 + ip6 = NULL; +#endif + g_path = NULL; + jfp_out = NULL; + jfd_out = -1; /* * Check all the parameters before committing to anything. Not all * errors can be caught early, but we may as well try. Also, this @@ -1046,14 +1063,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) */ error = vfs_buildopts(optuio, &opts); if (error) - return (error); -#ifdef INET - ip4 = NULL; -#endif -#ifdef INET6 - ip6 = NULL; -#endif - g_path = NULL; + goto done_free; cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); if (!cuflags) { @@ -1062,6 +1072,61 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) goto done_errmsg; } + error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in)); + if (error == ENOENT) { + if (flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | + JAIL_OWN_DESC)) { + vfs_opterror(opts, "missing desc"); + goto done_errmsg; + } + jfd_in = -1; + } else if (error != 0) + goto done_free; + else { + if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | + JAIL_OWN_DESC))) { + vfs_opterror(opts, "unexpected desc"); + goto done_errmsg; + } + if (flags & JAIL_AT_DESC) { + /* + * Look up and create jails based on the + * descriptor's prison. + */ + prison_free(mypr); + error = jaildesc_find(td, jfd_in, &mypr, NULL); + if (error != 0) { + vfs_opterror(opts, error == ENOENT ? + "descriptor to dead jail" : + "not a jail descriptor"); + goto done_errmsg; + } + if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) { + error = EPERM; + goto done_free; + } + } + if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) { + /* Allocate a jail descriptor to return later. */ + error = jaildesc_alloc(td, &jfp_out, &jfd_out, + flags & JAIL_OWN_DESC); + if (error) + goto done_free; + } + } + + /* + * Delay the permission check if using a jail descriptor, + * until we get the descriptor's credentials. + */ + if (!(flags & JAIL_USE_DESC)) { + error = priv_check(td, PRIV_JAIL_SET); + if (error == 0 && (flags & JAIL_ATTACH)) + error = priv_check(td, PRIV_JAIL_ATTACH); + if (error) + goto done_free; + } + error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); if (error == ENOENT) jid = 0; @@ -1422,6 +1487,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) pr = NULL; inspr = NULL; deadpr = NULL; + maybe_changed = false; if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) { namelc = strrchr(name, '.'); jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10); @@ -1436,7 +1502,45 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) error = EAGAIN; goto done_deref; } - if (jid != 0) { + if (flags & JAIL_USE_DESC) { + /* Get the jail from its descriptor. */ + error = jaildesc_find(td, jfd_in, &pr, &jdcred); + if (error) { + vfs_opterror(opts, error == ENOENT ? + "descriptor to dead jail" : + "not a jail descriptor"); + goto done_deref; + } + drflags |= PD_DEREF; + error = priv_check_cred(jdcred, PRIV_JAIL_SET); + if (error == 0 && (flags & JAIL_ATTACH)) + error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH); + crfree(jdcred); + if (error) + goto done_deref; + mtx_lock(&pr->pr_mtx); + drflags |= PD_LOCKED; + if (cuflags == JAIL_CREATE) { + error = EEXIST; + vfs_opterror(opts, "jail %d already exists", + pr->pr_id); + goto done_deref; + } + if (!prison_isalive(pr)) { + /* While a jid can be resurrected, the prison + * itself cannot. + */ + error = ENOENT; + vfs_opterror(opts, "jail %d is dying", pr->pr_id); + goto done_deref; + } + if (jid != 0 && jid != pr->pr_id) { + error = EINVAL; + vfs_opterror(opts, "cannot change jid"); + goto done_deref; + } + jid = pr->pr_id; + } else if (jid != 0) { if (jid < 0) { error = EINVAL; vfs_opterror(opts, "negative jid"); @@ -1570,7 +1674,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) } } } - /* Update: must provide a jid or name. */ + /* Update: must provide a desc, jid, or name. */ else if (cuflags == JAIL_UPDATE && pr == NULL) { error = ENOENT; vfs_opterror(opts, "update specified no jail"); @@ -1643,6 +1747,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) tpr->pr_childcount++; + pr->pr_klist = knlist_alloc(&pr->pr_mtx); /* Set some default values, and inherit some from the parent. */ if (namelc == NULL) @@ -1722,8 +1827,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) * Grab a reference for existing prisons, to ensure they * continue to exist for the duration of the call. */ - prison_hold(pr); - drflags |= PD_DEREF; + if (!(drflags & PD_DEREF)) { + prison_hold(pr); + drflags |= PD_DEREF; + } #if defined(VIMAGE) && (defined(INET) || defined(INET6)) if ((pr->pr_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { @@ -1880,6 +1987,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) goto done_deref; } } + maybe_changed = true; /* Set the parameters of the prison. */ #ifdef INET @@ -2112,7 +2220,12 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) * reference via persistence, or is about to gain one via attachment. */ if (created) { - drflags = prison_lock_xlock(pr, drflags); + sx_assert(&allprison_lock, SX_XLOCKED); + mtx_lock(&ppr->pr_mtx); + knote_fork(ppr->pr_klist, pr->pr_id); + mtx_unlock(&ppr->pr_mtx); + mtx_lock(&pr->pr_mtx); + drflags |= PD_LOCKED; pr->pr_state = PRISON_STATE_ALIVE; } @@ -2146,10 +2259,37 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) printf("Warning jail jid=%d: mountd/nfsd requires a separate" " file system\n", pr->pr_id); + /* + * Now that the prison is fully created without error, set the + * jail descriptor if one was requested. This is the only + * parameter that is returned to the caller (except the error + * message). + */ + if (jfd_out >= 0) { + if (!(drflags & PD_LOCKED)) { + mtx_lock(&pr->pr_mtx); + drflags |= PD_LOCKED; + } + jfd_pos = 2 * vfs_getopt_pos(opts, "desc") + 1; + if (optuio->uio_segflg == UIO_SYSSPACE) + *(int*)optuio->uio_iov[jfd_pos].iov_base = jfd_out; + else + (void)copyout(&jfd_out, + optuio->uio_iov[jfd_pos].iov_base, sizeof(jfd_out)); + jaildesc_set_prison(jfp_out, pr); + } + drflags &= ~PD_KILL; td->td_retval[0] = pr->pr_id; done_deref: + /* + * Report changes to kevent. This can happen even if the + * system call fails, as changes might have been made before + * the failure. + */ + if (maybe_changed && !created) + prison_knote(pr, NOTE_JAIL_SET); /* Release any temporary prison holds and/or locks. */ if (pr != NULL) prison_deref(pr, drflags); @@ -2176,15 +2316,21 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) } } done_free: + /* Clean up other resources. */ #ifdef INET prison_ip_free(ip4); #endif #ifdef INET6 prison_ip_free(ip6); #endif + if (jfp_out != NULL) + fdrop(jfp_out, td); + if (error && jfd_out >= 0) + (void)kern_close(td, jfd_out); if (g_path != NULL) free(g_path, M_TEMP); vfs_freeopts(opts); + prison_free(mypr); return (error); } @@ -2329,16 +2475,21 @@ int kern_jail_get(struct thread *td, struct uio *optuio, int flags) { struct bool_flags *bf; + struct file *jfp_out; struct jailsys_flags *jsf; struct prison *pr, *mypr; struct vfsopt *opt; struct vfsoptlist *opts; char *errmsg, *name; int drflags, error, errmsg_len, errmsg_pos, i, jid, len, pos; + int jfd_in, jfd_out; unsigned f; if (flags & ~JAIL_GET_MASK) return (EINVAL); + if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC)) == + (JAIL_USE_DESC | JAIL_AT_DESC)) + return (EINVAL); /* Get the parameter list. */ error = vfs_buildopts(optuio, &opts); @@ -2346,13 +2497,70 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags) return (error); errmsg_pos = vfs_getopt_pos(opts, "errmsg"); mypr = td->td_ucred->cr_prison; + prison_hold(mypr); pr = NULL; + jfp_out = NULL; + jfd_out = -1; /* - * Find the prison specified by one of: lastjid, jid, name. + * Find the prison specified by one of: desc, lastjid, jid, name. */ sx_slock(&allprison_lock); drflags = PD_LIST_SLOCKED; + + error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in)); + if (error == ENOENT) { + if (flags & (JAIL_AT_DESC | JAIL_GET_DESC | JAIL_OWN_DESC)) { + vfs_opterror(opts, "missing desc"); + goto done; + } + } else if (error == 0) { + if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | + JAIL_OWN_DESC))) { + vfs_opterror(opts, "unexpected desc"); + goto done; + } + if (flags & JAIL_USE_DESC) { + /* Get the jail from its descriptor. */ + error = jaildesc_find(td, jfd_in, &pr, NULL); + if (error) { + vfs_opterror(opts, error == ENOENT ? + "descriptor to dead jail" : + "not a jail descriptor"); + goto done; + } + drflags |= PD_DEREF; + mtx_lock(&pr->pr_mtx); + drflags |= PD_LOCKED; + if (!(prison_isalive(pr) || (flags & JAIL_DYING))) { + error = ENOENT; + vfs_opterror(opts, "jail %d is dying", + pr->pr_id); + goto done; + } + goto found_prison; + } + if (flags & JAIL_AT_DESC) { + /* Look up jails based on the descriptor's prison. */ + prison_free(mypr); + error = jaildesc_find(td, jfd_in, &mypr, NULL); + if (error != 0) { + vfs_opterror(opts, error == ENOENT ? + "descriptor to dead jail" : + "not a jail descriptor"); + goto done; + } + } + if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) { + /* Allocate a jail descriptor to return later. */ + error = jaildesc_alloc(td, &jfp_out, &jfd_out, + flags & JAIL_OWN_DESC); + if (error) + goto done; + } + } else + goto done; + error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); if (error == 0) { TAILQ_FOREACH(pr, &allprison, pr_list) { @@ -2421,9 +2629,17 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags) found_prison: /* Get the parameters of the prison. */ - prison_hold(pr); - drflags |= PD_DEREF; + if (!(drflags & PD_DEREF)) { + prison_hold(pr); + drflags |= PD_DEREF; + } td->td_retval[0] = pr->pr_id; + if (jfd_out >= 0) { + error = vfs_setopt(opts, "desc", &jfd_out, sizeof(jfd_out)); + if (error != 0 && error != ENOENT) + goto done; + jaildesc_set_prison(jfp_out, pr); + } error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id)); if (error != 0 && error != ENOENT) goto done; @@ -2603,6 +2819,13 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags) prison_deref(pr, drflags); else if (drflags & PD_LIST_SLOCKED) sx_sunlock(&allprison_lock); + else if (drflags & PD_LIST_XLOCKED) + sx_xunlock(&allprison_lock); + /* Clean up other resources. */ + if (jfp_out != NULL) + (void)fdrop(jfp_out, td); + if (error && jfd_out >= 0) + (void)kern_close(td, jfd_out); if (error && errmsg_pos >= 0) { /* Write the error message back to userspace. */ vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); @@ -2619,6 +2842,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags) } } vfs_freeopts(opts); + prison_free(mypr); return (error); } @@ -2643,14 +2867,54 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap) sx_xunlock(&allprison_lock); return (EINVAL); } + prison_hold(pr); + prison_remove(pr); + return (0); +} + +/* + * struct jail_remove_jd_args { + * int fd; + * }; + */ +int +sys_jail_remove_jd(struct thread *td, struct jail_remove_jd_args *uap) +{ + struct prison *pr; + struct ucred *jdcred; + int error; + + error = jaildesc_find(td, uap->fd, &pr, &jdcred); + if (error) + return (error); + error = priv_check_cred(jdcred, PRIV_JAIL_REMOVE); + crfree(jdcred); + if (error) { + prison_free(pr); + return (error); + } + sx_xlock(&allprison_lock); + mtx_lock(&pr->pr_mtx); + prison_remove(pr); + return (0); +} + +/* + * Begin the removal process for a prison. The allprison lock should + * be held exclusively, and the prison should be both locked and held. + */ +void +prison_remove(struct prison *pr) +{ + sx_assert(&allprison_lock, SA_XLOCKED); + mtx_assert(&pr->pr_mtx, MA_OWNED); if (!prison_isalive(pr)) { /* Silently ignore already-dying prisons. */ mtx_unlock(&pr->pr_mtx); sx_xunlock(&allprison_lock); - return (0); + return; } - prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED); - return (0); + prison_deref(pr, PD_KILL | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); } /* @@ -2685,6 +2949,44 @@ sys_jail_attach(struct thread *td, struct jail_attach_args *uap) return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED)); } +/* + * struct jail_attach_jd_args { + * int fd; + * }; + */ +int +sys_jail_attach_jd(struct thread *td, struct jail_attach_jd_args *uap) +{ + struct prison *pr; + struct ucred *jdcred; + int drflags, error; + + sx_slock(&allprison_lock); + drflags = PD_LIST_SLOCKED; + error = jaildesc_find(td, uap->fd, &pr, &jdcred); + if (error) + goto fail; + drflags |= PD_DEREF; + error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH); + crfree(jdcred); + if (error) + goto fail; + mtx_lock(&pr->pr_mtx); + drflags |= PD_LOCKED; + + /* Do not allow a process to attach to a prison that is not alive. */ + if (!prison_isalive(pr)) { + error = EINVAL; + goto fail; + } + + return (do_jail_attach(td, pr, drflags)); + + fail: + prison_deref(pr, drflags); + return (error); +} + static int do_jail_attach(struct thread *td, struct prison *pr, int drflags) { @@ -2703,9 +3005,12 @@ do_jail_attach(struct thread *td, struct prison *pr, int drflags) * a process root from one prison, but attached to the jail * of another. */ - prison_hold(pr); + if (!(drflags & PD_DEREF)) { + prison_hold(pr); + drflags |= PD_DEREF; + } refcount_acquire(&pr->pr_uref); - drflags |= PD_DEREF | PD_DEUREF; + drflags |= PD_DEUREF; mtx_unlock(&pr->pr_mtx); drflags &= ~PD_LOCKED; @@ -2755,6 +3060,7 @@ do_jail_attach(struct thread *td, struct prison *pr, int drflags) prison_proc_relink(oldcred->cr_prison, pr, p); prison_deref(oldcred->cr_prison, drflags); crfree(oldcred); + prison_knote(pr, NOTE_JAIL_ATTACH | td->td_proc->p_pid); /* * If the prison was killed while changing credentials, die along @@ -3182,9 +3488,10 @@ prison_deref(struct prison *pr, int flags) refcount_load(&prison0.pr_uref) > 0, ("prison0 pr_uref=0")); pr->pr_state = PRISON_STATE_DYING; + prison_cleanup_locked(pr); mtx_unlock(&pr->pr_mtx); flags &= ~PD_LOCKED; - prison_cleanup(pr); + prison_cleanup_unlocked(pr); } } } @@ -3327,8 +3634,9 @@ prison_deref_kill(struct prison *pr, struct prisonlist *freeprison) } if (!(cpr->pr_flags & PR_REMOVE)) continue; - prison_cleanup(cpr); + prison_cleanup_unlocked(cpr); mtx_lock(&cpr->pr_mtx); + prison_cleanup_locked(cpr); cpr->pr_flags &= ~PR_REMOVE; if (cpr->pr_flags & PR_PERSIST) { cpr->pr_flags &= ~PR_PERSIST; @@ -3363,8 +3671,9 @@ prison_deref_kill(struct prison *pr, struct prisonlist *freeprison) if (rpr != NULL) LIST_REMOVE(rpr, pr_sibling); - prison_cleanup(pr); + prison_cleanup_unlocked(pr); mtx_lock(&pr->pr_mtx); + prison_cleanup_locked(pr); if (pr->pr_flags & PR_PERSIST) { pr->pr_flags &= ~PR_PERSIST; prison_proc_free_not_last(pr); @@ -3411,10 +3720,22 @@ prison_lock_xlock(struct prison *pr, int flags) /* * Release a prison's resources when it starts dying (when the last user - * reference is dropped, or when it is killed). + * reference is dropped, or when it is killed). Two functions are called, + * for work that requires a locked prison or an unlocked one. */ static void -prison_cleanup(struct prison *pr) +prison_cleanup_locked(struct prison *pr) +{ + sx_assert(&allprison_lock, SA_XLOCKED); + mtx_assert(&pr->pr_mtx, MA_OWNED); + prison_knote(pr, NOTE_JAIL_REMOVE); + knlist_detach(pr->pr_klist); + jaildesc_prison_cleanup(pr); + pr->pr_klist = NULL; +} + +static void +prison_cleanup_unlocked(struct prison *pr) { sx_assert(&allprison_lock, SA_XLOCKED); mtx_assert(&pr->pr_mtx, MA_NOTOWNED); @@ -4616,6 +4937,7 @@ sysctl_jail_param(SYSCTL_HANDLER_ARGS) * jail creation time but cannot be changed in an existing jail. */ SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID"); +SYSCTL_JAIL_PARAM(, desc, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail descriptor"); SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID"); SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name"); SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path"); @@ -5039,6 +5361,22 @@ prison_racct_detach(struct prison *pr) } #endif /* RACCT */ +/* + * Submit a knote for a prison, locking if necessary. + */ +static void +prison_knote(struct prison *pr, long hint) +{ + int locked; + + locked = mtx_owned(&pr->pr_mtx); + if (!locked) + mtx_lock(&pr->pr_mtx); + KNOTE_LOCKED(pr->pr_klist, hint); + if (!locked) + mtx_unlock(&pr->pr_mtx); +} + #ifdef DDB static void diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c new file mode 100644 index 000000000000..c9e80f5d8941 --- /dev/null +++ b/sys/kern/kern_jaildesc.c @@ -0,0 +1,278 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 James Gritton. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/fcntl.h> +#include <sys/file.h> +#include <sys/filedesc.h> +#include <sys/kernel.h> +#include <sys/jail.h> +#include <sys/jaildesc.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/priv.h> +#include <sys/stat.h> +#include <sys/sysproto.h> +#include <sys/systm.h> +#include <sys/ucred.h> +#include <sys/user.h> +#include <sys/vnode.h> + +MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors"); + +static fo_stat_t jaildesc_stat; +static fo_close_t jaildesc_close; +static fo_fill_kinfo_t jaildesc_fill_kinfo; +static fo_cmp_t jaildesc_cmp; + +static struct fileops jaildesc_ops = { + .fo_read = invfo_rdwr, + .fo_write = invfo_rdwr, + .fo_truncate = invfo_truncate, + .fo_ioctl = invfo_ioctl, + .fo_poll = invfo_poll, + .fo_kqfilter = invfo_kqfilter, + .fo_stat = jaildesc_stat, + .fo_close = jaildesc_close, + .fo_chmod = invfo_chmod, + .fo_chown = invfo_chown, + .fo_sendfile = invfo_sendfile, + .fo_fill_kinfo = jaildesc_fill_kinfo, + .fo_cmp = jaildesc_cmp, + .fo_flags = DFLAG_PASSABLE, +}; + +/* + * Given a jail descriptor number, return its prison and/or its + * credential. They are returned held, and will need to be released + * by the caller. + */ +int +jaildesc_find(struct thread *td, int fd, struct prison **prp, + struct ucred **ucredp) +{ + struct file *fp; + struct jaildesc *jd; + struct prison *pr; + int error; + + error = fget(td, fd, &cap_no_rights, &fp); + if (error != 0) + return (error); + if (fp->f_type != DTYPE_JAILDESC) { + error = EINVAL; + goto out; + } + jd = fp->f_data; + JAILDESC_LOCK(jd); + pr = jd->jd_prison; + if (pr == NULL || !prison_isvalid(pr)) { + error = ENOENT; + JAILDESC_UNLOCK(jd); + goto out; + } + if (prp != NULL) { + prison_hold(pr); + *prp = pr; + } + JAILDESC_UNLOCK(jd); + if (ucredp != NULL) + *ucredp = crhold(fp->f_cred); + out: + fdrop(fp, td); + return (error); +} + +/* + * Allocate a new jail decriptor, not yet associated with a prison. + * Return the file pointer (with a reference held) and the descriptor + * number. + */ +int +jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning) +{ + struct file *fp; + struct jaildesc *jd; + int error; + + if (owning) { + error = priv_check(td, PRIV_JAIL_REMOVE); + if (error != 0) + return (error); + } + jd = malloc(sizeof(*jd), M_JAILDESC, M_WAITOK | M_ZERO); + error = falloc_caps(td, &fp, fdp, 0, NULL); + if (error != 0) { + free(jd, M_JAILDESC); + return (error); + } + finit(fp, priv_check_cred(fp->f_cred, PRIV_JAIL_SET) == 0 ? + FREAD | FWRITE : FREAD, DTYPE_JAILDESC, jd, &jaildesc_ops); + JAILDESC_LOCK_INIT(jd); + if (owning) + jd->jd_flags |= JDF_OWNING; + *fpp = fp; + return (0); +} + +/* + * Assocate a jail descriptor with its prison. + */ +void +jaildesc_set_prison(struct file *fp, struct prison *pr) +{ + struct jaildesc *jd; + + mtx_assert(&pr->pr_mtx, MA_OWNED); + jd = fp->f_data; + JAILDESC_LOCK(jd); + jd->jd_prison = pr; + LIST_INSERT_HEAD(&pr->pr_descs, jd, jd_list); + prison_hold(pr); + JAILDESC_UNLOCK(jd); +} + +/* + * Detach all the jail descriptors from a prison. + */ +void +jaildesc_prison_cleanup(struct prison *pr) +{ + struct jaildesc *jd; + + mtx_assert(&pr->pr_mtx, MA_OWNED); + while ((jd = LIST_FIRST(&pr->pr_descs))) { + JAILDESC_LOCK(jd); + LIST_REMOVE(jd, jd_list); + jd->jd_prison = NULL; + JAILDESC_UNLOCK(jd); + prison_free(pr); + } +} + +static int +jaildesc_close(struct file *fp, struct thread *td) +{ + struct jaildesc *jd; + struct prison *pr; + + jd = fp->f_data; + fp->f_data = NULL; + if (jd != NULL) { + JAILDESC_LOCK(jd); + pr = jd->jd_prison; + if (pr != NULL) { + /* + * Free or remove the associated prison. + * This requires a second check after re- + * ordering locks. This jaildesc can remain + * unlocked once we have a prison reference, + * because that prison is the only place that + * still points back to it. + */ + prison_hold(pr); + JAILDESC_UNLOCK(jd); + if (jd->jd_flags & JDF_OWNING) { + sx_xlock(&allprison_lock); + prison_lock(pr); + if (jd->jd_prison != NULL) { + /* + * Unlink the prison, but don't free + * it; that will be done as part of + * of prison_remove. + */ + LIST_REMOVE(jd, jd_list); + prison_remove(pr); + } else { + prison_unlock(pr); + sx_xunlock(&allprison_lock); + } + } else { + prison_lock(pr); + if (jd->jd_prison != NULL) { + LIST_REMOVE(jd, jd_list); + prison_free(pr); + } + prison_unlock(pr); + } + prison_free(pr); + } + JAILDESC_LOCK_DESTROY(jd); + free(jd, M_JAILDESC); + } + return (0); +} + +static int +jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred) +{ + struct jaildesc *jd; + + bzero(sb, sizeof(struct stat)); + jd = fp->f_data; + JAILDESC_LOCK(jd); + if (jd->jd_prison != NULL) { + sb->st_ino = jd->jd_prison->pr_id; + sb->st_mode = S_IFREG | S_IRWXU; + } else + sb->st_mode = S_IFREG; + JAILDESC_UNLOCK(jd); + return (0); +} + +static int +jaildesc_fill_kinfo(struct file *fp, struct kinfo_file *kif, + struct filedesc *fdp) +{ + struct jaildesc *jd; + + jd = fp->f_data; + kif->kf_type = KF_TYPE_JAILDESC; + kif->kf_un.kf_jail.kf_jid = jd->jd_prison ? jd->jd_prison->pr_id : 0; + return (0); +} + +static int +jaildesc_cmp(struct file *fp1, struct file *fp2, struct thread *td) +{ + struct jaildesc *jd1, *jd2; + int jid1, jid2; + + if (fp2->f_type != DTYPE_JAILDESC) + return (3); + jd1 = fp1->f_data; + JAILDESC_LOCK(jd1); + jid1 = jd1->jd_prison ? (uintptr_t)jd1->jd_prison->pr_id : 0; + JAILDESC_UNLOCK(jd1); + jd2 = fp2->f_data; + JAILDESC_LOCK(jd2); + jid2 = jd2->jd_prison ? (uintptr_t)jd2->jd_prison->pr_id : 0; + JAILDESC_UNLOCK(jd2); + return (kcmp_cmp(jid1, jid2)); +} diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index 879220be050b..653ce1ee556b 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -751,11 +751,14 @@ malloc_domainset(size_t size, struct malloc_type *mtp, struct domainset *ds, return (malloc_large(size, mtp, DOMAINSET_RR(), flags DEBUG_REDZONE_ARG)); - vm_domainset_iter_policy_init(&di, ds, &domain, &flags); - do { - va = malloc_domain(&size, &indx, mtp, domain, flags); - } while (va == NULL && vm_domainset_iter_policy(&di, &domain) == 0); + indx = -1; + va = NULL; + if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags) == 0) + do { + va = malloc_domain(&size, &indx, mtp, domain, flags); + } while (va == NULL && vm_domainset_iter_policy(&di, &domain) == 0); malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx); + if (__predict_false(va == NULL)) { KASSERT((flags & M_WAITOK) == 0, ("malloc(M_WAITOK) returned NULL")); diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c index f952b3fc8805..8b5908f5219a 100644 --- a/sys/kern/kern_mutex.c +++ b/sys/kern/kern_mutex.c @@ -1136,9 +1136,9 @@ __mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line) * General init routine used by the MTX_SYSINIT() macro. */ void -mtx_sysinit(void *arg) +mtx_sysinit(const void *arg) { - struct mtx_args *margs = arg; + const struct mtx_args *margs = arg; mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts); diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 379fbda619c0..6e56664d12ce 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -1112,13 +1112,14 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp) if (cred->cr_flags & CRED_FLAG_CAPMODE) kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE; /* XXX bde doesn't like KI_NGROUPS */ - if (cred->cr_ngroups > KI_NGROUPS) { + if (1 + cred->cr_ngroups > KI_NGROUPS) { kp->ki_ngroups = KI_NGROUPS; kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW; } else - kp->ki_ngroups = cred->cr_ngroups; - bcopy(cred->cr_groups, kp->ki_groups, - kp->ki_ngroups * sizeof(gid_t)); + kp->ki_ngroups = 1 + cred->cr_ngroups; + kp->ki_groups[0] = cred->cr_gid; + bcopy(cred->cr_groups, kp->ki_groups + 1, + (kp->ki_ngroups - 1) * sizeof(gid_t)); kp->ki_rgid = cred->cr_rgid; kp->ki_svgid = cred->cr_svgid; /* If jailed(cred), emulate the old P_JAILED flag. */ @@ -2943,8 +2944,11 @@ sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS) cred = crhold(p->p_ucred); PROC_UNLOCK(p); - error = SYSCTL_OUT(req, cred->cr_groups, - cred->cr_ngroups * sizeof(gid_t)); + error = SYSCTL_OUT(req, &cred->cr_gid, sizeof(gid_t)); + if (error == 0) + error = SYSCTL_OUT(req, cred->cr_groups, + cred->cr_ngroups * sizeof(gid_t)); + crfree(cred); return (error); } diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c index c1633dd19de2..7206572ffc02 100644 --- a/sys/kern/kern_rmlock.c +++ b/sys/kern/kern_rmlock.c @@ -337,9 +337,9 @@ rm_wowned(const struct rmlock *rm) } void -rm_sysinit(void *arg) +rm_sysinit(const void *arg) { - struct rm_args *args; + const struct rm_args *args; args = arg; rm_init_flags(args->ra_rm, args->ra_desc, args->ra_flags); diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c index e182d1fe9baf..84a3a890be63 100644 --- a/sys/kern/kern_rwlock.c +++ b/sys/kern/kern_rwlock.c @@ -266,9 +266,9 @@ _rw_destroy(volatile uintptr_t *c) } void -rw_sysinit(void *arg) +rw_sysinit(const void *arg) { - struct rw_args *args; + const struct rw_args *args; args = arg; rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc, diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c index accea5d288eb..c005e112d3b9 100644 --- a/sys/kern/kern_sx.c +++ b/sys/kern/kern_sx.c @@ -222,9 +222,9 @@ owner_sx(const struct lock_object *lock, struct thread **owner) #endif void -sx_sysinit(void *arg) +sx_sysinit(const void *arg) { - struct sx_args *sargs = arg; + const struct sx_args *sargs = arg; sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags); } diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c index 0e8c2b9f362e..4329959a2ef4 100644 --- a/sys/kern/kern_thr.c +++ b/sys/kern/kern_thr.c @@ -347,6 +347,17 @@ kern_thr_exit(struct thread *td) p = td->td_proc; /* + * Clear kernel ASTs in advance of selecting the last exiting + * thread and acquiring schedulers locks. It is fine to + * clear the ASTs here even if we are not going to exit after + * all. On the other hand, leaving them pending could trigger + * execution in subsystems in a context where they are not + * prepared to handle top kernel actions, even in execution of + * an unrelated thread. + */ + ast_kclear(td); + + /* * If all of the threads in a process call this routine to * exit (e.g. all threads call pthread_exit()), exactly one * thread should return to the caller to terminate the process diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index 50b040132396..3180c66cb42b 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -1694,8 +1694,10 @@ thread_single_end(struct proc *p, int mode) thread_unlock(td); } } - KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0, - ("inconsistent boundary count %d", p->p_boundary_count)); + KASSERT(mode != SINGLE_BOUNDARY || P_SHOULDSTOP(p) || + p->p_boundary_count == 0, + ("pid %d proc %p flags %#x inconsistent boundary count %d", + p->p_pid, p, p->p_flag, p->p_boundary_count)); PROC_SUNLOCK(p); wakeup(&p->p_flag); } diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index fbf81d423b95..09070eea284f 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -220,3 +220,13 @@ SYSCTL_PROC(_debug, OID_AUTO, tslog_user, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_SKIP, 0, 0, sysctl_debug_tslog_user, "", "Dump recorded userland event timestamps"); + +void +sysinit_tslog_shim(const void *data) +{ + const struct sysinit_tslog *x = data; + + tslog(curthread, TS_ENTER, "SYSINIT", x->name); + (x->func)(x->data); + tslog(curthread, TS_EXIT, "SYSINIT", x->name); +} diff --git a/sys/kern/subr_bus.c b/sys/kern/subr_bus.c index 62a3da964c37..bf5bda7e058d 100644 --- a/sys/kern/subr_bus.c +++ b/sys/kern/subr_bus.c @@ -280,6 +280,9 @@ device_sysctl_handler(SYSCTL_HANDLER_ARGS) struct sbuf sb; device_t dev = (device_t)arg1; device_t iommu; +#ifdef IOMMU + device_t requester; +#endif int error; uint16_t rid; const char *c; @@ -314,9 +317,15 @@ device_sysctl_handler(SYSCTL_HANDLER_ARGS) } rid = 0; #ifdef IOMMU - iommu_get_requester(dev, &rid); + error = iommu_get_requester(dev, &requester, &rid); + /* + * Do not return requester error from sysctl, iommu + * unit might be assigned by other means. + */ +#else + error = ENXIO; #endif - if (rid != 0) + if (error == 0) sbuf_printf(&sb, "%srid=%#x", c, rid); break; default: diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index ab47b6ad29a3..a65c3ca128d9 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -57,7 +57,7 @@ * b : public affirmation by word or example of usually * religious faith or conviction <the heroic witness to divine * life -- Pilot> - * 6 capitalized : a member of the Jehovah's Witnesses + * 6 capitalized : a member of the Jehovah's Witnesses */ /* @@ -131,7 +131,7 @@ #define LI_SLEEPABLE 0x00040000 /* Lock may be held while sleeping. */ #ifndef WITNESS_COUNT -#define WITNESS_COUNT 1536 +#define WITNESS_COUNT 1536 #endif #define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */ #define WITNESS_PENDLIST (512 + (MAXCPU * 4)) @@ -158,20 +158,18 @@ * These flags go in the witness relationship matrix and describe the * relationship between any two struct witness objects. */ -#define WITNESS_UNRELATED 0x00 /* No lock order relation. */ -#define WITNESS_PARENT 0x01 /* Parent, aka direct ancestor. */ -#define WITNESS_ANCESTOR 0x02 /* Direct or indirect ancestor. */ -#define WITNESS_CHILD 0x04 /* Child, aka direct descendant. */ -#define WITNESS_DESCENDANT 0x08 /* Direct or indirect descendant. */ -#define WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR) -#define WITNESS_DESCENDANT_MASK (WITNESS_CHILD | WITNESS_DESCENDANT) -#define WITNESS_RELATED_MASK \ - (WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK) -#define WITNESS_REVERSAL 0x10 /* A lock order reversal has been - * observed. */ -#define WITNESS_RESERVED1 0x20 /* Unused flag, reserved. */ -#define WITNESS_RESERVED2 0x40 /* Unused flag, reserved. */ -#define WITNESS_LOCK_ORDER_KNOWN 0x80 /* This lock order is known. */ +#define WITNESS_UNRELATED 0x00 /* No lock order relation. */ +#define WITNESS_PARENT 0x01 /* Parent, aka direct ancestor. */ +#define WITNESS_ANCESTOR 0x02 /* Direct or indirect ancestor. */ +#define WITNESS_CHILD 0x04 /* Child, aka direct descendant. */ +#define WITNESS_DESCENDANT 0x08 /* Direct or indirect descendant. */ +#define WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR) +#define WITNESS_DESCENDANT_MASK (WITNESS_CHILD | WITNESS_DESCENDANT) +#define WITNESS_RELATED_MASK (WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK) +#define WITNESS_REVERSAL 0x10 /* A lock order reversal has been observed. */ +#define WITNESS_RESERVED1 0x20 /* Unused flag, reserved. */ +#define WITNESS_RESERVED2 0x40 /* Unused flag, reserved. */ +#define WITNESS_LOCK_ORDER_KNOWN 0x80 /* This lock order is known. */ /* Descendant to ancestor flags */ #define WITNESS_DTOA(x) (((x) & WITNESS_RELATED_MASK) >> 2) @@ -218,20 +216,18 @@ struct lock_list_entry { * (for example, "vnode interlock"). */ struct witness { - char w_name[MAX_W_NAME]; - uint32_t w_index; /* Index in the relationship matrix */ + char w_name[MAX_W_NAME]; + uint32_t w_index; /* Index in the relationship matrix */ struct lock_class *w_class; - STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */ - STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */ - struct witness *w_hash_next; /* Linked list in hash buckets. */ - const char *w_file; /* File where last acquired */ - uint32_t w_line; /* Line where last acquired */ - uint32_t w_refcount; - uint16_t w_num_ancestors; /* direct/indirect - * ancestor count */ - uint16_t w_num_descendants; /* direct/indirect - * descendant count */ - int16_t w_ddb_level; + STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */ + STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */ + struct witness *w_hash_next; /* Linked list in hash buckets. */ + const char *w_file; /* File where last acquired */ + uint32_t w_line; /* Line where last acquired */ + uint32_t w_refcount; + uint16_t w_num_ancestors; /* direct/indirect ancestor count */ + uint16_t w_num_descendants; /* direct/indirect descendant count */ + int16_t w_ddb_level; unsigned w_displayed:1; unsigned w_reversed:1; }; @@ -265,7 +261,7 @@ struct witness_lock_order_data { /* * The witness lock order data hash table. Keys are witness index tuples * (struct witness_lock_order_key), elements are lock order data objects - * (struct witness_lock_order_data). + * (struct witness_lock_order_data). */ struct witness_lock_order_hash { struct witness_lock_order_data *wloh_array[WITNESS_LO_HASH_SIZE]; @@ -295,7 +291,6 @@ struct witness_order_list_entry { static __inline int witness_lock_type_equal(struct witness *w1, struct witness *w2) { - return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) == (w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))); } @@ -304,7 +299,6 @@ static __inline int witness_lock_order_key_equal(const struct witness_lock_order_key *a, const struct witness_lock_order_key *b) { - return (a->from == b->from && a->to == b->to); } @@ -415,7 +409,7 @@ SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN, &witness_skipspin, int badstack_sbuf_size; int witness_count = WITNESS_COUNT; -SYSCTL_INT(_debug_witness, OID_AUTO, witness_count, CTLFLAG_RDTUN, +SYSCTL_INT(_debug_witness, OID_AUTO, witness_count, CTLFLAG_RDTUN, &witness_count, 0, ""); /* @@ -760,7 +754,6 @@ static int witness_spin_warn = 0; static const char * fixup_filename(const char *file) { - if (file == NULL) return (NULL); while (strncmp(file, "../", 3) == 0) @@ -835,7 +828,7 @@ witness_startup(void *mem) w_free_cnt--; for (i = 0; i < witness_count; i++) { - memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) * + memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) * (witness_count + 1)); } @@ -989,16 +982,16 @@ witness_ddb_display_descendants(int(*prnt)(const char *fmt, ...), { int i; - for (i = 0; i < indent; i++) - prnt(" "); + for (i = 0; i < indent; i++) + prnt(" "); prnt("%s (type: %s, depth: %d, active refs: %d)", w->w_name, w->w_class->lc_name, w->w_ddb_level, w->w_refcount); - if (w->w_displayed) { - prnt(" -- (already displayed)\n"); - return; - } - w->w_displayed = 1; + if (w->w_displayed) { + prnt(" -- (already displayed)\n"); + return; + } + w->w_displayed = 1; if (w->w_file != NULL && w->w_line != 0) prnt(" -- last acquired @ %s:%d\n", fixup_filename(w->w_file), w->w_line); @@ -1079,7 +1072,6 @@ witness_ddb_display(int(*prnt)(const char *fmt, ...)) int witness_defineorder(struct lock_object *lock1, struct lock_object *lock2) { - if (witness_watch == -1 || KERNEL_PANICKED()) return (0); @@ -1257,7 +1249,7 @@ witness_checkorder(struct lock_object *lock, int flags, const char *file, w->w_reversed = 1; mtx_unlock_spin(&w_mtx); witness_output( - "acquiring duplicate lock of same type: \"%s\"\n", + "acquiring duplicate lock of same type: \"%s\"\n", w->w_name); witness_output(" 1st %s @ %s:%d\n", plock->li_lock->lo_name, fixup_filename(plock->li_file), plock->li_line); @@ -1743,7 +1735,7 @@ found: /* * In order to reduce contention on w_mtx, we want to keep always an - * head object into lists so that frequent allocation from the + * head object into lists so that frequent allocation from the * free witness pool (and subsequent locking) is avoided. * In order to maintain the current code simple, when the head * object is totally unloaded it means also that we do not have @@ -1781,7 +1773,7 @@ witness_thread_exit(struct thread *td) n++; witness_list_lock(&lle->ll_children[i], witness_output); - + } kassert_panic( "Thread %p cannot exit while holding sleeplocks\n", td); @@ -1948,7 +1940,6 @@ found: static void depart(struct witness *w) { - MPASS(w->w_refcount == 0); if (w->w_class->lc_flags & LC_SLEEPLOCK) { w_sleep_cnt--; @@ -1999,18 +1990,18 @@ adopt(struct witness *parent, struct witness *child) child->w_num_ancestors++; } - /* - * Find each ancestor of 'pi'. Note that 'pi' itself is counted as + /* + * Find each ancestor of 'pi'. Note that 'pi' itself is counted as * an ancestor of 'pi' during this loop. */ for (i = 1; i <= w_max_used_index; i++) { - if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 && + if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 && (i != pi)) continue; /* Find each descendant of 'i' and mark it as a descendant. */ for (j = 1; j <= w_max_used_index; j++) { - /* + /* * Skip children that are already marked as * descendants of 'i'. */ @@ -2021,7 +2012,7 @@ adopt(struct witness *parent, struct witness *child) * We are only interested in descendants of 'ci'. Note * that 'ci' itself is counted as a descendant of 'ci'. */ - if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 && + if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 && (j != ci)) continue; w_rmatrix[i][j] |= WITNESS_ANCESTOR; @@ -2029,16 +2020,16 @@ adopt(struct witness *parent, struct witness *child) w_data[i].w_num_descendants++; w_data[j].w_num_ancestors++; - /* + /* * Make sure we aren't marking a node as both an - * ancestor and descendant. We should have caught + * ancestor and descendant. We should have caught * this as a lock order reversal earlier. */ if ((w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) && (w_rmatrix[i][j] & WITNESS_DESCENDANT_MASK)) { printf("witness rmatrix paradox! [%d][%d]=%d " "both ancestor and descendant\n", - i, j, w_rmatrix[i][j]); + i, j, w_rmatrix[i][j]); kdb_backtrace(); printf("Witness disabled.\n"); witness_watch = -1; @@ -2047,7 +2038,7 @@ adopt(struct witness *parent, struct witness *child) (w_rmatrix[j][i] & WITNESS_DESCENDANT_MASK)) { printf("witness rmatrix paradox! [%d][%d]=%d " "both ancestor and descendant\n", - j, i, w_rmatrix[j][i]); + j, i, w_rmatrix[j][i]); kdb_backtrace(); printf("Witness disabled.\n"); witness_watch = -1; @@ -2124,7 +2115,6 @@ _isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname) static int isitmychild(struct witness *parent, struct witness *child) { - return (_isitmyx(parent, child, WITNESS_PARENT, __func__)); } @@ -2134,7 +2124,6 @@ isitmychild(struct witness *parent, struct witness *child) static int isitmydescendant(struct witness *ancestor, struct witness *descendant) { - return (_isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK, __func__)); } @@ -2182,7 +2171,7 @@ witness_get(void) STAILQ_REMOVE_HEAD(&w_free, w_list); w_free_cnt--; index = w->w_index; - MPASS(index > 0 && index == w_max_used_index+1 && + MPASS(index > 0 && index == w_max_used_index + 1 && index < witness_count); bzero(w, sizeof(*w)); w->w_index = index; @@ -2194,7 +2183,6 @@ witness_get(void) static void witness_free(struct witness *w) { - STAILQ_INSERT_HEAD(&w_free, w, w_list); w_free_cnt++; } @@ -2219,11 +2207,10 @@ witness_lock_list_get(void) bzero(lle, sizeof(*lle)); return (lle); } - + static void witness_lock_list_free(struct lock_list_entry *lle) { - mtx_lock_spin(&w_mtx); lle->ll_next = w_lock_list_free; w_lock_list_free = lle; @@ -2297,7 +2284,6 @@ witness_voutput(const char *fmt, va_list ap) static int witness_thread_has_locks(struct thread *td) { - if (td->td_sleeplocks == NULL) return (0); return (td->td_sleeplocks->ll_count != 0); @@ -2573,14 +2559,12 @@ witness_setflag(struct lock_object *lock, int flag, int set) void witness_norelease(struct lock_object *lock) { - witness_setflag(lock, LI_NORELEASE, 1); } void witness_releaseok(struct lock_object *lock) { - witness_setflag(lock, LI_NORELEASE, 0); } @@ -2588,7 +2572,6 @@ witness_releaseok(struct lock_object *lock) static void witness_ddb_list(struct thread *td) { - KASSERT(witness_cold == 0, ("%s: witness_cold", __func__)); KASSERT(kdb_active, ("%s: not in the debugger", __func__)); @@ -2653,7 +2636,6 @@ DB_SHOW_ALIAS_FLAGS(alllocks, db_witness_list_all, DB_CMD_MEMSAFE); DB_SHOW_COMMAND_FLAGS(witness, db_witness_display, DB_CMD_MEMSAFE) { - witness_ddb_display(db_printf); } #endif @@ -2673,9 +2655,9 @@ sbuf_print_witness_badstacks(struct sbuf *sb, size_t *oldidx) /* Allocate and init temporary storage space. */ tmp_w1 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO); tmp_w2 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO); - tmp_data1 = malloc(sizeof(struct witness_lock_order_data), M_TEMP, + tmp_data1 = malloc(sizeof(struct witness_lock_order_data), M_TEMP, M_WAITOK | M_ZERO); - tmp_data2 = malloc(sizeof(struct witness_lock_order_data), M_TEMP, + tmp_data2 = malloc(sizeof(struct witness_lock_order_data), M_TEMP, M_WAITOK | M_ZERO); stack_zero(&tmp_data1->wlod_stack); stack_zero(&tmp_data2->wlod_stack); @@ -2750,12 +2732,12 @@ restart: sbuf_printf(sb, "\nLock order reversal between \"%s\"(%s) and \"%s\"(%s)!\n", - tmp_w1->w_name, tmp_w1->w_class->lc_name, + tmp_w1->w_name, tmp_w1->w_class->lc_name, tmp_w2->w_name, tmp_w2->w_class->lc_name); if (data1) { sbuf_printf(sb, "Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n", - tmp_w1->w_name, tmp_w1->w_class->lc_name, + tmp_w1->w_name, tmp_w1->w_class->lc_name, tmp_w2->w_name, tmp_w2->w_class->lc_name); stack_sbuf_print(sb, &tmp_data1->wlod_stack); sbuf_putc(sb, '\n'); @@ -2763,7 +2745,7 @@ restart: if (data2 && data2 != data1) { sbuf_printf(sb, "Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n", - tmp_w2->w_name, tmp_w2->w_class->lc_name, + tmp_w2->w_name, tmp_w2->w_class->lc_name, tmp_w1->w_name, tmp_w1->w_class->lc_name); stack_sbuf_print(sb, &tmp_data2->wlod_stack); sbuf_putc(sb, '\n'); @@ -2823,7 +2805,6 @@ sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS) static int sbuf_db_printf_drain(void *arg __unused, const char *data, int len) { - return (db_printf("%.*s", len, data)); } @@ -3068,7 +3049,7 @@ witness_lock_order_get(struct witness *parent, struct witness *child) & WITNESS_LOCK_ORDER_KNOWN) == 0) goto out; - hash = witness_hash_djb2((const char*)&key, + hash = witness_hash_djb2((const char *)&key, sizeof(key)) % w_lohash.wloh_size; data = w_lohash.wloh_array[hash]; while (data != NULL) { @@ -3089,7 +3070,6 @@ out: static int witness_lock_order_check(struct witness *parent, struct witness *child) { - if (parent != child && w_rmatrix[parent->w_index][child->w_index] & WITNESS_LOCK_ORDER_KNOWN && @@ -3115,7 +3095,7 @@ witness_lock_order_add(struct witness *parent, struct witness *child) & WITNESS_LOCK_ORDER_KNOWN) return (1); - hash = witness_hash_djb2((const char*)&key, + hash = witness_hash_djb2((const char *)&key, sizeof(key)) % w_lohash.wloh_size; w_rmatrix[parent->w_index][child->w_index] |= WITNESS_LOCK_ORDER_KNOWN; data = w_lofree; @@ -3134,7 +3114,6 @@ witness_lock_order_add(struct witness *parent, struct witness *child) static void witness_increment_graph_generation(void) { - if (witness_cold == 0) mtx_assert(&w_mtx, MA_OWNED); w_generation++; @@ -3143,7 +3122,6 @@ witness_increment_graph_generation(void) static int witness_output_drain(void *arg __unused, const char *data, int len) { - witness_output("%.*s", len, data); return (len); } diff --git a/sys/kern/sys_procdesc.c b/sys/kern/sys_procdesc.c index 11bd1b6f30e1..54b03fc82c90 100644 --- a/sys/kern/sys_procdesc.c +++ b/sys/kern/sys_procdesc.c @@ -129,7 +129,7 @@ procdesc_find(struct thread *td, int fd, const cap_rights_t *rightsp, if (error) return (error); if (fp->f_type != DTYPE_PROCDESC) { - error = EBADF; + error = EINVAL; goto out; } pd = fp->f_data; @@ -175,7 +175,7 @@ kern_pdgetpid(struct thread *td, int fd, const cap_rights_t *rightsp, if (error) return (error); if (fp->f_type != DTYPE_PROCDESC) { - error = EBADF; + error = EINVAL; goto out; } *pidp = procdesc_pid(fp); diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index 4122f9261871..4cef89cd5219 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -602,4 +602,6 @@ const char *syscallnames[] = { "inotify_rm_watch", /* 594 = inotify_rm_watch */ "getgroups", /* 595 = getgroups */ "setgroups", /* 596 = setgroups */ + "jail_attach_jd", /* 597 = jail_attach_jd */ + "jail_remove_jd", /* 598 = jail_remove_jd */ }; diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index fa64597d14a5..911f9093824b 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -3383,5 +3383,15 @@ _In_reads_(gidsetsize) const gid_t *gidset ); } +597 AUE_JAIL_ATTACH STD { + int jail_attach_jd( + int fd + ); + } +598 AUE_JAIL_REMOVE STD { + int jail_remove_jd( + int fd + ); + } ; vim: syntax=off diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c index 2b1ea9eed8d4..e28fef931ea8 100644 --- a/sys/kern/systrace_args.c +++ b/sys/kern/systrace_args.c @@ -3500,6 +3500,20 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 2; break; } + /* jail_attach_jd */ + case 597: { + struct jail_attach_jd_args *p = params; + iarg[a++] = p->fd; /* int */ + *n_args = 1; + break; + } + /* jail_remove_jd */ + case 598: { + struct jail_remove_jd_args *p = params; + iarg[a++] = p->fd; /* int */ + *n_args = 1; + break; + } default: *n_args = 0; break; @@ -9367,6 +9381,26 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* jail_attach_jd */ + case 597: + switch (ndx) { + case 0: + p = "int"; + break; + default: + break; + }; + break; + /* jail_remove_jd */ + case 598: + switch (ndx) { + case 0: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -11365,6 +11399,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* jail_attach_jd */ + case 597: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* jail_remove_jd */ + case 598: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 19870e989437..6138e543fae7 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1807,9 +1807,7 @@ uipc_filt_sowrite(struct knote *kn, long hint) kn->kn_data = uipc_stream_sbspace(&so2->so_rcv); if (so2->so_rcv.sb_state & SBS_CANTRCVMORE) { - /* - * XXXGL: maybe kn->kn_flags |= EV_EOF ? - */ + kn->kn_flags |= EV_EOF; return (1); } else if (kn->kn_sfflags & NOTE_LOWAT) return (kn->kn_data >= kn->kn_sdata); diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c index cd30d5cfae47..ceda770cb714 100644 --- a/sys/kern/vfs_init.c +++ b/sys/kern/vfs_init.c @@ -103,6 +103,16 @@ struct vattr va_null; * Routines having to do with the management of the vnode table. */ +void +vfs_unref_vfsconf(struct vfsconf *vfsp) +{ + vfsconf_lock(); + KASSERT(vfsp->vfc_refcount > 0, + ("vfs %p refcount underflow %d", vfsp, vfsp->vfc_refcount)); + vfsp->vfc_refcount--; + vfsconf_unlock(); +} + static struct vfsconf * vfs_byname_locked(const char *name) { @@ -123,9 +133,11 @@ vfs_byname(const char *name) { struct vfsconf *vfsp; - vfsconf_slock(); + vfsconf_lock(); vfsp = vfs_byname_locked(name); - vfsconf_sunlock(); + if (vfsp != NULL) + vfsp->vfc_refcount++; + vfsconf_unlock(); return (vfsp); } @@ -387,7 +399,7 @@ vfs_register(struct vfsconf *vfc) static int once; struct vfsconf *tvfc; uint32_t hashval; - int secondpass; + int error, prevmaxconf, secondpass; if (!once) { vattr_null(&va_null); @@ -405,6 +417,7 @@ vfs_register(struct vfsconf *vfc) return (EEXIST); } + prevmaxconf = maxvfsconf; if (vfs_typenumhash != 0) { /* * Calculate a hash on vfc_name to use for vfc_typenum. Unless @@ -497,16 +510,24 @@ vfs_register(struct vfsconf *vfc) vfc->vfc_vfsops = &vfsops_sigdefer; } - if (vfc->vfc_flags & VFCF_JAIL) - prison_add_vfs(vfc); - /* * Call init function for this VFS... */ if ((vfc->vfc_flags & VFCF_SBDRY) != 0) - vfc->vfc_vfsops_sd->vfs_init(vfc); + error = vfc->vfc_vfsops_sd->vfs_init(vfc); else - vfc->vfc_vfsops->vfs_init(vfc); + error = vfc->vfc_vfsops->vfs_init(vfc); + + if (error != 0) { + maxvfsconf = prevmaxconf; + TAILQ_REMOVE(&vfsconf, vfc, vfc_list); + vfsconf_unlock(); + return (error); + } + + if ((vfc->vfc_flags & VFCF_JAIL) != 0) + prison_add_vfs(vfc); + vfsconf_unlock(); /* diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index 8e64a7fe966b..13403acacc08 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -683,7 +683,6 @@ vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath, MPASSERT(mp->mnt_vfs_ops == 1, mp, ("vfs_ops should be 1 but %d found", mp->mnt_vfs_ops)); (void) vfs_busy(mp, MBF_NOWAIT); - atomic_add_acq_int(&vfsp->vfc_refcount, 1); mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_vfc = vfsp; mp->mnt_stat.f_type = vfsp->vfc_typenum; @@ -731,7 +730,6 @@ vfs_mount_destroy(struct mount *mp) __FILE__, __LINE__)); MPPASS(mp->mnt_writeopcount == 0, mp); MPPASS(mp->mnt_secondary_writes == 0, mp); - atomic_subtract_rel_int(&mp->mnt_vfc->vfc_refcount, 1); if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) { struct vnode *vp; @@ -769,6 +767,9 @@ vfs_mount_destroy(struct mount *mp) vfs_free_addrlist(mp->mnt_export); free(mp->mnt_export, M_MOUNT); } + vfsconf_lock(); + mp->mnt_vfc->vfc_refcount--; + vfsconf_unlock(); crfree(mp->mnt_cred); uma_zfree(mount_zone, mp); } @@ -1133,6 +1134,7 @@ vfs_domount_first( if (jailed(td->td_ucred) && (!prison_allow(td->td_ucred, vfsp->vfc_prison_flag) || vp == td->td_ucred->cr_prison->pr_root)) { vput(vp); + vfs_unref_vfsconf(vfsp); return (EPERM); } @@ -1169,6 +1171,7 @@ vfs_domount_first( } if (error != 0) { vput(vp); + vfs_unref_vfsconf(vfsp); return (error); } vn_seqc_write_begin(vp); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index a6e38be89291..57732ddab7d9 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -2186,6 +2186,8 @@ freevnode(struct vnode *vp) { struct bufobj *bo; + ASSERT_VOP_UNLOCKED(vp, __func__); + /* * The vnode has been marked for destruction, so free it. * @@ -2222,12 +2224,16 @@ freevnode(struct vnode *vp) mac_vnode_destroy(vp); #endif if (vp->v_pollinfo != NULL) { + int error __diagused; + /* * Use LK_NOWAIT to shut up witness about the lock. We may get * here while having another vnode locked when trying to * satisfy a lookup and needing to recycle. */ - VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT); + error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT); + VNASSERT(error == 0, vp, + ("freevnode: cannot lock vp %p for pollinfo destroy", vp)); destroy_vpollinfo(vp->v_pollinfo); VOP_UNLOCK(vp); vp->v_pollinfo = NULL; diff --git a/sys/libkern/arm64/crc32c_armv8.S b/sys/libkern/arm64/crc32c_armv8.S index 649afff4b711..430b24f7615a 100644 --- a/sys/libkern/arm64/crc32c_armv8.S +++ b/sys/libkern/arm64/crc32c_armv8.S @@ -39,14 +39,14 @@ ENTRY(armv8_crc32c) cbz w2, end tbz x1, #0x0, half_word_aligned sub w2, w2, 0x1 - ldr w10, [x1], #0x1 + ldrb w10, [x1], #0x1 crc32cb w0, w0, w10 half_word_aligned: cmp w2, #0x2 b.lo last_byte tbz x1, #0x1, word_aligned sub w2, w2, 0x2 - ldr w10, [x1], #0x2 + ldrh w10, [x1], #0x2 crc32ch w0, w0, w10 word_aligned: cmp w2, #0x4 @@ -69,11 +69,11 @@ last_word: crc32cw w0, w0, w10 last_half_word: tbz w2, #0x1, last_byte - ldr w10, [x1], #0x2 + ldrh w10, [x1], #0x2 crc32ch w0, w0, w10 last_byte: tbz w2, #0x0, end - ldr w10, [x1], #0x1 + ldrb w10, [x1], #0x1 crc32cb w0, w0, w10 end: ret diff --git a/sys/modules/dtb/rockchip/Makefile b/sys/modules/dtb/rockchip/Makefile index 33c2048cbb15..9c8ca1acc837 100644 --- a/sys/modules/dtb/rockchip/Makefile +++ b/sys/modules/dtb/rockchip/Makefile @@ -21,7 +21,8 @@ DTS= \ rockchip/rk3566-quartz64-a.dts \ rockchip/rk3568-nanopi-r5s.dts \ rockchip/rk3566-radxa-zero-3e.dts \ - rockchip/rk3566-radxa-zero-3w.dts + rockchip/rk3566-radxa-zero-3w.dts \ + rockchip/rk3568-bpi-r2-pro.dts DTSO= rk3328-analog-sound.dtso \ rk3328-i2c0.dtso \ diff --git a/sys/modules/irdma/Makefile b/sys/modules/irdma/Makefile index b2ffb67ca66f..a9ef6e63d3f2 100644 --- a/sys/modules/irdma/Makefile +++ b/sys/modules/irdma/Makefile @@ -1,8 +1,8 @@ .include <bsd.own.mk> -OFED_INC_DIR = ${.CURDIR}/../../ofed/include -ICE_DIR = ${.CURDIR}/../../dev/ice -.PATH: ${.CURDIR}/../../dev/irdma +OFED_INC_DIR = ${SRCTOP}/sys/ofed/include +ICE_DIR = ${SRCTOP}/sys/dev/ice +.PATH: ${SRCTOP}/sys/dev/irdma KMOD= irdma SRCS= icrdma.c diff --git a/sys/modules/sound/driver/hda/Makefile b/sys/modules/sound/driver/hda/Makefile index 0eec98fc53e1..1e137dc5671c 100644 --- a/sys/modules/sound/driver/hda/Makefile +++ b/sys/modules/sound/driver/hda/Makefile @@ -2,7 +2,7 @@ KMOD= snd_hda SRCS= device_if.h bus_if.h pci_if.h channel_if.h mixer_if.h hdac_if.h -SRCS+= hdaa.c hdaa.h hdaa_patches.c hdac.c hdac_if.h hdac_if.c -SRCS+= hdacc.c hdac_private.h hdac_reg.h hda_reg.h hdac.h +SRCS+= hdaa.c hdaa.h hdaa_patches.c hdacc.c hdac.c hdac_if.c +SRCS+= hdac_private.h hdac_reg.h hda_reg.h hdac.h .include <bsd.kmod.mk> diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index 72167b752e53..8b3e64eba2f3 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -258,6 +258,9 @@ /* dops->d_revalidate() takes 4 args */ /* #undef HAVE_D_REVALIDATE_4ARGS */ +/* Define if d_set_d_op() is available */ +/* #undef HAVE_D_SET_D_OP */ + /* Define to 1 if you have the 'execvpe' function. */ #define HAVE_EXECVPE 1 @@ -483,9 +486,6 @@ /* building against unsupported kernel version */ /* #undef HAVE_LINUX_EXPERIMENTAL */ -/* Define to 1 if you have the <linux/stat.h> header file. */ -/* #undef HAVE_LINUX_STAT_H */ - /* makedev() is declared in sys/mkdev.h */ /* #undef HAVE_MAKEDEV_IN_MKDEV */ @@ -840,7 +840,7 @@ /* #undef ZFS_DEVICE_MINOR */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.4.0-rc1-FreeBSD_g00dfa094a" +#define ZFS_META_ALIAS "zfs-2.4.99-29-FreeBSD_g7939bad5e" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -870,10 +870,10 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "zfs-2.4.0-rc1-FreeBSD_g00dfa094a" +#define ZFS_META_RELEASE "29-FreeBSD_g7939bad5e" /* Define the project version. */ -#define ZFS_META_VERSION "2.4.0" +#define ZFS_META_VERSION "2.4.99" /* count is located in percpu_ref.data */ /* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */ diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 2b5d717da216..fff89435a0ff 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.4.0-rc1-0-g00dfa094a" +#define ZFS_META_GITREV "zfs-2.4.99-29-g7939bad5e" diff --git a/sys/net/if.c b/sys/net/if.c index 202be4794f6e..b6a798aa0fab 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -74,7 +74,6 @@ #include <vm/uma.h> #include <net/bpf.h> -#include <net/ethernet.h> #include <net/if.h> #include <net/if_arp.h> #include <net/if_clone.h> @@ -1102,6 +1101,7 @@ if_detach_internal(struct ifnet *ifp, bool vmove) struct ifaddr *ifa; int i; struct domain *dp; + void *if_afdata[AF_MAX]; #ifdef VIMAGE bool shutdown; @@ -1225,15 +1225,30 @@ finish_vnet_shutdown: IF_AFDATA_LOCK(ifp); i = ifp->if_afdata_initialized; ifp->if_afdata_initialized = 0; + if (i != 0) { + /* + * Defer the dom_ifdetach call. + */ + _Static_assert(sizeof(if_afdata) == sizeof(ifp->if_afdata), + "array size mismatch"); + memcpy(if_afdata, ifp->if_afdata, sizeof(if_afdata)); + memset(ifp->if_afdata, 0, sizeof(ifp->if_afdata)); + } IF_AFDATA_UNLOCK(ifp); if (i == 0) return; + /* + * XXXZL: This net epoch wait is not necessary if we have done right. + * But if we do not, at least we can make a guarantee that threads those + * enter net epoch will see NULL address family dependent data, + * e.g. if_afdata[AF_INET6]. A clear NULL pointer derefence is much + * better than writing to freed memory. + */ + NET_EPOCH_WAIT(); SLIST_FOREACH(dp, &domains, dom_next) { - if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) { - (*dp->dom_ifdetach)(ifp, - ifp->if_afdata[dp->dom_family]); - ifp->if_afdata[dp->dom_family] = NULL; - } + if (dp->dom_ifdetach != NULL && + if_afdata[dp->dom_family] != NULL) + (*dp->dom_ifdetach)(ifp, if_afdata[dp->dom_family]); } } diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 66555fd1feb5..cea7f1cb5e23 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -522,11 +522,11 @@ SYSCTL_BOOL(_net_link_bridge, OID_AUTO, log_mac_flap, "Log MAC address port flapping"); /* allow IP addresses on bridge members */ -VNET_DEFINE_STATIC(bool, member_ifaddrs) = false; +VNET_DEFINE_STATIC(bool, member_ifaddrs) = true; #define V_member_ifaddrs VNET(member_ifaddrs) SYSCTL_BOOL(_net_link_bridge, OID_AUTO, member_ifaddrs, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(member_ifaddrs), false, - "Allow layer 3 addresses on bridge members"); + "Allow layer 3 addresses on bridge members (deprecated)"); static bool bridge_member_ifaddrs(void) @@ -1448,24 +1448,30 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) /* * If member_ifaddrs is disabled, do not allow an interface with - * assigned IP addresses to be added to a bridge. + * assigned IP addresses to be added to a bridge. Skip this check + * for gif interfaces, because the IP address assigned to a gif + * interface is separate from the bridge's Ethernet segment. */ - if (!V_member_ifaddrs) { + if (ifs->if_type != IFT_GIF) { struct ifaddr *ifa; CK_STAILQ_FOREACH(ifa, &ifs->if_addrhead, ifa_link) { -#ifdef INET - if (ifa->ifa_addr->sa_family == AF_INET) - return (EXTERROR(EINVAL, - "Member interface may not have " - "an IPv4 address configured")); -#endif -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6) + if (ifa->ifa_addr->sa_family != AF_INET && + ifa->ifa_addr->sa_family != AF_INET6) + continue; + + if (V_member_ifaddrs) { + if_printf(sc->sc_ifp, + "WARNING: Adding member interface %s which " + "has an IP address assigned is deprecated " + "and will be unsupported in a future " + "release.\n", ifs->if_xname); + break; + } else { return (EXTERROR(EINVAL, "Member interface may not have " - "an IPv6 address configured")); -#endif + "an IP address assigned")); + } } } diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c index 581c2434b8fb..fbffa8f359a0 100644 --- a/sys/net/if_epair.c +++ b/sys/net/if_epair.c @@ -69,6 +69,7 @@ #include <net/if_media.h> #include <net/if_private.h> #include <net/if_types.h> +#include <net/if_vlan_var.h> #include <net/netisr.h> #ifdef RSS #include <net/rss_config.h> @@ -434,6 +435,21 @@ epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; } +/* + * Update ifp->if_hwassist according to the current value of ifp->if_capenable. + */ +static void +epair_caps_changed(struct ifnet *ifp) +{ + uint64_t hwassist = 0; + + if (ifp->if_capenable & IFCAP_TXCSUM) + hwassist |= CSUM_IP_TCP | CSUM_IP_UDP; + if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) + hwassist |= CSUM_IP6_TCP | CSUM_IP6_UDP; + ifp->if_hwassist = hwassist; +} + static int epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { @@ -461,6 +477,44 @@ epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = 0; break; + case SIOCGIFCAP: + ifr->ifr_reqcap = ifp->if_capabilities; + ifr->ifr_curcap = ifp->if_capenable; + error = 0; + break; + case SIOCSIFCAP: + /* + * Enable/disable capabilities as requested, besides + * IFCAP_RXCSUM(_IPV6), which always remain enabled. + * Incoming packets may have the mbuf flag CSUM_DATA_VALID set. + * Without IFCAP_RXCSUM(_IPV6), this flag would have to be + * removed, which does not seem helpful. + */ + ifp->if_capenable = ifr->ifr_reqcap | IFCAP_RXCSUM | + IFCAP_RXCSUM_IPV6; + epair_caps_changed(ifp); + /* + * If IFCAP_TXCSUM(_IPV6) has been changed, change it on the + * other epair interface as well. + * A bridge disables IFCAP_TXCSUM(_IPV6) when adding one epair + * interface if another interface in the bridge has it disabled. + * In that case this capability needs to be disabled on the + * other epair interface to avoid sending packets in the bridge + * that rely on this capability. + */ + sc = ifp->if_softc; + if ((ifp->if_capenable ^ sc->oifp->if_capenable) & + (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6)) { + sc->oifp->if_capenable &= + ~(IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6); + sc->oifp->if_capenable |= ifp->if_capenable & + (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6); + epair_caps_changed(sc->oifp); + } + VLAN_CAPABILITIES(ifp); + error = 0; + break; + default: /* Let the common ethernet handler process this. */ error = ether_ioctl(ifp, cmd, data); @@ -572,8 +626,11 @@ epair_setup_ifp(struct epair_softc *sc, char *name, int unit) ifp->if_dname = epairname; ifp->if_dunit = unit; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_capabilities = IFCAP_VLAN_MTU; - ifp->if_capenable = IFCAP_VLAN_MTU; + ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_TXCSUM | + IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; + ifp->if_capenable = IFCAP_VLAN_MTU | IFCAP_TXCSUM | + IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; + epair_caps_changed(ifp); ifp->if_transmit = epair_transmit; ifp->if_qflush = epair_qflush; ifp->if_start = epair_start; diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 3ae0c01c0efc..9c157bf3d3c2 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -695,7 +695,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) * seen by upper protocol layers. */ if (!ETHER_IS_MULTICAST(eh->ether_dhost) && - bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0) + memcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0) m->m_flags |= M_PROMISC; } diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 98c59e5de988..1e6d98291c04 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -712,7 +712,7 @@ static uint32_t iflib_txq_can_drain(struct ifmp_ring *); static void iflib_altq_if_start(if_t ifp); static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m); #endif -static int iflib_register(if_ctx_t); +static void iflib_register(if_ctx_t); static void iflib_deregister(if_ctx_t); static void iflib_unregister_vlan_handlers(if_ctx_t ctx); static uint16_t iflib_get_mbuf_size_for(unsigned int size); @@ -3646,6 +3646,12 @@ defrag: bus_dmamap_unload(buf_tag, map); DBG_COUNTER_INC(encap_txq_avail_fail); DBG_COUNTER_INC(encap_txd_encap_fail); + if (ctx->ifc_sysctl_simple_tx) { + *m_headp = m_head = iflib_remove_mbuf(txq); + m_freem(*m_headp); + DBG_COUNTER_INC(tx_frees); + *m_headp = NULL; + } if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) GROUPTASK_ENQUEUE(&txq->ift_task); return (ENOBUFS); @@ -4298,6 +4304,10 @@ iflib_if_transmit(if_t ifp, struct mbuf *m) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); m_freem(m); DBG_COUNTER_INC(tx_frees); + if (err == ENOBUFS) + if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); + else + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } return (err); @@ -5136,10 +5146,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct ctx->ifc_dev = dev; ctx->ifc_softc = sc; - if ((err = iflib_register(ctx)) != 0) { - device_printf(dev, "iflib_register failed %d\n", err); - goto fail_ctx_free; - } + iflib_register(ctx); iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; @@ -5363,7 +5370,6 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct DEBUGNET_SET(ctx->ifc_ifp, iflib); - if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); iflib_add_pfil(ctx); ctx->ifc_flags |= IFC_INIT_DONE; @@ -5387,7 +5393,6 @@ fail_unlock: CTX_UNLOCK(ctx); IFNET_WUNLOCK(); iflib_deregister(ctx); -fail_ctx_free: device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); @@ -5685,7 +5690,7 @@ _iflib_pre_assert(if_softc_ctx_t scctx) MPASS(scctx->isc_txrx->ift_rxd_flush); } -static int +static void iflib_register(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; @@ -5718,6 +5723,7 @@ iflib_register(if_ctx_t ctx) if_settransmitfn(ifp, iflib_if_transmit); #endif if_setqflushfn(ifp, iflib_if_qflush); + if_setgetcounterfn(ifp, iflib_if_get_counter); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); ctx->ifc_vlan_attach_event = EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, @@ -5731,7 +5737,6 @@ iflib_register(if_ctx_t ctx) ifmedia_init(ctx->ifc_mediap, IFM_IMASK, iflib_media_change, iflib_media_status); } - return (0); } static void @@ -7146,6 +7151,11 @@ iflib_simple_transmit(if_t ifp, struct mbuf *m) bytes_sent += m->m_pkthdr.len; mcast_sent += !!(m->m_flags & M_MCAST); (void)iflib_txd_db_check(txq, true); + } else { + if (error == ENOBUFS) + if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); + else + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); mtx_unlock(&txq->ift_mtx); diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index d6c13470f2eb..e6fb1c2c3e1b 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -326,6 +326,7 @@ pf_counter_u64_zero(struct pf_counter_u64 *pfcu64) _Static_assert(sizeof(time_t) == 4 || sizeof(time_t) == 8, "unexpected time_t size"); SYSCTL_DECL(_net_pf); +MALLOC_DECLARE(M_PF); MALLOC_DECLARE(M_PFHASH); MALLOC_DECLARE(M_PF_RULE_ITEM); @@ -645,6 +646,7 @@ struct pf_kpool { int tblidx; u_int16_t proxy_port[2]; u_int8_t opts; + sa_family_t ipv6_nexthop_af; }; struct pf_rule_actions { @@ -859,8 +861,8 @@ struct pf_krule { u_int8_t keep_state; sa_family_t af; u_int8_t proto; - u_int8_t type; - u_int8_t code; + uint16_t type; + uint16_t code; u_int8_t flags; u_int8_t flagset; u_int8_t min_ttl; @@ -2421,7 +2423,7 @@ int pf_multihome_scan_init(int, int, struct pf_pdesc *); int pf_multihome_scan_asconf(int, int, struct pf_pdesc *); u_int32_t pf_new_isn(struct pf_kstate *); -void *pf_pull_hdr(const struct mbuf *, int, void *, int, u_short *, u_short *, +void *pf_pull_hdr(const struct mbuf *, int, void *, int, u_short *, sa_family_t); void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); void pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t, @@ -2612,6 +2614,7 @@ struct pf_kruleset *pf_find_kruleset(const char *); struct pf_kruleset *pf_get_leaf_kruleset(char *, char **); struct pf_kruleset *pf_find_or_create_kruleset(const char *); void pf_rs_initialize(void); +void pf_rule_tree_free(struct pf_krule_global *); struct pf_krule *pf_krule_alloc(void); @@ -2680,7 +2683,7 @@ u_short pf_map_addr(sa_family_t, struct pf_krule *, struct pf_addr *, struct pf_kpool *); u_short pf_map_addr_sn(u_int8_t, struct pf_krule *, struct pf_addr *, struct pf_addr *, - sa_family_t *, struct pfi_kkif **nkif, + sa_family_t *, struct pfi_kkif **, struct pf_addr *, struct pf_kpool *, pf_sn_types_t); int pf_get_transaddr_af(struct pf_krule *, diff --git a/sys/net80211/ieee80211_ddb.c b/sys/net80211/ieee80211_ddb.c index d96d7988a864..1dd8e38b9896 100644 --- a/sys/net80211/ieee80211_ddb.c +++ b/sys/net80211/ieee80211_ddb.c @@ -296,7 +296,7 @@ _db_show_sta(const struct ieee80211_node *ni) ni->ni_htparam, ni->ni_htctlchan, ni->ni_ht2ndchan); db_printf("\thtopmode 0x%x htstbc 0x%x chw %d (%s)\n", ni->ni_htopmode, ni->ni_htstbc, - ni->ni_chw, ieee80211_ni_chw_to_str(ni->ni_chw)); + ni->ni_chw, net80211_ni_chw_to_str(ni->ni_chw)); /* XXX ampdu state */ for (i = 0; i < WME_NUM_TID; i++) diff --git a/sys/net80211/ieee80211_freebsd.h b/sys/net80211/ieee80211_freebsd.h index 3684fba52c5c..954801d95787 100644 --- a/sys/net80211/ieee80211_freebsd.h +++ b/sys/net80211/ieee80211_freebsd.h @@ -341,11 +341,16 @@ struct mbuf *ieee80211_getmgtframe(uint8_t **frm, int headroom, int pktlen); #define M_AGE_SUB(m,adj) (m->m_pkthdr.csum_data -= adj) /* - * Store the sequence number. + * Store / retrieve the sequence number in an mbuf. + * + * The sequence number being stored/retreived is the 12 bit + * base sequence number, not the 16 bit sequence number field. + * I.e., it's from 0..4095 inclusive, with no 4 bit padding for + * fragment numbers. */ #define M_SEQNO_SET(m, seqno) \ - ((m)->m_pkthdr.tso_segsz = (seqno)) -#define M_SEQNO_GET(m) ((m)->m_pkthdr.tso_segsz) + ((m)->m_pkthdr.tso_segsz = ((seqno) % IEEE80211_SEQ_RANGE)) +#define M_SEQNO_GET(m) (((m)->m_pkthdr.tso_segsz) % IEEE80211_SEQ_RANGE) #define MTAG_ABI_NET80211 1132948340 /* net80211 ABI */ diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c index c28f124648a1..3af56a228295 100644 --- a/sys/net80211/ieee80211_ht.c +++ b/sys/net80211/ieee80211_ht.c @@ -1476,7 +1476,7 @@ ieee80211_ht_wds_init(struct ieee80211_node *ni) ni->ni_htcap |= IEEE80211_HTCAP_SHORTGI20; if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) { ni->ni_htcap |= IEEE80211_HTCAP_CHWIDTH40; - ni->ni_chw = IEEE80211_STA_RX_BW_40; + ni->ni_chw = NET80211_STA_RX_BW_40; if (IEEE80211_IS_CHAN_HT40U(ni->ni_chan)) ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_ABOVE; else if (IEEE80211_IS_CHAN_HT40D(ni->ni_chan)) @@ -1484,7 +1484,7 @@ ieee80211_ht_wds_init(struct ieee80211_node *ni) if (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40) ni->ni_htcap |= IEEE80211_HTCAP_SHORTGI40; } else { - ni->ni_chw = IEEE80211_STA_RX_BW_20; + ni->ni_chw = NET80211_STA_RX_BW_20; ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_NONE; } ni->ni_htctlchan = ni->ni_chan->ic_ieee; @@ -1580,7 +1580,7 @@ ieee80211_ht_node_join(struct ieee80211_node *ni) if (ni->ni_flags & IEEE80211_NODE_HT) { vap->iv_ht_sta_assoc++; - if (ni->ni_chw == IEEE80211_STA_RX_BW_40) + if (ni->ni_chw == NET80211_STA_RX_BW_40) vap->iv_ht40_sta_assoc++; } htinfo_update(vap); @@ -1598,7 +1598,7 @@ ieee80211_ht_node_leave(struct ieee80211_node *ni) if (ni->ni_flags & IEEE80211_NODE_HT) { vap->iv_ht_sta_assoc--; - if (ni->ni_chw == IEEE80211_STA_RX_BW_40) + if (ni->ni_chw == NET80211_STA_RX_BW_40) vap->iv_ht40_sta_assoc--; } htinfo_update(vap); @@ -1827,7 +1827,7 @@ htinfo_update_chw(struct ieee80211_node *ni, int htflags, int vhtflags) done: /* update node's (11n) tx channel width */ ni->ni_chw = IEEE80211_IS_CHAN_HT40(ni->ni_chan) ? - IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + NET80211_STA_RX_BW_40 : NET80211_STA_RX_BW_20; return (ret); } @@ -1933,7 +1933,7 @@ ieee80211_vht_get_vhtflags(struct ieee80211_node *ni, uint32_t htflags) { #define _RETURN_CHAN_BITS(_cb) \ do { \ - IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, \ + if (0) IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, \ "%s:%d: selected %b", __func__, __LINE__, \ (_cb), IEEE80211_CHAN_BITS); \ return (_cb); \ @@ -2689,11 +2689,11 @@ ht_recv_action_ht_txchwidth(struct ieee80211_node *ni, * here. */ chw = (frm[2] == IEEE80211_A_HT_TXCHWIDTH_2040) ? - IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + NET80211_STA_RX_BW_40 : NET80211_STA_RX_BW_20; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "%s: HT txchwidth, width %d%s (%s)", __func__, - chw, ni->ni_chw != chw ? "*" : "", ieee80211_ni_chw_to_str(chw)); + chw, ni->ni_chw != chw ? "*" : "", net80211_ni_chw_to_str(chw)); if (chw != ni->ni_chw) { /* XXX does this need to change the ht40 station count? */ ni->ni_chw = chw; @@ -3832,5 +3832,5 @@ ieee80211_ht_check_tx_ht40(const struct ieee80211_node *ni) return (IEEE80211_IS_CHAN_HT40(bss_chan) && IEEE80211_IS_CHAN_HT40(ni->ni_chan) && - (ni->ni_chw == IEEE80211_STA_RX_BW_40)); + (ni->ni_chw == NET80211_STA_RX_BW_40)); } diff --git a/sys/net80211/ieee80211_node.c b/sys/net80211/ieee80211_node.c index a201d1b278f0..49ba00299fee 100644 --- a/sys/net80211/ieee80211_node.c +++ b/sys/net80211/ieee80211_node.c @@ -2673,7 +2673,7 @@ ieee80211_dump_node(struct ieee80211_node_table *nt __unused, ni->ni_htctlchan, ni->ni_ht2ndchan); net80211_printf("\thtopmode %x htstbc %x htchw %d (%s)\n", ni->ni_htopmode, ni->ni_htstbc, - ni->ni_chw, ieee80211_ni_chw_to_str(ni->ni_chw)); + ni->ni_chw, net80211_ni_chw_to_str(ni->ni_chw)); net80211_printf("\tvhtcap %x freq1 %d freq2 %d vhtbasicmcs %x\n", ni->ni_vhtcap, (int) ni->ni_vht_chan1, (int) ni->ni_vht_chan2, (int) ni->ni_vht_basicmcs); @@ -2831,7 +2831,7 @@ ieee80211_node_join(struct ieee80211_node *ni, int resp) ni->ni_flags & IEEE80211_NODE_QOS ? ", QoS" : "", /* XXX update for VHT string */ ni->ni_flags & IEEE80211_NODE_HT ? - (ni->ni_chw == IEEE80211_STA_RX_BW_40 ? ", HT40" : ", HT20") : "", + (ni->ni_chw == NET80211_STA_RX_BW_40 ? ", HT40" : ", HT20") : "", ni->ni_flags & IEEE80211_NODE_AMPDU ? " (+AMPDU)" : "", ni->ni_flags & IEEE80211_NODE_AMSDU ? " (+AMSDU)" : "", ni->ni_flags & IEEE80211_NODE_MIMO_RTS ? " (+SMPS-DYN)" : diff --git a/sys/net80211/ieee80211_node.h b/sys/net80211/ieee80211_node.h index ef25fa0d7fdd..f1246dd12419 100644 --- a/sys/net80211/ieee80211_node.h +++ b/sys/net80211/ieee80211_node.h @@ -109,33 +109,33 @@ enum ieee80211_mesh_mlstate { "\20\1IDLE\2OPENSNT\2OPENRCV\3CONFIRMRCV\4ESTABLISHED\5HOLDING" /* - * This structure is shared with LinuxKPI 802.11 code describing up-to - * which channel width the station can receive. + * This enum was shared with the LinuxKPI enum ieee80211_sta_rx_bandwidth + * describing up-to which channel width the station can receive. * Rather than using hardcoded MHz values for the channel width use an enum with * flags. This allows us to keep the uint8_t slot for ni_chw in - * struct ieee80211_node and means we do not have to sync to the value for - * LinuxKPI. + * struct ieee80211_node it means we do not have to sync to the value for + * LinuxKPI (just the names). * * NB: BW_20 needs to 0 and values need to be sorted! Cannot make it * bitfield-alike for use with %b. */ -enum ieee80211_sta_rx_bw { - IEEE80211_STA_RX_BW_20 = 0x00, - IEEE80211_STA_RX_BW_40, - IEEE80211_STA_RX_BW_80, - IEEE80211_STA_RX_BW_160, - IEEE80211_STA_RX_BW_320, +enum net80211_sta_rx_bw { + NET80211_STA_RX_BW_20 = 0x00, + NET80211_STA_RX_BW_40, + NET80211_STA_RX_BW_80, + NET80211_STA_RX_BW_160, + NET80211_STA_RX_BW_320, } __packed; static inline const char * -ieee80211_ni_chw_to_str(enum ieee80211_sta_rx_bw bw) +net80211_ni_chw_to_str(enum net80211_sta_rx_bw bw) { switch (bw) { - case IEEE80211_STA_RX_BW_20: return ("BW_20"); - case IEEE80211_STA_RX_BW_40: return ("BW_40"); - case IEEE80211_STA_RX_BW_80: return ("BW_80"); - case IEEE80211_STA_RX_BW_160: return ("BW_160"); - case IEEE80211_STA_RX_BW_320: return ("BW_320"); + case NET80211_STA_RX_BW_20: return ("BW_20"); + case NET80211_STA_RX_BW_40: return ("BW_40"); + case NET80211_STA_RX_BW_80: return ("BW_80"); + case NET80211_STA_RX_BW_160: return ("BW_160"); + case NET80211_STA_RX_BW_320: return ("BW_320"); } } @@ -285,7 +285,7 @@ struct ieee80211_node { uint8_t ni_ht2ndchan; /* HT 2nd channel */ uint8_t ni_htopmode; /* HT operating mode */ uint8_t ni_htstbc; /* HT */ - enum ieee80211_sta_rx_bw ni_chw; /* negotiated channel width */ + enum net80211_sta_rx_bw ni_chw; /* negotiated channel width */ struct ieee80211_htrateset ni_htrates; /* negotiated ht rate set */ struct ieee80211_tx_ampdu ni_tx_ampdu[WME_NUM_TID]; struct ieee80211_rx_ampdu ni_rx_ampdu[WME_NUM_TID]; diff --git a/sys/net80211/ieee80211_output.c b/sys/net80211/ieee80211_output.c index 57fe687adffe..116fc76a9ce1 100644 --- a/sys/net80211/ieee80211_output.c +++ b/sys/net80211/ieee80211_output.c @@ -1082,6 +1082,12 @@ ieee80211_send_nulldata(struct ieee80211_node *ni) uint8_t *frm; int ret; + /* Don't send NULL frames if we've been configured not to do so. */ + if ((ic->ic_flags_ext & IEEE80211_FEXT_NO_NULLDATA) != 0) { + ieee80211_node_decref(ni); + return (0); + } + if (vap->iv_state == IEEE80211_S_CAC) { IEEE80211_NOTE(vap, IEEE80211_MSG_OUTPUT | IEEE80211_MSG_DOTH, ni, "block %s frame in CAC state", "null data"); diff --git a/sys/net80211/ieee80211_phy.c b/sys/net80211/ieee80211_phy.c index eb96d74a2bd9..7f53c717152b 100644 --- a/sys/net80211/ieee80211_phy.c +++ b/sys/net80211/ieee80211_phy.c @@ -658,26 +658,26 @@ static uint16_t ieee80211_vht_mcs_allowed_list_160[] = { * * See 802.11-2020 21.5 (Parameters for VHT-MCSs) for more details. * - * @param bw channel bandwidth, via enum ieee80211_sta_rx_bw + * @param bw channel bandwidth, via enum net80211_sta_rx_bw * @param nss number of spatial streams, 1..8 * @returns bitmask of valid MCS rates from 0..9 */ uint16_t -ieee80211_phy_vht_get_mcs_mask(enum ieee80211_sta_rx_bw bw, uint8_t nss) +ieee80211_phy_vht_get_mcs_mask(enum net80211_sta_rx_bw bw, uint8_t nss) { if (nss == 0 || nss > 8) return (0); switch (bw) { - case IEEE80211_STA_RX_BW_20: + case NET80211_STA_RX_BW_20: return (ieee80211_vht_mcs_allowed_list_20[nss - 1]); - case IEEE80211_STA_RX_BW_40: + case NET80211_STA_RX_BW_40: return (ieee80211_vht_mcs_allowed_list_40[nss - 1]); - case IEEE80211_STA_RX_BW_80: + case NET80211_STA_RX_BW_80: return (ieee80211_vht_mcs_allowed_list_80[nss - 1]); - case IEEE80211_STA_RX_BW_160: + case NET80211_STA_RX_BW_160: return (ieee80211_vht_mcs_allowed_list_160[nss - 1]); - case IEEE80211_STA_RX_BW_320: + case NET80211_STA_RX_BW_320: /* invalid for VHT */ return (0); } @@ -689,14 +689,14 @@ ieee80211_phy_vht_get_mcs_mask(enum ieee80211_sta_rx_bw bw, uint8_t nss) * * See 802.11-2020 21.5 (Parameters for VHT-MCSs) for more details. * - * @param bw channel bandwidth, via enum ieee80211_sta_rx_bw + * @param bw channel bandwidth, via enum net80211_sta_rx_bw * @param nss number of spatial streams, 1..8 * @param mcs MCS rate, 0..9 * @retval true if the NSS / MCS / bandwidth combination is valid * @retval false if the NSS / MCS / bandwidth combination is not valid */ bool -ieee80211_phy_vht_validate_mcs(enum ieee80211_sta_rx_bw bw, uint8_t nss, +ieee80211_phy_vht_validate_mcs(enum net80211_sta_rx_bw bw, uint8_t nss, uint8_t mcs) { uint16_t mask; @@ -737,7 +737,7 @@ static struct mcs_entry mcs_entries[] = { /** * @brief Calculate the bitrate of the given VHT MCS rate. * - * @param bw Channel bandwidth (enum ieee80211_sta_rx_bw) + * @param bw Channel bandwidth (enum net80211_sta_rx_bw) * @param nss Number of spatial streams, 1..8 * @param mcs MCS, 0..9 * @param is_shortgi True if short guard-interval (400nS) @@ -746,7 +746,7 @@ static struct mcs_entry mcs_entries[] = { * @returns The bitrate in kbit/sec. */ uint32_t -ieee80211_phy_vht_get_mcs_kbit(enum ieee80211_sta_rx_bw bw, +ieee80211_phy_vht_get_mcs_kbit(enum net80211_sta_rx_bw bw, uint8_t nss, uint8_t mcs, bool is_shortgi) { uint32_t sym_len, n_carriers; @@ -773,16 +773,16 @@ ieee80211_phy_vht_get_mcs_kbit(enum ieee80211_sta_rx_bw bw, * See 802.11-2020 Table 21-5 (Timing-related constraints.) */ switch (bw) { - case IEEE80211_STA_RX_BW_20: + case NET80211_STA_RX_BW_20: n_carriers = 52; break; - case IEEE80211_STA_RX_BW_40: + case NET80211_STA_RX_BW_40: n_carriers = 108; break; - case IEEE80211_STA_RX_BW_80: + case NET80211_STA_RX_BW_80: n_carriers = 234; break; - case IEEE80211_STA_RX_BW_160: + case NET80211_STA_RX_BW_160: n_carriers = 468; break; default: diff --git a/sys/net80211/ieee80211_phy.h b/sys/net80211/ieee80211_phy.h index 749b082e34e9..391c8bfc5010 100644 --- a/sys/net80211/ieee80211_phy.h +++ b/sys/net80211/ieee80211_phy.h @@ -221,13 +221,13 @@ uint32_t ieee80211_compute_duration_ht(uint32_t frameLen, uint16_t rate, int streams, int isht40, int isShortGI); -enum ieee80211_sta_rx_bw; +enum net80211_sta_rx_bw; -uint16_t ieee80211_phy_vht_get_mcs_mask(enum ieee80211_sta_rx_bw, +uint16_t ieee80211_phy_vht_get_mcs_mask(enum net80211_sta_rx_bw, uint8_t); -bool ieee80211_phy_vht_validate_mcs(enum ieee80211_sta_rx_bw, +bool ieee80211_phy_vht_validate_mcs(enum net80211_sta_rx_bw, uint8_t, uint8_t); -uint32_t ieee80211_phy_vht_get_mcs_kbit(enum ieee80211_sta_rx_bw, +uint32_t ieee80211_phy_vht_get_mcs_kbit(enum net80211_sta_rx_bw, uint8_t, uint8_t, bool); #endif /* _KERNEL */ diff --git a/sys/net80211/ieee80211_sta.c b/sys/net80211/ieee80211_sta.c index 463a8b16773b..19e5ffe9a367 100644 --- a/sys/net80211/ieee80211_sta.c +++ b/sys/net80211/ieee80211_sta.c @@ -1934,7 +1934,7 @@ sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, int subtype, vap->iv_flags&IEEE80211_F_USEPROT ? ", protection" : "", ni->ni_flags & IEEE80211_NODE_QOS ? ", QoS" : "", ni->ni_flags & IEEE80211_NODE_HT ? - (ni->ni_chw == IEEE80211_STA_RX_BW_40 ? ", HT40" : ", HT20") : "", + (ni->ni_chw == NET80211_STA_RX_BW_40 ? ", HT40" : ", HT20") : "", ni->ni_flags & IEEE80211_NODE_AMPDU ? " (+AMPDU)" : "", ni->ni_flags & IEEE80211_NODE_AMSDU ? " (+AMSDU)" : "", ni->ni_flags & IEEE80211_NODE_MIMO_RTS ? " (+SMPS-DYN)" : diff --git a/sys/net80211/ieee80211_var.h b/sys/net80211/ieee80211_var.h index a0293f814899..b9bc2357428d 100644 --- a/sys/net80211/ieee80211_var.h +++ b/sys/net80211/ieee80211_var.h @@ -700,13 +700,14 @@ MALLOC_DECLARE(M_80211_VAP); #define IEEE80211_FEXT_QUIET_IE 0x00800000 /* STATUS: quiet IE in a beacon has been added */ #define IEEE80211_FEXT_UAPSD 0x01000000 /* CONF: enable U-APSD */ #define IEEE80211_FEXT_AMPDU_OFFLOAD 0x02000000 /* CONF: driver/fw handles AMPDU[-TX] itself */ +#define IEEE80211_FEXT_NO_NULLDATA 0x04000000 /* CONF: don't originate NULL data frames from net80211 */ #define IEEE80211_FEXT_BITS \ "\20\2INACT\3SCANWAIT\4BGSCAN\5WPS\6TSN\7SCANREQ\10RESUME" \ "\0114ADDR\12NONEPR_PR\13SWBMISS\14DFS\15DOTD\16STATEWAIT\17REINIT" \ "\20BPF\21WDSLEGACY\22PROBECHAN\23UNIQMAC\24SCAN_OFFLOAD\25SEQNO_OFFLOAD" \ "\26FRAG_OFFLOAD\27VHT" \ - "\30QUIET_IE\31UAPSD\32AMPDU_OFFLOAD" + "\30QUIET_IE\31UAPSD\32AMPDU_OFFLOAD\33NO_NULLDATA" /* ic_flags_ht/iv_flags_ht */ #define IEEE80211_FHT_NONHT_PR 0x00000001 /* STATUS: non-HT sta present */ diff --git a/sys/net80211/ieee80211_vht.c b/sys/net80211/ieee80211_vht.c index de0b691d4d2a..10a5fc7f08ab 100644 --- a/sys/net80211/ieee80211_vht.c +++ b/sys/net80211/ieee80211_vht.c @@ -974,7 +974,7 @@ ieee80211_vht_check_tx_vht40(const struct ieee80211_node *ni) return (IEEE80211_IS_CHAN_VHT40(bss_chan) && IEEE80211_IS_CHAN_VHT40(ni->ni_chan) && - (ni->ni_chw == IEEE80211_STA_RX_BW_40)); + (ni->ni_chw == NET80211_STA_RX_BW_40)); } /* @@ -1003,7 +1003,7 @@ ieee80211_vht_check_tx_vht80(const struct ieee80211_node *ni) */ return (IEEE80211_IS_CHAN_VHT80(bss_chan) && IEEE80211_IS_CHAN_VHT80(ni->ni_chan) && - (ni->ni_chw != IEEE80211_STA_RX_BW_20)); + (ni->ni_chw != NET80211_STA_RX_BW_20)); } /* @@ -1030,7 +1030,7 @@ ieee80211_vht_check_tx_vht160(const struct ieee80211_node *ni) * If a HT TX width action frame sets it to 20MHz * then reject doing 160MHz. */ - if (ni->ni_chw == IEEE80211_STA_RX_BW_20) + if (ni->ni_chw == NET80211_STA_RX_BW_20) return (false); if (IEEE80211_IS_CHAN_VHT160(bss_chan) && @@ -1062,19 +1062,19 @@ ieee80211_vht_check_tx_vht160(const struct ieee80211_node *ni) */ bool ieee80211_vht_check_tx_bw(const struct ieee80211_node *ni, - enum ieee80211_sta_rx_bw bw) + enum net80211_sta_rx_bw bw) { switch (bw) { - case IEEE80211_STA_RX_BW_20: + case NET80211_STA_RX_BW_20: return (ieee80211_vht_check_tx_vht(ni)); - case IEEE80211_STA_RX_BW_40: + case NET80211_STA_RX_BW_40: return (ieee80211_vht_check_tx_vht40(ni)); - case IEEE80211_STA_RX_BW_80: + case NET80211_STA_RX_BW_80: return (ieee80211_vht_check_tx_vht80(ni)); - case IEEE80211_STA_RX_BW_160: + case NET80211_STA_RX_BW_160: return (ieee80211_vht_check_tx_vht160(ni)); - case IEEE80211_STA_RX_BW_320: + case NET80211_STA_RX_BW_320: return (false); default: return (false); @@ -1096,7 +1096,7 @@ ieee80211_vht_check_tx_bw(const struct ieee80211_node *ni, */ bool ieee80211_vht_node_check_tx_valid_mcs(const struct ieee80211_node *ni, - enum ieee80211_sta_rx_bw bw, uint8_t nss, uint8_t mcs) + enum net80211_sta_rx_bw bw, uint8_t nss, uint8_t mcs) { uint8_t mc; diff --git a/sys/net80211/ieee80211_vht.h b/sys/net80211/ieee80211_vht.h index a1529df4a85b..b9b19fbc6008 100644 --- a/sys/net80211/ieee80211_vht.h +++ b/sys/net80211/ieee80211_vht.h @@ -65,8 +65,8 @@ void ieee80211_vht_get_vhtinfo_ie(struct ieee80211_node *ni, bool ieee80211_vht_check_tx_vht(const struct ieee80211_node *); bool ieee80211_vht_check_tx_bw(const struct ieee80211_node *, - enum ieee80211_sta_rx_bw); + enum net80211_sta_rx_bw); bool ieee80211_vht_node_check_tx_valid_mcs(const struct ieee80211_node *, - enum ieee80211_sta_rx_bw bw, uint8_t, uint8_t); + enum net80211_sta_rx_bw bw, uint8_t, uint8_t); #endif /* _NET80211_IEEE80211_VHT_H_ */ diff --git a/sys/netgraph/ng_parse.c b/sys/netgraph/ng_parse.c index 448ecc92f075..5e1a1bb47ac0 100644 --- a/sys/netgraph/ng_parse.c +++ b/sys/netgraph/ng_parse.c @@ -1199,14 +1199,14 @@ ng_parse_composite(const struct ng_parse_type *type, const char *s, int *off, const u_char *const start, u_char *const buf, int *buflen, const enum comptype ctype) { - const int num = ng_get_composite_len(type, start, buf, ctype); int nextIndex = 0; /* next implicit array index */ u_int index; /* field or element index */ int *foff; /* field value offsets in string */ int align, len, blen, error = 0; /* Initialize */ - if (num < 0) + const int num = ng_get_composite_len(type, start, buf, ctype); + if (num < 0 || num > INT_MAX / sizeof(*foff)) return (EINVAL); foff = malloc(num * sizeof(*foff), M_NETGRAPH_PARSE, M_NOWAIT | M_ZERO); if (foff == NULL) { diff --git a/sys/netinet/in.c b/sys/netinet/in.c index 963449d4b4b1..70a61dbf93a3 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -522,9 +522,16 @@ in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct ucred *cred /* * Check if bridge wants to allow adding addrs to member interfaces. */ - if (ifp->if_bridge && bridge_member_ifaddrs_p && - !bridge_member_ifaddrs_p()) - return (EINVAL); + if (ifp->if_bridge != NULL && ifp->if_type != IFT_GIF && + bridge_member_ifaddrs_p != NULL) { + if (bridge_member_ifaddrs_p()) + if_printf(ifp, "WARNING: Assigning an IP address to " + "an interface which is also a bridge member is " + "deprecated and will be unsupported in a future " + "release.\n"); + else + return (EINVAL); + } /* * See whether address already exist. @@ -1882,6 +1889,8 @@ in_domifdetach(struct ifnet *ifp, void *aux) { struct in_ifinfo *ii = (struct in_ifinfo *)aux; + MPASS(ifp->if_afdata[AF_INET] == NULL); + igmp_domifdetach(ifp); lltable_free(ii->ii_llt); free(ii, M_IFADDR); diff --git a/sys/netinet/tcp_log_buf.c b/sys/netinet/tcp_log_buf.c index e24790ece43d..473c534ef83d 100644 --- a/sys/netinet/tcp_log_buf.c +++ b/sys/netinet/tcp_log_buf.c @@ -61,6 +61,9 @@ #include <net/vnet.h> #include <netinet/in.h> +#ifdef DDB +#include <netinet/in_kdtrace.h> +#endif #include <netinet/in_pcb.h> #include <netinet/in_var.h> #include <netinet/tcp_var.h> diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c index b6c55fac50b3..6e08ad2796a8 100644 --- a/sys/netinet/tcp_sack.c +++ b/sys/netinet/tcp_sack.c @@ -128,8 +128,25 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW, "Enable/Disable TCP SACK support"); VNET_DEFINE(int, tcp_do_newsack) = 1; -SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, revised, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(tcp_do_newsack), 0, + +static int +sysctl_net_inet_tcp_sack_revised(SYSCTL_HANDLER_ARGS) +{ + int error; + int new; + + new = V_tcp_do_newsack; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr) { + V_tcp_do_newsack = new; + gone_in(16, "net.inet.tcp.sack.revised will be deprecated." + " net.inet.tcp.sack.enable will always follow RFC6675 SACK.\n"); + } + return (error); +} + +SYSCTL_PROC(_net_inet_tcp_sack, OID_AUTO, revised, CTLFLAG_VNET | CTLFLAG_RW | CTLTYPE_INT, + &VNET_NAME(tcp_do_newsack), 0, sysctl_net_inet_tcp_sack_revised, "CU", "Use revised SACK loss recovery per RFC 6675"); VNET_DEFINE(int, tcp_do_lrd) = 1; diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 2e039ebbfdd2..cc83a21773a8 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -645,14 +645,14 @@ out: static int sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS) { - int error = ENOENT; struct tcp_function_set fs; struct tcp_function_block *blk; + int error; - memset(&fs, 0, sizeof(fs)); + memset(&fs, 0, sizeof(struct tcp_function_set)); rw_rlock(&tcp_function_lock); blk = find_tcp_fb_locked(V_tcp_func_set_ptr, NULL); - if (blk) { + if (blk != NULL) { /* Found him */ strcpy(fs.function_set_name, blk->tfb_tcp_block_name); fs.pcbcnt = blk->tfb_refcnt; diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 80e6b53d10df..1ee6c6e31f33 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -102,15 +102,15 @@ #include <security/mac/mac_framework.h> -VNET_DEFINE_STATIC(int, tcp_syncookies) = 1; +VNET_DEFINE_STATIC(bool, tcp_syncookies) = true; #define V_tcp_syncookies VNET(tcp_syncookies) -SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW, +SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_syncookies), 0, "Use TCP SYN cookies if the syncache overflows"); -VNET_DEFINE_STATIC(int, tcp_syncookiesonly) = 0; +VNET_DEFINE_STATIC(bool, tcp_syncookiesonly) = false; #define V_tcp_syncookiesonly VNET(tcp_syncookiesonly) -SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW, +SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_syncookiesonly), 0, "Use only TCP SYN cookies"); @@ -553,9 +553,8 @@ syncache_timer(void *xsch) static inline bool syncache_cookiesonly(void) { - - return (V_tcp_syncookies && (V_tcp_syncache.paused || - V_tcp_syncookiesonly)); + return ((V_tcp_syncookies && V_tcp_syncache.paused) || + V_tcp_syncookiesonly); } /* @@ -1083,40 +1082,48 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, #endif if (sc == NULL) { - /* - * There is no syncache entry, so see if this ACK is - * a returning syncookie. To do this, first: - * A. Check if syncookies are used in case of syncache - * overflows - * B. See if this socket has had a syncache entry dropped in - * the recent past. We don't want to accept a bogus - * syncookie if we've never received a SYN or accept it - * twice. - * C. check that the syncookie is valid. If it is, then - * cobble up a fake syncache entry, and return. - */ - if (locked && !V_tcp_syncookies) { - SCH_UNLOCK(sch); - TCPSTAT_INC(tcps_sc_spurcookie); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) - log(LOG_DEBUG, "%s; %s: Spurious ACK, " - "segment rejected (syncookies disabled)\n", - s, __func__); - goto failed; - } - if (locked && !V_tcp_syncookiesonly && - sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) { + if (locked) { + /* + * The syncache is currently in use (neither disabled, + * nor paused), but no entry was found. + */ + if (!V_tcp_syncookies) { + /* + * Since no syncookies are used in case of + * a bucket overflow, don't even check for + * a valid syncookie. + */ + SCH_UNLOCK(sch); + TCPSTAT_INC(tcps_sc_spurcookie); + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Spurious ACK, " + "segment rejected " + "(syncookies disabled)\n", + s, __func__); + goto failed; + } + if (sch->sch_last_overflow < + time_uptime - SYNCOOKIE_LIFETIME) { + /* + * Since the bucket did not overflow recently, + * don't even check for a valid syncookie. + */ + SCH_UNLOCK(sch); + TCPSTAT_INC(tcps_sc_spurcookie); + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Spurious ACK, " + "segment rejected " + "(no syncache entry)\n", + s, __func__); + goto failed; + } SCH_UNLOCK(sch); - TCPSTAT_INC(tcps_sc_spurcookie); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) - log(LOG_DEBUG, "%s; %s: Spurious ACK, " - "segment rejected (no syncache entry)\n", - s, __func__); - goto failed; } - if (locked) - SCH_UNLOCK(sch); bzero(&scs, sizeof(scs)); + /* + * Now check, if the syncookie is valid. If it is, create an on + * stack syncache entry. + */ if (syncookie_expand(inc, sch, &scs, th, to, *lsop, port)) { sc = &scs; TCPSTAT_INC(tcps_sc_recvcookie); @@ -1291,10 +1298,9 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, if (__predict_false(*lsop == NULL)) { TCPSTAT_INC(tcps_sc_aborted); TCPSTATES_DEC(TCPS_SYN_RECEIVED); - } else + } else if (sc != &scs) TCPSTAT_INC(tcps_sc_completed); -/* how do we find the inp for the new socket? */ if (sc != &scs) syncache_free(sc); return (1); @@ -1669,7 +1675,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, sc->sc_tsoff = tcp_new_ts_offset(inc); } if ((to->to_flags & TOF_SCALE) && (V_tcp_do_rfc1323 != 3)) { - int wscale = 0; + u_int wscale = 0; /* * Pick the smallest possible scaling factor that @@ -1719,13 +1725,13 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, if (V_tcp_do_ecn && (tp->t_flags2 & TF2_CANNOT_DO_ECN) == 0) sc->sc_flags |= tcp_ecn_syncache_add(tcp_get_flags(th), iptos); - if (V_tcp_syncookies) + if (V_tcp_syncookies || V_tcp_syncookiesonly) sc->sc_iss = syncookie_generate(sch, sc); else sc->sc_iss = arc4random(); #ifdef INET6 if (autoflowlabel) { - if (V_tcp_syncookies) + if (V_tcp_syncookies || V_tcp_syncookiesonly) sc->sc_flowlabel = sc->sc_iss; else sc->sc_flowlabel = ip6_randomflowlabel(); @@ -2265,7 +2271,7 @@ syncookie_expand(struct in_conninfo *inc, const struct syncache_head *sch, uint32_t hash; uint8_t *secbits; tcp_seq ack, seq; - int wnd, wscale = 0; + int wnd; union syncookie cookie; /* @@ -2316,12 +2322,14 @@ syncookie_expand(struct in_conninfo *inc, const struct syncache_head *sch, sc->sc_peer_mss = tcp_sc_msstab[cookie.flags.mss_idx]; - /* We can simply recompute receive window scale we sent earlier. */ - while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < sb_max) - wscale++; - /* Only use wscale if it was enabled in the orignal SYN. */ if (cookie.flags.wscale_idx > 0) { + u_int wscale = 0; + + /* Recompute the receive window scale that was sent earlier. */ + while (wscale < TCP_MAX_WINSHIFT && + (TCP_MAXWIN << wscale) < sb_max) + wscale++; sc->sc_requested_r_scale = wscale; sc->sc_requested_s_scale = tcp_sc_wstab[cookie.flags.wscale_idx]; sc->sc_flags |= SCF_WINSCALE; diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 3e6519118a40..cea8a916679b 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -223,16 +223,18 @@ VNET_SYSUNINIT(udp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udp_destroy, NULL); * udp_append() will convert to a sockaddr_in6 before passing the address * into the socket code. * - * In the normal case udp_append() will return 0, indicating that you - * must unlock the inp. However if a tunneling protocol is in place we increment - * the inpcb refcnt and unlock the inp, on return from the tunneling protocol we - * then decrement the reference count. If the inp_rele returns 1, indicating the - * inp is gone, we return that to the caller to tell them *not* to unlock - * the inp. In the case of multi-cast this will cause the distribution - * to stop (though most tunneling protocols known currently do *not* use - * multicast). + * In the normal case udp_append() will return 'false', indicating that you + * must unlock the inpcb. However if a tunneling protocol is in place we + * increment the inpcb refcnt and unlock the inpcb, on return from the tunneling + * protocol we then decrement the reference count. If in_pcbrele_rlocked() + * returns 'true', indicating the inpcb is gone, we return that to the caller + * to tell them *not* to unlock the inpcb. In the case of multicast this will + * cause the distribution to stop (though most tunneling protocols known + * currently do *not* use multicast). + * + * The mbuf is always consumed. */ -static int +static bool udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, struct sockaddr_in *udp_in) { @@ -255,15 +257,16 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, in_pcbref(inp); INP_RUNLOCK(inp); - filtered = (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&udp_in[0], - up->u_tun_ctx); + filtered = (*up->u_tun_func)(n, off, inp, + (struct sockaddr *)&udp_in[0], up->u_tun_ctx); INP_RLOCK(inp); - if (in_pcbrele_rlocked(inp)) - return (1); - if (filtered) { - INP_RUNLOCK(inp); - return (1); + if (in_pcbrele_rlocked(inp)) { + if (!filtered) + m_freem(n); + return (true); } + if (filtered) + return (false); } off += sizeof(struct udphdr); @@ -273,18 +276,18 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, if (IPSEC_ENABLED(ipv4) && IPSEC_CHECK_POLICY(ipv4, n, inp) != 0) { m_freem(n); - return (0); + return (false); } if (up->u_flags & UF_ESPINUDP) {/* IPSec UDP encaps. */ if (IPSEC_ENABLED(ipv4) && UDPENCAP_INPUT(ipv4, n, off, AF_INET) != 0) - return (0); /* Consumed. */ + return (false); } #endif /* IPSEC */ #ifdef MAC if (mac_inpcb_check_deliver(inp, n) != 0) { m_freem(n); - return (0); + return (false); } #endif /* MAC */ if (inp->inp_flags & INP_CONTROLOPTS || @@ -330,7 +333,7 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, UDPSTAT_INC(udps_fullsock); } else sorwakeup_locked(so); - return (0); + return (false); } static bool @@ -699,7 +702,7 @@ udp_input(struct mbuf **mp, int *offp, int proto) UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh); else UDP_PROBE(receive, NULL, inp, ip, inp, uh); - if (udp_append(inp, ip, m, iphlen, udp_in) == 0) + if (!udp_append(inp, ip, m, iphlen, udp_in)) INP_RUNLOCK(inp); return (IPPROTO_DONE); diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index ce0655408a28..4f756a75fac7 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -1235,11 +1235,20 @@ in6_addifaddr(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *i int carp_attached = 0; int error; - /* Check if this interface is a bridge member */ - if (ifp->if_bridge && bridge_member_ifaddrs_p && - !bridge_member_ifaddrs_p()) { - error = EINVAL; - goto out; + /* + * Check if bridge wants to allow adding addrs to member interfaces. + */ + if (ifp->if_bridge != NULL && ifp->if_type != IFT_GIF && + bridge_member_ifaddrs_p != NULL) { + if (bridge_member_ifaddrs_p()) { + if_printf(ifp, "WARNING: Assigning an IP address to " + "an interface which is also a bridge member is " + "deprecated and will be unsupported in a future " + "release.\n"); + } else { + error = EINVAL; + goto out; + } } /* @@ -2618,6 +2627,8 @@ in6_domifdetach(struct ifnet *ifp, void *aux) { struct in6_ifextra *ext = (struct in6_ifextra *)aux; + MPASS(ifp->if_afdata[AF_INET6] == NULL); + mld_domifdetach(ifp); scope6_ifdetach(ext->scope6_id); nd6_ifdetach(ifp, ext->nd_ifinfo); diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c index f284f7fa5ffc..cc149616006e 100644 --- a/sys/netinet6/in6_ifattach.c +++ b/sys/netinet6/in6_ifattach.c @@ -83,7 +83,6 @@ VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcbinfo VNET(ripcbinfo) static int get_rand_ifid(struct ifnet *, struct in6_addr *); -static int get_ifid(struct ifnet *, struct ifnet *, struct in6_addr *); static int in6_ifattach_linklocal(struct ifnet *, struct ifnet *); static int in6_ifattach_loopback(struct ifnet *); static void in6_purgemaddrs(struct ifnet *); @@ -271,8 +270,8 @@ found: * * altifp - secondary EUI64 source */ -static int -get_ifid(struct ifnet *ifp0, struct ifnet *altifp, +int +in6_get_ifid(struct ifnet *ifp0, struct ifnet *altifp, struct in6_addr *in6) { struct ifnet *ifp; @@ -356,7 +355,7 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp) ifra.ifra_addr.sin6_addr.s6_addr32[3] = htonl(1); } else { NET_EPOCH_ENTER(et); - error = get_ifid(ifp, altifp, &ifra.ifra_addr.sin6_addr); + error = in6_get_ifid(ifp, altifp, &ifra.ifra_addr.sin6_addr); NET_EPOCH_EXIT(et); if (error != 0) { nd6log((LOG_ERR, diff --git a/sys/netinet6/in6_ifattach.h b/sys/netinet6/in6_ifattach.h index 897926e90078..fd52422b10be 100644 --- a/sys/netinet6/in6_ifattach.h +++ b/sys/netinet6/in6_ifattach.h @@ -41,6 +41,7 @@ void in6_ifdetach(struct ifnet *); void in6_ifdetach_destroy(struct ifnet *); void in6_tmpaddrtimer(void *); int in6_get_hw_ifid(struct ifnet *, struct in6_addr *); +int in6_get_ifid(struct ifnet *, struct ifnet *, struct in6_addr *); int in6_nigroup(struct ifnet *, const char *, int, struct in6_addr *); int in6_nigroup_oldmcprefix(struct ifnet *, const char *, int, struct in6_addr *); #endif /* _KERNEL */ diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index b9af0a78a584..6fe78083df23 100644 --- a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -1182,9 +1182,9 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast) struct ifnet *ifp = pr->ndpr_ifp; struct ifaddr *ifa; struct in6_aliasreq ifra; - struct in6_ifaddr *ia, *ib; + struct in6_ifaddr *ia = NULL, *ib = NULL; int error, plen0; - struct in6_addr mask; + struct in6_addr *ifid_addr = NULL, mask; int prefixlen = pr->ndpr_plen; int updateflags; char ip6buf[INET6_ADDRSTRLEN]; @@ -1212,18 +1212,42 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast) * with different interface identifiers. */ ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */ - if (ifa) + if (ifa) { ib = (struct in6_ifaddr *)ifa; - else - return NULL; + ifid_addr = &ib->ia_addr.sin6_addr; + + /* prefixlen + ifidlen must be equal to 128 */ + plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL); + if (prefixlen != plen0) { + ifa_free(ifa); + ifid_addr = NULL; + nd6log((LOG_DEBUG, + "%s: wrong prefixlen for %s (prefix=%d ifid=%d)\n", + __func__, if_name(ifp), prefixlen, 128 - plen0)); + } + } - /* prefixlen + ifidlen must be equal to 128 */ - plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL); - if (prefixlen != plen0) { - ifa_free(ifa); + /* No suitable LL address, get the ifid directly */ + if (ifid_addr == NULL) { + struct in6_addr taddr; + ifa = ifa_alloc(sizeof(taddr), M_WAITOK); + if (ifa) { + ib = (struct in6_ifaddr *)ifa; + ifid_addr = &ib->ia_addr.sin6_addr; + if(in6_get_ifid(ifp, NULL, ifid_addr) != 0) { + nd6log((LOG_DEBUG, + "%s: failed to get ifid for %s\n", + __func__, if_name(ifp))); + ifa_free(ifa); + ifid_addr = NULL; + } + } + } + + if (ifid_addr == NULL) { nd6log((LOG_INFO, - "%s: wrong prefixlen for %s (prefix=%d ifid=%d)\n", - __func__, if_name(ifp), prefixlen, 128 - plen0)); + "%s: could not determine ifid for %s\n", + __func__, if_name(ifp))); return NULL; } @@ -1233,13 +1257,13 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast) IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr, &mask); /* interface ID */ ifra.ifra_addr.sin6_addr.s6_addr32[0] |= - (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]); + (ifid_addr->s6_addr32[0] & ~mask.s6_addr32[0]); ifra.ifra_addr.sin6_addr.s6_addr32[1] |= - (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]); + (ifid_addr->s6_addr32[1] & ~mask.s6_addr32[1]); ifra.ifra_addr.sin6_addr.s6_addr32[2] |= - (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]); + (ifid_addr->s6_addr32[2] & ~mask.s6_addr32[2]); ifra.ifra_addr.sin6_addr.s6_addr32[3] |= - (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]); + (ifid_addr->s6_addr32[3] & ~mask.s6_addr32[3]); ifa_free(ifa); /* lifetimes. */ diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index 585c196391c0..7b9405ee1f8d 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -605,7 +605,8 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) rt_kif = rpool_first->kif; /* * Guess the AF of the route address, FreeBSD 13 does - * not support af-to so it should be safe. + * not support af-to nor prefer-ipv6-nexthop + * so it should be safe. */ rt_af = r->af; } else if (!PF_AZERO(&sp->pfs_1301.rt_addr, sp->pfs_1301.af)) { @@ -634,8 +635,9 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) } rt = sp->pfs_1400.rt; /* - * Guess the AF of the route address, FreeBSD 13 does - * not support af-to so it should be safe. + * Guess the AF of the route address, FreeBSD 14 does + * not support af-to nor prefer-ipv6-nexthop + * so it should be safe. */ rt_af = sp->pfs_1400.af; } @@ -1741,16 +1743,16 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) return (EINVAL); - data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK); + data = malloc(ifr->ifr_cap_nv.length, M_PF, M_WAITOK); if ((error = copyin(ifr->ifr_cap_nv.buffer, data, ifr->ifr_cap_nv.length)) != 0) { - free(data, M_TEMP); + free(data, M_PF); return (error); } if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { - free(data, M_TEMP); + free(data, M_PF); return (EINVAL); } @@ -1758,7 +1760,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) pfsync_nvstatus_to_kstatus(nvl, &status); nvlist_destroy(nvl); - free(data, M_TEMP); + free(data, M_PF); error = pfsync_kstatus_to_softc(&status, sc); return (error); diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 8cd4fff95b15..5889bb9d68e6 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -1667,7 +1667,6 @@ pf_state_key_addr_setup(struct pf_pdesc *pd, #ifdef INET6 struct nd_neighbor_solicit nd; struct pf_addr *target; - u_short action, reason; if (pd->af == AF_INET || pd->proto != IPPROTO_ICMPV6) goto copy; @@ -1676,7 +1675,8 @@ pf_state_key_addr_setup(struct pf_pdesc *pd, case ND_NEIGHBOR_SOLICIT: if (multi) return (-1); - if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), &action, &reason, pd->af)) + if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL, + pd->af)) return (-1); target = (struct pf_addr *)&nd.nd_ns_target; daddr = target; @@ -1684,7 +1684,8 @@ pf_state_key_addr_setup(struct pf_pdesc *pd, case ND_NEIGHBOR_ADVERT: if (multi) return (-1); - if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), &action, &reason, pd->af)) + if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL, + pd->af)) return (-1); target = (struct pf_addr *)&nd.nd_ns_target; saddr = target; @@ -3632,6 +3633,18 @@ pf_translate_af(struct pf_pdesc *pd) pd->src = (struct pf_addr *)&ip4->ip_src; pd->dst = (struct pf_addr *)&ip4->ip_dst; pd->off = sizeof(struct ip); + if (pd->m->m_pkthdr.csum_flags & CSUM_TCP_IPV6) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP_IPV6; + pd->m->m_pkthdr.csum_flags |= CSUM_TCP; + } + if (pd->m->m_pkthdr.csum_flags & CSUM_UDP_IPV6) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP_IPV6; + pd->m->m_pkthdr.csum_flags |= CSUM_UDP; + } + if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; + pd->m->m_pkthdr.csum_flags |= CSUM_SCTP; + } break; case AF_INET6: ip6 = mtod(pd->m, struct ip6_hdr *); @@ -3649,6 +3662,18 @@ pf_translate_af(struct pf_pdesc *pd) pd->src = (struct pf_addr *)&ip6->ip6_src; pd->dst = (struct pf_addr *)&ip6->ip6_dst; pd->off = sizeof(struct ip6_hdr); + if (pd->m->m_pkthdr.csum_flags & CSUM_TCP) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP; + pd->m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; + } + if (pd->m->m_pkthdr.csum_flags & CSUM_UDP) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP; + pd->m->m_pkthdr.csum_flags |= CSUM_UDP_IPV6; + } + if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP; + pd->m->m_pkthdr.csum_flags |= CSUM_SCTP_IPV6; + } /* * If we're dealing with a reassembled packet we need to adjust @@ -4019,7 +4044,7 @@ pf_modulate_sack(struct pf_pdesc *pd, struct tcphdr *th, optsoff = pd->off + sizeof(struct tcphdr); #define TCPOLEN_MINSACK (TCPOLEN_SACK + 2) if (olen < TCPOLEN_MINSACK || - !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) + !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af)) return (0); eoh = opts + olen; @@ -5082,7 +5107,7 @@ pf_get_wscale(struct pf_pdesc *pd) olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m, - pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) + pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af)) return (0); opt = opts; @@ -5107,7 +5132,7 @@ pf_get_mss(struct pf_pdesc *pd) olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m, - pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) + pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af)) return (0); opt = opts; @@ -5960,7 +5985,9 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, if (r->rt) { /* * Set act.rt here instead of in pf_rule_to_actions() because - * it is applied only from the last pass rule. + * it is applied only from the last pass rule. For rules + * with the prefer-ipv6-nexthop option act.rt_af is a hint + * about AF of the forwarded packet and might be changed. */ pd->act.rt = r->rt; if (r->rt == PF_REPLYTO) @@ -7633,7 +7660,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) while (off < len) { struct sctp_paramhdr h; - if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL, NULL, + if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL, pd->af)) return (PF_DROP); @@ -7653,7 +7680,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) return (PF_DROP); if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t), - NULL, NULL, pd->af)) + NULL, pd->af)) return (PF_DROP); if (in_nullhost(t)) @@ -7697,7 +7724,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) return (PF_DROP); if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t), - NULL, NULL, pd->af)) + NULL, pd->af)) return (PF_DROP); if (memcmp(&t, &pd->src->v6, sizeof(t)) == 0) break; @@ -7727,7 +7754,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) struct sctp_asconf_paramhdr ah; if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah), - NULL, NULL, pd->af)) + NULL, pd->af)) return (PF_DROP); ret = pf_multihome_scan(start + off + sizeof(ah), @@ -7742,7 +7769,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) struct sctp_asconf_paramhdr ah; if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah), - NULL, NULL, pd->af)) + NULL, pd->af)) return (PF_DROP); ret = pf_multihome_scan(start + off + sizeof(ah), ntohs(ah.ph.param_length) - sizeof(ah), pd, @@ -8024,7 +8051,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, ipoff2 = pd->off + ICMP_MINLEN; if (!pf_pull_hdr(pd->m, ipoff2, &h2, sizeof(h2), - NULL, reason, pd2.af)) { + reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, "pf: ICMP error message too short " "(ip)"); @@ -8045,6 +8072,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, return (PF_DROP); pd2.tot_len = ntohs(h2.ip_len); + pd2.ttl = h2.ip_ttl; pd2.src = (struct pf_addr *)&h2.ip_src; pd2.dst = (struct pf_addr *)&h2.ip_dst; pd2.ip_sum = &h2.ip_sum; @@ -8055,7 +8083,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, ipoff2 = pd->off + sizeof(struct icmp6_hdr); if (!pf_pull_hdr(pd->m, ipoff2, &h2_6, sizeof(h2_6), - NULL, reason, pd2.af)) { + reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, "pf: ICMP error message too short " "(ip6)"); @@ -8067,6 +8095,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, pd2.tot_len = ntohs(h2_6.ip6_plen) + sizeof(struct ip6_hdr); + pd2.ttl = h2_6.ip6_hlim; pd2.src = (struct pf_addr *)&h2_6.ip6_src; pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; pd2.ip_sum = NULL; @@ -8107,7 +8136,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, * expected. Don't access any TCP header fields after * th_seq, an ackskew test is not possible. */ - if (!pf_pull_hdr(pd->m, pd2.off, th, 8, NULL, reason, + if (!pf_pull_hdr(pd->m, pd2.off, th, 8, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, "pf: ICMP error message too short " @@ -8303,7 +8332,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, int action; if (!pf_pull_hdr(pd->m, pd2.off, uh, sizeof(*uh), - NULL, reason, pd2.af)) { + reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, "pf: ICMP error message too short " "(udp)"); @@ -8434,7 +8463,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, int copyback = 0; int action; - if (! pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), NULL, reason, + if (! pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, "pf: ICMP error message too short " @@ -8590,7 +8619,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, } if (!pf_pull_hdr(pd->m, pd2.off, iih, ICMP_MINLEN, - NULL, reason, pd2.af)) { + reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, "pf: ICMP error message too short i" "(icmp)"); @@ -8710,7 +8739,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, } if (!pf_pull_hdr(pd->m, pd2.off, iih, - sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { + sizeof(struct icmp6_hdr), reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, "pf: ICMP error message too short " "(icmp6)"); @@ -8825,6 +8854,11 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, default: { int action; + /* + * Placeholder value, so future calls to pf_change_ap() + * don't try to update a NULL checksum pointer. + */ + pd->pcksum = &pd->sctp_dummy_sum; key.af = pd2.af; key.proto = pd2.proto; pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); @@ -8887,7 +8921,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, */ void * pf_pull_hdr(const struct mbuf *m, int off, void *p, int len, - u_short *actionp, u_short *reasonp, sa_family_t af) + u_short *reasonp, sa_family_t af) { int iplen = 0; switch (af) { @@ -8897,12 +8931,7 @@ pf_pull_hdr(const struct mbuf *m, int off, void *p, int len, u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; if (fragoff) { - if (fragoff >= len) - ACTION_SET(actionp, PF_PASS); - else { - ACTION_SET(actionp, PF_DROP); - REASON_SET(reasonp, PFRES_FRAG); - } + REASON_SET(reasonp, PFRES_FRAG); return (NULL); } iplen = ntohs(h->ip_len); @@ -8919,7 +8948,6 @@ pf_pull_hdr(const struct mbuf *m, int off, void *p, int len, #endif /* INET6 */ } if (m->m_pkthdr.len < off + len || iplen < off + len) { - ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_SHORT); return (NULL); } @@ -8974,9 +9002,10 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp) { struct mbuf *m0, *m1, *md; - struct route ro; - const struct sockaddr *gw = &ro.ro_dst; - struct sockaddr_in *dst; + struct route_in6 ro; + union sockaddr_union rt_gw; + const union sockaddr_union *gw = (const union sockaddr_union *)&ro.ro_dst; + union sockaddr_union *dst; struct ip *ip; struct ifnet *ifp = NULL; int error = 0; @@ -9071,10 +9100,35 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, ip = mtod(m0, struct ip *); bzero(&ro, sizeof(ro)); - dst = (struct sockaddr_in *)&ro.ro_dst; - dst->sin_family = AF_INET; - dst->sin_len = sizeof(struct sockaddr_in); - dst->sin_addr.s_addr = pd->act.rt_addr.v4.s_addr; + dst = (union sockaddr_union *)&ro.ro_dst; + dst->sin.sin_family = AF_INET; + dst->sin.sin_len = sizeof(struct sockaddr_in); + dst->sin.sin_addr = ip->ip_dst; + if (ifp) { /* Only needed in forward direction and route-to */ + bzero(&rt_gw, sizeof(rt_gw)); + ro.ro_flags |= RT_HAS_GW; + gw = &rt_gw; + switch (pd->act.rt_af) { +#ifdef INET + case AF_INET: + rt_gw.sin.sin_family = AF_INET; + rt_gw.sin.sin_len = sizeof(struct sockaddr_in); + rt_gw.sin.sin_addr.s_addr = pd->act.rt_addr.v4.s_addr; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + rt_gw.sin6.sin6_family = AF_INET6; + rt_gw.sin6.sin6_len = sizeof(struct sockaddr_in6); + pf_addrcpy((struct pf_addr *)&rt_gw.sin6.sin6_addr, + &pd->act.rt_addr, AF_INET6); + break; +#endif /* INET6 */ + default: + /* Normal af-to without route-to */ + break; + } + } if (pd->dir == PF_IN) { if (ip->ip_ttl <= IPTTLDEC) { @@ -9098,10 +9152,10 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, /* Use the gateway if needed. */ if (nh->nh_flags & NHF_GATEWAY) { - gw = &nh->gw_sa; + gw = (const union sockaddr_union *)&nh->gw_sa; ro.ro_flags |= RT_HAS_GW; } else { - dst->sin_addr = ip->ip_dst; + dst->sin.sin_addr = ip->ip_dst; } /* @@ -9126,6 +9180,9 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, PF_STATE_UNLOCK(s); } + /* It must have been either set from rt_af or from fib4_lookup */ + KASSERT(gw->sin.sin_family != 0, ("%s: gw address family undetermined", __func__)); + if (ifp == NULL) { m0 = pd->m; pd->m = NULL; @@ -9210,9 +9267,11 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, m_clrprotoflags(m0); /* Avoid confusing lower layers. */ md = m0; - error = pf_dummynet_route(pd, s, r, ifp, gw, &md); + error = pf_dummynet_route(pd, s, r, ifp, + (const struct sockaddr *)gw, &md); if (md != NULL) { - error = (*ifp->if_output)(ifp, md, gw, &ro); + error = (*ifp->if_output)(ifp, md, + (const struct sockaddr *)gw, (struct route *)&ro); SDT_PROBE2(pf, ip, route_to, output, ifp, error); } goto done; @@ -9253,9 +9312,11 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, md = m0; pd->pf_mtag = pf_find_mtag(md); error = pf_dummynet_route(pd, s, r, ifp, - gw, &md); + (const struct sockaddr *)gw, &md); if (md != NULL) { - error = (*ifp->if_output)(ifp, md, gw, &ro); + error = (*ifp->if_output)(ifp, md, + (const struct sockaddr *)gw, + (struct route *)&ro); SDT_PROBE2(pf, ip, route_to, output, ifp, error); } } else @@ -9962,9 +10023,12 @@ pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) pd->proto = h->ip_p; /* IGMP packets have router alert options, allow them */ if (pd->proto == IPPROTO_IGMP) { - /* According to RFC 1112 ttl must be set to 1. */ - if ((h->ip_ttl != 1) || - !IN_MULTICAST(ntohl(h->ip_dst.s_addr))) { + /* + * According to RFC 1112 ttl must be set to 1 in all IGMP + * packets sent to 224.0.0.1 + */ + if ((h->ip_ttl != 1) && + (h->ip_dst.s_addr == INADDR_ALLHOSTS_GROUP)) { DPFPRINTF(PF_DEBUG_MISC, "Invalid IGMP"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); @@ -9982,7 +10046,7 @@ pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) end < pd->off + sizeof(ext)) return (PF_PASS); if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), - NULL, reason, AF_INET)) { + reason, AF_INET)) { DPFPRINTF(PF_DEBUG_MISC, "IP short exthdr"); return (PF_DROP); } @@ -10008,7 +10072,7 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, while (off < end) { if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type, - sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) { + sizeof(opt.ip6o_type), reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt type"); return (PF_DROP); } @@ -10016,7 +10080,7 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, off++; continue; } - if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), NULL, + if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt"); return (PF_DROP); @@ -10041,7 +10105,7 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } - if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), NULL, + if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbo"); return (PF_DROP); @@ -10090,7 +10154,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) break; case IPPROTO_HOPOPTS: if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), - NULL, reason, AF_INET6)) { + reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr"); return (PF_DROP); } @@ -10117,7 +10181,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) return (PF_DROP); } if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag), - NULL, reason, AF_INET6)) { + reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short fragment"); return (PF_DROP); } @@ -10145,7 +10209,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) return (PF_PASS); } if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr), - NULL, reason, AF_INET6)) { + reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short rthdr"); return (PF_DROP); } @@ -10166,7 +10230,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) case IPPROTO_AH: case IPPROTO_DSTOPTS: if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), - NULL, reason, AF_INET6)) { + reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr"); return (PF_DROP); } @@ -10199,7 +10263,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) return (PF_PASS); } if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6), - NULL, reason, AF_INET6)) { + reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short icmp6hdr"); return (PF_DROP); @@ -10432,7 +10496,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, case IPPROTO_TCP: { struct tcphdr *th = &pd->hdr.tcp; - if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), action, + if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); @@ -10448,7 +10512,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, case IPPROTO_UDP: { struct udphdr *uh = &pd->hdr.udp; - if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), action, + if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); @@ -10469,7 +10533,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, } case IPPROTO_SCTP: { if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.sctp, sizeof(pd->hdr.sctp), - action, reason, af)) { + reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); return (-1); @@ -10499,7 +10563,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, } case IPPROTO_ICMP: { if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN, - action, reason, af)) { + reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); return (-1); @@ -10513,7 +10577,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, size_t icmp_hlen = sizeof(struct icmp6_hdr); if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, - action, reason, af)) { + reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); return (-1); @@ -10539,7 +10603,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, } if (icmp_hlen > sizeof(struct icmp6_hdr) && !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, - action, reason, af)) { + reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); return (-1); @@ -10549,6 +10613,13 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, break; } #endif /* INET6 */ + default: + /* + * Placeholder value, so future calls to pf_change_ap() don't + * try to update a NULL checksum pointer. + */ + pd->pcksum = &pd->sctp_dummy_sum; + break; } if (pd->sport) @@ -10556,6 +10627,8 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, if (pd->dport) pd->odport = pd->ndport = *pd->dport; + MPASS(pd->pcksum != NULL); + return (0); } diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h index 51b3fd6390e1..8edd5a5110a1 100644 --- a/sys/netpfil/pf/pf.h +++ b/sys/netpfil/pf/pf.h @@ -131,6 +131,7 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, #define PF_POOL_TYPEMASK 0x0f #define PF_POOL_STICKYADDR 0x20 #define PF_POOL_ENDPI 0x40 +#define PF_POOL_IPV6NH 0x80 #define PF_WSCALE_FLAG 0x80 #define PF_WSCALE_MASK 0x0f diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index e5da05a958f6..06c40a03f575 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -187,6 +187,7 @@ VNET_DEFINE(uma_zone_t, pf_tag_z); #define V_pf_tag_z VNET(pf_tag_z) static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db"); static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules"); +MALLOC_DEFINE(M_PF, "pf", "pf(4)"); #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE @@ -1181,18 +1182,18 @@ pf_rule_tree_alloc(int flags) { struct pf_krule_global *tree; - tree = malloc(sizeof(struct pf_krule_global), M_TEMP, flags); + tree = malloc(sizeof(struct pf_krule_global), M_PF, flags); if (tree == NULL) return (NULL); RB_INIT(tree); return (tree); } -static void +void pf_rule_tree_free(struct pf_krule_global *tree) { - free(tree, M_TEMP); + free(tree, M_PF); } static int @@ -1211,7 +1212,7 @@ pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) return (ENOMEM); rs = pf_find_or_create_kruleset(anchor); if (rs == NULL) { - free(tree, M_TEMP); + pf_rule_tree_free(tree); return (EINVAL); } pf_rule_tree_free(rs->rules[rs_num].inactive.tree); @@ -1432,7 +1433,7 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) rs->rules[rs_num].inactive.rcount = 0; rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_kruleset(rs); - free(old_tree, M_TEMP); + pf_rule_tree_free(old_tree); return (0); } @@ -2276,6 +2277,7 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, rule->nat.cur = TAILQ_FIRST(&rule->nat.list); rule->rdr.cur = TAILQ_FIRST(&rule->rdr.list); rule->route.cur = TAILQ_FIRST(&rule->route.list); + rule->route.ipv6_nexthop_af = AF_INET6; TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); ruleset->rules[rs_num].inactive.rcount++; @@ -4076,7 +4078,7 @@ DIOCCHANGERULE_error: out = ps->ps_states; pstore = mallocarray(slice_count, - sizeof(struct pfsync_state_1301), M_TEMP, M_WAITOK | M_ZERO); + sizeof(struct pfsync_state_1301), M_PF, M_WAITOK | M_ZERO); nr = 0; for (i = 0; i <= V_pf_hashmask; i++) { @@ -4098,10 +4100,10 @@ DIOCGETSTATES_retry: if (count > slice_count) { PF_HASHROW_UNLOCK(ih); - free(pstore, M_TEMP); + free(pstore, M_PF); slice_count = count * 2; pstore = mallocarray(slice_count, - sizeof(struct pfsync_state_1301), M_TEMP, + sizeof(struct pfsync_state_1301), M_PF, M_WAITOK | M_ZERO); goto DIOCGETSTATES_retry; } @@ -4123,13 +4125,15 @@ DIOCGETSTATES_retry: PF_HASHROW_UNLOCK(ih); error = copyout(pstore, out, sizeof(struct pfsync_state_1301) * count); - if (error) + if (error) { + free(pstore, M_PF); goto fail; + } out = ps->ps_states + nr; } DIOCGETSTATES_full: ps->ps_len = sizeof(struct pfsync_state_1301) * nr; - free(pstore, M_TEMP); + free(pstore, M_PF); break; } @@ -4155,7 +4159,7 @@ DIOCGETSTATES_full: out = ps->ps_states; pstore = mallocarray(slice_count, - sizeof(struct pf_state_export), M_TEMP, M_WAITOK | M_ZERO); + sizeof(struct pf_state_export), M_PF, M_WAITOK | M_ZERO); nr = 0; for (i = 0; i <= V_pf_hashmask; i++) { @@ -4177,10 +4181,10 @@ DIOCGETSTATESV2_retry: if (count > slice_count) { PF_HASHROW_UNLOCK(ih); - free(pstore, M_TEMP); + free(pstore, M_PF); slice_count = count * 2; pstore = mallocarray(slice_count, - sizeof(struct pf_state_export), M_TEMP, + sizeof(struct pf_state_export), M_PF, M_WAITOK | M_ZERO); goto DIOCGETSTATESV2_retry; } @@ -4201,13 +4205,15 @@ DIOCGETSTATESV2_retry: PF_HASHROW_UNLOCK(ih); error = copyout(pstore, out, sizeof(struct pf_state_export) * count); - if (error) + if (error) { + free(pstore, M_PF); goto fail; + } out = ps->ps_states + nr; } DIOCGETSTATESV2_full: ps->ps_len = nr * sizeof(struct pf_state_export); - free(pstore, M_TEMP); + free(pstore, M_PF); break; } @@ -4737,17 +4743,17 @@ DIOCCHANGEADDR_error: totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { - free(pfrts, M_TEMP); + free(pfrts, M_PF); goto fail; } PF_RULES_WLOCK(); error = pfr_add_tables(pfrts, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4769,17 +4775,17 @@ DIOCCHANGEADDR_error: totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { - free(pfrts, M_TEMP); + free(pfrts, M_PF); goto fail; } PF_RULES_WLOCK(); error = pfr_del_tables(pfrts, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4805,7 +4811,7 @@ DIOCCHANGEADDR_error: totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_NOWAIT | M_ZERO); + M_PF, M_NOWAIT | M_ZERO); if (pfrts == NULL) { error = ENOMEM; PF_RULES_RUNLOCK(); @@ -4816,7 +4822,7 @@ DIOCCHANGEADDR_error: PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfrts, io->pfrio_buffer, totlen); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4843,7 +4849,7 @@ DIOCCHANGEADDR_error: totlen = io->pfrio_size * sizeof(struct pfr_tstats); pfrtstats = mallocarray(io->pfrio_size, - sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT | M_ZERO); + sizeof(struct pfr_tstats), M_PF, M_NOWAIT | M_ZERO); if (pfrtstats == NULL) { error = ENOMEM; PF_RULES_RUNLOCK(); @@ -4856,7 +4862,7 @@ DIOCCHANGEADDR_error: PF_TABLE_STATS_UNLOCK(); if (error == 0) error = copyout(pfrtstats, io->pfrio_buffer, totlen); - free(pfrtstats, M_TEMP); + free(pfrtstats, M_PF); break; } @@ -4881,10 +4887,10 @@ DIOCCHANGEADDR_error: totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { - free(pfrts, M_TEMP); + free(pfrts, M_PF); goto fail; } @@ -4894,7 +4900,7 @@ DIOCCHANGEADDR_error: &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); PF_TABLE_STATS_UNLOCK(); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4922,10 +4928,10 @@ DIOCCHANGEADDR_error: totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { - free(pfrts, M_TEMP); + free(pfrts, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -4933,7 +4939,7 @@ DIOCCHANGEADDR_error: io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4968,10 +4974,10 @@ DIOCCHANGEADDR_error: } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); + free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -4982,7 +4988,7 @@ DIOCCHANGEADDR_error: PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5003,10 +5009,10 @@ DIOCCHANGEADDR_error: } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); + free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -5016,7 +5022,7 @@ DIOCCHANGEADDR_error: PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5040,11 +5046,11 @@ DIOCCHANGEADDR_error: goto fail; } totlen = count * sizeof(struct pfr_addr); - pfras = mallocarray(count, sizeof(struct pfr_addr), M_TEMP, + pfras = mallocarray(count, sizeof(struct pfr_addr), M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); + free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -5055,7 +5061,7 @@ DIOCCHANGEADDR_error: PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5076,14 +5082,14 @@ DIOCCHANGEADDR_error: } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK | M_ZERO); + M_PF, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); error = pfr_get_addrs(&io->pfrio_table, pfras, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5104,14 +5110,14 @@ DIOCCHANGEADDR_error: } totlen = io->pfrio_size * sizeof(struct pfr_astats); pfrastats = mallocarray(io->pfrio_size, - sizeof(struct pfr_astats), M_TEMP, M_WAITOK | M_ZERO); + sizeof(struct pfr_astats), M_PF, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); error = pfr_get_astats(&io->pfrio_table, pfrastats, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfrastats, io->pfrio_buffer, totlen); - free(pfrastats, M_TEMP); + free(pfrastats, M_PF); break; } @@ -5132,10 +5138,10 @@ DIOCCHANGEADDR_error: } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); + free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -5145,7 +5151,7 @@ DIOCCHANGEADDR_error: PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5166,10 +5172,10 @@ DIOCCHANGEADDR_error: } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); + free(pfras, M_PF); goto fail; } PF_RULES_RLOCK(); @@ -5179,7 +5185,7 @@ DIOCCHANGEADDR_error: PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5200,10 +5206,10 @@ DIOCCHANGEADDR_error: } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); + free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -5211,7 +5217,7 @@ DIOCCHANGEADDR_error: io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr, io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5249,10 +5255,10 @@ DIOCCHANGEADDR_error: } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -5262,7 +5268,7 @@ DIOCCHANGEADDR_error: case PF_RULESET_ETH: if ((error = pf_begin_eth(&ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } break; @@ -5270,13 +5276,13 @@ DIOCCHANGEADDR_error: case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } if ((error = pf_begin_altq(&ioe->ticket))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } break; @@ -5291,7 +5297,7 @@ DIOCCHANGEADDR_error: if ((error = pfr_ina_begin(&table, &ioe->ticket, NULL, 0))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } break; @@ -5300,7 +5306,7 @@ DIOCCHANGEADDR_error: if ((error = pf_begin_rules(&ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } break; @@ -5308,7 +5314,7 @@ DIOCCHANGEADDR_error: } PF_RULES_WUNLOCK(); error = copyout(ioes, io->array, totlen); - free(ioes, M_TEMP); + free(ioes, M_PF); break; } @@ -5330,10 +5336,10 @@ DIOCCHANGEADDR_error: } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -5344,7 +5350,7 @@ DIOCCHANGEADDR_error: if ((error = pf_rollback_eth(ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5352,13 +5358,13 @@ DIOCCHANGEADDR_error: case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } if ((error = pf_rollback_altq(ioe->ticket))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5373,7 +5379,7 @@ DIOCCHANGEADDR_error: if ((error = pfr_ina_rollback(&table, ioe->ticket, NULL, 0))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5382,14 +5388,14 @@ DIOCCHANGEADDR_error: if ((error = pf_rollback_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; } } PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); break; } @@ -5415,10 +5421,10 @@ DIOCCHANGEADDR_error: totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -5431,7 +5437,7 @@ DIOCCHANGEADDR_error: if (ers == NULL || ioe->ticket == 0 || ioe->ticket != ers->inactive.ticket) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } @@ -5440,14 +5446,14 @@ DIOCCHANGEADDR_error: case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } if (!V_altqs_inactive_open || ioe->ticket != V_ticket_altqs_inactive) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EBUSY; goto fail; } @@ -5458,7 +5464,7 @@ DIOCCHANGEADDR_error: if (rs == NULL || !rs->topen || ioe->ticket != rs->tticket) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EBUSY; goto fail; } @@ -5467,7 +5473,7 @@ DIOCCHANGEADDR_error: if (ioe->rs_num < 0 || ioe->rs_num >= PF_RULESET_MAX) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } @@ -5477,7 +5483,7 @@ DIOCCHANGEADDR_error: rs->rules[ioe->rs_num].inactive.ticket != ioe->ticket) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EBUSY; goto fail; } @@ -5490,7 +5496,7 @@ DIOCCHANGEADDR_error: case PF_RULESET_ETH: if ((error = pf_commit_eth(ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5498,7 +5504,7 @@ DIOCCHANGEADDR_error: case PF_RULESET_ALTQ: if ((error = pf_commit_altq(ioe->ticket))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5513,7 +5519,7 @@ DIOCCHANGEADDR_error: if ((error = pfr_ina_commit(&table, ioe->ticket, NULL, NULL, 0))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5522,7 +5528,7 @@ DIOCCHANGEADDR_error: if ((error = pf_commit_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5536,7 +5542,7 @@ DIOCCHANGEADDR_error: else dehook_pf_eth(); - free(ioes, M_TEMP); + free(ioes, M_PF); break; } @@ -5565,7 +5571,7 @@ DIOCCHANGEADDR_error: nr = 0; - p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK | M_ZERO); + p = pstore = malloc(psn->psn_len, M_PF, M_WAITOK | M_ZERO); for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); @@ -5584,11 +5590,11 @@ DIOCCHANGEADDR_error: error = copyout(pstore, psn->psn_src_nodes, sizeof(struct pf_src_node) * nr); if (error) { - free(pstore, M_TEMP); + free(pstore, M_PF); goto fail; } psn->psn_len = sizeof(struct pf_src_node) * nr; - free(pstore, M_TEMP); + free(pstore, M_PF); break; } @@ -5655,13 +5661,13 @@ DIOCCHANGEADDR_error: bufsiz = io->pfiio_size * sizeof(struct pfi_kif); ifstore = mallocarray(io->pfiio_size, sizeof(struct pfi_kif), - M_TEMP, M_WAITOK | M_ZERO); + M_PF, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size); PF_RULES_RUNLOCK(); error = copyout(ifstore, io->pfiio_buffer, bufsiz); - free(ifstore, M_TEMP); + free(ifstore, M_PF); break; } diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index bc9e1dc72902..b8b5157c9b15 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -545,11 +545,18 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, uint64_t hashidx; int cnt; sa_family_t wanted_af; + u_int8_t pool_type; + bool prefer_ipv6_nexthop = rpool->opts & PF_POOL_IPV6NH; KASSERT(saf != 0, ("%s: saf == 0", __func__)); KASSERT(naf != NULL, ("%s: naf = NULL", __func__)); KASSERT((*naf) != 0, ("%s: *naf = 0", __func__)); + /* + * Given (*naf) is a hint about AF of the forwarded packet. + * It might be changed if prefer_ipv6_nexthop is enabled and + * the combination of nexthop AF and packet AF allows for it. + */ wanted_af = (*naf); mtx_lock(&rpool->mtx); @@ -594,19 +601,38 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, } else { raddr = &rpool->cur->addr.v.a.addr; rmask = &rpool->cur->addr.v.a.mask; - /* - * For single addresses check their address family. Unless they - * have none, which happens when addresses are added with - * the old ioctl mechanism. In such case trust that the address - * has the proper AF. - */ - if (rpool->cur->af && rpool->cur->af != wanted_af) { - reason = PFRES_MAPFAILED; - goto done_pool_mtx; + } + + /* + * For pools with a single host with the prefer-ipv6-nexthop option + * we can return pool address of any AF, unless the forwarded packet + * is IPv6, then we can return only if pool address is IPv6. + * For non-prefer-ipv6-nexthop we can return pool address only + * of wanted AF, unless the pool address'es AF is unknown, which + * happens in case old ioctls have been used to set up the pool. + * + * Round-robin pools have their own logic for retrying next addresses. + */ + pool_type = rpool->opts & PF_POOL_TYPEMASK; + if (pool_type == PF_POOL_NONE || pool_type == PF_POOL_BITMASK || + ((pool_type == PF_POOL_RANDOM || pool_type == PF_POOL_SRCHASH) && + rpool->cur->addr.type != PF_ADDR_TABLE && + rpool->cur->addr.type != PF_ADDR_DYNIFTL)) { + if (prefer_ipv6_nexthop) { + if (rpool->cur->af == AF_INET && (*naf) == AF_INET6) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } + wanted_af = rpool->cur->af; + } else { + if (rpool->cur->af != 0 && rpool->cur->af != (*naf)) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } } } - switch (rpool->opts & PF_POOL_TYPEMASK) { + switch (pool_type) { case PF_POOL_NONE: pf_addrcpy(naddr, raddr, wanted_af); break; @@ -631,10 +657,22 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, else rpool->tblidx = (int)arc4random_uniform(cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); + if (prefer_ipv6_nexthop) + wanted_af = AF_INET6; + retry_other_af_random: if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, false)) { - reason = PFRES_MAPFAILED; - goto done_pool_mtx; /* unsupported */ + /* Retry with IPv4 nexthop for IPv4 traffic */ + if (prefer_ipv6_nexthop && + wanted_af == AF_INET6 && + (*naf) == AF_INET) { + wanted_af = AF_INET; + goto retry_other_af_random; + } else { + /* no hosts in wanted AF */ + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } } pf_addrcpy(naddr, &rpool->counter, wanted_af); } else if (init_addr != NULL && PF_AZERO(init_addr, @@ -702,10 +740,22 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, else rpool->tblidx = (int)(hashidx % cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); + if (prefer_ipv6_nexthop) + wanted_af = AF_INET6; + retry_other_af_srchash: if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, false)) { - reason = PFRES_MAPFAILED; - goto done_pool_mtx; /* unsupported */ + /* Retry with IPv4 nexthop for IPv4 traffic */ + if (prefer_ipv6_nexthop && + wanted_af == AF_INET6 && + (*naf) == AF_INET) { + wanted_af = AF_INET; + goto retry_other_af_srchash; + } else { + /* no hosts in wanted AF */ + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } } pf_addrcpy(naddr, &rpool->counter, wanted_af); } else { @@ -718,6 +768,9 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, { struct pf_kpooladdr *acur = rpool->cur; + retry_other_af_rr: + if (prefer_ipv6_nexthop) + wanted_af = rpool->ipv6_nexthop_af; if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!pfr_pool_get(rpool->cur->addr.p.tbl, &rpool->tblidx, &rpool->counter, wanted_af, @@ -728,46 +781,55 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, true)) goto get_addr; - } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, - wanted_af)) + } else if (rpool->cur->af == wanted_af && + pf_match_addr(0, raddr, rmask, &rpool->counter, wanted_af)) goto get_addr; - + if (prefer_ipv6_nexthop && + (*naf) == AF_INET && wanted_af == AF_INET6) { + /* Reset table index when changing wanted AF. */ + rpool->tblidx = -1; + rpool->ipv6_nexthop_af = AF_INET; + goto retry_other_af_rr; + } try_next: + /* Reset prefer-ipv6-nexthop search to IPv6 when iterating pools. */ + rpool->ipv6_nexthop_af = AF_INET6; if (TAILQ_NEXT(rpool->cur, entries) == NULL) rpool->cur = TAILQ_FIRST(&rpool->list); else rpool->cur = TAILQ_NEXT(rpool->cur, entries); + try_next_ipv6_nexthop_rr: + /* Reset table index when iterating pools or changing wanted AF. */ rpool->tblidx = -1; + if (prefer_ipv6_nexthop) + wanted_af = rpool->ipv6_nexthop_af; if (rpool->cur->addr.type == PF_ADDR_TABLE) { - if (pfr_pool_get(rpool->cur->addr.p.tbl, + if (!pfr_pool_get(rpool->cur->addr.p.tbl, &rpool->tblidx, &rpool->counter, wanted_af, NULL, - true)) { - /* table contains no address of type 'wanted_af' */ - if (rpool->cur != acur) - goto try_next; - reason = PFRES_MAPFAILED; - goto done_pool_mtx; - } + true)) + goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, wanted_af, - pf_islinklocal, true)) { - /* interface has no address of type 'wanted_af' */ - if (rpool->cur != acur) - goto try_next; - reason = PFRES_MAPFAILED; - goto done_pool_mtx; - } + if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, + true)) + goto get_addr; } else { - raddr = &rpool->cur->addr.v.a.addr; - rmask = &rpool->cur->addr.v.a.mask; - if (rpool->cur->af && rpool->cur->af != wanted_af) { - reason = PFRES_MAPFAILED; - goto done_pool_mtx; + if (rpool->cur->af == wanted_af) { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + pf_addrcpy(&rpool->counter, raddr, wanted_af); + goto get_addr; } - pf_addrcpy(&rpool->counter, raddr, wanted_af); } - + if (prefer_ipv6_nexthop && + (*naf) == AF_INET && wanted_af == AF_INET6) { + rpool->ipv6_nexthop_af = AF_INET; + goto try_next_ipv6_nexthop_rr; + } + if (rpool->cur != acur) + goto try_next; + reason = PFRES_MAPFAILED; + goto done_pool_mtx; get_addr: pf_addrcpy(naddr, &rpool->counter, wanted_af); if (init_addr != NULL && PF_AZERO(init_addr, wanted_af)) @@ -777,9 +839,16 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, } } + if (wanted_af == 0) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } + if (nkif) *nkif = rpool->cur->kif; + (*naf) = wanted_af; + done_pool_mtx: mtx_unlock(&rpool->mtx); diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c index 45b5b8dd5fef..73f018db0266 100644 --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -763,6 +763,8 @@ static const struct nlattr_parser nla_p_rule[] = { { .type = PF_RT_RCV_IFNOT, .off = _OUT(rcvifnot), .cb = nlattr_get_bool }, { .type = PF_RT_PKTRATE, .off = _OUT(pktrate), .arg = &threshold_parser, .cb = nlattr_get_nested }, { .type = PF_RT_MAX_PKT_SIZE, .off = _OUT(max_pkt_size), .cb = nlattr_get_uint16 }, + { .type = PF_RT_TYPE_2, .off = _OUT(type), .cb = nlattr_get_uint16 }, + { .type = PF_RT_CODE_2, .off = _OUT(code), .cb = nlattr_get_uint16 }, }; NL_DECLARE_ATTR_PARSER(rule_parser, nla_p_rule); #undef _OUT @@ -984,8 +986,12 @@ pf_handle_getrule(struct nlmsghdr *hdr, struct nl_pstate *npt) nlattr_add_u8(nw, PF_RT_AF, rule->af); nlattr_add_u8(nw, PF_RT_NAF, rule->naf); nlattr_add_u8(nw, PF_RT_PROTO, rule->proto); + nlattr_add_u8(nw, PF_RT_TYPE, rule->type); nlattr_add_u8(nw, PF_RT_CODE, rule->code); + nlattr_add_u16(nw, PF_RT_TYPE_2, rule->type); + nlattr_add_u16(nw, PF_RT_CODE_2, rule->code); + nlattr_add_u8(nw, PF_RT_FLAGS, rule->flags); nlattr_add_u8(nw, PF_RT_FLAGSET, rule->flagset); nlattr_add_u8(nw, PF_RT_MIN_TTL, rule->min_ttl); @@ -1945,7 +1951,7 @@ pf_handle_get_tstats(struct nlmsghdr *hdr, struct nl_pstate *npt) n = pfr_table_count(&attrs.pfrio_table, attrs.pfrio_flags); pfrtstats = mallocarray(n, - sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT | M_ZERO); + sizeof(struct pfr_tstats), M_PF, M_NOWAIT | M_ZERO); error = pfr_get_tstats(&attrs.pfrio_table, pfrtstats, &n, attrs.pfrio_flags | PFR_FLAG_USERIOCTL); @@ -1997,7 +2003,7 @@ pf_handle_get_tstats(struct nlmsghdr *hdr, struct nl_pstate *npt) } } } - free(pfrtstats, M_TEMP); + free(pfrtstats, M_PF); if (!nlmsg_end_dump(npt->nw, error, hdr)) { NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); diff --git a/sys/netpfil/pf/pf_nl.h b/sys/netpfil/pf/pf_nl.h index 87daac393821..b60d3d4797c6 100644 --- a/sys/netpfil/pf/pf_nl.h +++ b/sys/netpfil/pf/pf_nl.h @@ -283,6 +283,8 @@ enum pf_rule_type_t { PF_RT_SRC_NODES_ROUTE = 81, /* u64 */ PF_RT_PKTRATE = 82, /* nested, pf_threshold_type_t */ PF_RT_MAX_PKT_SIZE = 83, /* u16 */ + PF_RT_TYPE_2 = 84, /* u16 */ + PF_RT_CODE_2 = 85, /* u16 */ }; enum pf_addrule_type_t { diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c index a684d778ab42..56074bedbc40 100644 --- a/sys/netpfil/pf/pf_norm.c +++ b/sys/netpfil/pf/pf_norm.c @@ -1354,7 +1354,7 @@ pf_normalize_ip6(int off, u_short *reason, pf_rule_to_actions(r, &pd->act); } - if (!pf_pull_hdr(pd->m, off, &frag, sizeof(frag), NULL, reason, AF_INET6)) + if (!pf_pull_hdr(pd->m, off, &frag, sizeof(frag), reason, AF_INET6)) return (PF_DROP); /* Offset now points to data portion. */ @@ -1542,7 +1542,7 @@ pf_normalize_tcp_init(struct pf_pdesc *pd, struct tcphdr *th, olen = (th->th_off << 2) - sizeof(*th); if (olen < TCPOLEN_TIMESTAMP || !pf_pull_hdr(pd->m, - pd->off + sizeof(*th), opts, olen, NULL, NULL, pd->af)) + pd->off + sizeof(*th), opts, olen, NULL, pd->af)) return (0); opt = opts; @@ -1645,7 +1645,7 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd, if (olen >= TCPOLEN_TIMESTAMP && ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && - pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, olen, NULL, NULL, pd->af)) { + pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, olen, NULL, pd->af)) { /* Modulate the timestamps. Can be used for NAT detection, OS * uptime determination or reboot detection. */ @@ -1975,7 +1975,7 @@ pf_normalize_mss(struct pf_pdesc *pd) olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); optsoff = pd->off + sizeof(struct tcphdr); if (olen < TCPOLEN_MAXSEG || - !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) + !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af)) return (0); opt = opts; @@ -2009,7 +2009,7 @@ pf_scan_sctp(struct pf_pdesc *pd) int ret; while (pd->off + chunk_off < pd->tot_len) { - if (!pf_pull_hdr(pd->m, pd->off + chunk_off, &ch, sizeof(ch), NULL, + if (!pf_pull_hdr(pd->m, pd->off + chunk_off, &ch, sizeof(ch), NULL, pd->af)) return (PF_DROP); @@ -2026,7 +2026,7 @@ pf_scan_sctp(struct pf_pdesc *pd) struct sctp_init_chunk init; if (!pf_pull_hdr(pd->m, pd->off + chunk_start, &init, - sizeof(init), NULL, NULL, pd->af)) + sizeof(init), NULL, pd->af)) return (PF_DROP); /* diff --git a/sys/netpfil/pf/pf_nv.c b/sys/netpfil/pf/pf_nv.c index 89486928e6e1..2f484e2dabc6 100644 --- a/sys/netpfil/pf/pf_nv.c +++ b/sys/netpfil/pf/pf_nv.c @@ -505,6 +505,7 @@ int pf_nvrule_to_krule(const nvlist_t *nvl, struct pf_krule *rule) { int error = 0; + uint8_t tmp; #define ERROUT(x) ERROUT_FUNCTION(errout, x) @@ -610,8 +611,10 @@ pf_nvrule_to_krule(const nvlist_t *nvl, struct pf_krule *rule) PFNV_CHK(pf_nvuint8(nvl, "keep_state", &rule->keep_state)); PFNV_CHK(pf_nvuint8(nvl, "af", &rule->af)); PFNV_CHK(pf_nvuint8(nvl, "proto", &rule->proto)); - PFNV_CHK(pf_nvuint8(nvl, "type", &rule->type)); - PFNV_CHK(pf_nvuint8(nvl, "code", &rule->code)); + PFNV_CHK(pf_nvuint8(nvl, "type", &tmp)); + rule->type = tmp; + PFNV_CHK(pf_nvuint8(nvl, "code", &tmp)); + rule->code = tmp; PFNV_CHK(pf_nvuint8(nvl, "flags", &rule->flags)); PFNV_CHK(pf_nvuint8(nvl, "flagset", &rule->flagset)); PFNV_CHK(pf_nvuint8(nvl, "min_ttl", &rule->min_ttl)); diff --git a/sys/netpfil/pf/pf_osfp.c b/sys/netpfil/pf/pf_osfp.c index 150626c5f3fb..8c041d45eae8 100644 --- a/sys/netpfil/pf/pf_osfp.c +++ b/sys/netpfil/pf/pf_osfp.c @@ -82,7 +82,7 @@ pf_osfp_fingerprint(struct pf_pdesc *pd, const struct tcphdr *tcp) ip6 = mtod(pd->m, struct ip6_hdr *); break; } - if (!pf_pull_hdr(pd->m, pd->off, hdr, tcp->th_off << 2, NULL, NULL, + if (!pf_pull_hdr(pd->m, pd->off, hdr, tcp->th_off << 2, NULL, pd->af)) return (NULL); return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr)); diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c index 039908a53126..1711e690f6bb 100644 --- a/sys/netpfil/pf/pf_ruleset.c +++ b/sys/netpfil/pf/pf_ruleset.c @@ -59,8 +59,8 @@ #error "Kernel only file. Please use sbin/pfctl/pf_ruleset.c instead." #endif -#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT|M_ZERO) -#define rs_free(x) free(x, M_TEMP) +#define rs_malloc(x) malloc(x, M_PF, M_NOWAIT|M_ZERO) +#define rs_free(x) free(x, M_PF) VNET_DEFINE(struct pf_kanchor_global, pf_anchors); VNET_DEFINE(struct pf_kanchor, pf_main_anchor); @@ -336,6 +336,12 @@ pf_remove_if_empty_kruleset(struct pf_kruleset *ruleset) int i; while (ruleset != NULL) { + for (int i = 0; i < PF_RULESET_MAX; i++) { + pf_rule_tree_free(ruleset->rules[i].active.tree); + ruleset->rules[i].active.tree = NULL; + pf_rule_tree_free(ruleset->rules[i].inactive.tree); + ruleset->rules[i].inactive.tree = NULL; + } if (ruleset == &pf_main_ruleset || !RB_EMPTY(&ruleset->anchor->children) || ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || diff --git a/sys/powerpc/conf/GENERIC64 b/sys/powerpc/conf/GENERIC64 index 630c88b97dd7..48f9df5b7e38 100644 --- a/sys/powerpc/conf/GENERIC64 +++ b/sys/powerpc/conf/GENERIC64 @@ -234,9 +234,9 @@ device wlan # 802.11 support options IEEE80211_SUPPORT_MESH # enable 802.11s draft support options IEEE80211_DEBUG # enable debug msgs device wlan_wep # 802.11 WEP support +device wlan_tkip # 802.11 TKIP support device wlan_ccmp # 802.11 CCMP support device wlan_gcmp # 802.11 GCMP support -device wlan_tkip # 802.11 TKIP support device wlan_amrr # AMRR transmit rate control algorithm device ath # Atheros CardBus/PCI NICs device ath_hal # Atheros CardBus/PCI chip support diff --git a/sys/powerpc/conf/GENERIC64LE b/sys/powerpc/conf/GENERIC64LE index eb9a9441425d..9af71f30626d 100644 --- a/sys/powerpc/conf/GENERIC64LE +++ b/sys/powerpc/conf/GENERIC64LE @@ -230,9 +230,9 @@ device wlan # 802.11 support options IEEE80211_SUPPORT_MESH # enable 802.11s draft support options IEEE80211_DEBUG # enable debug msgs device wlan_wep # 802.11 WEP support +device wlan_tkip # 802.11 TKIP support device wlan_ccmp # 802.11 CCMP support device wlan_gcmp # 802.11 GCMP support -device wlan_tkip # 802.11 TKIP support device wlan_amrr # AMRR transmit rate control algorithm device ath # Atheros CardBus/PCI NICs device ath_hal # Atheros CardBus/PCI chip support diff --git a/sys/riscv/include/vmm.h b/sys/riscv/include/vmm.h index 1221521be368..de7119dd534a 100644 --- a/sys/riscv/include/vmm.h +++ b/sys/riscv/include/vmm.h @@ -49,6 +49,7 @@ enum vm_suspend_how { VM_SUSPEND_RESET, VM_SUSPEND_POWEROFF, VM_SUSPEND_HALT, + VM_SUSPEND_DESTROY, VM_SUSPEND_LAST }; diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c index 7528ef6e4698..ec4514f70fa6 100644 --- a/sys/riscv/vmm/vmm.c +++ b/sys/riscv/vmm/vmm.c @@ -1036,10 +1036,14 @@ vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, static int vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) { + struct vm *vm; + vm = vcpu->vm; vcpu_lock(vcpu); - while (1) { + if (vm->suspend) + break; + if (aplic_check_pending(vcpu->cookie)) break; diff --git a/sys/sys/cpu.h b/sys/sys/cpu.h index b6a0094f0c51..5bb55679a05b 100644 --- a/sys/sys/cpu.h +++ b/sys/sys/cpu.h @@ -40,25 +40,31 @@ #define CPU_IVAR_CPUID_SIZE 3 #define CPU_IVAR_CPUID 4 -static __inline struct pcpu *cpu_get_pcpu(device_t dev) +static __inline struct pcpu * +cpu_get_pcpu(device_t dev) { uintptr_t v = 0; + BUS_READ_IVAR(device_get_parent(dev), dev, CPU_IVAR_PCPU, &v); return ((struct pcpu *)v); } -static __inline int32_t cpu_get_nominal_mhz(device_t dev) +static __inline int32_t +cpu_get_nominal_mhz(device_t dev) { uintptr_t v = 0; + if (BUS_READ_IVAR(device_get_parent(dev), dev, CPU_IVAR_NOMINAL_MHZ, &v) != 0) return (-1); return ((int32_t)v); } -static __inline const uint32_t *cpu_get_cpuid(device_t dev, size_t *count) +static __inline const uint32_t * +cpu_get_cpuid(device_t dev, size_t *count) { uintptr_t v = 0; + if (BUS_READ_IVAR(device_get_parent(dev), dev, CPU_IVAR_CPUID_SIZE, &v) != 0) return (NULL); @@ -124,10 +130,10 @@ TAILQ_HEAD(cf_level_lst, cf_level); * state. It is probably a bug to not combine this with "info only" */ #define CPUFREQ_TYPE_MASK 0xffff -#define CPUFREQ_TYPE_RELATIVE (1<<0) -#define CPUFREQ_TYPE_ABSOLUTE (1<<1) -#define CPUFREQ_FLAG_INFO_ONLY (1<<16) -#define CPUFREQ_FLAG_UNCACHED (1<<17) +#define CPUFREQ_TYPE_RELATIVE (1 << 0) +#define CPUFREQ_TYPE_ABSOLUTE (1 << 1) +#define CPUFREQ_FLAG_INFO_ONLY (1 << 16) +#define CPUFREQ_FLAG_UNCACHED (1 << 17) /* * When setting a level, the caller indicates the priority of this request. @@ -162,7 +168,7 @@ int cpufreq_settings_changed(device_t dev); * The new level and the result of the change (0 is success) is passed in. * If the driver wishes to revoke the change from cpufreq_pre_change, it * stores a non-zero error code in the result parameter and the change will - * not be made. If the post-change eventhandler gets a non-zero result, + * not be made. If the post-change eventhandler gets a non-zero result, * no change was made and the previous level remains in effect. If a change * is revoked, the post-change eventhandler is still called with the error * value supplied by the revoking driver. This gives listeners who cached diff --git a/sys/sys/event.h b/sys/sys/event.h index 1b30e4292de8..f161d2c938c1 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -45,7 +45,8 @@ #define EVFILT_USER (-11) /* User events */ #define EVFILT_SENDFILE (-12) /* attached to sendfile requests */ #define EVFILT_EMPTY (-13) /* empty send socket buf */ -#define EVFILT_SYSCOUNT 13 +#define EVFILT_JAIL (-14) /* attached to struct prison */ +#define EVFILT_SYSCOUNT 14 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define EV_SET(kevp_, a, b, c, d, e, f) do { \ @@ -204,10 +205,19 @@ struct freebsd11_kevent32 { #define NOTE_PCTRLMASK 0xf0000000 /* mask for hint bits */ #define NOTE_PDATAMASK 0x000fffff /* mask for pid */ -/* additional flags for EVFILT_PROC */ -#define NOTE_TRACK 0x00000001 /* follow across forks */ +/* data/hint flags for EVFILT_JAIL */ +#define NOTE_JAIL_SET 0x80000000 /* jail was modified */ +#define NOTE_JAIL_CHILD 0x40000000 /* child jail was created */ +#define NOTE_JAIL_ATTACH 0x20000000 /* jail was attached to */ +#define NOTE_JAIL_REMOVE 0x10000000 /* jail was removed */ +#define NOTE_JAIL_ATTACH_MULTI 0x08000000 /* multiple procs attached */ +#define NOTE_JAIL_CTRLMASK 0xf0000000 /* mask for hint bits */ +#define NOTE_JAIL_DATAMASK 0x000fffff /* mask for pid */ + +/* additional flags for EVFILT_PROC and EVFILT_JAIL */ +#define NOTE_TRACK 0x00000001 /* follow across fork/create */ #define NOTE_TRACKERR 0x00000002 /* could not track child */ -#define NOTE_CHILD 0x00000004 /* am a child process */ +#define NOTE_CHILD 0x00000004 /* am a child process/jail */ /* additional flags for EVFILT_TIMER */ #define NOTE_SECONDS 0x00000001 /* data is seconds */ @@ -309,6 +319,7 @@ struct knote { struct proc *p_proc; /* proc pointer */ struct kaiocb *p_aio; /* AIO job pointer */ struct aioliojob *p_lio; /* LIO job pointer */ + struct prison *p_prison; /* prison pointer */ void *p_v; /* generic other pointer */ } kn_ptr; const struct filterops *kn_fop; diff --git a/sys/sys/exterrvar.h b/sys/sys/exterrvar.h index 7bf1d264ff5e..6783a0d2d84f 100644 --- a/sys/sys/exterrvar.h +++ b/sys/sys/exterrvar.h @@ -31,7 +31,7 @@ #error "Specify error category before including sys/exterrvar.h" #endif -#ifdef BLOAT_KERNEL_WITH_EXTERR +#ifdef EXTERR_STRINGS #define SET_ERROR_MSG(mmsg) (mmsg) #else #define SET_ERROR_MSG(mmsg) NULL diff --git a/sys/sys/file.h b/sys/sys/file.h index 63313926c4f0..cc3c733580fd 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -72,6 +72,7 @@ struct nameidata; #define DTYPE_EVENTFD 13 /* eventfd */ #define DTYPE_TIMERFD 14 /* timerfd */ #define DTYPE_INOTIFY 15 /* inotify descriptor */ +#define DTYPE_JAILDESC 16 /* jail descriptor */ #ifdef _KERNEL diff --git a/sys/sys/jail.h b/sys/sys/jail.h index d2655c52e832..e12e8c3178c9 100644 --- a/sys/sys/jail.h +++ b/sys/sys/jail.h @@ -99,8 +99,12 @@ enum prison_state { #define JAIL_UPDATE 0x02 /* Update parameters of existing jail */ #define JAIL_ATTACH 0x04 /* Attach to jail upon creation */ #define JAIL_DYING 0x08 /* Allow getting a dying jail */ -#define JAIL_SET_MASK 0x0f /* JAIL_DYING is deprecated/ignored here */ -#define JAIL_GET_MASK 0x08 +#define JAIL_USE_DESC 0x10 /* Get/set jail in descriptor */ +#define JAIL_AT_DESC 0x20 /* Find/add jail under descriptor */ +#define JAIL_GET_DESC 0x40 /* Return a new jail descriptor */ +#define JAIL_OWN_DESC 0x80 /* Return a new owning jail descriptor */ +#define JAIL_SET_MASK 0xff /* JAIL_DYING is deprecated/ignored here */ +#define JAIL_GET_MASK 0xf8 #define JAIL_SYS_DISABLE 0 #define JAIL_SYS_NEW 1 @@ -115,7 +119,9 @@ int jail(struct jail *); int jail_set(struct iovec *, unsigned int, int); int jail_get(struct iovec *, unsigned int, int); int jail_attach(int); +int jail_attach_jd(int); int jail_remove(int); +int jail_remove_jd(int); __END_DECLS #else /* _KERNEL */ @@ -144,6 +150,8 @@ MALLOC_DECLARE(M_PRISON); #define JAIL_META_PRIVATE "meta" #define JAIL_META_SHARED "env" +struct jaildesc; +struct knlist; struct racct; struct prison_racct; @@ -189,7 +197,9 @@ struct prison { struct vnode *pr_root; /* (c) vnode to rdir */ struct prison_ip *pr_addrs[PR_FAMILY_MAX]; /* (p,n) IPs of jail */ struct prison_racct *pr_prison_racct; /* (c) racct jail proxy */ - void *pr_sparep[3]; + struct knlist *pr_klist; /* (m) attached knotes */ + LIST_HEAD(, jaildesc) pr_descs; /* (a) attached descriptors */ + void *pr_sparep; int pr_childcount; /* (a) number of child jails */ int pr_childmax; /* (p) maximum child jails */ unsigned pr_allow; /* (p) PR_ALLOW_* flags */ @@ -425,10 +435,11 @@ SYSCTL_DECL(_security_jail_param); /* * Kernel support functions for jail(). */ -struct ucred; +struct knote; struct mount; struct sockaddr; struct statfs; +struct ucred; struct vfsconf; /* @@ -463,6 +474,7 @@ void prison_proc_free(struct prison *); void prison_proc_link(struct prison *, struct proc *); void prison_proc_unlink(struct prison *, struct proc *); void prison_proc_iterate(struct prison *, void (*)(struct proc *, void *), void *); +void prison_remove(struct prison *); void prison_set_allow(struct ucred *cred, unsigned flag, int enable); bool prison_ischild(struct prison *, struct prison *); bool prison_isalive(const struct prison *); diff --git a/sys/sys/jaildesc.h b/sys/sys/jaildesc.h new file mode 100644 index 000000000000..2451b04f7302 --- /dev/null +++ b/sys/sys/jaildesc.h @@ -0,0 +1,83 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 James Gritton. + * All rights reserved. + * + * This software was developed at the University of Cambridge Computer + * Laboratory with support from a grant from Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _SYS_JAILDESC_H_ +#define _SYS_JAILDESC_H_ + +#ifdef _KERNEL + +#include <sys/queue.h> +#include <sys/_lock.h> +#include <sys/_mutex.h> +#include <sys/_types.h> + +struct prison; + +/*- + * struct jaildesc describes a jail descriptor, which points to a struct + * prison. struct prison in turn has a linked list of struct jaildesc. + * + * Locking key: + * (c) set on creation, remains unchanged + * (d) jd_lock + * (p) jd_prison->pr_mtx + */ +struct jaildesc { + LIST_ENTRY(jaildesc) jd_list; /* (d,p) this prison's descs */ + struct prison *jd_prison; /* (d) the prison */ + struct mtx jd_lock; + unsigned jd_flags; /* (d) JDF_* flags */ +}; + +/* + * Locking macros for the jaildesc. + */ +#define JAILDESC_LOCK_DESTROY(jd) mtx_destroy(&(jd)->jd_lock) +#define JAILDESC_LOCK_INIT(jd) mtx_init(&(jd)->jd_lock, "jaildesc", \ + NULL, MTX_DEF) +#define JAILDESC_LOCK(jd) mtx_lock(&(jd)->jd_lock) +#define JAILDESC_UNLOCK(jd) mtx_unlock(&(jd)->jd_lock) + +/* + * Flags for the jd_flags field + */ +#define JDF_REMOVED 0x00000002 /* jail was removed */ +#define JDF_OWNING 0x00000004 /* closing descriptor removes jail */ + +int jaildesc_find(struct thread *td, int fd, struct prison **prp, + struct ucred **ucredp); +int jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning); +void jaildesc_set_prison(struct file *jd, struct prison *pr); +void jaildesc_prison_cleanup(struct prison *pr); + +#endif /* _KERNEL */ + +#endif /* !_SYS_JAILDESC_H_ */ diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h index 380099092107..417afd4dbbe4 100644 --- a/sys/sys/kernel.h +++ b/sys/sys/kernel.h @@ -249,15 +249,8 @@ struct sysinit_tslog { const void *data; const char *name; }; -static inline void -sysinit_tslog_shim(const void *data) -{ - const struct sysinit_tslog *x = data; - - TSRAW(curthread, TS_ENTER, "SYSINIT", x->name); - (x->func)(x->data); - TSRAW(curthread, TS_EXIT, "SYSINIT", x->name); -} +void sysinit_tslog_shim(const void *); + #define C_SYSINIT(uniquifier, subsystem, order, func, ident) \ static struct sysinit_tslog uniquifier ## _sys_init_tslog = { \ func, \ @@ -322,7 +315,7 @@ void sysinit_add(struct sysinit **set, struct sysinit **set_end); * int * please avoid using for new tunables! */ -extern void tunable_int_init(void *); +extern void tunable_int_init(const void *); struct tunable_int { const char *path; int *var; @@ -341,7 +334,7 @@ struct tunable_int { /* * long */ -extern void tunable_long_init(void *); +extern void tunable_long_init(const void *); struct tunable_long { const char *path; long *var; @@ -360,7 +353,7 @@ struct tunable_long { /* * unsigned long */ -extern void tunable_ulong_init(void *); +extern void tunable_ulong_init(const void *); struct tunable_ulong { const char *path; unsigned long *var; @@ -379,7 +372,7 @@ struct tunable_ulong { /* * int64_t */ -extern void tunable_int64_init(void *); +extern void tunable_int64_init(const void *); struct tunable_int64 { const char *path; int64_t *var; @@ -398,7 +391,7 @@ struct tunable_int64 { /* * uint64_t */ -extern void tunable_uint64_init(void *); +extern void tunable_uint64_init(const void *); struct tunable_uint64 { const char *path; uint64_t *var; @@ -417,7 +410,7 @@ struct tunable_uint64 { /* * quad */ -extern void tunable_quad_init(void *); +extern void tunable_quad_init(const void *); struct tunable_quad { const char *path; quad_t *var; @@ -436,7 +429,7 @@ struct tunable_quad { /* * bool */ -extern void tunable_bool_init(void *); +extern void tunable_bool_init(const void *); struct tunable_bool { const char *path; bool *var; @@ -452,7 +445,7 @@ struct tunable_bool { #define TUNABLE_BOOL_FETCH(path, var) getenv_bool((path), (var)) -extern void tunable_str_init(void *); +extern void tunable_str_init(const void *); struct tunable_str { const char *path; char *var; diff --git a/sys/sys/mount.h b/sys/sys/mount.h index f6480b173a5c..18f85192f6c3 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -1007,6 +1007,7 @@ struct mntarg *mount_argsu(struct mntarg *ma, const char *name, const void *val, void statfs_scale_blocks(struct statfs *sf, long max_size); struct vfsconf *vfs_byname(const char *); struct vfsconf *vfs_byname_kld(const char *, struct thread *td, int *); +void vfs_unref_vfsconf(struct vfsconf *vfsp); void vfs_mount_destroy(struct mount *); void vfs_event_signal(fsid_t *, u_int32_t, intptr_t); void vfs_freeopts(struct vfsoptlist *opts); diff --git a/sys/sys/mutex.h b/sys/sys/mutex.h index 56c03a1b0be9..08d4e2d28b33 100644 --- a/sys/sys/mutex.h +++ b/sys/sys/mutex.h @@ -91,7 +91,7 @@ void _mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts); void _mtx_destroy(volatile uintptr_t *c); -void mtx_sysinit(void *arg); +void mtx_sysinit(const void *arg); int _mtx_trylock_flags_int(struct mtx *m, int opts LOCK_FILE_LINE_ARG_DEF); int _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line); diff --git a/sys/sys/param.h b/sys/sys/param.h index fc2a78883f1e..ce91430909ce 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -74,7 +74,7 @@ * cannot include sys/param.h and should only be updated here. */ #undef __FreeBSD_version -#define __FreeBSD_version 1500063 +#define __FreeBSD_version 1600000 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/pciio.h b/sys/sys/pciio.h index 6467e82b1b3d..64c0b32cb8e2 100644 --- a/sys/sys/pciio.h +++ b/sys/sys/pciio.h @@ -77,6 +77,9 @@ struct pci_conf { u_int8_t pc_revid; /* chip revision ID */ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */ u_long pd_unit; /* device unit number */ + int pd_numa_domain; /* device NUMA domain */ + size_t pc_reported_len;/* length of PCI data reported */ + char pc_spare[64]; /* space for future fields */ }; struct pci_match_conf { @@ -165,7 +168,6 @@ struct pci_bar_ioreq { #define PCIIO_BAR_MMAP_RW 0x04 #define PCIIO_BAR_MMAP_ACTIVATE 0x08 -#define PCIOCGETCONF _IOWR('p', 5, struct pci_conf_io) #define PCIOCREAD _IOWR('p', 2, struct pci_io) #define PCIOCWRITE _IOWR('p', 3, struct pci_io) #define PCIOCATTACHED _IOWR('p', 4, struct pci_io) @@ -173,5 +175,6 @@ struct pci_bar_ioreq { #define PCIOCLISTVPD _IOWR('p', 7, struct pci_list_vpd_io) #define PCIOCBARMMAP _IOWR('p', 8, struct pci_bar_mmap) #define PCIOCBARIO _IOWR('p', 9, struct pci_bar_ioreq) +#define PCIOCGETCONF _IOWR('p', 10, struct pci_conf_io) #endif /* !_SYS_PCIIO_H_ */ diff --git a/sys/sys/power.h b/sys/sys/power.h index 9afa55dd403a..3ee021b0e587 100644 --- a/sys/sys/power.h +++ b/sys/sys/power.h @@ -28,6 +28,7 @@ #ifndef _SYS_POWER_H_ #define _SYS_POWER_H_ +#ifdef _KERNEL #include <sys/_eventhandler.h> @@ -60,4 +61,5 @@ extern void power_profile_set_state(int); typedef void (*power_profile_change_hook)(void *, int); EVENTHANDLER_DECLARE(power_profile_change, power_profile_change_hook); +#endif /* _KERNEL */ #endif /* !_SYS_POWER_H_ */ diff --git a/sys/sys/random.h b/sys/sys/random.h index 5abf762cd200..2a68f0c99b6d 100644 --- a/sys/sys/random.h +++ b/sys/sys/random.h @@ -142,9 +142,6 @@ random_harvest_direct(const void *entropy, u_int size, enum random_entropy_sourc random_harvest_direct_(entropy, size, origin); } -void random_harvest_register_source(enum random_entropy_source); -void random_harvest_deregister_source(enum random_entropy_source); - #if defined(RANDOM_ENABLE_UMA) #define random_harvest_fast_uma(a, b, c) random_harvest_fast(a, b, c) #else /* !defined(RANDOM_ENABLE_UMA) */ diff --git a/sys/sys/rmlock.h b/sys/sys/rmlock.h index 664356998438..eae7342527e3 100644 --- a/sys/sys/rmlock.h +++ b/sys/sys/rmlock.h @@ -52,7 +52,7 @@ void rm_init(struct rmlock *rm, const char *name); void rm_init_flags(struct rmlock *rm, const char *name, int opts); void rm_destroy(struct rmlock *rm); int rm_wowned(const struct rmlock *rm); -void rm_sysinit(void *arg); +void rm_sysinit(const void *arg); void _rm_wlock_debug(struct rmlock *rm, const char *file, int line); void _rm_wunlock_debug(struct rmlock *rm, const char *file, int line); diff --git a/sys/sys/rwlock.h b/sys/sys/rwlock.h index 0ebe90e09bed..929f78c1d204 100644 --- a/sys/sys/rwlock.h +++ b/sys/sys/rwlock.h @@ -128,7 +128,7 @@ */ void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts); void _rw_destroy(volatile uintptr_t *c); -void rw_sysinit(void *arg); +void rw_sysinit(const void *arg); int _rw_wowned(const volatile uintptr_t *c); void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line); int __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF); diff --git a/sys/sys/sx.h b/sys/sys/sx.h index deb277decc75..d28cae9d01e5 100644 --- a/sys/sys/sx.h +++ b/sys/sys/sx.h @@ -99,7 +99,7 @@ * Function prototipes. Routines that start with an underscore are not part * of the public interface and are wrappered with a macro. */ -void sx_sysinit(void *arg); +void sx_sysinit(const void *arg); #define sx_init(sx, desc) sx_init_flags((sx), (desc), 0) void sx_init_flags(struct sx *sx, const char *description, int opts); void sx_destroy(struct sx *sx); diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h index 2d6903967e15..cff27b8be316 100644 --- a/sys/sys/syscall.h +++ b/sys/sys/syscall.h @@ -535,4 +535,6 @@ #define SYS_inotify_rm_watch 594 #define SYS_getgroups 595 #define SYS_setgroups 596 -#define SYS_MAXSYSCALL 597 +#define SYS_jail_attach_jd 597 +#define SYS_jail_remove_jd 598 +#define SYS_MAXSYSCALL 599 diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk index d1172c2dc7bf..443dbadcfbff 100644 --- a/sys/sys/syscall.mk +++ b/sys/sys/syscall.mk @@ -438,4 +438,6 @@ MIASM = \ inotify_add_watch_at.o \ inotify_rm_watch.o \ getgroups.o \ - setgroups.o + setgroups.o \ + jail_attach_jd.o \ + jail_remove_jd.o diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h index 98311a6dbf94..8dda4b4533ea 100644 --- a/sys/sys/sysproto.h +++ b/sys/sys/sysproto.h @@ -1901,6 +1901,12 @@ struct setgroups_args { char gidsetsize_l_[PADL_(int)]; int gidsetsize; char gidsetsize_r_[PADR_(int)]; char gidset_l_[PADL_(const gid_t *)]; const gid_t * gidset; char gidset_r_[PADR_(const gid_t *)]; }; +struct jail_attach_jd_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; +}; +struct jail_remove_jd_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; +}; int sys__exit(struct thread *, struct _exit_args *); int sys_fork(struct thread *, struct fork_args *); int sys_read(struct thread *, struct read_args *); @@ -2305,6 +2311,8 @@ int sys_inotify_add_watch_at(struct thread *, struct inotify_add_watch_at_args * int sys_inotify_rm_watch(struct thread *, struct inotify_rm_watch_args *); int sys_getgroups(struct thread *, struct getgroups_args *); int sys_setgroups(struct thread *, struct setgroups_args *); +int sys_jail_attach_jd(struct thread *, struct jail_attach_jd_args *); +int sys_jail_remove_jd(struct thread *, struct jail_remove_jd_args *); #ifdef COMPAT_43 @@ -3301,6 +3309,8 @@ int freebsd14_setgroups(struct thread *, struct freebsd14_setgroups_args *); #define SYS_AUE_inotify_rm_watch AUE_INOTIFY #define SYS_AUE_getgroups AUE_GETGROUPS #define SYS_AUE_setgroups AUE_SETGROUPS +#define SYS_AUE_jail_attach_jd AUE_JAIL_ATTACH +#define SYS_AUE_jail_remove_jd AUE_JAIL_REMOVE #undef PAD_ #undef PADL_ diff --git a/sys/sys/ttycom.h b/sys/sys/ttycom.h index d7ddc66b09fb..43e8b98a5bc4 100644 --- a/sys/sys/ttycom.h +++ b/sys/sys/ttycom.h @@ -69,8 +69,8 @@ /* 89-91 conflicts: tun and tap */ #define TIOCTIMESTAMP _IOR('t', 89, struct timeval) /* enable/get timestamp * of last input event */ -#define TIOCMGDTRWAIT _IOR('t', 90, int) /* modem: get wait on close */ -#define TIOCMSDTRWAIT _IOW('t', 91, int) /* modem: set wait on close */ +/* TIOCMGDTRWAIT _IOR('t', 90, int) * was modem: get wait on close */ +/* TIOCMSDTRWAIT _IOW('t', 91, int) * was modem: set wait on close */ /* 92-93 tun and tap */ /* 94-97 conflicts: tun and tap */ #define TIOCDRAIN _IO('t', 94) /* wait till output drained */ diff --git a/sys/sys/user.h b/sys/sys/user.h index 103236b6ed1b..3183f0792256 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -266,6 +266,7 @@ struct user { #define KF_TYPE_EVENTFD 13 #define KF_TYPE_TIMERFD 14 #define KF_TYPE_INOTIFY 15 +#define KF_TYPE_JAILDESC 16 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 @@ -453,6 +454,9 @@ struct kinfo_file { uint64_t kf_timerfd_addr; } kf_timerfd; struct { + int32_t kf_jid; + } kf_jail; + struct { uint64_t kf_kqueue_addr; int32_t kf_kqueue_count; int32_t kf_kqueue_state; diff --git a/sys/tools/makeobjops.awk b/sys/tools/makeobjops.awk index 5ea658c5a3b3..522fb04ec4d1 100644 --- a/sys/tools/makeobjops.awk +++ b/sys/tools/makeobjops.awk @@ -315,7 +315,7 @@ function handle_method (static, doc) printh("\t" join(";\n\t", arguments, num_arguments) ";"); } else { - prototype = "static __inline " ret " " umname "("; + prototype = "static __inline " ret "\n" umname "("; printh(format_line(prototype argument_list ")", line_width, length(prototype))); } @@ -327,7 +327,7 @@ function handle_method (static, doc) firstvar = "((kobj_t)" firstvar ")"; if (prolog != "") printh(prolog); - printh("\tKOBJOPLOOKUP(" firstvar "->ops," mname ");"); + printh("\tKOBJOPLOOKUP(" firstvar "->ops, " mname ");"); rceq = (ret != "void") ? "rc = " : ""; printh("\t" rceq "((" mname "_t *) _m)(" varname_list ");"); if (epilog != "") diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 5189f7405400..679b2e20e88b 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -4017,8 +4017,9 @@ restart: rr = rdomain == UMA_ANYDOMAIN; if (rr) { aflags = (flags & ~M_WAITOK) | M_NOWAIT; - vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, - &aflags); + if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, + &aflags) != 0) + return (NULL); } else { aflags = flags; domain = rdomain; @@ -5245,8 +5246,9 @@ uma_prealloc(uma_zone_t zone, int items) slabs = howmany(items, keg->uk_ipers); while (slabs-- > 0) { aflags = M_NOWAIT; - vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, - &aflags); + if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, + &aflags) != 0) + panic("%s: Domainset is empty", __func__); for (;;) { slab = keg_alloc_slab(keg, zone, domain, M_WAITOK, aflags); diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c index b44bdb96b0d4..9fa17da954f7 100644 --- a/sys/vm/vm_domainset.c +++ b/sys/vm/vm_domainset.c @@ -58,6 +58,9 @@ static int vm_domainset_default_stride = 64; +static bool vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain); + + /* * Determine which policy is to be used for this allocation. */ @@ -93,28 +96,15 @@ vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds, pindex += (((uintptr_t)obj) / sizeof(*obj)); di->di_offset = pindex; } - /* Skip domains below min on the first pass. */ - di->di_minskip = true; } static void vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain) { + /* Grab the next domain in 'ds_order'. */ *domain = di->di_domain->ds_order[ - ++(*di->di_iter) % di->di_domain->ds_cnt]; -} - -static void -vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain) -{ - int d; - - do { - d = di->di_domain->ds_order[ - ++(*di->di_iter) % di->di_domain->ds_cnt]; - } while (d == di->di_domain->ds_prefer); - *domain = d; + (*di->di_iter)++ % di->di_domain->ds_cnt]; } static void @@ -127,79 +117,144 @@ vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain) *domain = di->di_domain->ds_order[d]; } -static void -vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) +/* + * Internal function determining the current phase's first candidate domain. + * + * Returns whether these is an eligible domain, which is returned through + * '*domain'. '*domain' can be modified even if there is no eligible domain. + * + * See herald comment of vm_domainset_iter_first() below about phases. + */ +static bool +vm_domainset_iter_phase_first(struct vm_domainset_iter *di, int *domain) { - - KASSERT(di->di_n > 0, ("%s: Invalid n %d", __func__, di->di_n)); switch (di->di_policy) { case DOMAINSET_POLICY_FIRSTTOUCH: - /* - * To prevent impossible allocations we convert an invalid - * first-touch to round-robin. - */ - /* FALLTHROUGH */ - case DOMAINSET_POLICY_INTERLEAVE: - /* FALLTHROUGH */ + *domain = PCPU_GET(domain); + break; case DOMAINSET_POLICY_ROUNDROBIN: vm_domainset_iter_rr(di, domain); break; case DOMAINSET_POLICY_PREFER: - vm_domainset_iter_prefer(di, domain); + *domain = di->di_domain->ds_prefer; + break; + case DOMAINSET_POLICY_INTERLEAVE: + vm_domainset_iter_interleave(di, domain); break; default: panic("%s: Unknown policy %d", __func__, di->di_policy); } KASSERT(*domain < vm_ndomains, ("%s: Invalid domain %d", __func__, *domain)); + + /* + * Has the policy's start domain already been visited? + */ + if (!DOMAINSET_ISSET(*domain, &di->di_remain_mask)) + return (vm_domainset_iter_next(di, domain)); + + DOMAINSET_CLR(*domain, &di->di_remain_mask); + + /* Does it have enough free pages (phase 1)? */ + if (di->di_minskip && vm_page_count_min_domain(*domain)) { + /* Mark the domain as eligible for phase 2. */ + DOMAINSET_SET(*domain, &di->di_min_mask); + return (vm_domainset_iter_next(di, domain)); + } + + return (true); } -static void +/* + * Resets an iterator to point to the first candidate domain. + * + * Returns whether there is an eligible domain to start with. '*domain' may be + * modified even if there is none. + * + * There must have been one call to vm_domainset_iter_init() before. + * + * This function must be called at least once before calling + * vm_domainset_iter_next(). Note that functions wrapping + * vm_domainset_iter_init() usually do that themselves. + * + * This function may be called again to reset the iterator to the policy's first + * candidate domain. After each reset, the iterator will visit the same domains + * as in the previous iteration minus those on which vm_domainset_iter_ignore() + * has been called. Note that the first candidate domain may change at each + * reset (at time of this writing, only on the DOMAINSET_POLICY_ROUNDROBIN + * policy). + * + * Domains which have a number of free pages over 'v_free_min' are always + * visited first (this is called the "phase 1" in comments, "phase 2" being the + * examination of the remaining domains; no domains are ever visited twice). + */ +static bool vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) { + /* Initialize the mask of domains to visit. */ + DOMAINSET_COPY(&di->di_valid_mask, &di->di_remain_mask); + /* + * No candidate domains for phase 2 at start. This will be filled by + * phase 1. + */ + DOMAINSET_ZERO(&di->di_min_mask); + /* Skip domains below 'v_free_min' on phase 1. */ + di->di_minskip = true; - switch (di->di_policy) { - case DOMAINSET_POLICY_FIRSTTOUCH: - *domain = PCPU_GET(domain); - if (DOMAINSET_ISSET(*domain, &di->di_valid_mask)) { - /* - * Add an extra iteration because we will visit the - * current domain a second time in the rr iterator. - */ - di->di_n = di->di_domain->ds_cnt + 1; - break; - } - /* - * To prevent impossible allocations we convert an invalid - * first-touch to round-robin. - */ - /* FALLTHROUGH */ - case DOMAINSET_POLICY_ROUNDROBIN: - di->di_n = di->di_domain->ds_cnt; + return (vm_domainset_iter_phase_first(di, domain)); +} + +/* + * Advances the iterator to the next candidate domain. + * + * Returns whether there was another domain to visit. '*domain' may be modified + * even if there is none. + * + * vm_domainset_iter_first() must have been called at least once before using + * this function (see its herald comment for more details on iterators). + */ +static bool +vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) +{ + /* Loop while there remains domains to visit in the current phase. */ + while (!DOMAINSET_EMPTY(&di->di_remain_mask)) { + /* Grab the next domain in 'ds_order'. */ vm_domainset_iter_rr(di, domain); - break; - case DOMAINSET_POLICY_PREFER: - *domain = di->di_domain->ds_prefer; - di->di_n = di->di_domain->ds_cnt; - break; - case DOMAINSET_POLICY_INTERLEAVE: - vm_domainset_iter_interleave(di, domain); - di->di_n = di->di_domain->ds_cnt; - break; - default: - panic("%s: Unknown policy %d", __func__, di->di_policy); + KASSERT(*domain < vm_ndomains, + ("%s: Invalid domain %d", __func__, *domain)); + + if (DOMAINSET_ISSET(*domain, &di->di_remain_mask)) { + DOMAINSET_CLR(*domain, &di->di_remain_mask); + if (!di->di_minskip || !vm_page_count_min_domain(*domain)) + return (true); + DOMAINSET_SET(*domain, &di->di_min_mask); + } } - KASSERT(di->di_n > 0, ("%s: Invalid n %d", __func__, di->di_n)); - KASSERT(*domain < vm_ndomains, - ("%s: Invalid domain %d", __func__, *domain)); + + /* + * If phase 1 (skip low memory domains) is over, start phase 2 (consider + * low memory domains). + */ + if (di->di_minskip) { + di->di_minskip = false; + /* Browse domains that were under 'v_free_min'. */ + DOMAINSET_COPY(&di->di_min_mask, &di->di_remain_mask); + return (vm_domainset_iter_phase_first(di, domain)); + } + + return (false); } -void +int vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, - vm_pindex_t pindex, int *domain, int *req, struct pctrie_iter *pages) + vm_pindex_t pindex, int *domain, int *req) { struct domainset_ref *dr; + di->di_flags = *req; + *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | + VM_ALLOC_NOWAIT; + /* * Object policy takes precedence over thread policy. The policies * are immutable and unsynchronized. Updates can race but pointer @@ -209,36 +264,21 @@ vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, dr = &obj->domain; else dr = &curthread->td_domain; + vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex); - di->di_flags = *req; - *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | - VM_ALLOC_NOWAIT; - vm_domainset_iter_first(di, domain); - if (vm_page_count_min_domain(*domain)) - vm_domainset_iter_page(di, obj, domain, pages); + /* + * XXXOC: Shouldn't we just panic on 'false' if VM_ALLOC_WAITOK was + * passed? + */ + return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM); } int vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj, int *domain, struct pctrie_iter *pages) { - if (__predict_false(DOMAINSET_EMPTY(&di->di_valid_mask))) - return (ENOMEM); - - /* If there are more domains to visit we run the iterator. */ - while (--di->di_n != 0) { - vm_domainset_iter_next(di, domain); - if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) && - (!di->di_minskip || !vm_page_count_min_domain(*domain))) - return (0); - } - - /* If we skipped domains below min restart the search. */ - if (di->di_minskip) { - di->di_minskip = false; - vm_domainset_iter_first(di, domain); + if (vm_domainset_iter_next(di, domain)) return (0); - } /* If we visited all domains and this was a NOWAIT we return error. */ if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) @@ -257,61 +297,43 @@ vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj, return (ENOMEM); /* Restart the search. */ - vm_domainset_iter_first(di, domain); - - return (0); + /* XXXOC: Shouldn't we just panic on 'false'? */ + return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM); } -static void +static int _vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain, int *flags) { - di->di_flags = *flags; *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; - vm_domainset_iter_first(di, domain); - if (vm_page_count_min_domain(*domain)) - vm_domainset_iter_policy(di, domain); + /* XXXOC: Shouldn't we just panic on 'false' if M_WAITOK was passed? */ + return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM); } -void +int vm_domainset_iter_policy_init(struct vm_domainset_iter *di, struct domainset *ds, int *domain, int *flags) { vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0); - _vm_domainset_iter_policy_init(di, domain, flags); + return (_vm_domainset_iter_policy_init(di, domain, flags)); } -void +int vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di, struct domainset_ref *dr, int *domain, int *flags) { vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0); - _vm_domainset_iter_policy_init(di, domain, flags); + return (_vm_domainset_iter_policy_init(di, domain, flags)); } int vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain) { - if (DOMAINSET_EMPTY(&di->di_valid_mask)) - return (ENOMEM); - - /* If there are more domains to visit we run the iterator. */ - while (--di->di_n != 0) { - vm_domainset_iter_next(di, domain); - if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) && - (!di->di_minskip || !vm_page_count_min_domain(*domain))) - return (0); - } - - /* If we skipped domains below min restart the search. */ - if (di->di_minskip) { - di->di_minskip = false; - vm_domainset_iter_first(di, domain); + if (vm_domainset_iter_next(di, domain)) return (0); - } /* If we visited all domains and this was a NOWAIT we return error. */ if ((di->di_flags & M_WAITOK) == 0) @@ -321,9 +343,8 @@ vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain) vm_wait_doms(&di->di_valid_mask, 0); /* Restart the search. */ - vm_domainset_iter_first(di, domain); - - return (0); + /* XXXOC: Shouldn't we just panic on 'false'? */ + return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM); } void @@ -345,12 +366,12 @@ vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj, return (EJUSTRETURN); } -void +int vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, - vm_pindex_t pindex, int *domain, int *flags, struct pctrie_iter *pages) + vm_pindex_t pindex, int *domain, int *flags) { - *domain = 0; + return (0); } int @@ -360,20 +381,20 @@ vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain) return (EJUSTRETURN); } -void +int vm_domainset_iter_policy_init(struct vm_domainset_iter *di, struct domainset *ds, int *domain, int *flags) { - *domain = 0; + return (0); } -void +int vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di, struct domainset_ref *dr, int *domain, int *flags) { - *domain = 0; + return (0); } void diff --git a/sys/vm/vm_domainset.h b/sys/vm/vm_domainset.h index 0d325a642f40..ef86c8ccb5e4 100644 --- a/sys/vm/vm_domainset.h +++ b/sys/vm/vm_domainset.h @@ -33,23 +33,26 @@ struct pctrie_iter; struct vm_domainset_iter { struct domainset *di_domain; unsigned int *di_iter; + /* Initialized from 'di_domain', initial value after reset. */ domainset_t di_valid_mask; + /* Domains to browse in the current phase. */ + domainset_t di_remain_mask; + /* Domains skipped in phase 1 because under 'v_free_min'. */ + domainset_t di_min_mask; vm_pindex_t di_offset; int di_flags; uint16_t di_policy; - domainid_t di_n; bool di_minskip; }; int vm_domainset_iter_page(struct vm_domainset_iter *, struct vm_object *, int *, struct pctrie_iter *); -void vm_domainset_iter_page_init(struct vm_domainset_iter *, - struct vm_object *, vm_pindex_t, int *, int *, - struct pctrie_iter *); +int vm_domainset_iter_page_init(struct vm_domainset_iter *, + struct vm_object *, vm_pindex_t, int *, int *); int vm_domainset_iter_policy(struct vm_domainset_iter *, int *); -void vm_domainset_iter_policy_init(struct vm_domainset_iter *, +int vm_domainset_iter_policy_init(struct vm_domainset_iter *, struct domainset *, int *, int *); -void vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *, +int vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *, struct domainset_ref *, int *, int *); void vm_domainset_iter_ignore(struct vm_domainset_iter *, int); diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index 93ec6014c27d..1fd6518cf4ed 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -91,6 +91,8 @@ void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t, vm_ooffset_t *); int vm_fault_disable_pagefaults(void); void vm_fault_enable_pagefaults(int save); +int vm_fault_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, + vm_prot_t prot, vm_page_t *ma, int max_count, int *ppages_count); int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, vm_prot_t prot, vm_page_t *ma, int max_count); int vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 3e57e8d4f1d0..58f8ac16fa0c 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -1995,32 +1995,43 @@ vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, } /* - * Hold each of the physical pages that are mapped by the specified range of - * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid - * and allow the specified types of access, "prot". If all of the implied - * pages are successfully held, then the number of held pages is returned - * together with pointers to those pages in the array "ma". However, if any - * of the pages cannot be held, -1 is returned. + * Hold each of the physical pages that are mapped by the specified + * range of virtual addresses, ["addr", "addr" + "len"), if those + * mappings are valid and allow the specified types of access, "prot". + * If all of the implied pages are successfully held, then the number + * of held pages is assigned to *ppages_count, together with pointers + * to those pages in the array "ma". The returned value is zero. + * + * However, if any of the pages cannot be held, an error is returned, + * and no pages are held. + * Error values: + * ENOMEM - the range is not valid + * EINVAL - the provided vm_page array is too small to hold all pages + * EAGAIN - a page was not mapped, and the thread is in nofaulting mode + * EFAULT - a page with requested permissions cannot be mapped + * (more detailed result from vm_fault() is lost) */ int -vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, - vm_prot_t prot, vm_page_t *ma, int max_count) +vm_fault_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, + vm_prot_t prot, vm_page_t *ma, int max_count, int *ppages_count) { vm_offset_t end, va; vm_page_t *mp; - int count; + int count, error; boolean_t pmap_failed; - if (len == 0) + if (len == 0) { + *ppages_count = 0; return (0); + } end = round_page(addr + len); addr = trunc_page(addr); if (!vm_map_range_valid(map, addr, end)) - return (-1); + return (ENOMEM); if (atop(end - addr) > max_count) - panic("vm_fault_quick_hold_pages: count > max_count"); + return (EINVAL); count = atop(end - addr); /* @@ -2062,19 +2073,49 @@ vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, * the proper behaviour explicitly. */ if ((prot & VM_PROT_QUICK_NOFAULT) != 0 && - (curthread->td_pflags & TDP_NOFAULTING) != 0) - goto error; - for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) + (curthread->td_pflags & TDP_NOFAULTING) != 0) { + error = EAGAIN; + goto fail; + } + for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) { if (*mp == NULL && vm_fault(map, va, prot, - VM_FAULT_NORMAL, mp) != KERN_SUCCESS) - goto error; + VM_FAULT_NORMAL, mp) != KERN_SUCCESS) { + error = EFAULT; + goto fail; + } + } } - return (count); -error: + *ppages_count = count; + return (0); +fail: for (mp = ma; mp < ma + count; mp++) if (*mp != NULL) vm_page_unwire(*mp, PQ_INACTIVE); - return (-1); + return (error); +} + + /* + * Hold each of the physical pages that are mapped by the specified range of + * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid + * and allow the specified types of access, "prot". If all of the implied + * pages are successfully held, then the number of held pages is returned + * together with pointers to those pages in the array "ma". However, if any + * of the pages cannot be held, -1 is returned. + */ +int +vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, + vm_prot_t prot, vm_page_t *ma, int max_count) +{ + int error, pages_count; + + error = vm_fault_hold_pages(map, addr, len, prot, ma, + max_count, &pages_count); + if (error != 0) { + if (error == EINVAL) + panic("vm_fault_quick_hold_pages: count > max_count"); + return (-1); + } + return (pages_count); } /* diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 94df2c2f9a9e..e0f1807a1b32 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -453,7 +453,7 @@ vm_thread_stack_create(struct domainset *ds, int pages) obj = vm_thread_kstack_size_to_obj(pages); if (vm_ndomains > 1) obj->domain.dr_policy = ds; - vm_domainset_iter_page_init(&di, obj, 0, &domain, &req, NULL); + vm_domainset_iter_page_init(&di, obj, 0, &domain, &req); do { /* * Get a kernel virtual address for this thread's kstack. diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index e7d7b6726d2c..ac327aa37b72 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -323,7 +323,9 @@ kmem_alloc_attr_domainset(struct domainset *ds, vm_size_t size, int flags, start_segind = -1; - vm_domainset_iter_policy_init(&di, ds, &domain, &flags); + if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags) != 0) + return (NULL); + do { addr = kmem_alloc_attr_domain(domain, size, flags, low, high, memattr); @@ -417,7 +419,9 @@ kmem_alloc_contig_domainset(struct domainset *ds, vm_size_t size, int flags, start_segind = -1; - vm_domainset_iter_policy_init(&di, ds, &domain, &flags); + if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags)) + return (NULL); + do { addr = kmem_alloc_contig_domain(domain, size, flags, low, high, alignment, boundary, memattr); @@ -517,7 +521,9 @@ kmem_malloc_domainset(struct domainset *ds, vm_size_t size, int flags) void *addr; int domain; - vm_domainset_iter_policy_init(&di, ds, &domain, &flags); + if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags) != 0) + return (NULL); + do { addr = kmem_malloc_domain(domain, size, flags); if (addr != NULL) diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index abad5efb8a79..16878604fa11 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -2015,8 +2015,9 @@ vm_page_alloc_iter(vm_object_t object, vm_pindex_t pindex, int req, vm_page_t m; int domain; - vm_domainset_iter_page_init(&di, object, pindex, &domain, &req, - pages); + if (vm_domainset_iter_page_init(&di, object, pindex, &domain, &req) != 0) + return (NULL); + do { m = vm_page_alloc_domain_iter(object, pindex, domain, req, pages); @@ -2268,7 +2269,9 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, start_segind = -1; - vm_domainset_iter_page_init(&di, object, pindex, &domain, &req, NULL); + if (vm_domainset_iter_page_init(&di, object, pindex, &domain, &req) != 0) + return (NULL); + do { m = vm_page_alloc_contig_domain(object, pindex, domain, req, npages, low, high, alignment, boundary, memattr); @@ -2596,7 +2599,9 @@ vm_page_alloc_noobj(int req) vm_page_t m; int domain; - vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req, NULL); + if (vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0) + return (NULL); + do { m = vm_page_alloc_noobj_domain(domain, req); if (m != NULL) @@ -2615,7 +2620,9 @@ vm_page_alloc_noobj_contig(int req, u_long npages, vm_paddr_t low, vm_page_t m; int domain; - vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req, NULL); + if (vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0) + return (NULL); + do { m = vm_page_alloc_noobj_contig_domain(domain, req, npages, low, high, alignment, boundary, memattr); @@ -3334,7 +3341,9 @@ vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high, ret = ERANGE; - vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req, NULL); + if (vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0) + return (ret); + do { status = vm_page_reclaim_contig_domain(domain, req, npages, low, high, alignment, boundary); diff --git a/sys/x86/iommu/amd_intrmap.c b/sys/x86/iommu/amd_intrmap.c index a4c1a7836268..f8900fe0561f 100644 --- a/sys/x86/iommu/amd_intrmap.c +++ b/sys/x86/iommu/amd_intrmap.c @@ -112,6 +112,8 @@ amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector, { struct amdiommu_ctx *ctx; struct amdiommu_unit *unit; + device_t requester; + int error __diagused; uint16_t rid; bool is_iommu; @@ -180,7 +182,8 @@ amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector, *addr |= ((uint64_t)cpu & 0xffffff00) << 32; } - iommu_get_requester(src, &rid); + error = iommu_get_requester(src, &requester, &rid); + MPASS(error == 0); AMDIOMMU_LOCK(unit); amdiommu_qi_invalidate_ir_locked(unit, rid); AMDIOMMU_UNLOCK(unit); @@ -220,6 +223,7 @@ static struct amdiommu_ctx * amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu) { devclass_t src_class; + device_t requester; struct amdiommu_unit *unit; struct amdiommu_ctx *ctx; uint32_t edte; @@ -251,7 +255,8 @@ amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu) error = amdiommu_find_unit(src, &unit, &rid, &dte, &edte, bootverbose); if (error == 0) { - iommu_get_requester(src, &rid); + error = iommu_get_requester(src, &requester, &rid); + MPASS(error == 0); ctx = amdiommu_get_ctx_for_dev(unit, src, rid, 0, false /* XXXKIB */, false, dte, edte); } @@ -266,6 +271,8 @@ amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src, u_int cookie) { struct amdiommu_unit *unit; + device_t requester; + int error __diagused; uint16_t rid; MPASS(ctx != NULL); @@ -291,7 +298,8 @@ amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src, atomic_thread_fence_rel(); bzero(irte, sizeof(*irte)); } - iommu_get_requester(src, &rid); + error = iommu_get_requester(src, &requester, &rid); + MPASS(error == 0); AMDIOMMU_LOCK(unit); amdiommu_qi_invalidate_ir_locked(unit, rid); AMDIOMMU_UNLOCK(unit); diff --git a/sys/x86/iommu/intel_intrmap.c b/sys/x86/iommu/intel_intrmap.c index 06e41523624b..f12a0c9bae9b 100644 --- a/sys/x86/iommu/intel_intrmap.c +++ b/sys/x86/iommu/intel_intrmap.c @@ -234,6 +234,8 @@ dmar_ir_find(device_t src, uint16_t *rid, int *is_dmar) { devclass_t src_class; struct dmar_unit *unit; + device_t requester; + int error __diagused; /* * We need to determine if the interrupt source generates FSB @@ -253,8 +255,10 @@ dmar_ir_find(device_t src, uint16_t *rid, int *is_dmar) unit = dmar_find_hpet(src, rid); } else { unit = dmar_find(src, bootverbose); - if (unit != NULL && rid != NULL) - iommu_get_requester(src, rid); + if (unit != NULL && rid != NULL) { + error = iommu_get_requester(src, &requester, rid); + MPASS(error == 0); + } } return (unit); } diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c index 4d64eaf78b29..7661c82f4394 100644 --- a/sys/x86/x86/identcpu.c +++ b/sys/x86/x86/identcpu.c @@ -2613,7 +2613,7 @@ print_vmx_info(void) "\020EPT#VE" /* EPT-violation #VE */ "\021XSAVES" /* Enable XSAVES/XRSTORS */ ); - printf("\n Exit Controls=0x%b", mask, + printf("\n Exit Controls=0x%b", exit, "\020" "\003DR" /* Save debug controls */ /* Ignore Host address-space size */ @@ -2625,7 +2625,7 @@ print_vmx_info(void) "\026EFER-LD" /* Load MSR_EFER */ "\027PTMR-SV" /* Save VMX-preemption timer value */ ); - printf("\n Entry Controls=0x%b", mask, + printf("\n Entry Controls=0x%b", entry, "\020" "\003DR" /* Save debug controls */ /* Ignore IA-32e mode guest */ |