diff options
Diffstat (limited to 'sys')
418 files changed, 18767 insertions, 3521 deletions
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index 385eb9667652..81427b5b18b6 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -184,6 +184,9 @@ device mrsas # LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s device nvme # base NVMe driver device nvd # expose NVMe namespaces as disks, depends on nvme +# Universal Flash Storage Host Controller Interface support +device ufshci # UFS host controller + # Intel Volume Management Device (VMD) support device vmd @@ -383,6 +386,7 @@ options HID_DEBUG # enable debug msgs device hid # Generic HID support device hidbus # Generic HID Bus options IICHID_SAMPLING # Workaround missing GPIO INTR support +options U2F_MAKE_UHID_ALIAS # install /dev/uhid alias for /dev/u2f/ # EFI devices device efidev # EFI pseudo-device diff --git a/sys/amd64/vmm/io/vioapic.c b/sys/amd64/vmm/io/vioapic.c index 8869dc1383e6..7df6193d6dc0 100644 --- a/sys/amd64/vmm/io/vioapic.c +++ b/sys/amd64/vmm/io/vioapic.c @@ -130,6 +130,15 @@ vioapic_send_intr(struct vioapic *vioapic, int pin) vector = low & IOART_INTVEC; dest = high >> APIC_ID_SHIFT; + /* + * Ideally we'd just call lapic_intr_msi() here with the + * constructed MSI instead of interpreting it for ourselves. + * But until/unless we support emulated IOMMUs with interrupt + * remapping, interpretation is simple. We just need to mask + * in the Extended Destination ID bits for the 15-bit + * enlightenment (http://david.woodhou.se/ExtDestId.pdf) + */ + dest |= ((high & APIC_EXT_ID_MASK) >> APIC_EXT_ID_SHIFT) << 8; vlapic_deliver_intr(vioapic->vm, level, dest, phys, delmode, vector); } diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c index fd511733492e..0cae01f172ec 100644 --- a/sys/amd64/vmm/vmm_lapic.c +++ b/sys/amd64/vmm/vmm_lapic.c @@ -115,6 +115,11 @@ lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg) * physical otherwise. */ dest = (addr >> 12) & 0xff; + /* + * Extended Destination ID support uses bits 5-11 of the address: + * http://david.woodhou.se/ExtDestId.pdf + */ + dest |= ((addr >> 5) & 0x7f) << 8; phys = ((addr & (MSI_X86_ADDR_RH | MSI_X86_ADDR_LOG)) != (MSI_X86_ADDR_RH | MSI_X86_ADDR_LOG)); delmode = msg & APIC_DELMODE_MASK; diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c index 366f1da9f850..2e2224595ab4 100644 --- a/sys/amd64/vmm/x86.c +++ b/sys/amd64/vmm/x86.c @@ -48,7 +48,12 @@ SYSCTL_DECL(_hw_vmm); static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, NULL); -#define CPUID_VM_HIGH 0x40000000 +#define CPUID_VM_SIGNATURE 0x40000000 +#define CPUID_BHYVE_FEATURES 0x40000001 +#define CPUID_VM_HIGH CPUID_BHYVE_FEATURES + +/* Features advertised in CPUID_BHYVE_FEATURES %eax */ +#define CPUID_BHYVE_FEAT_EXT_DEST_ID (1UL << 0) /* MSI Extended Dest ID */ static const char bhyve_id[12] = "bhyve bhyve "; @@ -100,7 +105,7 @@ x86_emulate_cpuid(struct vcpu *vcpu, uint64_t *rax, uint64_t *rbx, if (cpu_exthigh != 0 && func >= 0x80000000) { if (func > cpu_exthigh) func = cpu_exthigh; - } else if (func >= 0x40000000) { + } else if (func >= CPUID_VM_SIGNATURE) { if (func > CPUID_VM_HIGH) func = CPUID_VM_HIGH; } else if (func > cpu_high) { @@ -601,13 +606,20 @@ x86_emulate_cpuid(struct vcpu *vcpu, uint64_t *rax, uint64_t *rbx, regs[3] = 0; break; - case 0x40000000: + case CPUID_VM_SIGNATURE: regs[0] = CPUID_VM_HIGH; bcopy(bhyve_id, ®s[1], 4); bcopy(bhyve_id + 4, ®s[2], 4); bcopy(bhyve_id + 8, ®s[3], 4); break; + case CPUID_BHYVE_FEATURES: + regs[0] = CPUID_BHYVE_FEAT_EXT_DEST_ID; + regs[1] = 0; + regs[2] = 0; + regs[3] = 0; + break; + default: default_leaf: /* diff --git a/sys/arm/allwinner/aw_gpio.c b/sys/arm/allwinner/aw_gpio.c index 2061e38a155f..f1b6f0bc9193 100644 --- a/sys/arm/allwinner/aw_gpio.c +++ b/sys/arm/allwinner/aw_gpio.c @@ -1162,11 +1162,12 @@ aw_gpio_attach(device_t dev) fdt_pinctrl_register(dev, "allwinner,pins"); fdt_pinctrl_configure_tree(dev); - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) goto fail; config_intrhook_oneshot(aw_gpio_enable_bank_supply, sc); + bus_attach_children(dev); return (0); diff --git a/sys/arm/allwinner/axp209.c b/sys/arm/allwinner/axp209.c index 239ead02d0e0..ff999a0f9b9b 100644 --- a/sys/arm/allwinner/axp209.c +++ b/sys/arm/allwinner/axp209.c @@ -1322,7 +1322,7 @@ axp2xx_attach(device_t dev) case AXP209: sc->pins = axp209_pins; sc->npins = nitems(axp209_pins); - sc->gpiodev = gpiobus_attach_bus(dev); + sc->gpiodev = gpiobus_add_bus(dev); sc->sensors = axp209_sensors; sc->nsensors = nitems(axp209_sensors); @@ -1333,7 +1333,7 @@ axp2xx_attach(device_t dev) case AXP221: sc->pins = axp221_pins; sc->npins = nitems(axp221_pins); - sc->gpiodev = gpiobus_attach_bus(dev); + sc->gpiodev = gpiobus_add_bus(dev); sc->sensors = axp221_sensors; sc->nsensors = nitems(axp221_sensors); @@ -1374,6 +1374,7 @@ axp2xx_attach(device_t dev) } } + bus_attach_children(dev); return (0); } diff --git a/sys/arm/allwinner/axp81x.c b/sys/arm/allwinner/axp81x.c index fc1a168595e5..71f0c8156a0d 100644 --- a/sys/arm/allwinner/axp81x.c +++ b/sys/arm/allwinner/axp81x.c @@ -1609,7 +1609,8 @@ axp8xx_attach(device_t dev) EVENTHANDLER_REGISTER(shutdown_final, axp8xx_shutdown, dev, SHUTDOWN_PRI_LAST); - sc->gpiodev = gpiobus_attach_bus(dev); + sc->gpiodev = gpiobus_add_bus(dev); + bus_attach_children(dev); return (0); } diff --git a/sys/arm/arm/pmu_fdt.c b/sys/arm/arm/pmu_fdt.c index 3e733f3e1b18..dd6087652e38 100644 --- a/sys/arm/arm/pmu_fdt.c +++ b/sys/arm/arm/pmu_fdt.c @@ -152,7 +152,7 @@ pmu_parse_intr(device_t dev, struct pmu_softc *sc) if (intr_is_per_cpu(sc->irq[0].res)) { if (has_affinity) { device_printf(dev, - "Per CPU interupt have declared affinity\n"); + "Per CPU interrupt have declared affinity\n"); err = ENXIO; goto done; } @@ -179,7 +179,7 @@ pmu_parse_intr(device_t dev, struct pmu_softc *sc) if (intr_is_per_cpu(sc->irq[i].res)) { - device_printf(dev, "Unexpected per CPU interupt\n"); + device_printf(dev, "Unexpected per CPU interrupt\n"); err = ENXIO; goto done; } diff --git a/sys/arm/broadcom/bcm2835/bcm2835_gpio.c b/sys/arm/broadcom/bcm2835/bcm2835_gpio.c index 48d1d2af5abc..93ee5d7c8bd3 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_gpio.c +++ b/sys/arm/broadcom/bcm2835/bcm2835_gpio.c @@ -840,10 +840,11 @@ bcm_gpio_attach(device_t dev) fdt_pinctrl_register(dev, "brcm,pins"); fdt_pinctrl_configure_tree(dev); - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) goto fail; + bus_attach_children(dev); return (0); fail: diff --git a/sys/arm/broadcom/bcm2835/raspberrypi_gpio.c b/sys/arm/broadcom/bcm2835/raspberrypi_gpio.c index 5a0f5cf2b1b3..b286654c6f18 100644 --- a/sys/arm/broadcom/bcm2835/raspberrypi_gpio.c +++ b/sys/arm/broadcom/bcm2835/raspberrypi_gpio.c @@ -404,10 +404,11 @@ rpi_fw_gpio_attach(device_t dev) } } free(names, M_OFWPROP); - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) goto fail; + bus_attach_children(dev); return (0); fail: diff --git a/sys/arm/conf/GENERIC b/sys/arm/conf/GENERIC index 7394f3842d43..26b0c7bf0294 100644 --- a/sys/arm/conf/GENERIC +++ b/sys/arm/conf/GENERIC @@ -261,6 +261,7 @@ device aw_thermal # Allwinner Thermal Sensor Controller # HID support device hid # Generic HID support device hidbus # Generic HID Bus +options U2F_MAKE_UHID_ALIAS # install /dev/uhid alias for /dev/u2f/ # Flattened Device Tree options FDT # Configure using FDT/DTB data diff --git a/sys/arm/freescale/imx/imx_gpio.c b/sys/arm/freescale/imx/imx_gpio.c index 7610d28af90e..3b19ef1b5e67 100644 --- a/sys/arm/freescale/imx/imx_gpio.c +++ b/sys/arm/freescale/imx/imx_gpio.c @@ -861,13 +861,14 @@ imx51_gpio_attach(device_t dev) gpio_pic_register_isrcs(sc); intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev))); #endif - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { imx51_gpio_detach(dev); return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/arm/freescale/vybrid/vf_gpio.c b/sys/arm/freescale/vybrid/vf_gpio.c index c81524a8a27e..b4e1ba9af586 100644 --- a/sys/arm/freescale/vybrid/vf_gpio.c +++ b/sys/arm/freescale/vybrid/vf_gpio.c @@ -147,13 +147,14 @@ vf_gpio_attach(device_t dev) "vf_gpio%d.%d", device_get_unit(dev), i); } - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { bus_release_resources(dev, vf_gpio_spec, sc->res); mtx_destroy(&sc->sc_mtx); return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/arm/mv/a37x0_gpio.c b/sys/arm/mv/a37x0_gpio.c index 86110ff87ab1..754663d2991e 100644 --- a/sys/arm/mv/a37x0_gpio.c +++ b/sys/arm/mv/a37x0_gpio.c @@ -291,10 +291,11 @@ a37x0_gpio_attach(device_t dev) if (sc->sc_npins > sc->sc_max_pins) return (ENXIO); - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) return (ENXIO); + bus_attach_children(dev); return (0); } diff --git a/sys/arm/mv/gpio.c b/sys/arm/mv/gpio.c index 934c00236153..b3c2314fb2d6 100644 --- a/sys/arm/mv/gpio.c +++ b/sys/arm/mv/gpio.c @@ -340,7 +340,7 @@ mv_gpio_attach(device_t dev) if (rv != 0) return (rv); - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { mtx_destroy(&sc->mutex); bus_release_resource(dev, SYS_RES_IRQ, @@ -348,6 +348,7 @@ mv_gpio_attach(device_t dev) return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/arm/mv/mvebu_gpio.c b/sys/arm/mv/mvebu_gpio.c index 7acdfff539dc..4cc9b7030a65 100644 --- a/sys/arm/mv/mvebu_gpio.c +++ b/sys/arm/mv/mvebu_gpio.c @@ -804,12 +804,13 @@ mvebu_gpio_attach(device_t dev) } } - sc->busdev = gpiobus_attach_bus(dev); + sc->busdev = gpiobus_add_bus(dev); if (sc->busdev == NULL) { mvebu_gpio_detach(dev); return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/arm/nvidia/tegra_gpio.c b/sys/arm/nvidia/tegra_gpio.c index e37fd69a121e..aa34537352be 100644 --- a/sys/arm/nvidia/tegra_gpio.c +++ b/sys/arm/nvidia/tegra_gpio.c @@ -818,12 +818,13 @@ tegra_gpio_attach(device_t dev) return (ENXIO); } - sc->busdev = gpiobus_attach_bus(dev); + sc->busdev = gpiobus_add_bus(dev); if (sc->busdev == NULL) { tegra_gpio_detach(dev); return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/arm/ti/ti_gpio.c b/sys/arm/ti/ti_gpio.c index aceb3d63204e..01b9597a4418 100644 --- a/sys/arm/ti/ti_gpio.c +++ b/sys/arm/ti/ti_gpio.c @@ -674,12 +674,13 @@ ti_gpio_attach(device_t dev) } } - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { ti_gpio_detach(dev); return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/arm/xilinx/zy7_gpio.c b/sys/arm/xilinx/zy7_gpio.c index 71b6fc3c0586..2434e43bf27c 100644 --- a/sys/arm/xilinx/zy7_gpio.c +++ b/sys/arm/xilinx/zy7_gpio.c @@ -441,12 +441,13 @@ zy7_gpio_attach(device_t dev) return (ENOMEM); } - sc->busdev = gpiobus_attach_bus(dev); + sc->busdev = gpiobus_add_bus(dev); if (sc->busdev == NULL) { zy7_gpio_detach(dev); return (ENOMEM); } + bus_attach_children(dev); return (0); } diff --git a/sys/arm64/apple/apple_pinctrl.c b/sys/arm64/apple/apple_pinctrl.c index ebaaccea1d99..c28b1c62d78c 100644 --- a/sys/arm64/apple/apple_pinctrl.c +++ b/sys/arm64/apple/apple_pinctrl.c @@ -171,12 +171,13 @@ apple_pinctrl_attach(device_t dev) OF_xref_from_node(ofw_bus_get_node(dev))); } - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { device_printf(dev, "failed to attach gpiobus\n"); goto error; } + bus_attach_children(dev); return (0); error: mtx_destroy(&sc->sc_mtx); diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c index 7cd5327b9f1b..5c81c6cdce3d 100644 --- a/sys/arm64/arm64/elf32_machdep.c +++ b/sys/arm64/arm64/elf32_machdep.c @@ -195,7 +195,7 @@ freebsd32_fetch_syscall_args(struct thread *td) register_t *ap; struct syscall_args *sa; int error, i, nap, narg; - unsigned int args[4]; + unsigned int args[6]; nap = 4; p = td->td_proc; diff --git a/sys/arm64/conf/std.dev b/sys/arm64/conf/std.dev index c5c364ffda04..719f272426dd 100644 --- a/sys/arm64/conf/std.dev +++ b/sys/arm64/conf/std.dev @@ -115,6 +115,7 @@ device mmcsd # mmc/sd flash cards options HID_DEBUG # enable debug msgs device hid # Generic HID support device hidbus # Generic HID Bus +options U2F_MAKE_UHID_ALIAS # install /dev/uhid alias for /dev/u2f/ # Firmware device mmio_sram # Generic on-chip SRAM diff --git a/sys/arm64/rockchip/rk_gpio.c b/sys/arm64/rockchip/rk_gpio.c index 847bc7394dd0..61614f532634 100644 --- a/sys/arm64/rockchip/rk_gpio.c +++ b/sys/arm64/rockchip/rk_gpio.c @@ -371,12 +371,13 @@ rk_gpio_attach(device_t dev) sc->swporta_ddr = rk_gpio_read_4(sc, RK_GPIO_SWPORTA_DDR); RK_GPIO_UNLOCK(sc); - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { rk_gpio_detach(dev); return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/arm64/rockchip/rk_grf_gpio.c b/sys/arm64/rockchip/rk_grf_gpio.c index 6818bd85bb95..6ac419889614 100644 --- a/sys/arm64/rockchip/rk_grf_gpio.c +++ b/sys/arm64/rockchip/rk_grf_gpio.c @@ -181,11 +181,12 @@ rk_grf_gpio_attach(device_t dev) return (ENXIO); } - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/cddl/boot/zfs/zfsimpl.h b/sys/cddl/boot/zfs/zfsimpl.h index 83d964360343..c9de1fe4c391 100644 --- a/sys/cddl/boot/zfs/zfsimpl.h +++ b/sys/cddl/boot/zfs/zfsimpl.h @@ -536,6 +536,12 @@ typedef struct zio_gbh { offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)]) #define VDEV_UBERBLOCK_SIZE(vd) (1ULL << VDEV_UBERBLOCK_SHIFT(vd)) +#define ASHIFT_UBERBLOCK_SHIFT(ashift) \ + MIN(MAX(ashift, UBERBLOCK_SHIFT), \ + MAX_UBERBLOCK_SHIFT) +#define ASHIFT_UBERBLOCK_SIZE(ashift) \ + (1ULL << ASHIFT_UBERBLOCK_SHIFT(ashift)) + typedef struct vdev_phys { char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_eck_t)]; zio_eck_t vp_zbt; @@ -2015,11 +2021,11 @@ typedef struct vdev_indirect_config { typedef struct vdev { STAILQ_ENTRY(vdev) v_childlink; /* link in parent's child list */ - STAILQ_ENTRY(vdev) v_alllink; /* link in global vdev list */ vdev_list_t v_children; /* children of this vdev */ const char *v_name; /* vdev name */ uint64_t v_guid; /* vdev guid */ - uint64_t v_txg; /* most recent transaction */ + uint64_t v_label; /* label instantiated from (top vdev) */ + uint64_t v_txg; /* most recent transaction (top vdev) */ uint64_t v_id; /* index in parent */ uint64_t v_psize; /* physical device capacity */ int v_ashift; /* offset to block shift */ diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h index 971510ebb6b6..90cd21a80923 100644 --- a/sys/compat/freebsd32/freebsd32_syscall.h +++ b/sys/compat/freebsd32/freebsd32_syscall.h @@ -83,8 +83,8 @@ /* 76 is obsolete vhangup */ /* 77 is obsolete vlimit */ #define FREEBSD32_SYS_mincore 78 -#define FREEBSD32_SYS_getgroups 79 -#define FREEBSD32_SYS_setgroups 80 +#define FREEBSD32_SYS_freebsd14_getgroups 79 +#define FREEBSD32_SYS_freebsd14_setgroups 80 #define FREEBSD32_SYS_getpgrp 81 #define FREEBSD32_SYS_setpgid 82 #define FREEBSD32_SYS_freebsd32_setitimer 83 @@ -513,4 +513,6 @@ #define FREEBSD32_SYS_exterrctl 592 #define FREEBSD32_SYS_inotify_add_watch_at 593 #define FREEBSD32_SYS_inotify_rm_watch 594 -#define FREEBSD32_SYS_MAXSYSCALL 595 +#define FREEBSD32_SYS_getgroups 595 +#define FREEBSD32_SYS_setgroups 596 +#define FREEBSD32_SYS_MAXSYSCALL 597 diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c index 79c414b675b1..f0f8d26554b5 100644 --- a/sys/compat/freebsd32/freebsd32_syscalls.c +++ b/sys/compat/freebsd32/freebsd32_syscalls.c @@ -84,8 +84,8 @@ const char *freebsd32_syscallnames[] = { "obs_vhangup", /* 76 = obsolete vhangup */ "obs_vlimit", /* 77 = obsolete vlimit */ "mincore", /* 78 = mincore */ - "getgroups", /* 79 = getgroups */ - "setgroups", /* 80 = setgroups */ + "compat14.getgroups", /* 79 = freebsd14 getgroups */ + "compat14.setgroups", /* 80 = freebsd14 setgroups */ "getpgrp", /* 81 = getpgrp */ "setpgid", /* 82 = setpgid */ "freebsd32_setitimer", /* 83 = freebsd32_setitimer */ @@ -600,4 +600,6 @@ const char *freebsd32_syscallnames[] = { "exterrctl", /* 592 = exterrctl */ "inotify_add_watch_at", /* 593 = inotify_add_watch_at */ "inotify_rm_watch", /* 594 = inotify_rm_watch */ + "getgroups", /* 595 = getgroups */ + "setgroups", /* 596 = setgroups */ }; diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c index 1a4b0d87722c..12f1a346c3e9 100644 --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -146,8 +146,8 @@ struct sysent freebsd32_sysent[] = { { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 76 = obsolete vhangup */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 77 = obsolete vlimit */ { .sy_narg = AS(mincore_args), .sy_call = (sy_call_t *)sys_mincore, .sy_auevent = AUE_MINCORE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 78 = mincore */ - { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 79 = getgroups */ - { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 80 = setgroups */ + { compat14(AS(freebsd14_getgroups_args),getgroups), .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 79 = freebsd14 getgroups */ + { compat14(AS(freebsd14_setgroups_args),setgroups), .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 80 = freebsd14 setgroups */ { .sy_narg = 0, .sy_call = (sy_call_t *)sys_getpgrp, .sy_auevent = AUE_GETPGRP, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 81 = getpgrp */ { .sy_narg = AS(setpgid_args), .sy_call = (sy_call_t *)sys_setpgid, .sy_auevent = AUE_SETPGRP, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 82 = setpgid */ { .sy_narg = AS(freebsd32_setitimer_args), .sy_call = (sy_call_t *)freebsd32_setitimer, .sy_auevent = AUE_SETITIMER, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 83 = freebsd32_setitimer */ @@ -662,4 +662,6 @@ struct sysent freebsd32_sysent[] = { { .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */ { .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */ { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ + { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */ + { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */ }; diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c index f9dc514bee7d..e471c5148021 100644 --- a/sys/compat/freebsd32/freebsd32_systrace_args.c +++ b/sys/compat/freebsd32/freebsd32_systrace_args.c @@ -457,22 +457,6 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 3; break; } - /* getgroups */ - case 79: { - struct getgroups_args *p = params; - iarg[a++] = p->gidsetsize; /* int */ - uarg[a++] = (intptr_t)p->gidset; /* gid_t * */ - *n_args = 2; - break; - } - /* setgroups */ - case 80: { - struct setgroups_args *p = params; - iarg[a++] = p->gidsetsize; /* int */ - uarg[a++] = (intptr_t)p->gidset; /* const gid_t * */ - *n_args = 2; - break; - } /* getpgrp */ case 81: { *n_args = 0; @@ -3413,6 +3397,22 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 2; break; } + /* getgroups */ + case 595: { + struct getgroups_args *p = params; + iarg[a++] = p->gidsetsize; /* int */ + uarg[a++] = (intptr_t)p->gidset; /* gid_t * */ + *n_args = 2; + break; + } + /* setgroups */ + case 596: { + struct setgroups_args *p = params; + iarg[a++] = p->gidsetsize; /* int */ + uarg[a++] = (intptr_t)p->gidset; /* const gid_t * */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -4112,32 +4112,6 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; - /* getgroups */ - case 79: - switch (ndx) { - case 0: - p = "int"; - break; - case 1: - p = "userland gid_t *"; - break; - default: - break; - }; - break; - /* setgroups */ - case 80: - switch (ndx) { - case 0: - p = "int"; - break; - case 1: - p = "userland const gid_t *"; - break; - default: - break; - }; - break; /* getpgrp */ case 81: break; @@ -9222,6 +9196,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* getgroups */ + case 595: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "userland gid_t *"; + break; + default: + break; + }; + break; + /* setgroups */ + case 596: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "userland const gid_t *"; + break; + default: + break; + }; + break; default: break; }; @@ -9488,16 +9488,6 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; - /* getgroups */ - case 79: - if (ndx == 0 || ndx == 1) - p = "int"; - break; - /* setgroups */ - case 80: - if (ndx == 0 || ndx == 1) - p = "int"; - break; /* getpgrp */ case 81: /* setpgid */ @@ -11130,6 +11120,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* getgroups */ + case 595: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* setgroups */ + case 596: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/compat/linuxkpi/common/include/linux/ieee80211.h b/sys/compat/linuxkpi/common/include/linux/ieee80211.h index 3644ef80861b..5851ac08f083 100644 --- a/sys/compat/linuxkpi/common/include/linux/ieee80211.h +++ b/sys/compat/linuxkpi/common/include/linux/ieee80211.h @@ -35,6 +35,7 @@ #include <asm/unaligned.h> #include <linux/kernel.h> #include <linux/bitops.h> +#include <linux/bitfield.h> #include <linux/if_ether.h> /* linux_80211.c */ @@ -121,7 +122,20 @@ enum ieee80211_rate_control_changed_flags { /* 802.11-2016, 9.4.2.158.3 Supported VHT-MCS and NSS Set field. */ #define IEEE80211_VHT_EXT_NSS_BW_CAPABLE (1 << 13) /* part of tx_highest */ -#define IEEE80211_VHT_MAX_AMPDU_1024K 7 /* 9.4.2.56.3 A-MPDU Parameters field, Table 9-163 */ +/* + * 802.11-2020, 9.4.2.157.2 VHT Capabilities Information field, + * Table 9-271-Subfields of the VHT Capabilities Information field (continued). + */ +enum ieee80211_vht_max_ampdu_len_exp { + IEEE80211_VHT_MAX_AMPDU_8K = 0, + IEEE80211_VHT_MAX_AMPDU_16K = 1, + IEEE80211_VHT_MAX_AMPDU_32K = 2, + IEEE80211_VHT_MAX_AMPDU_64K = 3, + IEEE80211_VHT_MAX_AMPDU_128K = 4, + IEEE80211_VHT_MAX_AMPDU_256K = 5, + IEEE80211_VHT_MAX_AMPDU_512K = 6, + IEEE80211_VHT_MAX_AMPDU_1024K = 7, +}; #define IEEE80211_WEP_IV_LEN 3 /* net80211: IEEE80211_WEP_IVLEN */ #define IEEE80211_WEP_ICV_LEN 4 diff --git a/sys/compat/linuxkpi/common/include/net/cfg80211.h b/sys/compat/linuxkpi/common/include/net/cfg80211.h index 18b34f0e90ec..044f348ef08b 100644 --- a/sys/compat/linuxkpi/common/include/net/cfg80211.h +++ b/sys/compat/linuxkpi/common/include/net/cfg80211.h @@ -36,6 +36,7 @@ #include <linux/mutex.h> #include <linux/if_ether.h> #include <linux/ethtool.h> +#include <linux/debugfs.h> #include <linux/device.h> #include <linux/netdevice.h> #include <linux/random.h> @@ -722,8 +723,10 @@ struct linuxkpi_ieee80211_regdomain { #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128TU 0x04 #define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY 0x08 #define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_32US 0x10 +#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_256US 0x10 #define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY 0x20 #define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_64US 0x40 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_256US 0x40 #define VENDOR_CMD_RAW_DATA (void *)(uintptr_t)(-ENOENT) @@ -2065,6 +2068,18 @@ nl80211_chan_width_to_mhz(enum nl80211_chan_width width) } static __inline ssize_t +wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file, + char *buf, size_t bufsize, const char __user *userbuf, size_t count, + loff_t *ppos, + ssize_t (*handler)(struct wiphy *, struct file *, char *, size_t, void *), + void *data) +{ + TODO(); + return (-ENXIO); +} + + +static __inline ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file, char *buf, size_t bufsize, const char __user *userbuf, size_t count, ssize_t (*handler)(struct wiphy *, struct file *, char *, size_t, void *), diff --git a/sys/compat/linuxkpi/common/include/net/mac80211.h b/sys/compat/linuxkpi/common/include/net/mac80211.h index af3199c38939..2ed595095f9e 100644 --- a/sys/compat/linuxkpi/common/include/net/mac80211.h +++ b/sys/compat/linuxkpi/common/include/net/mac80211.h @@ -1184,7 +1184,7 @@ struct wireless_dev *linuxkpi_ieee80211_vif_to_wdev(struct ieee80211_vif *); void linuxkpi_ieee80211_connection_loss(struct ieee80211_vif *); void linuxkpi_ieee80211_beacon_loss(struct ieee80211_vif *); struct sk_buff *linuxkpi_ieee80211_probereq_get(struct ieee80211_hw *, - uint8_t *, uint8_t *, size_t, size_t); + const uint8_t *, const uint8_t *, size_t, size_t); void linuxkpi_ieee80211_tx_status(struct ieee80211_hw *, struct sk_buff *); void linuxkpi_ieee80211_tx_status_ext(struct ieee80211_hw *, struct ieee80211_tx_status *); @@ -2161,8 +2161,8 @@ ieee80211_nullfunc_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, } static __inline struct sk_buff * -ieee80211_probereq_get(struct ieee80211_hw *hw, uint8_t *addr, - uint8_t *ssid, size_t ssid_len, size_t tailroom) +ieee80211_probereq_get(struct ieee80211_hw *hw, const uint8_t *addr, + const uint8_t *ssid, size_t ssid_len, size_t tailroom) { return (linuxkpi_ieee80211_probereq_get(hw, addr, ssid, ssid_len, diff --git a/sys/compat/linuxkpi/common/src/linux_80211.c b/sys/compat/linuxkpi/common/src/linux_80211.c index 1d00e8da8f9a..a7d6003843ba 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211.c +++ b/sys/compat/linuxkpi/common/src/linux_80211.c @@ -7668,8 +7668,8 @@ linuxkpi_ieee80211_queue_work(struct ieee80211_hw *hw, } struct sk_buff * -linuxkpi_ieee80211_probereq_get(struct ieee80211_hw *hw, uint8_t *addr, - uint8_t *ssid, size_t ssid_len, size_t tailroom) +linuxkpi_ieee80211_probereq_get(struct ieee80211_hw *hw, const uint8_t *addr, + const uint8_t *ssid, size_t ssid_len, size_t tailroom) { struct sk_buff *skb; struct ieee80211_frame *wh; diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 2458756ae350..92e98aa57ebf 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -2436,7 +2436,7 @@ options HID_DEBUG # enable debug msgs device hidbus # HID bus device hidmap # HID to evdev mapping device hidraw # Raw access driver -options HIDRAW_MAKE_UHID_ALIAS # install /dev/uhid alias +options HIDRAW_MAKE_UHID_ALIAS # install /dev/uhid alias for /dev/hidraw device hconf # Multitouch configuration TLC device hcons # Consumer controls device hgame # Generic game controllers @@ -2446,6 +2446,8 @@ device hmt # HID multitouch (MS-compatible) device hpen # Generic pen driver device hsctrl # System controls device ps4dshock # Sony PS4 DualShock 4 gamepad driver +device u2f # FIDO/U2F authenticator +options U2F_MAKE_UHID_ALIAS # install /dev/uhid alias for /dev/u2f/ device xb360gp # XBox 360 gamepad driver ##################################################################### diff --git a/sys/conf/files b/sys/conf/files index be65ed20d6aa..d89813c70355 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1750,6 +1750,7 @@ dev/hid/hpen.c optional hpen dev/hid/hsctrl.c optional hsctrl dev/hid/ietp.c optional ietp dev/hid/ps4dshock.c optional ps4dshock +dev/hid/u2f.c optional u2f dev/hid/xb360gp.c optional xb360gp dev/hifn/hifn7751.c optional hifn dev/hptiop/hptiop.c optional hptiop scbus @@ -2280,6 +2281,8 @@ dev/ixgbe/ixgbe_x540.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_x550.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" +dev/ixgbe/ixgbe_e610.c optional ix inet | ixv inet \ + compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb_82598.c optional ix inet | ixv inet \ @@ -3448,7 +3451,6 @@ dev/virtio/mmio/virtio_mmio.c optional virtio_mmio dev/virtio/mmio/virtio_mmio_acpi.c optional virtio_mmio acpi dev/virtio/mmio/virtio_mmio_cmdline.c optional virtio_mmio dev/virtio/mmio/virtio_mmio_fdt.c optional virtio_mmio fdt -dev/virtio/mmio/virtio_mmio_if.m optional virtio_mmio dev/virtio/network/if_vtnet.c optional vtnet dev/virtio/balloon/virtio_balloon.c optional virtio_balloon dev/virtio/block/virtio_blk.c optional virtio_blk diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 80548320c3fc..9b6ba03b78df 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -419,6 +419,9 @@ contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S optional zfs com contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S optional zfs compile-with "${ZFS_S}" contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S optional zfs compile-with "${ZFS_S}" +# zfs AVX2 implementation of aes-gcm from BoringSSL +contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-avx2-vaes.S optional zfs compile-with "${ZFS_S}" + # zfs sha2 hash support zfs-sha256-x86_64.o optional zfs \ dependency "$S/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256-x86_64.S" \ diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index 641001efab5e..45966fea8041 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -178,6 +178,8 @@ crypto/des/des_enc.c optional netsmb crypto/openssl/ossl_aarch64.c optional ossl crypto/openssl/aarch64/chacha-armv8.S optional ossl \ compile-with "${CC} -c ${CFLAGS:N-mgeneral-regs-only} -I$S/crypto/openssl ${WERROR} ${.IMPSRC}" +crypto/openssl/aarch64/chacha-armv8-sve.S optional ossl \ + compile-with "${CC} -c ${CFLAGS:N-mgeneral-regs-only} -I$S/crypto/openssl ${WERROR} ${.IMPSRC}" crypto/openssl/aarch64/poly1305-armv8.S optional ossl \ compile-with "${CC} -c ${CFLAGS:N-mgeneral-regs-only} -I$S/crypto/openssl ${WERROR} ${.IMPSRC}" crypto/openssl/aarch64/sha1-armv8.S optional ossl \ diff --git a/sys/conf/kern.post.mk b/sys/conf/kern.post.mk index 59e51c394a35..bb3c7af82a4d 100644 --- a/sys/conf/kern.post.mk +++ b/sys/conf/kern.post.mk @@ -124,7 +124,10 @@ PORTSMODULESENV=\ all: .for __i in ${PORTS_MODULES} @${ECHO} "===> Ports module ${__i} (all)" - cd ${PORTSDIR:U/usr/ports}/${__i}; ${PORTSMODULESENV} ${MAKE} -B clean build + port=${__i}; flavor=$${port#*@}; port=$${port%@*}; flavor=$${flavor%$${port}}; \ + cd ${PORTSDIR:U/usr/ports}/$${port}; \ + ${PORTSMODULESENV} ${MAKE} -B $${flavor:+FLAVOR=}$${flavor} \ + clean build .endfor .for __target in install reinstall clean @@ -132,7 +135,10 @@ ${__target}: ports-${__target} ports-${__target}: .for __i in ${PORTS_MODULES} @${ECHO} "===> Ports module ${__i} (${__target})" - cd ${PORTSDIR:U/usr/ports}/${__i}; ${PORTSMODULESENV} ${MAKE} -B ${__target:C/(re)?install/deinstall reinstall/} + port=${__i}; flavor=$${port#*@}; port=$${port%@*}; flavor=$${flavor%$${port}}; \ + cd ${PORTSDIR:U/usr/ports}/$${port}; \ + ${PORTSMODULESENV} ${MAKE} -B $${flavor:+FLAVOR=}$${flavor} \ + ${__target:C/(re)?install/deinstall reinstall/} .endfor .endfor .endif @@ -366,6 +372,19 @@ _ILINKS+= x86 _ILINKS+= i386 .endif +.if ${MK_REPRODUCIBLE_BUILD} != "no" +PREFIX_SYSDIR=/usr/src/sys +PREFIX_OBJDIR=/usr/obj/usr/src/${MACHINE}.${MACHINE_CPUARCH}/sys/${KERN_IDENT} +CFLAGS+= -ffile-prefix-map=${SYSDIR}=${PREFIX_SYSDIR} +CFLAGS+= -ffile-prefix-map=${.OBJDIR}=${PREFIX_OBJDIR} +.if defined(SYSROOT) +CFLAGS+= -ffile-prefix-map=${SYSROOT}=/sysroot +.endif +.else +PREFIX_SYSDIR=${SYSDIR} +PREFIX_OBJDIR=${.OBJDIR} +.endif + # Ensure that the link exists without depending on it when it exists. # Ensure that debug info references the path in the source tree. .for _link in ${_ILINKS} @@ -373,9 +392,9 @@ _ILINKS+= i386 ${SRCS} ${DEPENDOBJS}: ${_link} .endif .if ${_link} == "machine" -CFLAGS+= -fdebug-prefix-map=./machine=${SYSDIR}/${MACHINE}/include +CFLAGS+= -fdebug-prefix-map=./machine=${PREFIX_SYSDIR}/${MACHINE}/include .else -CFLAGS+= -fdebug-prefix-map=./${_link}=${SYSDIR}/${_link}/include +CFLAGS+= -fdebug-prefix-map=./${_link}=${PREFIX_SYSDIR}/${_link}/include .endif .endfor @@ -448,7 +467,7 @@ config.o env.o hints.o vers.o vnode_if.o: NEWVERS_ENV+= MAKE="${MAKE}" .if ${MK_REPRODUCIBLE_BUILD} != "no" -NEWVERS_ARGS+= -R +NEWVERS_ARGS+= -R -d ${PREFIX_OBJDIR} .endif vers.c: .NOMETA_CMP $S/conf/newvers.sh $S/sys/param.h ${SYSTEM_DEP:Nvers.*} ${NEWVERS_ENV} sh $S/conf/newvers.sh ${NEWVERS_ARGS} ${KERN_IDENT} diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk index 78178065e15b..1fcfd6467e7f 100644 --- a/sys/conf/kern.pre.mk +++ b/sys/conf/kern.pre.mk @@ -214,7 +214,8 @@ ZFS_CFLAGS+= -I$S/contrib/openzfs/module/icp/include \ .if ${MACHINE_ARCH} == "amd64" ZFS_CFLAGS+= -D__x86_64 -DHAVE_SSE2 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 \ - -DHAVE_AVX -DHAVE_AVX2 -DHAVE_AVX512F -DHAVE_AVX512VL -DHAVE_AVX512BW + -DHAVE_AVX -DHAVE_AVX2 -DHAVE_AVX512F -DHAVE_AVX512VL -DHAVE_AVX512BW \ + -DHAVE_VAES -DHAVE_VPCLMULQDQ .endif .if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" || \ diff --git a/sys/conf/kmod.mk b/sys/conf/kmod.mk index 645c04cdd135..0fd2d4050cf1 100644 --- a/sys/conf/kmod.mk +++ b/sys/conf/kmod.mk @@ -303,6 +303,25 @@ all: ${PROG} beforedepend: ${_ILINKS} beforebuild: ${_ILINKS} +.if ${MK_REPRODUCIBLE_BUILD} != "no" +PREFIX_SYSDIR=/usr/src/sys +CFLAGS+= -ffile-prefix-map=${SYSDIR}=${PREFIX_SYSDIR} +.if defined(KERNBUILDDIR) +PREFIX_KERNBUILDDIR=/usr/obj/usr/src/${MACHINE}.${MACHINE_CPUARCH}/sys/${KERNBUILDDIR:T} +PREFIX_OBJDIR=${PREFIX_KERNBUILDDIR}/modules/usr/src/sys/modules/${.OBJDIR:T} +CFLAGS+= -ffile-prefix-map=${KERNBUILDDIR}=${PREFIX_KERNBUILDDIR} +.else +PREFIX_OBJDIR=/usr/obj/usr/src/${MACHINE}.${MACHINE_CPUARCH}/sys/modules/${.OBJDIR:T} +.endif +CFLAGS+= -ffile-prefix-map=${.OBJDIR}=${PREFIX_OBJDIR} +.if defined(SYSROOT) +CFLAGS+= -ffile-prefix-map=${SYSROOT}=/sysroot +.endif +.else +PREFIX_SYSDIR=${SYSDIR} +PREFIX_OBJDIR=${.OBJDIR} +.endif + # Ensure that the links exist without depending on it when it exists which # causes all the modules to be rebuilt when the directory pointed to changes. # Ensure that debug info references the path in the source tree. @@ -311,9 +330,9 @@ beforebuild: ${_ILINKS} OBJS_DEPEND_GUESS+= ${_link} .endif .if ${_link} == "machine" -CFLAGS+= -fdebug-prefix-map=./machine=${SYSDIR}/${MACHINE}/include +CFLAGS+= -fdebug-prefix-map=./machine=${PREFIX_SYSDIR}/${MACHINE}/include .else -CFLAGS+= -fdebug-prefix-map=./${_link}=${SYSDIR}/${_link}/include +CFLAGS+= -fdebug-prefix-map=./${_link}=${PREFIX_SYSDIR}/${_link}/include .endif .endfor diff --git a/sys/conf/newvers.sh b/sys/conf/newvers.sh index 66926805052c..8b60da95741e 100644 --- a/sys/conf/newvers.sh +++ b/sys/conf/newvers.sh @@ -110,14 +110,18 @@ COPYRIGHT="$COPYRIGHT # We expand include_metadata later since we may set it to the # future value of modified. +builddir=$(pwd) include_metadata=yes modified=no -while getopts crRvV: opt; do +while getopts cd:rRvV: opt; do case "$opt" in c) echo "$COPYRIGHT" exit 0 ;; + d) + builddir=$OPTARG + ;; r) include_metadata=no ;; @@ -187,7 +191,7 @@ fi touch version v=$(cat version) u=${USER:-root} -d=$(pwd) +d=$builddir h=${HOSTNAME:-$(hostname)} if [ -n "$SOURCE_DATE_EPOCH" ]; then if ! t=$(date -ur $SOURCE_DATE_EPOCH 2>/dev/null); then diff --git a/sys/conf/options b/sys/conf/options index a637b0b74a77..bb3d08e88e21 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -1009,6 +1009,7 @@ IICHID_DEBUG opt_hid.h IICHID_SAMPLING opt_hid.h HKBD_DFLT_KEYMAP opt_hkbd.h HIDRAW_MAKE_UHID_ALIAS opt_hid.h +U2F_MAKE_UHID_ALIAS opt_hid.h # kenv options # The early kernel environment (loader environment, config(8)-provided static) diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh index 885a64037f89..70a2364f1fc6 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh @@ -109,7 +109,7 @@ case "$OS" in KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" ;; freebsd15-0c) - FreeBSD="15.0-CURRENT" + FreeBSD="15.0-PRERELEASE" OSNAME="FreeBSD $FreeBSD" OSv="freebsd14.0" URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh index 17e976ebcc39..2807d9e77127 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh @@ -5,12 +5,13 @@ # # Usage: # -# qemu-4-build-vm.sh OS [--enable-debug][--dkms][--poweroff] -# [--release][--repo][--tarball] +# qemu-4-build-vm.sh OS [--enable-debug][--dkms][--patch-level NUM] +# [--poweroff][--release][--repo][--tarball] # # OS: OS name like 'fedora41' # --enable-debug: Build RPMs with '--enable-debug' (for testing) # --dkms: Build DKMS RPMs as well +# --patch-level NUM: Use a custom patch level number for packages. # --poweroff: Power-off the VM after building # --release Build zfs-release*.rpm as well # --repo After building everything, copy RPMs into /tmp/repo @@ -21,6 +22,7 @@ ENABLE_DEBUG="" DKMS="" +PATCH_LEVEL="" POWEROFF="" RELEASE="" REPO="" @@ -35,6 +37,11 @@ while [[ $# -gt 0 ]]; do DKMS=1 shift ;; + --patch-level) + PATCH_LEVEL=$2 + shift + shift + ;; --poweroff) POWEROFF=1 shift @@ -215,6 +222,10 @@ function rpm_build_and_install() { run ./autogen.sh echo "##[endgroup]" + if [ -n "$PATCH_LEVEL" ] ; then + sed -i -E 's/(Release:\s+)1/\1'$PATCH_LEVEL'/g' META + fi + echo "##[group]Configure" run ./configure --enable-debuginfo $extra echo "##[endgroup]" @@ -328,7 +339,13 @@ fi # almalinux9.5 # fedora42 source /etc/os-release -sudo hostname "$ID$VERSION_ID" + if which hostnamectl &> /dev/null ; then + # Fedora 42+ use hostnamectl + sudo hostnamectl set-hostname "$ID$VERSION_ID" + sudo hostnamectl set-hostname --pretty "$ID$VERSION_ID" +else + sudo hostname "$ID$VERSION_ID" +fi # save some sysinfo uname -a > /var/tmp/uname.txt diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml index 5b5afe746859..d8a95954fe1a 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml @@ -32,6 +32,11 @@ on: options: - "Build RPMs" - "Test repo" + patch_level: + type: string + required: false + default: "" + description: "(optional) patch level number" repo_url: type: string required: false @@ -78,7 +83,13 @@ jobs: mkdir -p /tmp/repo ssh zfs@vm0 '$HOME/zfs/.github/workflows/scripts/qemu-test-repo-vm.sh' ${{ github.event.inputs.repo_url }} else - .github/workflows/scripts/qemu-4-build.sh --repo --release --dkms --tarball ${{ matrix.os }} + EXTRA="" + if [ -n "${{ github.event.inputs.patch_level }}" ] ; then + EXTRA="--patch-level ${{ github.event.inputs.patch_level }}" + fi + + .github/workflows/scripts/qemu-4-build.sh $EXTRA \ + --repo --release --dkms --tarball ${{ matrix.os }} fi - name: Prepare artifacts diff --git a/sys/contrib/openzfs/.mailmap b/sys/contrib/openzfs/.mailmap index b6d942c000b8..e6f09c6c9d43 100644 --- a/sys/contrib/openzfs/.mailmap +++ b/sys/contrib/openzfs/.mailmap @@ -23,6 +23,7 @@ # These maps are making names consistent where they have varied but the email # address has never changed. In most cases, the full name is in the # Signed-off-by of a commit with a matching author. +Achill Gilgenast <achill@achill.org> Ahelenia ZiemiaÅ„ska <nabijaczleweli@gmail.com> Ahelenia ZiemiaÅ„ska <nabijaczleweli@nabijaczleweli.xyz> Alex John <alex@stty.io> @@ -37,6 +38,7 @@ Crag Wang <crag0715@gmail.com> Damian Szuberski <szuberskidamian@gmail.com> Daniel Kolesa <daniel@octaforge.org> Debabrata Banerjee <dbavatar@gmail.com> +Diwakar Kristappagari <diwakar-k@hpe.com> Finix Yan <yanchongwen@hotmail.com> Gaurav Kumar <gauravk.18@gmail.com> Gionatan Danti <g.danti@assyoma.it> @@ -145,6 +147,7 @@ Gaurav Kumar <gauravk.18@gmail.com> <gaurkuma@users.noreply.github.com> George Gaydarov <git@gg7.io> <gg7@users.noreply.github.com> Georgy Yakovlev <gyakovlev@gentoo.org> <168902+gyakovlev@users.noreply.github.com> Gerardwx <gerardw@alum.mit.edu> <Gerardwx@users.noreply.github.com> +Germano Massullo <germano.massullo@gmail.com> <Germano0@users.noreply.github.com> Gian-Carlo DeFazio <defazio1@llnl.gov> <defaziogiancarlo@users.noreply.github.com> Giuseppe Di Natale <dinatale2@llnl.gov> <dinatale2@users.noreply.github.com> Hajo Möller <dasjoe@gmail.com> <dasjoe@users.noreply.github.com> @@ -164,6 +167,7 @@ John Ramsden <johnramsden@riseup.net> <johnramsden@users.noreply.github.com> Jonathon Fernyhough <jonathon@m2x.dev> <559369+jonathonf@users.noreply.github.com> Jose Luis Duran <jlduran@gmail.com> <jlduran@users.noreply.github.com> Justin Hibbits <chmeeedalf@gmail.com> <chmeeedalf@users.noreply.github.com> +Kaitlin Hoang <kthoang@amazon.com> <khoang98@users.noreply.github.com> Kevin Greene <kevin.greene@delphix.com> <104801862+kxgreene@users.noreply.github.com> Kevin Jin <lostking2008@hotmail.com> <33590050+jxdking@users.noreply.github.com> Kevin P. Fleming <kevin@km6g.us> <kpfleming@users.noreply.github.com> diff --git a/sys/contrib/openzfs/AUTHORS b/sys/contrib/openzfs/AUTHORS index a9d249a66f1e..6c34c07f39ef 100644 --- a/sys/contrib/openzfs/AUTHORS +++ b/sys/contrib/openzfs/AUTHORS @@ -10,6 +10,7 @@ PAST MAINTAINERS: CONTRIBUTORS: Aaron Fineman <abyxcos@gmail.com> + Achill Gilgenast <achill@achill.org> Adam D. Moss <c@yotes.com> Adam Leventhal <ahl@delphix.com> Adam Stevko <adam.stevko@gmail.com> @@ -59,6 +60,7 @@ CONTRIBUTORS: Andreas Buschmann <andreas.buschmann@tech.net.de> Andreas Dilger <adilger@intel.com> Andreas Vögele <andreas@andreasvoegele.com> + Andres <a-d-j-i@users.noreply.github.com> Andrew Barnes <barnes333@gmail.com> Andrew Hamilton <ahamilto@tjhsst.edu> Andrew Innes <andrew.c12@gmail.com> @@ -72,6 +74,7 @@ CONTRIBUTORS: Andrey Prokopenko <job@terem.fr> Andrey Vesnovaty <andrey.vesnovaty@gmail.com> Andriy Gapon <avg@freebsd.org> + Andriy Tkachuk <andriy.tkachuk@seagate.com> Andy Bakun <github@thwartedefforts.org> Andy Fiddaman <omnios@citrus-it.co.uk> Aniruddha Shankar <k@191a.net> @@ -120,6 +123,7 @@ CONTRIBUTORS: Caleb James DeLisle <calebdelisle@lavabit.com> Cameron Harr <harr1@llnl.gov> Cao Xuewen <cao.xuewen@zte.com.cn> + Carl George <carlwgeorge@gmail.com> Carlo Landmeter <clandmeter@gmail.com> Carlos Alberto Lopez Perez <clopez@igalia.com> Cedric Maunoury <cedric.maunoury@gmail.com> @@ -200,6 +204,7 @@ CONTRIBUTORS: Dimitri John Ledkov <xnox@ubuntu.com> Dimitry Andric <dimitry@andric.com> Dirkjan Bussink <d.bussink@gmail.com> + Diwakar Kristappagari <diwakar-k@hpe.com> Dmitry Khasanov <pik4ez@gmail.com> Dominic Pearson <dsp@technoanimal.net> Dominik Hassler <hadfl@omniosce.org> @@ -250,6 +255,7 @@ CONTRIBUTORS: George Wilson <gwilson@delphix.com> Georgy Yakovlev <ya@sysdump.net> Gerardwx <gerardw@alum.mit.edu> + Germano Massullo <germano.massullo@gmail.com> Gian-Carlo DeFazio <defazio1@llnl.gov> Gionatan Danti <g.danti@assyoma.it> Giuseppe Di Natale <guss80@gmail.com> @@ -287,6 +293,7 @@ CONTRIBUTORS: Igor K <igor@dilos.org> Igor Kozhukhov <ikozhukhov@gmail.com> Igor Lvovsky <ilvovsky@gmail.com> + Igor Ostapenko <pm@igoro.pro> ilbsmart <wgqimut@gmail.com> Ilkka Sovanto <github@ilkka.kapsi.fi> illiliti <illiliti@protonmail.com> @@ -326,6 +333,7 @@ CONTRIBUTORS: Jinshan Xiong <jinshan.xiong@intel.com> Jitendra Patidar <jitendra.patidar@nutanix.com> JK Dingwall <james@dingwall.me.uk> + Joel Low <joel@joelsplace.sg> Joe Stein <joe.stein@delphix.com> John-Mark Gurney <jmg@funkthat.com> John Albietz <inthecloud247@gmail.com> @@ -374,6 +382,7 @@ CONTRIBUTORS: Kevin Jin <lostking2008@hotmail.com> Kevin P. Fleming <kevin@km6g.us> Kevin Tanguy <kevin.tanguy@ovh.net> + khoang98 <khoang98@users.noreply.github.com> KireinaHoro <i@jsteward.moe> Kjeld Schouten-Lebbing <kjeld@schouten-lebbing.nl> Kleber TarcÃsio <klebertarcisio@yahoo.com.br> @@ -447,6 +456,7 @@ CONTRIBUTORS: Max Zettlmeißl <max@zettlmeissl.de> Md Islam <mdnahian@outlook.com> megari <megari@iki.fi> + Meriel Luna Mittelbach <lunarlambda@gmail.com> Michael D Labriola <michael.d.labriola@gmail.com> Michael Franzl <michael@franzl.name> Michael Gebetsroither <michael@mgeb.org> @@ -494,6 +504,7 @@ CONTRIBUTORS: Orivej Desh <orivej@gmx.fr> Pablo Correa Gómez <ablocorrea@hotmail.com> Palash Gandhi <pbg4930@rit.edu> + Patrick Fasano <patrick@patrickfasano.com> Patrick Mooney <pmooney@pfmooney.com> Patrik Greco <sikevux@sikevux.se> Paul B. Henson <henson@acm.org> @@ -535,6 +546,7 @@ CONTRIBUTORS: Remy Blank <remy.blank@pobox.com> renelson <bnelson@nelsonbe.com> Reno Reckling <e-github@wthack.de> + René Wirnata <rene.wirnata@pandascience.net> Ricardo M. Correia <ricardo.correia@oracle.com> Riccardo Schirone <rschirone91@gmail.com> Richard Allen <belperite@gmail.com> @@ -640,6 +652,7 @@ CONTRIBUTORS: tleydxdy <shironeko.github@tesaguri.club> Tobin Harding <me@tobin.cc> Todd Seidelmann <seidelma@users.noreply.github.com> + Todd Zullinger <tmz@pobox.com> Tom Caputi <tcaputi@datto.com> Tom Matthews <tom@axiom-partners.com> Tomohiro Kusumi <kusumi.tomohiro@gmail.com> diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META index 47f0795bfa11..1a9c671feac6 100644 --- a/sys/contrib/openzfs/META +++ b/sys/contrib/openzfs/META @@ -6,5 +6,5 @@ Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS -Linux-Maximum: 6.15 +Linux-Maximum: 6.16 Linux-Minimum: 4.18 diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index bf44d9c322b4..a5f23be2aaaf 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -127,6 +127,7 @@ static zfs_range_tree_t *mos_refd_objs; static spa_t *spa; static objset_t *os; static boolean_t kernel_init_done; +static boolean_t corruption_found = B_FALSE; static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *, boolean_t); @@ -176,7 +177,7 @@ static int sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free, dmu_tx_t *tx) { - ASSERT3P(tx, ==, NULL); + ASSERT0P(tx); struct sublivelist_verify *sv = arg; sublivelist_verify_block_refcnt_t current = { .svbr_blk = *bp, @@ -250,6 +251,7 @@ sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle) &e->svbr_blk, B_TRUE); (void) printf("\tERROR: %d unmatched FREE(s): %s\n", e->svbr_refcnt, blkbuf); + corruption_found = B_TRUE; } zfs_btree_destroy(&sv->sv_pair); @@ -405,6 +407,7 @@ verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg, (u_longlong_t)DVA_GET_ASIZE(&found->svb_dva), (u_longlong_t)found->svb_allocated_txg, (u_longlong_t)txg); + corruption_found = B_TRUE; } } } @@ -426,6 +429,7 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg) (u_longlong_t)txg, (u_longlong_t)offset, (u_longlong_t)size, (u_longlong_t)mv->mv_vdid, (u_longlong_t)mv->mv_msid); + corruption_found = B_TRUE; } else { zfs_range_tree_add(mv->mv_allocated, offset, size); @@ -439,6 +443,7 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg) (u_longlong_t)txg, (u_longlong_t)offset, (u_longlong_t)size, (u_longlong_t)mv->mv_vdid, (u_longlong_t)mv->mv_msid); + corruption_found = B_TRUE; } else { zfs_range_tree_remove(mv->mv_allocated, offset, size); @@ -526,6 +531,7 @@ mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv) (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva), (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva), (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva)); + corruption_found = B_TRUE; continue; } @@ -542,6 +548,7 @@ mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv) (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva), (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva), (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva)); + corruption_found = B_TRUE; continue; } @@ -655,6 +662,7 @@ livelist_metaslab_validate(spa_t *spa) } (void) printf("ERROR: Found livelist blocks marked as allocated " "for indirect vdevs:\n"); + corruption_found = B_TRUE; zfs_btree_index_t *where = NULL; sublivelist_verify_block_t *svb; @@ -827,7 +835,7 @@ usage(void) (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " "to make only that option verbose\n"); (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); - zdb_exit(1); + zdb_exit(2); } static void @@ -892,9 +900,9 @@ dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) size_t nvsize = *(uint64_t *)data; char *packed = umem_alloc(nvsize, UMEM_NOFAIL); - VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH)); + VERIFY0(dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH)); - VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0); + VERIFY0(nvlist_unpack(packed, nvsize, &nv, 0)); umem_free(packed, nvsize); @@ -1455,8 +1463,8 @@ get_obsolete_refcount(vdev_t *vd) refcount++; } } else { - ASSERT3P(vd->vdev_obsolete_sm, ==, NULL); - ASSERT3U(obsolete_sm_object, ==, 0); + ASSERT0P(vd->vdev_obsolete_sm); + ASSERT0(obsolete_sm_object); } for (unsigned c = 0; c < vd->vdev_children; c++) { refcount += get_obsolete_refcount(vd->vdev_child[c]); @@ -1792,7 +1800,7 @@ print_vdev_indirect(vdev_t *vd) vdev_indirect_births_t *vib = vd->vdev_indirect_births; if (vim == NULL) { - ASSERT3P(vib, ==, NULL); + ASSERT0P(vib); return; } @@ -2043,10 +2051,10 @@ dump_ddt_object(ddt_t *ddt, ddt_type_t type, ddt_class_t class) if (error == ENOENT) return; - ASSERT(error == 0); + ASSERT0(error); error = ddt_object_count(ddt, type, class, &count); - ASSERT(error == 0); + ASSERT0(error); if (count == 0) return; @@ -2583,19 +2591,17 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp, } } -static void +static u_longlong_t print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp) { char blkbuf[BP_SPRINTF_LEN]; + u_longlong_t offset; int l; - if (!BP_IS_EMBEDDED(bp)) { - ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); - ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); - } + offset = (u_longlong_t)blkid2offset(dnp, bp, zb); - (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb)); + (void) printf("%16llx ", offset); ASSERT(zb->zb_level >= 0); @@ -2610,19 +2616,38 @@ print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb, snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, B_FALSE); if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD) snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp); - (void) printf("%s\n", blkbuf); + (void) printf("%s", blkbuf); + + if (!BP_IS_EMBEDDED(bp)) { + if (BP_GET_TYPE(bp) != dnp->dn_type) { + (void) printf(" (ERROR: Block pointer type " + "(%llu) does not match dnode type (%hhu))", + BP_GET_TYPE(bp), dnp->dn_type); + corruption_found = B_TRUE; + } + if (BP_GET_LEVEL(bp) != zb->zb_level) { + (void) printf(" (ERROR: Block pointer level " + "(%llu) does not match bookmark level (%lld))", + BP_GET_LEVEL(bp), (u_longlong_t)zb->zb_level); + corruption_found = B_TRUE; + } + } + (void) printf("\n"); + + return (offset); } static int visit_indirect(spa_t *spa, const dnode_phys_t *dnp, blkptr_t *bp, const zbookmark_phys_t *zb) { + u_longlong_t offset; int err = 0; if (BP_GET_BIRTH(bp) == 0) return (0); - print_indirect(spa, bp, zb, dnp); + offset = print_indirect(spa, bp, zb, dnp); if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) { arc_flags_t flags = ARC_FLAG_WAIT; @@ -2652,8 +2677,15 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp, break; fill += BP_GET_FILL(cbp); } - if (!err) - ASSERT3U(fill, ==, BP_GET_FILL(bp)); + if (!err) { + if (fill != BP_GET_FILL(bp)) { + (void) printf("%16llx: Block pointer " + "fill (%llu) does not match calculated " + "value (%llu)\n", offset, BP_GET_FILL(bp), + (u_longlong_t)fill); + corruption_found = B_TRUE; + } + } arc_buf_destroy(buf, &buf); } @@ -2909,6 +2941,7 @@ dump_full_bpobj(bpobj_t *bpo, const char *name, int indent) (void) printf("ERROR %u while trying to open " "subobj id %llu\n", error, (u_longlong_t)subobj); + corruption_found = B_TRUE; continue; } dump_full_bpobj(&subbpo, "subobj", indent + 1); @@ -3088,6 +3121,7 @@ bpobj_count_refd(bpobj_t *bpo) (void) printf("ERROR %u while trying to open " "subobj id %llu\n", error, (u_longlong_t)subobj); + corruption_found = B_TRUE; continue; } bpobj_count_refd(&subbpo); @@ -3109,7 +3143,7 @@ dsl_deadlist_entry_count_refd(void *arg, dsl_deadlist_entry_t *dle) static int dsl_deadlist_entry_dump(void *arg, dsl_deadlist_entry_t *dle) { - ASSERT(arg == NULL); + ASSERT0P(arg); if (dump_opt['d'] >= 5) { char buf[128]; (void) snprintf(buf, sizeof (buf), @@ -3347,7 +3381,7 @@ open_objset(const char *path, const void *tag, objset_t **osp) uint64_t sa_attrs = 0; uint64_t version = 0; - VERIFY3P(sa_os, ==, NULL); + VERIFY0P(sa_os); /* * We can't own an objset if it's redacted. Therefore, we do this @@ -3520,8 +3554,8 @@ dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid) uint64_t fuid_obj; /* first find the fuid object. It lives in the master node */ - VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, - 8, 1, &fuid_obj) == 0); + VERIFY0(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, + 8, 1, &fuid_obj)); zfs_fuid_avl_tree_create(&idx_tree, &domain_tree); (void) zfs_fuid_table_load(os, fuid_obj, &idx_tree, &domain_tree); @@ -7016,7 +7050,7 @@ deleted_livelists_count_blocks(spa_t *spa, zdb_cb_t *zbc) static void dump_livelist_cb(dsl_deadlist_t *ll, void *arg) { - ASSERT3P(arg, ==, NULL); + ASSERT0P(arg); global_feature_count[SPA_FEATURE_LIVELIST]++; dump_blkptr_list(ll, "Deleted Livelist"); dsl_deadlist_iterate(ll, sublivelist_verify_lightweight, NULL); @@ -7913,7 +7947,7 @@ verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current) for (uint64_t c = ckpoint_rvd->vdev_children; c < current_rvd->vdev_children; c++) { vdev_t *current_vd = current_rvd->vdev_child[c]; - VERIFY3P(current_vd->vdev_checkpoint_sm, ==, NULL); + VERIFY0P(current_vd->vdev_checkpoint_sm); } } @@ -9634,7 +9668,7 @@ main(int argc, char **argv) } else if (objset_str && !zdb_numeric(objset_str + 1) && dump_opt['N']) { printf("Supply a numeric objset ID with -N\n"); - error = 1; + error = 2; goto fini; } } else { @@ -9743,7 +9777,7 @@ main(int argc, char **argv) if (error == 0) { if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) { ASSERT(checkpoint_pool != NULL); - ASSERT(checkpoint_target == NULL); + ASSERT0P(checkpoint_target); error = spa_open(checkpoint_pool, &spa, FTAG); if (error != 0) { @@ -9936,5 +9970,8 @@ fini: if (kernel_init_done) kernel_fini(); + if (corruption_found && error == 0) + error = 3; + return (error); } diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh b/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh index 6e00f153be1c..78d8f658ddd8 100644 --- a/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh +++ b/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh @@ -441,8 +441,9 @@ zed_notify_slack_webhook() "${pathname}")" # Construct the JSON message for posting. + # shellcheck disable=SC2016 # - msg_json="$(printf '{"text": "*%s*\\n%s"}' "${subject}" "${msg_body}" )" + msg_json="$(printf '{"text": "*%s*\\n```%s```"}' "${subject}" "${msg_body}" )" # Send the POST request and check for errors. # diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c index 363bb6da74ec..235f011af953 100644 --- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c +++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c @@ -923,7 +923,7 @@ zfs_do_clone(int argc, char **argv) return (!!ret); usage: - ASSERT3P(zhp, ==, NULL); + ASSERT0P(zhp); nvlist_free(props); usage(B_FALSE); return (-1); diff --git a/sys/contrib/openzfs/cmd/zhack.c b/sys/contrib/openzfs/cmd/zhack.c index 8244bc83fa0d..2bd3051dce7b 100644 --- a/sys/contrib/openzfs/cmd/zhack.c +++ b/sys/contrib/openzfs/cmd/zhack.c @@ -162,9 +162,9 @@ zhack_import(char *target, boolean_t readonly) props = NULL; if (readonly) { - VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); - VERIFY(nvlist_add_uint64(props, - zpool_prop_to_name(ZPOOL_PROP_READONLY), 1) == 0); + VERIFY0(nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); + VERIFY0(nvlist_add_uint64(props, + zpool_prop_to_name(ZPOOL_PROP_READONLY), 1)); } zfeature_checks_disable = B_TRUE; @@ -218,8 +218,8 @@ dump_obj(objset_t *os, uint64_t obj, const char *name) } else { ASSERT(za->za_integer_length == 1); char val[1024]; - VERIFY(zap_lookup(os, obj, za->za_name, - 1, sizeof (val), val) == 0); + VERIFY0(zap_lookup(os, obj, za->za_name, + 1, sizeof (val), val)); (void) printf("\t%s = %s\n", za->za_name, val); } } diff --git a/sys/contrib/openzfs/cmd/zilstat.in b/sys/contrib/openzfs/cmd/zilstat.in index 4140398bf4a3..d01db9b0914b 100755 --- a/sys/contrib/openzfs/cmd/zilstat.in +++ b/sys/contrib/openzfs/cmd/zilstat.in @@ -47,6 +47,7 @@ cols = { "cec": [5, 1000, "zil_commit_error_count"], "csc": [5, 1000, "zil_commit_stall_count"], "cSc": [5, 1000, "zil_commit_suspend_count"], + "cCc": [5, 1000, "zil_commit_crash_count"], "ic": [5, 1000, "zil_itx_count"], "iic": [5, 1000, "zil_itx_indirect_count"], "iib": [5, 1024, "zil_itx_indirect_bytes"], diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c index d401e087916b..237e558da65b 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c @@ -752,10 +752,11 @@ usage(boolean_t requested) } /* - * zpool initialize [-c | -s | -u] [-w] <pool> [<vdev> ...] + * zpool initialize [-c | -s | -u] [-w] <-a | pool> [<vdev> ...] * Initialize all unused blocks in the specified vdevs, or all vdevs in the pool * if none specified. * + * -a Use all pools. * -c Cancel. Ends active initializing. * -s Suspend. Initializing can then be restarted with no flags. * -u Uninitialize. Clears initialization state. @@ -776,7 +777,7 @@ zpool_do_initialize(int argc, char **argv) {"suspend", no_argument, NULL, 's'}, {"uninit", no_argument, NULL, 'u'}, {"wait", no_argument, NULL, 'w'}, - {"all", no_argument, NULL, 'a'}, + {"all", no_argument, NULL, 'a'}, {0, 0, 0, 0} }; @@ -8446,8 +8447,9 @@ date_string_to_sec(const char *timestr, boolean_t rounding) } /* - * zpool scrub [-e | -s | -p | -C | -E | -S] [-w] <pool> ... + * zpool scrub [-e | -s | -p | -C | -E | -S] [-w] [-a | <pool> ...] * + * -a Scrub all pools. * -e Only scrub blocks in the error log. * -E End date of scrub. * -S Start date of scrub. @@ -8621,8 +8623,9 @@ zpool_do_resilver(int argc, char **argv) } /* - * zpool trim [-d] [-r <rate>] [-c | -s] <pool> [<device> ...] + * zpool trim [-d] [-r <rate>] [-c | -s] <-a | pool> [<device> ...] * + * -a Trim all pools. * -c Cancel. Ends any in-progress trim. * -d Secure trim. Requires kernel and device support. * -r <rate> Sets the TRIM rate in bytes (per second). Supports @@ -12374,7 +12377,7 @@ zpool_do_events_next(ev_opts_t *opts) nvlist_free(nvl); } - VERIFY(0 == close(zevent_fd)); + VERIFY0(close(zevent_fd)); return (ret); } diff --git a/sys/contrib/openzfs/cmd/ztest.c b/sys/contrib/openzfs/cmd/ztest.c index 2e88ae3e7994..89752dcb0f0f 100644 --- a/sys/contrib/openzfs/cmd/ztest.c +++ b/sys/contrib/openzfs/cmd/ztest.c @@ -273,7 +273,6 @@ extern int zfs_compressed_arc_enabled; extern int zfs_abd_scatter_enabled; extern uint_t dmu_object_alloc_chunk_shift; extern boolean_t zfs_force_some_double_word_sm_entries; -extern unsigned long zio_decompress_fail_fraction; extern unsigned long zfs_reconstruct_indirect_damage_fraction; extern uint64_t raidz_expand_max_reflow_bytes; extern uint_t raidz_expand_pause_point; @@ -829,8 +828,8 @@ static char *short_opts = NULL; static void init_options(void) { - ASSERT3P(long_opts, ==, NULL); - ASSERT3P(short_opts, ==, NULL); + ASSERT0P(long_opts); + ASSERT0P(short_opts); int count = sizeof (option_table) / sizeof (option_table[0]); long_opts = umem_alloc(sizeof (struct option) * count, UMEM_NOFAIL); @@ -1686,7 +1685,7 @@ ztest_rll_init(rll_t *rll) static void ztest_rll_destroy(rll_t *rll) { - ASSERT3P(rll->rll_writer, ==, NULL); + ASSERT0P(rll->rll_writer); ASSERT0(rll->rll_readers); mutex_destroy(&rll->rll_lock); cv_destroy(&rll->rll_cv); @@ -1720,7 +1719,7 @@ ztest_rll_unlock(rll_t *rll) rll->rll_writer = NULL; } else { ASSERT3S(rll->rll_readers, >, 0); - ASSERT3P(rll->rll_writer, ==, NULL); + ASSERT0P(rll->rll_writer); rll->rll_readers--; } @@ -1996,7 +1995,7 @@ ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr) dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length, ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH | DMU_KEEP_CACHING) != 0) { - zil_itx_destroy(itx); + zil_itx_destroy(itx, 0); itx = zil_itx_create(TX_WRITE, sizeof (*lr)); write_state = WR_NEED_COPY; } @@ -2278,8 +2277,8 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) ztest_block_tag_t rbt; - VERIFY(dmu_read(os, lr->lr_foid, offset, - sizeof (rbt), &rbt, flags) == 0); + VERIFY0(dmu_read(os, lr->lr_foid, offset, + sizeof (rbt), &rbt, flags)); if (rbt.bt_magic == BT_MAGIC) { ztest_bt_verify(&rbt, os, lr->lr_foid, 0, offset, gen, txg, crtxg); @@ -2966,7 +2965,7 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id) (void) pthread_rwlock_rdlock(&zd->zd_zilog_lock); - zil_commit(zilog, ztest_random(ZTEST_OBJECTS)); + VERIFY0(zil_commit(zilog, ztest_random(ZTEST_OBJECTS))); /* * Remember the committed values in zd, which is in parent/child @@ -4007,7 +4006,7 @@ raidz_scratch_verify(void) * requested by user, but scratch object was not created. */ case RRSS_SCRATCH_NOT_IN_USE: - ASSERT3U(offset, ==, 0); + ASSERT0(offset); break; /* @@ -5537,8 +5536,8 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) } if (i == 1) { - VERIFY(dmu_buf_hold(os, bigobj, off, - FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0); + VERIFY0(dmu_buf_hold(os, bigobj, off, + FTAG, &dbt, DMU_READ_NO_PREFETCH)); } if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) { VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db, @@ -7937,7 +7936,7 @@ ztest_freeze(void) */ while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) { ztest_dmu_object_alloc_free(zd, 0); - zil_commit(zd->zd_zilog, 0); + VERIFY0(zil_commit(zd->zd_zilog, 0)); } txg_wait_synced(spa_get_dsl(spa), 0); @@ -7979,7 +7978,7 @@ ztest_freeze(void) /* * Commit all of the changes we just generated. */ - zil_commit(zd->zd_zilog, 0); + VERIFY0(zil_commit(zd->zd_zilog, 0)); txg_wait_synced(spa_get_dsl(spa), 0); /* @@ -8979,7 +8978,7 @@ main(int argc, char **argv) exit(EXIT_FAILURE); } else { /* children should not be spawned if setting gvars fails */ - VERIFY3S(err, ==, 0); + VERIFY0(err); } /* Override location of zpool.cache */ diff --git a/sys/contrib/openzfs/config/kernel-mkdir.m4 b/sys/contrib/openzfs/config/kernel-mkdir.m4 index c1aebc387abe..78b32447c593 100644 --- a/sys/contrib/openzfs/config/kernel-mkdir.m4 +++ b/sys/contrib/openzfs/config/kernel-mkdir.m4 @@ -84,6 +84,8 @@ AC_DEFUN([ZFS_AC_KERNEL_MKDIR], [ AC_DEFINE(HAVE_IOPS_MKDIR_DENTRY, 1, [iops->mkdir() returns struct dentry*]) ],[ + AC_MSG_RESULT(no) + dnl # dnl # 6.3 API change dnl # mkdir() takes struct mnt_idmap * as the first arg diff --git a/sys/contrib/openzfs/config/toolchain-simd.m4 b/sys/contrib/openzfs/config/toolchain-simd.m4 index 061576fd94e3..f18c91007cde 100644 --- a/sys/contrib/openzfs/config/toolchain-simd.m4 +++ b/sys/contrib/openzfs/config/toolchain-simd.m4 @@ -24,6 +24,8 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD], [ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VAES + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VPCLMULQDQ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES @@ -38,9 +40,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE], [ AC_MSG_CHECKING([whether host toolchain supports SSE]) AC_LINK_IFELSE([AC_LANG_SOURCE([[ - void main() + int main() { __asm__ __volatile__("xorps %xmm0, %xmm1"); + return (0); } ]])], [ AC_DEFINE([HAVE_SSE], 1, [Define if host toolchain supports SSE]) @@ -57,9 +60,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2], [ AC_MSG_CHECKING([whether host toolchain supports SSE2]) AC_LINK_IFELSE([AC_LANG_SOURCE([[ - void main() + int main() { __asm__ __volatile__("pxor %xmm0, %xmm1"); + return (0); } ]])], [ AC_DEFINE([HAVE_SSE2], 1, [Define if host toolchain supports SSE2]) @@ -76,10 +80,11 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3], [ AC_MSG_CHECKING([whether host toolchain supports SSE3]) AC_LINK_IFELSE([AC_LANG_SOURCE([[ - void main() + int main() { char v[16]; __asm__ __volatile__("lddqu %0,%%xmm0" :: "m"(v[0])); + return (0); } ]])], [ AC_DEFINE([HAVE_SSE3], 1, [Define if host toolchain supports SSE3]) @@ -96,9 +101,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3], [ AC_MSG_CHECKING([whether host toolchain supports SSSE3]) AC_LINK_IFELSE([AC_LANG_SOURCE([[ - void main() + int main() { __asm__ __volatile__("pshufb %xmm0,%xmm1"); + return (0); } ]])], [ AC_DEFINE([HAVE_SSSE3], 1, [Define if host toolchain supports SSSE3]) @@ -115,9 +121,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1], [ AC_MSG_CHECKING([whether host toolchain supports SSE4.1]) AC_LINK_IFELSE([AC_LANG_SOURCE([[ - void main() + int main() { __asm__ __volatile__("pmaxsb %xmm0,%xmm1"); + return (0); } ]])], [ AC_DEFINE([HAVE_SSE4_1], 1, [Define if host toolchain supports SSE4.1]) @@ -134,9 +141,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2], [ AC_MSG_CHECKING([whether host toolchain supports SSE4.2]) AC_LINK_IFELSE([AC_LANG_SOURCE([[ - void main() + int main() { __asm__ __volatile__("pcmpgtq %xmm0, %xmm1"); + return (0); } ]])], [ AC_DEFINE([HAVE_SSE4_2], 1, [Define if host toolchain supports SSE4.2]) @@ -153,10 +161,11 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX], [ AC_MSG_CHECKING([whether host toolchain supports AVX]) AC_LINK_IFELSE([AC_LANG_SOURCE([[ - void main() + int main() { char v[32]; __asm__ __volatile__("vmovdqa %0,%%ymm0" :: "m"(v[0])); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -174,9 +183,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { __asm__ __volatile__("vpshufb %ymm0,%ymm1,%ymm2"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -194,9 +204,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512F], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { __asm__ __volatile__("vpandd %zmm0,%zmm1,%zmm2"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -214,9 +225,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512CD], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { __asm__ __volatile__("vplzcntd %zmm0,%zmm1"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -234,9 +246,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512DQ], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { __asm__ __volatile__("vandpd %zmm0,%zmm1,%zmm2"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -254,9 +267,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512BW], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { __asm__ __volatile__("vpshufb %zmm0,%zmm1,%zmm2"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -274,9 +288,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512IFMA], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { __asm__ __volatile__("vpmadd52luq %zmm0,%zmm1,%zmm2"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -294,9 +309,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VBMI], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { __asm__ __volatile__("vpermb %zmm0,%zmm1,%zmm2"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -314,9 +330,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512PF], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { __asm__ __volatile__("vgatherpf0dps (%rsi,%zmm0,4){%k1}"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -334,9 +351,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512ER], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { __asm__ __volatile__("vexp2pd %zmm0,%zmm1"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -354,9 +372,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { __asm__ __volatile__("vpabsq %zmm0,%zmm1"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -374,9 +393,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { __asm__ __volatile__("aesenc %xmm0, %xmm1"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -394,9 +414,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { __asm__ __volatile__("pclmulqdq %0, %%xmm0, %%xmm1" :: "i"(0)); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -414,9 +435,10 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { __asm__ __volatile__("movbe 0(%eax), %eax"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -427,6 +449,48 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE], [ ]) dnl # +dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VAES +dnl # +AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VAES], [ + AC_MSG_CHECKING([whether host toolchain supports VAES]) + + AC_LINK_IFELSE([AC_LANG_SOURCE([ + [ + int main() + { + __asm__ __volatile__("vaesenc %ymm0, %ymm1, %ymm0"); + return (0); + } + ]])], [ + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_VAES], 1, [Define if host toolchain supports VAES]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) + +dnl # +dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VPCLMULQDQ +dnl # +AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VPCLMULQDQ], [ + AC_MSG_CHECKING([whether host toolchain supports VPCLMULQDQ]) + + AC_LINK_IFELSE([AC_LANG_SOURCE([ + [ + int main() + { + __asm__ __volatile__("vpclmulqdq %0, %%ymm4, %%ymm3, %%ymm5" :: "i"(0)); + return (0); + } + ]])], [ + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_VPCLMULQDQ], 1, [Define if host toolchain supports VPCLMULQDQ]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) + +dnl # dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE dnl # AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE], [ @@ -434,10 +498,11 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { char b[4096] __attribute__ ((aligned (64))); __asm__ __volatile__("xsave %[b]\n" : : [b] "m" (*b) : "memory"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -455,10 +520,11 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { char b[4096] __attribute__ ((aligned (64))); __asm__ __volatile__("xsaveopt %[b]\n" : : [b] "m" (*b) : "memory"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) @@ -476,10 +542,11 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES], [ AC_LINK_IFELSE([AC_LANG_SOURCE([ [ - void main() + int main() { char b[4096] __attribute__ ((aligned (64))); __asm__ __volatile__("xsaves %[b]\n" : : [b] "m" (*b) : "memory"); + return (0); } ]])], [ AC_MSG_RESULT([yes]) diff --git a/sys/contrib/openzfs/contrib/debian/control b/sys/contrib/openzfs/contrib/debian/control index 96a2bdd88665..c5358dedc0fd 100644 --- a/sys/contrib/openzfs/contrib/debian/control +++ b/sys/contrib/openzfs/contrib/debian/control @@ -100,8 +100,8 @@ Depends: ${misc:Depends}, ${shlibs:Depends} # The libcurl4 is loaded through dlopen("libcurl.so.4"). # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=988521 Recommends: libcurl4 -Breaks: libzfs2, libzfs4, libzfs4linux, libzfs6linux -Replaces: libzfs2, libzfs4, libzfs4linux, libzfs6linux +Breaks: libzfs2, libzfs4, libzfs4linux, libzfs6linux, openzfs-libzfs4 +Replaces: libzfs2, libzfs4, libzfs4linux, libzfs6linux, openzfs-libzfs4 Conflicts: libzfs6linux Description: OpenZFS filesystem library for Linux - general support OpenZFS is a storage platform that encompasses the functionality of @@ -128,8 +128,8 @@ Package: openzfs-libzpool6 Section: contrib/libs Architecture: linux-any Depends: ${misc:Depends}, ${shlibs:Depends} -Breaks: libzpool2, libzpool5, libzpool5linux, libzpool6linux -Replaces: libzpool2, libzpool5, libzpool5linux, libzpool6linux +Breaks: libzpool2, libzpool5, libzpool6linux +Replaces: libzpool2, libzpool5, libzpool6linux Conflicts: libzpool6linux Description: OpenZFS pool library for Linux OpenZFS is a storage platform that encompasses the functionality of diff --git a/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/LICENSE b/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/LICENSE new file mode 100644 index 000000000000..04c03a37e0cb --- /dev/null +++ b/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/LICENSE @@ -0,0 +1,253 @@ +BoringSSL is a fork of OpenSSL. As such, large parts of it fall under OpenSSL +licensing. Files that are completely new have a Google copyright and an ISC +license. This license is reproduced at the bottom of this file. + +Contributors to BoringSSL are required to follow the CLA rules for Chromium: +https://cla.developers.google.com/clas + +Files in third_party/ have their own licenses, as described therein. The MIT +license, for third_party/fiat, which, unlike other third_party directories, is +compiled into non-test libraries, is included below. + +The OpenSSL toolkit stays under a dual license, i.e. both the conditions of the +OpenSSL License and the original SSLeay license apply to the toolkit. See below +for the actual license texts. Actually both licenses are BSD-style Open Source +licenses. In case of any license issues related to OpenSSL please contact +openssl-core@openssl.org. + +The following are Google-internal bug numbers where explicit permission from +some authors is recorded for use of their work. (This is purely for our own +record keeping.) + 27287199 + 27287880 + 27287883 + 263291445 + + + OpenSSL License + --------------- + +/* ==================================================================== + * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + + Original SSLeay License + ----------------------- + +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + + +ISC license used for completely new code in BoringSSL: + +/* Copyright 2015 The BoringSSL Authors + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ + + +The code in third_party/fiat carries the MIT license: + +Copyright (c) 2015-2016 the fiat-crypto authors (see +https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +Licenses for support code +------------------------- + +Parts of the TLS test suite are under the Go license. This code is not included +in BoringSSL (i.e. libcrypto and libssl) when compiled, however, so +distributing code linked against BoringSSL does not trigger this license: + +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +BoringSSL uses the Chromium test infrastructure to run a continuous build, +trybots etc. The scripts which manage this, and the script for generating build +metadata, are under the Chromium license. Distributing code linked against +BoringSSL does not trigger this license. + +Copyright 2015 The Chromium Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/README b/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/README new file mode 100644 index 000000000000..aa6fb6d477fa --- /dev/null +++ b/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/README @@ -0,0 +1,11 @@ +This directory contains the original BoringSSL [1] GCM x86-64 assembly +files [2]. + +The assembler files where then further modified to fit the ICP conventions. + +The main purpose to include these files (and the original ones) here, is to +serve as a reference if upstream changes need to be applied to the files +included and modified in the ICP. + +[1] https://github.com/google/boringssl +[2] https://github.com/google/boringssl/blob/d5440dd2c2c500ac2d3bba4afec47a054b4d99ae/gen/bcm/aes-gcm-avx2-x86_64-linux.S diff --git a/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/aes-gcm-avx2-x86_64-linux.S b/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/aes-gcm-avx2-x86_64-linux.S new file mode 100644 index 000000000000..e7327c9de872 --- /dev/null +++ b/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/aes-gcm-avx2-x86_64-linux.S @@ -0,0 +1,1328 @@ +// SPDX-License-Identifier: Apache-2.0 +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include <openssl/asm_base.h> + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.section .rodata +.align 16 + + +.Lbswap_mask: +.quad 0x08090a0b0c0d0e0f, 0x0001020304050607 + + + + + + + + +.Lgfpoly: +.quad 1, 0xc200000000000000 + + +.Lgfpoly_and_internal_carrybit: +.quad 1, 0xc200000000000001 + +.align 32 + +.Lctr_pattern: +.quad 0, 0 +.quad 1, 0 +.Linc_2blocks: +.quad 2, 0 +.quad 2, 0 + +.text +.globl gcm_init_vpclmulqdq_avx2 +.hidden gcm_init_vpclmulqdq_avx2 +.type gcm_init_vpclmulqdq_avx2,@function +.align 32 +gcm_init_vpclmulqdq_avx2: +.cfi_startproc + +_CET_ENDBR + + + + + + vpshufd $0x4e,(%rsi),%xmm3 + + + + + + vpshufd $0xd3,%xmm3,%xmm0 + vpsrad $31,%xmm0,%xmm0 + vpaddq %xmm3,%xmm3,%xmm3 + vpand .Lgfpoly_and_internal_carrybit(%rip),%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + + vbroadcasti128 .Lgfpoly(%rip),%ymm6 + + + vpclmulqdq $0x00,%xmm3,%xmm3,%xmm0 + vpclmulqdq $0x01,%xmm3,%xmm3,%xmm1 + vpclmulqdq $0x10,%xmm3,%xmm3,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vpclmulqdq $0x01,%xmm0,%xmm6,%xmm2 + vpshufd $0x4e,%xmm0,%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vpclmulqdq $0x11,%xmm3,%xmm3,%xmm5 + vpclmulqdq $0x01,%xmm1,%xmm6,%xmm0 + vpshufd $0x4e,%xmm1,%xmm1 + vpxor %xmm1,%xmm5,%xmm5 + vpxor %xmm0,%xmm5,%xmm5 + + + + vinserti128 $1,%xmm3,%ymm5,%ymm3 + vinserti128 $1,%xmm5,%ymm5,%ymm5 + + + vpclmulqdq $0x00,%ymm5,%ymm3,%ymm0 + vpclmulqdq $0x01,%ymm5,%ymm3,%ymm1 + vpclmulqdq $0x10,%ymm5,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2 + vpshufd $0x4e,%ymm0,%ymm0 + vpxor %ymm0,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x11,%ymm5,%ymm3,%ymm4 + vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm4,%ymm4 + vpxor %ymm0,%ymm4,%ymm4 + + + + vmovdqu %ymm3,96(%rdi) + vmovdqu %ymm4,64(%rdi) + + + + vpunpcklqdq %ymm3,%ymm4,%ymm0 + vpunpckhqdq %ymm3,%ymm4,%ymm1 + vpxor %ymm1,%ymm0,%ymm0 + vmovdqu %ymm0,128+32(%rdi) + + + vpclmulqdq $0x00,%ymm5,%ymm4,%ymm0 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm1 + vpclmulqdq $0x10,%ymm5,%ymm4,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2 + vpshufd $0x4e,%ymm0,%ymm0 + vpxor %ymm0,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x11,%ymm5,%ymm4,%ymm3 + vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm3,%ymm3 + vpxor %ymm0,%ymm3,%ymm3 + + vpclmulqdq $0x00,%ymm5,%ymm3,%ymm0 + vpclmulqdq $0x01,%ymm5,%ymm3,%ymm1 + vpclmulqdq $0x10,%ymm5,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2 + vpshufd $0x4e,%ymm0,%ymm0 + vpxor %ymm0,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x11,%ymm5,%ymm3,%ymm4 + vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm4,%ymm4 + vpxor %ymm0,%ymm4,%ymm4 + + vmovdqu %ymm3,32(%rdi) + vmovdqu %ymm4,0(%rdi) + + + + vpunpcklqdq %ymm3,%ymm4,%ymm0 + vpunpckhqdq %ymm3,%ymm4,%ymm1 + vpxor %ymm1,%ymm0,%ymm0 + vmovdqu %ymm0,128(%rdi) + + vzeroupper + ret + +.cfi_endproc +.size gcm_init_vpclmulqdq_avx2, . - gcm_init_vpclmulqdq_avx2 +.globl gcm_gmult_vpclmulqdq_avx2 +.hidden gcm_gmult_vpclmulqdq_avx2 +.type gcm_gmult_vpclmulqdq_avx2,@function +.align 32 +gcm_gmult_vpclmulqdq_avx2: +.cfi_startproc + +_CET_ENDBR + + + + vmovdqu (%rdi),%xmm0 + vmovdqu .Lbswap_mask(%rip),%xmm1 + vmovdqu 128-16(%rsi),%xmm2 + vmovdqu .Lgfpoly(%rip),%xmm3 + vpshufb %xmm1,%xmm0,%xmm0 + + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm4 + vpclmulqdq $0x01,%xmm2,%xmm0,%xmm5 + vpclmulqdq $0x10,%xmm2,%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x01,%xmm4,%xmm3,%xmm6 + vpshufd $0x4e,%xmm4,%xmm4 + vpxor %xmm4,%xmm5,%xmm5 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x01,%xmm5,%xmm3,%xmm4 + vpshufd $0x4e,%xmm5,%xmm5 + vpxor %xmm5,%xmm0,%xmm0 + vpxor %xmm4,%xmm0,%xmm0 + + + vpshufb %xmm1,%xmm0,%xmm0 + vmovdqu %xmm0,(%rdi) + ret + +.cfi_endproc +.size gcm_gmult_vpclmulqdq_avx2, . - gcm_gmult_vpclmulqdq_avx2 +.globl gcm_ghash_vpclmulqdq_avx2 +.hidden gcm_ghash_vpclmulqdq_avx2 +.type gcm_ghash_vpclmulqdq_avx2,@function +.align 32 +gcm_ghash_vpclmulqdq_avx2: +.cfi_startproc + +_CET_ENDBR + + + + + + + vmovdqu .Lbswap_mask(%rip),%xmm6 + vmovdqu .Lgfpoly(%rip),%xmm7 + + + vmovdqu (%rdi),%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + + + cmpq $32,%rcx + jb .Lghash_lastblock + + + + vinserti128 $1,%xmm6,%ymm6,%ymm6 + vinserti128 $1,%xmm7,%ymm7,%ymm7 + + cmpq $127,%rcx + jbe .Lghash_loop_1x + + + vmovdqu 128(%rsi),%ymm8 + vmovdqu 128+32(%rsi),%ymm9 +.Lghash_loop_4x: + + vmovdqu 0(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 0(%rsi),%ymm2 + vpxor %ymm5,%ymm1,%ymm1 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x00,%ymm8,%ymm0,%ymm4 + + vmovdqu 32(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 32(%rsi),%ymm2 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x10,%ymm8,%ymm0,%ymm0 + vpxor %ymm0,%ymm4,%ymm4 + + vmovdqu 64(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 64(%rsi),%ymm2 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x00,%ymm9,%ymm0,%ymm0 + vpxor %ymm0,%ymm4,%ymm4 + + + vmovdqu 96(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 96(%rsi),%ymm2 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x10,%ymm9,%ymm0,%ymm0 + vpxor %ymm0,%ymm4,%ymm4 + + vpxor %ymm3,%ymm4,%ymm4 + vpxor %ymm5,%ymm4,%ymm4 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm2 + vpclmulqdq $0x01,%ymm3,%ymm2,%ymm0 + vpshufd $0x4e,%ymm3,%ymm3 + vpxor %ymm3,%ymm4,%ymm4 + vpxor %ymm0,%ymm4,%ymm4 + + vpclmulqdq $0x01,%ymm4,%ymm2,%ymm0 + vpshufd $0x4e,%ymm4,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm0,%ymm5,%ymm5 + vextracti128 $1,%ymm5,%xmm0 + vpxor %xmm0,%xmm5,%xmm5 + + subq $-128,%rdx + addq $-128,%rcx + cmpq $127,%rcx + ja .Lghash_loop_4x + + + cmpq $32,%rcx + jb .Lghash_loop_1x_done +.Lghash_loop_1x: + vmovdqu (%rdx),%ymm0 + vpshufb %ymm6,%ymm0,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vmovdqu 128-32(%rsi),%ymm0 + vpclmulqdq $0x00,%ymm0,%ymm5,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm5,%ymm2 + vpclmulqdq $0x10,%ymm0,%ymm5,%ymm3 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x01,%ymm1,%ymm7,%ymm3 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm2,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x11,%ymm0,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm7,%ymm1 + vpshufd $0x4e,%ymm2,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm1,%ymm5,%ymm5 + + vextracti128 $1,%ymm5,%xmm0 + vpxor %xmm0,%xmm5,%xmm5 + addq $32,%rdx + subq $32,%rcx + cmpq $32,%rcx + jae .Lghash_loop_1x +.Lghash_loop_1x_done: + + +.Lghash_lastblock: + testq %rcx,%rcx + jz .Lghash_done + vmovdqu (%rdx),%xmm0 + vpshufb %xmm6,%xmm0,%xmm0 + vpxor %xmm0,%xmm5,%xmm5 + vmovdqu 128-16(%rsi),%xmm0 + vpclmulqdq $0x00,%xmm0,%xmm5,%xmm1 + vpclmulqdq $0x01,%xmm0,%xmm5,%xmm2 + vpclmulqdq $0x10,%xmm0,%xmm5,%xmm3 + vpxor %xmm3,%xmm2,%xmm2 + vpclmulqdq $0x01,%xmm1,%xmm7,%xmm3 + vpshufd $0x4e,%xmm1,%xmm1 + vpxor %xmm1,%xmm2,%xmm2 + vpxor %xmm3,%xmm2,%xmm2 + vpclmulqdq $0x11,%xmm0,%xmm5,%xmm5 + vpclmulqdq $0x01,%xmm2,%xmm7,%xmm1 + vpshufd $0x4e,%xmm2,%xmm2 + vpxor %xmm2,%xmm5,%xmm5 + vpxor %xmm1,%xmm5,%xmm5 + + +.Lghash_done: + + vpshufb %xmm6,%xmm5,%xmm5 + vmovdqu %xmm5,(%rdi) + + vzeroupper + ret + +.cfi_endproc +.size gcm_ghash_vpclmulqdq_avx2, . - gcm_ghash_vpclmulqdq_avx2 +.globl aes_gcm_enc_update_vaes_avx2 +.hidden aes_gcm_enc_update_vaes_avx2 +.type aes_gcm_enc_update_vaes_avx2,@function +.align 32 +aes_gcm_enc_update_vaes_avx2: +.cfi_startproc + +_CET_ENDBR + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + + movq 16(%rsp),%r12 +#ifdef BORINGSSL_DISPATCH_TEST +.extern BORINGSSL_function_hit +.hidden BORINGSSL_function_hit + movb $1,BORINGSSL_function_hit+8(%rip) +#endif + vbroadcasti128 .Lbswap_mask(%rip),%ymm0 + + + + vmovdqu (%r12),%xmm1 + vpshufb %xmm0,%xmm1,%xmm1 + vbroadcasti128 (%r8),%ymm11 + vpshufb %ymm0,%ymm11,%ymm11 + + + + movl 240(%rcx),%r10d + leal -20(,%r10,4),%r10d + + + + + leaq 96(%rcx,%r10,4),%r11 + vbroadcasti128 (%rcx),%ymm9 + vbroadcasti128 (%r11),%ymm10 + + + vpaddd .Lctr_pattern(%rip),%ymm11,%ymm11 + + + + cmpq $127,%rdx + jbe .Lcrypt_loop_4x_done__func1 + + vmovdqu 128(%r9),%ymm7 + vmovdqu 128+32(%r9),%ymm8 + + + + vmovdqu .Linc_2blocks(%rip),%ymm2 + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm14 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm15 + vpaddd %ymm2,%ymm11,%ymm11 + + + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + vpxor %ymm9,%ymm14,%ymm14 + vpxor %ymm9,%ymm15,%ymm15 + + leaq 16(%rcx),%rax +.Lvaesenc_loop_first_4_vecs__func1: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_first_4_vecs__func1 + vpxor 0(%rdi),%ymm10,%ymm2 + vpxor 32(%rdi),%ymm10,%ymm3 + vpxor 64(%rdi),%ymm10,%ymm5 + vpxor 96(%rdi),%ymm10,%ymm6 + vaesenclast %ymm2,%ymm12,%ymm12 + vaesenclast %ymm3,%ymm13,%ymm13 + vaesenclast %ymm5,%ymm14,%ymm14 + vaesenclast %ymm6,%ymm15,%ymm15 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + vmovdqu %ymm14,64(%rsi) + vmovdqu %ymm15,96(%rsi) + + subq $-128,%rdi + addq $-128,%rdx + cmpq $127,%rdx + jbe .Lghash_last_ciphertext_4x__func1 +.align 16 +.Lcrypt_loop_4x__func1: + + + + + vmovdqu .Linc_2blocks(%rip),%ymm2 + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm14 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm15 + vpaddd %ymm2,%ymm11,%ymm11 + + + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + vpxor %ymm9,%ymm14,%ymm14 + vpxor %ymm9,%ymm15,%ymm15 + + cmpl $24,%r10d + jl .Laes128__func1 + je .Laes192__func1 + + vbroadcasti128 -208(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -192(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes192__func1: + vbroadcasti128 -176(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -160(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes128__func1: + prefetcht0 512(%rdi) + prefetcht0 512+64(%rdi) + + vmovdqu 0(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 0(%r9),%ymm4 + vpxor %ymm1,%ymm3,%ymm3 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6 + + vbroadcasti128 -144(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vbroadcasti128 -128(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 32(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 32(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -112(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 64(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 64(%r9),%ymm4 + + vbroadcasti128 -96(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -80(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + + vmovdqu 96(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + + vbroadcasti128 -64(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vmovdqu 96(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -48(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm1,%ymm6,%ymm6 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm4 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -32(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -16(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vextracti128 $1,%ymm1,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + + + subq $-128,%rsi + vpxor 0(%rdi),%ymm10,%ymm2 + vpxor 32(%rdi),%ymm10,%ymm3 + vpxor 64(%rdi),%ymm10,%ymm5 + vpxor 96(%rdi),%ymm10,%ymm6 + vaesenclast %ymm2,%ymm12,%ymm12 + vaesenclast %ymm3,%ymm13,%ymm13 + vaesenclast %ymm5,%ymm14,%ymm14 + vaesenclast %ymm6,%ymm15,%ymm15 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + vmovdqu %ymm14,64(%rsi) + vmovdqu %ymm15,96(%rsi) + + subq $-128,%rdi + + addq $-128,%rdx + cmpq $127,%rdx + ja .Lcrypt_loop_4x__func1 +.Lghash_last_ciphertext_4x__func1: + + vmovdqu 0(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 0(%r9),%ymm4 + vpxor %ymm1,%ymm3,%ymm3 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6 + + vmovdqu 32(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 32(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vmovdqu 64(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 64(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + + vmovdqu 96(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 96(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm1,%ymm6,%ymm6 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm4 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm2,%ymm6,%ymm6 + + vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vextracti128 $1,%ymm1,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + + subq $-128,%rsi +.Lcrypt_loop_4x_done__func1: + + testq %rdx,%rdx + jz .Ldone__func1 + + + + + + leaq 128(%r9),%r8 + subq %rdx,%r8 + + + vpxor %xmm5,%xmm5,%xmm5 + vpxor %xmm6,%xmm6,%xmm6 + vpxor %xmm7,%xmm7,%xmm7 + + cmpq $64,%rdx + jb .Llessthan64bytes__func1 + + + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_1__func1: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_1__func1 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%ymm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %ymm3,%ymm13,%ymm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + + + vpshufb %ymm0,%ymm12,%ymm12 + vpshufb %ymm0,%ymm13,%ymm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%ymm3 + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm7 + vpclmulqdq $0x00,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + + addq $64,%r8 + addq $64,%rdi + addq $64,%rsi + subq $64,%rdx + jz .Lreduce__func1 + + vpxor %xmm1,%xmm1,%xmm1 + + +.Llessthan64bytes__func1: + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_2__func1: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_2__func1 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + + + cmpq $32,%rdx + jb .Lxor_one_block__func1 + je .Lxor_two_blocks__func1 + +.Lxor_three_blocks__func1: + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%xmm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %xmm3,%xmm13,%xmm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %xmm13,32(%rsi) + + vpshufb %ymm0,%ymm12,%ymm12 + vpshufb %xmm0,%xmm13,%xmm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%xmm3 + vpclmulqdq $0x00,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm7,%ymm7 + jmp .Lghash_mul_one_vec_unreduced__func1 + +.Lxor_two_blocks__func1: + vmovdqu (%rdi),%ymm2 + vpxor %ymm2,%ymm12,%ymm12 + vmovdqu %ymm12,(%rsi) + vpshufb %ymm0,%ymm12,%ymm12 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + jmp .Lghash_mul_one_vec_unreduced__func1 + +.Lxor_one_block__func1: + vmovdqu (%rdi),%xmm2 + vpxor %xmm2,%xmm12,%xmm12 + vmovdqu %xmm12,(%rsi) + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm1,%xmm12,%xmm12 + vmovdqu (%r8),%xmm2 + +.Lghash_mul_one_vec_unreduced__func1: + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + +.Lreduce__func1: + + vbroadcasti128 .Lgfpoly(%rip),%ymm2 + vpclmulqdq $0x01,%ymm5,%ymm2,%ymm3 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpclmulqdq $0x01,%ymm6,%ymm2,%ymm3 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm7,%ymm7 + vpxor %ymm3,%ymm7,%ymm7 + vextracti128 $1,%ymm7,%xmm1 + vpxor %xmm7,%xmm1,%xmm1 + +.Ldone__func1: + + vpshufb %xmm0,%xmm1,%xmm1 + vmovdqu %xmm1,(%r12) + + vzeroupper + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + ret + +.cfi_endproc +.size aes_gcm_enc_update_vaes_avx2, . - aes_gcm_enc_update_vaes_avx2 +.globl aes_gcm_dec_update_vaes_avx2 +.hidden aes_gcm_dec_update_vaes_avx2 +.type aes_gcm_dec_update_vaes_avx2,@function +.align 32 +aes_gcm_dec_update_vaes_avx2: +.cfi_startproc + +_CET_ENDBR + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + + movq 16(%rsp),%r12 + vbroadcasti128 .Lbswap_mask(%rip),%ymm0 + + + + vmovdqu (%r12),%xmm1 + vpshufb %xmm0,%xmm1,%xmm1 + vbroadcasti128 (%r8),%ymm11 + vpshufb %ymm0,%ymm11,%ymm11 + + + + movl 240(%rcx),%r10d + leal -20(,%r10,4),%r10d + + + + + leaq 96(%rcx,%r10,4),%r11 + vbroadcasti128 (%rcx),%ymm9 + vbroadcasti128 (%r11),%ymm10 + + + vpaddd .Lctr_pattern(%rip),%ymm11,%ymm11 + + + + cmpq $127,%rdx + jbe .Lcrypt_loop_4x_done__func2 + + vmovdqu 128(%r9),%ymm7 + vmovdqu 128+32(%r9),%ymm8 +.align 16 +.Lcrypt_loop_4x__func2: + + + + + vmovdqu .Linc_2blocks(%rip),%ymm2 + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm14 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm15 + vpaddd %ymm2,%ymm11,%ymm11 + + + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + vpxor %ymm9,%ymm14,%ymm14 + vpxor %ymm9,%ymm15,%ymm15 + + cmpl $24,%r10d + jl .Laes128__func2 + je .Laes192__func2 + + vbroadcasti128 -208(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -192(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes192__func2: + vbroadcasti128 -176(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -160(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes128__func2: + prefetcht0 512(%rdi) + prefetcht0 512+64(%rdi) + + vmovdqu 0(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 0(%r9),%ymm4 + vpxor %ymm1,%ymm3,%ymm3 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6 + + vbroadcasti128 -144(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vbroadcasti128 -128(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 32(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 32(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -112(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 64(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 64(%r9),%ymm4 + + vbroadcasti128 -96(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -80(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + + vmovdqu 96(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + + vbroadcasti128 -64(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vmovdqu 96(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -48(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm1,%ymm6,%ymm6 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm4 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -32(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -16(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vextracti128 $1,%ymm1,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + + + + vpxor 0(%rdi),%ymm10,%ymm2 + vpxor 32(%rdi),%ymm10,%ymm3 + vpxor 64(%rdi),%ymm10,%ymm5 + vpxor 96(%rdi),%ymm10,%ymm6 + vaesenclast %ymm2,%ymm12,%ymm12 + vaesenclast %ymm3,%ymm13,%ymm13 + vaesenclast %ymm5,%ymm14,%ymm14 + vaesenclast %ymm6,%ymm15,%ymm15 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + vmovdqu %ymm14,64(%rsi) + vmovdqu %ymm15,96(%rsi) + + subq $-128,%rdi + subq $-128,%rsi + addq $-128,%rdx + cmpq $127,%rdx + ja .Lcrypt_loop_4x__func2 +.Lcrypt_loop_4x_done__func2: + + testq %rdx,%rdx + jz .Ldone__func2 + + + + + + leaq 128(%r9),%r8 + subq %rdx,%r8 + + + vpxor %xmm5,%xmm5,%xmm5 + vpxor %xmm6,%xmm6,%xmm6 + vpxor %xmm7,%xmm7,%xmm7 + + cmpq $64,%rdx + jb .Llessthan64bytes__func2 + + + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_1__func2: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_1__func2 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%ymm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %ymm3,%ymm13,%ymm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + + + vpshufb %ymm0,%ymm2,%ymm12 + vpshufb %ymm0,%ymm3,%ymm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%ymm3 + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm7 + vpclmulqdq $0x00,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + + addq $64,%r8 + addq $64,%rdi + addq $64,%rsi + subq $64,%rdx + jz .Lreduce__func2 + + vpxor %xmm1,%xmm1,%xmm1 + + +.Llessthan64bytes__func2: + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_2__func2: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_2__func2 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + + + cmpq $32,%rdx + jb .Lxor_one_block__func2 + je .Lxor_two_blocks__func2 + +.Lxor_three_blocks__func2: + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%xmm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %xmm3,%xmm13,%xmm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %xmm13,32(%rsi) + + vpshufb %ymm0,%ymm2,%ymm12 + vpshufb %xmm0,%xmm3,%xmm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%xmm3 + vpclmulqdq $0x00,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm7,%ymm7 + jmp .Lghash_mul_one_vec_unreduced__func2 + +.Lxor_two_blocks__func2: + vmovdqu (%rdi),%ymm2 + vpxor %ymm2,%ymm12,%ymm12 + vmovdqu %ymm12,(%rsi) + vpshufb %ymm0,%ymm2,%ymm12 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + jmp .Lghash_mul_one_vec_unreduced__func2 + +.Lxor_one_block__func2: + vmovdqu (%rdi),%xmm2 + vpxor %xmm2,%xmm12,%xmm12 + vmovdqu %xmm12,(%rsi) + vpshufb %xmm0,%xmm2,%xmm12 + vpxor %xmm1,%xmm12,%xmm12 + vmovdqu (%r8),%xmm2 + +.Lghash_mul_one_vec_unreduced__func2: + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + +.Lreduce__func2: + + vbroadcasti128 .Lgfpoly(%rip),%ymm2 + vpclmulqdq $0x01,%ymm5,%ymm2,%ymm3 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpclmulqdq $0x01,%ymm6,%ymm2,%ymm3 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm7,%ymm7 + vpxor %ymm3,%ymm7,%ymm7 + vextracti128 $1,%ymm7,%xmm1 + vpxor %xmm7,%xmm1,%xmm1 + +.Ldone__func2: + + vpshufb %xmm0,%xmm1,%xmm1 + vmovdqu %xmm1,(%r12) + + vzeroupper + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + ret + +.cfi_endproc +.size aes_gcm_dec_update_vaes_avx2, . - aes_gcm_dec_update_vaes_avx2 +#endif diff --git a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs index c569b2528368..67707e9d80f4 100644 --- a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs +++ b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs @@ -979,7 +979,8 @@ mountroot() touch /run/zfs_unlock_complete if [ -e /run/zfs_unlock_complete_notify ]; then - read -r < /run/zfs_unlock_complete_notify + # shellcheck disable=SC2034 + read -r zfs_unlock_complete_notify < /run/zfs_unlock_complete_notify fi # ------------ diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/debug.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/debug.h index 974704e92bbd..32bc02f3dc86 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/debug.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/debug.h @@ -69,6 +69,10 @@ #define __maybe_unused __attribute__((unused)) #endif +#ifndef __must_check +#define __must_check __attribute__((__warn_unused_result__)) +#endif + /* * Without this, we see warnings from objtool during normal Linux builds when * the kernel is built with CONFIG_STACK_VALIDATION=y: diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h index c6bc10d6babe..1cbd79ec893f 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h @@ -77,8 +77,8 @@ do_thread_create(caddr_t stk, size_t stksize, void (*proc)(void *), void *arg, /* * Be sure there are no surprises. */ - ASSERT(stk == NULL); - ASSERT(len == 0); + ASSERT0P(stk); + ASSERT0(len); ASSERT(state == TS_RUN); if (pp == &p0) diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h index cd245a5f0135..326f471d7c9b 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h @@ -139,15 +139,6 @@ */ #if defined(HAVE_KERNEL_FPU_INTERNAL) -/* - * For kernels not exporting *kfpu_{begin,end} we have to use inline assembly - * with the XSAVE{,OPT,S} instructions, so we need the toolchain to support at - * least XSAVE. - */ -#if !defined(HAVE_XSAVE) -#error "Toolchain needs to support the XSAVE assembler instruction" -#endif - #ifndef XFEATURE_MASK_XTILE /* * For kernels where this doesn't exist yet, we still don't want to break @@ -335,9 +326,13 @@ kfpu_begin(void) return; } #endif +#if defined(HAVE_XSAVE) if (static_cpu_has(X86_FEATURE_XSAVE)) { kfpu_do_xsave("xsave", state, ~XFEATURE_MASK_XTILE); - } else if (static_cpu_has(X86_FEATURE_FXSR)) { + return; + } +#endif + if (static_cpu_has(X86_FEATURE_FXSR)) { kfpu_save_fxsr(state); } else { kfpu_save_fsave(state); @@ -390,9 +385,13 @@ kfpu_end(void) goto out; } #endif +#if defined(HAVE_XSAVE) if (static_cpu_has(X86_FEATURE_XSAVE)) { kfpu_do_xrstor("xrstor", state, ~XFEATURE_MASK_XTILE); - } else if (static_cpu_has(X86_FEATURE_FXSR)) { + goto out; + } +#endif + if (static_cpu_has(X86_FEATURE_FXSR)) { kfpu_restore_fxsr(state); } else { kfpu_restore_fsave(state); @@ -599,6 +598,32 @@ zfs_movbe_available(void) } /* + * Check if VAES instruction set is available + */ +static inline boolean_t +zfs_vaes_available(void) +{ +#if defined(X86_FEATURE_VAES) + return (!!boot_cpu_has(X86_FEATURE_VAES)); +#else + return (B_FALSE); +#endif +} + +/* + * Check if VPCLMULQDQ instruction set is available + */ +static inline boolean_t +zfs_vpclmulqdq_available(void) +{ +#if defined(X86_FEATURE_VPCLMULQDQ) + return (!!boot_cpu_has(X86_FEATURE_VPCLMULQDQ)); +#else + return (B_FALSE); +#endif +} + +/* * Check if SHA_NI instruction set is available */ static inline boolean_t diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/debug.h b/sys/contrib/openzfs/include/os/linux/spl/sys/debug.h index 1671ba4074da..85b96e1e23a7 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/debug.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/debug.h @@ -69,6 +69,10 @@ #define __maybe_unused __attribute__((unused)) #endif +#ifndef __must_check +#define __must_check __attribute__((__warn_unused_result__)) +#endif + /* * Without this, we see warnings from objtool during normal Linux builds when * the kernel is built with CONFIG_STACK_VALIDATION=y: diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h b/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h index f000f53ab9b6..4eca2414fc5b 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h @@ -111,7 +111,7 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \ #undef mutex_destroy #define mutex_destroy(mp) \ { \ - VERIFY3P(mutex_owner(mp), ==, NULL); \ + VERIFY0P(mutex_owner(mp)); \ } #define mutex_tryenter(mp) \ diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h index 955462c85d10..e34ea46b3fe8 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h @@ -139,18 +139,18 @@ #define ZCW_TP_STRUCT_ENTRY \ __field(lwb_t *, zcw_lwb) \ __field(boolean_t, zcw_done) \ - __field(int, zcw_zio_error) \ + __field(int, zcw_error) \ #define ZCW_TP_FAST_ASSIGN \ __entry->zcw_lwb = zcw->zcw_lwb; \ __entry->zcw_done = zcw->zcw_done; \ - __entry->zcw_zio_error = zcw->zcw_zio_error; + __entry->zcw_error = zcw->zcw_error; #define ZCW_TP_PRINTK_FMT \ "zcw { lwb %p done %u error %u }" #define ZCW_TP_PRINTK_ARGS \ - __entry->zcw_lwb, __entry->zcw_done, __entry->zcw_zio_error + __entry->zcw_lwb, __entry->zcw_done, __entry->zcw_error /* * Generic support for two argument tracepoints of the form: diff --git a/sys/contrib/openzfs/include/sys/dmu.h b/sys/contrib/openzfs/include/sys/dmu.h index 7c2024a16d8f..b18961be1282 100644 --- a/sys/contrib/openzfs/include/sys/dmu.h +++ b/sys/contrib/openzfs/include/sys/dmu.h @@ -742,8 +742,8 @@ dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func_sync, dmu_buf_evict_func_t *evict_func_async, dmu_buf_t **clear_on_evict_dbufp __maybe_unused) { - ASSERT(dbu->dbu_evict_func_sync == NULL); - ASSERT(dbu->dbu_evict_func_async == NULL); + ASSERT0P(dbu->dbu_evict_func_sync); + ASSERT0P(dbu->dbu_evict_func_async); /* must have at least one evict func */ IMPLY(evict_func_sync == NULL, evict_func_async != NULL); diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h index db6de332ae67..66db16b33c51 100644 --- a/sys/contrib/openzfs/include/sys/spa.h +++ b/sys/contrib/openzfs/include/sys/spa.h @@ -880,7 +880,6 @@ extern kcondvar_t spa_namespace_cv; #define SPA_CONFIG_UPDATE_VDEVS 1 extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t); -extern void spa_config_load(void); extern int spa_all_configs(uint64_t *generation, nvlist_t **pools); extern void spa_config_set(spa_t *spa, nvlist_t *config); extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, @@ -1244,7 +1243,6 @@ extern void vdev_mirror_stat_fini(void); /* Initialization and termination */ extern void spa_init(spa_mode_t mode); extern void spa_fini(void); -extern void spa_boot_init(void *); /* properties */ extern int spa_prop_set(spa_t *spa, nvlist_t *nvp); diff --git a/sys/contrib/openzfs/include/sys/zfs_znode.h b/sys/contrib/openzfs/include/sys/zfs_znode.h index ba577b80c98f..79b845a672a8 100644 --- a/sys/contrib/openzfs/include/sys/zfs_znode.h +++ b/sys/contrib/openzfs/include/sys/zfs_znode.h @@ -73,7 +73,7 @@ extern "C" { pflags |= attr; \ else \ pflags &= ~attr; \ - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(ZTOZSB(zp)), \ + VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(ZTOZSB(zp)), \ &pflags, sizeof (pflags), tx)); \ } diff --git a/sys/contrib/openzfs/include/sys/zil.h b/sys/contrib/openzfs/include/sys/zil.h index 9d1fb47e2dfc..da085998879b 100644 --- a/sys/contrib/openzfs/include/sys/zil.h +++ b/sys/contrib/openzfs/include/sys/zil.h @@ -456,7 +456,7 @@ typedef enum { WR_NUM_STATES /* number of states */ } itx_wr_state_t; -typedef void (*zil_callback_t)(void *data); +typedef void (*zil_callback_t)(void *data, int err); typedef struct itx { list_node_t itx_node; /* linkage on zl_itx_list */ @@ -498,10 +498,13 @@ typedef struct zil_stats { * (see zil_commit_writer_stall()) * - suspend: ZIL suspended * (see zil_commit(), zil_get_commit_list()) + * - crash: ZIL crashed + * (see zil_crash(), zil_commit(), ...) */ kstat_named_t zil_commit_error_count; kstat_named_t zil_commit_stall_count; kstat_named_t zil_commit_suspend_count; + kstat_named_t zil_commit_crash_count; /* * Number of transactions (reads, writes, renames, etc.) @@ -549,6 +552,7 @@ typedef struct zil_sums { wmsum_t zil_commit_error_count; wmsum_t zil_commit_stall_count; wmsum_t zil_commit_suspend_count; + wmsum_t zil_commit_crash_count; wmsum_t zil_itx_count; wmsum_t zil_itx_indirect_count; wmsum_t zil_itx_indirect_bytes; @@ -577,6 +581,25 @@ typedef struct zil_sums { #define ZIL_STAT_BUMP(zil, stat) \ ZIL_STAT_INCR(zil, stat, 1); +/* + * Flags for zil_commit_flags(). zil_commit() is a shortcut for + * zil_commit_flags(ZIL_COMMIT_FAILMODE), which is the most common use. + */ +typedef enum { + /* + * Try to commit the ZIL. If it fails, fall back to txg_wait_synced(). + * If that fails, return EIO. + */ + ZIL_COMMIT_NOW = 0, + + /* + * Like ZIL_COMMIT_NOW, but if the ZIL commit fails because the pool + * suspended, act according to the pool's failmode= setting (wait for + * the pool to resume, or return EIO). + */ + ZIL_COMMIT_FAILMODE = (1 << 1), +} zil_commit_flag_t; + typedef int zil_parse_blk_func_t(zilog_t *zilog, const blkptr_t *bp, void *arg, uint64_t txg); typedef int zil_parse_lr_func_t(zilog_t *zilog, const lr_t *lr, void *arg, @@ -606,14 +629,16 @@ extern boolean_t zil_destroy(zilog_t *zilog, boolean_t keep_first); extern void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx); extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize); -extern void zil_itx_destroy(itx_t *itx); +extern void zil_itx_destroy(itx_t *itx, int err); extern void zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx); extern void zil_async_to_sync(zilog_t *zilog, uint64_t oid); -extern void zil_commit(zilog_t *zilog, uint64_t oid); -extern void zil_commit_impl(zilog_t *zilog, uint64_t oid); extern void zil_remove_async(zilog_t *zilog, uint64_t oid); +extern int zil_commit_flags(zilog_t *zilog, uint64_t oid, + zil_commit_flag_t flags); +extern int __must_check zil_commit(zilog_t *zilog, uint64_t oid); + extern int zil_reset(const char *osname, void *txarg); extern int zil_claim(struct dsl_pool *dp, struct dsl_dataset *ds, void *txarg); diff --git a/sys/contrib/openzfs/include/sys/zil_impl.h b/sys/contrib/openzfs/include/sys/zil_impl.h index 252264b9eae9..ea1364a7e35a 100644 --- a/sys/contrib/openzfs/include/sys/zil_impl.h +++ b/sys/contrib/openzfs/include/sys/zil_impl.h @@ -41,8 +41,8 @@ extern "C" { * * An lwb will start out in the "new" state, and transition to the "opened" * state via a call to zil_lwb_write_open() on first itx assignment. When - * transitioning from "new" to "opened" the zilog's "zl_issuer_lock" must be - * held. + * transitioning from "new" to "opened" the zilog's "zl_issuer_lock" and + * LWB's "lwb_lock" must be held. * * After the lwb is "opened", it can be assigned number of itxs and transition * into the "closed" state via zil_lwb_write_close() when full or on timeout. @@ -100,16 +100,22 @@ typedef enum { * holding the "zl_issuer_lock". After the lwb is issued, the zilog's * "zl_lock" is used to protect the lwb against concurrent access. */ +typedef enum { + LWB_FLAG_SLIM = (1<<0), /* log block has slim format */ + LWB_FLAG_SLOG = (1<<1), /* lwb_blk is on SLOG device */ + LWB_FLAG_CRASHED = (1<<2), /* lwb is on the crash list */ +} lwb_flag_t; + typedef struct lwb { zilog_t *lwb_zilog; /* back pointer to log struct */ blkptr_t lwb_blk; /* on disk address of this log blk */ - boolean_t lwb_slim; /* log block has slim format */ - boolean_t lwb_slog; /* lwb_blk is on SLOG device */ + lwb_flag_t lwb_flags; /* extra info about this lwb */ int lwb_error; /* log block allocation error */ int lwb_nmax; /* max bytes in the buffer */ int lwb_nused; /* # used bytes in buffer */ int lwb_nfilled; /* # filled bytes in buffer */ int lwb_sz; /* size of block and buffer */ + int lwb_min_sz; /* min size for range allocation */ lwb_state_t lwb_state; /* the state of this lwb */ char *lwb_buf; /* log write buffer */ zio_t *lwb_child_zio; /* parent zio for children */ @@ -124,7 +130,7 @@ typedef struct lwb { list_t lwb_itxs; /* list of itx's */ list_t lwb_waiters; /* list of zil_commit_waiter's */ avl_tree_t lwb_vdev_tree; /* vdevs to flush after lwb write */ - kmutex_t lwb_vdev_lock; /* protects lwb_vdev_tree */ + kmutex_t lwb_lock; /* protects lwb_vdev_tree and size */ } lwb_t; /* @@ -149,7 +155,7 @@ typedef struct zil_commit_waiter { list_node_t zcw_node; /* linkage in lwb_t:lwb_waiter list */ lwb_t *zcw_lwb; /* back pointer to lwb when linked */ boolean_t zcw_done; /* B_TRUE when "done", else B_FALSE */ - int zcw_zio_error; /* contains the zio io_error value */ + int zcw_error; /* result to return from zil_commit() */ } zil_commit_waiter_t; /* @@ -221,6 +227,7 @@ struct zilog { uint64_t zl_cur_left; /* current burst remaining size */ uint64_t zl_cur_max; /* biggest record in current burst */ list_t zl_lwb_list; /* in-flight log write list */ + list_t zl_lwb_crash_list; /* log writes in-flight at crash */ avl_tree_t zl_bp_tree; /* track bps during log parse */ clock_t zl_replay_time; /* lbolt of when replay started */ uint64_t zl_replay_blks; /* number of log blocks replayed */ @@ -245,6 +252,9 @@ struct zilog { */ uint64_t zl_max_block_size; + /* After crash, txg to restart zil */ + uint64_t zl_restart_txg; + /* Pointer for per dataset zil sums */ zil_sums_t *zl_sums; }; diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h index 4f46eab3db89..353805fcb969 100644 --- a/sys/contrib/openzfs/include/sys/zio.h +++ b/sys/contrib/openzfs/include/sys/zio.h @@ -360,26 +360,26 @@ struct zbookmark_err_phys { (zb)->zb_blkid == ZB_ROOT_BLKID) typedef struct zio_prop { - enum zio_checksum zp_checksum; - enum zio_compress zp_compress; + enum zio_checksum zp_checksum:8; + enum zio_compress zp_compress:8; uint8_t zp_complevel; uint8_t zp_level; uint8_t zp_copies; uint8_t zp_gang_copies; - dmu_object_type_t zp_type; - boolean_t zp_dedup; - boolean_t zp_dedup_verify; - boolean_t zp_nopwrite; - boolean_t zp_brtwrite; - boolean_t zp_encrypt; - boolean_t zp_byteorder; - boolean_t zp_direct_write; - boolean_t zp_rewrite; + dmu_object_type_t zp_type:8; + dmu_object_type_t zp_storage_type:8; + boolean_t zp_dedup:1; + boolean_t zp_dedup_verify:1; + boolean_t zp_nopwrite:1; + boolean_t zp_brtwrite:1; + boolean_t zp_encrypt:1; + boolean_t zp_byteorder:1; + boolean_t zp_direct_write:1; + boolean_t zp_rewrite:1; + uint32_t zp_zpl_smallblk; uint8_t zp_salt[ZIO_DATA_SALT_LEN]; uint8_t zp_iv[ZIO_DATA_IV_LEN]; uint8_t zp_mac[ZIO_DATA_MAC_LEN]; - uint32_t zp_zpl_smallblk; - dmu_object_type_t zp_storage_type; } zio_prop_t; typedef struct zio_cksum_report zio_cksum_report_t; @@ -622,7 +622,8 @@ extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, zio_flag_t flags); extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, - blkptr_t *new_bp, uint64_t size, boolean_t *slog); + blkptr_t *new_bp, uint64_t min_size, uint64_t max_size, boolean_t *slog, + boolean_t allow_larger); extern void zio_flush(zio_t *zio, vdev_t *vd); extern void zio_shrink(zio_t *zio, uint64_t size); diff --git a/sys/contrib/openzfs/include/sys/zvol_impl.h b/sys/contrib/openzfs/include/sys/zvol_impl.h index f3dd9f26f23c..5422e66832c0 100644 --- a/sys/contrib/openzfs/include/sys/zvol_impl.h +++ b/sys/contrib/openzfs/include/sys/zvol_impl.h @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright (c) 2024, Klara, Inc. + * Copyright (c) 2024, 2025, Klara, Inc. */ #ifndef _SYS_ZVOL_IMPL_H @@ -56,6 +56,7 @@ typedef struct zvol_state { atomic_t zv_suspend_ref; /* refcount for suspend */ krwlock_t zv_suspend_lock; /* suspend lock */ kcondvar_t zv_removing_cv; /* ready to remove minor */ + list_node_t zv_remove_node; /* node on removal list */ struct zvol_state_os *zv_zso; /* private platform state */ boolean_t zv_threading; /* volthreading property */ } zvol_state_t; @@ -135,7 +136,7 @@ int zvol_os_rename_minor(zvol_state_t *zv, const char *newname); int zvol_os_create_minor(const char *name); int zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize); boolean_t zvol_os_is_zvol(const char *path); -void zvol_os_clear_private(zvol_state_t *zv); +void zvol_os_remove_minor(zvol_state_t *zv); void zvol_os_set_disk_ro(zvol_state_t *zv, int flags); void zvol_os_set_capacity(zvol_state_t *zv, uint64_t capacity); diff --git a/sys/contrib/openzfs/lib/libicp/Makefile.am b/sys/contrib/openzfs/lib/libicp/Makefile.am index ce24d13a760f..23adba10bc44 100644 --- a/sys/contrib/openzfs/lib/libicp/Makefile.am +++ b/sys/contrib/openzfs/lib/libicp/Makefile.am @@ -69,6 +69,7 @@ nodist_libicp_la_SOURCES += \ module/icp/asm-x86_64/aes/aes_aesni.S \ module/icp/asm-x86_64/modes/gcm_pclmulqdq.S \ module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S \ + module/icp/asm-x86_64/modes/aesni-gcm-avx2-vaes.S \ module/icp/asm-x86_64/modes/ghash-x86_64.S \ module/icp/asm-x86_64/sha2/sha256-x86_64.S \ module/icp/asm-x86_64/sha2/sha512-x86_64.S \ diff --git a/sys/contrib/openzfs/lib/libspl/include/sys/debug.h b/sys/contrib/openzfs/lib/libspl/include/sys/debug.h index cced309bd1bb..02f33a68b75b 100644 --- a/sys/contrib/openzfs/lib/libspl/include/sys/debug.h +++ b/sys/contrib/openzfs/lib/libspl/include/sys/debug.h @@ -38,4 +38,8 @@ #define __maybe_unused __attribute__((unused)) #endif +#ifndef __must_check +#define __must_check __attribute__((warn_unused_result)) +#endif + #endif diff --git a/sys/contrib/openzfs/lib/libspl/include/sys/simd.h b/sys/contrib/openzfs/lib/libspl/include/sys/simd.h index 1ef24f5a7d39..4772a5416b2e 100644 --- a/sys/contrib/openzfs/lib/libspl/include/sys/simd.h +++ b/sys/contrib/openzfs/lib/libspl/include/sys/simd.h @@ -102,7 +102,9 @@ typedef enum cpuid_inst_sets { AES, PCLMULQDQ, MOVBE, - SHA_NI + SHA_NI, + VAES, + VPCLMULQDQ } cpuid_inst_sets_t; /* @@ -127,6 +129,8 @@ typedef struct cpuid_feature_desc { #define _AES_BIT (1U << 25) #define _PCLMULQDQ_BIT (1U << 1) #define _MOVBE_BIT (1U << 22) +#define _VAES_BIT (1U << 9) +#define _VPCLMULQDQ_BIT (1U << 10) #define _SHA_NI_BIT (1U << 29) /* @@ -157,6 +161,8 @@ static const cpuid_feature_desc_t cpuid_features[] = { [PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX }, [MOVBE] = {1U, 0U, _MOVBE_BIT, ECX }, [SHA_NI] = {7U, 0U, _SHA_NI_BIT, EBX }, + [VAES] = {7U, 0U, _VAES_BIT, ECX }, + [VPCLMULQDQ] = {7U, 0U, _VPCLMULQDQ_BIT, ECX }, }; /* @@ -231,6 +237,8 @@ CPUID_FEATURE_CHECK(aes, AES); CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ); CPUID_FEATURE_CHECK(movbe, MOVBE); CPUID_FEATURE_CHECK(shani, SHA_NI); +CPUID_FEATURE_CHECK(vaes, VAES); +CPUID_FEATURE_CHECK(vpclmulqdq, VPCLMULQDQ); /* * Detect register set support @@ -382,6 +390,24 @@ zfs_shani_available(void) } /* + * Check if VAES instruction is available + */ +static inline boolean_t +zfs_vaes_available(void) +{ + return (__cpuid_has_vaes()); +} + +/* + * Check if VPCLMULQDQ instruction is available + */ +static inline boolean_t +zfs_vpclmulqdq_available(void) +{ + return (__cpuid_has_vpclmulqdq()); +} + +/* * AVX-512 family of instruction sets: * * AVX512F Foundation diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c b/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c index 6aa0375f98d7..5f50bce531f7 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c @@ -81,7 +81,7 @@ get_stats_for_obj(differ_info_t *di, const char *dsname, uint64_t obj, /* we can get stats even if we failed to get a path */ (void) memcpy(sb, &zc.zc_stat, sizeof (zfs_stat_t)); if (error == 0) { - ASSERT(di->zerr == 0); + ASSERT0(di->zerr); (void) strlcpy(pn, zc.zc_value, maxlen); return (0); } @@ -404,7 +404,7 @@ write_free_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr) (void) strlcpy(zc.zc_name, di->fromsnap, sizeof (zc.zc_name)); zc.zc_obj = dr->ddr_first - 1; - ASSERT(di->zerr == 0); + ASSERT0(di->zerr); while (zc.zc_obj < dr->ddr_last) { int err; diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_import.c b/sys/contrib/openzfs/lib/libzfs/libzfs_import.c index 599e8e6f7819..7f276e9592c9 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_import.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_import.c @@ -122,7 +122,7 @@ const pool_config_ops_t libzfs_config_ops = { static uint64_t label_offset(uint64_t size, int l) { - ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0); + ASSERT0(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t)); return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? 0 : size - VDEV_LABELS * sizeof (vdev_label_t))); } diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c b/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c index 2a81b658d342..5c9e2199eed4 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c @@ -516,7 +516,7 @@ zfs_mount_at(zfs_handle_t *zhp, const char *options, int flags, } else if (rc == ENOTSUP) { int spa_version; - VERIFY(zfs_spa_version(zhp, &spa_version) == 0); + VERIFY0(zfs_spa_version(zhp, &spa_version)); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Can't mount a version %llu " "file system on a version %d pool. Pool must be" diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c index 1ad10ebb3c15..77134d197904 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c @@ -2505,7 +2505,7 @@ zfs_send_cb_impl(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, err = ENOENT; if (sdd.cleanup_fd != -1) { - VERIFY(0 == close(sdd.cleanup_fd)); + VERIFY0(close(sdd.cleanup_fd)); sdd.cleanup_fd = -1; } @@ -2531,7 +2531,7 @@ err_out: fnvlist_free(sdd.snapholds); if (sdd.cleanup_fd != -1) - VERIFY(0 == close(sdd.cleanup_fd)); + VERIFY0(close(sdd.cleanup_fd)); return (err); } @@ -5108,7 +5108,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, nvlist_t *holds, *errors = NULL; int cleanup_fd = -1; - VERIFY(0 == nvlist_alloc(&holds, 0, KM_SLEEP)); + VERIFY0(nvlist_alloc(&holds, 0, KM_SLEEP)); for (pair = nvlist_next_nvpair(snapholds_nvlist, NULL); pair != NULL; pair = nvlist_next_nvpair(snapholds_nvlist, pair)) { @@ -5560,7 +5560,7 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, if ((cp = strchr(nonpackage_sendfs, '@')) != NULL) *cp = '\0'; sendfs = nonpackage_sendfs; - VERIFY(finalsnap == NULL); + VERIFY0P(finalsnap); } return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags, &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs, diff --git a/sys/contrib/openzfs/lib/libzpool/abd_os.c b/sys/contrib/openzfs/lib/libzpool/abd_os.c index 0d5795de143a..8bd7a64ab24a 100644 --- a/sys/contrib/openzfs/lib/libzpool/abd_os.c +++ b/sys/contrib/openzfs/lib/libzpool/abd_os.c @@ -302,7 +302,7 @@ abd_iter_at_end(struct abd_iter *aiter) void abd_iter_advance(struct abd_iter *aiter, size_t amount) { - ASSERT3P(aiter->iter_mapaddr, ==, NULL); + ASSERT0P(aiter->iter_mapaddr); ASSERT0(aiter->iter_mapsize); if (abd_iter_at_end(aiter)) @@ -315,7 +315,7 @@ abd_iter_advance(struct abd_iter *aiter, size_t amount) void abd_iter_map(struct abd_iter *aiter) { - ASSERT3P(aiter->iter_mapaddr, ==, NULL); + ASSERT0P(aiter->iter_mapaddr); ASSERT0(aiter->iter_mapsize); if (abd_iter_at_end(aiter)) diff --git a/sys/contrib/openzfs/lib/libzpool/kernel.c b/sys/contrib/openzfs/lib/libzpool/kernel.c index 2e8bf160465a..fea2f81458f9 100644 --- a/sys/contrib/openzfs/lib/libzpool/kernel.c +++ b/sys/contrib/openzfs/lib/libzpool/kernel.c @@ -38,6 +38,7 @@ #include <sys/processor.h> #include <sys/rrwlock.h> #include <sys/spa.h> +#include <sys/spa_impl.h> #include <sys/stat.h> #include <sys/systeminfo.h> #include <sys/time.h> @@ -369,7 +370,7 @@ cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) if (delta <= 0) return (-1); - VERIFY(gettimeofday(&tv, NULL) == 0); + VERIFY0(gettimeofday(&tv, NULL)); ts.tv_sec = tv.tv_sec + delta / hz; ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % hz) * (NANOSEC / hz); @@ -811,6 +812,79 @@ umem_out_of_memory(void) return (0); } +static void +spa_config_load(void) +{ + void *buf = NULL; + nvlist_t *nvlist, *child; + nvpair_t *nvpair; + char *pathname; + zfs_file_t *fp; + zfs_file_attr_t zfa; + uint64_t fsize; + int err; + + /* + * Open the configuration file. + */ + pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + (void) snprintf(pathname, MAXPATHLEN, "%s", spa_config_path); + + err = zfs_file_open(pathname, O_RDONLY, 0, &fp); + if (err) + err = zfs_file_open(ZPOOL_CACHE_BOOT, O_RDONLY, 0, &fp); + + kmem_free(pathname, MAXPATHLEN); + + if (err) + return; + + if (zfs_file_getattr(fp, &zfa)) + goto out; + + fsize = zfa.zfa_size; + buf = kmem_alloc(fsize, KM_SLEEP); + + /* + * Read the nvlist from the file. + */ + if (zfs_file_read(fp, buf, fsize, NULL) < 0) + goto out; + + /* + * Unpack the nvlist. + */ + if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0) + goto out; + + /* + * Iterate over all elements in the nvlist, creating a new spa_t for + * each one with the specified configuration. + */ + mutex_enter(&spa_namespace_lock); + nvpair = NULL; + while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) { + if (nvpair_type(nvpair) != DATA_TYPE_NVLIST) + continue; + + child = fnvpair_value_nvlist(nvpair); + + if (spa_lookup(nvpair_name(nvpair)) != NULL) + continue; + (void) spa_add(nvpair_name(nvpair), child, NULL); + } + mutex_exit(&spa_namespace_lock); + + nvlist_free(nvlist); + +out: + if (buf != NULL) + kmem_free(buf, fsize); + + zfs_file_close(fp); +} + void kernel_init(int mode) { @@ -835,6 +909,7 @@ kernel_init(int mode) zstd_init(); spa_init((spa_mode_t)mode); + spa_config_load(); fletcher_4_init(); diff --git a/sys/contrib/openzfs/lib/libzpool/util.c b/sys/contrib/openzfs/lib/libzpool/util.c index 1d0d1a1e56d9..66d6f43967d5 100644 --- a/sys/contrib/openzfs/lib/libzpool/util.c +++ b/sys/contrib/openzfs/lib/libzpool/util.c @@ -137,12 +137,10 @@ show_pool_stats(spa_t *spa) nvlist_t *config, *nvroot; const char *name; - VERIFY(spa_get_stats(spa_name(spa), &config, NULL, 0) == 0); + VERIFY0(spa_get_stats(spa_name(spa), &config, NULL, 0)); - VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, - &name) == 0); + VERIFY0(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot)); + VERIFY0(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &name)); show_vdev_stats(name, ZPOOL_CONFIG_CHILDREN, nvroot, 0); show_vdev_stats(NULL, ZPOOL_CONFIG_L2CACHE, nvroot, 0); diff --git a/sys/contrib/openzfs/lib/libzutil/zutil_import.c b/sys/contrib/openzfs/lib/libzutil/zutil_import.c index ccdc874076c3..08367f4c064d 100644 --- a/sys/contrib/openzfs/lib/libzutil/zutil_import.c +++ b/sys/contrib/openzfs/lib/libzutil/zutil_import.c @@ -917,7 +917,7 @@ error: static uint64_t label_offset(uint64_t size, int l) { - ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0); + ASSERT0(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t)); return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? 0 : size - VDEV_LABELS * sizeof (vdev_label_t))); } @@ -1769,7 +1769,7 @@ zpool_find_import_cached(libpc_handle_t *hdl, importargs_t *iarg) fnvlist_add_nvlist(pools, nvpair_name(pair), fnvpair_value_nvlist(pair)); - VERIFY3P(nvlist_next_nvpair(nv, pair), ==, NULL); + VERIFY0P(nvlist_next_nvpair(nv, pair)); iarg->guid = saved_guid; iarg->poolname = saved_poolname; @@ -1903,30 +1903,43 @@ zpool_find_config(libpc_handle_t *hdl, const char *target, nvlist_t **configp, *sepp = '\0'; pools = zpool_search_import(hdl, args); + if (pools == NULL) { + zutil_error_aux(hdl, dgettext(TEXT_DOMAIN, "no pools found")); + (void) zutil_error_fmt(hdl, LPC_UNKNOWN, dgettext(TEXT_DOMAIN, + "failed to find config for pool '%s'"), targetdup); + free(targetdup); + return (ENOENT); + } - if (pools != NULL) { - nvpair_t *elem = NULL; - while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { - VERIFY0(nvpair_value_nvlist(elem, &config)); - if (pool_match(config, targetdup)) { - count++; - if (match != NULL) { - /* multiple matches found */ - continue; - } else { - match = fnvlist_dup(config); - } + nvpair_t *elem = NULL; + while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { + VERIFY0(nvpair_value_nvlist(elem, &config)); + if (pool_match(config, targetdup)) { + count++; + if (match != NULL) { + /* multiple matches found */ + continue; + } else { + match = fnvlist_dup(config); } } - fnvlist_free(pools); } + fnvlist_free(pools); if (count == 0) { + zutil_error_aux(hdl, dgettext(TEXT_DOMAIN, + "no matching pools")); + (void) zutil_error_fmt(hdl, LPC_UNKNOWN, dgettext(TEXT_DOMAIN, + "failed to find config for pool '%s'"), targetdup); free(targetdup); return (ENOENT); } if (count > 1) { + zutil_error_aux(hdl, dgettext(TEXT_DOMAIN, + "more than one matching pool")); + (void) zutil_error_fmt(hdl, LPC_UNKNOWN, dgettext(TEXT_DOMAIN, + "failed to find config for pool '%s'"), targetdup); free(targetdup); fnvlist_free(match); return (EINVAL); diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4 index 4a5f9fd93f4f..5c7958667f92 100644 --- a/sys/contrib/openzfs/man/man4/zfs.4 +++ b/sys/contrib/openzfs/man/man4/zfs.4 @@ -941,10 +941,6 @@ The target number of bytes the ARC should leave as free memory on the system. If zero, equivalent to the bigger of .Sy 512 KiB No and Sy all_system_memory/64 . . -.It Sy zfs_autoimport_disable Ns = Ns Sy 1 Ns | Ns 0 Pq int -Disable pool import at module load by ignoring the cache file -.Pq Sy spa_config_path . -. .It Sy zfs_checksum_events_per_second Ns = Ns Sy 20 Ns /s Pq uint Rate limit checksum events to this many per second. Note that this should not be set below the ZED thresholds diff --git a/sys/contrib/openzfs/man/man8/zdb.8 b/sys/contrib/openzfs/man/man8/zdb.8 index 3984aaac5866..0a5b6af73fdb 100644 --- a/sys/contrib/openzfs/man/man8/zdb.8 +++ b/sys/contrib/openzfs/man/man8/zdb.8 @@ -15,7 +15,7 @@ .\" Copyright (c) 2017 Lawrence Livermore National Security, LLC. .\" Copyright (c) 2017 Intel Corporation. .\" -.Dd October 27, 2024 +.Dd April 23, 2025 .Dt ZDB 8 .Os . @@ -531,6 +531,18 @@ option, with more occurrences enabling more verbosity. If no options are specified, all information about the named pool will be displayed at default verbosity. . +.Sh EXIT STATUS +The +.Nm +utility exits +.Sy 0 +on success, +.Sy 1 +if a fatal error occurs, +.Sy 2 +if invalid command line options were specified, or +.Sy 3 +if on-disk corruption was detected, but was not fatal. .Sh EXAMPLES .Ss Example 1 : No Display the configuration of imported pool Ar rpool .Bd -literal diff --git a/sys/contrib/openzfs/man/man8/zfs-send.8 b/sys/contrib/openzfs/man/man8/zfs-send.8 index c920a5a48798..f7c6b840303c 100644 --- a/sys/contrib/openzfs/man/man8/zfs-send.8 +++ b/sys/contrib/openzfs/man/man8/zfs-send.8 @@ -173,8 +173,10 @@ The receiving system must have the feature enabled. If the .Sy lz4_compress -feature is active on the sending system, then the receiving system must have -that feature enabled as well. +or +.Sy zstd_compress +features are active on the sending system, then the receiving system must have +the corresponding features enabled as well. Datasets that are sent with this flag may not be received as an encrypted dataset, since encrypted datasets cannot use the .Sy embedded_data @@ -201,8 +203,10 @@ property for details .Pc . If the .Sy lz4_compress -feature is active on the sending system, then the receiving system must have -that feature enabled as well. +or +.Sy zstd_compress +features are active on the sending system, then the receiving system must have +the corresponding features enabled as well. If the .Sy large_blocks feature is enabled on the sending system but the @@ -357,8 +361,10 @@ property for details .Pc . If the .Sy lz4_compress -feature is active on the sending system, then the receiving system must have -that feature enabled as well. +or +.Sy zstd_compress +features are active on the sending system, then the receiving system must have +the corresponding features enabled as well. If the .Sy large_blocks feature is enabled on the sending system but the @@ -400,8 +406,10 @@ The receiving system must have the feature enabled. If the .Sy lz4_compress -feature is active on the sending system, then the receiving system must have -that feature enabled as well. +or +.Sy zstd_compress +features are active on the sending system, then the receiving system must have +the corresponding features enabled as well. Datasets that are sent with this flag may not be received as an encrypted dataset, since encrypted datasets cannot use the diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in index 3d6f288fa5da..362d2295e091 100644 --- a/sys/contrib/openzfs/module/Kbuild.in +++ b/sys/contrib/openzfs/module/Kbuild.in @@ -135,6 +135,7 @@ ICP_OBJS_X86_64 := \ asm-x86_64/sha2/sha256-x86_64.o \ asm-x86_64/sha2/sha512-x86_64.o \ asm-x86_64/modes/aesni-gcm-x86_64.o \ + asm-x86_64/modes/aesni-gcm-avx2-vaes.o \ asm-x86_64/modes/gcm_pclmulqdq.o \ asm-x86_64/modes/ghash-x86_64.o diff --git a/sys/contrib/openzfs/module/avl/avl.c b/sys/contrib/openzfs/module/avl/avl.c index b6c1c02bc3f2..67cbcd3adeec 100644 --- a/sys/contrib/openzfs/module/avl/avl.c +++ b/sys/contrib/openzfs/module/avl/avl.c @@ -225,7 +225,7 @@ avl_nearest(avl_tree_t *tree, avl_index_t where, int direction) size_t off = tree->avl_offset; if (node == NULL) { - ASSERT(tree->avl_root == NULL); + ASSERT0P(tree->avl_root); return (NULL); } data = AVL_NODE2DATA(node, off); @@ -478,7 +478,7 @@ avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where) size_t off = tree->avl_offset; #ifdef _LP64 - ASSERT(((uintptr_t)new_data & 0x7) == 0); + ASSERT0(((uintptr_t)new_data & 0x7)); #endif node = AVL_DATA2NODE(new_data, off); @@ -495,10 +495,10 @@ avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where) AVL_SETBALANCE(node, 0); AVL_SETPARENT(node, parent); if (parent != NULL) { - ASSERT(parent->avl_child[which_child] == NULL); + ASSERT0P(parent->avl_child[which_child]); parent->avl_child[which_child] = node; } else { - ASSERT(tree->avl_root == NULL); + ASSERT0P(tree->avl_root); tree->avl_root = node; } /* @@ -608,7 +608,7 @@ avl_insert_here( ASSERT(diff > 0 ? child == 1 : child == 0); #endif } - ASSERT(node->avl_child[child] == NULL); + ASSERT0P(node->avl_child[child]); avl_insert(tree, new_data, AVL_MKINDEX(node, child)); } @@ -881,7 +881,7 @@ avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *), ASSERT(size > 0); ASSERT(size >= offset + sizeof (avl_node_t)); #ifdef _LP64 - ASSERT((offset & 0x7) == 0); + ASSERT0((offset & 0x7)); #endif tree->avl_compar = compar; @@ -897,8 +897,8 @@ void avl_destroy(avl_tree_t *tree) { ASSERT(tree); - ASSERT(tree->avl_numnodes == 0); - ASSERT(tree->avl_root == NULL); + ASSERT0(tree->avl_numnodes); + ASSERT0P(tree->avl_root); } diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c index c2a982b5a376..3cfa5b8165ce 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c @@ -46,6 +46,9 @@ #define IMPL_CYCLE (UINT32_MAX-1) #ifdef CAN_USE_GCM_ASM #define IMPL_AVX (UINT32_MAX-2) +#if CAN_USE_GCM_ASM >= 2 +#define IMPL_AVX2 (UINT32_MAX-3) +#endif #endif #define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i)) static uint32_t icp_gcm_impl = IMPL_FASTEST; @@ -56,17 +59,16 @@ static uint32_t user_sel_impl = IMPL_FASTEST; boolean_t gcm_avx_can_use_movbe = B_FALSE; /* * Whether to use the optimized openssl gcm and ghash implementations. - * Set to true if module parameter icp_gcm_impl == "avx". */ -static boolean_t gcm_use_avx = B_FALSE; -#define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx) +static gcm_impl gcm_impl_used = GCM_IMPL_GENERIC; +#define GCM_IMPL_USED (*(volatile gcm_impl *)&gcm_impl_used) extern boolean_t ASMABI atomic_toggle_boolean_nv(volatile boolean_t *); static inline boolean_t gcm_avx_will_work(void); -static inline void gcm_set_avx(boolean_t); -static inline boolean_t gcm_toggle_avx(void); -static inline size_t gcm_simd_get_htab_size(boolean_t); +static inline boolean_t gcm_avx2_will_work(void); +static inline void gcm_use_impl(gcm_impl impl); +static inline gcm_impl gcm_toggle_impl(void); static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t, crypto_data_t *, size_t); @@ -89,7 +91,7 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, void (*xor_block)(uint8_t *, uint8_t *)) { #ifdef CAN_USE_GCM_ASM - if (ctx->gcm_use_avx == B_TRUE) + if (ctx->impl != GCM_IMPL_GENERIC) return (gcm_mode_encrypt_contiguous_blocks_avx( ctx, data, length, out, block_size)); #endif @@ -208,7 +210,7 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, { (void) copy_block; #ifdef CAN_USE_GCM_ASM - if (ctx->gcm_use_avx == B_TRUE) + if (ctx->impl != GCM_IMPL_GENERIC) return (gcm_encrypt_final_avx(ctx, out, block_size)); #endif @@ -374,7 +376,7 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, void (*xor_block)(uint8_t *, uint8_t *)) { #ifdef CAN_USE_GCM_ASM - if (ctx->gcm_use_avx == B_TRUE) + if (ctx->impl != GCM_IMPL_GENERIC) return (gcm_decrypt_final_avx(ctx, out, block_size)); #endif @@ -631,23 +633,23 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, ((aes_key_t *)gcm_ctx->gcm_keysched)->ops->needs_byteswap; if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { - gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; + gcm_ctx->impl = GCM_IMPL_USED; } else { /* - * Handle the "cycle" implementation by creating avx and - * non-avx contexts alternately. + * Handle the "cycle" implementation by creating different + * contexts, one per implementation. */ - gcm_ctx->gcm_use_avx = gcm_toggle_avx(); + gcm_ctx->impl = gcm_toggle_impl(); - /* The avx impl. doesn't handle byte swapped key schedules. */ - if (gcm_ctx->gcm_use_avx == B_TRUE && needs_bswap == B_TRUE) { - gcm_ctx->gcm_use_avx = B_FALSE; + /* The AVX impl. doesn't handle byte swapped key schedules. */ + if (needs_bswap == B_TRUE) { + gcm_ctx->impl = GCM_IMPL_GENERIC; } /* - * If this is a GCM context, use the MOVBE and the BSWAP + * If this is an AVX context, use the MOVBE and the BSWAP * variants alternately. */ - if (gcm_ctx->gcm_use_avx == B_TRUE && + if (gcm_ctx->impl == GCM_IMPL_AVX && zfs_movbe_available() == B_TRUE) { (void) atomic_toggle_boolean_nv( (volatile boolean_t *)&gcm_avx_can_use_movbe); @@ -658,12 +660,13 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, * still they could be created by the aes generic implementation. * Make sure not to use them since we'll corrupt data if we do. */ - if (gcm_ctx->gcm_use_avx == B_TRUE && needs_bswap == B_TRUE) { - gcm_ctx->gcm_use_avx = B_FALSE; + if (gcm_ctx->impl != GCM_IMPL_GENERIC && needs_bswap == B_TRUE) { + gcm_ctx->impl = GCM_IMPL_GENERIC; cmn_err_once(CE_WARN, "ICP: Can't use the aes generic or cycle implementations " - "in combination with the gcm avx implementation!"); + "in combination with the gcm avx or avx2-vaes " + "implementation!"); cmn_err_once(CE_WARN, "ICP: Falling back to a compatible implementation, " "aes-gcm performance will likely be degraded."); @@ -672,36 +675,20 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, "restore performance."); } - /* Allocate Htab memory as needed. */ - if (gcm_ctx->gcm_use_avx == B_TRUE) { - size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx); - - if (htab_len == 0) { - return (CRYPTO_MECHANISM_PARAM_INVALID); - } - gcm_ctx->gcm_htab_len = htab_len; - gcm_ctx->gcm_Htable = - kmem_alloc(htab_len, KM_SLEEP); - - if (gcm_ctx->gcm_Htable == NULL) { - return (CRYPTO_HOST_MEMORY); - } + /* + * AVX implementations use Htable with sizes depending on + * implementation. + */ + if (gcm_ctx->impl != GCM_IMPL_GENERIC) { + rv = gcm_init_avx(gcm_ctx, iv, iv_len, aad, aad_len, + block_size); } - /* Avx and non avx context initialization differs from here on. */ - if (gcm_ctx->gcm_use_avx == B_FALSE) { + else #endif /* ifdef CAN_USE_GCM_ASM */ - if (gcm_init(gcm_ctx, iv, iv_len, aad, aad_len, block_size, - encrypt_block, copy_block, xor_block) != CRYPTO_SUCCESS) { - rv = CRYPTO_MECHANISM_PARAM_INVALID; - } -#ifdef CAN_USE_GCM_ASM - } else { - if (gcm_init_avx(gcm_ctx, iv, iv_len, aad, aad_len, - block_size) != CRYPTO_SUCCESS) { - rv = CRYPTO_MECHANISM_PARAM_INVALID; - } + if (gcm_init(gcm_ctx, iv, iv_len, aad, aad_len, block_size, + encrypt_block, copy_block, xor_block) != CRYPTO_SUCCESS) { + rv = CRYPTO_MECHANISM_PARAM_INVALID; } -#endif /* ifdef CAN_USE_GCM_ASM */ return (rv); } @@ -767,6 +754,9 @@ gcm_impl_get_ops(void) break; #ifdef CAN_USE_GCM_ASM case IMPL_AVX: +#if CAN_USE_GCM_ASM >= 2 + case IMPL_AVX2: +#endif /* * Make sure that we return a valid implementation while * switching to the avx implementation since there still @@ -828,6 +818,13 @@ gcm_impl_init(void) * Use the avx implementation if it's available and the implementation * hasn't changed from its default value of fastest on module load. */ +#if CAN_USE_GCM_ASM >= 2 + if (gcm_avx2_will_work()) { + if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) { + gcm_use_impl(GCM_IMPL_AVX2); + } + } else +#endif if (gcm_avx_will_work()) { #ifdef HAVE_MOVBE if (zfs_movbe_available() == B_TRUE) { @@ -835,7 +832,7 @@ gcm_impl_init(void) } #endif if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) { - gcm_set_avx(B_TRUE); + gcm_use_impl(GCM_IMPL_AVX); } } #endif @@ -852,6 +849,7 @@ static const struct { { "fastest", IMPL_FASTEST }, #ifdef CAN_USE_GCM_ASM { "avx", IMPL_AVX }, + { "avx2-vaes", IMPL_AVX2 }, #endif }; @@ -887,7 +885,13 @@ gcm_impl_set(const char *val) /* Check mandatory options */ for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { #ifdef CAN_USE_GCM_ASM +#if CAN_USE_GCM_ASM >= 2 /* Ignore avx implementation if it won't work. */ + if (gcm_impl_opts[i].sel == IMPL_AVX2 && + !gcm_avx2_will_work()) { + continue; + } +#endif if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { continue; } @@ -915,11 +919,17 @@ gcm_impl_set(const char *val) * Use the avx implementation if available and the requested one is * avx or fastest. */ +#if CAN_USE_GCM_ASM >= 2 + if (gcm_avx2_will_work() == B_TRUE && + (impl == IMPL_AVX2 || impl == IMPL_FASTEST)) { + gcm_use_impl(GCM_IMPL_AVX2); + } else +#endif if (gcm_avx_will_work() == B_TRUE && (impl == IMPL_AVX || impl == IMPL_FASTEST)) { - gcm_set_avx(B_TRUE); + gcm_use_impl(GCM_IMPL_AVX); } else { - gcm_set_avx(B_FALSE); + gcm_use_impl(GCM_IMPL_GENERIC); } #endif @@ -952,6 +962,12 @@ icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp) for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { #ifdef CAN_USE_GCM_ASM /* Ignore avx implementation if it won't work. */ +#if CAN_USE_GCM_ASM >= 2 + if (gcm_impl_opts[i].sel == IMPL_AVX2 && + !gcm_avx2_will_work()) { + continue; + } +#endif if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { continue; } @@ -993,9 +1009,6 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); /* Clear the FPU registers since they hold sensitive internal state. */ #define clear_fpu_regs() clear_fpu_regs_avx() -#define GHASH_AVX(ctx, in, len) \ - gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \ - in, len) #define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1) @@ -1010,20 +1023,77 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); static uint32_t gcm_avx_chunk_size = ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; +/* + * GCM definitions: uint128_t is copied from include/crypto/modes.h + * Avoiding u128 because it is already defined in kernel sources. + */ +typedef struct { + uint64_t hi, lo; +} uint128_t; + extern void ASMABI clear_fpu_regs_avx(void); extern void ASMABI gcm_xor_avx(const uint8_t *src, uint8_t *dst); extern void ASMABI aes_encrypt_intel(const uint32_t rk[], int nr, const uint32_t pt[4], uint32_t ct[4]); extern void ASMABI gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]); +#if CAN_USE_GCM_ASM >= 2 +extern void ASMABI gcm_init_vpclmulqdq_avx2(uint128_t Htable[16], + const uint64_t H[2]); +#endif extern void ASMABI gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable, const uint8_t *in, size_t len); +#if CAN_USE_GCM_ASM >= 2 +extern void ASMABI gcm_ghash_vpclmulqdq_avx2(uint64_t ghash[2], + const uint64_t *Htable, const uint8_t *in, size_t len); +#endif +static inline void GHASH_AVX(gcm_ctx_t *ctx, const uint8_t *in, size_t len) +{ + switch (ctx->impl) { +#if CAN_USE_GCM_ASM >= 2 + case GCM_IMPL_AVX2: + gcm_ghash_vpclmulqdq_avx2(ctx->gcm_ghash, + (const uint64_t *)ctx->gcm_Htable, in, len); + break; +#endif + + case GCM_IMPL_AVX: + gcm_ghash_avx(ctx->gcm_ghash, + (const uint64_t *)ctx->gcm_Htable, in, len); + break; + + default: + VERIFY(B_FALSE); + } +} +typedef size_t ASMABI aesni_gcm_encrypt_impl(const uint8_t *, uint8_t *, + size_t, const void *, uint64_t *, const uint64_t *Htable, uint64_t *); extern size_t ASMABI aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t, const void *, uint64_t *, uint64_t *); +#if CAN_USE_GCM_ASM >= 2 +extern void ASMABI aes_gcm_enc_update_vaes_avx2(const uint8_t *in, + uint8_t *out, size_t len, const void *key, const uint8_t ivec[16], + const uint128_t Htable[16], uint8_t Xi[16]); +#endif +typedef size_t ASMABI aesni_gcm_decrypt_impl(const uint8_t *, uint8_t *, + size_t, const void *, uint64_t *, const uint64_t *Htable, uint64_t *); extern size_t ASMABI aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t, const void *, uint64_t *, uint64_t *); +#if CAN_USE_GCM_ASM >= 2 +extern void ASMABI aes_gcm_dec_update_vaes_avx2(const uint8_t *in, + uint8_t *out, size_t len, const void *key, const uint8_t ivec[16], + const uint128_t Htable[16], uint8_t Xi[16]); +#endif + +static inline boolean_t +gcm_avx2_will_work(void) +{ + return (kfpu_allowed() && + zfs_avx2_available() && zfs_vaes_available() && + zfs_vpclmulqdq_available()); +} static inline boolean_t gcm_avx_will_work(void) @@ -1035,33 +1105,67 @@ gcm_avx_will_work(void) } static inline void -gcm_set_avx(boolean_t val) +gcm_use_impl(gcm_impl impl) { - if (gcm_avx_will_work() == B_TRUE) { - atomic_swap_32(&gcm_use_avx, val); + switch (impl) { +#if CAN_USE_GCM_ASM >= 2 + case GCM_IMPL_AVX2: + if (gcm_avx2_will_work() == B_TRUE) { + atomic_swap_32(&gcm_impl_used, impl); + return; + } + + zfs_fallthrough; +#endif + + case GCM_IMPL_AVX: + if (gcm_avx_will_work() == B_TRUE) { + atomic_swap_32(&gcm_impl_used, impl); + return; + } + + zfs_fallthrough; + + default: + atomic_swap_32(&gcm_impl_used, GCM_IMPL_GENERIC); } } static inline boolean_t -gcm_toggle_avx(void) +gcm_impl_will_work(gcm_impl impl) { - if (gcm_avx_will_work() == B_TRUE) { - return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX)); - } else { - return (B_FALSE); + switch (impl) { +#if CAN_USE_GCM_ASM >= 2 + case GCM_IMPL_AVX2: + return (gcm_avx2_will_work()); +#endif + + case GCM_IMPL_AVX: + return (gcm_avx_will_work()); + + default: + return (B_TRUE); } } -static inline size_t -gcm_simd_get_htab_size(boolean_t simd_mode) +static inline gcm_impl +gcm_toggle_impl(void) { - switch (simd_mode) { - case B_TRUE: - return (2 * 6 * 2 * sizeof (uint64_t)); + gcm_impl current_impl, new_impl; + do { /* handle races */ + current_impl = atomic_load_32(&gcm_impl_used); + new_impl = current_impl; + while (B_TRUE) { /* handle incompatble implementations */ + new_impl = (new_impl + 1) % GCM_IMPL_MAX; + if (gcm_impl_will_work(new_impl)) { + break; + } + } - default: - return (0); - } + } while (atomic_cas_32(&gcm_impl_used, current_impl, new_impl) != + current_impl); + + return (new_impl); } @@ -1077,6 +1181,50 @@ gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n) ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; } +static size_t aesni_gcm_encrypt_avx(const uint8_t *in, uint8_t *out, + size_t len, const void *key, uint64_t *iv, const uint64_t *Htable, + uint64_t *Xip) +{ + (void) Htable; + return (aesni_gcm_encrypt(in, out, len, key, iv, Xip)); +} + +#if CAN_USE_GCM_ASM >= 2 +// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four +// bits of a |size_t|. +// This is from boringssl/crypto/fipsmodule/aes/gcm.cc.inc +static const size_t kSizeTWithoutLower4Bits = (size_t)-16; + +/* The following CRYPTO methods are from boringssl/crypto/internal.h */ +static inline uint32_t CRYPTO_bswap4(uint32_t x) { + return (__builtin_bswap32(x)); +} + +static inline uint32_t CRYPTO_load_u32_be(const void *in) { + uint32_t v; + memcpy(&v, in, sizeof (v)); + return (CRYPTO_bswap4(v)); +} + +static inline void CRYPTO_store_u32_be(void *out, uint32_t v) { + v = CRYPTO_bswap4(v); + memcpy(out, &v, sizeof (v)); +} + +static size_t aesni_gcm_encrypt_avx2(const uint8_t *in, uint8_t *out, + size_t len, const void *key, uint64_t *iv, const uint64_t *Htable, + uint64_t *Xip) +{ + uint8_t *ivec = (uint8_t *)iv; + len &= kSizeTWithoutLower4Bits; + aes_gcm_enc_update_vaes_avx2(in, out, len, key, ivec, + (const uint128_t *)Htable, (uint8_t *)Xip); + CRYPTO_store_u32_be(&ivec[12], + CRYPTO_load_u32_be(&ivec[12]) + len / 16); + return (len); +} +#endif /* if CAN_USE_GCM_ASM >= 2 */ + /* * Encrypt multiple blocks of data in GCM mode. * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines @@ -1091,8 +1239,15 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, size_t done = 0; uint8_t *datap = (uint8_t *)data; size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; + aesni_gcm_encrypt_impl *encrypt_blocks = +#if CAN_USE_GCM_ASM >= 2 + ctx->impl == GCM_IMPL_AVX2 ? + aesni_gcm_encrypt_avx2 : +#endif + aesni_gcm_encrypt_avx; const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); uint64_t *ghash = ctx->gcm_ghash; + uint64_t *htable = ctx->gcm_Htable; uint64_t *cb = ctx->gcm_cb; uint8_t *ct_buf = NULL; uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; @@ -1156,8 +1311,8 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, /* Do the bulk encryption in chunk_size blocks. */ for (; bleft >= chunk_size; bleft -= chunk_size) { kfpu_begin(); - done = aesni_gcm_encrypt( - datap, ct_buf, chunk_size, key, cb, ghash); + done = encrypt_blocks( + datap, ct_buf, chunk_size, key, cb, htable, ghash); clear_fpu_regs(); kfpu_end(); @@ -1180,7 +1335,8 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, /* Bulk encrypt the remaining data. */ kfpu_begin(); if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { - done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash); + done = encrypt_blocks(datap, ct_buf, bleft, key, cb, htable, + ghash); if (done == 0) { rv = CRYPTO_FAILED; goto out; @@ -1293,6 +1449,29 @@ gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) return (CRYPTO_SUCCESS); } +static size_t aesni_gcm_decrypt_avx(const uint8_t *in, uint8_t *out, + size_t len, const void *key, uint64_t *iv, const uint64_t *Htable, + uint64_t *Xip) +{ + (void) Htable; + return (aesni_gcm_decrypt(in, out, len, key, iv, Xip)); +} + +#if CAN_USE_GCM_ASM >= 2 +static size_t aesni_gcm_decrypt_avx2(const uint8_t *in, uint8_t *out, + size_t len, const void *key, uint64_t *iv, const uint64_t *Htable, + uint64_t *Xip) +{ + uint8_t *ivec = (uint8_t *)iv; + len &= kSizeTWithoutLower4Bits; + aes_gcm_dec_update_vaes_avx2(in, out, len, key, ivec, + (const uint128_t *)Htable, (uint8_t *)Xip); + CRYPTO_store_u32_be(&ivec[12], + CRYPTO_load_u32_be(&ivec[12]) + len / 16); + return (len); +} +#endif /* if CAN_USE_GCM_ASM >= 2 */ + /* * Finalize decryption: We just have accumulated crypto text, so now we * decrypt it here inplace. @@ -1306,10 +1485,17 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) B_FALSE); size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; + aesni_gcm_decrypt_impl *decrypt_blocks = +#if CAN_USE_GCM_ASM >= 2 + ctx->impl == GCM_IMPL_AVX2 ? + aesni_gcm_decrypt_avx2 : +#endif + aesni_gcm_decrypt_avx; size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; uint8_t *datap = ctx->gcm_pt_buf; const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); uint32_t *cb = (uint32_t *)ctx->gcm_cb; + uint64_t *htable = ctx->gcm_Htable; uint64_t *ghash = ctx->gcm_ghash; uint32_t *tmp = (uint32_t *)ctx->gcm_tmp; int rv = CRYPTO_SUCCESS; @@ -1322,8 +1508,8 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) */ for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) { kfpu_begin(); - done = aesni_gcm_decrypt(datap, datap, chunk_size, - (const void *)key, ctx->gcm_cb, ghash); + done = decrypt_blocks(datap, datap, chunk_size, + (const void *)key, ctx->gcm_cb, htable, ghash); clear_fpu_regs(); kfpu_end(); if (done != chunk_size) { @@ -1334,8 +1520,8 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) /* Decrypt remainder, which is less than chunk size, in one go. */ kfpu_begin(); if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) { - done = aesni_gcm_decrypt(datap, datap, bleft, - (const void *)key, ctx->gcm_cb, ghash); + done = decrypt_blocks(datap, datap, bleft, + (const void *)key, ctx->gcm_cb, htable, ghash); if (done == 0) { clear_fpu_regs(); kfpu_end(); @@ -1424,13 +1610,42 @@ gcm_init_avx(gcm_ctx_t *ctx, const uint8_t *iv, size_t iv_len, ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==, B_FALSE); + size_t htab_len = 0; +#if CAN_USE_GCM_ASM >= 2 + if (ctx->impl == GCM_IMPL_AVX2) { + /* + * BoringSSL's API specifies uint128_t[16] for htab; but only + * uint128_t[12] are used. + * See https://github.com/google/boringssl/blob/ + * 813840dd094f9e9c1b00a7368aa25e656554221f1/crypto/fipsmodule/ + * modes/asm/aes-gcm-avx2-x86_64.pl#L198-L200 + */ + htab_len = (2 * 8 * sizeof (uint128_t)); + } else +#endif /* CAN_USE_GCM_ASM >= 2 */ + { + htab_len = (2 * 6 * sizeof (uint128_t)); + } + + ctx->gcm_Htable = kmem_alloc(htab_len, KM_SLEEP); + if (ctx->gcm_Htable == NULL) { + return (CRYPTO_HOST_MEMORY); + } + /* Init H (encrypt zero block) and create the initial counter block. */ memset(H, 0, sizeof (ctx->gcm_H)); kfpu_begin(); aes_encrypt_intel(keysched, aes_rounds, (const uint32_t *)H, (uint32_t *)H); - gcm_init_htab_avx(ctx->gcm_Htable, H); +#if CAN_USE_GCM_ASM >= 2 + if (ctx->impl == GCM_IMPL_AVX2) { + gcm_init_vpclmulqdq_avx2((uint128_t *)ctx->gcm_Htable, H); + } else +#endif /* if CAN_USE_GCM_ASM >= 2 */ + { + gcm_init_htab_avx(ctx->gcm_Htable, H); + } if (iv_len == 12) { memcpy(cb, iv, 12); diff --git a/sys/contrib/openzfs/module/icp/algs/modes/modes.c b/sys/contrib/openzfs/module/icp/algs/modes/modes.c index 343591cd9691..ef3c1806e4b6 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/modes.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/modes.c @@ -171,7 +171,7 @@ gcm_clear_ctx(gcm_ctx_t *ctx) explicit_memset(ctx->gcm_remainder, 0, sizeof (ctx->gcm_remainder)); explicit_memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H)); #if defined(CAN_USE_GCM_ASM) - if (ctx->gcm_use_avx == B_TRUE) { + if (ctx->impl != GCM_IMPL_GENERIC) { ASSERT3P(ctx->gcm_Htable, !=, NULL); explicit_memset(ctx->gcm_Htable, 0, ctx->gcm_htab_len); kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len); diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.boringssl b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.boringssl new file mode 100644 index 000000000000..04c03a37e0cb --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.boringssl @@ -0,0 +1,253 @@ +BoringSSL is a fork of OpenSSL. As such, large parts of it fall under OpenSSL +licensing. Files that are completely new have a Google copyright and an ISC +license. This license is reproduced at the bottom of this file. + +Contributors to BoringSSL are required to follow the CLA rules for Chromium: +https://cla.developers.google.com/clas + +Files in third_party/ have their own licenses, as described therein. The MIT +license, for third_party/fiat, which, unlike other third_party directories, is +compiled into non-test libraries, is included below. + +The OpenSSL toolkit stays under a dual license, i.e. both the conditions of the +OpenSSL License and the original SSLeay license apply to the toolkit. See below +for the actual license texts. Actually both licenses are BSD-style Open Source +licenses. In case of any license issues related to OpenSSL please contact +openssl-core@openssl.org. + +The following are Google-internal bug numbers where explicit permission from +some authors is recorded for use of their work. (This is purely for our own +record keeping.) + 27287199 + 27287880 + 27287883 + 263291445 + + + OpenSSL License + --------------- + +/* ==================================================================== + * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + + Original SSLeay License + ----------------------- + +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + + +ISC license used for completely new code in BoringSSL: + +/* Copyright 2015 The BoringSSL Authors + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ + + +The code in third_party/fiat carries the MIT license: + +Copyright (c) 2015-2016 the fiat-crypto authors (see +https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +Licenses for support code +------------------------- + +Parts of the TLS test suite are under the Go license. This code is not included +in BoringSSL (i.e. libcrypto and libssl) when compiled, however, so +distributing code linked against BoringSSL does not trigger this license: + +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +BoringSSL uses the Chromium test infrastructure to run a continuous build, +trybots etc. The scripts which manage this, and the script for generating build +metadata, are under the Chromium license. Distributing code linked against +BoringSSL does not trigger this license. + +Copyright 2015 The Chromium Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.boringssl.descrip b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.boringssl.descrip new file mode 100644 index 000000000000..f63a67a4d2ae --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.boringssl.descrip @@ -0,0 +1 @@ +PORTIONS OF AES GCM and GHASH FUNCTIONALITY diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-avx2-vaes.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-avx2-vaes.S new file mode 100644 index 000000000000..3d1b045127e2 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-avx2-vaes.S @@ -0,0 +1,1323 @@ +// SPDX-License-Identifier: Apache-2.0 +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#if defined(__x86_64__) && defined(HAVE_AVX) && \ + defined(HAVE_VAES) && defined(HAVE_VPCLMULQDQ) + +#define _ASM +#include <sys/asm_linkage.h> + +/* Windows userland links with OpenSSL */ +#if !defined (_WIN32) || defined (_KERNEL) + +.section .rodata +.balign 16 + + +.Lbswap_mask: +.quad 0x08090a0b0c0d0e0f, 0x0001020304050607 + + + + + + + + +.Lgfpoly: +.quad 1, 0xc200000000000000 + + +.Lgfpoly_and_internal_carrybit: +.quad 1, 0xc200000000000001 + +.balign 32 + +.Lctr_pattern: +.quad 0, 0 +.quad 1, 0 +.Linc_2blocks: +.quad 2, 0 +.quad 2, 0 + +ENTRY_ALIGN(gcm_init_vpclmulqdq_avx2, 32) +.cfi_startproc + +ENDBR + + + + + + vmovdqu (%rsi),%xmm3 + // KCF/ICP stores H in network byte order with the hi qword first + // so we need to swap all bytes, not the 2 qwords. + vmovdqu .Lbswap_mask(%rip),%xmm4 + vpshufb %xmm4,%xmm3,%xmm3 + + + + + + vpshufd $0xd3,%xmm3,%xmm0 + vpsrad $31,%xmm0,%xmm0 + vpaddq %xmm3,%xmm3,%xmm3 + vpand .Lgfpoly_and_internal_carrybit(%rip),%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + + vbroadcasti128 .Lgfpoly(%rip),%ymm6 + + + vpclmulqdq $0x00,%xmm3,%xmm3,%xmm0 + vpclmulqdq $0x11,%xmm3,%xmm3,%xmm5 + vpclmulqdq $0x01,%xmm0,%xmm6,%xmm1 + vpshufd $0x4e,%xmm0,%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm6,%xmm0 + vpshufd $0x4e,%xmm1,%xmm1 + vpxor %xmm1,%xmm5,%xmm5 + vpxor %xmm0,%xmm5,%xmm5 + + + + vinserti128 $1,%xmm3,%ymm5,%ymm3 + vinserti128 $1,%xmm5,%ymm5,%ymm5 + + + vpclmulqdq $0x00,%ymm5,%ymm3,%ymm0 + vpclmulqdq $0x01,%ymm5,%ymm3,%ymm1 + vpclmulqdq $0x10,%ymm5,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2 + vpshufd $0x4e,%ymm0,%ymm0 + vpxor %ymm0,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x11,%ymm5,%ymm3,%ymm4 + vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm4,%ymm4 + vpxor %ymm0,%ymm4,%ymm4 + + + + vmovdqu %ymm3,96(%rdi) + vmovdqu %ymm4,64(%rdi) + + + + vpunpcklqdq %ymm3,%ymm4,%ymm0 + vpunpckhqdq %ymm3,%ymm4,%ymm1 + vpxor %ymm1,%ymm0,%ymm0 + vmovdqu %ymm0,128+32(%rdi) + + + vpclmulqdq $0x00,%ymm5,%ymm4,%ymm0 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm1 + vpclmulqdq $0x10,%ymm5,%ymm4,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2 + vpshufd $0x4e,%ymm0,%ymm0 + vpxor %ymm0,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x11,%ymm5,%ymm4,%ymm3 + vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm3,%ymm3 + vpxor %ymm0,%ymm3,%ymm3 + + vpclmulqdq $0x00,%ymm5,%ymm3,%ymm0 + vpclmulqdq $0x01,%ymm5,%ymm3,%ymm1 + vpclmulqdq $0x10,%ymm5,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2 + vpshufd $0x4e,%ymm0,%ymm0 + vpxor %ymm0,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x11,%ymm5,%ymm3,%ymm4 + vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm4,%ymm4 + vpxor %ymm0,%ymm4,%ymm4 + + vmovdqu %ymm3,32(%rdi) + vmovdqu %ymm4,0(%rdi) + + + + vpunpcklqdq %ymm3,%ymm4,%ymm0 + vpunpckhqdq %ymm3,%ymm4,%ymm1 + vpxor %ymm1,%ymm0,%ymm0 + vmovdqu %ymm0,128(%rdi) + + vzeroupper + RET + +.cfi_endproc +SET_SIZE(gcm_init_vpclmulqdq_avx2) +ENTRY_ALIGN(gcm_gmult_vpclmulqdq_avx2, 32) +.cfi_startproc + +ENDBR + + + + vmovdqu (%rdi),%xmm0 + vmovdqu .Lbswap_mask(%rip),%xmm1 + vmovdqu 128-16(%rsi),%xmm2 + vmovdqu .Lgfpoly(%rip),%xmm3 + vpshufb %xmm1,%xmm0,%xmm0 + + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm4 + vpclmulqdq $0x01,%xmm2,%xmm0,%xmm5 + vpclmulqdq $0x10,%xmm2,%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x01,%xmm4,%xmm3,%xmm6 + vpshufd $0x4e,%xmm4,%xmm4 + vpxor %xmm4,%xmm5,%xmm5 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x01,%xmm5,%xmm3,%xmm4 + vpshufd $0x4e,%xmm5,%xmm5 + vpxor %xmm5,%xmm0,%xmm0 + vpxor %xmm4,%xmm0,%xmm0 + + + vpshufb %xmm1,%xmm0,%xmm0 + vmovdqu %xmm0,(%rdi) + + + RET + +.cfi_endproc +SET_SIZE(gcm_gmult_vpclmulqdq_avx2) +ENTRY_ALIGN(gcm_ghash_vpclmulqdq_avx2, 32) +.cfi_startproc + +ENDBR + + + + + + + vmovdqu .Lbswap_mask(%rip),%xmm6 + vmovdqu .Lgfpoly(%rip),%xmm7 + + + vmovdqu (%rdi),%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + + + cmpq $32,%rcx + jb .Lghash_lastblock + + + + vinserti128 $1,%xmm6,%ymm6,%ymm6 + vinserti128 $1,%xmm7,%ymm7,%ymm7 + + cmpq $127,%rcx + jbe .Lghash_loop_1x + + + vmovdqu 128(%rsi),%ymm8 + vmovdqu 128+32(%rsi),%ymm9 +.Lghash_loop_4x: + + vmovdqu 0(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 0(%rsi),%ymm2 + vpxor %ymm5,%ymm1,%ymm1 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x00,%ymm8,%ymm0,%ymm4 + + vmovdqu 32(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 32(%rsi),%ymm2 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x10,%ymm8,%ymm0,%ymm0 + vpxor %ymm0,%ymm4,%ymm4 + + vmovdqu 64(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 64(%rsi),%ymm2 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x00,%ymm9,%ymm0,%ymm0 + vpxor %ymm0,%ymm4,%ymm4 + + + vmovdqu 96(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 96(%rsi),%ymm2 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x10,%ymm9,%ymm0,%ymm0 + vpxor %ymm0,%ymm4,%ymm4 + + vpxor %ymm3,%ymm4,%ymm4 + vpxor %ymm5,%ymm4,%ymm4 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm2 + vpclmulqdq $0x01,%ymm3,%ymm2,%ymm0 + vpshufd $0x4e,%ymm3,%ymm3 + vpxor %ymm3,%ymm4,%ymm4 + vpxor %ymm0,%ymm4,%ymm4 + + vpclmulqdq $0x01,%ymm4,%ymm2,%ymm0 + vpshufd $0x4e,%ymm4,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm0,%ymm5,%ymm5 + vextracti128 $1,%ymm5,%xmm0 + vpxor %xmm0,%xmm5,%xmm5 + + subq $-128,%rdx + addq $-128,%rcx + cmpq $127,%rcx + ja .Lghash_loop_4x + + + cmpq $32,%rcx + jb .Lghash_loop_1x_done +.Lghash_loop_1x: + vmovdqu (%rdx),%ymm0 + vpshufb %ymm6,%ymm0,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vmovdqu 128-32(%rsi),%ymm0 + vpclmulqdq $0x00,%ymm0,%ymm5,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm5,%ymm2 + vpclmulqdq $0x10,%ymm0,%ymm5,%ymm3 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x01,%ymm1,%ymm7,%ymm3 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm2,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x11,%ymm0,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm7,%ymm1 + vpshufd $0x4e,%ymm2,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm1,%ymm5,%ymm5 + + vextracti128 $1,%ymm5,%xmm0 + vpxor %xmm0,%xmm5,%xmm5 + addq $32,%rdx + subq $32,%rcx + cmpq $32,%rcx + jae .Lghash_loop_1x +.Lghash_loop_1x_done: + + +.Lghash_lastblock: + testq %rcx,%rcx + jz .Lghash_done + vmovdqu (%rdx),%xmm0 + vpshufb %xmm6,%xmm0,%xmm0 + vpxor %xmm0,%xmm5,%xmm5 + vmovdqu 128-16(%rsi),%xmm0 + vpclmulqdq $0x00,%xmm0,%xmm5,%xmm1 + vpclmulqdq $0x01,%xmm0,%xmm5,%xmm2 + vpclmulqdq $0x10,%xmm0,%xmm5,%xmm3 + vpxor %xmm3,%xmm2,%xmm2 + vpclmulqdq $0x01,%xmm1,%xmm7,%xmm3 + vpshufd $0x4e,%xmm1,%xmm1 + vpxor %xmm1,%xmm2,%xmm2 + vpxor %xmm3,%xmm2,%xmm2 + vpclmulqdq $0x11,%xmm0,%xmm5,%xmm5 + vpclmulqdq $0x01,%xmm2,%xmm7,%xmm1 + vpshufd $0x4e,%xmm2,%xmm2 + vpxor %xmm2,%xmm5,%xmm5 + vpxor %xmm1,%xmm5,%xmm5 + + +.Lghash_done: + + vpshufb %xmm6,%xmm5,%xmm5 + vmovdqu %xmm5,(%rdi) + + vzeroupper + RET + +.cfi_endproc +SET_SIZE(gcm_ghash_vpclmulqdq_avx2) +ENTRY_ALIGN(aes_gcm_enc_update_vaes_avx2, 32) +.cfi_startproc + +ENDBR + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + + movq 16(%rsp),%r12 +#ifdef BORINGSSL_DISPATCH_TEST +.extern BORINGSSL_function_hit +.hidden BORINGSSL_function_hit + movb $1,BORINGSSL_function_hit+6(%rip) +#endif + vbroadcasti128 .Lbswap_mask(%rip),%ymm0 + + + + vmovdqu (%r12),%xmm1 + vpshufb %xmm0,%xmm1,%xmm1 + vbroadcasti128 (%r8),%ymm11 + vpshufb %ymm0,%ymm11,%ymm11 + + + + movl 504(%rcx),%r10d // ICP has a larger offset for rounds. + leal -24(,%r10,4),%r10d // ICP uses 10,12,14 not 9,11,13 for rounds. + + + + + leaq 96(%rcx,%r10,4),%r11 + vbroadcasti128 (%rcx),%ymm9 + vbroadcasti128 (%r11),%ymm10 + + + vpaddd .Lctr_pattern(%rip),%ymm11,%ymm11 + + + + cmpq $127,%rdx + jbe .Lcrypt_loop_4x_done__func1 + + vmovdqu 128(%r9),%ymm7 + vmovdqu 128+32(%r9),%ymm8 + + + + vmovdqu .Linc_2blocks(%rip),%ymm2 + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm14 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm15 + vpaddd %ymm2,%ymm11,%ymm11 + + + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + vpxor %ymm9,%ymm14,%ymm14 + vpxor %ymm9,%ymm15,%ymm15 + + leaq 16(%rcx),%rax +.Lvaesenc_loop_first_4_vecs__func1: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_first_4_vecs__func1 + vpxor 0(%rdi),%ymm10,%ymm2 + vpxor 32(%rdi),%ymm10,%ymm3 + vpxor 64(%rdi),%ymm10,%ymm5 + vpxor 96(%rdi),%ymm10,%ymm6 + vaesenclast %ymm2,%ymm12,%ymm12 + vaesenclast %ymm3,%ymm13,%ymm13 + vaesenclast %ymm5,%ymm14,%ymm14 + vaesenclast %ymm6,%ymm15,%ymm15 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + vmovdqu %ymm14,64(%rsi) + vmovdqu %ymm15,96(%rsi) + + subq $-128,%rdi + addq $-128,%rdx + cmpq $127,%rdx + jbe .Lghash_last_ciphertext_4x__func1 +.balign 16 +.Lcrypt_loop_4x__func1: + + + + + vmovdqu .Linc_2blocks(%rip),%ymm2 + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm14 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm15 + vpaddd %ymm2,%ymm11,%ymm11 + + + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + vpxor %ymm9,%ymm14,%ymm14 + vpxor %ymm9,%ymm15,%ymm15 + + cmpl $24,%r10d + jl .Laes128__func1 + je .Laes192__func1 + + vbroadcasti128 -208(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -192(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes192__func1: + vbroadcasti128 -176(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -160(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes128__func1: + prefetcht0 512(%rdi) + prefetcht0 512+64(%rdi) + + vmovdqu 0(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 0(%r9),%ymm4 + vpxor %ymm1,%ymm3,%ymm3 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6 + + vbroadcasti128 -144(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vbroadcasti128 -128(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 32(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 32(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -112(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 64(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 64(%r9),%ymm4 + + vbroadcasti128 -96(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -80(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + + vmovdqu 96(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + + vbroadcasti128 -64(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vmovdqu 96(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -48(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm1,%ymm6,%ymm6 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm4 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -32(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -16(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vextracti128 $1,%ymm1,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + + + subq $-128,%rsi + vpxor 0(%rdi),%ymm10,%ymm2 + vpxor 32(%rdi),%ymm10,%ymm3 + vpxor 64(%rdi),%ymm10,%ymm5 + vpxor 96(%rdi),%ymm10,%ymm6 + vaesenclast %ymm2,%ymm12,%ymm12 + vaesenclast %ymm3,%ymm13,%ymm13 + vaesenclast %ymm5,%ymm14,%ymm14 + vaesenclast %ymm6,%ymm15,%ymm15 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + vmovdqu %ymm14,64(%rsi) + vmovdqu %ymm15,96(%rsi) + + subq $-128,%rdi + + addq $-128,%rdx + cmpq $127,%rdx + ja .Lcrypt_loop_4x__func1 +.Lghash_last_ciphertext_4x__func1: + + vmovdqu 0(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 0(%r9),%ymm4 + vpxor %ymm1,%ymm3,%ymm3 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6 + + vmovdqu 32(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 32(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vmovdqu 64(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 64(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + + vmovdqu 96(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 96(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm1,%ymm6,%ymm6 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm4 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm2,%ymm6,%ymm6 + + vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vextracti128 $1,%ymm1,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + + subq $-128,%rsi +.Lcrypt_loop_4x_done__func1: + + testq %rdx,%rdx + jz .Ldone__func1 + + + + + + leaq 128(%r9),%r8 + subq %rdx,%r8 + + + vpxor %xmm5,%xmm5,%xmm5 + vpxor %xmm6,%xmm6,%xmm6 + vpxor %xmm7,%xmm7,%xmm7 + + cmpq $64,%rdx + jb .Llessthan64bytes__func1 + + + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_1__func1: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_1__func1 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%ymm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %ymm3,%ymm13,%ymm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + + + vpshufb %ymm0,%ymm12,%ymm12 + vpshufb %ymm0,%ymm13,%ymm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%ymm3 + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm7 + vpclmulqdq $0x00,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + + addq $64,%r8 + addq $64,%rdi + addq $64,%rsi + subq $64,%rdx + jz .Lreduce__func1 + + vpxor %xmm1,%xmm1,%xmm1 + + +.Llessthan64bytes__func1: + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_2__func1: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_2__func1 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + + + cmpq $32,%rdx + jb .Lxor_one_block__func1 + je .Lxor_two_blocks__func1 + +.Lxor_three_blocks__func1: + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%xmm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %xmm3,%xmm13,%xmm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %xmm13,32(%rsi) + + vpshufb %ymm0,%ymm12,%ymm12 + vpshufb %xmm0,%xmm13,%xmm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%xmm3 + vpclmulqdq $0x00,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm7,%ymm7 + jmp .Lghash_mul_one_vec_unreduced__func1 + +.Lxor_two_blocks__func1: + vmovdqu (%rdi),%ymm2 + vpxor %ymm2,%ymm12,%ymm12 + vmovdqu %ymm12,(%rsi) + vpshufb %ymm0,%ymm12,%ymm12 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + jmp .Lghash_mul_one_vec_unreduced__func1 + +.Lxor_one_block__func1: + vmovdqu (%rdi),%xmm2 + vpxor %xmm2,%xmm12,%xmm12 + vmovdqu %xmm12,(%rsi) + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm1,%xmm12,%xmm12 + vmovdqu (%r8),%xmm2 + +.Lghash_mul_one_vec_unreduced__func1: + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + +.Lreduce__func1: + + vbroadcasti128 .Lgfpoly(%rip),%ymm2 + vpclmulqdq $0x01,%ymm5,%ymm2,%ymm3 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpclmulqdq $0x01,%ymm6,%ymm2,%ymm3 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm7,%ymm7 + vpxor %ymm3,%ymm7,%ymm7 + vextracti128 $1,%ymm7,%xmm1 + vpxor %xmm7,%xmm1,%xmm1 + +.Ldone__func1: + + vpshufb %xmm0,%xmm1,%xmm1 + vmovdqu %xmm1,(%r12) + + vzeroupper + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + RET + +.cfi_endproc +SET_SIZE(aes_gcm_enc_update_vaes_avx2) +ENTRY_ALIGN(aes_gcm_dec_update_vaes_avx2, 32) +.cfi_startproc + +ENDBR + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + + movq 16(%rsp),%r12 + vbroadcasti128 .Lbswap_mask(%rip),%ymm0 + + + + vmovdqu (%r12),%xmm1 + vpshufb %xmm0,%xmm1,%xmm1 + vbroadcasti128 (%r8),%ymm11 + vpshufb %ymm0,%ymm11,%ymm11 + + + + movl 504(%rcx),%r10d // ICP has a larger offset for rounds. + leal -24(,%r10,4),%r10d // ICP uses 10,12,14 not 9,11,13 for rounds. + + + + + leaq 96(%rcx,%r10,4),%r11 + vbroadcasti128 (%rcx),%ymm9 + vbroadcasti128 (%r11),%ymm10 + + + vpaddd .Lctr_pattern(%rip),%ymm11,%ymm11 + + + + cmpq $127,%rdx + jbe .Lcrypt_loop_4x_done__func2 + + vmovdqu 128(%r9),%ymm7 + vmovdqu 128+32(%r9),%ymm8 +.balign 16 +.Lcrypt_loop_4x__func2: + + + + + vmovdqu .Linc_2blocks(%rip),%ymm2 + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm14 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm15 + vpaddd %ymm2,%ymm11,%ymm11 + + + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + vpxor %ymm9,%ymm14,%ymm14 + vpxor %ymm9,%ymm15,%ymm15 + + cmpl $24,%r10d + jl .Laes128__func2 + je .Laes192__func2 + + vbroadcasti128 -208(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -192(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes192__func2: + vbroadcasti128 -176(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -160(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes128__func2: + prefetcht0 512(%rdi) + prefetcht0 512+64(%rdi) + + vmovdqu 0(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 0(%r9),%ymm4 + vpxor %ymm1,%ymm3,%ymm3 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6 + + vbroadcasti128 -144(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vbroadcasti128 -128(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 32(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 32(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -112(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 64(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 64(%r9),%ymm4 + + vbroadcasti128 -96(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -80(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + + vmovdqu 96(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + + vbroadcasti128 -64(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vmovdqu 96(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -48(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm1,%ymm6,%ymm6 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm4 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -32(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -16(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vextracti128 $1,%ymm1,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + + + + vpxor 0(%rdi),%ymm10,%ymm2 + vpxor 32(%rdi),%ymm10,%ymm3 + vpxor 64(%rdi),%ymm10,%ymm5 + vpxor 96(%rdi),%ymm10,%ymm6 + vaesenclast %ymm2,%ymm12,%ymm12 + vaesenclast %ymm3,%ymm13,%ymm13 + vaesenclast %ymm5,%ymm14,%ymm14 + vaesenclast %ymm6,%ymm15,%ymm15 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + vmovdqu %ymm14,64(%rsi) + vmovdqu %ymm15,96(%rsi) + + subq $-128,%rdi + subq $-128,%rsi + addq $-128,%rdx + cmpq $127,%rdx + ja .Lcrypt_loop_4x__func2 +.Lcrypt_loop_4x_done__func2: + + testq %rdx,%rdx + jz .Ldone__func2 + + + + + + leaq 128(%r9),%r8 + subq %rdx,%r8 + + + vpxor %xmm5,%xmm5,%xmm5 + vpxor %xmm6,%xmm6,%xmm6 + vpxor %xmm7,%xmm7,%xmm7 + + cmpq $64,%rdx + jb .Llessthan64bytes__func2 + + + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_1__func2: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_1__func2 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%ymm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %ymm3,%ymm13,%ymm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + + + vpshufb %ymm0,%ymm2,%ymm12 + vpshufb %ymm0,%ymm3,%ymm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%ymm3 + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm7 + vpclmulqdq $0x00,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + + addq $64,%r8 + addq $64,%rdi + addq $64,%rsi + subq $64,%rdx + jz .Lreduce__func2 + + vpxor %xmm1,%xmm1,%xmm1 + + +.Llessthan64bytes__func2: + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_2__func2: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_2__func2 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + + + cmpq $32,%rdx + jb .Lxor_one_block__func2 + je .Lxor_two_blocks__func2 + +.Lxor_three_blocks__func2: + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%xmm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %xmm3,%xmm13,%xmm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %xmm13,32(%rsi) + + vpshufb %ymm0,%ymm2,%ymm12 + vpshufb %xmm0,%xmm3,%xmm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%xmm3 + vpclmulqdq $0x00,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm7,%ymm7 + jmp .Lghash_mul_one_vec_unreduced__func2 + +.Lxor_two_blocks__func2: + vmovdqu (%rdi),%ymm2 + vpxor %ymm2,%ymm12,%ymm12 + vmovdqu %ymm12,(%rsi) + vpshufb %ymm0,%ymm2,%ymm12 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + jmp .Lghash_mul_one_vec_unreduced__func2 + +.Lxor_one_block__func2: + vmovdqu (%rdi),%xmm2 + vpxor %xmm2,%xmm12,%xmm12 + vmovdqu %xmm12,(%rsi) + vpshufb %xmm0,%xmm2,%xmm12 + vpxor %xmm1,%xmm12,%xmm12 + vmovdqu (%r8),%xmm2 + +.Lghash_mul_one_vec_unreduced__func2: + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + +.Lreduce__func2: + + vbroadcasti128 .Lgfpoly(%rip),%ymm2 + vpclmulqdq $0x01,%ymm5,%ymm2,%ymm3 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpclmulqdq $0x01,%ymm6,%ymm2,%ymm3 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm7,%ymm7 + vpxor %ymm3,%ymm7,%ymm7 + vextracti128 $1,%ymm7,%xmm1 + vpxor %xmm7,%xmm1,%xmm1 + +.Ldone__func2: + + vpshufb %xmm0,%xmm1,%xmm1 + vmovdqu %xmm1,(%r12) + + vzeroupper + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + RET + +.cfi_endproc +SET_SIZE(aes_gcm_dec_update_vaes_avx2) + +#endif /* !_WIN32 || _KERNEL */ + +/* Mark the stack non-executable. */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + +#endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_AES) ... */ diff --git a/sys/contrib/openzfs/module/icp/core/kcf_sched.c b/sys/contrib/openzfs/module/icp/core/kcf_sched.c index 759f0d81d521..75e1052a4ed4 100644 --- a/sys/contrib/openzfs/module/icp/core/kcf_sched.c +++ b/sys/contrib/openzfs/module/icp/core/kcf_sched.c @@ -124,7 +124,7 @@ kcf_context_cache_destructor(void *buf, void *cdrarg) (void) cdrarg; kcf_context_t *kctx = (kcf_context_t *)buf; - ASSERT(kctx->kc_refcnt == 0); + ASSERT0(kctx->kc_refcnt); } void diff --git a/sys/contrib/openzfs/module/icp/include/modes/modes.h b/sys/contrib/openzfs/module/icp/include/modes/modes.h index ca734cf4f045..de11d9eafafb 100644 --- a/sys/contrib/openzfs/module/icp/include/modes/modes.h +++ b/sys/contrib/openzfs/module/icp/include/modes/modes.h @@ -42,7 +42,7 @@ extern "C" { */ #if defined(__x86_64__) && defined(HAVE_AVX) && \ defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) -#define CAN_USE_GCM_ASM +#define CAN_USE_GCM_ASM (HAVE_VAES && HAVE_VPCLMULQDQ ? 2 : 1) extern boolean_t gcm_avx_can_use_movbe; #endif @@ -129,6 +129,15 @@ typedef struct ccm_ctx { #define ccm_copy_to ccm_common.cc_copy_to #define ccm_flags ccm_common.cc_flags +#ifdef CAN_USE_GCM_ASM +typedef enum gcm_impl { + GCM_IMPL_GENERIC = 0, + GCM_IMPL_AVX, + GCM_IMPL_AVX2, + GCM_IMPL_MAX, +} gcm_impl; +#endif + /* * gcm_tag_len: Length of authentication tag. * @@ -174,7 +183,7 @@ typedef struct gcm_ctx { uint64_t gcm_len_a_len_c[2]; uint8_t *gcm_pt_buf; #ifdef CAN_USE_GCM_ASM - boolean_t gcm_use_avx; + enum gcm_impl impl; #endif } gcm_ctx_t; diff --git a/sys/contrib/openzfs/module/icp/io/aes.c b/sys/contrib/openzfs/module/icp/io/aes.c index ba703efa71fc..ca586eaf97ef 100644 --- a/sys/contrib/openzfs/module/icp/io/aes.c +++ b/sys/contrib/openzfs/module/icp/io/aes.c @@ -236,16 +236,16 @@ aes_encrypt_atomic(crypto_mechanism_t *mechanism, aes_xor_block); if (ret != CRYPTO_SUCCESS) goto out; - ASSERT(aes_ctx.ac_remainder_len == 0); + ASSERT0(aes_ctx.ac_remainder_len); } else if (mechanism->cm_type == AES_GCM_MECH_INFO_TYPE) { ret = gcm_encrypt_final((gcm_ctx_t *)&aes_ctx, ciphertext, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); if (ret != CRYPTO_SUCCESS) goto out; - ASSERT(aes_ctx.ac_remainder_len == 0); + ASSERT0(aes_ctx.ac_remainder_len); } else { - ASSERT(aes_ctx.ac_remainder_len == 0); + ASSERT0(aes_ctx.ac_remainder_len); } if (plaintext != ciphertext) { @@ -337,7 +337,7 @@ aes_decrypt_atomic(crypto_mechanism_t *mechanism, ret = ccm_decrypt_final((ccm_ctx_t *)&aes_ctx, plaintext, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); - ASSERT(aes_ctx.ac_remainder_len == 0); + ASSERT0(aes_ctx.ac_remainder_len); if ((ret == CRYPTO_SUCCESS) && (ciphertext != plaintext)) { plaintext->cd_length = @@ -349,7 +349,7 @@ aes_decrypt_atomic(crypto_mechanism_t *mechanism, ret = gcm_decrypt_final((gcm_ctx_t *)&aes_ctx, plaintext, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); - ASSERT(aes_ctx.ac_remainder_len == 0); + ASSERT0(aes_ctx.ac_remainder_len); if ((ret == CRYPTO_SUCCESS) && (ciphertext != plaintext)) { plaintext->cd_length = diff --git a/sys/contrib/openzfs/module/nvpair/nvpair.c b/sys/contrib/openzfs/module/nvpair/nvpair.c index 811cfc87d7a4..eb8c14b4a783 100644 --- a/sys/contrib/openzfs/module/nvpair/nvpair.c +++ b/sys/contrib/openzfs/module/nvpair/nvpair.c @@ -265,7 +265,7 @@ nv_priv_alloc_embedded(nvpriv_t *priv) static int nvt_tab_alloc(nvpriv_t *priv, uint64_t buckets) { - ASSERT3P(priv->nvp_hashtable, ==, NULL); + ASSERT0P(priv->nvp_hashtable); ASSERT0(priv->nvp_nbuckets); ASSERT0(priv->nvp_nentries); @@ -334,7 +334,7 @@ nvt_lookup_name_type(const nvlist_t *nvl, const char *name, data_type_t type) i_nvp_t **tab = priv->nvp_hashtable; if (tab == NULL) { - ASSERT3P(priv->nvp_list, ==, NULL); + ASSERT0P(priv->nvp_list); ASSERT0(priv->nvp_nbuckets); ASSERT0(priv->nvp_nentries); return (NULL); @@ -540,7 +540,7 @@ nvt_add_nvpair(nvlist_t *nvl, nvpair_t *nvp) /* insert link at the beginning of the bucket */ i_nvp_t *new_entry = NVPAIR2I_NVP(nvp); - ASSERT3P(new_entry->nvi_hashtable_next, ==, NULL); + ASSERT0P(new_entry->nvi_hashtable_next); new_entry->nvi_hashtable_next = bucket; // cppcheck-suppress nullPointerRedundantCheck tab[index] = new_entry; diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_kmem.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_kmem.c index 6d198fad5203..ae6e36d988c2 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_kmem.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_kmem.c @@ -160,7 +160,7 @@ kmem_cache_create(const char *name, size_t bufsize, size_t align, { kmem_cache_t *cache; - ASSERT3P(vmp, ==, NULL); + ASSERT0P(vmp); cache = kmem_alloc(sizeof (*cache), KM_SLEEP); strlcpy(cache->kc_name, name, sizeof (cache->kc_name)); diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_sysevent.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_sysevent.c index 9da633c2b1be..3c2d39b20c09 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_sysevent.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_sysevent.c @@ -256,7 +256,7 @@ sysevent_worker(void *arg __unused) * free `ze`, so just inline the free() here -- events have already * been drained. */ - VERIFY3P(ze->ze_zevent, ==, NULL); + VERIFY0P(ze->ze_zevent); kmem_free(ze, sizeof (zfs_zevent_t)); kthread_exit(); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c index fbf67f6a14a8..4bf487cdc469 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c @@ -507,7 +507,7 @@ abd_iter_at_end(struct abd_iter *aiter) void abd_iter_advance(struct abd_iter *aiter, size_t amount) { - ASSERT3P(aiter->iter_mapaddr, ==, NULL); + ASSERT0P(aiter->iter_mapaddr); ASSERT0(aiter->iter_mapsize); /* There's nothing left to advance to, so do nothing */ @@ -526,7 +526,7 @@ abd_iter_map(struct abd_iter *aiter) { void *paddr; - ASSERT3P(aiter->iter_mapaddr, ==, NULL); + ASSERT0P(aiter->iter_mapaddr); ASSERT0(aiter->iter_mapsize); /* There's nothing left to iterate over, so do nothing */ diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c index 364bbfc60abd..26cc7981bfcd 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c @@ -156,7 +156,7 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, if (dbp[0]->db_offset != 0 || numbufs > 1) { for (i = 0; i < numbufs; i++) { ASSERT(ISP2(dbp[i]->db_size)); - ASSERT3U((dbp[i]->db_offset % dbp[i]->db_size), ==, 0); + ASSERT0((dbp[i]->db_offset % dbp[i]->db_size)); ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size); } } @@ -175,7 +175,7 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, vm_page_sunbusy(m); break; } - ASSERT3U(m->dirty, ==, 0); + ASSERT0(m->dirty); ASSERT(!pmap_page_is_write_mapped(m)); ASSERT3U(db->db_size, >, PAGE_SIZE); @@ -201,7 +201,7 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, if (m != bogus_page) { vm_page_assert_xbusied(m); ASSERT(vm_page_none_valid(m)); - ASSERT3U(m->dirty, ==, 0); + ASSERT0(m->dirty); ASSERT(!pmap_page_is_write_mapped(m)); va = zfs_map_page(m, &sf); } @@ -295,7 +295,7 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, vm_page_sunbusy(m); break; } - ASSERT3U(m->dirty, ==, 0); + ASSERT0(m->dirty); ASSERT(!pmap_page_is_write_mapped(m)); ASSERT3U(db->db_size, >, PAGE_SIZE); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c index c114db14a916..b218c0da8125 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c @@ -112,7 +112,6 @@ static int zfs__fini(void); static void zfs_shutdown(void *, int); static eventhandler_tag zfs_shutdown_event_tag; -static eventhandler_tag zfs_mountroot_event_tag; #define ZFS_MIN_KSTACK_PAGES 4 @@ -311,9 +310,6 @@ zfs_modevent(module_t mod, int type, void *unused __unused) zfs_shutdown_event_tag = EVENTHANDLER_REGISTER( shutdown_post_sync, zfs_shutdown, NULL, SHUTDOWN_PRI_FIRST); - zfs_mountroot_event_tag = EVENTHANDLER_REGISTER( - mountroot, spa_boot_init, NULL, - SI_ORDER_ANY); } return (err); case MOD_UNLOAD: @@ -322,9 +318,6 @@ zfs_modevent(module_t mod, int type, void *unused __unused) if (zfs_shutdown_event_tag != NULL) EVENTHANDLER_DEREGISTER(shutdown_post_sync, zfs_shutdown_event_tag); - if (zfs_mountroot_event_tag != NULL) - EVENTHANDLER_DEREGISTER(mountroot, - zfs_mountroot_event_tag); } return (err); case MOD_SHUTDOWN: diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c index c8ab7cc7cf8e..bbd1dafc69be 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c @@ -1236,7 +1236,7 @@ vdev_geom_io_done(zio_t *zio) struct bio *bp = zio->io_bio; if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) { - ASSERT3P(bp, ==, NULL); + ASSERT0P(bp); return; } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c index 5c5adc6cc12b..b15a3e6e38c0 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c @@ -1632,7 +1632,7 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, if (zfsvfs->z_replay == B_FALSE) ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__); } else - ASSERT3P(dzp->z_vnode, ==, NULL); + ASSERT0P(dzp->z_vnode); memset(acl_ids, 0, sizeof (zfs_acl_ids_t)); acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode); @@ -2014,7 +2014,7 @@ top: error = zfs_aclset_common(zp, aclp, cr, tx); ASSERT0(error); - ASSERT3P(zp->z_acl_cached, ==, NULL); + ASSERT0P(zp->z_acl_cached); zp->z_acl_cached = aclp; if (fuid_dirtied) diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c index 8d0ff9b25e30..a222c5de4a2a 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c @@ -357,7 +357,7 @@ zfsctl_create(zfsvfs_t *zfsvfs) vnode_t *rvp; uint64_t crtime[2]; - ASSERT3P(zfsvfs->z_ctldir, ==, NULL); + ASSERT0P(zfsvfs->z_ctldir); snapdir = sfs_alloc_node(sizeof (*snapdir), "snapshot", ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR); @@ -494,7 +494,7 @@ zfsctl_common_getattr(vnode_t *vp, vattr_t *vap) vap->va_uid = 0; vap->va_gid = 0; - vap->va_rdev = 0; + vap->va_rdev = NODEV; /* * We are a purely virtual object, so we have no * blocksize or allocated blocks. @@ -674,6 +674,7 @@ zfsctl_root_readdir(struct vop_readdir_args *ap) zfs_uio_t uio; int *eofp = ap->a_eofflag; off_t dots_offset; + ssize_t orig_resid; int error; zfs_uio_init(&uio, ap->a_uio); @@ -688,16 +689,16 @@ zfsctl_root_readdir(struct vop_readdir_args *ap) * count to return is 0. */ if (zfs_uio_offset(&uio) == 3 * sizeof (entry)) { + if (eofp != NULL) + *eofp = 1; return (0); } + orig_resid = zfs_uio_resid(&uio); error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, &uio, &dots_offset); - if (error != 0) { - if (error == ENAMETOOLONG) /* ran out of destination space */ - error = 0; - return (error); - } + if (error != 0) + goto err; if (zfs_uio_offset(&uio) != dots_offset) return (SET_ERROR(EINVAL)); @@ -710,8 +711,11 @@ zfsctl_root_readdir(struct vop_readdir_args *ap) entry.d_reclen = sizeof (entry); error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio)); if (error != 0) { - if (error == ENAMETOOLONG) - error = 0; +err: + if (error == ENAMETOOLONG) { + error = orig_resid == zfs_uio_resid(&uio) ? + EINVAL : 0; + } return (SET_ERROR(error)); } if (eofp != NULL) @@ -1056,17 +1060,21 @@ zfsctl_snapdir_readdir(struct vop_readdir_args *ap) zfs_uio_t uio; int *eofp = ap->a_eofflag; off_t dots_offset; + ssize_t orig_resid; int error; zfs_uio_init(&uio, ap->a_uio); + orig_resid = zfs_uio_resid(&uio); ASSERT3S(vp->v_type, ==, VDIR); error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap, &uio, &dots_offset); if (error != 0) { - if (error == ENAMETOOLONG) /* ran out of destination space */ - error = 0; + if (error == ENAMETOOLONG) { /* ran out of destination space */ + error = orig_resid == zfs_uio_resid(&uio) ? + EINVAL : 0; + } return (error); } @@ -1084,9 +1092,13 @@ zfsctl_snapdir_readdir(struct vop_readdir_args *ap) dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG); if (error != 0) { if (error == ENOENT) { - if (eofp != NULL) - *eofp = 1; - error = 0; + if (orig_resid == zfs_uio_resid(&uio)) { + error = EINVAL; + } else { + error = 0; + if (eofp != NULL) + *eofp = 1; + } } zfs_exit(zfsvfs, FTAG); return (error); @@ -1099,8 +1111,10 @@ zfsctl_snapdir_readdir(struct vop_readdir_args *ap) entry.d_reclen = sizeof (entry); error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio)); if (error != 0) { - if (error == ENAMETOOLONG) - error = 0; + if (error == ENAMETOOLONG) { + error = orig_resid == zfs_uio_resid(&uio) ? + EINVAL : 0; + } zfs_exit(zfsvfs, FTAG); return (SET_ERROR(error)); } @@ -1367,7 +1381,7 @@ zfsctl_snapshot_unmount(const char *snapname, int flags __unused) int err = getzfsvfs(snapname, &zfsvfs); if (err != 0) { - ASSERT3P(zfsvfs, ==, NULL); + ASSERT0P(zfsvfs); return (0); } vfsp = zfsvfs->z_vfs; diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c index 191df832d726..75ba2ea0cb9e 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c @@ -273,7 +273,7 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) zfsvfs_t *zfsvfs = zp->z_zfsvfs; ASSERT(zp->z_unlinked); - ASSERT3U(zp->z_links, ==, 0); + ASSERT0(zp->z_links); VERIFY0(zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); @@ -437,7 +437,7 @@ zfs_rmnode(znode_t *zp) uint64_t count; int error; - ASSERT3U(zp->z_links, ==, 0); + ASSERT0(zp->z_links); if (zfsvfs->z_replay == B_FALSE) ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c index 0456552ed07e..79b784288911 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c @@ -455,8 +455,13 @@ zfs_sync(vfs_t *vfsp, int waitfor) return (0); } - if (zfsvfs->z_log != NULL) - zil_commit(zfsvfs->z_log, 0); + if (zfsvfs->z_log != NULL) { + error = zil_commit(zfsvfs->z_log, 0); + if (error != 0) { + zfs_exit(zfsvfs, FTAG); + return (error); + } + } zfs_exit(zfsvfs, FTAG); } else { @@ -1091,7 +1096,7 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) if (mounting) { boolean_t readonly; - ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL); + ASSERT0P(zfsvfs->z_kstat.dk_kstats); error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os); if (error) return (error); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c index c4270d8b5d5c..174141a5deab 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c @@ -1101,7 +1101,7 @@ zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode, zfs_exit(zfsvfs, FTAG); return (error); } - ASSERT3P(zp, ==, NULL); + ASSERT0P(zp); /* * Create a new file object and update the directory @@ -1193,8 +1193,8 @@ out: *zpp = zp; } - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + error = zil_commit(zilog, 0); zfs_exit(zfsvfs, FTAG); return (error); @@ -1323,9 +1323,8 @@ out: if (xzp) vrele(ZTOV(xzp)); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - + if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + error = zil_commit(zilog, 0); zfs_exit(zfsvfs, FTAG); return (error); @@ -1482,7 +1481,7 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp, zfs_exit(zfsvfs, FTAG); return (error); } - ASSERT3P(zp, ==, NULL); + ASSERT0P(zp); if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr, mnt_ns))) { @@ -1556,8 +1555,8 @@ out: getnewvnode_drop_reserve(); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + error = zil_commit(zilog, 0); zfs_exit(zfsvfs, FTAG); return (error); @@ -1637,8 +1636,8 @@ zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr) if (zfsvfs->z_use_namecache) cache_vop_rmdir(dvp, vp); out: - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + error = zil_commit(zilog, 0); zfs_exit(zfsvfs, FTAG); return (error); @@ -1696,6 +1695,7 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, objset_t *os; caddr_t outbuf; size_t bufsize; + ssize_t orig_resid; zap_cursor_t zc; zap_attribute_t *zap; uint_t bytes_wanted; @@ -1736,7 +1736,7 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, /* * Quit if directory has been removed (posix) */ - if ((*eofp = zp->z_unlinked) != 0) { + if ((*eofp = (zp->z_unlinked != 0)) != 0) { zfs_exit(zfsvfs, FTAG); return (0); } @@ -1744,6 +1744,7 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, error = 0; os = zfsvfs->z_os; offset = zfs_uio_offset(uio); + orig_resid = zfs_uio_resid(uio); prefetch = zp->z_zn_prefetch; zap = zap_attribute_long_alloc(); @@ -1923,7 +1924,7 @@ update: kmem_free(outbuf, bufsize); if (error == ENOENT) - error = 0; + error = orig_resid == zfs_uio_resid(uio) ? EINVAL : 0; ZFS_ACCESSTIME_STAMP(zfsvfs, zp); @@ -2014,7 +2015,7 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr) if (vp->v_type == VBLK || vp->v_type == VCHR) vap->va_rdev = zfs_cmpldev(rdev); else - vap->va_rdev = 0; + vap->va_rdev = NODEV; vap->va_gen = zp->z_gen; vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ vap->va_filerev = zp->z_seq; @@ -3009,8 +3010,8 @@ out: } out2: - if (os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + if (err == 0 && os->os_sync == ZFS_SYNC_ALWAYS) + err = zil_commit(zilog, 0); zfs_exit(zfsvfs, FTAG); return (err); @@ -3539,7 +3540,7 @@ out_seq: out: if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + error = zil_commit(zilog, 0); zfs_exit(zfsvfs, FTAG); return (error); @@ -3731,7 +3732,7 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap, *zpp = zp; if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + error = zil_commit(zilog, 0); } zfs_exit(zfsvfs, FTAG); @@ -3921,8 +3922,8 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr, vnevent_link(ZTOV(szp), ct); } - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + error = zil_commit(zilog, 0); zfs_exit(zfsvfs, FTAG); return (error); @@ -4313,7 +4314,7 @@ typedef struct { } putpage_commit_arg_t; static void -zfs_putpage_commit_cb(void *arg) +zfs_putpage_commit_cb(void *arg, int err) { putpage_commit_arg_t *pca = arg; vm_object_t object = pca->pca_pages[0]->object; @@ -4322,7 +4323,17 @@ zfs_putpage_commit_cb(void *arg) for (uint_t i = 0; i < pca->pca_npages; i++) { vm_page_t pp = pca->pca_pages[i]; - vm_page_undirty(pp); + + if (err == 0) { + /* + * Writeback succeeded, so undirty the page. If it + * fails, we leave it in the same state it was. That's + * most likely dirty, so it will get tried again some + * other time. + */ + vm_page_undirty(pp); + } + vm_page_sunbusy(pp); } @@ -4510,8 +4521,13 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, out: zfs_rangelock_exit(lr); - if (commit) - zil_commit(zfsvfs->z_log, zp->z_id); + if (commit) { + err = zil_commit(zfsvfs->z_log, zp->z_id); + if (err != 0) { + zfs_exit(zfsvfs, FTAG); + return (err); + } + } dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len); @@ -5223,8 +5239,32 @@ struct vop_fsync_args { static int zfs_freebsd_fsync(struct vop_fsync_args *ap) { + vnode_t *vp = ap->a_vp; + int err = 0; + + /* + * Push any dirty mmap()'d data out to the DMU and ZIL, ready for + * zil_commit() to be called in zfs_fsync(). + */ + if (vm_object_mightbedirty(vp->v_object)) { + zfs_vmobject_wlock(vp->v_object); + if (!vm_object_page_clean(vp->v_object, 0, 0, 0)) + err = SET_ERROR(EIO); + zfs_vmobject_wunlock(vp->v_object); + if (err) { + /* + * Unclear what state things are in. zfs_putpages() + * will ensure the pages remain dirty if they haven't + * been written down to the DMU, but because there may + * be nothing logged, we can't assume that zfs_sync() + * -> zil_commit() will give us a useful error. It's + * safest if we just error out here. + */ + return (err); + } + } - return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred)); + return (zfs_fsync(VTOZ(vp), 0, ap->a_td->td_ucred)); } #ifndef _SYS_SYSPROTO_H_ @@ -6773,9 +6813,11 @@ zfs_deallocate(struct vop_deallocate_args *ap) if (error == 0) { if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS || (ap->a_ioflag & IO_SYNC) != 0) - zil_commit(zilog, zp->z_id); - *ap->a_offset = off + len; - *ap->a_len = 0; + error = zil_commit(zilog, zp->z_id); + if (error == 0) { + *ap->a_offset = off + len; + *ap->a_len = 0; + } } zfs_exit(zfsvfs, FTAG); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c index 775f54a65f7d..7cd0a153577c 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c @@ -161,15 +161,15 @@ zfs_znode_cache_destructor(void *buf, void *arg) znode_t *zp = buf; ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); - ASSERT3P(zp->z_vnode, ==, NULL); + ASSERT0P(zp->z_vnode); ASSERT(!list_link_active(&zp->z_link_node)); mutex_destroy(&zp->z_lock); mutex_destroy(&zp->z_acl_lock); rw_destroy(&zp->z_xattr_lock); zfs_rangelock_fini(&zp->z_rangelock); - ASSERT3P(zp->z_acl_cached, ==, NULL); - ASSERT3P(zp->z_xattr_cached, ==, NULL); + ASSERT0P(zp->z_acl_cached); + ASSERT0P(zp->z_xattr_cached); } @@ -195,7 +195,7 @@ zfs_znode_init(void) /* * Initialize zcache */ - ASSERT3P(znode_uma_zone, ==, NULL); + ASSERT0P(znode_uma_zone); znode_uma_zone = uma_zcreate("zfs_znode_cache", sizeof (znode_t), zfs_znode_cache_constructor_smr, zfs_znode_cache_destructor_smr, NULL, NULL, 0, 0); @@ -224,7 +224,7 @@ zfs_znode_init(void) /* * Initialize zcache */ - ASSERT3P(znode_cache, ==, NULL); + ASSERT0P(znode_cache); znode_cache = kmem_cache_create("zfs_znode_cache", sizeof (znode_t), 0, zfs_znode_cache_constructor, zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_RECLAIMABLE); @@ -353,8 +353,8 @@ zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs)); ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id))); - ASSERT3P(zp->z_sa_hdl, ==, NULL); - ASSERT3P(zp->z_acl_cached, ==, NULL); + ASSERT0P(zp->z_sa_hdl); + ASSERT0P(zp->z_acl_cached); if (sa_hdl == NULL) { VERIFY0(sa_handle_get_from_db(zfsvfs->z_os, db, zp, SA_HDL_SHARED, &zp->z_sa_hdl)); @@ -1127,7 +1127,7 @@ zfs_rezget(znode_t *zp) } rw_exit(&zp->z_xattr_lock); - ASSERT3P(zp->z_sa_hdl, ==, NULL); + ASSERT0P(zp->z_sa_hdl); err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); if (err) { ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); @@ -1298,7 +1298,7 @@ zfs_znode_free(znode_t *zp) zfsvfs_t *zfsvfs = zp->z_zfsvfs; char *symlink; - ASSERT3P(zp->z_sa_hdl, ==, NULL); + ASSERT0P(zp->z_sa_hdl); zp->z_vnode = NULL; mutex_enter(&zfsvfs->z_znodes_lock); POINTER_INVALIDATE(&zp->z_zfsvfs); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c index 72a7c4ea082a..0dd2ecd7fd8d 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c @@ -31,7 +31,7 @@ * Copyright (c) 2012, 2017 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2014 Integros [integros.com] - * Copyright (c) 2024, Klara, Inc. + * Copyright (c) 2024, 2025, Klara, Inc. */ /* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */ @@ -196,7 +196,6 @@ DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol); static int zvol_geom_open(struct g_provider *pp, int flag, int count); static int zvol_geom_close(struct g_provider *pp, int flag, int count); -static void zvol_geom_destroy(zvol_state_t *zv); static int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace); static void zvol_geom_bio_start(struct bio *bp); static int zvol_geom_bio_getattr(struct bio *bp); @@ -226,25 +225,14 @@ zvol_geom_open(struct g_provider *pp, int flag, int count) } retry: - rw_enter(&zvol_state_lock, ZVOL_RW_READER); - /* - * Obtain a copy of private under zvol_state_lock to make sure either - * the result of zvol free code setting private to NULL is observed, - * or the zv is protected from being freed because of the positive - * zv_open_count. - */ - zv = pp->private; - if (zv == NULL) { - rw_exit(&zvol_state_lock); - err = SET_ERROR(ENXIO); - goto out_locked; - } + zv = atomic_load_ptr(&pp->private); + if (zv == NULL) + return (SET_ERROR(ENXIO)); mutex_enter(&zv->zv_state_lock); if (zv->zv_zso->zso_dying || zv->zv_flags & ZVOL_REMOVING) { - rw_exit(&zvol_state_lock); err = SET_ERROR(ENXIO); - goto out_zv_locked; + goto out_locked; } ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM); @@ -257,8 +245,24 @@ retry: drop_suspend = B_TRUE; if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) { mutex_exit(&zv->zv_state_lock); + + /* + * Removal may happen while the locks are down, so + * we can't trust zv any longer; we have to start over. + */ + zv = atomic_load_ptr(&pp->private); + if (zv == NULL) + return (SET_ERROR(ENXIO)); + rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); mutex_enter(&zv->zv_state_lock); + + if (zv->zv_zso->zso_dying || + zv->zv_flags & ZVOL_REMOVING) { + err = SET_ERROR(ENXIO); + goto out_locked; + } + /* Check to see if zv_suspend_lock is needed. */ if (zv->zv_open_count != 0) { rw_exit(&zv->zv_suspend_lock); @@ -266,7 +270,6 @@ retry: } } } - rw_exit(&zvol_state_lock); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -294,7 +297,7 @@ retry: if (drop_namespace) mutex_exit(&spa_namespace_lock); if (err) - goto out_zv_locked; + goto out_locked; pp->mediasize = zv->zv_volsize; pp->stripeoffset = 0; pp->stripesize = zv->zv_volblocksize; @@ -329,9 +332,8 @@ out_opened: zvol_last_close(zv); wakeup(zv); } -out_zv_locked: - mutex_exit(&zv->zv_state_lock); out_locked: + mutex_exit(&zv->zv_state_lock); if (drop_suspend) rw_exit(&zv->zv_suspend_lock); return (err); @@ -345,12 +347,9 @@ zvol_geom_close(struct g_provider *pp, int flag, int count) boolean_t drop_suspend = B_TRUE; int new_open_count; - rw_enter(&zvol_state_lock, ZVOL_RW_READER); - zv = pp->private; - if (zv == NULL) { - rw_exit(&zvol_state_lock); + zv = atomic_load_ptr(&pp->private); + if (zv == NULL) return (SET_ERROR(ENXIO)); - } mutex_enter(&zv->zv_state_lock); if (zv->zv_flags & ZVOL_EXCL) { @@ -377,6 +376,15 @@ zvol_geom_close(struct g_provider *pp, int flag, int count) mutex_exit(&zv->zv_state_lock); rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); mutex_enter(&zv->zv_state_lock); + + /* + * Unlike in zvol_geom_open(), we don't check if + * removal started here, because we might be one of the + * openers that needs to be thrown out! If we're the + * last, we need to call zvol_last_close() below to + * finish cleanup. So, no special treatment for us. + */ + /* Check to see if zv_suspend_lock is needed. */ new_open_count = zv->zv_open_count - count; if (new_open_count != 0) { @@ -387,7 +395,6 @@ zvol_geom_close(struct g_provider *pp, int flag, int count) } else { drop_suspend = B_FALSE; } - rw_exit(&zvol_state_lock); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -408,20 +415,6 @@ zvol_geom_close(struct g_provider *pp, int flag, int count) return (0); } -static void -zvol_geom_destroy(zvol_state_t *zv) -{ - struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom; - struct g_provider *pp = zsg->zsg_provider; - - ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM); - - g_topology_assert(); - - zsg->zsg_provider = NULL; - g_wither_geom(pp->geom, ENXIO); -} - void zvol_wait_close(zvol_state_t *zv) { @@ -454,7 +447,7 @@ zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace) ("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).", pp->name, acr, acw, ace)); - if (pp->private == NULL) { + if (atomic_load_ptr(&pp->private) == NULL) { if (acr <= 0 && acw <= 0 && ace <= 0) return (0); return (pp->error); @@ -727,9 +720,9 @@ unlock: break; } - if (commit) { + if (error == 0 && commit) { commit: - zil_commit(zv->zv_zilog, ZVOL_OBJ); + error = zil_commit(zv->zv_zilog, ZVOL_OBJ); } resume: rw_exit(&zv->zv_suspend_lock); @@ -906,8 +899,8 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) zfs_rangelock_exit(lr); int64_t nwritten = start_resid - zfs_uio_resid(&uio); dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten); - if (commit) - zil_commit(zv->zv_zilog, ZVOL_OBJ); + if (error == 0 && commit) + error = zil_commit(zv->zv_zilog, ZVOL_OBJ); rw_exit(&zv->zv_suspend_lock); return (error); @@ -921,25 +914,14 @@ zvol_cdev_open(struct cdev *dev, int flags, int fmt, struct thread *td) boolean_t drop_suspend = B_FALSE; retry: - rw_enter(&zvol_state_lock, ZVOL_RW_READER); - /* - * Obtain a copy of si_drv2 under zvol_state_lock to make sure either - * the result of zvol free code setting si_drv2 to NULL is observed, - * or the zv is protected from being freed because of the positive - * zv_open_count. - */ - zv = dev->si_drv2; - if (zv == NULL) { - rw_exit(&zvol_state_lock); - err = SET_ERROR(ENXIO); - goto out_locked; - } + zv = atomic_load_ptr(&dev->si_drv2); + if (zv == NULL) + return (SET_ERROR(ENXIO)); mutex_enter(&zv->zv_state_lock); - if (zv->zv_zso->zso_dying) { - rw_exit(&zvol_state_lock); + if (zv->zv_zso->zso_dying || zv->zv_flags & ZVOL_REMOVING) { err = SET_ERROR(ENXIO); - goto out_zv_locked; + goto out_locked; } ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_DEV); @@ -954,6 +936,13 @@ retry: mutex_exit(&zv->zv_state_lock); rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); mutex_enter(&zv->zv_state_lock); + + if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { + /* Removal started while locks were down. */ + err = SET_ERROR(ENXIO); + goto out_locked; + } + /* Check to see if zv_suspend_lock is needed. */ if (zv->zv_open_count != 0) { rw_exit(&zv->zv_suspend_lock); @@ -961,7 +950,6 @@ retry: } } } - rw_exit(&zvol_state_lock); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -989,7 +977,7 @@ retry: if (drop_namespace) mutex_exit(&spa_namespace_lock); if (err) - goto out_zv_locked; + goto out_locked; } ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -1016,9 +1004,8 @@ out_opened: zvol_last_close(zv); wakeup(zv); } -out_zv_locked: - mutex_exit(&zv->zv_state_lock); out_locked: + mutex_exit(&zv->zv_state_lock); if (drop_suspend) rw_exit(&zv->zv_suspend_lock); return (err); @@ -1030,12 +1017,9 @@ zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td) zvol_state_t *zv; boolean_t drop_suspend = B_TRUE; - rw_enter(&zvol_state_lock, ZVOL_RW_READER); - zv = dev->si_drv2; - if (zv == NULL) { - rw_exit(&zvol_state_lock); + zv = atomic_load_ptr(&dev->si_drv2); + if (zv == NULL) return (SET_ERROR(ENXIO)); - } mutex_enter(&zv->zv_state_lock); if (zv->zv_flags & ZVOL_EXCL) { @@ -1060,6 +1044,15 @@ zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td) mutex_exit(&zv->zv_state_lock); rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); mutex_enter(&zv->zv_state_lock); + + /* + * Unlike in zvol_cdev_open(), we don't check if + * removal started here, because we might be one of the + * openers that needs to be thrown out! If we're the + * last, we need to call zvol_last_close() below to + * finish cleanup. So, no special treatment for us. + */ + /* Check to see if zv_suspend_lock is needed. */ if (zv->zv_open_count != 1) { rw_exit(&zv->zv_suspend_lock); @@ -1069,7 +1062,6 @@ zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td) } else { drop_suspend = B_FALSE; } - rw_exit(&zvol_state_lock); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -1101,7 +1093,8 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, int error; boolean_t sync; - zv = dev->si_drv2; + zv = atomic_load_ptr(&dev->si_drv2); + ASSERT3P(zv, !=, NULL); error = 0; KASSERT(zv->zv_open_count > 0, @@ -1117,7 +1110,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, case DIOCGFLUSH: rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); if (zv->zv_zilog != NULL) - zil_commit(zv->zv_zilog, ZVOL_OBJ); + error = zil_commit(zv->zv_zilog, ZVOL_OBJ); rw_exit(&zv->zv_suspend_lock); break; case DIOCGDELETE: @@ -1152,7 +1145,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, } zfs_rangelock_exit(lr); if (sync) - zil_commit(zv->zv_zilog, ZVOL_OBJ); + error = zil_commit(zv->zv_zilog, ZVOL_OBJ); rw_exit(&zv->zv_suspend_lock); break; case DIOCGSTRIPESIZE: @@ -1162,6 +1155,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, *(off_t *)data = 0; break; case DIOCGATTR: { + rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); spa_t *spa = dmu_objset_spa(zv->zv_objset); struct diocgattr_arg *arg = (struct diocgattr_arg *)data; uint64_t refd, avail, usedobjs, availobjs; @@ -1186,6 +1180,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, arg->value.off = refd / DEV_BSIZE; } else error = SET_ERROR(ENOIOCTL); + rw_exit(&zv->zv_suspend_lock); break; } case FIOSEEKHOLE: @@ -1196,10 +1191,12 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, hole = (cmd == FIOSEEKHOLE); noff = *off; + rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); lr = zfs_rangelock_enter(&zv->zv_rangelock, 0, UINT64_MAX, RL_READER); error = dmu_offset_next(zv->zv_objset, ZVOL_OBJ, hole, &noff); zfs_rangelock_exit(lr); + rw_exit(&zv->zv_suspend_lock); *off = noff; break; } @@ -1400,42 +1397,65 @@ zvol_alloc(const char *name, uint64_t volsize, uint64_t volblocksize, * Remove minor node for the specified volume. */ void -zvol_os_free(zvol_state_t *zv) +zvol_os_remove_minor(zvol_state_t *zv) { - ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); - ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); ASSERT0(zv->zv_open_count); + ASSERT0(atomic_read(&zv->zv_suspend_ref)); + ASSERT(zv->zv_flags & ZVOL_REMOVING); - ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name); - - rw_destroy(&zv->zv_suspend_lock); - zfs_rangelock_fini(&zv->zv_rangelock); + struct zvol_state_os *zso = zv->zv_zso; + zv->zv_zso = NULL; if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { - struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom; - struct g_provider *pp __maybe_unused = zsg->zsg_provider; - - ASSERT3P(pp->private, ==, NULL); + struct zvol_state_geom *zsg = &zso->zso_geom; + struct g_provider *pp = zsg->zsg_provider; + atomic_store_ptr(&pp->private, NULL); + mutex_exit(&zv->zv_state_lock); g_topology_lock(); - zvol_geom_destroy(zv); + g_wither_geom(pp->geom, ENXIO); g_topology_unlock(); } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { - struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev; + struct zvol_state_dev *zsd = &zso->zso_dev; struct cdev *dev = zsd->zsd_cdev; + if (dev != NULL) + atomic_store_ptr(&dev->si_drv2, NULL); + mutex_exit(&zv->zv_state_lock); + if (dev != NULL) { - ASSERT3P(dev->si_drv2, ==, NULL); destroy_dev(dev); knlist_clear(&zsd->zsd_selinfo.si_note, 0); knlist_destroy(&zsd->zsd_selinfo.si_note); } } + kmem_free(zso, sizeof (struct zvol_state_os)); + + mutex_enter(&zv->zv_state_lock); +} + +void +zvol_os_free(zvol_state_t *zv) +{ + ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); + ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); + ASSERT0(zv->zv_open_count); + ASSERT0P(zv->zv_zso); + + ASSERT0P(zv->zv_objset); + ASSERT0P(zv->zv_zilog); + ASSERT0P(zv->zv_dn); + + ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name); + + rw_destroy(&zv->zv_suspend_lock); + zfs_rangelock_fini(&zv->zv_rangelock); + mutex_destroy(&zv->zv_state_lock); cv_destroy(&zv->zv_removing_cv); dataset_kstats_destroy(&zv->zv_kstat); - kmem_free(zv->zv_zso, sizeof (struct zvol_state_os)); kmem_free(zv, sizeof (zvol_state_t)); zvol_minors--; } @@ -1493,11 +1513,11 @@ zvol_os_create_minor(const char *name) zv->zv_objset = os; - ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL); + ASSERT0P(zv->zv_kstat.dk_kstats); error = dataset_kstats_create(&zv->zv_kstat, zv->zv_objset); if (error) goto out_dmu_objset_disown; - ASSERT3P(zv->zv_zilog, ==, NULL); + ASSERT0P(zv->zv_zilog); zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums); if (spa_writeable(dmu_objset_spa(os))) { if (zil_replay_disable) @@ -1538,28 +1558,6 @@ out_doi: return (error); } -void -zvol_os_clear_private(zvol_state_t *zv) -{ - ASSERT(RW_LOCK_HELD(&zvol_state_lock)); - if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { - struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom; - struct g_provider *pp = zsg->zsg_provider; - - if (pp->private == NULL) /* already cleared */ - return; - - pp->private = NULL; - ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); - } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { - struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev; - struct cdev *dev = zsd->zsd_cdev; - - if (dev != NULL) - dev->si_drv2 = NULL; - } -} - int zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize) { diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c b/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c index ce9c9e39e60c..aac5f2ebbfd2 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c @@ -66,9 +66,9 @@ void __cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg) { ASSERT(cvp); - ASSERT(name == NULL); + ASSERT0P(name); ASSERT(type == CV_DEFAULT); - ASSERT(arg == NULL); + ASSERT0P(arg); cvp->cv_magic = CV_MAGIC; init_waitqueue_head(&cvp->cv_event); @@ -83,7 +83,7 @@ static int cv_destroy_wakeup(kcondvar_t *cvp) { if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) { - ASSERT(cvp->cv_mutex == NULL); + ASSERT0P(cvp->cv_mutex); ASSERT(!waitqueue_active(&cvp->cv_event)); return (1); } @@ -104,7 +104,7 @@ __cv_destroy(kcondvar_t *cvp) while (cv_destroy_wakeup(cvp) == 0) wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1); - ASSERT3P(cvp->cv_mutex, ==, NULL); + ASSERT0P(cvp->cv_mutex); ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0); ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0); ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0); diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c index f37699b4347e..89ca4a648b2f 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c @@ -709,7 +709,7 @@ zone_get_hostid(void *zone) { uint32_t hostid; - ASSERT3P(zone, ==, NULL); + ASSERT0P(zone); if (spl_hostid != 0) return ((uint32_t)(spl_hostid & HW_HOSTID_MASK)); diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c index fab80289b278..22e4ed169d03 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c @@ -296,7 +296,7 @@ spl_slab_free(spl_kmem_slab_t *sks, spl_kmem_cache_t *skc; ASSERT(sks->sks_magic == SKS_MAGIC); - ASSERT(sks->sks_ref == 0); + ASSERT0(sks->sks_ref); skc = sks->sks_cache; ASSERT(skc->skc_magic == SKC_MAGIC); @@ -598,7 +598,7 @@ static void spl_magazine_free(spl_kmem_magazine_t *skm) { ASSERT(skm->skm_magic == SKM_MAGIC); - ASSERT(skm->skm_avail == 0); + ASSERT0(skm->skm_avail); kfree(skm); } @@ -610,7 +610,7 @@ spl_magazine_create(spl_kmem_cache_t *skc) { int i = 0; - ASSERT((skc->skc_flags & KMC_SLAB) == 0); + ASSERT0((skc->skc_flags & KMC_SLAB)); skc->skc_mag = kzalloc(sizeof (spl_kmem_magazine_t *) * num_possible_cpus(), kmem_flags_convert(KM_SLEEP)); @@ -640,7 +640,7 @@ spl_magazine_destroy(spl_kmem_cache_t *skc) spl_kmem_magazine_t *skm; int i = 0; - ASSERT((skc->skc_flags & KMC_SLAB) == 0); + ASSERT0((skc->skc_flags & KMC_SLAB)); for_each_possible_cpu(i) { skm = skc->skc_mag[i]; @@ -679,8 +679,8 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align, /* * Unsupported flags */ - ASSERT(vmp == NULL); - ASSERT(reclaim == NULL); + ASSERT0P(vmp); + ASSERT0P(reclaim); might_sleep(); @@ -863,11 +863,11 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc) * Validate there are no objects in use and free all the * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */ - ASSERT3U(skc->skc_slab_alloc, ==, 0); - ASSERT3U(skc->skc_obj_alloc, ==, 0); - ASSERT3U(skc->skc_slab_total, ==, 0); - ASSERT3U(skc->skc_obj_total, ==, 0); - ASSERT3U(skc->skc_obj_emergency, ==, 0); + ASSERT0(skc->skc_slab_alloc); + ASSERT0(skc->skc_obj_alloc); + ASSERT0(skc->skc_slab_total); + ASSERT0(skc->skc_obj_total); + ASSERT0(skc->skc_obj_emergency); ASSERT(list_empty(&skc->skc_complete_list)); ASSERT3U(percpu_counter_sum(&skc->skc_linux_alloc), ==, 0); @@ -986,7 +986,7 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj) ASSERT0(flags & ~KM_PUBLIC_MASK); ASSERT(skc->skc_magic == SKC_MAGIC); - ASSERT((skc->skc_flags & KMC_SLAB) == 0); + ASSERT0((skc->skc_flags & KMC_SLAB)); *obj = NULL; diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c index 48f70b00c96b..02c5b42bc4a0 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c @@ -541,7 +541,7 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name, kstat_t *ksp; ASSERT(ks_module); - ASSERT(ks_instance == 0); + ASSERT0(ks_instance); ASSERT(ks_name); if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO)) diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c b/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c index f42f455222de..8f5c73b13df5 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c @@ -80,7 +80,7 @@ __thread_create(caddr_t stk, size_t stksize, thread_func_t func, /* Option pp is simply ignored */ /* Variable stack size unsupported */ - ASSERT(stk == NULL); + ASSERT0P(stk); tp = kmem_alloc(sizeof (thread_priv_t), KM_PUSHPAGE); if (tp == NULL) diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-tsd.c b/sys/contrib/openzfs/module/os/linux/spl/spl-tsd.c index 34a61bef7d4f..2e8cedf0dc87 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-tsd.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-tsd.c @@ -161,7 +161,7 @@ tsd_hash_add(tsd_hash_table_t *table, uint_t key, pid_t pid, void *value) ulong_t hash; int rc = 0; - ASSERT3P(tsd_hash_search(table, key, pid), ==, NULL); + ASSERT0P(tsd_hash_search(table, key, pid)); /* New entry allocate structure, set value, and add to hash */ entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c index 248c9b7a6d3b..8a8316f63c48 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c @@ -863,9 +863,9 @@ abd_iter_advance(struct abd_iter *aiter, size_t amount) * Ensure that last chunk is not in use. abd_iterate_*() must clear * this state (directly or abd_iter_unmap()) before advancing. */ - ASSERT3P(aiter->iter_mapaddr, ==, NULL); + ASSERT0P(aiter->iter_mapaddr); ASSERT0(aiter->iter_mapsize); - ASSERT3P(aiter->iter_page, ==, NULL); + ASSERT0P(aiter->iter_page); ASSERT0(aiter->iter_page_doff); ASSERT0(aiter->iter_page_dsize); @@ -897,7 +897,7 @@ abd_iter_map(struct abd_iter *aiter) void *paddr; size_t offset = 0; - ASSERT3P(aiter->iter_mapaddr, ==, NULL); + ASSERT0P(aiter->iter_mapaddr); ASSERT0(aiter->iter_mapsize); /* There's nothing left to iterate over, so do nothing */ diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c index 154ca22d9513..830fad7fe793 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c @@ -552,7 +552,7 @@ vdev_bio_associate_blkg(struct bio *bio) #endif ASSERT3P(q, !=, NULL); - ASSERT3P(bio->bi_blkg, ==, NULL); + ASSERT0P(bio->bi_blkg); if (q->root_blkg && vdev_blkg_tryget(q->root_blkg)) bio->bi_blkg = q->root_blkg; @@ -574,7 +574,7 @@ vdev_bio_set_dev(struct bio *bio, struct block_device *bdev) bio->bi_bdev = bdev; ASSERT3P(q, !=, NULL); - ASSERT3P(bio->bi_blkg, ==, NULL); + ASSERT0P(bio->bi_blkg); if (q->root_blkg && vdev_blkg_tryget(q->root_blkg)) bio->bi_blkg = q->root_blkg; @@ -806,7 +806,7 @@ vbio_completion(struct bio *bio) * here; instead we stash vbio on the zio and take care of it in the * done callback. */ - ASSERT3P(zio->io_bio, ==, NULL); + ASSERT0P(zio->io_bio); zio->io_bio = vbio; zio_delay_interrupt(zio); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c index 1b169122f25b..daa4b5776837 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c @@ -1900,7 +1900,7 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, if (!(flag & IS_ROOT_NODE) && (dzp->z_pflags & ZFS_INHERIT_ACE) && !(dzp->z_pflags & ZFS_XATTR)) { - VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE, + VERIFY0(zfs_acl_node_read(dzp, B_TRUE, &paclp, B_FALSE)); acl_ids->z_aclp = zfs_acl_inherit(zfsvfs, vap->va_mode, paclp, acl_ids->z_mode, &need_chmod); @@ -2204,8 +2204,8 @@ top: } error = zfs_aclset_common(zp, aclp, cr, tx); - ASSERT(error == 0); - ASSERT(zp->z_acl_cached == NULL); + ASSERT0(error); + ASSERT0P(zp->z_acl_cached); zp->z_acl_cached = aclp; if (fuid_dirtied) diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c index 6552a933ce0a..fb4de50480a3 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c @@ -494,9 +494,9 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, if (!creation) now = current_time(ip); zp = ITOZ(ip); - ASSERT3P(zp->z_dirlocks, ==, NULL); - ASSERT3P(zp->z_acl_cached, ==, NULL); - ASSERT3P(zp->z_xattr_cached, ==, NULL); + ASSERT0P(zp->z_dirlocks); + ASSERT0P(zp->z_acl_cached); + ASSERT0P(zp->z_xattr_cached); zp->z_id = id; zp->z_unlinked = B_FALSE; zp->z_atime_dirty = B_FALSE; @@ -590,7 +590,7 @@ zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id, int zfsctl_create(zfsvfs_t *zfsvfs) { - ASSERT(zfsvfs->z_ctldir == NULL); + ASSERT0P(zfsvfs->z_ctldir); zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT, &zpl_fops_root, &zpl_ops_root, 0); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c index 2f935bb3fc8c..e8de536606e2 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c @@ -463,7 +463,7 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) zfsvfs_t *zfsvfs = ZTOZSB(zp); ASSERT(zp->z_unlinked); - ASSERT(ZTOI(zp)->i_nlink == 0); + ASSERT0(ZTOI(zp)->i_nlink); VERIFY3U(0, ==, zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); @@ -662,8 +662,8 @@ zfs_rmnode(znode_t *zp) uint64_t links; int error; - ASSERT(ZTOI(zp)->i_nlink == 0); - ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0); + ASSERT0(ZTOI(zp)->i_nlink); + ASSERT0(atomic_read(&ZTOI(zp)->i_count)); /* * If this is an attribute directory, purge its contents. @@ -710,7 +710,7 @@ zfs_rmnode(znode_t *zp) &xattr_obj, sizeof (xattr_obj)); if (error == 0 && xattr_obj) { error = zfs_zget(zfsvfs, xattr_obj, &xzp); - ASSERT(error == 0); + ASSERT0(error); } acl_obj = zfs_external_acl(zp); @@ -744,12 +744,12 @@ zfs_rmnode(znode_t *zp) } if (xzp) { - ASSERT(error == 0); + ASSERT0(error); mutex_enter(&xzp->z_lock); xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ clear_nlink(ZTOI(xzp)); /* no more links to it */ links = 0; - VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), + VERIFY0(sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), &links, sizeof (links), tx)); mutex_exit(&xzp->z_lock); zfs_unlinked_add(xzp, tx); @@ -872,7 +872,7 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) ctime); } error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - ASSERT(error == 0); + ASSERT0(error); mutex_exit(&zp->z_lock); @@ -894,7 +894,7 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) &dzp->z_pflags, sizeof (dzp->z_pflags)); zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime); error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); - ASSERT(error == 0); + ASSERT0(error); mutex_exit(&dzp->z_lock); return (0); @@ -986,7 +986,7 @@ zfs_drop_nlink_locked(znode_t *zp, dmu_tx_t *tx, boolean_t *unlinkedp) SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, &links, sizeof (links)); error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - ASSERT3U(error, ==, 0); + ASSERT0(error); if (unlinkedp != NULL) *unlinkedp = unlinked; @@ -1058,7 +1058,7 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, /* The only error is !zfs_dirempty() and we checked earlier. */ error = zfs_drop_nlink_locked(zp, tx, &unlinked); - ASSERT3U(error, ==, 0); + ASSERT0(error); mutex_exit(&zp->z_lock); } else { error = zfs_dropname(dl, zp, dzp, tx, flag); @@ -1083,7 +1083,7 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, NULL, &dzp->z_pflags, sizeof (dzp->z_pflags)); zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime); error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); - ASSERT(error == 0); + ASSERT0(error); mutex_exit(&dzp->z_lock); if (unlinkedp != NULL) @@ -1167,7 +1167,7 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xzpp, cred_t *cr) ASSERT(error == 0 && parent == zp->z_id); #endif - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id, + VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id, sizeof (xzp->z_id), tx)); if (!zp->z_unlinked) diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c index 1c187d7b9cab..895d80b2d79e 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c @@ -223,7 +223,7 @@ zfs_kobj_add(zfs_mod_kobj_t *zkobj, struct kobject *parent, const char *name) { /* zko_default_group.attrs must be NULL terminated */ ASSERT(zkobj->zko_default_group.attrs != NULL); - ASSERT(zkobj->zko_default_group.attrs[zkobj->zko_attr_count] == NULL); + ASSERT0P(zkobj->zko_default_group.attrs[zkobj->zko_attr_count]); kobject_init(&zkobj->zko_kobj, &zkobj->zko_kobj_type); return (kobject_add(&zkobj->zko_kobj, parent, name)); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c index 396faef8f646..cd606e667bff 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c @@ -279,19 +279,14 @@ zfs_sync(struct super_block *sb, int wait, cred_t *cr) return (err); /* - * If the pool is suspended, just return an error. This is to help - * with shutting down with pools suspended, as we don't want to block - * in that case. + * Sync any pending writes, but do not block if the pool is suspended. + * This is to help with shutting down with pools suspended, as we don't + * want to block in that case. */ - if (spa_suspended(zfsvfs->z_os->os_spa)) { - zfs_exit(zfsvfs, FTAG); - return (SET_ERROR(EIO)); - } - - zil_commit(zfsvfs->z_log, 0); + err = zil_commit_flags(zfsvfs->z_log, 0, ZIL_COMMIT_NOW); zfs_exit(zfsvfs, FTAG); - return (0); + return (err); } static void @@ -883,7 +878,7 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) * operations out since we closed the ZIL. */ if (mounting) { - ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL); + ASSERT0P(zfsvfs->z_kstat.dk_kstats); error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os); if (error) return (error); @@ -1676,7 +1671,7 @@ zfs_umount(struct super_block *sb) if (zfsvfs->z_arc_prune != NULL) arc_remove_prune_callback(zfsvfs->z_arc_prune); - VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); + VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE)); os = zfsvfs->z_os; /* @@ -1802,8 +1797,8 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp) ASSERT(*ipp != NULL); if (object == ZFSCTL_INO_SNAPDIR) { - VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp, - 0, kcred, NULL, NULL) == 0); + VERIFY0(zfsctl_root_lookup(*ipp, "snapshot", ipp, + 0, kcred, NULL, NULL)); } else { /* * Must have an existing ref, so igrab() @@ -1905,7 +1900,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) goto bail; ds->ds_dir->dd_activity_cancelled = B_FALSE; - VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); + VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE)); zfs_set_fuid_feature(zfsvfs); zfsvfs->z_rollback_time = jiffies; @@ -2078,7 +2073,7 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); ASSERT0(error); - VERIFY(0 == sa_set_sa_object(os, sa_obj)); + VERIFY0(sa_set_sa_object(os, sa_obj)); sa_register_update_callback(os, zfs_sa_upgrade); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c index 6a2fc5ad7935..6106726651a3 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c @@ -841,8 +841,8 @@ out: *zpp = zp; } - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + error = zil_commit(zilog, 0); zfs_exit(zfsvfs, FTAG); return (error); @@ -1203,8 +1203,8 @@ out: zfs_zrele_async(xzp); } - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + error = zil_commit(zilog, 0); zfs_exit(zfsvfs, FTAG); return (error); @@ -1392,14 +1392,15 @@ out: zfs_dirent_unlock(dl); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - if (error != 0) { zrele(zp); } else { zfs_znode_update_vfs(dzp); zfs_znode_update_vfs(zp); + + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + error = zil_commit(zilog, 0); + } zfs_exit(zfsvfs, FTAG); return (error); @@ -1528,8 +1529,8 @@ out: zfs_znode_update_vfs(zp); zrele(zp); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + error = zil_commit(zilog, 0); zfs_exit(zfsvfs, FTAG); return (error); @@ -2483,10 +2484,10 @@ top: new_mode = zp->z_mode; } err = zfs_acl_chown_setattr(zp); - ASSERT(err == 0); + ASSERT0(err); if (attrzp) { err = zfs_acl_chown_setattr(attrzp); - ASSERT(err == 0); + ASSERT0(err); } } @@ -2600,7 +2601,7 @@ out: if (err == 0 && xattr_count > 0) { err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, xattr_count, tx); - ASSERT(err2 == 0); + ASSERT0(err2); } if (aclp) @@ -2630,8 +2631,8 @@ out: } out2: - if (os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + if (err == 0 && os->os_sync == ZFS_SYNC_ALWAYS) + err = zil_commit(zilog, 0); out3: kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * bulks); @@ -3157,7 +3158,7 @@ top: * zfs_link_create() to add back the same entry, but with a new * dnode (szp), should not fail. */ - ASSERT3P(tzp, ==, NULL); + ASSERT0P(tzp); goto commit_link_tzp; } @@ -3235,8 +3236,8 @@ out: zfs_dirent_unlock(sdl); zfs_dirent_unlock(tdl); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + error = zil_commit(zilog, 0); zfs_exit(zfsvfs, FTAG); return (error); @@ -3436,7 +3437,7 @@ top: *zpp = zp; if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + error = zil_commit(zilog, 0); } else { zrele(zp); } @@ -3654,8 +3655,8 @@ top: * operation are sync safe. */ if (is_tmpfile) { - VERIFY(zap_remove_int(zfsvfs->z_os, - zfsvfs->z_unlinkedobj, szp->z_id, tx) == 0); + VERIFY0(zap_remove_int(zfsvfs->z_os, + zfsvfs->z_unlinkedobj, szp->z_id, tx)); } else { if (flags & FIGNORECASE) txtype |= TX_CI; @@ -3670,18 +3671,20 @@ top: zfs_dirent_unlock(dl); - if (!is_tmpfile && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - - if (is_tmpfile && zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { - txg_wait_flag_t wait_flags = - spa_get_failmode(dmu_objset_spa(zfsvfs->z_os)) == - ZIO_FAILURE_MODE_CONTINUE ? TXG_WAIT_SUSPEND : 0; - error = txg_wait_synced_flags(dmu_objset_pool(zfsvfs->z_os), - txg, wait_flags); - if (error != 0) { - ASSERT3U(error, ==, ESHUTDOWN); - error = SET_ERROR(EIO); + if (error == 0) { + if (!is_tmpfile && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + error = zil_commit(zilog, 0); + + if (is_tmpfile && zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { + txg_wait_flag_t wait_flags = + spa_get_failmode(dmu_objset_spa(zfsvfs->z_os)) == + ZIO_FAILURE_MODE_CONTINUE ? TXG_WAIT_SUSPEND : 0; + error = txg_wait_synced_flags( + dmu_objset_pool(zfsvfs->z_os), txg, wait_flags); + if (error != 0) { + ASSERT3U(error, ==, ESHUTDOWN); + error = SET_ERROR(EIO); + } } } @@ -3691,16 +3694,42 @@ top: return (error); } -static void -zfs_putpage_commit_cb(void *arg) +/* Finish page writeback. */ +static inline void +zfs_page_writeback_done(struct page *pp, int err) { - struct page *pp = arg; + if (err != 0) { + /* + * Writeback failed. Re-dirty the page. It was undirtied before + * the IO was issued (in zfs_putpage() or write_cache_pages()). + * The kernel only considers writeback for dirty pages; if we + * don't do this, it is eligible for eviction without being + * written out, which we definitely don't want. + */ +#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO + filemap_dirty_folio(page_mapping(pp), page_folio(pp)); +#else + __set_page_dirty_nobuffers(pp); +#endif + } ClearPageError(pp); end_page_writeback(pp); } /* + * ZIL callback for page writeback. Passes to zfs_log_write() in zfs_putpage() + * for syncing writes. Called when the ZIL itx has been written to the log or + * the whole txg syncs, or if the ZIL crashes or the pool suspends. Any failure + * is passed as `err`. + */ +static void +zfs_putpage_commit_cb(void *arg, int err) +{ + zfs_page_writeback_done(arg, err); +} + +/* * Push a page out to disk, once the page is on stable storage the * registered commit callback will be run as notification of completion. * @@ -3853,16 +3882,15 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, err = dmu_tx_assign(tx, DMU_TX_WAIT); if (err != 0) { dmu_tx_abort(tx); -#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO - filemap_dirty_folio(page_mapping(pp), page_folio(pp)); -#else - __set_page_dirty_nobuffers(pp); -#endif - ClearPageError(pp); - end_page_writeback(pp); + zfs_page_writeback_done(pp, err); zfs_rangelock_exit(lr); zfs_exit(zfsvfs, FTAG); - return (err); + + /* + * Don't return error for an async writeback; we've re-dirtied + * the page so it will be tried again some other time. + */ + return (for_sync ? err : 0); } va = kmap(pp); @@ -3916,7 +3944,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, * ALL, zfs_putpage should do it. * * Summary: - * for_sync: 0=unlock immediately; 1 unlock once on disk + * for_sync: 0=unlock immediately; 1=unlock once on disk * sync_mode: NONE=caller will commit; ALL=we will commit */ boolean_t need_commit = (wbc->sync_mode != WB_SYNC_NONE); @@ -3931,16 +3959,24 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, B_FALSE, for_sync ? zfs_putpage_commit_cb : NULL, pp); if (!for_sync) { - ClearPageError(pp); - end_page_writeback(pp); + /* + * Async writeback is logged and written to the DMU, so page + * can now be unlocked. + */ + zfs_page_writeback_done(pp, 0); } dmu_tx_commit(tx); zfs_rangelock_exit(lr); - if (need_commit) - zil_commit(zfsvfs->z_log, zp->z_id); + if (need_commit) { + err = zil_commit_flags(zfsvfs->z_log, zp->z_id, ZIL_COMMIT_NOW); + if (err != 0) { + zfs_exit(zfsvfs, FTAG); + return (err); + } + } dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode_os.c index 7683eeb3cf9f..bcaabeb32b8a 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode_os.c @@ -144,9 +144,9 @@ zfs_znode_cache_destructor(void *buf, void *arg) rw_destroy(&zp->z_xattr_lock); zfs_rangelock_fini(&zp->z_rangelock); - ASSERT3P(zp->z_dirlocks, ==, NULL); - ASSERT3P(zp->z_acl_cached, ==, NULL); - ASSERT3P(zp->z_xattr_cached, ==, NULL); + ASSERT0P(zp->z_dirlocks); + ASSERT0P(zp->z_acl_cached); + ASSERT0P(zp->z_xattr_cached); } static int @@ -178,13 +178,13 @@ zfs_znode_init(void) * backed by kmalloc() when on the Linux slab in order that any * wait_on_bit() operations on the related inode operate properly. */ - ASSERT(znode_cache == NULL); + ASSERT0P(znode_cache); znode_cache = kmem_cache_create("zfs_znode_cache", sizeof (znode_t), 0, zfs_znode_cache_constructor, zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_SLAB | KMC_RECLAIMABLE); - ASSERT(znode_hold_cache == NULL); + ASSERT0P(znode_hold_cache); znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache", sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor, zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0); @@ -327,10 +327,10 @@ zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, mutex_enter(&zp->z_lock); - ASSERT(zp->z_sa_hdl == NULL); - ASSERT(zp->z_acl_cached == NULL); + ASSERT0P(zp->z_sa_hdl); + ASSERT0P(zp->z_acl_cached); if (sa_hdl == NULL) { - VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp, + VERIFY0(sa_handle_get_from_db(zfsvfs->z_os, db, zp, SA_HDL_SHARED, &zp->z_sa_hdl)); } else { zp->z_sa_hdl = sa_hdl; @@ -530,9 +530,9 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, return (NULL); zp = ITOZ(ip); - ASSERT(zp->z_dirlocks == NULL); - ASSERT3P(zp->z_acl_cached, ==, NULL); - ASSERT3P(zp->z_xattr_cached, ==, NULL); + ASSERT0P(zp->z_dirlocks); + ASSERT0P(zp->z_acl_cached); + ASSERT0P(zp->z_xattr_cached); zp->z_unlinked = B_FALSE; zp->z_atime_dirty = B_FALSE; zp->z_is_ctldir = B_FALSE; @@ -611,7 +611,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, * processing so do not hash unlinked znodes. */ if (links > 0) - VERIFY3S(insert_inode_locked(ip), ==, 0); + VERIFY0(insert_inode_locked(ip)); mutex_enter(&zfsvfs->z_znodes_lock); list_insert_tail(&zfsvfs->z_all_znodes, zp); @@ -811,7 +811,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, } /* Now add in all of the "SA" attributes */ - VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED, + VERIFY0(sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED, &sa_hdl)); /* @@ -901,7 +901,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, acl_ids->z_fuid, acl_ids->z_fgid); } - VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); + VERIFY0(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx)); if (!(flag & IS_ROOT_NODE)) { /* @@ -1200,7 +1200,7 @@ zfs_rezget(znode_t *zp) } rw_exit(&zp->z_xattr_lock); - ASSERT(zp->z_sa_hdl == NULL); + ASSERT0P(zp->z_sa_hdl); err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); if (err) { zfs_znode_hold_exit(zfsvfs, zh); @@ -1314,9 +1314,9 @@ zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) zh = zfs_znode_hold_enter(zfsvfs, obj); if (acl_obj) { VERIFY(!zp->z_is_sa); - VERIFY(0 == dmu_object_free(os, acl_obj, tx)); + VERIFY0(dmu_object_free(os, acl_obj, tx)); } - VERIFY(0 == dmu_object_free(os, obj, tx)); + VERIFY0(dmu_object_free(os, obj, tx)); zfs_znode_dmu_fini(zp); zfs_znode_hold_exit(zfsvfs, zh); } @@ -1536,7 +1536,7 @@ zfs_extend(znode_t *zp, uint64_t end) zp->z_size = end; - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)), + VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)), &zp->z_size, sizeof (zp->z_size), tx)); zfs_rangelock_exit(lr); @@ -1726,7 +1726,7 @@ zfs_trunc(znode_t *zp, uint64_t end) SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, 8); } - VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); + VERIFY0(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx)); dmu_tx_commit(tx); zfs_rangelock_exit(lr); @@ -1793,7 +1793,7 @@ log: NULL, &zp->z_pflags, 8); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - ASSERT(error == 0); + ASSERT0(error); zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); @@ -1840,7 +1840,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) moid = MASTER_NODE_OBJ; error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, DMU_OT_NONE, 0, tx); - ASSERT(error == 0); + ASSERT0(error); /* * Set starting attributes. @@ -1853,7 +1853,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) const char *name; ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); - VERIFY(nvpair_value_uint64(elem, &val) == 0); + VERIFY0(nvpair_value_uint64(elem, &val)); name = nvpair_name(elem); if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { if (val < version) @@ -1861,7 +1861,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) } else { error = zap_update(os, moid, name, 8, 1, &val, tx); } - ASSERT(error == 0); + ASSERT0(error); if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) norm = val; else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) @@ -1869,7 +1869,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) } ASSERT(version != 0); error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); - ASSERT(error == 0); + ASSERT0(error); /* * Create zap object used for SA attribute registration @@ -1879,7 +1879,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, DMU_OT_NONE, 0, tx); error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); - ASSERT(error == 0); + ASSERT0(error); } else { sa_obj = 0; } @@ -1889,7 +1889,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); - ASSERT(error == 0); + ASSERT0(error); /* * Create root znode. Create minimal znode/inode/zfsvfs/sb @@ -1922,7 +1922,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table); - ASSERT(error == 0); + ASSERT0(error); /* * Fold case on file systems that are always or sometimes case @@ -1946,12 +1946,12 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL); } - VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, + VERIFY0(zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, cr, NULL, &acl_ids, zfs_init_idmap)); zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); ASSERT3P(zp, ==, rootzp); error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); - ASSERT(error == 0); + ASSERT0(error); zfs_acl_ids_free(&acl_ids); atomic_set(&ZTOI(rootzp)->i_count, 0); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c index ef7bd7352084..d07317b0d910 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2011, Lawrence Livermore National Security, LLC. * Copyright (c) 2015 by Chunwei Chen. All rights reserved. + * Copyright (c) 2025, Klara, Inc. */ @@ -106,6 +107,10 @@ zpl_iterate(struct file *filp, struct dir_context *ctx) return (error); } +static inline int +zpl_write_cache_pages(struct address_space *mapping, + struct writeback_control *wbc, void *data); + static int zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { @@ -115,9 +120,38 @@ zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync) int error; fstrans_cookie_t cookie; - error = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (error) - return (error); + /* + * Force dirty pages in the range out to the DMU and the log, ready + * for zil_commit() to write down. + * + * We call write_cache_pages() directly to ensure that zpl_putpage() is + * called with the flags we need. We need WB_SYNC_NONE to avoid a call + * to zil_commit() (since we're doing this as a kind of pre-sync); but + * we do need for_sync so that the pages remain in writeback until + * they're on disk, and so that we get an error if the DMU write fails. + */ + if (filemap_range_has_page(inode->i_mapping, start, end)) { + int for_sync = 1; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .nr_to_write = LONG_MAX, + .range_start = start, + .range_end = end, + }; + error = + zpl_write_cache_pages(inode->i_mapping, &wbc, &for_sync); + if (error != 0) { + /* + * Unclear what state things are in. zfs_putpage() will + * ensure the pages remain dirty if they haven't been + * written down to the DMU, but because there may be + * nothing logged, we can't assume that zfs_sync() -> + * zil_commit() will give us a useful error. It's + * safest if we just error out here. + */ + return (error); + } + } crhold(cr); cookie = spl_fstrans_mark(); @@ -494,11 +528,30 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc) if (sync_mode != wbc->sync_mode) { if ((result = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (result); - if (zfsvfs->z_log != NULL) - zil_commit(zfsvfs->z_log, zp->z_id); + + if (zfsvfs->z_log != NULL) { + /* + * We don't want to block here if the pool suspends, + * because this is not a syncing op by itself, but + * might be part of one that the caller will + * coordinate. + */ + result = -zil_commit_flags(zfsvfs->z_log, zp->z_id, + ZIL_COMMIT_NOW); + } + zpl_exit(zfsvfs, FTAG); /* + * If zil_commit_flags() failed, it's unclear what state things + * are currently in. putpage() has written back out what it can + * to the DMU, but it may not be on disk. We have little choice + * but to escape. + */ + if (result != 0) + return (result); + + /* * We need to call write_cache_pages() again (we can't just * return after the commit) because the previous call in * non-SYNC mode does not guarantee that we got all the dirty diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c index f9f6406f8b47..f97662d052c7 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c @@ -247,7 +247,7 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, * and fifos, but we want to know if this behavior ever changes. */ if (S_ISSOCK(mode) || S_ISFIFO(mode)) - ASSERT(rdev == 0); + ASSERT0(rdev); crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c index 94dcdd0b887d..53819628627d 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c @@ -49,7 +49,7 @@ zpl_inode_alloc(struct super_block *sb) static void zpl_inode_free(struct inode *ip) { - ASSERT(atomic_read(&ip->i_count) == 0); + ASSERT0(atomic_read(&ip->i_count)); zfs_inode_free(ip); } #endif @@ -57,7 +57,7 @@ zpl_inode_free(struct inode *ip) static void zpl_inode_destroy(struct inode *ip) { - ASSERT(atomic_read(&ip->i_count) == 0); + ASSERT0(atomic_read(&ip->i_count)); zfs_inode_destroy(ip); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c index a098197e7448..d93282db815a 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c @@ -1494,7 +1494,7 @@ zpl_posix_acl_free(void *arg) acl_rel_head = NULL; if (cmpxchg(&acl_rel_tail, &a->next, &acl_rel_head) == &a->next) { - ASSERT3P(a->next, ==, NULL); + ASSERT0P(a->next); a->next = freelist; freelist = a; break; @@ -1544,7 +1544,7 @@ zpl_posix_acl_release_impl(struct posix_acl *acl) a->time = ddi_get_lbolt(); /* atomically points tail to us and get the previous tail */ prev = xchg(&acl_rel_tail, &a->next); - ASSERT3P(*prev, ==, NULL); + ASSERT0P(*prev); *prev = a; /* if it was empty before, schedule the free task */ if (prev == &acl_rel_head) diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index a7431cc4da9d..bac166fcd89e 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2024, Rob Norris <robn@despairlabs.com> - * Copyright (c) 2024, Klara, Inc. + * Copyright (c) 2024, 2025, Klara, Inc. */ #include <sys/dataset_kstats.h> @@ -84,8 +84,9 @@ static unsigned int zvol_blk_mq_blocks_per_thread = 8; static inline void zvol_end_io(struct bio *bio, struct request *rq, int error) { + ASSERT3U(error, >=, 0); if (bio) { - bio->bi_status = errno_to_bi_status(-error); + bio->bi_status = errno_to_bi_status(error); bio_endio(bio); } else { blk_mq_end_request(rq, errno_to_bi_status(error)); @@ -208,8 +209,14 @@ zvol_write(zv_request_t *zvr) disk = zv->zv_zso->zvo_disk; /* bio marked as FLUSH need to flush before write */ - if (io_is_flush(bio, rq)) - zil_commit(zv->zv_zilog, ZVOL_OBJ); + if (io_is_flush(bio, rq)) { + error = zil_commit(zv->zv_zilog, ZVOL_OBJ); + if (error != 0) { + rw_exit(&zv->zv_suspend_lock); + zvol_end_io(bio, rq, -error); + return; + } + } /* Some requests are just for flush and nothing else. */ if (io_size(bio, rq) == 0) { @@ -273,8 +280,8 @@ zvol_write(zv_request_t *zvr) dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten); task_io_account_write(nwritten); - if (sync) - zil_commit(zv->zv_zilog, ZVOL_OBJ); + if (error == 0 && sync) + error = zil_commit(zv->zv_zilog, ZVOL_OBJ); rw_exit(&zv->zv_suspend_lock); @@ -282,7 +289,7 @@ zvol_write(zv_request_t *zvr) blk_generic_end_io_acct(q, disk, WRITE, bio, start_time); } - zvol_end_io(bio, rq, -error); + zvol_end_io(bio, rq, error); } static void @@ -361,7 +368,7 @@ zvol_discard(zv_request_t *zvr) zfs_rangelock_exit(lr); if (error == 0 && sync) - zil_commit(zv->zv_zilog, ZVOL_OBJ); + error = zil_commit(zv->zv_zilog, ZVOL_OBJ); unlock: rw_exit(&zv->zv_suspend_lock); @@ -371,7 +378,7 @@ unlock: start_time); } - zvol_end_io(bio, rq, -error); + zvol_end_io(bio, rq, error); } static void @@ -449,7 +456,7 @@ zvol_read(zv_request_t *zvr) blk_generic_end_io_acct(q, disk, READ, bio, start_time); } - zvol_end_io(bio, rq, -error); + zvol_end_io(bio, rq, error); } static void @@ -480,7 +487,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, int rw = io_data_dir(bio, rq); if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { - zvol_end_io(bio, rq, -SET_ERROR(ENXIO)); + zvol_end_io(bio, rq, SET_ERROR(ENXIO)); goto out; } @@ -499,7 +506,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, (long long unsigned)offset, (long unsigned)size); - zvol_end_io(bio, rq, -SET_ERROR(EIO)); + zvol_end_io(bio, rq, SET_ERROR(EIO)); goto out; } @@ -512,8 +519,8 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, #ifdef HAVE_BLK_MQ_RQ_HCTX blk_mq_hw_queue = rq->mq_hctx->queue_num; #else - blk_mq_hw_queue = - rq->q->queue_hw_ctx[rq->q->mq_map[rq->cpu]]->queue_num; + blk_mq_hw_queue = rq->q->queue_hw_ctx[ + rq->q->mq_map[raw_smp_processor_id()]]->queue_num; #endif taskq_hash = cityhash3((uintptr_t)zv, offset >> ZVOL_TASKQ_OFFSET_SHIFT, blk_mq_hw_queue); @@ -521,7 +528,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, if (rw == WRITE) { if (unlikely(zv->zv_flags & ZVOL_RDONLY)) { - zvol_end_io(bio, rq, -SET_ERROR(EROFS)); + zvol_end_io(bio, rq, SET_ERROR(EROFS)); goto out; } @@ -672,28 +679,19 @@ zvol_open(struct block_device *bdev, fmode_t flag) retry: #endif - rw_enter(&zvol_state_lock, RW_READER); - /* - * Obtain a copy of private_data under the zvol_state_lock to make - * sure that either the result of zvol free code path setting - * disk->private_data to NULL is observed, or zvol_os_free() - * is not called on this zv because of the positive zv_open_count. - */ + #ifdef HAVE_BLK_MODE_T - zv = disk->private_data; + zv = atomic_load_ptr(&disk->private_data); #else - zv = bdev->bd_disk->private_data; + zv = atomic_load_ptr(&bdev->bd_disk->private_data); #endif if (zv == NULL) { - rw_exit(&zvol_state_lock); return (-SET_ERROR(ENXIO)); } mutex_enter(&zv->zv_state_lock); - if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { mutex_exit(&zv->zv_state_lock); - rw_exit(&zvol_state_lock); return (-SET_ERROR(ENXIO)); } @@ -705,8 +703,28 @@ retry: if (zv->zv_open_count == 0) { if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) { mutex_exit(&zv->zv_state_lock); + + /* + * Removal may happen while the locks are down, so + * we can't trust zv any longer; we have to start over. + */ +#ifdef HAVE_BLK_MODE_T + zv = atomic_load_ptr(&disk->private_data); +#else + zv = atomic_load_ptr(&bdev->bd_disk->private_data); +#endif + if (zv == NULL) + return (-SET_ERROR(ENXIO)); + rw_enter(&zv->zv_suspend_lock, RW_READER); mutex_enter(&zv->zv_state_lock); + + if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { + mutex_exit(&zv->zv_state_lock); + rw_exit(&zv->zv_suspend_lock); + return (-SET_ERROR(ENXIO)); + } + /* check to see if zv_suspend_lock is needed */ if (zv->zv_open_count != 0) { rw_exit(&zv->zv_suspend_lock); @@ -717,7 +735,6 @@ retry: drop_suspend = B_TRUE; } } - rw_exit(&zvol_state_lock); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -814,11 +831,11 @@ zvol_release(struct gendisk *disk, fmode_t unused) #if !defined(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG) (void) unused; #endif - zvol_state_t *zv; boolean_t drop_suspend = B_TRUE; - rw_enter(&zvol_state_lock, RW_READER); - zv = disk->private_data; + zvol_state_t *zv = atomic_load_ptr(&disk->private_data); + if (zv == NULL) + return; mutex_enter(&zv->zv_state_lock); ASSERT3U(zv->zv_open_count, >, 0); @@ -832,6 +849,15 @@ zvol_release(struct gendisk *disk, fmode_t unused) mutex_exit(&zv->zv_state_lock); rw_enter(&zv->zv_suspend_lock, RW_READER); mutex_enter(&zv->zv_state_lock); + + /* + * Unlike in zvol_open(), we don't check if removal + * started here, because we might be one of the openers + * that needs to be thrown out! If we're the last, we + * need to call zvol_last_close() below to finish + * cleanup. So, no special treatment for us. + */ + /* check to see if zv_suspend_lock is needed */ if (zv->zv_open_count != 1) { rw_exit(&zv->zv_suspend_lock); @@ -841,7 +867,6 @@ zvol_release(struct gendisk *disk, fmode_t unused) } else { drop_suspend = B_FALSE; } - rw_exit(&zvol_state_lock); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -861,9 +886,10 @@ static int zvol_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - zvol_state_t *zv = bdev->bd_disk->private_data; int error = 0; + zvol_state_t *zv = atomic_load_ptr(&bdev->bd_disk->private_data); + ASSERT3P(zv, !=, NULL); ASSERT3U(zv->zv_open_count, >, 0); switch (cmd) { @@ -886,16 +912,18 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode, case BLKZNAME: mutex_enter(&zv->zv_state_lock); - error = copy_to_user((void *)arg, zv->zv_name, MAXNAMELEN); + error = -copy_to_user((void *)arg, zv->zv_name, MAXNAMELEN); mutex_exit(&zv->zv_state_lock); + if (error) + error = SET_ERROR(error); break; default: - error = -ENOTTY; + error = SET_ERROR(ENOTTY); break; } - return (SET_ERROR(error)); + return (-error); } #ifdef CONFIG_COMPAT @@ -914,9 +942,8 @@ zvol_check_events(struct gendisk *disk, unsigned int clearing) { unsigned int mask = 0; - rw_enter(&zvol_state_lock, RW_READER); + zvol_state_t *zv = atomic_load_ptr(&disk->private_data); - zvol_state_t *zv = disk->private_data; if (zv != NULL) { mutex_enter(&zv->zv_state_lock); mask = zv->zv_changed ? DISK_EVENT_MEDIA_CHANGE : 0; @@ -924,17 +951,14 @@ zvol_check_events(struct gendisk *disk, unsigned int clearing) mutex_exit(&zv->zv_state_lock); } - rw_exit(&zvol_state_lock); - return (mask); } static int zvol_revalidate_disk(struct gendisk *disk) { - rw_enter(&zvol_state_lock, RW_READER); + zvol_state_t *zv = atomic_load_ptr(&disk->private_data); - zvol_state_t *zv = disk->private_data; if (zv != NULL) { mutex_enter(&zv->zv_state_lock); set_capacity(zv->zv_zso->zvo_disk, @@ -942,8 +966,6 @@ zvol_revalidate_disk(struct gendisk *disk) mutex_exit(&zv->zv_state_lock); } - rw_exit(&zvol_state_lock); - return (0); } @@ -962,16 +984,6 @@ zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize) return (0); } -void -zvol_os_clear_private(zvol_state_t *zv) -{ - /* - * Cleared while holding zvol_state_lock as a writer - * which will prevent zvol_open() from opening it. - */ - zv->zv_zso->zvo_disk->private_data = NULL; -} - /* * Provide a simple virtual geometry for legacy compatibility. For devices * smaller than 1 MiB a small head and sector count is used to allow very @@ -981,9 +993,10 @@ zvol_os_clear_private(zvol_state_t *zv) static int zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - zvol_state_t *zv = bdev->bd_disk->private_data; sector_t sectors; + zvol_state_t *zv = atomic_load_ptr(&bdev->bd_disk->private_data); + ASSERT3P(zv, !=, NULL); ASSERT3U(zv->zv_open_count, >, 0); sectors = get_capacity(zv->zv_zso->zvo_disk); @@ -1408,53 +1421,70 @@ out_kmem: return (ret); } -/* - * Cleanup then free a zvol_state_t which was created by zvol_alloc(). - * At this time, the structure is not opened by anyone, is taken off - * the zvol_state_list, and has its private data set to NULL. - * The zvol_state_lock is dropped. - * - * This function may take many milliseconds to complete (e.g. we've seen - * it take over 256ms), due to the calls to "blk_cleanup_queue" and - * "del_gendisk". Thus, consumers need to be careful to account for this - * latency when calling this function. - */ void -zvol_os_free(zvol_state_t *zv) +zvol_os_remove_minor(zvol_state_t *zv) { - - ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); - ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); ASSERT0(zv->zv_open_count); - ASSERT3P(zv->zv_zso->zvo_disk->private_data, ==, NULL); + ASSERT0(atomic_read(&zv->zv_suspend_ref)); + ASSERT(zv->zv_flags & ZVOL_REMOVING); - rw_destroy(&zv->zv_suspend_lock); - zfs_rangelock_fini(&zv->zv_rangelock); + struct zvol_state_os *zso = zv->zv_zso; + zv->zv_zso = NULL; + + /* Clearing private_data will make new callers return immediately. */ + atomic_store_ptr(&zso->zvo_disk->private_data, NULL); - del_gendisk(zv->zv_zso->zvo_disk); + /* + * Drop the state lock before calling del_gendisk(). There may be + * callers waiting to acquire it, but del_gendisk() will block until + * they exit, which would deadlock. + */ + mutex_exit(&zv->zv_state_lock); + + del_gendisk(zso->zvo_disk); #if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \ (defined(HAVE_BLK_ALLOC_DISK) || defined(HAVE_BLK_ALLOC_DISK_2ARG)) #if defined(HAVE_BLK_CLEANUP_DISK) - blk_cleanup_disk(zv->zv_zso->zvo_disk); + blk_cleanup_disk(zso->zvo_disk); #else - put_disk(zv->zv_zso->zvo_disk); + put_disk(zso->zvo_disk); #endif #else - blk_cleanup_queue(zv->zv_zso->zvo_queue); - put_disk(zv->zv_zso->zvo_disk); + blk_cleanup_queue(zso->zvo_queue); + put_disk(zso->zvo_disk); #endif - if (zv->zv_zso->use_blk_mq) - blk_mq_free_tag_set(&zv->zv_zso->tag_set); + if (zso->use_blk_mq) + blk_mq_free_tag_set(&zso->tag_set); - ida_simple_remove(&zvol_ida, - MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS); + ida_simple_remove(&zvol_ida, MINOR(zso->zvo_dev) >> ZVOL_MINOR_BITS); + + kmem_free(zso, sizeof (struct zvol_state_os)); + + mutex_enter(&zv->zv_state_lock); +} + +void +zvol_os_free(zvol_state_t *zv) +{ + + ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); + ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); + ASSERT0(zv->zv_open_count); + ASSERT0P(zv->zv_zso); + + ASSERT0P(zv->zv_objset); + ASSERT0P(zv->zv_zilog); + ASSERT0P(zv->zv_dn); + + rw_destroy(&zv->zv_suspend_lock); + zfs_rangelock_fini(&zv->zv_rangelock); cv_destroy(&zv->zv_removing_cv); mutex_destroy(&zv->zv_state_lock); dataset_kstats_destroy(&zv->zv_kstat); - kmem_free(zv->zv_zso, sizeof (struct zvol_state_os)); kmem_free(zv, sizeof (zvol_state_t)); } @@ -1474,7 +1504,9 @@ __zvol_os_add_disk(struct gendisk *disk) { int error = 0; #ifdef HAVE_ADD_DISK_RET - error = add_disk(disk); + error = -add_disk(disk); + if (error) + error = SET_ERROR(error); #else add_disk(disk); #endif @@ -1649,11 +1681,11 @@ zvol_os_create_minor(const char *name) blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, zv->zv_zso->zvo_queue); #endif - ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL); + ASSERT0P(zv->zv_kstat.dk_kstats); error = dataset_kstats_create(&zv->zv_kstat, zv->zv_objset); if (error) goto out_dmu_objset_disown; - ASSERT3P(zv->zv_zilog, ==, NULL); + ASSERT0P(zv->zv_zilog); zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums); if (spa_writeable(dmu_objset_spa(os))) { if (zil_replay_disable) @@ -1759,10 +1791,10 @@ zvol_init(void) return (error); } - error = register_blkdev(zvol_major, ZVOL_DRIVER); + error = -register_blkdev(zvol_major, ZVOL_DRIVER); if (error) { printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error); - return (error); + return (SET_ERROR(error)); } if (zvol_blk_mq_queue_depth == 0) { diff --git a/sys/contrib/openzfs/module/zcommon/simd_stat.c b/sys/contrib/openzfs/module/zcommon/simd_stat.c index 11e2080ff9f2..007ae9e4fbbc 100644 --- a/sys/contrib/openzfs/module/zcommon/simd_stat.c +++ b/sys/contrib/openzfs/module/zcommon/simd_stat.c @@ -118,6 +118,10 @@ simd_stat_kstat_data(char *buf, size_t size, void *data) "pclmulqdq", zfs_pclmulqdq_available()); off += SIMD_STAT_PRINT(simd_stat_kstat_payload, "movbe", zfs_movbe_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "vaes", zfs_vaes_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "vpclmulqdq", zfs_vpclmulqdq_available()); off += SIMD_STAT_PRINT(simd_stat_kstat_payload, "osxsave", boot_cpu_has(X86_FEATURE_OSXSAVE)); diff --git a/sys/contrib/openzfs/module/zfs/abd.c b/sys/contrib/openzfs/module/zfs/abd.c index 826928e67350..bf9b13c30509 100644 --- a/sys/contrib/openzfs/module/zfs/abd.c +++ b/sys/contrib/openzfs/module/zfs/abd.c @@ -563,7 +563,7 @@ abd_get_offset_impl(abd_t *abd, abd_t *sabd, size_t off, size_t size) left -= csize; off = 0; } - ASSERT3U(left, ==, 0); + ASSERT0(left); } else { abd = abd_get_offset_scatter(abd, sabd, off, size); } diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index 3483be64ec57..df41e3b49204 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -2239,8 +2239,8 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state) ASSERT(HDR_HAS_L1HDR(hdr)); if (GHOST_STATE(state)) { - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_buf); + ASSERT0P(hdr->b_l1hdr.b_pabd); ASSERT(!HDR_HAS_RABD(hdr)); (void) zfs_refcount_add_many(&state->arcs_esize[type], HDR_GET_LSIZE(hdr), hdr); @@ -2278,8 +2278,8 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state) ASSERT(HDR_HAS_L1HDR(hdr)); if (GHOST_STATE(state)) { - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_buf); + ASSERT0P(hdr->b_l1hdr.b_pabd); ASSERT(!HDR_HAS_RABD(hdr)); (void) zfs_refcount_remove_many(&state->arcs_esize[type], HDR_GET_LSIZE(hdr), hdr); @@ -2319,7 +2319,7 @@ add_reference(arc_buf_hdr_t *hdr, const void *tag) if (!HDR_EMPTY(hdr) && !MUTEX_HELD(HDR_LOCK(hdr))) { ASSERT(state == arc_anon); ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_buf); } if ((zfs_refcount_add(&hdr->b_l1hdr.b_refcnt, tag) == 1) && @@ -2503,7 +2503,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr) (void) zfs_refcount_add_many( &new_state->arcs_size[type], HDR_GET_LSIZE(hdr), hdr); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_pabd); ASSERT(!HDR_HAS_RABD(hdr)); } else { @@ -2547,7 +2547,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr) if (update_old && old_state != arc_l2c_only) { ASSERT(HDR_HAS_L1HDR(hdr)); if (GHOST_STATE(old_state)) { - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_pabd); ASSERT(!HDR_HAS_RABD(hdr)); /* @@ -2758,7 +2758,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb, VERIFY(hdr->b_type == ARC_BUFC_DATA || hdr->b_type == ARC_BUFC_METADATA); ASSERT3P(ret, !=, NULL); - ASSERT3P(*ret, ==, NULL); + ASSERT0P(*ret); IMPLY(encrypted, compressed); buf = *ret = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); @@ -2982,7 +2982,7 @@ static void arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) { ASSERT(arc_can_share(hdr, buf)); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_pabd); ASSERT(!ARC_BUF_ENCRYPTED(buf)); ASSERT(HDR_EMPTY_OR_LOCKED(hdr)); @@ -3201,14 +3201,14 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, int alloc_flags) if (alloc_rdata) { size = HDR_GET_PSIZE(hdr); - ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL); + ASSERT0P(hdr->b_crypt_hdr.b_rabd); hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr, alloc_flags); ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL); ARCSTAT_INCR(arcstat_raw_size, size); } else { size = arc_hdr_size(hdr); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_pabd); hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr, alloc_flags); ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); @@ -3290,7 +3290,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, ASSERT(HDR_EMPTY(hdr)); #ifdef ZFS_DEBUG - ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_freeze_cksum); #endif HDR_SET_PSIZE(hdr, psize); HDR_SET_LSIZE(hdr, lsize); @@ -3351,12 +3351,12 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) nhdr->b_l1hdr.b_state = arc_l2c_only; /* Verify previous threads set to NULL before freeing */ - ASSERT3P(nhdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT0P(nhdr->b_l1hdr.b_pabd); ASSERT(!HDR_HAS_RABD(hdr)); } else { - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_buf); #ifdef ZFS_DEBUG - ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_freeze_cksum); #endif /* @@ -3375,7 +3375,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) * might try to be accessed, even though it was removed. */ VERIFY(!HDR_L2_WRITING(hdr)); - VERIFY3P(hdr->b_l1hdr.b_pabd, ==, NULL); + VERIFY0P(hdr->b_l1hdr.b_pabd); ASSERT(!HDR_HAS_RABD(hdr)); arc_hdr_clear_flags(nhdr, ARC_FLAG_HAS_L1HDR); @@ -3698,12 +3698,12 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr) arc_hdr_free_abd(hdr, B_TRUE); } - ASSERT3P(hdr->b_hash_next, ==, NULL); + ASSERT0P(hdr->b_hash_next); if (HDR_HAS_L1HDR(hdr)) { ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); - ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_acb); #ifdef ZFS_DEBUG - ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_freeze_cksum); #endif kmem_cache_free(hdr_full_cache, hdr); } else { @@ -3771,7 +3771,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted) ASSERT(MUTEX_HELD(HDR_LOCK(hdr))); ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(!HDR_IO_IN_PROGRESS(hdr)); - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_buf); ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt)); *real_evicted = 0; @@ -3796,7 +3796,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted) DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, hdr); if (HDR_HAS_L2HDR(hdr)) { - ASSERT(hdr->b_l1hdr.b_pabd == NULL); + ASSERT0P(hdr->b_l1hdr.b_pabd); ASSERT(!HDR_HAS_RABD(hdr)); /* * This buffer is cached on the 2nd Level ARC; @@ -5554,7 +5554,7 @@ static void arc_hdr_verify(arc_buf_hdr_t *hdr, blkptr_t *bp) { if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) { - ASSERT3U(HDR_GET_PSIZE(hdr), ==, 0); + ASSERT0(HDR_GET_PSIZE(hdr)); ASSERT3U(arc_hdr_get_compress(hdr), ==, ZIO_COMPRESS_OFF); } else { if (HDR_COMPRESSION_ENABLED(hdr)) { @@ -6132,14 +6132,14 @@ top: } if (GHOST_STATE(hdr->b_l1hdr.b_state)) { - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_pabd); ASSERT(!HDR_HAS_RABD(hdr)); ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ASSERT0(zfs_refcount_count( &hdr->b_l1hdr.b_refcnt)); - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_buf); #ifdef ZFS_DEBUG - ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_freeze_cksum); #endif } else if (HDR_IO_IN_PROGRESS(hdr)) { /* @@ -6233,7 +6233,7 @@ top: acb->acb_nobuf = no_buf; acb->acb_zb = *zb; - ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_acb); hdr->b_l1hdr.b_acb = acb; if (HDR_HAS_L2HDR(hdr) && @@ -6717,7 +6717,7 @@ arc_release(arc_buf_t *buf, const void *tag) nhdr = arc_hdr_alloc(spa, psize, lsize, protected, compress, hdr->b_complevel, type); - ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL); + ASSERT0P(nhdr->b_l1hdr.b_buf); ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt)); VERIFY3U(nhdr->b_type, ==, type); ASSERT(!HDR_SHARED_DATA(nhdr)); @@ -6804,7 +6804,7 @@ arc_write_ready(zio_t *zio) if (HDR_HAS_RABD(hdr)) arc_hdr_free_abd(hdr, B_TRUE); } - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_pabd); ASSERT(!HDR_HAS_RABD(hdr)); ASSERT(!HDR_SHARED_DATA(hdr)); ASSERT(!arc_buf_is_shared(buf)); @@ -6948,7 +6948,7 @@ arc_write_done(zio_t *zio) arc_buf_t *buf = callback->awcb_buf; arc_buf_hdr_t *hdr = buf->b_hdr; - ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_acb); if (zio->io_error == 0) { arc_hdr_verify(hdr, zio->io_bp); @@ -6973,7 +6973,7 @@ arc_write_done(zio_t *zio) arc_buf_hdr_t *exists; kmutex_t *hash_lock; - ASSERT3U(zio->io_error, ==, 0); + ASSERT0(zio->io_error); arc_cksum_verify(buf); @@ -6994,7 +6994,7 @@ arc_write_done(zio_t *zio) arc_hdr_destroy(exists); mutex_exit(hash_lock); exists = buf_hash_insert(hdr, &hash_lock); - ASSERT3P(exists, ==, NULL); + ASSERT0P(exists); } else if (zio->io_flags & ZIO_FLAG_NOPWRITE) { /* nopwrite */ ASSERT(zio->io_prop.zp_nopwrite); @@ -7007,7 +7007,7 @@ arc_write_done(zio_t *zio) ASSERT(ARC_BUF_LAST(hdr->b_l1hdr.b_buf)); ASSERT(hdr->b_l1hdr.b_state == arc_anon); ASSERT(BP_GET_DEDUP(zio->io_bp)); - ASSERT(BP_GET_LEVEL(zio->io_bp) == 0); + ASSERT0(BP_GET_LEVEL(zio->io_bp)); } } arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); @@ -7044,7 +7044,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, ASSERT3P(done, !=, NULL); ASSERT(!HDR_IO_ERROR(hdr)); ASSERT(!HDR_IO_IN_PROGRESS(hdr)); - ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_acb); ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL); if (uncached) arc_hdr_set_flags(hdr, ARC_FLAG_UNCACHED); @@ -7113,7 +7113,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF); ASSERT(!arc_buf_is_shared(buf)); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT0P(hdr->b_l1hdr.b_pabd); zio = zio_write(pio, spa, txg, bp, abd_get_from_buf(buf->b_data, HDR_GET_LSIZE(hdr)), diff --git a/sys/contrib/openzfs/module/zfs/bpobj.c b/sys/contrib/openzfs/module/zfs/bpobj.c index 0a8a077edf63..ea9fbd036c6e 100644 --- a/sys/contrib/openzfs/module/zfs/bpobj.c +++ b/sys/contrib/openzfs/module/zfs/bpobj.c @@ -160,8 +160,8 @@ bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object) memset(bpo, 0, sizeof (*bpo)); mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL); - ASSERT(bpo->bpo_dbuf == NULL); - ASSERT(bpo->bpo_phys == NULL); + ASSERT0P(bpo->bpo_dbuf); + ASSERT0P(bpo->bpo_phys); ASSERT(object != 0); ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ); ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR); @@ -478,7 +478,7 @@ bpobj_iterate_impl(bpobj_t *initial_bpo, bpobj_itor_t func, void *arg, * We have unprocessed subobjs. Process the next one. */ ASSERT(bpo->bpo_havecomp); - ASSERT3P(bpobj_size, ==, NULL); + ASSERT0P(bpobj_size); /* Add the last subobj to stack. */ int64_t i = bpi->bpi_unprocessed_subobjs - 1; diff --git a/sys/contrib/openzfs/module/zfs/btree.c b/sys/contrib/openzfs/module/zfs/btree.c index aa282f711bc3..725b96a3b2c7 100644 --- a/sys/contrib/openzfs/module/zfs/btree.c +++ b/sys/contrib/openzfs/module/zfs/btree.c @@ -1110,7 +1110,7 @@ zfs_btree_add_idx(zfs_btree_t *tree, const void *value, if (where->bti_node == NULL) { ASSERT3U(tree->bt_num_elems, ==, 1); ASSERT3S(tree->bt_height, ==, -1); - ASSERT3P(tree->bt_root, ==, NULL); + ASSERT0P(tree->bt_root); ASSERT0(where->bti_offset); tree->bt_num_nodes++; @@ -1947,7 +1947,7 @@ void zfs_btree_destroy(zfs_btree_t *tree) { ASSERT0(tree->bt_num_elems); - ASSERT3P(tree->bt_root, ==, NULL); + ASSERT0P(tree->bt_root); } /* Verify that every child of this node has the correct parent pointer. */ @@ -1969,10 +1969,10 @@ static void zfs_btree_verify_pointers(zfs_btree_t *tree) { if (tree->bt_height == -1) { - VERIFY3P(tree->bt_root, ==, NULL); + VERIFY0P(tree->bt_root); return; } - VERIFY3P(tree->bt_root->bth_parent, ==, NULL); + VERIFY0P(tree->bt_root->bth_parent); zfs_btree_verify_pointers_helper(tree, tree->bt_root); } diff --git a/sys/contrib/openzfs/module/zfs/dataset_kstats.c b/sys/contrib/openzfs/module/zfs/dataset_kstats.c index d3baabd6169f..e5abcd2044cf 100644 --- a/sys/contrib/openzfs/module/zfs/dataset_kstats.c +++ b/sys/contrib/openzfs/module/zfs/dataset_kstats.c @@ -44,6 +44,7 @@ static dataset_kstat_values_t empty_dataset_kstats = { { "zil_commit_error_count", KSTAT_DATA_UINT64 }, { "zil_commit_stall_count", KSTAT_DATA_UINT64 }, { "zil_commit_suspend_count", KSTAT_DATA_UINT64 }, + { "zil_commit_crash_count", KSTAT_DATA_UINT64 }, { "zil_itx_count", KSTAT_DATA_UINT64 }, { "zil_itx_indirect_count", KSTAT_DATA_UINT64 }, { "zil_itx_indirect_bytes", KSTAT_DATA_UINT64 }, diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index 432c99cec960..7403f10d91b7 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -523,7 +523,7 @@ dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type) return; /* Only data blocks support the attachment of user data. */ - ASSERT(db->db_level == 0); + ASSERT0(db->db_level); /* Clients must resolve a dbuf before attaching user data. */ ASSERT(db->db.db_data != NULL); @@ -1128,8 +1128,8 @@ dbuf_verify(dmu_buf_impl_t *db) DB_DNODE_ENTER(db); dn = DB_DNODE(db); if (dn == NULL) { - ASSERT(db->db_parent == NULL); - ASSERT(db->db_blkptr == NULL); + ASSERT0P(db->db_parent); + ASSERT0P(db->db_blkptr); } else { ASSERT3U(db->db.db_object, ==, dn->dn_object); ASSERT3P(db->db_objset, ==, dn->dn_objset); @@ -1180,7 +1180,7 @@ dbuf_verify(dmu_buf_impl_t *db) /* db is pointed to by the dnode */ /* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */ if (DMU_OBJECT_IS_SPECIAL(db->db.db_object)) - ASSERT(db->db_parent == NULL); + ASSERT0P(db->db_parent); else ASSERT(db->db_parent != NULL); if (db->db_blkid != DMU_SPILL_BLKID) @@ -1219,7 +1219,7 @@ dbuf_verify(dmu_buf_impl_t *db) int i; for (i = 0; i < db->db.db_size >> 3; i++) { - ASSERT(buf[i] == 0); + ASSERT0(buf[i]); } } else { blkptr_t *bps = db->db.db_data; @@ -1259,7 +1259,7 @@ dbuf_clear_data(dmu_buf_impl_t *db) { ASSERT(MUTEX_HELD(&db->db_mtx)); dbuf_evict_user(db); - ASSERT3P(db->db_buf, ==, NULL); + ASSERT0P(db->db_buf); db->db.db_data = NULL; if (db->db_state != DB_NOFILL) { db->db_state = DB_UNCACHED; @@ -1384,13 +1384,13 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp, * All reads are synchronous, so we must have a hold on the dbuf */ ASSERT(zfs_refcount_count(&db->db_holds) > 0); - ASSERT(db->db_buf == NULL); - ASSERT(db->db.db_data == NULL); + ASSERT0P(db->db_buf); + ASSERT0P(db->db.db_data); if (buf == NULL) { /* i/o error */ ASSERT(zio == NULL || zio->io_error != 0); ASSERT(db->db_blkid != DMU_BONUS_BLKID); - ASSERT3P(db->db_buf, ==, NULL); + ASSERT0P(db->db_buf); db->db_state = DB_UNCACHED; DTRACE_SET_STATE(db, "i/o error"); } else if (db->db_level == 0 && db->db_freed_in_flight) { @@ -1584,7 +1584,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, dmu_flags_t flags, ASSERT(!zfs_refcount_is_zero(&db->db_holds)); ASSERT(MUTEX_HELD(&db->db_mtx)); ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL); - ASSERT(db->db_buf == NULL); + ASSERT0P(db->db_buf); ASSERT(db->db_parent == NULL || RW_LOCK_HELD(&db->db_parent->db_rwlock)); @@ -1682,7 +1682,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg) ASSERT(MUTEX_HELD(&db->db_mtx)); ASSERT(db->db.db_data != NULL); - ASSERT(db->db_level == 0); + ASSERT0(db->db_level); ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT); if (dr == NULL || @@ -1901,8 +1901,8 @@ dbuf_noread(dmu_buf_impl_t *db, dmu_flags_t flags) while (db->db_state == DB_READ || db->db_state == DB_FILL) cv_wait(&db->db_changed, &db->db_mtx); if (db->db_state == DB_UNCACHED) { - ASSERT(db->db_buf == NULL); - ASSERT(db->db.db_data == NULL); + ASSERT0P(db->db_buf); + ASSERT0P(db->db.db_data); dbuf_set_data(db, dbuf_alloc_arcbuf(db)); db->db_state = DB_FILL; DTRACE_SET_STATE(db, "assigning filled buffer"); @@ -1929,7 +1929,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) * comes from dbuf_dirty() callers who must also hold a range lock. */ ASSERT(dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC); - ASSERT(db->db_level == 0); + ASSERT0(db->db_level); if (db->db_blkid == DMU_BONUS_BLKID || dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN) @@ -1994,7 +1994,7 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, mutex_enter(&dn->dn_dbufs_mtx); db = avl_find(&dn->dn_dbufs, db_search, &where); - ASSERT3P(db, ==, NULL); + ASSERT0P(db); db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER); @@ -2017,7 +2017,7 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, if (db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL || db->db_state == DB_EVICTING) { - ASSERT(db->db.db_data == NULL); + ASSERT0P(db->db.db_data); mutex_exit(&db->db_mtx); continue; } @@ -2557,12 +2557,13 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) /* * Due to our use of dn_nlevels below, this can only be called - * in open context, unless we are operating on the MOS. - * From syncing context, dn_nlevels may be different from the - * dn_nlevels used when dbuf was dirtied. + * in open context, unless we are operating on the MOS or it's + * a special object. From syncing context, dn_nlevels may be + * different from the dn_nlevels used when dbuf was dirtied. */ ASSERT(db->db_objset == dmu_objset_pool(db->db_objset)->dp_meta_objset || + DMU_OBJECT_IS_SPECIAL(db->db.db_object) || txg != spa_syncing_txg(dmu_objset_spa(db->db_objset))); ASSERT(db->db_blkid != DMU_BONUS_BLKID); ASSERT0(db->db_level); @@ -2896,8 +2897,8 @@ dmu_buf_will_clone_or_dio(dmu_buf_t *db_fake, dmu_tx_t *tx) dbuf_clear_data(db); } - ASSERT3P(db->db_buf, ==, NULL); - ASSERT3P(db->db.db_data, ==, NULL); + ASSERT0P(db->db_buf); + ASSERT0P(db->db.db_data); db->db_state = DB_NOFILL; DTRACE_SET_STATE(db, @@ -2932,7 +2933,7 @@ dmu_buf_will_fill_flags(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail, ASSERT(db->db_blkid != DMU_BONUS_BLKID); ASSERT(tx->tx_txg != 0); - ASSERT(db->db_level == 0); + ASSERT0(db->db_level); ASSERT(!zfs_refcount_is_zero(&db->db_holds)); ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT || @@ -3144,7 +3145,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx, { ASSERT(!zfs_refcount_is_zero(&db->db_holds)); ASSERT(db->db_blkid != DMU_BONUS_BLKID); - ASSERT(db->db_level == 0); + ASSERT0(db->db_level); ASSERT3U(dbuf_is_metadata(db), ==, arc_is_metadata(buf)); ASSERT(buf != NULL); ASSERT3U(arc_buf_lsize(buf), ==, db->db.db_size); @@ -3209,7 +3210,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx, VERIFY(!dbuf_undirty(db, tx)); db->db_state = DB_UNCACHED; } - ASSERT(db->db_buf == NULL); + ASSERT0P(db->db_buf); dbuf_set_data(db, buf); db->db_state = DB_FILL; DTRACE_SET_STATE(db, "filling assigned arcbuf"); @@ -3269,7 +3270,7 @@ dbuf_destroy(dmu_buf_impl_t *db) } ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL); - ASSERT(db->db_data_pending == NULL); + ASSERT0P(db->db_data_pending); ASSERT(list_is_empty(&db->db_dirty_records)); db->db_state = DB_EVICTING; @@ -3321,11 +3322,11 @@ dbuf_destroy(dmu_buf_impl_t *db) db->db_parent = NULL; - ASSERT(db->db_buf == NULL); - ASSERT(db->db.db_data == NULL); - ASSERT(db->db_hash_next == NULL); - ASSERT(db->db_blkptr == NULL); - ASSERT(db->db_data_pending == NULL); + ASSERT0P(db->db_buf); + ASSERT0P(db->db.db_data); + ASSERT0P(db->db_hash_next); + ASSERT0P(db->db_blkptr); + ASSERT0P(db->db_data_pending); ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE); ASSERT(!multilist_link_active(&db->db_cache_link)); @@ -3960,7 +3961,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, if (fail_uncached) return (SET_ERROR(ENOENT)); - ASSERT3P(parent, ==, NULL); + ASSERT0P(parent); err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp); if (fail_sparse) { if (err == 0 && bp && BP_IS_HOLE(bp)) @@ -4064,7 +4065,7 @@ dbuf_create_bonus(dnode_t *dn) { ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); - ASSERT(dn->dn_bonus == NULL); + ASSERT0P(dn->dn_bonus); dn->dn_bonus = dbuf_create(dn, 0, DMU_BONUS_BLKID, dn->dn_dbuf, NULL, dbuf_hash(dn->dn_objset, dn->dn_object, 0, DMU_BONUS_BLKID)); dn->dn_bonus->db_pending_evict = FALSE; @@ -4416,7 +4417,7 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db) * inappropriate to hook it in (i.e., nlevels mismatch). */ ASSERT(db->db_blkid < dn->dn_phys->dn_nblkptr); - ASSERT(db->db_parent == NULL); + ASSERT0P(db->db_parent); db->db_parent = dn->dn_dbuf; db->db_blkptr = &dn->dn_phys->dn_blkptr[db->db_blkid]; DBUF_VERIFY(db); @@ -4477,7 +4478,7 @@ dbuf_prepare_encrypted_dnode_leaf(dbuf_dirty_record_t *dr) ASSERT(MUTEX_HELD(&db->db_mtx)); ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT); - ASSERT3U(db->db_level, ==, 0); + ASSERT0(db->db_level); if (!db->db_objset->os_raw_receive && arc_is_encrypted(db->db_buf)) { zbookmark_phys_t zb; @@ -4588,7 +4589,7 @@ dbuf_sync_leaf_verify_bonus_dnode(dbuf_dirty_record_t *dr) /* ensure that everything is zero after our data */ for (; datap_end < datap_max; datap_end++) - ASSERT(*datap_end == 0); + ASSERT0(*datap_end); #endif } @@ -4596,7 +4597,7 @@ static blkptr_t * dbuf_lightweight_bp(dbuf_dirty_record_t *dr) { /* This must be a lightweight dirty record. */ - ASSERT3P(dr->dr_dbuf, ==, NULL); + ASSERT0P(dr->dr_dbuf); dnode_t *dn = dr->dr_dnode; if (dn->dn_phys->dn_nlevels == 1) { @@ -4739,7 +4740,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) */ if (db->db_state == DB_UNCACHED) { /* This buffer has been freed since it was dirtied */ - ASSERT3P(db->db.db_data, ==, NULL); + ASSERT0P(db->db.db_data); } else if (db->db_state == DB_FILL) { /* This buffer was freed and is now being re-filled */ ASSERT(db->db.db_data != dr->dt.dl.dr_data); @@ -4756,9 +4757,9 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) */ dbuf_dirty_record_t *dr_head = list_head(&db->db_dirty_records); - ASSERT3P(db->db_buf, ==, NULL); - ASSERT3P(db->db.db_data, ==, NULL); - ASSERT3P(dr_head->dt.dl.dr_data, ==, NULL); + ASSERT0P(db->db_buf); + ASSERT0P(db->db.db_data); + ASSERT0P(dr_head->dt.dl.dr_data); ASSERT3U(dr_head->dt.dl.dr_override_state, ==, DR_OVERRIDDEN); } else { ASSERT(db->db_state == DB_CACHED || db->db_state == DB_NOFILL); diff --git a/sys/contrib/openzfs/module/zfs/ddt.c b/sys/contrib/openzfs/module/zfs/ddt.c index e0b9fc3951ff..d6658375f810 100644 --- a/sys/contrib/openzfs/module/zfs/ddt.c +++ b/sys/contrib/openzfs/module/zfs/ddt.c @@ -397,7 +397,7 @@ ddt_object_create(ddt_t *ddt, ddt_type_t type, ddt_class_t class, ddt_object_name(ddt, type, class, name); - ASSERT3U(*objectp, ==, 0); + ASSERT0(*objectp); VERIFY0(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash)); ASSERT3U(*objectp, !=, 0); @@ -1011,7 +1011,7 @@ ddt_free(const ddt_t *ddt, ddt_entry_t *dde) { if (dde->dde_io != NULL) { for (int p = 0; p < DDT_NPHYS(ddt); p++) - ASSERT3P(dde->dde_io->dde_lead_zio[p], ==, NULL); + ASSERT0P(dde->dde_io->dde_lead_zio[p]); if (dde->dde_io->dde_repair_abd != NULL) abd_free(dde->dde_io->dde_repair_abd); @@ -1421,7 +1421,7 @@ ddt_key_compare(const void *x1, const void *x2) static void ddt_create_dir(ddt_t *ddt, dmu_tx_t *tx) { - ASSERT3U(ddt->ddt_dir_object, ==, 0); + ASSERT0(ddt->ddt_dir_object); ASSERT3U(ddt->ddt_version, ==, DDT_VERSION_FDT); char name[DDT_NAMELEN]; @@ -2395,7 +2395,7 @@ ddt_sync(spa_t *spa, uint64_t txg) * scan's root zio here so that we can wait for any scan IOs in * addition to the regular ddt IOs. */ - ASSERT3P(scn->scn_zio_root, ==, NULL); + ASSERT0P(scn->scn_zio_root); scn->scn_zio_root = rio; for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { diff --git a/sys/contrib/openzfs/module/zfs/ddt_log.c b/sys/contrib/openzfs/module/zfs/ddt_log.c index dbd381aa9609..3d30e244c1f7 100644 --- a/sys/contrib/openzfs/module/zfs/ddt_log.c +++ b/sys/contrib/openzfs/module/zfs/ddt_log.c @@ -116,7 +116,7 @@ static void ddt_log_create_one(ddt_t *ddt, ddt_log_t *ddl, uint_t n, dmu_tx_t *tx) { ASSERT3U(ddt->ddt_dir_object, >, 0); - ASSERT3U(ddl->ddl_object, ==, 0); + ASSERT0(ddl->ddl_object); char name[DDT_NAMELEN]; ddt_log_name(ddt, name, n); @@ -194,7 +194,7 @@ void ddt_log_begin(ddt_t *ddt, size_t nentries, dmu_tx_t *tx, ddt_log_update_t *dlu) { ASSERT3U(nentries, >, 0); - ASSERT3P(dlu->dlu_dbp, ==, NULL); + ASSERT0P(dlu->dlu_dbp); if (ddt->ddt_log_active->ddl_object == 0) ddt_log_create(ddt, tx); @@ -748,8 +748,8 @@ ddt_log_load(ddt_t *ddt) void ddt_log_alloc(ddt_t *ddt) { - ASSERT3P(ddt->ddt_log_active, ==, NULL); - ASSERT3P(ddt->ddt_log_flushing, ==, NULL); + ASSERT0P(ddt->ddt_log_active); + ASSERT0P(ddt->ddt_log_flushing); avl_create(&ddt->ddt_log[0].ddl_tree, ddt_key_compare, sizeof (ddt_log_entry_t), offsetof(ddt_log_entry_t, ddle_node)); diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c index 296e58ef9cd8..f7f808d5b8f7 100644 --- a/sys/contrib/openzfs/module/zfs/dmu.c +++ b/sys/contrib/openzfs/module/zfs/dmu.c @@ -1343,7 +1343,7 @@ dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, if (size == 0) return; - VERIFY(0 == dmu_buf_hold_array(os, object, offset, size, + VERIFY0(dmu_buf_hold_array(os, object, offset, size, FALSE, FTAG, &numbufs, &dbp)); for (i = 0; i < numbufs; i++) { @@ -1872,7 +1872,7 @@ dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg) */ BP_SET_LSIZE(bp, db->db_size); } else if (!BP_IS_EMBEDDED(bp)) { - ASSERT(BP_GET_LEVEL(bp) == 0); + ASSERT0(BP_GET_LEVEL(bp)); BP_SET_FILL(bp, 1); } } @@ -2405,7 +2405,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) } } } else if (wp & WP_NOFILL) { - ASSERT(level == 0); + ASSERT0(level); /* * If we're writing preallocated blocks, we aren't actually @@ -2865,7 +2865,7 @@ byteswap_uint64_array(void *vbuf, size_t size) size_t count = size >> 3; int i; - ASSERT((size & 7) == 0); + ASSERT0((size & 7)); for (i = 0; i < count; i++) buf[i] = BSWAP_64(buf[i]); @@ -2878,7 +2878,7 @@ byteswap_uint32_array(void *vbuf, size_t size) size_t count = size >> 2; int i; - ASSERT((size & 3) == 0); + ASSERT0((size & 3)); for (i = 0; i < count; i++) buf[i] = BSWAP_32(buf[i]); @@ -2891,7 +2891,7 @@ byteswap_uint16_array(void *vbuf, size_t size) size_t count = size >> 1; int i; - ASSERT((size & 1) == 0); + ASSERT0((size & 1)); for (i = 0; i < count; i++) buf[i] = BSWAP_16(buf[i]); diff --git a/sys/contrib/openzfs/module/zfs/dmu_direct.c b/sys/contrib/openzfs/module/zfs/dmu_direct.c index 930ff101eca3..d44c686088fc 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_direct.c +++ b/sys/contrib/openzfs/module/zfs/dmu_direct.c @@ -95,9 +95,9 @@ dmu_write_direct_done(zio_t *zio) abd_free(zio->io_abd); mutex_enter(&db->db_mtx); - ASSERT3P(db->db_buf, ==, NULL); - ASSERT3P(dr->dt.dl.dr_data, ==, NULL); - ASSERT3P(db->db.db_data, ==, NULL); + ASSERT0P(db->db_buf); + ASSERT0P(dr->dt.dl.dr_data); + ASSERT0P(db->db.db_data); db->db_state = DB_UNCACHED; mutex_exit(&db->db_mtx); diff --git a/sys/contrib/openzfs/module/zfs/dmu_object.c b/sys/contrib/openzfs/module/zfs/dmu_object.c index b4ff7d224cc9..207cc6d0e713 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_object.c +++ b/sys/contrib/openzfs/module/zfs/dmu_object.c @@ -90,7 +90,7 @@ dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize, if (allocated_dnode != NULL) { ASSERT3P(tag, !=, NULL); } else { - ASSERT3P(tag, ==, NULL); + ASSERT0P(tag); tag = FTAG; } diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c index c135f620800f..a77f338bdfd3 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_objset.c +++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c @@ -724,7 +724,7 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) if (err == 0) { mutex_enter(&ds->ds_lock); - ASSERT(ds->ds_objset == NULL); + ASSERT0P(ds->ds_objset); ds->ds_objset = os; mutex_exit(&ds->ds_lock); } @@ -2226,7 +2226,7 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx) rf |= DB_RF_HAVESTRUCT; error = dmu_spill_hold_by_dnode(dn, rf, FTAG, (dmu_buf_t **)&db); - ASSERT(error == 0); + ASSERT0(error); mutex_enter(&db->db_mtx); data = (before) ? db->db.db_data : dmu_objset_userquota_find_data(db, tx); diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c index 73227b58c140..45c7af2bdcd2 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_recv.c +++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c @@ -866,7 +866,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) */ if (dcp == NULL && drrb->drr_fromguid == 0 && drba->drba_origin == NULL) { - ASSERT3P(dcp, ==, NULL); + ASSERT0P(dcp); dcp = &dummy_dcp; if (featureflags & DMU_BACKUP_FEATURE_RAW) @@ -881,7 +881,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) if (drba->drba_cookie->drc_fromsnapobj != 0) { VERIFY0(dsl_dataset_hold_obj(dp, drba->drba_cookie->drc_fromsnapobj, FTAG, &snap)); - ASSERT3P(dcp, ==, NULL); + ASSERT0P(dcp); } if (drc->drc_heal) { /* When healing we want to use the provided snapshot */ @@ -905,7 +905,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) if (drba->drba_origin != NULL) { VERIFY0(dsl_dataset_hold(dp, drba->drba_origin, FTAG, &origin)); - ASSERT3P(dcp, ==, NULL); + ASSERT0P(dcp); } /* Create new dataset. */ @@ -2792,7 +2792,7 @@ receive_read_payload_and_next_header(dmu_recv_cookie_t *drc, int len, void *buf) drc->drc_rrd->bytes_read = drc->drc_bytes_read; } } else { - ASSERT3P(buf, ==, NULL); + ASSERT0P(buf); } drc->drc_prev_cksum = drc->drc_cksum; @@ -3450,7 +3450,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, offset_t *voffp) break; } - ASSERT3P(drc->drc_rrd, ==, NULL); + ASSERT0P(drc->drc_rrd); drc->drc_rrd = drc->drc_next_rrd; drc->drc_next_rrd = NULL; /* Allocates and loads header into drc->drc_next_rrd */ @@ -3468,7 +3468,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, offset_t *voffp) drc->drc_rrd = NULL; } - ASSERT3P(drc->drc_rrd, ==, NULL); + ASSERT0P(drc->drc_rrd); drc->drc_rrd = kmem_zalloc(sizeof (*drc->drc_rrd), KM_SLEEP); drc->drc_rrd->eos_marker = B_TRUE; bqueue_enqueue_flush(&rwa->q, drc->drc_rrd, 1); diff --git a/sys/contrib/openzfs/module/zfs/dmu_redact.c b/sys/contrib/openzfs/module/zfs/dmu_redact.c index 9226ac9e4b80..5a22ed71a5fe 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_redact.c +++ b/sys/contrib/openzfs/module/zfs/dmu_redact.c @@ -1067,7 +1067,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl, } if (err != 0) goto out; - VERIFY3P(nvlist_next_nvpair(redactnvl, pair), ==, NULL); + VERIFY0P(nvlist_next_nvpair(redactnvl, pair)); boolean_t resuming = B_FALSE; zfs_bookmark_phys_t bookmark; diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c index deeba29e159a..8ecb99d5f57c 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_send.c +++ b/sys/contrib/openzfs/module/zfs/dmu_send.c @@ -962,7 +962,7 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) char *data = NULL; if (srdp->abd != NULL) { data = abd_to_buf(srdp->abd); - ASSERT3P(srdp->abuf, ==, NULL); + ASSERT0P(srdp->abuf); } else if (srdp->abuf != NULL) { data = srdp->abuf->b_data; } @@ -2514,7 +2514,7 @@ dmu_send_impl(struct dmu_send_params *dspp) * list in the stream. */ if (dspp->numfromredactsnaps != NUM_SNAPS_NOT_REDACTED) { - ASSERT3P(from_rl, ==, NULL); + ASSERT0P(from_rl); fnvlist_add_uint64_array(nvl, BEGINNV_REDACT_FROM_SNAPS, dspp->fromredactsnaps, (uint_t)dspp->numfromredactsnaps); if (dspp->numfromredactsnaps > 0) { @@ -2891,7 +2891,7 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, &fromds); if (err != 0) { - ASSERT3P(fromds, ==, NULL); + ASSERT0P(fromds); } else { /* * We need to make a deep copy of the redact diff --git a/sys/contrib/openzfs/module/zfs/dmu_tx.c b/sys/contrib/openzfs/module/zfs/dmu_tx.c index d85d8b89423e..40c0b3402a05 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_tx.c +++ b/sys/contrib/openzfs/module/zfs/dmu_tx.c @@ -126,7 +126,7 @@ dmu_tx_hold_dnode_impl(dmu_tx_t *tx, dnode_t *dn, enum dmu_tx_hold_type type, * problem, but there's no way for it to happen (for * now, at least). */ - ASSERT(dn->dn_assigned_txg == 0); + ASSERT0(dn->dn_assigned_txg); dn->dn_assigned_txg = tx->tx_txg; (void) zfs_refcount_add(&dn->dn_tx_holds, tx); mutex_exit(&dn->dn_mtx); @@ -443,7 +443,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) dnode_t *dn = txh->txh_dnode; int err; - ASSERT(tx->tx_txg == 0); + ASSERT0(tx->tx_txg); if (off >= (dn->dn_maxblkid + 1) * dn->dn_datablksz) return; @@ -607,7 +607,7 @@ dmu_tx_hold_zap_impl(dmu_tx_hold_t *txh, const char *name) dnode_t *dn = txh->txh_dnode; int err; - ASSERT(tx->tx_txg == 0); + ASSERT0(tx->tx_txg); dmu_tx_count_dnode(txh); @@ -681,7 +681,7 @@ dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object) { dmu_tx_hold_t *txh; - ASSERT(tx->tx_txg == 0); + ASSERT0(tx->tx_txg); txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_BONUS, 0, 0); @@ -706,7 +706,7 @@ dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space) { dmu_tx_hold_t *txh; - ASSERT(tx->tx_txg == 0); + ASSERT0(tx->tx_txg); txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT, THT_SPACE, space, 0); @@ -1232,7 +1232,7 @@ dmu_tx_assign(dmu_tx_t *tx, dmu_tx_flag_t flags) { int err; - ASSERT(tx->tx_txg == 0); + ASSERT0(tx->tx_txg); ASSERT0(flags & ~(DMU_TX_WAIT | DMU_TX_NOTHROTTLE | DMU_TX_SUSPEND)); IMPLY(flags & DMU_TX_SUSPEND, flags & DMU_TX_WAIT); ASSERT(!dsl_pool_sync_context(tx->tx_pool)); @@ -1328,7 +1328,7 @@ dmu_tx_wait(dmu_tx_t *tx) dsl_pool_t *dp = tx->tx_pool; hrtime_t before; - ASSERT(tx->tx_txg == 0); + ASSERT0(tx->tx_txg); ASSERT(!dsl_pool_config_held(tx->tx_pool)); /* @@ -1644,12 +1644,12 @@ dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow) dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL); if (sa->sa_force_spill || may_grow || hdl->sa_spill) { - ASSERT(tx->tx_txg == 0); + ASSERT0(tx->tx_txg); dmu_tx_hold_spill(tx, object); } else { DB_DNODE_ENTER(db); if (DB_DNODE(db)->dn_have_spill) { - ASSERT(tx->tx_txg == 0); + ASSERT0(tx->tx_txg); dmu_tx_hold_spill(tx, object); } DB_DNODE_EXIT(db); diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c index 451e1533efa0..963ff41232a3 100644 --- a/sys/contrib/openzfs/module/zfs/dnode.c +++ b/sys/contrib/openzfs/module/zfs/dnode.c @@ -214,7 +214,7 @@ dnode_dest(void *arg, void *unused) for (int i = 0; i < TXG_SIZE; i++) { ASSERT(!multilist_link_active(&dn->dn_dirty_link[i])); - ASSERT3P(dn->dn_free_ranges[i], ==, NULL); + ASSERT0P(dn->dn_free_ranges[i]); list_destroy(&dn->dn_dirty_records[i]); ASSERT0(dn->dn_next_nblkptr[i]); ASSERT0(dn->dn_next_nlevels[i]); @@ -231,10 +231,10 @@ dnode_dest(void *arg, void *unused) ASSERT0(dn->dn_assigned_txg); ASSERT0(dn->dn_dirty_txg); ASSERT0(dn->dn_dirtyctx); - ASSERT3P(dn->dn_dirtyctx_firstset, ==, NULL); - ASSERT3P(dn->dn_bonus, ==, NULL); + ASSERT0P(dn->dn_dirtyctx_firstset); + ASSERT0P(dn->dn_bonus); ASSERT(!dn->dn_have_spill); - ASSERT3P(dn->dn_zio, ==, NULL); + ASSERT0P(dn->dn_zio); ASSERT0(dn->dn_oldused); ASSERT0(dn->dn_oldflags); ASSERT0(dn->dn_olduid); @@ -318,7 +318,7 @@ dnode_kstats_update(kstat_t *ksp, int rw) void dnode_init(void) { - ASSERT(dnode_cache == NULL); + ASSERT0P(dnode_cache); dnode_cache = kmem_cache_create("dnode_t", sizeof (dnode_t), 0, dnode_cons, dnode_dest, NULL, NULL, NULL, KMC_RECLAIMABLE); kmem_cache_set_move(dnode_cache, dnode_move); @@ -509,7 +509,7 @@ dnode_buf_byteswap(void *vbuf, size_t size) int i = 0; ASSERT3U(sizeof (dnode_phys_t), ==, (1<<DNODE_SHIFT)); - ASSERT((size & (sizeof (dnode_phys_t)-1)) == 0); + ASSERT0((size & (sizeof (dnode_phys_t)-1))); while (i < size) { dnode_phys_t *dnp = (void *)(((char *)vbuf) + i); @@ -673,7 +673,7 @@ dnode_destroy(dnode_t *dn) objset_t *os = dn->dn_objset; boolean_t complete_os_eviction = B_FALSE; - ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0); + ASSERT0((dn->dn_id_flags & DN_ID_NEW_EXIST)); mutex_enter(&os->os_lock); POINTER_INVALIDATE(&dn->dn_objset); @@ -780,7 +780,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, ASSERT0(dn->dn_next_maxblkid[i]); ASSERT(!multilist_link_active(&dn->dn_dirty_link[i])); ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL); - ASSERT3P(dn->dn_free_ranges[i], ==, NULL); + ASSERT0P(dn->dn_free_ranges[i]); } dn->dn_type = ot; @@ -958,7 +958,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) ndn->dn_dirty_txg = odn->dn_dirty_txg; ndn->dn_dirtyctx = odn->dn_dirtyctx; ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset; - ASSERT(zfs_refcount_count(&odn->dn_tx_holds) == 0); + ASSERT0(zfs_refcount_count(&odn->dn_tx_holds)); zfs_refcount_transfer(&ndn->dn_holds, &odn->dn_holds); ASSERT(avl_is_empty(&ndn->dn_dbufs)); avl_swap(&ndn->dn_dbufs, &odn->dn_dbufs); @@ -2304,7 +2304,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) if ((off >> blkshift) > dn->dn_maxblkid) return; } else { - ASSERT(dn->dn_maxblkid == 0); + ASSERT0(dn->dn_maxblkid); if (off == 0 && len >= blksz) { /* * Freeing the whole block; fast-track this request. @@ -2524,7 +2524,7 @@ dnode_diduse_space(dnode_t *dn, int64_t delta) } space += delta; if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_DNODE_BYTES) { - ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0); + ASSERT0((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES)); ASSERT0(P2PHASE(space, 1<<DEV_BSHIFT)); dn->dn_phys->dn_used = space >> DEV_BSHIFT; } else { diff --git a/sys/contrib/openzfs/module/zfs/dnode_sync.c b/sys/contrib/openzfs/module/zfs/dnode_sync.c index 4067f221f1bf..046ceddb3609 100644 --- a/sys/contrib/openzfs/module/zfs/dnode_sync.c +++ b/sys/contrib/openzfs/module/zfs/dnode_sync.c @@ -209,8 +209,8 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx) rw_exit(&dn->dn_struct_rwlock); if (err == ENOENT) continue; - ASSERT(err == 0); - ASSERT(child->db_level == 0); + ASSERT0(err); + ASSERT0(child->db_level); dr = dbuf_find_dirty_eq(child, txg); /* data_old better be zeroed */ @@ -868,7 +868,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) dbuf_sync_list(list, dn->dn_phys->dn_nlevels - 1, tx); if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { - ASSERT3P(list_head(list), ==, NULL); + ASSERT0P(list_head(list)); dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); } diff --git a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c index fdc8b7b198f0..ee574c499f9f 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c +++ b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c @@ -243,7 +243,7 @@ dsl_bookmark_create_check_impl(dsl_pool_t *dp, /* error is retval of the following if-cascade */ if (strchr(source, '@') != NULL) { dsl_dataset_t *source_snap_ds; - ASSERT3S(snapshot_namecheck(source, NULL, NULL), ==, 0); + ASSERT0(snapshot_namecheck(source, NULL, NULL)); error = dsl_dataset_hold(dp, source, FTAG, &source_snap_ds); if (error == 0) { VERIFY(source_snap_ds->ds_is_snapshot); @@ -258,7 +258,7 @@ dsl_bookmark_create_check_impl(dsl_pool_t *dp, } } else if (strchr(source, '#') != NULL) { zfs_bookmark_phys_t source_phys; - ASSERT3S(bookmark_namecheck(source, NULL, NULL), ==, 0); + ASSERT0(bookmark_namecheck(source, NULL, NULL)); /* * Source must exists and be an earlier point in newbm_ds's * timeline (newbm_ds's origin may be a snap of source's ds) @@ -501,7 +501,7 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot, sizeof (uint64_t) * num_redact_snaps); local_rl->rl_phys->rlp_num_snaps = num_redact_snaps; if (bookmark_redacted) { - ASSERT3P(redaction_list, ==, NULL); + ASSERT0P(redaction_list); local_rl->rl_phys->rlp_last_blkid = UINT64_MAX; local_rl->rl_phys->rlp_last_object = UINT64_MAX; dsl_redaction_list_long_rele(local_rl, tag); diff --git a/sys/contrib/openzfs/module/zfs/dsl_crypt.c b/sys/contrib/openzfs/module/zfs/dsl_crypt.c index 6b6bb8d45b6b..f519b937edc0 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_crypt.c +++ b/sys/contrib/openzfs/module/zfs/dsl_crypt.c @@ -534,7 +534,7 @@ out: static void dsl_crypto_key_free(dsl_crypto_key_t *dck) { - ASSERT(zfs_refcount_count(&dck->dck_holds) == 0); + ASSERT0(zfs_refcount_count(&dck->dck_holds)); /* destroy the zio_crypt_key_t */ zio_crypt_key_destroy(&dck->dck_key); @@ -1912,7 +1912,7 @@ dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, /* clones always use their origin's wrapping key */ if (dsl_dir_is_clone(dd)) { - ASSERT3P(dcp, ==, NULL); + ASSERT0P(dcp); /* * If this is an encrypted clone we just need to clone the diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c index b767c9641419..420687480a76 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c +++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c @@ -450,7 +450,7 @@ dsl_dataset_evict_sync(void *dbu) { dsl_dataset_t *ds = dbu; - ASSERT(ds->ds_owner == NULL); + ASSERT0P(ds->ds_owner); unique_remove(ds->ds_fsid_guid); } @@ -460,7 +460,7 @@ dsl_dataset_evict_async(void *dbu) { dsl_dataset_t *ds = dbu; - ASSERT(ds->ds_owner == NULL); + ASSERT0P(ds->ds_owner); ds->ds_dbuf = NULL; @@ -1187,7 +1187,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); ASSERT(origin == NULL || dsl_dataset_phys(origin)->ds_num_children > 0); ASSERT(dmu_tx_is_syncing(tx)); - ASSERT(dsl_dir_phys(dd)->dd_head_dataset_obj == 0); + ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj); dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); @@ -2112,7 +2112,7 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *rio, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); ASSERT(ds->ds_objset != NULL); - ASSERT(dsl_dataset_phys(ds)->ds_next_snap_obj == 0); + ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj); /* * in case we had to change ds_fsid_guid when we opened it, @@ -4180,7 +4180,7 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, dsl_pool_t *dp = dmu_tx_pool(tx); int64_t unused_refres_delta; - ASSERT(clone->ds_reserved == 0); + ASSERT0(clone->ds_reserved); /* * NOTE: On DEBUG kernels there could be a race between this and * the check function if spa_asize_inflation is adjusted... diff --git a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c index 9ffc998ac173..41ac72bf1c16 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c +++ b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c @@ -1037,7 +1037,7 @@ dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed, avl_tree_t *avl = lia->avl; bplist_t *to_free = lia->to_free; zthr_t *t = lia->t; - ASSERT(tx == NULL); + ASSERT0P(tx); if ((t != NULL) && (zthr_has_waiters(t) || zthr_iscancelled(t))) return (SET_ERROR(EINTR)); @@ -1049,7 +1049,8 @@ dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed, ASSERT3U(BP_GET_PSIZE(bp), ==, BP_GET_PSIZE(&found->le_bp)); ASSERT3U(BP_GET_CHECKSUM(bp), ==, BP_GET_CHECKSUM(&found->le_bp)); - ASSERT3U(BP_GET_BIRTH(bp), ==, BP_GET_BIRTH(&found->le_bp)); + ASSERT3U(BP_GET_PHYSICAL_BIRTH(bp), ==, + BP_GET_PHYSICAL_BIRTH(&found->le_bp)); } if (bp_freed) { if (found == NULL) { diff --git a/sys/contrib/openzfs/module/zfs/dsl_deleg.c b/sys/contrib/openzfs/module/zfs/dsl_deleg.c index c01a06e98340..fdd37b36e280 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_deleg.c +++ b/sys/contrib/openzfs/module/zfs/dsl_deleg.c @@ -102,7 +102,7 @@ dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr) nvlist_t *perms; nvpair_t *permpair = NULL; - VERIFY(nvpair_value_nvlist(whopair, &perms) == 0); + VERIFY0(nvpair_value_nvlist(whopair, &perms)); while ((permpair = nvlist_next_nvpair(perms, permpair))) { const char *perm = nvpair_name(permpair); @@ -189,8 +189,7 @@ dsl_deleg_set_sync(void *arg, dmu_tx_t *tx) const char *perm = nvpair_name(permpair); uint64_t n = 0; - VERIFY(zap_update(mos, jumpobj, - perm, 8, 1, &n, tx) == 0); + VERIFY0(zap_update(mos, jumpobj, perm, 8, 1, &n, tx)); spa_history_log_internal_dd(dd, "permission update", tx, "%s %s", whokey, perm); } @@ -225,7 +224,7 @@ dsl_deleg_unset_sync(void *arg, dmu_tx_t *tx) if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) == 0) { (void) zap_remove(mos, zapobj, whokey, tx); - VERIFY(0 == zap_destroy(mos, jumpobj, tx)); + VERIFY0(zap_destroy(mos, jumpobj, tx)); } spa_history_log_internal_dd(dd, "permission who remove", tx, "%s", whokey); @@ -243,7 +242,7 @@ dsl_deleg_unset_sync(void *arg, dmu_tx_t *tx) if (zap_count(mos, jumpobj, &n) == 0 && n == 0) { (void) zap_remove(mos, zapobj, whokey, tx); - VERIFY(0 == zap_destroy(mos, + VERIFY0(zap_destroy(mos, jumpobj, tx)); } spa_history_log_internal_dd(dd, "permission remove", tx, @@ -332,7 +331,7 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp) basezc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); baseza = zap_attribute_alloc(); source = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); - VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY0(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)); for (dd = startdd; dd != NULL; dd = dd->dd_parent) { nvlist_t *sp_nvp; @@ -706,7 +705,7 @@ copy_create_perms(dsl_dir_t *dd, uint64_t pzapobj, ZFS_DELEG_LOCAL, &uid); if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) == ENOENT) { jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); - VERIFY(zap_add(mos, zapobj, whokey, 8, 1, &jumpobj, tx) == 0); + VERIFY0(zap_add(mos, zapobj, whokey, 8, 1, &jumpobj, tx)); } za = zap_attribute_alloc(); @@ -716,8 +715,7 @@ copy_create_perms(dsl_dir_t *dd, uint64_t pzapobj, uint64_t zero = 0; ASSERT(za->za_integer_length == 8 && za->za_num_integers == 1); - VERIFY(zap_update(mos, jumpobj, za->za_name, - 8, 1, &zero, tx) == 0); + VERIFY0(zap_update(mos, jumpobj, za->za_name, 8, 1, &zero, tx)); } zap_cursor_fini(&zc); zap_attribute_free(za); @@ -761,10 +759,10 @@ dsl_deleg_destroy(objset_t *mos, uint64_t zapobj, dmu_tx_t *tx) zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { ASSERT(za->za_integer_length == 8 && za->za_num_integers == 1); - VERIFY(0 == zap_destroy(mos, za->za_first_integer, tx)); + VERIFY0(zap_destroy(mos, za->za_first_integer, tx)); } zap_cursor_fini(&zc); - VERIFY(0 == zap_destroy(mos, zapobj, tx)); + VERIFY0(zap_destroy(mos, zapobj, tx)); zap_attribute_free(za); return (0); } diff --git a/sys/contrib/openzfs/module/zfs/dsl_destroy.c b/sys/contrib/openzfs/module/zfs/dsl_destroy.c index fff49c97f4d2..ea01ee586f8b 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_destroy.c +++ b/sys/contrib/openzfs/module/zfs/dsl_destroy.c @@ -350,7 +350,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) dsl_dataset_deactivate_feature(ds, f, tx); } if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { - ASSERT3P(ds->ds_prev, ==, NULL); + ASSERT0P(ds->ds_prev); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev)); after_branch_point = @@ -465,7 +465,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) &used, &comp, &uncomp); dsl_dataset_phys(ds_next)->ds_unique_bytes += used; dsl_dataset_rele(ds_nextnext, FTAG); - ASSERT3P(ds_next->ds_prev, ==, NULL); + ASSERT0P(ds_next->ds_prev); /* Collapse range in this head. */ dsl_dataset_t *hds; @@ -525,7 +525,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) /* remove from snapshot namespace */ dsl_dataset_t *ds_head; - ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0); + ASSERT0(dsl_dataset_phys(ds)->ds_snapnames_zapobj); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head)); VERIFY0(dsl_dataset_get_snapname(ds)); @@ -728,7 +728,7 @@ kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, */ dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); } else { - ASSERT(zilog == NULL); + ASSERT0P(zilog); ASSERT3U(BP_GET_BIRTH(bp), >, dsl_dataset_phys(ka->ds)->ds_prev_snap_txg); (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); diff --git a/sys/contrib/openzfs/module/zfs/dsl_dir.c b/sys/contrib/openzfs/module/zfs/dsl_dir.c index f24cd2049533..6ce1890cfea1 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_dir.c +++ b/sys/contrib/openzfs/module/zfs/dsl_dir.c @@ -151,8 +151,8 @@ dsl_dir_evict_async(void *dbu) for (t = 0; t < TXG_SIZE; t++) { ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); - ASSERT(dd->dd_tempreserved[t] == 0); - ASSERT(dd->dd_space_towrite[t] == 0); + ASSERT0(dd->dd_tempreserved[t]); + ASSERT0(dd->dd_space_towrite[t]); } if (dd->dd_parent) diff --git a/sys/contrib/openzfs/module/zfs/dsl_pool.c b/sys/contrib/openzfs/module/zfs/dsl_pool.c index 4f1f66b835f2..f47822df8b53 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_pool.c +++ b/sys/contrib/openzfs/module/zfs/dsl_pool.c @@ -522,8 +522,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops __attribute__((unused)), /* create and open the free_bplist */ obj = bpobj_alloc(dp->dp_meta_objset, SPA_OLD_MAXBLOCKSIZE, tx); - VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); + VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx)); VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } @@ -1077,7 +1077,7 @@ upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) dsl_dataset_phys(prev)->ds_num_children++; if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) { - ASSERT(ds->ds_prev == NULL); + ASSERT0P(ds->ds_prev); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev)); @@ -1173,7 +1173,7 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) dsl_dataset_t *ds; ASSERT(dmu_tx_is_syncing(tx)); - ASSERT(dp->dp_origin_snap == NULL); + ASSERT0P(dp->dp_origin_snap); ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER)); /* create the origin dir, ds, & snap-ds */ @@ -1250,7 +1250,7 @@ dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx) { objset_t *mos = dp->dp_meta_objset; - ASSERT(dp->dp_tmp_userrefs_obj == 0); + ASSERT0(dp->dp_tmp_userrefs_obj); ASSERT(dmu_tx_is_syncing(tx)); dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS, diff --git a/sys/contrib/openzfs/module/zfs/dsl_prop.c b/sys/contrib/openzfs/module/zfs/dsl_prop.c index b76f22df61e2..51f624da5689 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_prop.c +++ b/sys/contrib/openzfs/module/zfs/dsl_prop.c @@ -815,7 +815,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, */ err = zap_update(mos, zapobj, recvdstr, intsz, numints, value, tx); - ASSERT(err == 0); + ASSERT0(err); break; case (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED): /* @@ -1166,7 +1166,7 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, if (nvlist_exists(nv, propname)) continue; - VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY0(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP)); if (za->za_integer_length == 1) { /* * String property @@ -1179,8 +1179,7 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, kmem_free(tmp, za->za_num_integers); break; } - VERIFY(nvlist_add_string(propval, ZPROP_VALUE, - tmp) == 0); + VERIFY0(nvlist_add_string(propval, ZPROP_VALUE, tmp)); kmem_free(tmp, za->za_num_integers); } else { /* @@ -1191,8 +1190,8 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, za->za_first_integer); } - VERIFY(nvlist_add_string(propval, ZPROP_SOURCE, source) == 0); - VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0); + VERIFY0(nvlist_add_string(propval, ZPROP_SOURCE, source)); + VERIFY0(nvlist_add_nvlist(nv, propname, propval)); nvlist_free(propval); } zap_cursor_fini(&zc); @@ -1215,7 +1214,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp, int err = 0; char setpoint[ZFS_MAX_DATASET_NAME_LEN]; - VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY0(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)); if (ds->ds_is_snapshot) flags |= DSL_PROP_GET_SNAPSHOT; @@ -1333,18 +1332,18 @@ dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value) uint64_t default_value; if (nvlist_lookup_nvlist(nv, propname, &propval) == 0) { - VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, value) == 0); + VERIFY0(nvlist_add_uint64(propval, ZPROP_VALUE, value)); return; } - VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, value) == 0); + VERIFY0(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP)); + VERIFY0(nvlist_add_uint64(propval, ZPROP_VALUE, value)); /* Indicate the default source if we can. */ if (dodefault(prop, 8, 1, &default_value) == 0 && value == default_value) { - VERIFY(nvlist_add_string(propval, ZPROP_SOURCE, "") == 0); + VERIFY0(nvlist_add_string(propval, ZPROP_SOURCE, "")); } - VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0); + VERIFY0(nvlist_add_nvlist(nv, propname, propval)); nvlist_free(propval); } @@ -1355,13 +1354,13 @@ dsl_prop_nvlist_add_string(nvlist_t *nv, zfs_prop_t prop, const char *value) const char *propname = zfs_prop_to_name(prop); if (nvlist_lookup_nvlist(nv, propname, &propval) == 0) { - VERIFY(nvlist_add_string(propval, ZPROP_VALUE, value) == 0); + VERIFY0(nvlist_add_string(propval, ZPROP_VALUE, value)); return; } - VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_string(propval, ZPROP_VALUE, value) == 0); - VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0); + VERIFY0(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP)); + VERIFY0(nvlist_add_string(propval, ZPROP_VALUE, value)); + VERIFY0(nvlist_add_nvlist(nv, propname, propval)); nvlist_free(propval); } diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c index 5052992d775c..fcd50c459d07 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_scan.c +++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c @@ -1784,7 +1784,7 @@ dsl_scan_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg, SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET], ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); - VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb)); + VERIFY0(scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb)); return (0); } @@ -1820,7 +1820,7 @@ dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg, lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); - VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb)); + VERIFY0(scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb)); } return (0); } @@ -5141,7 +5141,7 @@ dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd) mutex_enter(&svd->vdev_scan_io_queue_lock); mutex_enter(&tvd->vdev_scan_io_queue_lock); - VERIFY3P(tvd->vdev_scan_io_queue, ==, NULL); + VERIFY0P(tvd->vdev_scan_io_queue); tvd->vdev_scan_io_queue = svd->vdev_scan_io_queue; svd->vdev_scan_io_queue = NULL; if (tvd->vdev_scan_io_queue != NULL) diff --git a/sys/contrib/openzfs/module/zfs/dsl_userhold.c b/sys/contrib/openzfs/module/zfs/dsl_userhold.c index 57c70e4ce3d2..f91b7a1eb69a 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_userhold.c +++ b/sys/contrib/openzfs/module/zfs/dsl_userhold.c @@ -335,7 +335,7 @@ dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist) dduha.dduha_holds = holds; /* chkholds can have non-unique name */ - VERIFY(0 == nvlist_alloc(&dduha.dduha_chkholds, 0, KM_SLEEP)); + VERIFY0(nvlist_alloc(&dduha.dduha_chkholds, 0, KM_SLEEP)); dduha.dduha_errlist = errlist; dduha.dduha_minor = cleanup_minor; diff --git a/sys/contrib/openzfs/module/zfs/fm.c b/sys/contrib/openzfs/module/zfs/fm.c index a092817efedd..ae788b2310d8 100644 --- a/sys/contrib/openzfs/module/zfs/fm.c +++ b/sys/contrib/openzfs/module/zfs/fm.c @@ -337,7 +337,7 @@ zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size, } } - VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0); + VERIFY0(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE)); if (size > *event_size) { *event_size = size; error = ENOMEM; diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c index 0e5f09b2724c..9f4399af56bd 100644 --- a/sys/contrib/openzfs/module/zfs/metaslab.c +++ b/sys/contrib/openzfs/module/zfs/metaslab.c @@ -391,7 +391,7 @@ static kstat_t *metaslab_ksp; void metaslab_stat_init(void) { - ASSERT(metaslab_alloc_trace_cache == NULL); + ASSERT0P(metaslab_alloc_trace_cache); metaslab_alloc_trace_cache = kmem_cache_create( "metaslab_alloc_trace_cache", sizeof (metaslab_alloc_trace_t), 0, NULL, NULL, NULL, NULL, NULL, 0); @@ -456,16 +456,16 @@ metaslab_class_destroy(metaslab_class_t *mc) { spa_t *spa = mc->mc_spa; - ASSERT(mc->mc_alloc == 0); - ASSERT(mc->mc_deferred == 0); - ASSERT(mc->mc_space == 0); - ASSERT(mc->mc_dspace == 0); + ASSERT0(mc->mc_alloc); + ASSERT0(mc->mc_deferred); + ASSERT0(mc->mc_space); + ASSERT0(mc->mc_dspace); for (int i = 0; i < spa->spa_alloc_count; i++) { metaslab_class_allocator_t *mca = &mc->mc_allocator[i]; avl_destroy(&mca->mca_tree); mutex_destroy(&mca->mca_lock); - ASSERT(mca->mca_rotor == NULL); + ASSERT0P(mca->mca_rotor); ASSERT0(mca->mca_reserved); } mutex_destroy(&mc->mc_lock); @@ -1087,8 +1087,8 @@ metaslab_group_destroy(metaslab_group_t *mg) { spa_t *spa = mg->mg_class->mc_spa; - ASSERT(mg->mg_prev == NULL); - ASSERT(mg->mg_next == NULL); + ASSERT0P(mg->mg_prev); + ASSERT0P(mg->mg_next); /* * We may have gone below zero with the activation count * either because we never activated in the first place or @@ -1118,8 +1118,8 @@ metaslab_group_activate(metaslab_group_t *mg) ASSERT3U(spa_config_held(spa, SCL_ALLOC, RW_WRITER), !=, 0); - ASSERT(mg->mg_prev == NULL); - ASSERT(mg->mg_next == NULL); + ASSERT0P(mg->mg_prev); + ASSERT0P(mg->mg_next); ASSERT(mg->mg_activation_count <= 0); if (++mg->mg_activation_count <= 0) @@ -1164,8 +1164,8 @@ metaslab_group_passivate(metaslab_group_t *mg) if (--mg->mg_activation_count != 0) { for (int i = 0; i < spa->spa_alloc_count; i++) ASSERT(mc->mc_allocator[i].mca_rotor != mg); - ASSERT(mg->mg_prev == NULL); - ASSERT(mg->mg_next == NULL); + ASSERT0P(mg->mg_prev); + ASSERT0P(mg->mg_next); ASSERT(mg->mg_activation_count < 0); return; } @@ -1345,7 +1345,7 @@ metaslab_group_histogram_remove(metaslab_group_t *mg, metaslab_t *msp) static void metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp) { - ASSERT(msp->ms_group == NULL); + ASSERT0P(msp->ms_group); mutex_enter(&mg->mg_lock); msp->ms_group = mg; msp->ms_weight = 0; @@ -3017,7 +3017,7 @@ metaslab_fini(metaslab_t *msp) metaslab_group_remove(mg, msp); mutex_enter(&msp->ms_lock); - VERIFY(msp->ms_group == NULL); + VERIFY0P(msp->ms_group); /* * If this metaslab hasn't been through metaslab_sync_done() yet its @@ -5739,7 +5739,7 @@ metaslab_unalloc_dva(spa_t *spa, const dva_t *dva, uint64_t txg) ASSERT(!vd->vdev_removing); ASSERT(vdev_is_concrete(vd)); ASSERT0(vd->vdev_indirect_config.vic_mapping_object); - ASSERT3P(vd->vdev_indirect_mapping, ==, NULL); + ASSERT0P(vd->vdev_indirect_mapping); if (DVA_GET_GANG(dva)) size = vdev_gang_header_asize(vd); @@ -5997,7 +5997,7 @@ metaslab_alloc_range(spa_t *spa, metaslab_class_t *mc, uint64_t psize, } ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa)); - ASSERT(BP_GET_NDVAS(bp) == 0); + ASSERT0(BP_GET_NDVAS(bp)); ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp)); ASSERT3P(zal, !=, NULL); @@ -6029,7 +6029,7 @@ metaslab_alloc_range(spa_t *spa, metaslab_class_t *mc, uint64_t psize, smallest_psize = MIN(cur_psize, smallest_psize); } } - ASSERT(error == 0); + ASSERT0(error); ASSERT(BP_GET_NDVAS(bp) == ndvas); if (actual_psize) *actual_psize = smallest_psize; diff --git a/sys/contrib/openzfs/module/zfs/mmp.c b/sys/contrib/openzfs/module/zfs/mmp.c index f3665d29b8b4..7db72b9b04b0 100644 --- a/sys/contrib/openzfs/module/zfs/mmp.c +++ b/sys/contrib/openzfs/module/zfs/mmp.c @@ -260,7 +260,7 @@ mmp_thread_stop(spa_t *spa) zfs_dbgmsg("MMP thread stopped pool '%s' gethrtime %llu", spa_name(spa), gethrtime()); - ASSERT(mmp->mmp_thread == NULL); + ASSERT0P(mmp->mmp_thread); mmp->mmp_thread_exiting = 0; } diff --git a/sys/contrib/openzfs/module/zfs/multilist.c b/sys/contrib/openzfs/module/zfs/multilist.c index 7b85d19e19ee..46fb79269310 100644 --- a/sys/contrib/openzfs/module/zfs/multilist.c +++ b/sys/contrib/openzfs/module/zfs/multilist.c @@ -81,7 +81,7 @@ multilist_create_impl(multilist_t *ml, size_t size, size_t offset, ml->ml_num_sublists = num; ml->ml_index_func = index_func; - ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) * + ml->ml_sublists = vmem_zalloc(sizeof (multilist_sublist_t) * ml->ml_num_sublists, KM_SLEEP); ASSERT3P(ml->ml_sublists, !=, NULL); @@ -134,7 +134,7 @@ multilist_destroy(multilist_t *ml) } ASSERT3P(ml->ml_sublists, !=, NULL); - kmem_free(ml->ml_sublists, + vmem_free(ml->ml_sublists, sizeof (multilist_sublist_t) * ml->ml_num_sublists); ml->ml_num_sublists = 0; diff --git a/sys/contrib/openzfs/module/zfs/range_tree.c b/sys/contrib/openzfs/module/zfs/range_tree.c index fc2b17606bd2..ea2d2c7227c8 100644 --- a/sys/contrib/openzfs/module/zfs/range_tree.c +++ b/sys/contrib/openzfs/module/zfs/range_tree.c @@ -377,7 +377,7 @@ zfs_range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill) return; } - ASSERT3P(rs, ==, NULL); + ASSERT0P(rs); /* * Determine whether or not we will have to merge with our neighbors. @@ -867,7 +867,7 @@ zfs_range_tree_remove_xor_add_segment(uint64_t start, uint64_t end, next = zfs_btree_next(&removefrom->rt_root, &where, &where); } - VERIFY3P(curr, ==, NULL); + VERIFY0P(curr); if (start != end) { VERIFY3U(start, <, end); diff --git a/sys/contrib/openzfs/module/zfs/rrwlock.c b/sys/contrib/openzfs/module/zfs/rrwlock.c index 8ee784619839..d0df39b93560 100644 --- a/sys/contrib/openzfs/module/zfs/rrwlock.c +++ b/sys/contrib/openzfs/module/zfs/rrwlock.c @@ -108,7 +108,7 @@ rrn_add(rrwlock_t *rrl, const void *tag) rn->rn_rrl = rrl; rn->rn_next = tsd_get(rrw_tsd_key); rn->rn_tag = tag; - VERIFY(tsd_set(rrw_tsd_key, rn) == 0); + VERIFY0(tsd_set(rrw_tsd_key, rn)); } /* @@ -129,7 +129,7 @@ rrn_find_and_remove(rrwlock_t *rrl, const void *tag) if (prev) prev->rn_next = rn->rn_next; else - VERIFY(tsd_set(rrw_tsd_key, rn->rn_next) == 0); + VERIFY0(tsd_set(rrw_tsd_key, rn->rn_next)); kmem_free(rn, sizeof (*rn)); return (B_TRUE); } @@ -155,7 +155,7 @@ rrw_destroy(rrwlock_t *rrl) { mutex_destroy(&rrl->rr_lock); cv_destroy(&rrl->rr_cv); - ASSERT(rrl->rr_writer == NULL); + ASSERT0P(rrl->rr_writer); zfs_refcount_destroy(&rrl->rr_anon_rcount); zfs_refcount_destroy(&rrl->rr_linked_rcount); } @@ -188,7 +188,7 @@ rrw_enter_read_impl(rrwlock_t *rrl, boolean_t prio, const void *tag) } else { (void) zfs_refcount_add(&rrl->rr_anon_rcount, tag); } - ASSERT(rrl->rr_writer == NULL); + ASSERT0P(rrl->rr_writer); mutex_exit(&rrl->rr_lock); } diff --git a/sys/contrib/openzfs/module/zfs/sa.c b/sys/contrib/openzfs/module/zfs/sa.c index 5db470ce6242..7ad25d4d85ba 100644 --- a/sys/contrib/openzfs/module/zfs/sa.c +++ b/sys/contrib/openzfs/module/zfs/sa.c @@ -304,7 +304,7 @@ sa_get_spill(sa_handle_t *hdl) if (hdl->sa_spill == NULL) { if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL, &hdl->sa_spill)) == 0) - VERIFY(0 == sa_build_index(hdl, SA_SPILL)); + VERIFY0(sa_build_index(hdl, SA_SPILL)); } else { rc = 0; } @@ -432,7 +432,7 @@ sa_add_layout_entry(objset_t *os, const sa_attr_type_t *attrs, int attr_count, (void) snprintf(attr_name, sizeof (attr_name), "%d", (int)lot_num); - VERIFY(0 == zap_update(os, os->os_sa->sa_layout_attr_obj, + VERIFY0(zap_update(os, os->os_sa->sa_layout_attr_obj, attr_name, 2, attr_count, attrs, tx)); } @@ -505,7 +505,7 @@ sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx) } error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx); - ASSERT(error == 0); + ASSERT0(error); return (error); } @@ -717,7 +717,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) > hdl->sa_spill->db_size) - VERIFY(0 == sa_resize_spill(hdl, + VERIFY0(sa_resize_spill(hdl, BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx)); } @@ -791,7 +791,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, hdl->sa_bonus_tab = NULL; } if (!sa->sa_force_spill) - VERIFY(0 == sa_build_index(hdl, SA_BONUS)); + VERIFY0(sa_build_index(hdl, SA_BONUS)); if (hdl->sa_spill) { sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); if (!spilling) { @@ -801,10 +801,10 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, dmu_buf_rele(hdl->sa_spill, NULL); hdl->sa_spill = NULL; hdl->sa_spill_tab = NULL; - VERIFY(0 == dmu_rm_spill(hdl->sa_os, + VERIFY0(dmu_rm_spill(hdl->sa_os, sa_handle_object(hdl), tx)); } else { - VERIFY(0 == sa_build_index(hdl, SA_SPILL)); + VERIFY0(sa_build_index(hdl, SA_SPILL)); } } @@ -1733,10 +1733,10 @@ sa_add_projid(sa_handle_t *hdl, dmu_tx_t *tx, uint64_t projid) NULL, dxattr_obj, dxattr_size); } - VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0); - VERIFY(sa_replace_all_by_template_locked(hdl, attrs, count, tx) == 0); + VERIFY0(dmu_set_bonustype(db, DMU_OT_SA, tx)); + VERIFY0(sa_replace_all_by_template_locked(hdl, attrs, count, tx)); if (znode_acl.z_acl_extern_obj) { - VERIFY(0 == dmu_object_free(zfsvfs->z_os, + VERIFY0(dmu_object_free(zfsvfs->z_os, znode_acl.z_acl_extern_obj, tx)); } @@ -1858,7 +1858,7 @@ sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx) continue; ATTR_ENCODE(attr_value, tb[i].sa_attr, tb[i].sa_length, tb[i].sa_byteswap); - VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj, + VERIFY0(zap_update(hdl->sa_os, sa->sa_reg_attr_obj, tb[i].sa_name, 8, 1, &attr_value, tx)); tb[i].sa_registered = B_TRUE; } @@ -2013,7 +2013,7 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, * Only a variable-sized attribute can be * replaced here, and its size must be changing. */ - ASSERT3U(reg_length, ==, 0); + ASSERT0(reg_length); ASSERT3U(length, !=, buflen); SA_ADD_BULK_ATTR(attr_desc, j, attr, locator, datastart, buflen); diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c index 5ecb175fbd63..b3bb46da263b 100644 --- a/sys/contrib/openzfs/module/zfs/spa.c +++ b/sys/contrib/openzfs/module/zfs/spa.c @@ -426,10 +426,10 @@ spa_prop_add_user(nvlist_t *nvl, const char *propname, char *strval, { nvlist_t *propval; - VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); - VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); - VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); + VERIFY0(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP)); + VERIFY0(nvlist_add_uint64(propval, ZPROP_SOURCE, src)); + VERIFY0(nvlist_add_string(propval, ZPROP_VALUE, strval)); + VERIFY0(nvlist_add_nvlist(nvl, propname, propval)); nvlist_free(propval); } @@ -965,7 +965,7 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp) uint64_t ver = 0; if (prop == ZPOOL_PROP_VERSION) { - VERIFY(nvpair_value_uint64(elem, &ver) == 0); + VERIFY0(nvpair_value_uint64(elem, &ver)); } else { ASSERT(zpool_prop_feature(nvpair_name(elem))); ver = SPA_VERSION_FEATURES; @@ -1295,7 +1295,7 @@ spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q) spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; if (tqs->stqs_taskq == NULL) { - ASSERT3U(tqs->stqs_count, ==, 0); + ASSERT0(tqs->stqs_count); return; } @@ -1836,9 +1836,9 @@ static void spa_deactivate(spa_t *spa) { ASSERT(spa->spa_sync_on == B_FALSE); - ASSERT(spa->spa_dsl_pool == NULL); - ASSERT(spa->spa_root_vdev == NULL); - ASSERT(spa->spa_async_zio_root == NULL); + ASSERT0P(spa->spa_dsl_pool); + ASSERT0P(spa->spa_root_vdev); + ASSERT0P(spa->spa_async_zio_root); ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); spa_evicting_os_wait(spa); @@ -2021,7 +2021,7 @@ spa_unload_log_sm_flush_all(spa_t *spa) dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); VERIFY0(dmu_tx_assign(tx, DMU_TX_WAIT | DMU_TX_SUSPEND)); - ASSERT3U(spa->spa_log_flushall_txg, ==, 0); + ASSERT0(spa->spa_log_flushall_txg); spa->spa_log_flushall_txg = dmu_tx_get_txg(tx); dmu_tx_commit(tx); @@ -2280,7 +2280,7 @@ spa_unload(spa_t *spa) */ if (spa->spa_root_vdev) vdev_free(spa->spa_root_vdev); - ASSERT(spa->spa_root_vdev == NULL); + ASSERT0P(spa->spa_root_vdev); /* * Close the dsl pool. @@ -2418,8 +2418,8 @@ spa_load_spares(spa_t *spa) spa->spa_spares.sav_vdevs = kmem_zalloc(nspares * sizeof (void *), KM_SLEEP); for (i = 0; i < spa->spa_spares.sav_count; i++) { - VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, - VDEV_ALLOC_SPARE) == 0); + VERIFY0(spa_config_parse(spa, &vd, spares[i], NULL, 0, + VDEV_ALLOC_SPARE)); ASSERT(vd != NULL); spa->spa_spares.sav_vdevs[i] = vd; @@ -2546,8 +2546,8 @@ spa_load_l2cache(spa_t *spa) /* * Create new vdev */ - VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, - VDEV_ALLOC_L2CACHE) == 0); + VERIFY0(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, + VDEV_ALLOC_L2CACHE)); ASSERT(vd != NULL); newvdevs[i] = vd; @@ -2799,7 +2799,7 @@ spa_passivate_log(spa_t *spa) vdev_t *tvd = rvd->vdev_child[c]; if (tvd->vdev_islog) { - ASSERT3P(tvd->vdev_log_mg, ==, NULL); + ASSERT0P(tvd->vdev_log_mg); metaslab_group_passivate(tvd->vdev_mg); slog_found = B_TRUE; } @@ -2822,7 +2822,7 @@ spa_activate_log(spa_t *spa) vdev_t *tvd = rvd->vdev_child[c]; if (tvd->vdev_islog) { - ASSERT3P(tvd->vdev_log_mg, ==, NULL); + ASSERT0P(tvd->vdev_log_mg); metaslab_group_activate(tvd->vdev_mg); } } @@ -3259,7 +3259,7 @@ spa_livelist_delete_cb(void *arg, zthr_t *z) static void spa_start_livelist_destroy_thread(spa_t *spa) { - ASSERT3P(spa->spa_livelist_delete_zthr, ==, NULL); + ASSERT0P(spa->spa_livelist_delete_zthr); spa->spa_livelist_delete_zthr = zthr_create("z_livelist_destroy", spa_livelist_delete_cb_check, spa_livelist_delete_cb, spa, @@ -3275,7 +3275,7 @@ static int livelist_track_new_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) { - ASSERT(tx == NULL); + ASSERT0P(tx); livelist_new_arg_t *lna = arg; if (bp_freed) { bplist_append(lna->frees, bp); @@ -3469,7 +3469,7 @@ spa_start_livelist_condensing_thread(spa_t *spa) spa->spa_to_condense.syncing = B_FALSE; spa->spa_to_condense.cancelled = B_FALSE; - ASSERT3P(spa->spa_livelist_condense_zthr, ==, NULL); + ASSERT0P(spa->spa_livelist_condense_zthr); spa->spa_livelist_condense_zthr = zthr_create("z_livelist_condense", spa_livelist_condense_cb_check, @@ -3486,7 +3486,7 @@ spa_spawn_aux_threads(spa_t *spa) spa_start_livelist_destroy_thread(spa); spa_start_livelist_condensing_thread(spa); - ASSERT3P(spa->spa_checkpoint_discard_zthr, ==, NULL); + ASSERT0P(spa->spa_checkpoint_discard_zthr); spa->spa_checkpoint_discard_zthr = zthr_create("z_checkpoint_discard", spa_checkpoint_discard_thread_check, @@ -4091,11 +4091,11 @@ spa_ld_parse_config(spa_t *spa, spa_import_type_t type) nvlist_free(spa->spa_load_info); spa->spa_load_info = fnvlist_alloc(); - ASSERT(spa->spa_comment == NULL); + ASSERT0P(spa->spa_comment); if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) spa->spa_comment = spa_strdup(comment); - ASSERT(spa->spa_compatibility == NULL); + ASSERT0P(spa->spa_compatibility); if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMPATIBILITY, &compatibility) == 0) spa->spa_compatibility = spa_strdup(compatibility); @@ -5913,7 +5913,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request, nvlist_free(config); if (state == SPA_LOAD_RECOVER) { - ASSERT3P(loadinfo, ==, NULL); + ASSERT0P(loadinfo); spa_import_progress_remove(spa_guid(spa)); return (rewind_error); } else { @@ -9091,7 +9091,7 @@ spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd) { - ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); + ASSERT0(spa_config_held(spa, SCL_ALL, RW_WRITER)); if (dsl_scan_resilvering(spa->spa_dsl_pool)) return (SET_ERROR(EBUSY)); @@ -9102,7 +9102,7 @@ spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd) int spa_scan_stop(spa_t *spa) { - ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); + ASSERT0(spa_config_held(spa, SCL_ALL, RW_WRITER)); if (dsl_scan_resilvering(spa->spa_dsl_pool)) return (SET_ERROR(EBUSY)); @@ -9119,7 +9119,7 @@ int spa_scan_range(spa_t *spa, pool_scan_func_t func, uint64_t txgstart, uint64_t txgend) { - ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); + ASSERT0(spa_config_held(spa, SCL_ALL, RW_WRITER)); if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE) return (SET_ERROR(ENOTSUP)); @@ -9548,7 +9548,7 @@ spa_sync_frees(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx) { zio_t *zio = zio_root(spa, NULL, NULL, 0); bplist_iterate(bpl, spa_free_sync_cb, zio, tx); - VERIFY(zio_wait(zio) == 0); + VERIFY0(zio_wait(zio)); } /* @@ -9587,7 +9587,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) size_t nvsize = 0; dmu_buf_t *db; - VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); + VERIFY0(nvlist_size(nv, &nvsize, NV_ENCODE_XDR)); /* * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration @@ -9597,15 +9597,15 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE); packed = vmem_alloc(bufsize, KM_SLEEP); - VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, - KM_SLEEP) == 0); + VERIFY0(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, + KM_SLEEP)); memset(packed + nvsize, 0, bufsize - nvsize); dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); vmem_free(packed, bufsize); - VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); + VERIFY0(dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); dmu_buf_will_dirty(db, tx); *(uint64_t *)db->db_data = nvsize; dmu_buf_rele(db, FTAG); @@ -10541,7 +10541,7 @@ spa_sync_tq_create(spa_t *spa, const char *name) { kthread_t **kthreads; - ASSERT(spa->spa_sync_tq == NULL); + ASSERT0P(spa->spa_sync_tq); ASSERT3S(spa->spa_alloc_count, <=, boot_ncpus); /* diff --git a/sys/contrib/openzfs/module/zfs/spa_config.c b/sys/contrib/openzfs/module/zfs/spa_config.c index 7d4d06659146..cf28955b0c50 100644 --- a/sys/contrib/openzfs/module/zfs/spa_config.c +++ b/sys/contrib/openzfs/module/zfs/spa_config.c @@ -48,18 +48,17 @@ /* * Pool configuration repository. * - * Pool configuration is stored as a packed nvlist on the filesystem. By - * default, all pools are stored in /etc/zfs/zpool.cache and loaded on boot - * (when the ZFS module is loaded). Pools can also have the 'cachefile' - * property set that allows them to be stored in an alternate location until - * the control of external software. + * Pool configuration is stored as a packed nvlist on the filesystem. When + * pools are imported they are added to the /etc/zfs/zpool.cache file and + * removed from it when exported. For each cache file, we have a single nvlist + * which holds all the configuration information. Pools can also have the + * 'cachefile' property set which allows this config to be stored in an + * alternate location under the control of external software. * - * For each cache file, we have a single nvlist which holds all the - * configuration information. When the module loads, we read this information - * from /etc/zfs/zpool.cache and populate the SPA namespace. This namespace is - * maintained independently in spa.c. Whenever the namespace is modified, or - * the configuration of a pool is changed, we call spa_write_cachefile(), which - * walks through all the active pools and writes the configuration to disk. + * The kernel independantly maintains an AVL tree of imported pools. See the + * "SPA locking" comment in spa.c. Whenever a pool configuration is modified + * we call spa_write_cachefile() which walks through all the active pools and + * writes the updated configuration to to /etc/zfs/zpool.cache file. */ static uint64_t spa_config_generation = 1; @@ -69,94 +68,6 @@ static uint64_t spa_config_generation = 1; * userland pools when doing testing. */ char *spa_config_path = (char *)ZPOOL_CACHE; -#ifdef _KERNEL -static int zfs_autoimport_disable = B_TRUE; -#endif - -/* - * Called when the module is first loaded, this routine loads the configuration - * file into the SPA namespace. It does not actually open or load the pools; it - * only populates the namespace. - */ -void -spa_config_load(void) -{ - void *buf = NULL; - nvlist_t *nvlist, *child; - nvpair_t *nvpair; - char *pathname; - zfs_file_t *fp; - zfs_file_attr_t zfa; - uint64_t fsize; - int err; - -#ifdef _KERNEL - if (zfs_autoimport_disable) - return; -#endif - - /* - * Open the configuration file. - */ - pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); - - (void) snprintf(pathname, MAXPATHLEN, "%s", spa_config_path); - - err = zfs_file_open(pathname, O_RDONLY, 0, &fp); - -#ifdef __FreeBSD__ - if (err) - err = zfs_file_open(ZPOOL_CACHE_BOOT, O_RDONLY, 0, &fp); -#endif - kmem_free(pathname, MAXPATHLEN); - - if (err) - return; - - if (zfs_file_getattr(fp, &zfa)) - goto out; - - fsize = zfa.zfa_size; - buf = kmem_alloc(fsize, KM_SLEEP); - - /* - * Read the nvlist from the file. - */ - if (zfs_file_read(fp, buf, fsize, NULL) < 0) - goto out; - - /* - * Unpack the nvlist. - */ - if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0) - goto out; - - /* - * Iterate over all elements in the nvlist, creating a new spa_t for - * each one with the specified configuration. - */ - mutex_enter(&spa_namespace_lock); - nvpair = NULL; - while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) { - if (nvpair_type(nvpair) != DATA_TYPE_NVLIST) - continue; - - child = fnvpair_value_nvlist(nvpair); - - if (spa_lookup(nvpair_name(nvpair)) != NULL) - continue; - (void) spa_add(nvpair_name(nvpair), child, NULL); - } - mutex_exit(&spa_namespace_lock); - - nvlist_free(nvlist); - -out: - if (buf != NULL) - kmem_free(buf, fsize); - - zfs_file_close(fp); -} static int spa_config_remove(spa_config_dirent_t *dp) @@ -623,7 +534,6 @@ spa_config_update(spa_t *spa, int what) spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS); } -EXPORT_SYMBOL(spa_config_load); EXPORT_SYMBOL(spa_all_configs); EXPORT_SYMBOL(spa_config_set); EXPORT_SYMBOL(spa_config_generate); @@ -634,8 +544,3 @@ EXPORT_SYMBOL(spa_config_update); ZFS_MODULE_PARAM(zfs_spa, spa_, config_path, STRING, ZMOD_RD, "SPA config file (/etc/zfs/zpool.cache)"); #endif - -#ifdef _KERNEL -ZFS_MODULE_PARAM(zfs, zfs_, autoimport_disable, INT, ZMOD_RW, - "Disable pool import at module load"); -#endif diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c index 2eba8362a166..dceafbc27556 100644 --- a/sys/contrib/openzfs/module/zfs/spa_misc.c +++ b/sys/contrib/openzfs/module/zfs/spa_misc.c @@ -471,9 +471,9 @@ spa_config_lock_destroy(spa_t *spa) spa_config_lock_t *scl = &spa->spa_config_lock[i]; mutex_destroy(&scl->scl_lock); cv_destroy(&scl->scl_cv); - ASSERT(scl->scl_writer == NULL); - ASSERT(scl->scl_write_wanted == 0); - ASSERT(scl->scl_count == 0); + ASSERT0P(scl->scl_writer); + ASSERT0(scl->scl_write_wanted); + ASSERT0(scl->scl_count); } } @@ -784,24 +784,23 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) dp->scd_path = altroot ? NULL : spa_strdup(spa_config_path); list_insert_head(&spa->spa_config_list, dp); - VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME, - KM_SLEEP) == 0); + VERIFY0(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME, KM_SLEEP)); if (config != NULL) { nvlist_t *features; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ, &features) == 0) { - VERIFY(nvlist_dup(features, &spa->spa_label_features, - 0) == 0); + VERIFY0(nvlist_dup(features, + &spa->spa_label_features, 0)); } - VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0); + VERIFY0(nvlist_dup(config, &spa->spa_config, 0)); } if (spa->spa_label_features == NULL) { - VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME, - KM_SLEEP) == 0); + VERIFY0(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME, + KM_SLEEP)); } spa->spa_min_ashift = INT_MAX; @@ -2549,13 +2548,6 @@ spa_name_compare(const void *a1, const void *a2) } void -spa_boot_init(void *unused) -{ - (void) unused; - spa_config_load(); -} - -void spa_init(spa_mode_t mode) { mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL); @@ -2608,7 +2600,6 @@ spa_init(spa_mode_t mode) chksum_init(); zpool_prop_init(); zpool_feature_init(); - spa_config_load(); vdev_prop_init(); l2arc_start(); scan_init(); diff --git a/sys/contrib/openzfs/module/zfs/spa_stats.c b/sys/contrib/openzfs/module/zfs/spa_stats.c index 6d7cabcf766d..2c87122a0aa9 100644 --- a/sys/contrib/openzfs/module/zfs/spa_stats.c +++ b/sys/contrib/openzfs/module/zfs/spa_stats.c @@ -718,7 +718,7 @@ spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error, for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL; smh = list_prev(&shl->procfs_list.pl_list, smh)) { if (smh->mmp_node_id == mmp_node_id) { - ASSERT(smh->io_error == 0); + ASSERT0(smh->io_error); smh->io_error = io_error; smh->duration = duration; error = 0; diff --git a/sys/contrib/openzfs/module/zfs/space_map.c b/sys/contrib/openzfs/module/zfs/space_map.c index c429e0edd168..5f24963f2291 100644 --- a/sys/contrib/openzfs/module/zfs/space_map.c +++ b/sys/contrib/openzfs/module/zfs/space_map.c @@ -817,7 +817,7 @@ space_map_open(space_map_t **smp, objset_t *os, uint64_t object, space_map_t *sm; int error; - ASSERT(*smp == NULL); + ASSERT0P(*smp); ASSERT(os != NULL); ASSERT(object != 0); diff --git a/sys/contrib/openzfs/module/zfs/space_reftree.c b/sys/contrib/openzfs/module/zfs/space_reftree.c index 9b2d5ed31dc9..889980e08c06 100644 --- a/sys/contrib/openzfs/module/zfs/space_reftree.c +++ b/sys/contrib/openzfs/module/zfs/space_reftree.c @@ -149,6 +149,6 @@ space_reftree_generate_map(avl_tree_t *t, zfs_range_tree_t *rt, int64_t minref) } } } - ASSERT(refcnt == 0); + ASSERT0(refcnt); ASSERT(start == -1ULL); } diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c index 70b14fb9b2c8..9cf35e379000 100644 --- a/sys/contrib/openzfs/module/zfs/vdev.c +++ b/sys/contrib/openzfs/module/zfs/vdev.c @@ -554,7 +554,7 @@ vdev_add_child(vdev_t *pvd, vdev_t *cvd) vdev_t **newchild; ASSERT(spa_config_held(cvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL); - ASSERT(cvd->vdev_parent == NULL); + ASSERT0P(cvd->vdev_parent); cvd->vdev_parent = pvd; @@ -578,7 +578,7 @@ vdev_add_child(vdev_t *pvd, vdev_t *cvd) pvd->vdev_nonrot &= cvd->vdev_nonrot; cvd->vdev_top = (pvd->vdev_top ? pvd->vdev_top: cvd); - ASSERT(cvd->vdev_top->vdev_parent->vdev_parent == NULL); + ASSERT0P(cvd->vdev_top->vdev_parent->vdev_parent); /* * Walk up all ancestors to update guid sum. @@ -1101,10 +1101,10 @@ vdev_free(vdev_t *vd) { spa_t *spa = vd->vdev_spa; - ASSERT3P(vd->vdev_initialize_thread, ==, NULL); - ASSERT3P(vd->vdev_trim_thread, ==, NULL); - ASSERT3P(vd->vdev_autotrim_thread, ==, NULL); - ASSERT3P(vd->vdev_rebuild_thread, ==, NULL); + ASSERT0P(vd->vdev_initialize_thread); + ASSERT0P(vd->vdev_trim_thread); + ASSERT0P(vd->vdev_autotrim_thread); + ASSERT0P(vd->vdev_rebuild_thread); /* * Scan queues are normally destroyed at the end of a scan. If the @@ -1133,7 +1133,7 @@ vdev_free(vdev_t *vd) for (int c = 0; c < vd->vdev_children; c++) vdev_free(vd->vdev_child[c]); - ASSERT(vd->vdev_child == NULL); + ASSERT0P(vd->vdev_child); ASSERT(vd->vdev_guid_sum == vd->vdev_guid); if (vd->vdev_ops->vdev_op_fini != NULL) @@ -1162,7 +1162,7 @@ vdev_free(vdev_t *vd) */ vdev_remove_child(vd->vdev_parent, vd); - ASSERT(vd->vdev_parent == NULL); + ASSERT0P(vd->vdev_parent); ASSERT(!list_link_active(&vd->vdev_leaf_node)); /* @@ -1309,9 +1309,9 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd) ASSERT0(tvd->vdev_indirect_config.vic_births_object); ASSERT0(tvd->vdev_indirect_config.vic_mapping_object); ASSERT3U(tvd->vdev_indirect_config.vic_prev_indirect_vdev, ==, -1ULL); - ASSERT3P(tvd->vdev_indirect_mapping, ==, NULL); - ASSERT3P(tvd->vdev_indirect_births, ==, NULL); - ASSERT3P(tvd->vdev_obsolete_sm, ==, NULL); + ASSERT0P(tvd->vdev_indirect_mapping); + ASSERT0P(tvd->vdev_indirect_births); + ASSERT0P(tvd->vdev_obsolete_sm); ASSERT0(tvd->vdev_noalloc); ASSERT0(tvd->vdev_removing); ASSERT0(tvd->vdev_rebuilding); @@ -1464,7 +1464,7 @@ vdev_remove_parent(vdev_t *cvd) if (cvd == cvd->vdev_top) vdev_top_transfer(mvd, cvd); - ASSERT(mvd->vdev_children == 0); + ASSERT0(mvd->vdev_children); vdev_free(mvd); } @@ -2134,14 +2134,14 @@ vdev_open(vdev_t *vd) * faulted, bail out of the open. */ if (!vd->vdev_removed && vd->vdev_faulted) { - ASSERT(vd->vdev_children == 0); + ASSERT0(vd->vdev_children); ASSERT(vd->vdev_label_aux == VDEV_AUX_ERR_EXCEEDED || vd->vdev_label_aux == VDEV_AUX_EXTERNAL); vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED, vd->vdev_label_aux); return (SET_ERROR(ENXIO)); } else if (vd->vdev_offline) { - ASSERT(vd->vdev_children == 0); + ASSERT0(vd->vdev_children); vdev_set_state(vd, B_TRUE, VDEV_STATE_OFFLINE, VDEV_AUX_NONE); return (SET_ERROR(ENXIO)); } @@ -2197,7 +2197,7 @@ vdev_open(vdev_t *vd) * the vdev is accessible. If we're faulted, bail. */ if (vd->vdev_faulted) { - ASSERT(vd->vdev_children == 0); + ASSERT0(vd->vdev_children); ASSERT(vd->vdev_label_aux == VDEV_AUX_ERR_EXCEEDED || vd->vdev_label_aux == VDEV_AUX_EXTERNAL); vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED, @@ -2206,7 +2206,7 @@ vdev_open(vdev_t *vd) } if (vd->vdev_degraded) { - ASSERT(vd->vdev_children == 0); + ASSERT0(vd->vdev_children); vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED, VDEV_AUX_ERR_EXCEEDED); } else { @@ -3945,7 +3945,7 @@ vdev_load(vdev_t *vd) if (error == 0 && checkpoint_sm_obj != 0) { objset_t *mos = spa_meta_objset(vd->vdev_spa); ASSERT(vd->vdev_asize != 0); - ASSERT3P(vd->vdev_checkpoint_sm, ==, NULL); + ASSERT0P(vd->vdev_checkpoint_sm); error = space_map_open(&vd->vdev_checkpoint_sm, mos, checkpoint_sm_obj, 0, vd->vdev_asize, @@ -3993,7 +3993,7 @@ vdev_load(vdev_t *vd) if (error == 0 && obsolete_sm_object != 0) { objset_t *mos = vd->vdev_spa->spa_meta_objset; ASSERT(vd->vdev_asize != 0); - ASSERT3P(vd->vdev_obsolete_sm, ==, NULL); + ASSERT0P(vd->vdev_obsolete_sm); if ((error = space_map_open(&vd->vdev_obsolete_sm, mos, obsolete_sm_object, 0, vd->vdev_asize, 0))) { @@ -4521,7 +4521,7 @@ top: /* * Prevent any future allocations. */ - ASSERT3P(tvd->vdev_log_mg, ==, NULL); + ASSERT0P(tvd->vdev_log_mg); metaslab_group_passivate(mg); (void) spa_vdev_state_exit(spa, vd, 0); @@ -5194,7 +5194,7 @@ vdev_stat_update(zio_t *zio, uint64_t psize) int64_t vdev_deflated_space(vdev_t *vd, int64_t space) { - ASSERT((space & (SPA_MINBLOCKSIZE-1)) == 0); + ASSERT0((space & (SPA_MINBLOCKSIZE-1))); ASSERT(vd->vdev_deflate_ratio != 0 || vd->vdev_isl2cache); return ((space >> SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio); @@ -5286,8 +5286,8 @@ vdev_config_dirty(vdev_t *vd) if (nvlist_lookup_nvlist_array(sav->sav_config, ZPOOL_CONFIG_L2CACHE, &aux, &naux) != 0) { - VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, - ZPOOL_CONFIG_SPARES, &aux, &naux) == 0); + VERIFY0(nvlist_lookup_nvlist_array(sav->sav_config, + ZPOOL_CONFIG_SPARES, &aux, &naux)); } ASSERT(c < naux); @@ -5675,7 +5675,7 @@ vdev_expand(vdev_t *vd, uint64_t txg) (vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count && vdev_is_concrete(vd)) { vdev_metaslab_group_create(vd); - VERIFY(vdev_metaslab_init(vd, txg) == 0); + VERIFY0(vdev_metaslab_init(vd, txg)); vdev_config_dirty(vd); } } diff --git a/sys/contrib/openzfs/module/zfs/vdev_draid.c b/sys/contrib/openzfs/module/zfs/vdev_draid.c index feec5fd3ce17..a05289102af2 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_draid.c +++ b/sys/contrib/openzfs/module/zfs/vdev_draid.c @@ -477,7 +477,7 @@ vdev_draid_generate_perms(const draid_map_t *map, uint8_t **permsp) VERIFY3U(map->dm_children, <=, VDEV_DRAID_MAX_CHILDREN); VERIFY3U(map->dm_seed, !=, 0); VERIFY3U(map->dm_nperms, !=, 0); - VERIFY3P(map->dm_perms, ==, NULL); + VERIFY0P(map->dm_perms); #ifdef _KERNEL /* @@ -590,7 +590,7 @@ vdev_draid_psize_to_asize(vdev_t *vd, uint64_t psize, uint64_t txg) uint64_t asize = (rows * vdc->vdc_groupwidth) << ashift; ASSERT3U(asize, !=, 0); - ASSERT3U(asize % (vdc->vdc_groupwidth), ==, 0); + ASSERT0(asize % (vdc->vdc_groupwidth)); return (asize); } @@ -704,7 +704,7 @@ vdev_draid_map_alloc_scrub(zio_t *zio, uint64_t abd_offset, raidz_row_t *rr) uint64_t skip_off = 0; ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ); - ASSERT3P(rr->rr_abd_empty, ==, NULL); + ASSERT0P(rr->rr_abd_empty); if (rr->rr_nempty > 0) { rr->rr_abd_empty = abd_alloc_linear(rr->rr_nempty * skip_size, @@ -793,7 +793,7 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr) uint64_t skip_off = 0; ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ); - ASSERT3P(rr->rr_abd_empty, ==, NULL); + ASSERT0P(rr->rr_abd_empty); if (rr->rr_nempty > 0) { rr->rr_abd_empty = abd_alloc_linear(rr->rr_nempty * skip_size, @@ -807,7 +807,7 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr) /* empty data column (small read), add a skip sector */ ASSERT3U(skip_size, ==, parity_size); ASSERT3U(rr->rr_nempty, !=, 0); - ASSERT3P(rc->rc_abd, ==, NULL); + ASSERT0P(rc->rc_abd); rc->rc_abd = abd_get_offset_size(rr->rr_abd_empty, skip_off, skip_size); skip_off += skip_size; @@ -1623,7 +1623,7 @@ vdev_draid_rebuild_asize(vdev_t *vd, uint64_t start, uint64_t asize, SPA_MAXBLOCKSIZE); ASSERT3U(vdev_draid_get_astart(vd, start), ==, start); - ASSERT3U(asize % (vdc->vdc_groupwidth << ashift), ==, 0); + ASSERT0(asize % (vdc->vdc_groupwidth << ashift)); /* Chunks must evenly span all data columns in the group. */ psize = (((psize >> ashift) / ndata) * ndata) << ashift; @@ -1634,7 +1634,7 @@ vdev_draid_rebuild_asize(vdev_t *vd, uint64_t start, uint64_t asize, uint64_t left = vdev_draid_group_to_offset(vd, group + 1) - start; chunk_size = MIN(chunk_size, left); - ASSERT3U(chunk_size % (vdc->vdc_groupwidth << ashift), ==, 0); + ASSERT0(chunk_size % (vdc->vdc_groupwidth << ashift)); ASSERT3U(vdev_draid_offset_to_group(vd, start), ==, vdev_draid_offset_to_group(vd, start + chunk_size - 1)); @@ -2272,7 +2272,7 @@ vdev_draid_init(spa_t *spa, nvlist_t *nv, void **tsd) ASSERT3U(vdc->vdc_groupwidth, <=, vdc->vdc_ndisks); ASSERT3U(vdc->vdc_groupsz, >=, 2 * VDEV_DRAID_ROWHEIGHT); ASSERT3U(vdc->vdc_devslicesz, >=, VDEV_DRAID_ROWHEIGHT); - ASSERT3U(vdc->vdc_devslicesz % VDEV_DRAID_ROWHEIGHT, ==, 0); + ASSERT0(vdc->vdc_devslicesz % VDEV_DRAID_ROWHEIGHT); ASSERT3U((vdc->vdc_groupwidth * vdc->vdc_ngroups) % vdc->vdc_ndisks, ==, 0); diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c index 9fc71fa0e03e..7538f471e63c 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c +++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c @@ -792,7 +792,7 @@ spa_condense_indirect_start_sync(vdev_t *vd, dmu_tx_t *tx) DMU_POOL_CONDENSING_INDIRECT, sizeof (uint64_t), sizeof (*scip) / sizeof (uint64_t), scip, tx)); - ASSERT3P(spa->spa_condensing_indirect, ==, NULL); + ASSERT0P(spa->spa_condensing_indirect); spa->spa_condensing_indirect = spa_condensing_indirect_create(spa); zfs_dbgmsg("starting condense of vdev %llu in txg %llu: " @@ -882,7 +882,7 @@ spa_condense_fini(spa_t *spa) void spa_start_indirect_condensing_thread(spa_t *spa) { - ASSERT3P(spa->spa_condense_zthr, ==, NULL); + ASSERT0P(spa->spa_condense_zthr); spa->spa_condense_zthr = zthr_create("z_indirect_condense", spa_condense_indirect_thread_check, spa_condense_indirect_thread, spa, minclsyspri); @@ -1504,7 +1504,7 @@ vdev_indirect_splits_checksum_validate(indirect_vsd_t *iv, zio_t *zio) is != NULL; is = list_next(&iv->iv_splits, is)) { ASSERT3P(is->is_good_child->ic_data, !=, NULL); - ASSERT3P(is->is_good_child->ic_duplicate, ==, NULL); + ASSERT0P(is->is_good_child->ic_duplicate); abd_copy_off(zio->io_abd, is->is_good_child->ic_data, is->is_split_offset, 0, is->is_size); diff --git a/sys/contrib/openzfs/module/zfs/vdev_initialize.c b/sys/contrib/openzfs/module/zfs/vdev_initialize.c index 9243c76e810d..27188c46e561 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_initialize.c +++ b/sys/contrib/openzfs/module/zfs/vdev_initialize.c @@ -632,7 +632,7 @@ vdev_initialize(vdev_t *vd) ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock)); ASSERT(vd->vdev_ops->vdev_op_leaf); ASSERT(vdev_is_concrete(vd)); - ASSERT3P(vd->vdev_initialize_thread, ==, NULL); + ASSERT0P(vd->vdev_initialize_thread); ASSERT(!vd->vdev_detached); ASSERT(!vd->vdev_initialize_exit_wanted); ASSERT(!vd->vdev_top->vdev_removing); @@ -653,7 +653,7 @@ vdev_uninitialize(vdev_t *vd) ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock)); ASSERT(vd->vdev_ops->vdev_op_leaf); ASSERT(vdev_is_concrete(vd)); - ASSERT3P(vd->vdev_initialize_thread, ==, NULL); + ASSERT0P(vd->vdev_initialize_thread); ASSERT(!vd->vdev_detached); ASSERT(!vd->vdev_initialize_exit_wanted); ASSERT(!vd->vdev_top->vdev_removing); @@ -672,7 +672,7 @@ vdev_initialize_stop_wait_impl(vdev_t *vd) while (vd->vdev_initialize_thread != NULL) cv_wait(&vd->vdev_initialize_cv, &vd->vdev_initialize_lock); - ASSERT3P(vd->vdev_initialize_thread, ==, NULL); + ASSERT0P(vd->vdev_initialize_thread); vd->vdev_initialize_exit_wanted = B_FALSE; } diff --git a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c index 6baa6236aac2..c44f654b0261 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_label.c +++ b/sys/contrib/openzfs/module/zfs/vdev_label.c @@ -163,7 +163,7 @@ uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset) { ASSERT(offset < sizeof (vdev_label_t)); - ASSERT(P2PHASE_TYPED(psize, sizeof (vdev_label_t), uint64_t) == 0); + ASSERT0(P2PHASE_TYPED(psize, sizeof (vdev_label_t), uint64_t)); return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? 0 : psize - VDEV_LABELS * sizeof (vdev_label_t))); @@ -768,12 +768,12 @@ vdev_top_config_generate(spa_t *spa, nvlist_t *config) } if (idx) { - VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY, - array, idx) == 0); + VERIFY0(nvlist_add_uint64_array(config, + ZPOOL_CONFIG_HOLE_ARRAY, array, idx)); } - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, - rvd->vdev_children) == 0); + VERIFY0(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, + rvd->vdev_children)); kmem_free(array, rvd->vdev_children * sizeof (uint64_t)); } @@ -1189,8 +1189,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) * vdev uses as described above, and automatically expires if we * fail. */ - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG, - crtxg) == 0); + VERIFY0(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG, + crtxg)); } buf = vp->vp_nvlist; diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c index aa41f7066036..c12713b107bf 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_queue.c +++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c @@ -780,7 +780,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) if (dio->io_flags & ZIO_FLAG_NODATA) { /* allocate a buffer for a write gap */ ASSERT3U(dio->io_type, ==, ZIO_TYPE_WRITE); - ASSERT3P(dio->io_abd, ==, NULL); + ASSERT0P(dio->io_abd); abd_gang_add(aio->io_abd, abd_get_zeros(dio->io_size), B_TRUE); } else { diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c index 210cdcab1ecc..b597d6daefde 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c +++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c @@ -412,7 +412,7 @@ vdev_raidz_map_free(raidz_map_t *rm) rm->rm_nphys_cols); } - ASSERT3P(rm->rm_lr, ==, NULL); + ASSERT0P(rm->rm_lr); kmem_free(rm, offsetof(raidz_map_t, rm_row[rm->rm_nrows])); } @@ -2431,7 +2431,7 @@ raidz_start_skip_writes(zio_t *zio) vdev_t *cvd = vd->vdev_child[rc->rc_devidx]; if (rc->rc_size != 0) continue; - ASSERT3P(rc->rc_abd, ==, NULL); + ASSERT0P(rc->rc_abd); ASSERT3U(rc->rc_offset, <, cvd->vdev_psize - VDEV_LABEL_END_SIZE); @@ -3363,7 +3363,7 @@ vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_map_t *rm, * also have been fewer parity errors than parity * columns or, again, we wouldn't be in this code path. */ - ASSERT(parity_untried == 0); + ASSERT0(parity_untried); ASSERT(parity_errors < rr->rr_firstdatacol); /* @@ -4743,7 +4743,7 @@ spa_raidz_expand_thread(void *arg, zthr_t *zthr) void spa_start_raidz_expansion_thread(spa_t *spa) { - ASSERT3P(spa->spa_raidz_expand_zthr, ==, NULL); + ASSERT0P(spa->spa_raidz_expand_zthr); spa->spa_raidz_expand_zthr = zthr_create("raidz_expand", spa_raidz_expand_thread_check, spa_raidz_expand_thread, spa, defclsyspri); diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c index cf259788ccf4..47b3b9921abe 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c +++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c @@ -256,7 +256,7 @@ vdev_rebuild_initiate_sync(void *arg, dmu_tx_t *tx) "vdev_id=%llu vdev_guid=%llu started", (u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid); - ASSERT3P(vd->vdev_rebuild_thread, ==, NULL); + ASSERT0P(vd->vdev_rebuild_thread); vd->vdev_rebuild_thread = thread_create(NULL, 0, vdev_rebuild_thread, vd, 0, &p0, TS_RUN, maxclsyspri); @@ -413,7 +413,7 @@ vdev_rebuild_reset_sync(void *arg, dmu_tx_t *tx) mutex_enter(&vd->vdev_rebuild_lock); ASSERT(vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE); - ASSERT3P(vd->vdev_rebuild_thread, ==, NULL); + ASSERT0P(vd->vdev_rebuild_thread); vrp->vrp_last_offset = 0; vrp->vrp_min_txg = 0; diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c index 3887be4bd548..2f7a739da241 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_removal.c +++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c @@ -344,10 +344,10 @@ spa_vdev_remove_aux(nvlist_t *config, const char *name, nvlist_t **dev, for (int i = 0, j = 0; i < count; i++) { if (dev[i] == dev_to_remove) continue; - VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); + VERIFY0(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP)); } - VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); + VERIFY0(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY)); fnvlist_add_nvlist_array(config, name, (const nvlist_t * const *)newdev, count - 1); @@ -423,7 +423,7 @@ vdev_remove_initiate_sync(void *arg, dmu_tx_t *tx) svr = spa_vdev_removal_create(vd); ASSERT(vd->vdev_removing); - ASSERT3P(vd->vdev_indirect_mapping, ==, NULL); + ASSERT0P(vd->vdev_indirect_mapping); spa_feature_incr(spa, SPA_FEATURE_DEVICE_REMOVAL, tx); if (spa_feature_is_enabled(spa, SPA_FEATURE_OBSOLETE_COUNTS)) { @@ -529,7 +529,7 @@ vdev_remove_initiate_sync(void *arg, dmu_tx_t *tx) * but in any case only when there are outstanding free i/os, which * there are not). */ - ASSERT3P(spa->spa_vdev_removal, ==, NULL); + ASSERT0P(spa->spa_vdev_removal); spa->spa_vdev_removal = svr; svr->svr_thread = thread_create(NULL, 0, spa_vdev_remove_thread, spa, 0, &p0, TS_RUN, minclsyspri); @@ -1362,11 +1362,11 @@ vdev_remove_complete(spa_t *spa) txg_wait_synced(spa->spa_dsl_pool, 0); txg = spa_vdev_enter(spa); vdev_t *vd = vdev_lookup_top(spa, spa->spa_vdev_removal->svr_vdev_id); - ASSERT3P(vd->vdev_initialize_thread, ==, NULL); - ASSERT3P(vd->vdev_trim_thread, ==, NULL); - ASSERT3P(vd->vdev_autotrim_thread, ==, NULL); + ASSERT0P(vd->vdev_initialize_thread); + ASSERT0P(vd->vdev_trim_thread); + ASSERT0P(vd->vdev_autotrim_thread); vdev_rebuild_stop_wait(vd); - ASSERT3P(vd->vdev_rebuild_thread, ==, NULL); + ASSERT0P(vd->vdev_rebuild_thread); sysevent_t *ev = spa_event_create(spa, vd, NULL, ESC_ZFS_VDEV_REMOVE_DEV); @@ -1868,7 +1868,7 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx) vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; objset_t *mos = spa->spa_meta_objset; - ASSERT3P(svr->svr_thread, ==, NULL); + ASSERT0P(svr->svr_thread); spa_feature_decr(spa, SPA_FEATURE_DEVICE_REMOVAL, tx); @@ -2076,7 +2076,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg) ASSERT(vd->vdev_islog); ASSERT(vd == vd->vdev_top); - ASSERT3P(vd->vdev_log_mg, ==, NULL); + ASSERT0P(vd->vdev_log_mg); ASSERT(MUTEX_HELD(&spa_namespace_lock)); /* @@ -2112,7 +2112,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg) if (error != 0) { metaslab_group_activate(mg); - ASSERT3P(vd->vdev_log_mg, ==, NULL); + ASSERT0P(vd->vdev_log_mg); return (error); } ASSERT0(vd->vdev_stat.vs_alloc); diff --git a/sys/contrib/openzfs/module/zfs/vdev_trim.c b/sys/contrib/openzfs/module/zfs/vdev_trim.c index fc8d5b8e9a8a..eee18b367909 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_trim.c +++ b/sys/contrib/openzfs/module/zfs/vdev_trim.c @@ -1010,7 +1010,7 @@ vdev_trim(vdev_t *vd, uint64_t rate, boolean_t partial, boolean_t secure) ASSERT(MUTEX_HELD(&vd->vdev_trim_lock)); ASSERT(vd->vdev_ops->vdev_op_leaf); ASSERT(vdev_is_concrete(vd)); - ASSERT3P(vd->vdev_trim_thread, ==, NULL); + ASSERT0P(vd->vdev_trim_thread); ASSERT(!vd->vdev_detached); ASSERT(!vd->vdev_trim_exit_wanted); ASSERT(!vd->vdev_top->vdev_removing); @@ -1032,7 +1032,7 @@ vdev_trim_stop_wait_impl(vdev_t *vd) while (vd->vdev_trim_thread != NULL) cv_wait(&vd->vdev_trim_cv, &vd->vdev_trim_lock); - ASSERT3P(vd->vdev_trim_thread, ==, NULL); + ASSERT0P(vd->vdev_trim_thread); vd->vdev_trim_exit_wanted = B_FALSE; } @@ -1539,7 +1539,7 @@ vdev_autotrim_stop_wait(vdev_t *tvd) cv_wait(&tvd->vdev_autotrim_cv, &tvd->vdev_autotrim_lock); - ASSERT3P(tvd->vdev_autotrim_thread, ==, NULL); + ASSERT0P(tvd->vdev_autotrim_thread); tvd->vdev_autotrim_exit_wanted = B_FALSE; } mutex_exit(&tvd->vdev_autotrim_lock); @@ -1712,7 +1712,7 @@ vdev_trim_l2arc(spa_t *spa) mutex_enter(&vd->vdev_trim_lock); ASSERT(vd->vdev_ops->vdev_op_leaf); ASSERT(vdev_is_concrete(vd)); - ASSERT3P(vd->vdev_trim_thread, ==, NULL); + ASSERT0P(vd->vdev_trim_thread); ASSERT(!vd->vdev_detached); ASSERT(!vd->vdev_trim_exit_wanted); ASSERT(!vd->vdev_top->vdev_removing); diff --git a/sys/contrib/openzfs/module/zfs/zap.c b/sys/contrib/openzfs/module/zfs/zap.c index 0896690c97e3..3e4e997798a3 100644 --- a/sys/contrib/openzfs/module/zfs/zap.c +++ b/sys/contrib/openzfs/module/zfs/zap.c @@ -921,7 +921,7 @@ fzap_add_cd(zap_name_t *zn, ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); ASSERT(!zap->zap_ismicro); - ASSERT(fzap_check(zn, integer_size, num_integers) == 0); + ASSERT0(fzap_check(zn, integer_size, num_integers)); err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l); if (err != 0) @@ -1386,7 +1386,7 @@ again: } err = zap_entry_read_name(zap, &zeh, za->za_name_len, za->za_name); - ASSERT(err == 0); + ASSERT0(err); za->za_normalization_conflict = zap_entry_normalization_conflict(&zeh, @@ -1546,7 +1546,7 @@ zap_shrink(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx) boolean_t trunc = B_FALSE; int err = 0; - ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_nentries, ==, 0); + ASSERT0(zap_leaf_phys(l)->l_hdr.lh_nentries); ASSERT3U(prefix_len, <=, zap_f_phys(zap)->zap_ptrtbl.zt_shift); ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); ASSERT3U(ZAP_HASH_IDX(hash, prefix_len), ==, prefix); @@ -1564,7 +1564,7 @@ zap_shrink(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx) uint64_t sl_hash = ZAP_PREFIX_HASH(sl_prefix, prefix_len); int slbit = prefix & 1; - ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_nentries, ==, 0); + ASSERT0(zap_leaf_phys(l)->l_hdr.lh_nentries); /* * Check if there is a sibling by reading ptrtbl ptrs. diff --git a/sys/contrib/openzfs/module/zfs/zap_micro.c b/sys/contrib/openzfs/module/zfs/zap_micro.c index 411b1a9db5ab..ea4e3117a8b9 100644 --- a/sys/contrib/openzfs/module/zfs/zap_micro.c +++ b/sys/contrib/openzfs/module/zfs/zap_micro.c @@ -346,7 +346,7 @@ zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) { zap_name_t *zn = kmem_cache_alloc(zap_name_cache, KM_SLEEP); - ASSERT(zap->zap_normflags == 0); + ASSERT0(zap->zap_normflags); zn->zn_zap = zap; zn->zn_key_intlen = sizeof (*key); zn->zn_key_orig = zn->zn_key_norm = key; @@ -1876,7 +1876,7 @@ zap_cursor_serialize(zap_cursor_t *zc) return (-1ULL); if (zc->zc_zap == NULL) return (zc->zc_serialized); - ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); + ASSERT0((zc->zc_hash & zap_maxcd(zc->zc_zap))); ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); /* @@ -1911,7 +1911,7 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) * we must add to the existing zc_cd, which may already * be 1 due to the zap_cursor_advance. */ - ASSERT(zc->zc_hash == 0); + ASSERT0(zc->zc_hash); hb = zap_hashbits(zc->zc_zap); zc->zc_hash = zc->zc_serialized << (64 - hb); zc->zc_cd += zc->zc_serialized >> hb; diff --git a/sys/contrib/openzfs/module/zfs/zcp.c b/sys/contrib/openzfs/module/zfs/zcp.c index 9aecf67fd256..c6684f453e95 100644 --- a/sys/contrib/openzfs/module/zfs/zcp.c +++ b/sys/contrib/openzfs/module/zfs/zcp.c @@ -765,7 +765,7 @@ zcp_lua_alloc(void *ud, void *ptr, size_t osize, size_t nsize) return (NULL); } (void) memcpy(luabuf, ptr, osize); - VERIFY3P(zcp_lua_alloc(ud, ptr, osize, 0), ==, NULL); + VERIFY0P(zcp_lua_alloc(ud, ptr, osize, 0)); return (luabuf); } } diff --git a/sys/contrib/openzfs/module/zfs/zfeature.c b/sys/contrib/openzfs/module/zfs/zfeature.c index 7dfe00d42a08..0816ea134bf3 100644 --- a/sys/contrib/openzfs/module/zfs/zfeature.c +++ b/sys/contrib/openzfs/module/zfs/zfeature.c @@ -210,8 +210,8 @@ spa_features_check(spa_t *spa, boolean_t for_write, za->za_name, 1, MAXPATHLEN, buf) == 0) desc = buf; - VERIFY(nvlist_add_string(unsup_feat, - za->za_name, desc) == 0); + VERIFY0(nvlist_add_string(unsup_feat, + za->za_name, desc)); } } } diff --git a/sys/contrib/openzfs/module/zfs/zfs_fuid.c b/sys/contrib/openzfs/module/zfs/zfs_fuid.c index 10a6d289fbf8..2af1efe82e62 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_fuid.c +++ b/sys/contrib/openzfs/module/zfs/zfs_fuid.c @@ -112,8 +112,7 @@ zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree, uint64_t fuid_size; ASSERT(fuid_obj != 0); - VERIFY(0 == dmu_bonus_hold(os, fuid_obj, - FTAG, &db)); + VERIFY0(dmu_bonus_hold(os, fuid_obj, FTAG, &db)); fuid_size = *(uint64_t *)db->db_data; dmu_buf_rele(db, FTAG); @@ -125,22 +124,21 @@ zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree, int i; packed = kmem_alloc(fuid_size, KM_SLEEP); - VERIFY(dmu_read(os, fuid_obj, 0, - fuid_size, packed, DMU_READ_PREFETCH) == 0); - VERIFY(nvlist_unpack(packed, fuid_size, - &nvp, 0) == 0); - VERIFY(nvlist_lookup_nvlist_array(nvp, FUID_NVP_ARRAY, - &fuidnvp, &count) == 0); + VERIFY0(dmu_read(os, fuid_obj, 0, + fuid_size, packed, DMU_READ_PREFETCH)); + VERIFY0(nvlist_unpack(packed, fuid_size, &nvp, 0)); + VERIFY0(nvlist_lookup_nvlist_array(nvp, FUID_NVP_ARRAY, + &fuidnvp, &count)); for (i = 0; i != count; i++) { fuid_domain_t *domnode; const char *domain; uint64_t idx; - VERIFY(nvlist_lookup_string(fuidnvp[i], FUID_DOMAIN, - &domain) == 0); - VERIFY(nvlist_lookup_uint64(fuidnvp[i], FUID_IDX, - &idx) == 0); + VERIFY0(nvlist_lookup_string(fuidnvp[i], FUID_DOMAIN, + &domain)); + VERIFY0(nvlist_lookup_uint64(fuidnvp[i], FUID_IDX, + &idx)); domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP); @@ -246,35 +244,33 @@ zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx) &zfsvfs->z_fuid_obj, tx) == 0); } - VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY0(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP)); numnodes = avl_numnodes(&zfsvfs->z_fuid_idx); fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP); for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++, domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) { - VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX, - domnode->f_idx) == 0); - VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0); - VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN, - domnode->f_ksid->kd_name) == 0); + VERIFY0(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP)); + VERIFY0(nvlist_add_uint64(fuids[i], FUID_IDX, + domnode->f_idx)); + VERIFY0(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0)); + VERIFY0(nvlist_add_string(fuids[i], FUID_DOMAIN, + domnode->f_ksid->kd_name)); } fnvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY, (const nvlist_t * const *)fuids, numnodes); for (i = 0; i != numnodes; i++) nvlist_free(fuids[i]); kmem_free(fuids, numnodes * sizeof (void *)); - VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0); + VERIFY0(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR)); packed = kmem_alloc(nvsize, KM_SLEEP); - VERIFY(nvlist_pack(nvp, &packed, &nvsize, - NV_ENCODE_XDR, KM_SLEEP) == 0); + VERIFY0(nvlist_pack(nvp, &packed, &nvsize, NV_ENCODE_XDR, KM_SLEEP)); nvlist_free(nvp); zfsvfs->z_fuid_size = nvsize; dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0, zfsvfs->z_fuid_size, packed, tx); kmem_free(packed, zfsvfs->z_fuid_size); - VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj, - FTAG, &db)); + VERIFY0(dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj, FTAG, &db)); dmu_buf_will_dirty(db, tx); *(uint64_t *)db->db_data = zfsvfs->z_fuid_size; dmu_buf_rele(db, FTAG); diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c index dcb71229f96a..121b966b9864 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c +++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c @@ -1493,7 +1493,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) goto pool_props_bad; (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS); - VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY0(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP)); error = zfs_fill_zplprops_root(version, rootprops, zplprops, NULL); if (error != 0) @@ -2245,7 +2245,7 @@ nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop) */ if ((error = zfs_get_zplprop(os, prop, &value)) != 0) return (error); - VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0); + VERIFY0(nvlist_add_uint64(props, zfs_prop_to_name(prop), value)); return (0); } @@ -2280,7 +2280,7 @@ zfs_ioc_objset_zplprops(zfs_cmd_t *zc) dmu_objset_type(os) == DMU_OST_ZFS) { nvlist_t *nv; - VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY0(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP)); if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 && (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 && (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 && @@ -2483,7 +2483,7 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair) if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); + VERIFY0(nvpair_value_nvlist(pair, &attrs)); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) != 0) return (SET_ERROR(EINVAL)); @@ -2538,9 +2538,8 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); - VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, - &pair) == 0); + VERIFY0(nvpair_value_nvlist(pair, &attrs)); + VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair)); } /* all special properties are numeric except for keylocation */ @@ -2932,14 +2931,14 @@ props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops) { nvpair_t *pair; - VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY0(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP)); pair = NULL; while ((pair = nvlist_next_nvpair(props, pair)) != NULL) { if (nvlist_exists(skipped, nvpair_name(pair))) continue; - VERIFY(nvlist_add_nvpair(*newprops, pair) == 0); + VERIFY0(nvlist_add_nvpair(*newprops, pair)); } } @@ -3064,11 +3063,11 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc) switch (type) { case PROP_TYPE_STRING: - VERIFY(0 == nvlist_add_string(dummy, propname, "")); + VERIFY0(nvlist_add_string(dummy, propname, "")); break; case PROP_TYPE_NUMBER: case PROP_TYPE_INDEX: - VERIFY(0 == nvlist_add_uint64(dummy, propname, 0)); + VERIFY0(nvlist_add_uint64(dummy, propname, 0)); break; default: err = SET_ERROR(EINVAL); @@ -3454,14 +3453,14 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver, /* * Put the version in the zplprops */ - VERIFY(nvlist_add_uint64(zplprops, - zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0); + VERIFY0(nvlist_add_uint64(zplprops, + zfs_prop_to_name(ZFS_PROP_VERSION), zplver)); if (norm == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0) return (error); - VERIFY(nvlist_add_uint64(zplprops, - zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0); + VERIFY0(nvlist_add_uint64(zplprops, + zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm)); /* * If we're normalizing, names must always be valid UTF-8 strings. @@ -3471,55 +3470,55 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver, if (u8 == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0) return (error); - VERIFY(nvlist_add_uint64(zplprops, - zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0); + VERIFY0(nvlist_add_uint64(zplprops, + zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8)); if (sense == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0) return (error); - VERIFY(nvlist_add_uint64(zplprops, - zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0); + VERIFY0(nvlist_add_uint64(zplprops, + zfs_prop_to_name(ZFS_PROP_CASE), sense)); if (duq == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTUSERQUOTA, &duq)) != 0) return (error); - VERIFY(nvlist_add_uint64(zplprops, - zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA), duq) == 0); + VERIFY0(nvlist_add_uint64(zplprops, + zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA), duq)); if (dgq == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTGROUPQUOTA, &dgq)) != 0) return (error); - VERIFY(nvlist_add_uint64(zplprops, - zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA), dgq) == 0); + VERIFY0(nvlist_add_uint64(zplprops, + zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA), dgq)); if (dpq == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTPROJECTQUOTA, &dpq)) != 0) return (error); - VERIFY(nvlist_add_uint64(zplprops, - zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA), dpq) == 0); + VERIFY0(nvlist_add_uint64(zplprops, + zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA), dpq)); if (duoq == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTUSEROBJQUOTA, &duoq)) != 0) return (error); - VERIFY(nvlist_add_uint64(zplprops, - zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA), duoq) == 0); + VERIFY0(nvlist_add_uint64(zplprops, + zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA), duoq)); if (dgoq == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTGROUPOBJQUOTA, &dgoq)) != 0) return (error); - VERIFY(nvlist_add_uint64(zplprops, - zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA), dgoq) == 0); + VERIFY0(nvlist_add_uint64(zplprops, + zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA), dgoq)); if (dpoq == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTPROJECTOBJQUOTA, &dpoq)) != 0) return (error); - VERIFY(nvlist_add_uint64(zplprops, - zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA), dpoq) == 0); + VERIFY0(nvlist_add_uint64(zplprops, + zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA), dpoq)); if (is_ci) *is_ci = (sense == ZFS_CASE_INSENSITIVE); @@ -3668,8 +3667,8 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) * file system creation, so go figure them out * now. */ - VERIFY(nvlist_alloc(&zct.zct_zplprops, - NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY0(nvlist_alloc(&zct.zct_zplprops, + NV_UNIQUE_NAME, KM_SLEEP)); error = zfs_fill_zplprops(fsname, nvprops, zct.zct_zplprops, &is_insensitive); if (error != 0) { @@ -4916,9 +4915,8 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) * format. */ nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); - VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, - &pair) == 0); + VERIFY0(nvpair_value_nvlist(pair, &attrs)); + VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair)); } /* @@ -5103,7 +5101,7 @@ zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist) if (props == NULL) return (0); - VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY0(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP)); zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP); (void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name)); @@ -5115,9 +5113,8 @@ zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist) sizeof (zc->zc_value)); if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 || (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) { - VERIFY(nvlist_remove_nvpair(props, pair) == 0); - VERIFY(nvlist_add_int32(errors, - zc->zc_value, err) == 0); + VERIFY0(nvlist_remove_nvpair(props, pair)); + VERIFY0(nvlist_add_int32(errors, zc->zc_value, err)); } pair = next_pair; } @@ -5127,7 +5124,7 @@ zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist) nvlist_free(errors); errors = NULL; } else { - VERIFY(nvpair_value_int32(pair, &rv) == 0); + VERIFY0(nvpair_value_int32(pair, &rv)); } if (errlist == NULL) @@ -5144,16 +5141,14 @@ propval_equals(nvpair_t *p1, nvpair_t *p2) if (nvpair_type(p1) == DATA_TYPE_NVLIST) { /* dsl_prop_get_all_impl() format */ nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(p1, &attrs) == 0); - VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, - &p1) == 0); + VERIFY0(nvpair_value_nvlist(p1, &attrs)); + VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p1)); } if (nvpair_type(p2) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(p2, &attrs) == 0); - VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, - &p2) == 0); + VERIFY0(nvpair_value_nvlist(p2, &attrs)); + VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p2)); } if (nvpair_type(p1) != nvpair_type(p2)) @@ -5162,14 +5157,14 @@ propval_equals(nvpair_t *p1, nvpair_t *p2) if (nvpair_type(p1) == DATA_TYPE_STRING) { const char *valstr1, *valstr2; - VERIFY(nvpair_value_string(p1, &valstr1) == 0); - VERIFY(nvpair_value_string(p2, &valstr2) == 0); + VERIFY0(nvpair_value_string(p1, &valstr1)); + VERIFY0(nvpair_value_string(p2, &valstr2)); return (strcmp(valstr1, valstr2) == 0); } else { uint64_t intval1, intval2; - VERIFY(nvpair_value_uint64(p1, &intval1) == 0); - VERIFY(nvpair_value_uint64(p2, &intval2) == 0); + VERIFY0(nvpair_value_uint64(p1, &intval1)); + VERIFY0(nvpair_value_uint64(p2, &intval2)); return (intval1 == intval2); } } @@ -5237,7 +5232,7 @@ extract_delay_props(nvlist_t *props) }; int i; - VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY0(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP)); for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL; nvp = nvlist_next_nvpair(props, nvp)) { @@ -5253,8 +5248,8 @@ extract_delay_props(nvlist_t *props) } if (delayable[i] != 0) { tmp = nvlist_prev_nvpair(props, nvp); - VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0); - VERIFY(nvlist_remove_nvpair(props, nvp) == 0); + VERIFY0(nvlist_add_nvpair(delayprops, nvp)); + VERIFY0(nvlist_remove_nvpair(props, nvp)); nvp = tmp; } } @@ -5485,15 +5480,15 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, const char *origin, * using ASSERT() will be just like a VERIFY. */ if (recv_delayprops != NULL) { - ASSERT(nvlist_merge(recvprops, recv_delayprops, 0) == 0); + ASSERT0(nvlist_merge(recvprops, recv_delayprops, 0)); nvlist_free(recv_delayprops); } if (local_delayprops != NULL) { - ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0); + ASSERT0(nvlist_merge(localprops, local_delayprops, 0)); nvlist_free(local_delayprops); } if (inherited_delayprops != NULL) { - ASSERT(nvlist_merge(localprops, inherited_delayprops, 0) == 0); + ASSERT0(nvlist_merge(localprops, inherited_delayprops, 0)); nvlist_free(inherited_delayprops); } *read_bytes = off - noff; @@ -7342,8 +7337,8 @@ zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, ASSERT3U(ioc, >=, ZFS_IOC_FIRST); ASSERT3U(ioc, <, ZFS_IOC_LAST); - ASSERT3P(vec->zvec_legacy_func, ==, NULL); - ASSERT3P(vec->zvec_func, ==, NULL); + ASSERT0P(vec->zvec_legacy_func); + ASSERT0P(vec->zvec_func); vec->zvec_legacy_func = func; vec->zvec_secpolicy = secpolicy; @@ -7366,8 +7361,8 @@ zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func, ASSERT3U(ioc, >=, ZFS_IOC_FIRST); ASSERT3U(ioc, <, ZFS_IOC_LAST); - ASSERT3P(vec->zvec_legacy_func, ==, NULL); - ASSERT3P(vec->zvec_func, ==, NULL); + ASSERT0P(vec->zvec_legacy_func); + ASSERT0P(vec->zvec_func); /* if we are logging, the name must be valid */ ASSERT(!allow_log || namecheck != NO_NAME); @@ -8148,7 +8143,7 @@ zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag) spa_t *spa; nvlist_t *lognv = NULL; - ASSERT(vec->zvec_legacy_func == NULL); + ASSERT0P(vec->zvec_legacy_func); /* * Add the innvl to the lognv before calling the func, diff --git a/sys/contrib/openzfs/module/zfs/zfs_log.c b/sys/contrib/openzfs/module/zfs/zfs_log.c index 2f61ecfd9b3b..ea17e049279f 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_log.c +++ b/sys/contrib/openzfs/module/zfs/zfs_log.c @@ -620,7 +620,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, if (zil_replaying(zilog, tx) || zp->z_unlinked || zfs_xattr_owner_unlinked(zp)) { if (callback != NULL) - callback(callback_data); + callback(callback_data, 0); return; } @@ -663,7 +663,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, DMU_KEEP_CACHING); DB_DNODE_EXIT(db); if (err != 0) { - zil_itx_destroy(itx); + zil_itx_destroy(itx, 0); itx = zil_itx_create(txtype, sizeof (*lr)); lr = (lr_write_t *)&itx->itx_lr; wr_state = WR_NEED_COPY; diff --git a/sys/contrib/openzfs/module/zfs/zfs_quota.c b/sys/contrib/openzfs/module/zfs/zfs_quota.c index b8fe512d4f09..2e91ccc27d6d 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_quota.c +++ b/sys/contrib/openzfs/module/zfs/zfs_quota.c @@ -374,7 +374,7 @@ zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, if (*objp == 0) { *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA, DMU_OT_NONE, 0, tx); - VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, + VERIFY0(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); } mutex_exit(&zfsvfs->z_lock); @@ -386,7 +386,7 @@ zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, } else { err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx); } - ASSERT(err == 0); + ASSERT0(err); if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); dmu_tx_commit(tx); diff --git a/sys/contrib/openzfs/module/zfs/zfs_rlock.c b/sys/contrib/openzfs/module/zfs/zfs_rlock.c index 53eb3ef1b66e..4035baff77d6 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_rlock.c +++ b/sys/contrib/openzfs/module/zfs/zfs_rlock.c @@ -666,7 +666,7 @@ zfs_rangelock_reduce(zfs_locked_range_t *lr, uint64_t off, uint64_t len) /* Ensure there are no other locks */ ASSERT3U(avl_numnodes(&rl->rl_tree), ==, 1); - ASSERT3U(lr->lr_offset, ==, 0); + ASSERT0(lr->lr_offset); ASSERT3U(lr->lr_type, ==, RL_WRITER); ASSERT(!lr->lr_proxy); ASSERT3U(lr->lr_length, ==, UINT64_MAX); diff --git a/sys/contrib/openzfs/module/zfs/zfs_sa.c b/sys/contrib/openzfs/module/zfs/zfs_sa.c index 59b6ae4e4203..8b4fc6fd7fbd 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_sa.c +++ b/sys/contrib/openzfs/module/zfs/zfs_sa.c @@ -169,7 +169,7 @@ zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) ASSERT(MUTEX_HELD(&zp->z_lock)); VERIFY((xoap = xva_getxoptattr(xvap)) != NULL); if (zp->z_is_sa) - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs), + VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs), &xoap->xoa_av_scanstamp, sizeof (xoap->xoa_av_scanstamp), tx)); else { @@ -181,12 +181,12 @@ zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) len = sizeof (xoap->xoa_av_scanstamp) + ZFS_OLD_ZNODE_PHYS_SIZE; if (len > doi.doi_bonus_size) - VERIFY(dmu_set_bonus(db, len, tx) == 0); + VERIFY0(dmu_set_bonus(db, len, tx)); (void) memcpy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE, xoap->xoa_av_scanstamp, sizeof (xoap->xoa_av_scanstamp)); zp->z_pflags |= ZFS_BONUS_SCANSTAMP; - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), + VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), &zp->z_pflags, sizeof (uint64_t), tx)); } } @@ -286,7 +286,7 @@ zfs_sa_set_xattr(znode_t *zp, const char *name, const void *value, size_t vsize) dmu_tx_commit(tx); if (logsaxattr && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + error = zil_commit(zilog, 0); } out_free: vmem_free(obj, size); @@ -427,11 +427,10 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx) zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP; } - VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0); - VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs, - count, tx) == 0); + VERIFY0(dmu_set_bonustype(db, DMU_OT_SA, tx)); + VERIFY0(sa_replace_all_by_template_locked(hdl, sa_attrs, count, tx)); if (znode_acl.z_acl_extern_obj) - VERIFY(0 == dmu_object_free(zfsvfs->z_os, + VERIFY0(dmu_object_free(zfsvfs->z_os, znode_acl.z_acl_extern_obj, tx)); zp->z_is_sa = B_TRUE; diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c index 74aa91a4f2eb..7bb9ba57c69e 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c +++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c @@ -27,6 +27,7 @@ * Copyright 2017 Nexenta Systems, Inc. * Copyright (c) 2021, 2022 by Pawel Jakub Dawidek * Copyright (c) 2025, Rob Norris <robn@despairlabs.com> + * Copyright (c) 2025, Klara, Inc. */ /* Portions Copyright 2007 Jeremy Teo */ @@ -116,7 +117,7 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr) if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (error); - zil_commit(zfsvfs->z_log, zp->z_id); + error = zil_commit(zfsvfs->z_log, zp->z_id); zfs_exit(zfsvfs, FTAG); } return (error); @@ -375,8 +376,13 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) frsync = !!(ioflag & FRSYNC); #endif if (zfsvfs->z_log && - (frsync || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) - zil_commit(zfsvfs->z_log, zp->z_id); + (frsync || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) { + error = zil_commit(zfsvfs->z_log, zp->z_id); + if (error != 0) { + zfs_exit(zfsvfs, FTAG); + return (error); + } + } /* * Lock the range against changes. @@ -1074,8 +1080,13 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) return (error); } - if (commit) - zil_commit(zilog, zp->z_id); + if (commit) { + error = zil_commit(zilog, zp->z_id); + if (error != 0) { + zfs_exit(zfsvfs, FTAG); + return (error); + } + } int64_t nwritten = start_resid - zfs_uio_resid(uio); dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten); @@ -1260,8 +1271,8 @@ zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr) zilog = zfsvfs->z_log; error = zfs_setacl(zp, vsecp, skipaclchk, cr); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + error = zil_commit(zilog, 0); zfs_exit(zfsvfs, FTAG); return (error); @@ -1946,7 +1957,7 @@ unlock: ZFS_ACCESSTIME_STAMP(inzfsvfs, inzp); if (outos->os_sync == ZFS_SYNC_ALWAYS) { - zil_commit(zilog, outzp->z_id); + error = zil_commit(zilog, outzp->z_id); } *inoffp += done; diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c index 6e4f84257407..0307df55aa21 100644 --- a/sys/contrib/openzfs/module/zfs/zil.c +++ b/sys/contrib/openzfs/module/zfs/zil.c @@ -24,6 +24,7 @@ * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright (c) 2018 Datto Inc. + * Copyright (c) 2025, Klara, Inc. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -103,6 +104,7 @@ static zil_kstat_values_t zil_stats = { { "zil_commit_error_count", KSTAT_DATA_UINT64 }, { "zil_commit_stall_count", KSTAT_DATA_UINT64 }, { "zil_commit_suspend_count", KSTAT_DATA_UINT64 }, + { "zil_commit_crash_count", KSTAT_DATA_UINT64 }, { "zil_itx_count", KSTAT_DATA_UINT64 }, { "zil_itx_indirect_count", KSTAT_DATA_UINT64 }, { "zil_itx_indirect_bytes", KSTAT_DATA_UINT64 }, @@ -145,7 +147,7 @@ static uint64_t zil_slog_bulk = 64 * 1024 * 1024; static kmem_cache_t *zil_lwb_cache; static kmem_cache_t *zil_zcw_cache; -static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx); +static int zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx); static itx_t *zil_itx_clone(itx_t *oitx); static uint64_t zil_max_waste_space(zilog_t *zilog); @@ -367,6 +369,7 @@ zil_sums_init(zil_sums_t *zs) wmsum_init(&zs->zil_commit_error_count, 0); wmsum_init(&zs->zil_commit_stall_count, 0); wmsum_init(&zs->zil_commit_suspend_count, 0); + wmsum_init(&zs->zil_commit_crash_count, 0); wmsum_init(&zs->zil_itx_count, 0); wmsum_init(&zs->zil_itx_indirect_count, 0); wmsum_init(&zs->zil_itx_indirect_bytes, 0); @@ -392,6 +395,7 @@ zil_sums_fini(zil_sums_t *zs) wmsum_fini(&zs->zil_commit_error_count); wmsum_fini(&zs->zil_commit_stall_count); wmsum_fini(&zs->zil_commit_suspend_count); + wmsum_fini(&zs->zil_commit_crash_count); wmsum_fini(&zs->zil_itx_count); wmsum_fini(&zs->zil_itx_indirect_count); wmsum_fini(&zs->zil_itx_indirect_bytes); @@ -422,6 +426,8 @@ zil_kstat_values_update(zil_kstat_values_t *zs, zil_sums_t *zil_sums) wmsum_value(&zil_sums->zil_commit_stall_count); zs->zil_commit_suspend_count.value.ui64 = wmsum_value(&zil_sums->zil_commit_suspend_count); + zs->zil_commit_crash_count.value.ui64 = + wmsum_value(&zil_sums->zil_commit_crash_count); zs->zil_itx_count.value.ui64 = wmsum_value(&zil_sums->zil_itx_count); zs->zil_itx_indirect_count.value.ui64 = @@ -813,34 +819,37 @@ zil_lwb_vdev_compare(const void *x1, const void *x2) * we choose them here and later make the block allocation match. */ static lwb_t * -zil_alloc_lwb(zilog_t *zilog, int sz, blkptr_t *bp, boolean_t slog, - uint64_t txg, lwb_state_t state) +zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, int min_sz, int sz, + boolean_t slog, uint64_t txg) { lwb_t *lwb; lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP); + lwb->lwb_flags = 0; lwb->lwb_zilog = zilog; if (bp) { lwb->lwb_blk = *bp; - lwb->lwb_slim = (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2); + if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) + lwb->lwb_flags |= LWB_FLAG_SLIM; sz = BP_GET_LSIZE(bp); + lwb->lwb_min_sz = sz; } else { BP_ZERO(&lwb->lwb_blk); - lwb->lwb_slim = (spa_version(zilog->zl_spa) >= - SPA_VERSION_SLIM_ZIL); + if (spa_version(zilog->zl_spa) >= SPA_VERSION_SLIM_ZIL) + lwb->lwb_flags |= LWB_FLAG_SLIM; + lwb->lwb_min_sz = min_sz; } - lwb->lwb_slog = slog; + if (slog) + lwb->lwb_flags |= LWB_FLAG_SLOG; lwb->lwb_error = 0; - if (lwb->lwb_slim) { - lwb->lwb_nmax = sz; - lwb->lwb_nused = lwb->lwb_nfilled = sizeof (zil_chain_t); - } else { - lwb->lwb_nmax = sz - sizeof (zil_chain_t); - lwb->lwb_nused = lwb->lwb_nfilled = 0; - } + /* + * Buffer allocation and capacity setup will be done in + * zil_lwb_write_open() when the LWB is opened for ITX assignment. + */ + lwb->lwb_nmax = lwb->lwb_nused = lwb->lwb_nfilled = 0; lwb->lwb_sz = sz; - lwb->lwb_state = state; - lwb->lwb_buf = zio_buf_alloc(sz); + lwb->lwb_buf = NULL; + lwb->lwb_state = LWB_STATE_NEW; lwb->lwb_child_zio = NULL; lwb->lwb_write_zio = NULL; lwb->lwb_root_zio = NULL; @@ -851,8 +860,6 @@ zil_alloc_lwb(zilog_t *zilog, int sz, blkptr_t *bp, boolean_t slog, mutex_enter(&zilog->zl_lock); list_insert_tail(&zilog->zl_lwb_list, lwb); - if (state != LWB_STATE_NEW) - zilog->zl_last_lwb_opened = lwb; mutex_exit(&zilog->zl_lock); return (lwb); @@ -864,15 +871,15 @@ zil_free_lwb(zilog_t *zilog, lwb_t *lwb) ASSERT(MUTEX_HELD(&zilog->zl_lock)); ASSERT(lwb->lwb_state == LWB_STATE_NEW || lwb->lwb_state == LWB_STATE_FLUSH_DONE); - ASSERT3P(lwb->lwb_child_zio, ==, NULL); - ASSERT3P(lwb->lwb_write_zio, ==, NULL); - ASSERT3P(lwb->lwb_root_zio, ==, NULL); + ASSERT0P(lwb->lwb_child_zio); + ASSERT0P(lwb->lwb_write_zio); + ASSERT0P(lwb->lwb_root_zio); ASSERT3U(lwb->lwb_alloc_txg, <=, spa_syncing_txg(zilog->zl_spa)); ASSERT3U(lwb->lwb_max_txg, <=, spa_syncing_txg(zilog->zl_spa)); VERIFY(list_is_empty(&lwb->lwb_itxs)); VERIFY(list_is_empty(&lwb->lwb_waiters)); ASSERT(avl_is_empty(&lwb->lwb_vdev_tree)); - ASSERT(!MUTEX_HELD(&lwb->lwb_vdev_lock)); + ASSERT(!MUTEX_HELD(&lwb->lwb_lock)); /* * Clear the zilog's field to indicate this lwb is no longer @@ -991,8 +998,8 @@ zil_create(zilog_t *zilog) */ txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); - ASSERT(zh->zh_claim_txg == 0); - ASSERT(zh->zh_replay_seq == 0); + ASSERT0(zh->zh_claim_txg); + ASSERT0(zh->zh_replay_seq); blk = zh->zh_log; @@ -1013,7 +1020,7 @@ zil_create(zilog_t *zilog) } error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk, - ZIL_MIN_BLKSZ, &slog); + ZIL_MIN_BLKSZ, ZIL_MIN_BLKSZ, &slog, B_TRUE); if (error == 0) zil_init_log_chain(zilog, &blk); } @@ -1022,7 +1029,7 @@ zil_create(zilog_t *zilog) * Allocate a log write block (lwb) for the first log block. */ if (error == 0) - lwb = zil_alloc_lwb(zilog, 0, &blk, slog, txg, LWB_STATE_NEW); + lwb = zil_alloc_lwb(zilog, &blk, 0, 0, slog, txg); /* * If we just allocated the first log block, commit our transaction @@ -1104,7 +1111,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first) zilog->zl_keep_first = keep_first; if (!list_is_empty(&zilog->zl_lwb_list)) { - ASSERT(zh->zh_claim_txg == 0); + ASSERT0(zh->zh_claim_txg); VERIFY(!keep_first); while ((lwb = list_remove_head(&zilog->zl_lwb_list)) != NULL) { if (lwb->lwb_buf != NULL) @@ -1250,7 +1257,7 @@ zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx) blkptr_t *bp; int error; - ASSERT(tx == NULL); + ASSERT0P(tx); error = dmu_objset_from_ds(ds, &os); if (error != 0) { @@ -1318,10 +1325,12 @@ zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx) * zil_commit() is racing with spa_sync(). */ static void -zil_commit_waiter_skip(zil_commit_waiter_t *zcw) +zil_commit_waiter_done(zil_commit_waiter_t *zcw, int err) { mutex_enter(&zcw->zcw_lock); ASSERT3B(zcw->zcw_done, ==, B_FALSE); + zcw->zcw_lwb = NULL; + zcw->zcw_error = err; zcw->zcw_done = B_TRUE; cv_broadcast(&zcw->zcw_cv); mutex_exit(&zcw->zcw_lock); @@ -1351,7 +1360,7 @@ zil_commit_waiter_link_lwb(zil_commit_waiter_t *zcw, lwb_t *lwb) ASSERT(!list_link_active(&zcw->zcw_node)); list_insert_tail(&lwb->lwb_waiters, zcw); - ASSERT3P(zcw->zcw_lwb, ==, NULL); + ASSERT0P(zcw->zcw_lwb); zcw->zcw_lwb = lwb; } @@ -1365,7 +1374,7 @@ zil_commit_waiter_link_nolwb(zil_commit_waiter_t *zcw, list_t *nolwb) { ASSERT(!list_link_active(&zcw->zcw_node)); list_insert_tail(nolwb, zcw); - ASSERT3P(zcw->zcw_lwb, ==, NULL); + ASSERT0P(zcw->zcw_lwb); } void @@ -1383,7 +1392,7 @@ zil_lwb_add_block(lwb_t *lwb, const blkptr_t *bp) if (zil_nocacheflush) return; - mutex_enter(&lwb->lwb_vdev_lock); + mutex_enter(&lwb->lwb_lock); for (i = 0; i < ndvas; i++) { zvsearch.zv_vdev = DVA_GET_VDEV(&bp->blk_dva[i]); if (avl_find(t, &zvsearch, &where) == NULL) { @@ -1392,7 +1401,7 @@ zil_lwb_add_block(lwb_t *lwb, const blkptr_t *bp) avl_insert(t, zv, where); } } - mutex_exit(&lwb->lwb_vdev_lock); + mutex_exit(&lwb->lwb_lock); } static void @@ -1409,12 +1418,12 @@ zil_lwb_flush_defer(lwb_t *lwb, lwb_t *nlwb) /* * While 'lwb' is at a point in its lifetime where lwb_vdev_tree does - * not need the protection of lwb_vdev_lock (it will only be modified + * not need the protection of lwb_lock (it will only be modified * while holding zilog->zl_lock) as its writes and those of its * children have all completed. The younger 'nlwb' may be waiting on * future writes to additional vdevs. */ - mutex_enter(&nlwb->lwb_vdev_lock); + mutex_enter(&nlwb->lwb_lock); /* * Tear down the 'lwb' vdev tree, ensuring that entries which do not * exist in 'nlwb' are moved to it, freeing any would-be duplicates. @@ -1428,7 +1437,7 @@ zil_lwb_flush_defer(lwb_t *lwb, lwb_t *nlwb) kmem_free(zv, sizeof (*zv)); } } - mutex_exit(&nlwb->lwb_vdev_lock); + mutex_exit(&nlwb->lwb_lock); } void @@ -1482,13 +1491,9 @@ zil_lwb_flush_vdevs_done(zio_t *zio) } while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL) - zil_itx_destroy(itx); + zil_itx_destroy(itx, 0); while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) { - mutex_enter(&zcw->zcw_lock); - - ASSERT3P(zcw->zcw_lwb, ==, lwb); - zcw->zcw_lwb = NULL; /* * We expect any ZIO errors from child ZIOs to have been * propagated "up" to this specific LWB's root ZIO, in @@ -1503,14 +1508,7 @@ zil_lwb_flush_vdevs_done(zio_t *zio) * errors not being handled correctly here. See the * comment above the call to "zio_flush" for details. */ - - zcw->zcw_zio_error = zio->io_error; - - ASSERT3B(zcw->zcw_done, ==, B_FALSE); - zcw->zcw_done = B_TRUE; - cv_broadcast(&zcw->zcw_cv); - - mutex_exit(&zcw->zcw_lock); + zil_commit_waiter_done(zcw, zio->io_error); } uint64_t txg = lwb->lwb_issued_txg; @@ -1582,7 +1580,7 @@ zil_lwb_write_done(zio_t *zio) avl_tree_t *t = &lwb->lwb_vdev_tree; void *cookie = NULL; zil_vdev_node_t *zv; - lwb_t *nlwb; + lwb_t *nlwb = NULL; ASSERT3S(spa_config_held(spa, SCL_STATE, RW_READER), !=, 0); @@ -1602,9 +1600,11 @@ zil_lwb_write_done(zio_t *zio) * its write ZIO a parent this ZIO. In such case we can not defer * our flushes or below may be a race between the done callbacks. */ - nlwb = list_next(&zilog->zl_lwb_list, lwb); - if (nlwb && nlwb->lwb_state != LWB_STATE_ISSUED) - nlwb = NULL; + if (!(lwb->lwb_flags & LWB_FLAG_CRASHED)) { + nlwb = list_next(&zilog->zl_lwb_list, lwb); + if (nlwb && nlwb->lwb_state != LWB_STATE_ISSUED) + nlwb = NULL; + } mutex_exit(&zilog->zl_lock); if (avl_numnodes(t) == 0) @@ -1618,12 +1618,17 @@ zil_lwb_write_done(zio_t *zio) * written out. * * Additionally, we don't perform any further error handling at - * this point (e.g. setting "zcw_zio_error" appropriately), as - * we expect that to occur in "zil_lwb_flush_vdevs_done" (thus, - * we expect any error seen here, to have been propagated to - * that function). + * this point (e.g. setting "zcw_error" appropriately), as we + * expect that to occur in "zil_lwb_flush_vdevs_done" (thus, we + * expect any error seen here, to have been propagated to that + * function). + * + * Note that we treat a "crashed" LWB as though it was in error, + * even if it did appear to succeed, because we've already + * signaled error and cleaned up waiters and committers in + * zil_crash(); we just want to clean up and get out of here. */ - if (zio->io_error != 0) { + if (zio->io_error != 0 || (lwb->lwb_flags & LWB_FLAG_CRASHED)) { while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) kmem_free(zv, sizeof (*zv)); return; @@ -1736,10 +1741,26 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb) return; } + mutex_enter(&lwb->lwb_lock); mutex_enter(&zilog->zl_lock); lwb->lwb_state = LWB_STATE_OPENED; zilog->zl_last_lwb_opened = lwb; mutex_exit(&zilog->zl_lock); + mutex_exit(&lwb->lwb_lock); + + /* + * Allocate buffer and set up LWB capacities. + */ + ASSERT0P(lwb->lwb_buf); + ASSERT3U(lwb->lwb_sz, >, 0); + lwb->lwb_buf = zio_buf_alloc(lwb->lwb_sz); + if (lwb->lwb_flags & LWB_FLAG_SLIM) { + lwb->lwb_nmax = lwb->lwb_sz; + lwb->lwb_nused = lwb->lwb_nfilled = sizeof (zil_chain_t); + } else { + lwb->lwb_nmax = lwb->lwb_sz - sizeof (zil_chain_t); + lwb->lwb_nused = lwb->lwb_nfilled = 0; + } } /* @@ -1756,6 +1777,8 @@ static uint_t zil_lwb_plan(zilog_t *zilog, uint64_t size, uint_t *minsize) { uint_t md = zilog->zl_max_block_size - sizeof (zil_chain_t); + uint_t waste = zil_max_waste_space(zilog); + waste = MAX(waste, zilog->zl_cur_max); if (size <= md) { /* @@ -1766,9 +1789,10 @@ zil_lwb_plan(zilog_t *zilog, uint64_t size, uint_t *minsize) } else if (size > 8 * md) { /* * Big bursts use maximum blocks. The first block size - * is hard to predict, but it does not really matter. + * is hard to predict, but we need at least enough space + * to make reasonable progress. */ - *minsize = 0; + *minsize = waste; return (md); } @@ -1781,57 +1805,52 @@ zil_lwb_plan(zilog_t *zilog, uint64_t size, uint_t *minsize) uint_t s = size; uint_t n = DIV_ROUND_UP(s, md - sizeof (lr_write_t)); uint_t chunk = DIV_ROUND_UP(s, n); - uint_t waste = zil_max_waste_space(zilog); - waste = MAX(waste, zilog->zl_cur_max); if (chunk <= md - waste) { *minsize = MAX(s - (md - waste) * (n - 1), waste); return (chunk); } else { - *minsize = 0; + *minsize = waste; return (md); } } /* * Try to predict next block size based on previous history. Make prediction - * sufficient for 7 of 8 previous bursts. Don't try to save if the saving is - * less then 50%, extra writes may cost more, but we don't want single spike - * to badly affect our predictions. + * sufficient for 7 of 8 previous bursts, but don't try to save if the saving + * is less then 50%. Extra writes may cost more, but we don't want single + * spike to badly affect our predictions. */ -static uint_t -zil_lwb_predict(zilog_t *zilog) +static void +zil_lwb_predict(zilog_t *zilog, uint64_t *min_predict, uint64_t *max_predict) { - uint_t m, o; + uint_t m1 = 0, m2 = 0, o; - /* If we are in the middle of a burst, take it into account also. */ - if (zilog->zl_cur_size > 0) { - o = zil_lwb_plan(zilog, zilog->zl_cur_size, &m); - } else { + /* If we are in the middle of a burst, take it as another data point. */ + if (zilog->zl_cur_size > 0) + o = zil_lwb_plan(zilog, zilog->zl_cur_size, &m1); + else o = UINT_MAX; - m = 0; - } - /* Find minimum optimal size. We don't need to go below that. */ - for (int i = 0; i < ZIL_BURSTS; i++) - o = MIN(o, zilog->zl_prev_opt[i]); - - /* Find two biggest minimal first block sizes above the optimal. */ - uint_t m1 = MAX(m, o), m2 = o; + /* Find two largest minimal first block sizes. */ for (int i = 0; i < ZIL_BURSTS; i++) { - m = zilog->zl_prev_min[i]; - if (m >= m1) { + uint_t cur = zilog->zl_prev_min[i]; + if (cur >= m1) { m2 = m1; - m1 = m; - } else if (m > m2) { - m2 = m; + m1 = cur; + } else if (cur > m2) { + m2 = cur; } } - /* - * If second minimum size gives 50% saving -- use it. It may cost us - * one additional write later, but the space saving is just too big. - */ - return ((m1 < m2 * 2) ? m1 : m2); + /* Minimum should guarantee progress in most cases. */ + *min_predict = (m1 < m2 * 2) ? m1 : m2; + + /* Maximum doesn't need to go below the minimum optimal size. */ + for (int i = 0; i < ZIL_BURSTS; i++) + o = MIN(o, zilog->zl_prev_opt[i]); + m1 = MAX(m1, o); + m2 = MAX(m2, o); + *max_predict = (m1 < m2 * 2) ? m1 : m2; } /* @@ -1839,12 +1858,13 @@ zil_lwb_predict(zilog_t *zilog) * Has to be called under zl_issuer_lock to chain more lwbs. */ static lwb_t * -zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, lwb_state_t state) +zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb) { - uint64_t blksz, plan, plan2; + uint64_t minbs, maxbs; ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED); + membar_producer(); lwb->lwb_state = LWB_STATE_CLOSED; /* @@ -1869,33 +1889,40 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, lwb_state_t state) * Try to predict what can it be and plan for the worst case. */ uint_t m; - plan = zil_lwb_plan(zilog, zilog->zl_cur_left, &m); + maxbs = zil_lwb_plan(zilog, zilog->zl_cur_left, &m); + minbs = m; if (zilog->zl_parallel) { - plan2 = zil_lwb_plan(zilog, zilog->zl_cur_left + - zil_lwb_predict(zilog), &m); - if (plan < plan2) - plan = plan2; + uint64_t minp, maxp; + zil_lwb_predict(zilog, &minp, &maxp); + maxp = zil_lwb_plan(zilog, zilog->zl_cur_left + maxp, + &m); + if (maxbs < maxp) + maxbs = maxp; } } else { /* * The previous burst is done and we can only predict what * will come next. */ - plan = zil_lwb_predict(zilog); + zil_lwb_predict(zilog, &minbs, &maxbs); } - blksz = plan + sizeof (zil_chain_t); - blksz = P2ROUNDUP_TYPED(blksz, ZIL_MIN_BLKSZ, uint64_t); - blksz = MIN(blksz, zilog->zl_max_block_size); - DTRACE_PROBE3(zil__block__size, zilog_t *, zilog, uint64_t, blksz, - uint64_t, plan); - return (zil_alloc_lwb(zilog, blksz, NULL, 0, 0, state)); + minbs += sizeof (zil_chain_t); + maxbs += sizeof (zil_chain_t); + minbs = P2ROUNDUP_TYPED(minbs, ZIL_MIN_BLKSZ, uint64_t); + maxbs = P2ROUNDUP_TYPED(maxbs, ZIL_MIN_BLKSZ, uint64_t); + maxbs = MIN(maxbs, zilog->zl_max_block_size); + minbs = MIN(minbs, maxbs); + DTRACE_PROBE3(zil__block__size, zilog_t *, zilog, uint64_t, minbs, + uint64_t, maxbs); + + return (zil_alloc_lwb(zilog, NULL, minbs, maxbs, 0, 0)); } /* * Finalize previously closed block and issue the write zio. */ -static void +static int zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) { spa_t *spa = zilog->zl_spa; @@ -1909,8 +1936,13 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) /* Actually fill the lwb with the data. */ for (itx_t *itx = list_head(&lwb->lwb_itxs); itx; - itx = list_next(&lwb->lwb_itxs, itx)) - zil_lwb_commit(zilog, lwb, itx); + itx = list_next(&lwb->lwb_itxs, itx)) { + error = zil_lwb_commit(zilog, lwb, itx); + if (error != 0) { + ASSERT3U(error, ==, ESHUTDOWN); + return (error); + } + } lwb->lwb_nused = lwb->lwb_nfilled; ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_nmax); @@ -1928,19 +1960,21 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) lwb->lwb_state = LWB_STATE_READY; if (BP_IS_HOLE(&lwb->lwb_blk) && lwb->lwb_error == 0) { mutex_exit(&zilog->zl_lock); - return; + return (0); } mutex_exit(&zilog->zl_lock); next_lwb: - if (lwb->lwb_slim) + if (lwb->lwb_flags & LWB_FLAG_SLIM) zilc = (zil_chain_t *)lwb->lwb_buf; else zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_nmax); - int wsz = lwb->lwb_sz; + uint64_t alloc_size = BP_GET_LSIZE(&lwb->lwb_blk); + int wsz = alloc_size; if (lwb->lwb_error == 0) { abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf, lwb->lwb_sz); - if (!lwb->lwb_slog || zilog->zl_cur_size <= zil_slog_bulk) + if (!(lwb->lwb_flags & LWB_FLAG_SLOG) || + zilog->zl_cur_size <= zil_slog_bulk) prio = ZIO_PRIORITY_SYNC_WRITE; else prio = ZIO_PRIORITY_ASYNC_WRITE; @@ -1948,16 +1982,17 @@ next_lwb: ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]); lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, spa, 0, - &lwb->lwb_blk, lwb_abd, lwb->lwb_sz, zil_lwb_write_done, + &lwb->lwb_blk, lwb_abd, alloc_size, zil_lwb_write_done, lwb, prio, ZIO_FLAG_CANFAIL, &zb); zil_lwb_add_block(lwb, &lwb->lwb_blk); - if (lwb->lwb_slim) { + if (lwb->lwb_flags & LWB_FLAG_SLIM) { /* For Slim ZIL only write what is used. */ wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, int); - ASSERT3S(wsz, <=, lwb->lwb_sz); - zio_shrink(lwb->lwb_write_zio, wsz); + ASSERT3S(wsz, <=, alloc_size); + if (wsz < alloc_size) + zio_shrink(lwb->lwb_write_zio, wsz); wsz = lwb->lwb_write_zio->io_size; } memset(lwb->lwb_buf + lwb->lwb_nused, 0, wsz - lwb->lwb_nused); @@ -1993,13 +2028,53 @@ next_lwb: BP_ZERO(bp); error = lwb->lwb_error; if (error == 0) { - error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, nlwb->lwb_sz, - &slog); + /* + * Allocation flexibility depends on LWB state: + * if NEW: allow range allocation and larger sizes; + * if OPENED: use fixed predetermined allocation size; + * if CLOSED + Slim: allocate precisely for actual usage. + */ + boolean_t flexible = (nlwb->lwb_state == LWB_STATE_NEW); + if (flexible) { + /* We need to prevent opening till we update lwb_sz. */ + mutex_enter(&nlwb->lwb_lock); + flexible = (nlwb->lwb_state == LWB_STATE_NEW); + if (!flexible) + mutex_exit(&nlwb->lwb_lock); /* We lost. */ + } + boolean_t closed_slim = (nlwb->lwb_state == LWB_STATE_CLOSED && + (lwb->lwb_flags & LWB_FLAG_SLIM)); + + uint64_t min_size, max_size; + if (closed_slim) { + /* This transition is racy, but only one way. */ + membar_consumer(); + min_size = max_size = P2ROUNDUP_TYPED(nlwb->lwb_nused, + ZIL_MIN_BLKSZ, uint64_t); + } else if (flexible) { + min_size = nlwb->lwb_min_sz; + max_size = nlwb->lwb_sz; + } else { + min_size = max_size = nlwb->lwb_sz; + } + + error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, + min_size, max_size, &slog, flexible); + if (error == 0) { + if (closed_slim) + ASSERT3U(BP_GET_LSIZE(bp), ==, max_size); + else if (flexible) + nlwb->lwb_sz = BP_GET_LSIZE(bp); + else + ASSERT3U(BP_GET_LSIZE(bp), ==, nlwb->lwb_sz); + } + if (flexible) + mutex_exit(&nlwb->lwb_lock); } if (error == 0) { ASSERT3U(BP_GET_BIRTH(bp), ==, txg); - BP_SET_CHECKSUM(bp, nlwb->lwb_slim ? ZIO_CHECKSUM_ZILOG2 : - ZIO_CHECKSUM_ZILOG); + BP_SET_CHECKSUM(bp, (nlwb->lwb_flags & LWB_FLAG_SLIM) ? + ZIO_CHECKSUM_ZILOG2 : ZIO_CHECKSUM_ZILOG); bp->blk_cksum = lwb->lwb_blk.blk_cksum; bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++; } @@ -2028,14 +2103,15 @@ next_lwb: if (nlwb) { nlwb->lwb_blk = *bp; nlwb->lwb_error = error; - nlwb->lwb_slog = slog; + if (slog) + nlwb->lwb_flags |= LWB_FLAG_SLOG; nlwb->lwb_alloc_txg = txg; if (nlwb->lwb_state != LWB_STATE_READY) nlwb = NULL; } mutex_exit(&zilog->zl_lock); - if (lwb->lwb_slog) { + if (lwb->lwb_flags & LWB_FLAG_SLOG) { ZIL_STAT_BUMP(zilog, zil_itx_metaslab_slog_count); ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_bytes, lwb->lwb_nused); @@ -2065,6 +2141,8 @@ next_lwb: lwb = nlwb; if (lwb) goto next_lwb; + + return (0); } /* @@ -2207,7 +2285,6 @@ zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs) ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); ASSERT3P(lwb, !=, NULL); - ASSERT3P(lwb->lwb_buf, !=, NULL); zil_lwb_write_open(zilog, lwb); @@ -2249,9 +2326,10 @@ cont: (dlen % max_log_data == 0 || lwb_sp < reclen + dlen % max_log_data))) { list_insert_tail(ilwbs, lwb); - lwb = zil_lwb_write_close(zilog, lwb, LWB_STATE_OPENED); + lwb = zil_lwb_write_close(zilog, lwb); if (lwb == NULL) return (NULL); + zil_lwb_write_open(zilog, lwb); lwb_sp = lwb->lwb_nmax - lwb->lwb_nused; } @@ -2308,11 +2386,13 @@ cont: return (lwb); } +static void zil_crash(zilog_t *zilog); + /* * Fill the actual transaction data into the lwb, following zil_lwb_assign(). * Does not require locking. */ -static void +static int zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx) { lr_t *lr, *lrb; @@ -2324,7 +2404,7 @@ zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx) lrw = (lr_write_t *)lr; if (lr->lrc_txtype == TX_COMMIT) - return; + return (0); reclen = lr->lrc_reclen; dlen = zil_itx_data_size(itx); @@ -2410,16 +2490,35 @@ zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx) ". Falling back to txg_wait_synced().", error); zfs_fallthrough; - case EIO: - txg_wait_synced(zilog->zl_dmu_pool, - lr->lrc_txg); + case EIO: { + int error = txg_wait_synced_flags( + zilog->zl_dmu_pool, + lr->lrc_txg, TXG_WAIT_SUSPEND); + if (error != 0) { + ASSERT3U(error, ==, ESHUTDOWN); + /* + * zil_lwb_commit() is called from a + * loop over a list of itxs at the + * top of zil_lwb_write_issue(), which + * itself is called from a loop over a + * list of lwbs in various places. + * zil_crash() will free those itxs + * and sometimes the lwbs, so they + * are invalid when zil_crash() returns. + * Callers must pretty much abort + * immediately. + */ + zil_crash(zilog); + return (error); + } zfs_fallthrough; + } case ENOENT: zfs_fallthrough; case EEXIST: zfs_fallthrough; case EALREADY: - return; + return (0); } } } @@ -2427,6 +2526,8 @@ zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx) lwb->lwb_nfilled += reclen + dlen; ASSERT3S(lwb->lwb_nfilled, <=, lwb->lwb_nused); ASSERT0(P2PHASE(lwb->lwb_nfilled, sizeof (uint64_t))); + + return (0); } itx_t * @@ -2468,7 +2569,7 @@ zil_itx_clone(itx_t *oitx) } void -zil_itx_destroy(itx_t *itx) +zil_itx_destroy(itx_t *itx, int err) { ASSERT3U(itx->itx_size, >=, sizeof (itx_t)); ASSERT3U(itx->itx_lr.lrc_reclen, ==, @@ -2477,7 +2578,7 @@ zil_itx_destroy(itx_t *itx) IMPLY(itx->itx_callback != NULL, itx->itx_lr.lrc_txtype != TX_COMMIT); if (itx->itx_callback != NULL) - itx->itx_callback(itx->itx_callback_data); + itx->itx_callback(itx->itx_callback_data, err); zio_data_buf_free(itx, itx->itx_size); } @@ -2518,9 +2619,9 @@ zil_itxg_clean(void *arg) * called) we will hit this case. */ if (itx->itx_lr.lrc_txtype == TX_COMMIT) - zil_commit_waiter_skip(itx->itx_private); + zil_commit_waiter_done(itx->itx_private, 0); - zil_itx_destroy(itx); + zil_itx_destroy(itx, 0); } cookie = NULL; @@ -2530,7 +2631,7 @@ zil_itxg_clean(void *arg) while ((itx = list_remove_head(list)) != NULL) { /* commit itxs should never be on the async lists. */ ASSERT3U(itx->itx_lr.lrc_txtype, !=, TX_COMMIT); - zil_itx_destroy(itx); + zil_itx_destroy(itx, 0); } list_destroy(list); kmem_free(ian, sizeof (itx_async_node_t)); @@ -2592,7 +2693,7 @@ zil_remove_async(zilog_t *zilog, uint64_t oid) while ((itx = list_remove_head(&clean_list)) != NULL) { /* commit itxs should never be on the async lists. */ ASSERT3U(itx->itx_lr.lrc_txtype, !=, TX_COMMIT); - zil_itx_destroy(itx); + zil_itx_destroy(itx, 0); } list_destroy(&clean_list); } @@ -2677,6 +2778,68 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) } /* + * Post-crash cleanup. This is called from zil_clean() because it needs to + * do cleanup after every txg until the ZIL is restarted, and zilog_dirty() + * can arrange that easily, unlike zil_sync() which is more complicated to + * get a call to without actual dirty data. + */ +static void +zil_crash_clean(zilog_t *zilog, uint64_t synced_txg) +{ + ASSERT(MUTEX_HELD(&zilog->zl_lock)); + ASSERT3U(zilog->zl_restart_txg, >, 0); + + /* Clean up anything on the crash list from earlier txgs */ + lwb_t *lwb; + while ((lwb = list_head(&zilog->zl_lwb_crash_list)) != NULL) { + if (lwb->lwb_alloc_txg >= synced_txg || + lwb->lwb_max_txg >= synced_txg) { + /* + * This lwb was allocated or updated on this txg, or + * in the future. We stop processing here, to avoid + * the strange situation of freeing a ZIL block on + * on the same or earlier txg than what it was + * allocated for. + * + * We'll take care of it on the next txg. + */ + break; + } + + /* This LWB is from the past, so we can clean it up now. */ + ASSERT(lwb->lwb_flags & LWB_FLAG_CRASHED); + list_remove(&zilog->zl_lwb_crash_list, lwb); + if (lwb->lwb_buf != NULL) + zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); + if (!BP_IS_HOLE(&lwb->lwb_blk)) + /* + * Free on the next txg, since zil_clean() is called + * once synced_txg has already been completed. + */ + zio_free(zilog->zl_spa, synced_txg+1, &lwb->lwb_blk); + zil_free_lwb(zilog, lwb); + } + + if (zilog->zl_restart_txg > synced_txg) { + /* + * Not reached the restart txg yet, so mark the ZIL dirty for + * the next txg and we'll consider it all again then. + */ + zilog_dirty(zilog, synced_txg+1); + return; + } + + /* + * Reached the restart txg, so we can allow new calls to zil_commit(). + * All ZIL txgs have long past so there should be no IO waiting. + */ + ASSERT(list_is_empty(&zilog->zl_lwb_list)); + ASSERT(list_is_empty(&zilog->zl_lwb_crash_list)); + + zilog->zl_restart_txg = 0; +} + +/* * If there are any in-memory intent log transactions which have now been * synced then start up a taskq to free them. We should only do this after we * have written out the uberblocks (i.e. txg has been committed) so that @@ -2691,6 +2854,15 @@ zil_clean(zilog_t *zilog, uint64_t synced_txg) ASSERT3U(synced_txg, <, ZILTEST_TXG); + /* Do cleanup and restart after crash. */ + if (zilog->zl_restart_txg > 0) { + mutex_enter(&zilog->zl_lock); + /* Make sure we didn't lose a race. */ + if (zilog->zl_restart_txg > 0) + zil_crash_clean(zilog, synced_txg); + mutex_exit(&zilog->zl_lock); + } + mutex_enter(&itxg->itxg_lock); if (itxg->itxg_itxs == NULL || itxg->itxg_txg == ZILTEST_TXG) { mutex_exit(&itxg->itxg_lock); @@ -2875,7 +3047,7 @@ zil_prune_commit_list(zilog_t *zilog) * never any itx's for it to wait on), so it's * safe to skip this waiter and mark it done. */ - zil_commit_waiter_skip(itx->itx_private); + zil_commit_waiter_done(itx->itx_private, 0); } else { zil_commit_waiter_link_lwb(itx->itx_private, last_lwb); } @@ -2883,13 +3055,13 @@ zil_prune_commit_list(zilog_t *zilog) mutex_exit(&zilog->zl_lock); list_remove(&zilog->zl_itx_commit_list, itx); - zil_itx_destroy(itx); + zil_itx_destroy(itx, 0); } IMPLY(itx != NULL, itx->itx_lr.lrc_txtype != TX_COMMIT); } -static void +static int zil_commit_writer_stall(zilog_t *zilog) { /* @@ -2914,8 +3086,22 @@ zil_commit_writer_stall(zilog_t *zilog) */ ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); ZIL_STAT_BUMP(zilog, zil_commit_stall_count); - txg_wait_synced(zilog->zl_dmu_pool, 0); + + int err = txg_wait_synced_flags(zilog->zl_dmu_pool, 0, + TXG_WAIT_SUSPEND); + if (err != 0) { + ASSERT3U(err, ==, ESHUTDOWN); + zil_crash(zilog); + } + + /* + * Either zil_sync() has been called to wait for and clean up any + * in-flight LWBs, or zil_crash() has emptied out the list and arranged + * for them to be cleaned up later. + */ ASSERT(list_is_empty(&zilog->zl_lwb_list)); + + return (err); } static void @@ -3082,7 +3268,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs) } else { ASSERT3S(lrc->lrc_txtype, !=, TX_COMMIT); zilog->zl_cur_left -= zil_itx_full_size(itx); - zil_itx_destroy(itx); + zil_itx_destroy(itx, 0); } } @@ -3092,10 +3278,21 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs) * "next" lwb on-disk. When this happens, we must stall * the ZIL write pipeline; see the comment within * zil_commit_writer_stall() for more details. + * + * ESHUTDOWN has to be handled carefully here. If we get it, + * then the pool suspended and zil_crash() was called, so we + * need to stop trying and just get an error back to the + * callers. */ - while ((lwb = list_remove_head(ilwbs)) != NULL) - zil_lwb_write_issue(zilog, lwb); - zil_commit_writer_stall(zilog); + int err = 0; + while ((lwb = list_remove_head(ilwbs)) != NULL) { + if (err == 0) + err = zil_lwb_write_issue(zilog, lwb); + } + if (err != ESHUTDOWN) + err = zil_commit_writer_stall(zilog); + if (err == ESHUTDOWN) + err = SET_ERROR(EIO); /* * Additionally, we have to signal and mark the "nolwb" @@ -3105,7 +3302,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs) */ zil_commit_waiter_t *zcw; while ((zcw = list_remove_head(&nolwb_waiters)) != NULL) - zil_commit_waiter_skip(zcw); + zil_commit_waiter_done(zcw, err); /* * And finally, we have to destroy the itx's that @@ -3113,7 +3310,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs) * the itx's callback if one exists for the itx. */ while ((itx = list_remove_head(&nolwb_itxs)) != NULL) - zil_itx_destroy(itx); + zil_itx_destroy(itx, err); } else { ASSERT(list_is_empty(&nolwb_waiters)); ASSERT3P(lwb, !=, NULL); @@ -3167,11 +3364,17 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs) (!zilog->zl_parallel || zilog->zl_suspend > 0)) { zil_burst_done(zilog); list_insert_tail(ilwbs, lwb); - lwb = zil_lwb_write_close(zilog, lwb, LWB_STATE_NEW); + lwb = zil_lwb_write_close(zilog, lwb); if (lwb == NULL) { - while ((lwb = list_remove_head(ilwbs)) != NULL) - zil_lwb_write_issue(zilog, lwb); - zil_commit_writer_stall(zilog); + int err = 0; + while ((lwb = + list_remove_head(ilwbs)) != NULL) { + if (err == 0) + err = zil_lwb_write_issue( + zilog, lwb); + } + if (err != ESHUTDOWN) + (void) zil_commit_writer_stall(zilog); } } } @@ -3230,10 +3433,23 @@ zil_commit_writer(zilog_t *zilog, zil_commit_waiter_t *zcw) zil_prune_commit_list(zilog); zil_process_commit_list(zilog, zcw, &ilwbs); + /* + * If the ZIL failed somewhere inside zil_process_commit_list(), it's + * will be because a fallback to txg_wait_sync_flags() happened at some + * point (eg zil_commit_writer_stall()). All cases should issue and + * empty ilwbs, so there will be nothing to in the issue loop below. + * That's why we don't have to plumb the error value back from + * zil_process_commit_list(), and don't have to skip it. + */ + IMPLY(zilog->zl_restart_txg > 0, list_is_empty(&ilwbs)); + out: mutex_exit(&zilog->zl_issuer_lock); - while ((lwb = list_remove_head(&ilwbs)) != NULL) - zil_lwb_write_issue(zilog, lwb); + int err = 0; + while ((lwb = list_remove_head(&ilwbs)) != NULL) { + if (err == 0) + err = zil_lwb_write_issue(zilog, lwb); + } list_destroy(&ilwbs); return (wtxg); } @@ -3326,7 +3542,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw) * hasn't been issued. */ zil_burst_done(zilog); - lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, LWB_STATE_NEW); + lwb_t *nlwb = zil_lwb_write_close(zilog, lwb); ASSERT3S(lwb->lwb_state, ==, LWB_STATE_CLOSED); @@ -3402,7 +3618,7 @@ zil_commit_waiter(zilog_t *zilog, zil_commit_waiter_t *zcw) * commit itxs. When this occurs, the commit waiters linked * off of these commit itxs will not be committed to an * lwb. Additionally, these commit waiters will not be - * marked done until zil_commit_waiter_skip() is called via + * marked done until zil_commit_waiter_done() is called via * zil_itxg_clean(). * * Thus, it's possible for this commit waiter (i.e. the @@ -3480,7 +3696,7 @@ zil_alloc_commit_waiter(void) list_link_init(&zcw->zcw_node); zcw->zcw_lwb = NULL; zcw->zcw_done = B_FALSE; - zcw->zcw_zio_error = 0; + zcw->zcw_error = 0; return (zcw); } @@ -3489,7 +3705,7 @@ static void zil_free_commit_waiter(zil_commit_waiter_t *zcw) { ASSERT(!list_link_active(&zcw->zcw_node)); - ASSERT3P(zcw->zcw_lwb, ==, NULL); + ASSERT0P(zcw->zcw_lwb); ASSERT3B(zcw->zcw_done, ==, B_TRUE); mutex_destroy(&zcw->zcw_lock); cv_destroy(&zcw->zcw_cv); @@ -3526,6 +3742,99 @@ zil_commit_itx_assign(zilog_t *zilog, zil_commit_waiter_t *zcw) } /* + * Crash the ZIL. This is something like suspending, but abandons the ZIL + * without further IO until the wanted txg completes. No effort is made to + * close the on-disk chain or do any other on-disk work, as the pool may + * have suspended. zil_sync() will handle cleanup as normal and restart the + * ZIL once enough txgs have passed. + */ +static void +zil_crash(zilog_t *zilog) +{ + mutex_enter(&zilog->zl_lock); + + uint64_t txg = spa_syncing_txg(zilog->zl_spa); + uint64_t restart_txg = + spa_syncing_txg(zilog->zl_spa) + TXG_CONCURRENT_STATES; + + if (zilog->zl_restart_txg > 0) { + /* + * If the ZIL is already crashed, it's almost certainly because + * we lost a race involving multiple callers from + * zil_commit_impl(). + */ + + /* + * This sanity check is to support my understanding that in the + * event of multiple callers to zil_crash(), only one of them + * can possibly be in the codepath to issue lwbs; the rest + * should be calling from zil_commit_impl() after their waiters + * have completed. As I understand it, a second thread trying + * to issue will eventually wait on zl_issuer_lock, and then + * have no work to do and leave. + * + * If more lwbs had been created an issued between zil_crash() + * calls, then we probably just need to take those too, add + * them to the crash list and clean them up, but it complicates + * this function and I don't think it can happend. + */ + ASSERT(list_is_empty(&zilog->zl_lwb_list)); + + mutex_exit(&zilog->zl_lock); + return; + } + + zilog->zl_restart_txg = restart_txg; + + /* + * Capture any live LWBs. Depending on the state of the pool they may + * represent in-flight IO that won't return for some time, and we want + * to make sure they don't get in the way of normal ZIL operation. + */ + ASSERT(list_is_empty(&zilog->zl_lwb_crash_list)); + list_move_tail(&zilog->zl_lwb_crash_list, &zilog->zl_lwb_list); + + /* + * Run through the LWB list; erroring all itxes and signalling error + * to all waiters. + */ + for (lwb_t *lwb = list_head(&zilog->zl_lwb_crash_list); lwb != NULL; + lwb = list_next(&zilog->zl_lwb_crash_list, lwb)) { + ASSERT(!(lwb->lwb_flags & LWB_FLAG_CRASHED)); + lwb->lwb_flags |= LWB_FLAG_CRASHED; + + itx_t *itx; + while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL) + zil_itx_destroy(itx, EIO); + + zil_commit_waiter_t *zcw; + while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) { + mutex_enter(&zcw->zcw_lock); + zcw->zcw_lwb = NULL; + zcw->zcw_error = EIO; + zcw->zcw_done = B_TRUE; + cv_broadcast(&zcw->zcw_cv); + mutex_exit(&zcw->zcw_lock); + } + } + + /* + * Zero the ZIL header bp after the ZIL restarts. We'll free it in + * zil_clean() when we clean up the lwbs. + */ + zil_header_t *zh = zil_header_in_syncing_context(zilog); + BP_ZERO(&zh->zh_log); + + /* + * Mark this ZIL dirty on the next txg, so that zil_clean() will be + * called for cleanup. + */ + zilog_dirty(zilog, txg+1); + + mutex_exit(&zilog->zl_lock); +} + +/* * Commit ZFS Intent Log transactions (itxs) to stable storage. * * When writing ZIL transactions to the on-disk representation of the @@ -3640,9 +3949,17 @@ zil_commit_itx_assign(zilog_t *zilog, zil_commit_waiter_t *zcw) * but the order in which they complete will be the same order in * which they were created. */ -void +static int zil_commit_impl(zilog_t *zilog, uint64_t foid); + +int zil_commit(zilog_t *zilog, uint64_t foid) { + return (zil_commit_flags(zilog, foid, ZIL_COMMIT_FAILMODE)); +} + +int +zil_commit_flags(zilog_t *zilog, uint64_t foid, zil_commit_flag_t flags) +{ /* * We should never attempt to call zil_commit on a snapshot for * a couple of reasons: @@ -3659,7 +3976,7 @@ zil_commit(zilog_t *zilog, uint64_t foid) ASSERT3B(dmu_objset_is_snapshot(zilog->zl_os), ==, B_FALSE); if (zilog->zl_sync == ZFS_SYNC_DISABLED) - return; + return (0); if (!spa_writeable(zilog->zl_spa)) { /* @@ -3670,10 +3987,23 @@ zil_commit(zilog_t *zilog, uint64_t foid) * verifying that truth before we return to the caller. */ ASSERT(list_is_empty(&zilog->zl_lwb_list)); - ASSERT3P(zilog->zl_last_lwb_opened, ==, NULL); + ASSERT0P(zilog->zl_last_lwb_opened); for (int i = 0; i < TXG_SIZE; i++) - ASSERT3P(zilog->zl_itxg[i].itxg_itxs, ==, NULL); - return; + ASSERT0P(zilog->zl_itxg[i].itxg_itxs); + return (0); + } + + int err = 0; + + /* + * If the ZIL crashed, bypass it entirely, and rely on txg_wait_sync() + * to get the data out to disk. + */ + if (zilog->zl_restart_txg > 0) { + ZIL_STAT_BUMP(zilog, zil_commit_crash_count); + err = txg_wait_synced_flags(zilog->zl_dmu_pool, 0, + TXG_WAIT_SUSPEND); + goto out; } /* @@ -3685,14 +4015,43 @@ zil_commit(zilog_t *zilog, uint64_t foid) */ if (zilog->zl_suspend > 0) { ZIL_STAT_BUMP(zilog, zil_commit_suspend_count); - txg_wait_synced(zilog->zl_dmu_pool, 0); - return; + err = txg_wait_synced_flags(zilog->zl_dmu_pool, 0, + TXG_WAIT_SUSPEND); + if (err != 0) { + ASSERT3U(err, ==, ESHUTDOWN); + zil_crash(zilog); + } + goto out; } - zil_commit_impl(zilog, foid); + err = zil_commit_impl(zilog, foid); + +out: + if (err == 0) + return (0); + + /* + * The ZIL write failed and the pool is suspended. There's nothing else + * we can do except return or block. + */ + ASSERT3U(err, ==, ESHUTDOWN); + + /* + * Return error if failmode=continue or caller will handle directly. + */ + if (!(flags & ZIL_COMMIT_FAILMODE) || + spa_get_failmode(zilog->zl_spa) == ZIO_FAILURE_MODE_CONTINUE) + return (SET_ERROR(EIO)); + + /* + * Block until the pool returns. We assume that the data will make + * it out to disk in the end, and so return success. + */ + txg_wait_synced(zilog->zl_dmu_pool, 0); + return (0); } -void +static int zil_commit_impl(zilog_t *zilog, uint64_t foid) { ZIL_STAT_BUMP(zilog, zil_commit_count); @@ -3729,7 +4088,8 @@ zil_commit_impl(zilog_t *zilog, uint64_t foid) uint64_t wtxg = zil_commit_writer(zilog, zcw); zil_commit_waiter(zilog, zcw); - if (zcw->zcw_zio_error != 0) { + int err = 0; + if (zcw->zcw_error != 0) { /* * If there was an error writing out the ZIL blocks that * this thread is waiting on, then we fallback to @@ -3741,13 +4101,29 @@ zil_commit_impl(zilog_t *zilog, uint64_t foid) ZIL_STAT_BUMP(zilog, zil_commit_error_count); DTRACE_PROBE2(zil__commit__io__error, zilog_t *, zilog, zil_commit_waiter_t *, zcw); - txg_wait_synced(zilog->zl_dmu_pool, 0); + err = txg_wait_synced_flags(zilog->zl_dmu_pool, 0, + TXG_WAIT_SUSPEND); } else if (wtxg != 0) { ZIL_STAT_BUMP(zilog, zil_commit_suspend_count); - txg_wait_synced(zilog->zl_dmu_pool, wtxg); + err = txg_wait_synced_flags(zilog->zl_dmu_pool, wtxg, + TXG_WAIT_SUSPEND); } zil_free_commit_waiter(zcw); + + if (err == 0) + return (0); + + /* + * ZIL write failed and pool failed in the fallback to + * txg_wait_synced_flags(). Right now we have no idea if the data is on + * disk and the pool is probably suspended so we have no idea when it's + * coming back. All we can do is shut down and return error to the + * caller. + */ + ASSERT3U(err, ==, ESHUTDOWN); + zil_crash(zilog); + return (err); } /* @@ -3773,7 +4149,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) mutex_enter(&zilog->zl_lock); - ASSERT(zilog->zl_stop_sync == 0); + ASSERT0(zilog->zl_stop_sync); if (*replayed_seq != 0) { ASSERT(zh->zh_replay_seq < *replayed_seq); @@ -3848,7 +4224,7 @@ zil_lwb_cons(void *vbuf, void *unused, int kmflag) offsetof(zil_commit_waiter_t, zcw_node)); avl_create(&lwb->lwb_vdev_tree, zil_lwb_vdev_compare, sizeof (zil_vdev_node_t), offsetof(zil_vdev_node_t, zv_node)); - mutex_init(&lwb->lwb_vdev_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&lwb->lwb_lock, NULL, MUTEX_DEFAULT, NULL); return (0); } @@ -3857,7 +4233,7 @@ zil_lwb_dest(void *vbuf, void *unused) { (void) unused; lwb_t *lwb = vbuf; - mutex_destroy(&lwb->lwb_vdev_lock); + mutex_destroy(&lwb->lwb_lock); avl_destroy(&lwb->lwb_vdev_tree); list_destroy(&lwb->lwb_waiters); list_destroy(&lwb->lwb_itxs); @@ -3943,6 +4319,8 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys) list_create(&zilog->zl_lwb_list, sizeof (lwb_t), offsetof(lwb_t, lwb_node)); + list_create(&zilog->zl_lwb_crash_list, sizeof (lwb_t), + offsetof(lwb_t, lwb_node)); list_create(&zilog->zl_itx_commit_list, sizeof (itx_t), offsetof(itx_t, itx_node)); @@ -3967,9 +4345,12 @@ zil_free(zilog_t *zilog) ASSERT0(zilog->zl_suspend); ASSERT0(zilog->zl_suspending); + ASSERT0(zilog->zl_restart_txg); ASSERT(list_is_empty(&zilog->zl_lwb_list)); list_destroy(&zilog->zl_lwb_list); + ASSERT(list_is_empty(&zilog->zl_lwb_crash_list)); + list_destroy(&zilog->zl_lwb_crash_list); ASSERT(list_is_empty(&zilog->zl_itx_commit_list)); list_destroy(&zilog->zl_itx_commit_list); @@ -4005,8 +4386,8 @@ zil_open(objset_t *os, zil_get_data_t *get_data, zil_sums_t *zil_sums) { zilog_t *zilog = dmu_objset_zil(os); - ASSERT3P(zilog->zl_get_data, ==, NULL); - ASSERT3P(zilog->zl_last_lwb_opened, ==, NULL); + ASSERT0P(zilog->zl_get_data); + ASSERT0P(zilog->zl_last_lwb_opened); ASSERT(list_is_empty(&zilog->zl_lwb_list)); zilog->zl_get_data = get_data; @@ -4025,7 +4406,8 @@ zil_close(zilog_t *zilog) uint64_t txg; if (!dmu_objset_is_snapshot(zilog->zl_os)) { - zil_commit(zilog, 0); + if (zil_commit_flags(zilog, 0, ZIL_COMMIT_NOW) != 0) + txg_wait_synced(zilog->zl_dmu_pool, 0); } else { ASSERT(list_is_empty(&zilog->zl_lwb_list)); ASSERT0(zilog->zl_dirty_max_txg); @@ -4074,7 +4456,7 @@ zil_close(zilog_t *zilog) if (lwb != NULL) { ASSERT(list_is_empty(&zilog->zl_lwb_list)); ASSERT3S(lwb->lwb_state, ==, LWB_STATE_NEW); - zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); + ASSERT0P(lwb->lwb_buf); zil_free_lwb(zilog, lwb); } mutex_exit(&zilog->zl_lock); @@ -4126,6 +4508,17 @@ zil_suspend(const char *osname, void **cookiep) return (SET_ERROR(EBUSY)); } + if (zilog->zl_restart_txg > 0) { + /* + * ZIL crashed. It effectively _is_ suspended, but callers + * are usually trying to make sure it's empty on-disk, which + * we can't guarantee right now. + */ + mutex_exit(&zilog->zl_lock); + dmu_objset_rele(os, suspend_tag); + return (SET_ERROR(EBUSY)); + } + /* * Don't put a long hold in the cases where we can avoid it. This * is when there is no cookie so we are doing a suspend & resume @@ -4154,11 +4547,16 @@ zil_suspend(const char *osname, void **cookiep) cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock); mutex_exit(&zilog->zl_lock); - if (cookiep == NULL) + if (zilog->zl_restart_txg > 0) { + /* ZIL crashed while we were waiting. */ + zil_resume(os); + error = SET_ERROR(EBUSY); + } else if (cookiep == NULL) zil_resume(os); else *cookiep = os; - return (0); + + return (error); } /* @@ -4199,17 +4597,34 @@ zil_suspend(const char *osname, void **cookiep) * would just call txg_wait_synced(), because zl_suspend is set. * txg_wait_synced() doesn't wait for these lwb's to be * LWB_STATE_FLUSH_DONE before returning. + * + * However, zil_commit_impl() itself can return an error if any of the + * lwbs fail, or the pool suspends in the fallback + * txg_wait_sync_flushed(), which affects what we do next, so we + * capture that error. */ - zil_commit_impl(zilog, 0); + error = zil_commit_impl(zilog, 0); + if (error == ESHUTDOWN) + /* zil_commit_impl() has called zil_crash() already */ + error = SET_ERROR(EBUSY); /* * Now that we've ensured all lwb's are LWB_STATE_FLUSH_DONE, we * use txg_wait_synced() to ensure the data from the zilog has * migrated to the main pool before calling zil_destroy(). */ - txg_wait_synced(zilog->zl_dmu_pool, 0); + if (error == 0) { + error = txg_wait_synced_flags(zilog->zl_dmu_pool, 0, + TXG_WAIT_SUSPEND); + if (error != 0) { + ASSERT3U(error, ==, ESHUTDOWN); + zil_crash(zilog); + error = SET_ERROR(EBUSY); + } + } - zil_destroy(zilog, B_FALSE); + if (error == 0) + zil_destroy(zilog, B_FALSE); mutex_enter(&zilog->zl_lock); zilog->zl_suspending = B_FALSE; @@ -4223,7 +4638,8 @@ zil_suspend(const char *osname, void **cookiep) zil_resume(os); else *cookiep = os; - return (0); + + return (error); } void @@ -4386,7 +4802,7 @@ zil_replay(objset_t *os, void *arg, zilog->zl_replay = B_TRUE; zilog->zl_replay_time = ddi_get_lbolt(); - ASSERT(zilog->zl_replay_blks == 0); + ASSERT0(zilog->zl_replay_blks); (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr, zh->zh_claim_txg, B_TRUE); vmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE); diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index 218aec6093e2..4cf8912d4269 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -339,8 +339,8 @@ zio_fini(void) } for (size_t i = 0; i < n; i++) { - VERIFY3P(zio_buf_cache[i], ==, NULL); - VERIFY3P(zio_data_buf_cache[i], ==, NULL); + VERIFY0P(zio_buf_cache[i]); + VERIFY0P(zio_data_buf_cache[i]); } if (zio_ksp != NULL) { @@ -771,7 +771,7 @@ zio_add_child_impl(zio_t *pio, zio_t *cio, boolean_t first) else mutex_enter(&cio->io_lock); - ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0); + ASSERT0(pio->io_state[ZIO_WAIT_DONE]); uint64_t *countp = pio->io_children[cio->io_child_type]; for (int w = 0; w < ZIO_WAIT_TYPES; w++) @@ -821,7 +821,7 @@ zio_wait_for_children(zio_t *zio, uint8_t childbits, enum zio_wait_type wait) boolean_t waiting = B_FALSE; mutex_enter(&zio->io_lock); - ASSERT(zio->io_stall == NULL); + ASSERT0P(zio->io_stall); for (int c = 0; c < ZIO_CHILD_TYPES; c++) { if (!(ZIO_CHILD_BIT_IS_SET(childbits, c))) continue; @@ -955,8 +955,8 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, zio_t *zio; IMPLY(type != ZIO_TYPE_TRIM, psize <= SPA_MAXBLOCKSIZE); - ASSERT(P2PHASE(psize, SPA_MINBLOCKSIZE) == 0); - ASSERT(P2PHASE(offset, SPA_MINBLOCKSIZE) == 0); + ASSERT0(P2PHASE(psize, SPA_MINBLOCKSIZE)); + ASSERT0(P2PHASE(offset, SPA_MINBLOCKSIZE)); ASSERT(!vd || spa_config_held(spa, SCL_STATE_ALL, RW_READER)); ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER)); @@ -1451,7 +1451,7 @@ zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) metaslab_check_free(spa, bp); bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp); } else { - VERIFY3P(zio_free_sync(NULL, spa, txg, bp, 0), ==, NULL); + VERIFY0P(zio_free_sync(NULL, spa, txg, bp, 0)); } } @@ -1559,7 +1559,7 @@ zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, { zio_t *zio; - ASSERT(vd->vdev_children == 0); + ASSERT0(vd->vdev_children); ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE || offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE); ASSERT3U(offset + size, <=, vd->vdev_psize); @@ -1580,7 +1580,7 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, { zio_t *zio; - ASSERT(vd->vdev_children == 0); + ASSERT0(vd->vdev_children); ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE || offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE); ASSERT3U(offset + size, <=, vd->vdev_psize); @@ -1747,7 +1747,7 @@ zio_flush(zio_t *pio, vdev_t *vd) void zio_shrink(zio_t *zio, uint64_t size) { - ASSERT3P(zio->io_executor, ==, NULL); + ASSERT0P(zio->io_executor); ASSERT3U(zio->io_orig_size, ==, zio->io_size); ASSERT3U(size, <=, zio->io_size); @@ -1941,7 +1941,7 @@ zio_write_compress(zio_t *zio) } ASSERT(zio->io_child_type != ZIO_CHILD_DDT); - ASSERT(zio->io_bp_override == NULL); + ASSERT0P(zio->io_bp_override); if (!BP_IS_HOLE(bp) && BP_GET_BIRTH(bp) == zio->io_txg) { /* @@ -2436,7 +2436,7 @@ __zio_execute(zio_t *zio) ASSERT(!MUTEX_HELD(&zio->io_lock)); ASSERT(ISP2(stage)); - ASSERT(zio->io_stall == NULL); + ASSERT0P(zio->io_stall); do { stage <<= 1; @@ -2509,7 +2509,7 @@ zio_wait(zio_t *zio) int error; ASSERT3S(zio->io_stage, ==, ZIO_STAGE_OPEN); - ASSERT3P(zio->io_executor, ==, NULL); + ASSERT0P(zio->io_executor); zio->io_waiter = curthread; ASSERT0(zio->io_queued_timestamp); @@ -2551,7 +2551,7 @@ zio_nowait(zio_t *zio) if (zio == NULL) return; - ASSERT3P(zio->io_executor, ==, NULL); + ASSERT0P(zio->io_executor); if (zio->io_child_type == ZIO_CHILD_LOGICAL && list_is_empty(&zio->io_parent_list)) { @@ -2590,8 +2590,8 @@ zio_reexecute(void *arg) ASSERT(pio->io_child_type == ZIO_CHILD_LOGICAL); ASSERT(pio->io_orig_stage == ZIO_STAGE_OPEN); - ASSERT(pio->io_gang_leader == NULL); - ASSERT(pio->io_gang_tree == NULL); + ASSERT0P(pio->io_gang_leader); + ASSERT0P(pio->io_gang_tree); mutex_enter(&pio->io_lock); pio->io_flags = pio->io_orig_flags; @@ -2689,7 +2689,7 @@ zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason) ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER)); ASSERT(zio != spa->spa_suspend_zio_root); ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); - ASSERT(zio_unique_parent(zio) == NULL); + ASSERT0P(zio_unique_parent(zio)); ASSERT(zio->io_stage == ZIO_STAGE_DONE); zio_add_child(spa->spa_suspend_zio_root, zio); } @@ -2908,7 +2908,7 @@ zio_gang_node_alloc(zio_gang_node_t **gnpp, uint64_t gangblocksize) { zio_gang_node_t *gn; - ASSERT(*gnpp == NULL); + ASSERT0P(*gnpp); gn = kmem_zalloc(sizeof (*gn) + (gbh_nblkptrs(gangblocksize) * sizeof (gn)), KM_SLEEP); @@ -2925,7 +2925,7 @@ zio_gang_node_free(zio_gang_node_t **gnpp) zio_gang_node_t *gn = *gnpp; for (int g = 0; g < gbh_nblkptrs(gn->gn_allocsize); g++) - ASSERT(gn->gn_child[g] == NULL); + ASSERT0P(gn->gn_child[g]); zio_buf_free(gn->gn_gbh, gn->gn_allocsize); kmem_free(gn, sizeof (*gn) + @@ -3362,11 +3362,11 @@ zio_nop_write(zio_t *zio) zio_prop_t *zp = &zio->io_prop; ASSERT(BP_IS_HOLE(bp)); - ASSERT(BP_GET_LEVEL(bp) == 0); + ASSERT0(BP_GET_LEVEL(bp)); ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE)); ASSERT(zp->zp_nopwrite); ASSERT(!zp->zp_dedup); - ASSERT(zio->io_bp_override == NULL); + ASSERT0P(zio->io_bp_override); ASSERT(IO_IS_ALLOCATING(zio)); /* @@ -3495,7 +3495,7 @@ zio_ddt_read_start(zio_t *zio) ddt_univ_phys_t *ddp = dde->dde_phys; blkptr_t blk; - ASSERT(zio->io_vsd == NULL); + ASSERT0P(zio->io_vsd); zio->io_vsd = dde; if (v_self == DDT_PHYS_NONE) @@ -3560,7 +3560,7 @@ zio_ddt_read_done(zio_t *zio) zio->io_vsd = NULL; } - ASSERT(zio->io_vsd == NULL); + ASSERT0P(zio->io_vsd); return (zio); } @@ -4415,7 +4415,7 @@ static void zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp) { ASSERT(BP_GET_BIRTH(bp) == zio->io_txg || BP_IS_HOLE(bp)); - ASSERT(zio->io_bp_override == NULL); + ASSERT0P(zio->io_bp_override); if (!BP_IS_HOLE(bp)) { metaslab_free(zio->io_spa, bp, BP_GET_BIRTH(bp), B_TRUE); @@ -4434,12 +4434,15 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp) */ int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, - uint64_t size, boolean_t *slog) + uint64_t min_size, uint64_t max_size, boolean_t *slog, + boolean_t allow_larger) { int error; zio_alloc_list_t io_alloc_list; + uint64_t alloc_size = 0; ASSERT(txg > spa_syncing_txg(spa)); + ASSERT3U(min_size, <=, max_size); metaslab_trace_init(&io_alloc_list); @@ -4448,7 +4451,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, * Fill in the obvious ones before calling into metaslab_alloc(). */ BP_SET_TYPE(new_bp, DMU_OT_INTENT_LOG); - BP_SET_PSIZE(new_bp, size); + BP_SET_PSIZE(new_bp, max_size); BP_SET_LEVEL(new_bp, 0); /* @@ -4463,43 +4466,51 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, ZIOSTAT_BUMP(ziostat_total_allocations); /* Try log class (dedicated slog devices) first */ - error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1, - txg, NULL, flags, &io_alloc_list, allocator, NULL); + error = metaslab_alloc_range(spa, spa_log_class(spa), min_size, + max_size, new_bp, 1, txg, NULL, flags, &io_alloc_list, allocator, + NULL, &alloc_size); *slog = (error == 0); /* Try special_embedded_log class (reserved on special vdevs) */ if (error != 0) { - error = metaslab_alloc(spa, spa_special_embedded_log_class(spa), - size, new_bp, 1, txg, NULL, flags, &io_alloc_list, - allocator, NULL); + error = metaslab_alloc_range(spa, + spa_special_embedded_log_class(spa), min_size, max_size, + new_bp, 1, txg, NULL, flags, &io_alloc_list, allocator, + NULL, &alloc_size); } /* Try special class (general special vdev allocation) */ if (error != 0) { - error = metaslab_alloc(spa, spa_special_class(spa), size, - new_bp, 1, txg, NULL, flags, &io_alloc_list, allocator, - NULL); + error = metaslab_alloc_range(spa, spa_special_class(spa), + min_size, max_size, new_bp, 1, txg, NULL, flags, + &io_alloc_list, allocator, NULL, &alloc_size); } /* Try embedded_log class (reserved on normal vdevs) */ if (error != 0) { - error = metaslab_alloc(spa, spa_embedded_log_class(spa), size, - new_bp, 1, txg, NULL, flags, &io_alloc_list, allocator, - NULL); + error = metaslab_alloc_range(spa, spa_embedded_log_class(spa), + min_size, max_size, new_bp, 1, txg, NULL, flags, + &io_alloc_list, allocator, NULL, &alloc_size); } /* Finally fall back to normal class */ if (error != 0) { ZIOSTAT_BUMP(ziostat_alloc_class_fallbacks); - error = metaslab_alloc(spa, spa_normal_class(spa), size, - new_bp, 1, txg, NULL, flags, &io_alloc_list, allocator, - NULL); + error = metaslab_alloc_range(spa, spa_normal_class(spa), + min_size, max_size, new_bp, 1, txg, NULL, flags, + &io_alloc_list, allocator, NULL, &alloc_size); } metaslab_trace_fini(&io_alloc_list); if (error == 0) { - BP_SET_LSIZE(new_bp, size); - BP_SET_PSIZE(new_bp, size); + if (!allow_larger) + alloc_size = MIN(alloc_size, max_size); + else if (max_size <= SPA_OLD_MAXBLOCKSIZE) + alloc_size = MIN(alloc_size, SPA_OLD_MAXBLOCKSIZE); + alloc_size = P2ALIGN_TYPED(alloc_size, ZIL_MIN_BLKSZ, uint64_t); + + BP_SET_LSIZE(new_bp, alloc_size); + BP_SET_PSIZE(new_bp, alloc_size); BP_SET_COMPRESS(new_bp, ZIO_COMPRESS_OFF); BP_SET_CHECKSUM(new_bp, spa_version(spa) >= SPA_VERSION_SLIM_ZIL @@ -4527,8 +4538,8 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, } } else { zfs_dbgmsg("%s: zil block allocation failure: " - "size %llu, error %d", spa_name(spa), (u_longlong_t)size, - error); + "min_size %llu, max_size %llu, error %d", spa_name(spa), + (u_longlong_t)min_size, (u_longlong_t)max_size, error); } return (error); @@ -4559,8 +4570,8 @@ zio_vdev_io_start(zio_t *zio) zio->io_delay = 0; - ASSERT(zio->io_error == 0); - ASSERT(zio->io_child_error[ZIO_CHILD_VDEV] == 0); + ASSERT0(zio->io_error); + ASSERT0(zio->io_child_error[ZIO_CHILD_VDEV]); if (vd == NULL) { if (!(zio->io_flags & ZIO_FLAG_CONFIG_WRITER)) @@ -4751,7 +4762,7 @@ zio_vdev_io_done(zio_t *zio) ops->vdev_op_io_done(zio); if (unexpected_error && vd->vdev_remove_wanted == B_FALSE) - VERIFY(vdev_probe(vd, zio) == NULL); + VERIFY0P(vdev_probe(vd, zio)); return (zio); } @@ -4903,7 +4914,7 @@ void zio_vdev_io_reissue(zio_t *zio) { ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_START); - ASSERT(zio->io_error == 0); + ASSERT0(zio->io_error); zio->io_stage >>= 1; } @@ -4920,7 +4931,7 @@ void zio_vdev_io_bypass(zio_t *zio) { ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_START); - ASSERT(zio->io_error == 0); + ASSERT0(zio->io_error); zio->io_flags |= ZIO_FLAG_IO_BYPASS; zio->io_stage = ZIO_STAGE_VDEV_IO_ASSESS >> 1; @@ -5298,7 +5309,7 @@ zio_ready(zio_t *zio) ASSERT(IO_IS_ALLOCATING(zio)); ASSERT(BP_GET_BIRTH(bp) == zio->io_txg || BP_IS_HOLE(bp) || (zio->io_flags & ZIO_FLAG_NOPWRITE)); - ASSERT(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY] == 0); + ASSERT0(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY]); zio->io_ready(zio); } @@ -5448,7 +5459,7 @@ zio_done(zio_t *zio) for (int c = 0; c < ZIO_CHILD_TYPES; c++) for (int w = 0; w < ZIO_WAIT_TYPES; w++) - ASSERT(zio->io_children[c][w] == 0); + ASSERT0(zio->io_children[c][w]); if (zio->io_bp != NULL && !BP_IS_EMBEDDED(zio->io_bp)) { ASSERT(memcmp(zio->io_bp, &zio->io_bp_copy, diff --git a/sys/contrib/openzfs/module/zfs/zio_checksum.c b/sys/contrib/openzfs/module/zfs/zio_checksum.c index 63d0c6dadd46..1d0646a61185 100644 --- a/sys/contrib/openzfs/module/zfs/zio_checksum.c +++ b/sys/contrib/openzfs/module/zfs/zio_checksum.c @@ -215,7 +215,7 @@ zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { spa_feature_t zio_checksum_to_feature(enum zio_checksum cksum) { - VERIFY((cksum & ~ZIO_CHECKSUM_MASK) == 0); + VERIFY0((cksum & ~ZIO_CHECKSUM_MASK)); switch (cksum) { case ZIO_CHECKSUM_BLAKE3: diff --git a/sys/contrib/openzfs/module/zfs/zio_compress.c b/sys/contrib/openzfs/module/zfs/zio_compress.c index 9f0ac1b63146..89ceeb58ad91 100644 --- a/sys/contrib/openzfs/module/zfs/zio_compress.c +++ b/sys/contrib/openzfs/module/zfs/zio_compress.c @@ -38,12 +38,6 @@ #include <sys/zstd/zstd.h> /* - * If nonzero, every 1/X decompression attempts will fail, simulating - * an undetected memory error. - */ -static unsigned long zio_decompress_fail_fraction = 0; - -/* * Compression vectors. */ zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = { @@ -171,15 +165,6 @@ zio_decompress_data(enum zio_compress c, abd_t *src, abd_t *dst, else err = ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level); - /* - * Decompression shouldn't fail, because we've already verified - * the checksum. However, for extra protection (e.g. against bitflips - * in non-ECC RAM), we handle this error (and test it). - */ - if (zio_decompress_fail_fraction != 0 && - random_in_range(zio_decompress_fail_fraction) == 0) - err = SET_ERROR(EINVAL); - return (err); } diff --git a/sys/contrib/openzfs/module/zfs/zio_inject.c b/sys/contrib/openzfs/module/zfs/zio_inject.c index df7b01ba879e..981a1be4847c 100644 --- a/sys/contrib/openzfs/module/zfs/zio_inject.c +++ b/sys/contrib/openzfs/module/zfs/zio_inject.c @@ -1119,7 +1119,7 @@ zio_clear_fault(int id) kmem_free(handler->zi_lanes, sizeof (*handler->zi_lanes) * handler->zi_record.zi_nlanes); } else { - ASSERT3P(handler->zi_lanes, ==, NULL); + ASSERT0P(handler->zi_lanes); } if (handler->zi_spa_name != NULL) diff --git a/sys/contrib/openzfs/module/zfs/zrlock.c b/sys/contrib/openzfs/module/zfs/zrlock.c index 3c0f1b7bbbc1..09c110945c97 100644 --- a/sys/contrib/openzfs/module/zfs/zrlock.c +++ b/sys/contrib/openzfs/module/zfs/zrlock.c @@ -129,7 +129,7 @@ zrl_tryenter(zrlock_t *zrl) (uint32_t *)&zrl->zr_refcount, 0, ZRL_LOCKED); if (cas == 0) { #ifdef ZFS_DEBUG - ASSERT3P(zrl->zr_owner, ==, NULL); + ASSERT0P(zrl->zr_owner); zrl->zr_owner = curthread; #endif return (1); diff --git a/sys/contrib/openzfs/module/zfs/zthr.c b/sys/contrib/openzfs/module/zfs/zthr.c index 597a510528ea..d245ce4946e0 100644 --- a/sys/contrib/openzfs/module/zfs/zthr.c +++ b/sys/contrib/openzfs/module/zfs/zthr.c @@ -316,7 +316,7 @@ zthr_destroy(zthr_t *t) { ASSERT(!MUTEX_HELD(&t->zthr_state_lock)); ASSERT(!MUTEX_HELD(&t->zthr_request_lock)); - VERIFY3P(t->zthr_thread, ==, NULL); + VERIFY0P(t->zthr_thread); mutex_destroy(&t->zthr_request_lock); mutex_destroy(&t->zthr_state_lock); cv_destroy(&t->zthr_cv); diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c index 7e264f308cf2..2fd3e1c37045 100644 --- a/sys/contrib/openzfs/module/zfs/zvol.c +++ b/sys/contrib/openzfs/module/zfs/zvol.c @@ -38,25 +38,36 @@ * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. * Copyright (c) 2012, 2019 by Delphix. All rights reserved. - * Copyright (c) 2024, Klara, Inc. + * Copyright (c) 2024, 2025, Klara, Inc. */ /* * Note on locking of zvol state structures. * - * These structures are used to maintain internal state used to emulate block - * devices on top of zvols. In particular, management of device minor number - * operations - create, remove, rename, and set_snapdev - involves access to - * these structures. The zvol_state_lock is primarily used to protect the - * zvol_state_list. The zv->zv_state_lock is used to protect the contents - * of the zvol_state_t structures, as well as to make sure that when the - * time comes to remove the structure from the list, it is not in use, and - * therefore, it can be taken off zvol_state_list and freed. + * zvol_state_t represents the connection between a single dataset + * (DMU_OST_ZVOL) and the device "minor" (some OS-specific representation of a + * "disk" or "device" or "volume", eg, a /dev/zdXX node, a GEOM object, etc). * - * The zv_suspend_lock was introduced to allow for suspending I/O to a zvol, - * e.g. for the duration of receive and rollback operations. This lock can be - * held for significant periods of time. Given that it is undesirable to hold - * mutexes for long periods of time, the following lock ordering applies: + * The global zvol_state_lock is used to protect access to zvol_state_list and + * zvol_htable, which are the primary way to obtain a zvol_state_t from a name. + * It should not be used for anything not name-relateds, and you should avoid + * sleeping or waiting while its held. See zvol_find_by_name(), zvol_insert(), + * zvol_remove(). + * + * The zv_state_lock is used to protect the contents of the associated + * zvol_state_t. Most of the zvol_state_t is dedicated to control and + * configuration; almost none of it is needed for data operations (that is, + * read, write, flush) so this lock is rarely taken during general IO. It + * should be released quickly; you should avoid sleeping or waiting while its + * held. + * + * zv_suspend_lock is used to suspend IO/data operations to a zvol. The read + * half should held for the duration of an IO operation. The write half should + * be taken when something to wait for IO to complete and the block further IO, + * eg for the duration of receive and rollback operations. This lock can be + * held for long periods of time. + * + * Thus, the following lock ordering appies. * - take zvol_state_lock if necessary, to protect zvol_state_list * - take zv_suspend_lock if necessary, by the code path in question * - take zv_state_lock to protect zvol_state_t @@ -67,9 +78,8 @@ * these operations are serialized per pool. Consequently, we can be certain * that for a given zvol, there is only one operation at a time in progress. * That is why one can be sure that first, zvol_state_t for a given zvol is - * allocated and placed on zvol_state_list, and then other minor operations - * for this zvol are going to proceed in the order of issue. - * + * allocated and placed on zvol_state_list, and then other minor operations for + * this zvol are going to proceed in the order of issue. */ #include <sys/dataset_kstats.h> @@ -215,8 +225,8 @@ zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) int error; uint64_t volblocksize, volsize; - VERIFY(nvlist_lookup_uint64(nvprops, - zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0); + VERIFY0(nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize)); if (nvlist_lookup_uint64(nvprops, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); @@ -225,21 +235,20 @@ zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) * These properties must be removed from the list so the generic * property setting step won't apply to them. */ - VERIFY(nvlist_remove_all(nvprops, - zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0); + VERIFY0(nvlist_remove_all(nvprops, zfs_prop_to_name(ZFS_PROP_VOLSIZE))); (void) nvlist_remove_all(nvprops, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE)); error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize, DMU_OT_NONE, 0, tx); - ASSERT(error == 0); + ASSERT0(error); error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP, DMU_OT_NONE, 0, tx); - ASSERT(error == 0); + ASSERT0(error); error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); - ASSERT(error == 0); + ASSERT0(error); } /* @@ -254,7 +263,7 @@ zvol_get_stats(objset_t *os, nvlist_t *nv) error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val); if (error) - return (SET_ERROR(error)); + return (error); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val); doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP); @@ -267,7 +276,7 @@ zvol_get_stats(objset_t *os, nvlist_t *nv) kmem_free(doi, sizeof (dmu_object_info_t)); - return (SET_ERROR(error)); + return (error); } /* @@ -305,7 +314,7 @@ zvol_update_volsize(uint64_t volsize, objset_t *os) error = dmu_tx_assign(tx, DMU_TX_WAIT); if (error) { dmu_tx_abort(tx); - return (SET_ERROR(error)); + return (error); } txg = dmu_tx_get_txg(tx); @@ -337,7 +346,7 @@ zvol_set_volsize(const char *name, uint64_t volsize) error = dsl_prop_get_integer(name, zfs_prop_to_name(ZFS_PROP_READONLY), &readonly, NULL); if (error != 0) - return (SET_ERROR(error)); + return (error); if (readonly) return (SET_ERROR(EROFS)); @@ -353,7 +362,7 @@ zvol_set_volsize(const char *name, uint64_t volsize) FTAG, &os)) != 0) { if (zv != NULL) mutex_exit(&zv->zv_state_lock); - return (SET_ERROR(error)); + return (error); } owned = B_TRUE; if (zv != NULL) @@ -390,7 +399,7 @@ out: if (error == 0 && zv != NULL) zvol_os_update_volsize(zv, volsize); - return (SET_ERROR(error)); + return (error); } /* @@ -401,7 +410,7 @@ zvol_set_volthreading(const char *name, boolean_t value) { zvol_state_t *zv = zvol_find_by_name(name, RW_NONE); if (zv == NULL) - return (ENOENT); + return (SET_ERROR(ENOENT)); zv->zv_threading = value; mutex_exit(&zv->zv_state_lock); return (0); @@ -450,8 +459,10 @@ zvol_check_volblocksize(const char *name, uint64_t volblocksize) * We don't allow setting the property above 1MB, * unless the tunable has been changed. */ - if (volblocksize > zfs_max_recordsize) + if (volblocksize > zfs_max_recordsize) { + spa_close(spa, FTAG); return (SET_ERROR(EDOM)); + } spa_close(spa, FTAG); } @@ -618,7 +629,7 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst, dmu_tx_t *tx; blkptr_t *bps; size_t maxblocks; - int error = EINVAL; + int error = 0; rw_enter(&zv_dst->zv_suspend_lock, RW_READER); if (zv_dst->zv_zilog == NULL) { @@ -644,23 +655,22 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst, */ if (!spa_feature_is_enabled(dmu_objset_spa(outos), SPA_FEATURE_BLOCK_CLONING)) { - error = EOPNOTSUPP; + error = SET_ERROR(EOPNOTSUPP); goto out; } if (dmu_objset_spa(inos) != dmu_objset_spa(outos)) { - error = EXDEV; + error = SET_ERROR(EXDEV); goto out; } if (inos->os_encrypted != outos->os_encrypted) { - error = EXDEV; + error = SET_ERROR(EXDEV); goto out; } if (zv_src->zv_volblocksize != zv_dst->zv_volblocksize) { - error = EINVAL; + error = SET_ERROR(EINVAL); goto out; } if (inoff >= zv_src->zv_volsize || outoff >= zv_dst->zv_volsize) { - error = 0; goto out; } @@ -671,17 +681,15 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst, len = zv_src->zv_volsize - inoff; if (len > zv_dst->zv_volsize - outoff) len = zv_dst->zv_volsize - outoff; - if (len == 0) { - error = 0; + if (len == 0) goto out; - } /* * No overlapping if we are cloning within the same file */ if (zv_src == zv_dst) { if (inoff < outoff + len && outoff < inoff + len) { - error = EINVAL; + error = SET_ERROR(EINVAL); goto out; } } @@ -691,7 +699,7 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst, */ if ((inoff % zv_src->zv_volblocksize) != 0 || (outoff % zv_dst->zv_volblocksize) != 0) { - error = EINVAL; + error = SET_ERROR(EINVAL); goto out; } @@ -699,7 +707,7 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst, * Length must be multiple of block size */ if ((len % zv_src->zv_volblocksize) != 0) { - error = EINVAL; + error = SET_ERROR(EINVAL); goto out; } @@ -771,13 +779,13 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst, zfs_rangelock_exit(outlr); zfs_rangelock_exit(inlr); if (error == 0 && zv_dst->zv_objset->os_sync == ZFS_SYNC_ALWAYS) { - zil_commit(zilog_dst, ZVOL_OBJ); + error = zil_commit(zilog_dst, ZVOL_OBJ); } out: if (zv_src != zv_dst) rw_exit(&zv_src->zv_suspend_lock); rw_exit(&zv_dst->zv_suspend_lock); - return (SET_ERROR(error)); + return (error); } /* @@ -897,7 +905,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset, if (wr_state == WR_COPIED && dmu_read_by_dnode(zv->zv_dn, offset, len, lr + 1, DMU_READ_NO_PREFETCH | DMU_KEEP_CACHING) != 0) { - zil_itx_destroy(itx); + zil_itx_destroy(itx, 0); itx = zil_itx_create(TX_WRITE, sizeof (*lr)); lr = (lr_write_t *)&itx->itx_lr; wr_state = WR_NEED_COPY; @@ -916,7 +924,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset, itx->itx_private = zv; - (void) zil_itx_assign(zilog, itx, tx); + zil_itx_assign(zilog, itx, tx); offset += len; size -= len; @@ -1026,7 +1034,7 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf, zvol_get_done(zgd, error); - return (SET_ERROR(error)); + return (error); } /* @@ -1071,15 +1079,15 @@ zvol_setup_zv(zvol_state_t *zv) error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL); if (error) - return (SET_ERROR(error)); + return (error); error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) - return (SET_ERROR(error)); + return (error); error = dnode_hold(os, ZVOL_OBJ, zv, &zv->zv_dn); if (error) - return (SET_ERROR(error)); + return (error); zvol_os_set_capacity(zv, volsize >> 9); zv->zv_volsize = volsize; @@ -1121,7 +1129,7 @@ zvol_shutdown_zv(zvol_state_t *zv) */ if (zv->zv_flags & ZVOL_WRITTEN_TO) txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); - (void) dmu_objset_evict_dbufs(zv->zv_objset); + dmu_objset_evict_dbufs(zv->zv_objset); } /* @@ -1198,7 +1206,7 @@ zvol_resume(zvol_state_t *zv) if (zv->zv_flags & ZVOL_REMOVING) cv_broadcast(&zv->zv_removing_cv); - return (SET_ERROR(error)); + return (error); } int @@ -1214,7 +1222,7 @@ zvol_first_open(zvol_state_t *zv, boolean_t readonly) boolean_t ro = (readonly || (strchr(zv->zv_name, '@') != NULL)); error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, ro, B_TRUE, zv, &os); if (error) - return (SET_ERROR(error)); + return (error); zv->zv_objset = os; @@ -1440,41 +1448,32 @@ zvol_task_update_status(zvol_task_t *task, uint64_t total, uint64_t done, } } -static const char * -zvol_task_op_msg(zvol_async_op_t op) -{ - switch (op) { - case ZVOL_ASYNC_CREATE_MINORS: - return ("create"); - case ZVOL_ASYNC_REMOVE_MINORS: - return ("remove"); - case ZVOL_ASYNC_RENAME_MINORS: - return ("rename"); - case ZVOL_ASYNC_SET_SNAPDEV: - case ZVOL_ASYNC_SET_VOLMODE: - return ("set property"); - default: - return ("unknown"); - } - - __builtin_unreachable(); - return (NULL); -} - static void zvol_task_report_status(zvol_task_t *task) { +#ifdef ZFS_DEBUG + static const char *const msg[] = { + "create", + "remove", + "rename", + "set snapdev", + "set volmode", + "unknown", + }; if (task->zt_status == 0) return; + zvol_async_op_t op = MIN(task->zt_op, ZVOL_ASYNC_MAX); if (task->zt_error) { dprintf("The %s minors zvol task was not ok, last error %d\n", - zvol_task_op_msg(task->zt_op), task->zt_error); + msg[op], task->zt_error); } else { - dprintf("The %s minors zvol task was not ok\n", - zvol_task_op_msg(task->zt_op)); + dprintf("The %s minors zvol task was not ok\n", msg[op]); } +#else + (void) task; +#endif } /* @@ -1581,184 +1580,156 @@ zvol_create_minors_impl(zvol_task_t *task) } /* - * Remove minors for specified dataset including children and snapshots. - */ - -/* - * Remove the minor for a given zvol. This will do it all: - * - flag the zvol for removal, so new requests are rejected - * - wait until outstanding requests are completed - * - remove it from lists - * - free it - * It's also usable as a taskq task, and smells nice too. + * Remove minors for specified dataset and, optionally, its children and + * snapshots. */ static void -zvol_remove_minor_task(void *arg) -{ - zvol_state_t *zv = (zvol_state_t *)arg; - - ASSERT(!RW_LOCK_HELD(&zvol_state_lock)); - ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); - - mutex_enter(&zv->zv_state_lock); - while (zv->zv_open_count > 0 || atomic_read(&zv->zv_suspend_ref)) { - zv->zv_flags |= ZVOL_REMOVING; - cv_wait(&zv->zv_removing_cv, &zv->zv_state_lock); - } - mutex_exit(&zv->zv_state_lock); - - rw_enter(&zvol_state_lock, RW_WRITER); - mutex_enter(&zv->zv_state_lock); - - zvol_remove(zv); - zvol_os_clear_private(zv); - - mutex_exit(&zv->zv_state_lock); - rw_exit(&zvol_state_lock); - - zvol_os_free(zv); -} - -static void -zvol_free_task(void *arg) -{ - zvol_os_free(arg); -} - -static void zvol_remove_minors_impl(zvol_task_t *task) { zvol_state_t *zv, *zv_next; const char *name = task ? task->zt_name1 : NULL; int namelen = ((name) ? strlen(name) : 0); - taskqid_t t; - list_t delay_list, free_list; + boolean_t children = task ? !!task->zt_value : B_TRUE; if (zvol_inhibit_dev) return; - list_create(&delay_list, sizeof (zvol_state_t), - offsetof(zvol_state_t, zv_next)); - list_create(&free_list, sizeof (zvol_state_t), - offsetof(zvol_state_t, zv_next)); + /* + * We collect up zvols that we want to remove on a separate list, so + * that we don't have to hold zvol_state_lock for the whole time. + * + * We can't remove them from the global lists until we're completely + * done with them, because that would make them appear to ZFS-side ops + * that they don't exist, and the name might be reused, which can't be + * good. + */ + list_t remove_list; + list_create(&remove_list, sizeof (zvol_state_t), + offsetof(zvol_state_t, zv_remove_node)); - rw_enter(&zvol_state_lock, RW_WRITER); + rw_enter(&zvol_state_lock, RW_READER); for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { zv_next = list_next(&zvol_state_list, zv); mutex_enter(&zv->zv_state_lock); + if (zv->zv_flags & ZVOL_REMOVING) { + /* Another thread is handling shutdown, skip it. */ + mutex_exit(&zv->zv_state_lock); + continue; + } + + /* + * This zvol should be removed if: + * - no name was offered (ie removing all at shutdown); or + * - name matches exactly; or + * - we were asked to remove children, and + * - the start of the name matches, and + * - there is a '/' immediately after the matched name; or + * - there is a '@' immediately after the matched name + */ if (name == NULL || strcmp(zv->zv_name, name) == 0 || - (strncmp(zv->zv_name, name, namelen) == 0 && + (children && strncmp(zv->zv_name, name, namelen) == 0 && (zv->zv_name[namelen] == '/' || zv->zv_name[namelen] == '@'))) { - /* - * By holding zv_state_lock here, we guarantee that no - * one is currently using this zv - */ /* - * If in use, try to throw everyone off and try again - * later. + * Matched, so mark it removal. We want to take the + * write half of the suspend lock to make sure that + * the zvol is not suspended, and give any data ops + * chance to finish. */ - if (zv->zv_open_count > 0 || - atomic_read(&zv->zv_suspend_ref)) { - zv->zv_flags |= ZVOL_REMOVING; - t = taskq_dispatch( - zv->zv_objset->os_spa->spa_zvol_taskq, - zvol_remove_minor_task, zv, TQ_SLEEP); - if (t == TASKQID_INVALID) { - /* - * Couldn't create the task, so we'll - * do it in place once the loop is - * finished. - */ - list_insert_head(&delay_list, zv); - } + mutex_exit(&zv->zv_state_lock); + rw_enter(&zv->zv_suspend_lock, RW_WRITER); + mutex_enter(&zv->zv_state_lock); + + if (zv->zv_flags & ZVOL_REMOVING) { + /* Another thread has taken it, let them. */ mutex_exit(&zv->zv_state_lock); + rw_exit(&zv->zv_suspend_lock); continue; } - zvol_remove(zv); - /* - * Cleared while holding zvol_state_lock as a writer - * which will prevent zvol_open() from opening it. + * Mark it and unlock. New entries will see the flag + * and return ENXIO. */ - zvol_os_clear_private(zv); - - /* Drop zv_state_lock before zvol_free() */ + zv->zv_flags |= ZVOL_REMOVING; mutex_exit(&zv->zv_state_lock); + rw_exit(&zv->zv_suspend_lock); - /* Try parallel zv_free, if failed do it in place */ - t = taskq_dispatch(system_taskq, zvol_free_task, zv, - TQ_SLEEP); - if (t == TASKQID_INVALID) - list_insert_head(&free_list, zv); - } else { + /* Put it on the list for the next stage. */ + list_insert_head(&remove_list, zv); + } else mutex_exit(&zv->zv_state_lock); - } } - rw_exit(&zvol_state_lock); - /* Wait for zvols that we couldn't create a remove task for */ - while ((zv = list_remove_head(&delay_list)) != NULL) - zvol_remove_minor_task(zv); - - /* Free any that we couldn't free in parallel earlier */ - while ((zv = list_remove_head(&free_list)) != NULL) - zvol_os_free(zv); -} - -/* Remove minor for this specific volume only */ -static int -zvol_remove_minor_impl(const char *name) -{ - zvol_state_t *zv = NULL, *zv_next; - - if (zvol_inhibit_dev) - return (0); + rw_exit(&zvol_state_lock); - rw_enter(&zvol_state_lock, RW_WRITER); + /* Didn't match any, nothing to do! */ + if (list_is_empty(&remove_list)) { + if (task) + task->zt_error = SET_ERROR(ENOENT); + return; + } - for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { - zv_next = list_next(&zvol_state_list, zv); + /* Actually shut them all down. */ + for (zv = list_head(&remove_list); zv != NULL; zv = zv_next) { + zv_next = list_next(&remove_list, zv); mutex_enter(&zv->zv_state_lock); - if (strcmp(zv->zv_name, name) == 0) - /* Found, leave the the loop with zv_lock held */ - break; - mutex_exit(&zv->zv_state_lock); - } - if (zv == NULL) { - rw_exit(&zvol_state_lock); - return (ENOENT); - } - - ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + /* + * Still open or suspended, just wait. This can happen if, for + * example, we managed to acquire zv_state_lock in the moments + * where zvol_open() or zvol_release() are trading locks to + * call zvol_first_open() or zvol_last_close(). + */ + while (zv->zv_open_count > 0 || + atomic_read(&zv->zv_suspend_ref)) + cv_wait(&zv->zv_removing_cv, &zv->zv_state_lock); - if (zv->zv_open_count > 0 || atomic_read(&zv->zv_suspend_ref)) { /* - * In use, so try to throw everyone off, then wait - * until finished. + * No users, shut down the OS side. This may not remove the + * minor from view immediately, depending on the kernel + * specifics, but it will ensure that it is unusable and that + * this zvol_state_t can never again be reached from an OS-side + * operation. */ - zv->zv_flags |= ZVOL_REMOVING; + zvol_os_remove_minor(zv); mutex_exit(&zv->zv_state_lock); + + /* Remove it from the name lookup lists */ + rw_enter(&zvol_state_lock, RW_WRITER); + zvol_remove(zv); rw_exit(&zvol_state_lock); - zvol_remove_minor_task(zv); - return (0); } - zvol_remove(zv); - zvol_os_clear_private(zv); + /* + * Our own references on remove_list is the last one, free them and + * we're done. + */ + while ((zv = list_remove_head(&remove_list)) != NULL) + zvol_os_free(zv); - mutex_exit(&zv->zv_state_lock); - rw_exit(&zvol_state_lock); + list_destroy(&remove_list); +} - zvol_os_free(zv); +/* Remove minor for this specific volume only */ +static int +zvol_remove_minor_impl(const char *name) +{ + if (zvol_inhibit_dev) + return (0); - return (0); + zvol_task_t task; + memset(&task, 0, sizeof (zvol_task_t)); + strlcpy(task.zt_name1, name, sizeof (task.zt_name1)); + task.zt_value = B_FALSE; + + zvol_remove_minors_impl(&task); + + return (task.zt_error); } /* @@ -2078,6 +2049,7 @@ zvol_remove_minors(spa_t *spa, const char *name, boolean_t async) task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP); task->zt_op = ZVOL_ASYNC_REMOVE_MINORS; strlcpy(task->zt_name1, name, sizeof (task->zt_name1)); + task->zt_value = B_TRUE; id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP); if ((async == B_FALSE) && (id != TASKQID_INVALID)) taskq_wait_id(spa->spa_zvol_taskq, id); @@ -2199,20 +2171,12 @@ zvol_fini_impl(void) zvol_remove_minors_impl(NULL); - /* - * The call to "zvol_remove_minors_impl" may dispatch entries to - * the system_taskq, but it doesn't wait for those entries to - * complete before it returns. Thus, we must wait for all of the - * removals to finish, before we can continue. - */ - taskq_wait_outstanding(system_taskq, 0); - kmem_free(zvol_htable, ZVOL_HT_SIZE * sizeof (struct hlist_head)); list_destroy(&zvol_state_list); rw_destroy(&zvol_state_lock); if (ztqs->tqs_taskq == NULL) { - ASSERT3U(ztqs->tqs_cnt, ==, 0); + ASSERT0(ztqs->tqs_cnt); } else { for (uint_t i = 0; i < ztqs->tqs_cnt; i++) { ASSERT3P(ztqs->tqs_taskq[i], !=, NULL); diff --git a/sys/contrib/openzfs/module/zstd/zfs_zstd.c b/sys/contrib/openzfs/module/zstd/zfs_zstd.c index 391216d6e263..3db196953f74 100644 --- a/sys/contrib/openzfs/module/zstd/zfs_zstd.c +++ b/sys/contrib/openzfs/module/zstd/zfs_zstd.c @@ -876,9 +876,9 @@ static void __init zstd_mempool_init(void) { zstd_mempool_cctx = - kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP); + vmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP); zstd_mempool_dctx = - kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP); + vmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP); for (int i = 0; i < ZSTD_POOL_MAX; i++) { mutex_init(&zstd_mempool_cctx[i].barrier, NULL, @@ -924,8 +924,8 @@ zstd_mempool_deinit(void) release_pool(&zstd_mempool_dctx[i]); } - kmem_free(zstd_mempool_dctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool)); - kmem_free(zstd_mempool_cctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool)); + vmem_free(zstd_mempool_dctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool)); + vmem_free(zstd_mempool_cctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool)); zstd_mempool_dctx = NULL; zstd_mempool_cctx = NULL; } diff --git a/sys/contrib/openzfs/rpm/generic/zfs.spec.in b/sys/contrib/openzfs/rpm/generic/zfs.spec.in index dddc0a6c8f02..1ce668e7b86d 100644 --- a/sys/contrib/openzfs/rpm/generic/zfs.spec.in +++ b/sys/contrib/openzfs/rpm/generic/zfs.spec.in @@ -506,7 +506,6 @@ systemctl --system daemon-reload >/dev/null || true # Core utilities %{_sbindir}/* %{_bindir}/raidz_test -%{_sbindir}/zgenhostid %{_bindir}/zvol_wait # Optional Python 3 scripts %{_bindir}/arc_summary diff --git a/sys/contrib/openzfs/scripts/spdxcheck.pl b/sys/contrib/openzfs/scripts/spdxcheck.pl index 88f5a235d70c..cdab5368f19c 100755 --- a/sys/contrib/openzfs/scripts/spdxcheck.pl +++ b/sys/contrib/openzfs/scripts/spdxcheck.pl @@ -190,6 +190,7 @@ my @path_license_tags = ( ['BSD-2-Clause OR GPL-2.0-only', 'CDDL-1.0'], 'module/icp' => ['Apache-2.0', 'CDDL-1.0'], + 'contrib/icp' => ['Apache-2.0', 'CDDL-1.0'], # Python bindings are always Apache-2.0 'contrib/pyzfs' => ['Apache-2.0'], diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run index 9fad8946f4f3..2da46458289a 100644 --- a/sys/contrib/openzfs/tests/runfiles/common.run +++ b/sys/contrib/openzfs/tests/runfiles/common.run @@ -725,7 +725,11 @@ tests = ['fadvise_willneed'] tags = ['functional', 'fadvise'] [tests/functional/failmode] -tests = ['failmode_dmu_tx_wait', 'failmode_dmu_tx_continue'] +tests = ['failmode_dmu_tx_wait', 'failmode_dmu_tx_continue', + 'failmode_fsync_wait', 'failmode_fsync_continue', + 'failmode_msync_wait', 'failmode_msync_continue', + 'failmode_osync_wait', 'failmode_osync_continue', + 'failmode_syncalways_wait', 'failmode_syncalways_continue'] tags = ['functional', 'failmode'] [tests/functional/fallocate] @@ -1089,7 +1093,7 @@ tests = ['zvol_misc_002_pos', 'zvol_misc_hierarchy', 'zvol_misc_rename_inuse', tags = ['functional', 'zvol', 'zvol_misc'] [tests/functional/zvol/zvol_stress] -tests = ['zvol_stress'] +tests = ['zvol_stress', 'zvol_stress_destroy'] tags = ['functional', 'zvol', 'zvol_stress'] [tests/functional/zvol/zvol_swap] diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore b/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore index 1cd90024e94d..62f1684acfb4 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore @@ -28,6 +28,7 @@ /mmap_seek /mmap_sync /mmapwrite +/mmap_write_sync /nvlist_to_lua /randfree_file /randwritecomp diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am index d5448055a1e1..85c3cf3c35a8 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am @@ -74,7 +74,7 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/mkbusy %D%/mkfile %D%/mkfiles %D%/mktree scripts_zfs_tests_bin_PROGRAMS += \ %D%/mmap_exec %D%/mmap_ftruncate %D%/mmap_seek \ - %D%/mmap_sync %D%/mmapwrite %D%/readmmap + %D%/mmap_sync %D%/mmapwrite %D%/readmmap %D%/mmap_write_sync %C%_mmapwrite_LDADD = -lpthread if WANT_MMAP_LIBAIO diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/btree_test.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/btree_test.c index e7b80d01efaa..f8948a61833d 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/btree_test.c +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/btree_test.c @@ -207,7 +207,7 @@ insert_find_remove(zfs_btree_t *bt, char *why) "Found removed value (%llu)\n", *p); return (1); } - ASSERT3S(zfs_btree_numnodes(bt), ==, 0); + ASSERT0(zfs_btree_numnodes(bt)); zfs_btree_verify(bt); return (0); @@ -279,7 +279,7 @@ drain_tree(zfs_btree_t *bt, char *why) node = avl_last(&avl); ASSERT3U(node->data, ==, *(uint64_t *)zfs_btree_last(bt, NULL)); } - ASSERT3S(zfs_btree_numnodes(bt), ==, 0); + ASSERT0(zfs_btree_numnodes(bt)); void *avl_cookie = NULL; while ((node = avl_destroy_nodes(&avl, &avl_cookie)) != NULL) diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c index e08003f80464..cbebd33e0bf6 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c @@ -529,6 +529,8 @@ static const char *aes_gcm_impl[][2] = { { "aesni", "pclmulqdq" }, { "x86_64", "avx" }, { "aesni", "avx" }, + { "x86_64", "avx2" }, + { "aesni", "avx2" }, }; /* signature of function to call after setting implementation params */ diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_write_sync.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_write_sync.c new file mode 100644 index 000000000000..ad5e37f24960 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_write_sync.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: CDDL-1.0 +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2025, Klara, Inc. + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/mman.h> + +#define PAGES (8) + +int +main(int argc, char **argv) +{ + if (argc != 2) { + fprintf(stderr, "usage: %s <filename>\n", argv[0]); + exit(1); + } + + long page_size = sysconf(_SC_PAGESIZE); + if (page_size < 0) { + perror("sysconf"); + exit(2); + } + size_t map_size = page_size * PAGES; + + int fd = open(argv[1], O_CREAT|O_RDWR, S_IRWXU|S_IRWXG|S_IRWXO); + if (fd < 0) { + perror("open"); + exit(2); + } + + if (ftruncate(fd, map_size) < 0) { + perror("ftruncate"); + close(fd); + exit(2); + } + + uint64_t *p = + mmap(NULL, map_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) { + perror("mmap"); + close(fd); + exit(2); + } + + for (int i = 0; i < (map_size / sizeof (uint64_t)); i++) + p[i] = 0x0123456789abcdef; + + if (msync(p, map_size, MS_SYNC) < 0) { + perror("msync"); + munmap(p, map_size); + close(fd); + exit(3); + } + + munmap(p, map_size); + close(fd); + exit(0); +} diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg index bbaa8665ecc8..884a99d785bc 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg +++ b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg @@ -210,6 +210,7 @@ export ZFSTEST_FILES='badsend mmap_seek mmap_sync mmapwrite + mmap_write_sync nvlist_to_lua randfree_file randwritecomp diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am index c2542287c1d7..41e7b45ef4ec 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am @@ -276,6 +276,7 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \ functional/direct/dio.kshlib \ functional/events/events.cfg \ functional/events/events_common.kshlib \ + functional/failmode/failmode.kshlib \ functional/fault/fault.cfg \ functional/gang_blocks/gang_blocks.kshlib \ functional/grow/grow.cfg \ @@ -1541,6 +1542,14 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/failmode/cleanup.ksh \ functional/failmode/failmode_dmu_tx_wait.ksh \ functional/failmode/failmode_dmu_tx_continue.ksh \ + functional/failmode/failmode_fsync_wait.ksh \ + functional/failmode/failmode_fsync_continue.ksh \ + functional/failmode/failmode_msync_wait.ksh \ + functional/failmode/failmode_msync_continue.ksh \ + functional/failmode/failmode_osync_wait.ksh \ + functional/failmode/failmode_osync_continue.ksh \ + functional/failmode/failmode_syncalways_wait.ksh \ + functional/failmode/failmode_syncalways_continue.ksh \ functional/failmode/setup.ksh \ functional/fallocate/cleanup.ksh \ functional/fallocate/fallocate_prealloc.ksh \ @@ -2235,6 +2244,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/zvol/zvol_stress/cleanup.ksh \ functional/zvol/zvol_stress/setup.ksh \ functional/zvol/zvol_stress/zvol_stress.ksh \ + functional/zvol/zvol_stress/zvol_stress_destroy.ksh \ functional/zvol/zvol_swap/cleanup.ksh \ functional/zvol/zvol_swap/setup.ksh \ functional/zvol/zvol_swap/zvol_swap_001_pos.ksh \ diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode.kshlib new file mode 100644 index 000000000000..d0b7404557ab --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode.kshlib @@ -0,0 +1,149 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib + +typeset -A failmode_sync_helper_cmd=( + ["fsync"]='dd if=/dev/urandom of=DATAFILE bs=128k count=1 conv=fsync' + ["msync"]='mmap_write_sync DATAFILE' + ["osync"]='dd if=/dev/urandom of=DATAFILE bs=128k count=1 oflag=sync' + ["syncalways"]='dd if=/dev/urandom of=DATAFILE bs=128k count=1' +) + +typeset -A failmode_sync_helper_dsopts=( + ["syncalways"]="-o sync=always" +) + +function failmode_sync_cleanup +{ + zinject -c all || true + zpool clear $TESTPOOL || true + destroy_pool $TESTPOOL +} + +# +# failmode_sync_test <failmode> <helper> +# +# run a failmode sync test: +# - failmode: wait|continue +# - helper: fsync|msync|osync|syncalways +# +function failmode_sync_test +{ + typeset failmode=$1 + typeset helper=$2 + + # we'll need two disks, one for the main pool, one for the log + read -r DISK1 DISK2 _ <<<"$DISKS" + + # file to write to the pool + typeset datafile="/$TESTPOOL/$TESTFS/datafile" + + # create a single-disk pool with a separate log and the wanted failmode + log_must zpool create \ + -f -o failmode=$failmode $TESTPOOL $DISK1 log $DISK2 + + # create the test dataset. we bias the ZIL towards the log device to + # try to ensure that the sync write never involves the main device + log_must zfs create \ + -o recordsize=128k -o logbias=latency \ + ${failmode_sync_helper_dsopts[$helper]} \ + $TESTPOOL/$TESTFS + + # create the target file. the ZIL head structure is created on first + # use, and does a full txg wait to finish, which we want to avoid + log_must dd if=/dev/zero of=$datafile bs=128k count=1 conv=fsync + log_must zpool sync + + # inject errors. writes will fail, as will the followup probes + zinject -d $DISK1 -e io -T write $TESTPOOL + zinject -d $DISK1 -e nxio -T probe $TESTPOOL + zinject -d $DISK2 -e io -T write $TESTPOOL + zinject -d $DISK2 -e nxio -T probe $TESTPOOL + + # run the helper program in the background. the pool should immediately + # suspend, and the sync op block or fail based on the failmode + typeset helper_cmd=${failmode_sync_helper_cmd[$helper]/DATAFILE/$datafile} + log_note "running failmode sync helper: $helper_cmd" + $helper_cmd & + typeset -i pid=$! + + # should only take a moment, but give it a chance + log_note "waiting for pool to suspend" + typeset -i tries=10 + until [[ $(kstat_pool $TESTPOOL state) == "SUSPENDED" ]] ; do + if ((tries-- == 0)); then + log_fail "pool didn't suspend" + fi + sleep 1 + done + + # zil_commit() should have noticed the suspend by now + typeset -i zilerr=$(kstat zil.zil_commit_error_count) + + # see if the helper program blocked + typeset -i blocked + if kill -0 $pid ; then + blocked=1 + log_note "$helper: blocked in the kernel" + else + blocked=0 + log_note "$helper: exited while pool suspended" + fi + + # bring the pool back online + zinject -c all + zpool clear $TESTPOOL + + # program definitely exited now, get its return code + wait $pid + typeset -i rc=$? + + failmode_sync_cleanup + + log_note "$helper: zilerr=$zilerr blocked=$blocked rc=$rc" + + # confirm expected results for the failmode + if [[ $failmode = "wait" ]] ; then + # - the ZIL saw an error, and fell back to a txg sync + # - sync op blocked when the pool suspended + # - after resume, sync op succeeded, helper returned success + log_must test $zilerr -ne 0 + log_must test $blocked -eq 1 + log_must test $rc -eq 0 + elif [[ $failmode = "continue" ]] ; then + # confirm expected results: + # - the ZIL saw an error, and fell back to a txg sync + # - helper exited when the pool suspended + # - sync op returned an error, so helper returned failure + log_must test $zilerr -ne 0 + log_must test $blocked -eq 0 + log_must test $rc -ne 0 + else + log_fail "impossible failmode: $failmode" + fi +} diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_fsync_continue.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_fsync_continue.ksh new file mode 100755 index 000000000000..7b145d3a2b4c --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_fsync_continue.ksh @@ -0,0 +1,36 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/failmode/failmode.kshlib + +typeset desc="fsync() returns when pool suspends with failmode=continue" + +log_assert $desc +log_onexit failmode_sync_cleanup +log_must failmode_sync_test continue fsync +log_pass $desc diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_fsync_wait.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_fsync_wait.ksh new file mode 100755 index 000000000000..677d226b5481 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_fsync_wait.ksh @@ -0,0 +1,36 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/failmode/failmode.kshlib + +typeset desc="fsync() blocks when pool suspends with failmode=wait" + +log_assert $desc +log_onexit failmode_sync_cleanup +log_must failmode_sync_test wait fsync +log_pass $desc diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_msync_continue.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_msync_continue.ksh new file mode 100755 index 000000000000..0c79ee15a1ba --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_msync_continue.ksh @@ -0,0 +1,36 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/failmode/failmode.kshlib + +typeset desc="msync() returns when pool suspends with failmode=continue" + +log_assert $desc +log_onexit failmode_sync_cleanup +log_must failmode_sync_test continue msync +log_pass $desc diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_msync_wait.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_msync_wait.ksh new file mode 100755 index 000000000000..a59d8cc50d61 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_msync_wait.ksh @@ -0,0 +1,36 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/failmode/failmode.kshlib + +typeset desc="msync() blocks when pool suspends with failmode=wait" + +log_assert $desc +log_onexit failmode_sync_cleanup +log_must failmode_sync_test wait msync +log_pass $desc diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_osync_continue.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_osync_continue.ksh new file mode 100755 index 000000000000..c4fa0c8f042c --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_osync_continue.ksh @@ -0,0 +1,36 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/failmode/failmode.kshlib + +typeset desc="O_SYNC returns when pool suspends with failmode=continue" + +log_assert $desc +log_onexit failmode_sync_cleanup +log_must failmode_sync_test continue osync +log_pass $desc diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_osync_wait.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_osync_wait.ksh new file mode 100755 index 000000000000..5f65cf92ad33 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_osync_wait.ksh @@ -0,0 +1,37 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/failmode/failmode.kshlib + +typeset desc="O_SYNC blocks when pool suspends with failmode=wait" + +log_assert $desc +log_onexit failmode_sync_cleanup +log_must failmode_sync_test wait osync +log_pass $desc + diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_syncalways_continue.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_syncalways_continue.ksh new file mode 100755 index 000000000000..b80d776224a0 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_syncalways_continue.ksh @@ -0,0 +1,37 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/failmode/failmode.kshlib + +typeset desc="write()+sync=always returns when pool suspends with failmode=continue" + +log_assert $desc +log_onexit failmode_sync_cleanup +log_must failmode_sync_test continue syncalways +log_pass $desc + diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_syncalways_wait.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_syncalways_wait.ksh new file mode 100755 index 000000000000..4fcb167b5c77 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/failmode/failmode_syncalways_wait.ksh @@ -0,0 +1,37 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/failmode/failmode.kshlib + +typeset desc="write()+sync=always blocks when pool suspends with failmode=wait" + +log_assert $desc +log_onexit failmode_sync_cleanup +log_must failmode_sync_test wait syncalways +log_pass $desc + diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress_destroy.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress_destroy.ksh new file mode 100755 index 000000000000..669b59fac01f --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress_destroy.ksh @@ -0,0 +1,66 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +typeset -i nzvols=1000 +typeset -i parallel=$(( $(get_num_cpus) * 2 )) + +function cleanup { + for zvol in $(zfs list -Ho name -t vol) ; do + log_must_busy zfs destroy $zvol + done +} + +log_onexit cleanup + +log_assert "stress test concurrent zvol create/destroy" + +function destroy_zvols_until { + typeset cond=$1 + while true ; do + IFS='' zfs list -Ho name -t vol | read -r -d '' zvols + if [[ -n $zvols ]] ; then + echo $zvols | xargs -n 1 -P $parallel zfs destroy + fi + if ! $cond ; then + break + fi + done +} + +( seq $nzvols | \ + xargs -P $parallel -I % zfs create -s -V 1G $TESTPOOL/testvol% ) & +cpid=$! +sleep 1 + +destroy_zvols_until "kill -0 $cpid" +destroy_zvols_until "false" + +log_pass "stress test done" diff --git a/sys/crypto/ccp/ccp.c b/sys/crypto/ccp/ccp.c index 7db9a27ab059..c3d40f6e99ac 100644 --- a/sys/crypto/ccp/ccp.c +++ b/sys/crypto/ccp/ccp.c @@ -79,7 +79,7 @@ static struct pciid { { 0x15df1022, "AMD CCP-5a" }, }; -static struct random_source random_ccp = { +static const struct random_source random_ccp = { .rs_ident = "AMD CCP TRNG", .rs_source = RANDOM_PURE_CCP, .rs_read = random_ccp_read, diff --git a/sys/dev/acpica/acpi_powerres.c b/sys/dev/acpica/acpi_powerres.c index 29d1690f1bdd..0a8b67a5fa84 100644 --- a/sys/dev/acpica/acpi_powerres.c +++ b/sys/dev/acpica/acpi_powerres.c @@ -76,6 +76,13 @@ struct acpi_powerconsumer { /* Device which is powered */ ACPI_HANDLE ac_consumer; int ac_state; + + struct { + bool prx_has; + size_t prx_count; + ACPI_HANDLE *prx_deps; + } ac_prx[ACPI_D_STATE_COUNT]; + TAILQ_ENTRY(acpi_powerconsumer) ac_link; TAILQ_HEAD(,acpi_powerreference) ac_references; }; @@ -96,9 +103,7 @@ static TAILQ_HEAD(acpi_powerconsumer_list, acpi_powerconsumer) ACPI_SERIAL_DECL(powerres, "ACPI power resources"); static ACPI_STATUS acpi_pwr_register_consumer(ACPI_HANDLE consumer); -#ifdef notyet static ACPI_STATUS acpi_pwr_deregister_consumer(ACPI_HANDLE consumer); -#endif /* notyet */ static ACPI_STATUS acpi_pwr_register_resource(ACPI_HANDLE res); #ifdef notyet static ACPI_STATUS acpi_pwr_deregister_resource(ACPI_HANDLE res); @@ -112,6 +117,8 @@ static struct acpi_powerresource *acpi_pwr_find_resource(ACPI_HANDLE res); static struct acpi_powerconsumer *acpi_pwr_find_consumer(ACPI_HANDLE consumer); +static ACPI_STATUS acpi_pwr_infer_state(struct acpi_powerconsumer *pc); +static ACPI_STATUS acpi_pwr_get_state_locked(ACPI_HANDLE consumer, int *state); /* * Register a power resource. @@ -222,6 +229,84 @@ acpi_pwr_deregister_resource(ACPI_HANDLE res) #endif /* notyet */ /* + * Evaluate the _PRx (power resources each D-state depends on). This also + * populates the acpi_powerresources queue with the power resources discovered + * during this step. + * + * ACPI 7.3.8 - 7.3.11 guarantee that _PRx will return the same data each + * time they are evaluated. + * + * If this function fails, acpi_pwr_deregister_consumer() must be called on the + * power consumer to free already allocated memory. + */ +static ACPI_STATUS +acpi_pwr_get_power_resources(ACPI_HANDLE consumer, struct acpi_powerconsumer *pc) +{ + ACPI_INTEGER status; + ACPI_STRING reslist_name; + ACPI_HANDLE reslist_handle; + ACPI_STRING reslist_names[] = {"_PR0", "_PR1", "_PR2", "_PR3"}; + ACPI_BUFFER reslist; + ACPI_OBJECT *reslist_object; + ACPI_OBJECT *dep; + ACPI_HANDLE *res; + + ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); + ACPI_SERIAL_ASSERT(powerres); + + MPASS(consumer != NULL); + + for (int state = ACPI_STATE_D0; state <= ACPI_STATE_D3_HOT; state++) { + pc->ac_prx[state].prx_has = false; + pc->ac_prx[state].prx_count = 0; + pc->ac_prx[state].prx_deps = NULL; + + reslist_name = reslist_names[state - ACPI_STATE_D0]; + if (ACPI_FAILURE(AcpiGetHandle(consumer, reslist_name, &reslist_handle))) + continue; + + reslist.Pointer = NULL; + reslist.Length = ACPI_ALLOCATE_BUFFER; + status = AcpiEvaluateObjectTyped(reslist_handle, NULL, NULL, &reslist, + ACPI_TYPE_PACKAGE); + if (ACPI_FAILURE(status) || reslist.Pointer == NULL) + /* + * ACPI_ALLOCATE_BUFFER entails everything will be freed on error + * by AcpiEvaluateObjectTyped. + */ + continue; + + reslist_object = (ACPI_OBJECT *)reslist.Pointer; + pc->ac_prx[state].prx_has = true; + pc->ac_prx[state].prx_count = reslist_object->Package.Count; + + if (reslist_object->Package.Count == 0) { + AcpiOsFree(reslist_object); + continue; + } + + pc->ac_prx[state].prx_deps = mallocarray(pc->ac_prx[state].prx_count, + sizeof(*pc->ac_prx[state].prx_deps), M_ACPIPWR, M_NOWAIT); + if (pc->ac_prx[state].prx_deps == NULL) { + AcpiOsFree(reslist_object); + return_ACPI_STATUS (AE_NO_MEMORY); + } + + for (size_t i = 0; i < reslist_object->Package.Count; i++) { + dep = &reslist_object->Package.Elements[i]; + res = dep->Reference.Handle; + pc->ac_prx[state].prx_deps[i] = res; + + /* It's fine to attempt to register the same resource twice. */ + acpi_pwr_register_resource(res); + } + AcpiOsFree(reslist_object); + } + + return_ACPI_STATUS (AE_OK); +} + +/* * Register a power consumer. * * It's OK to call this if we already know about the consumer. @@ -229,6 +314,7 @@ acpi_pwr_deregister_resource(ACPI_HANDLE res) static ACPI_STATUS acpi_pwr_register_consumer(ACPI_HANDLE consumer) { + ACPI_INTEGER status; struct acpi_powerconsumer *pc; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); @@ -239,14 +325,30 @@ acpi_pwr_register_consumer(ACPI_HANDLE consumer) return_ACPI_STATUS (AE_OK); /* Allocate a new power consumer */ - if ((pc = malloc(sizeof(*pc), M_ACPIPWR, M_NOWAIT)) == NULL) + if ((pc = malloc(sizeof(*pc), M_ACPIPWR, M_NOWAIT | M_ZERO)) == NULL) return_ACPI_STATUS (AE_NO_MEMORY); TAILQ_INSERT_HEAD(&acpi_powerconsumers, pc, ac_link); TAILQ_INIT(&pc->ac_references); pc->ac_consumer = consumer; - /* XXX we should try to find its current state */ - pc->ac_state = ACPI_STATE_UNKNOWN; + /* + * Get all its power resource dependencies, if it has _PRx. We do this now + * as an opportunity to populate the acpi_powerresources queue. + * + * If this fails, immediately deregister it. + */ + status = acpi_pwr_get_power_resources(consumer, pc); + if (ACPI_FAILURE(status)) { + ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, + "failed to get power resources for %s\n", + acpi_name(consumer))); + acpi_pwr_deregister_consumer(consumer); + return_ACPI_STATUS (status); + } + + /* Find its initial state. */ + if (ACPI_FAILURE(acpi_pwr_get_state_locked(consumer, &pc->ac_state))) + pc->ac_state = ACPI_STATE_UNKNOWN; ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "registered power consumer %s\n", acpi_name(consumer))); @@ -254,7 +356,6 @@ acpi_pwr_register_consumer(ACPI_HANDLE consumer) return_ACPI_STATUS (AE_OK); } -#ifdef notyet /* * Deregister a power consumer. * @@ -279,6 +380,9 @@ acpi_pwr_deregister_consumer(ACPI_HANDLE consumer) /* Pull the consumer off the list and free it */ TAILQ_REMOVE(&acpi_powerconsumers, pc, ac_link); + for (size_t i = 0; i < sizeof(pc->ac_prx) / sizeof(*pc->ac_prx); i++) + if (pc->ac_prx[i].prx_deps != NULL) + free(pc->ac_prx[i].prx_deps, M_ACPIPWR); free(pc, M_ACPIPWR); ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "deregistered power consumer %s\n", @@ -286,10 +390,139 @@ acpi_pwr_deregister_consumer(ACPI_HANDLE consumer) return_ACPI_STATUS (AE_OK); } -#endif /* notyet */ /* - * Set a power consumer to a particular power state. + * The _PSC control method isn't required if it's possible to infer the D-state + * from the _PRx control methods. (See 7.3.6.) + * We can infer that a given D-state has been achieved when all the dependencies + * are in the ON state. + */ +static ACPI_STATUS +acpi_pwr_infer_state(struct acpi_powerconsumer *pc) +{ + ACPI_HANDLE *res; + uint32_t on; + bool all_on = false; + + ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); + ACPI_SERIAL_ASSERT(powerres); + + /* It is important we go from the hottest to the coldest state. */ + for ( + pc->ac_state = ACPI_STATE_D0; + pc->ac_state <= ACPI_STATE_D3_HOT && !all_on; + pc->ac_state++ + ) { + MPASS(pc->ac_state <= sizeof(pc->ac_prx) / sizeof(*pc->ac_prx)); + + if (!pc->ac_prx[pc->ac_state].prx_has) + continue; + + all_on = true; + + for (size_t i = 0; i < pc->ac_prx[pc->ac_state].prx_count; i++) { + res = pc->ac_prx[pc->ac_state].prx_deps[i]; + /* If failure, better to assume D-state is hotter than colder. */ + if (ACPI_FAILURE(acpi_GetInteger(res, "_STA", &on))) + continue; + if (on == 0) { + all_on = false; + break; + } + } + } + + MPASS(pc->ac_state != ACPI_STATE_D0); + + /* + * If none of the power resources required for the shallower D-states are + * on, then we can assume it is unpowered (i.e. D3cold). A device is not + * required to support D3cold however; in that case, _PR3 is not explicitly + * provided. Those devices should default to D3hot instead. + * + * See comments of first row of table 7.1 in ACPI spec. + */ + if (!all_on) + pc->ac_state = pc->ac_prx[ACPI_STATE_D3_HOT].prx_has ? + ACPI_STATE_D3_COLD : ACPI_STATE_D3_HOT; + else + pc->ac_state--; + + return_ACPI_STATUS (AE_OK); +} + +static ACPI_STATUS +acpi_pwr_get_state_locked(ACPI_HANDLE consumer, int *state) +{ + struct acpi_powerconsumer *pc; + ACPI_HANDLE method_handle; + ACPI_STATUS status; + ACPI_BUFFER result; + ACPI_OBJECT *object = NULL; + + ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); + ACPI_SERIAL_ASSERT(powerres); + + if (consumer == NULL) + return_ACPI_STATUS (AE_NOT_FOUND); + + if ((pc = acpi_pwr_find_consumer(consumer)) == NULL) { + if (ACPI_FAILURE(status = acpi_pwr_register_consumer(consumer))) + goto out; + if ((pc = acpi_pwr_find_consumer(consumer)) == NULL) + panic("acpi added power consumer but can't find it"); + } + + status = AcpiGetHandle(consumer, "_PSC", &method_handle); + if (ACPI_FAILURE(status)) { + ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "no _PSC object - %s\n", + AcpiFormatException(status))); + status = acpi_pwr_infer_state(pc); + if (ACPI_FAILURE(status)) { + ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "couldn't infer D-state - %s\n", + AcpiFormatException(status))); + pc->ac_state = ACPI_STATE_UNKNOWN; + } + goto out; + } + + result.Pointer = NULL; + result.Length = ACPI_ALLOCATE_BUFFER; + status = AcpiEvaluateObjectTyped(method_handle, NULL, NULL, &result, ACPI_TYPE_INTEGER); + if (ACPI_FAILURE(status) || result.Pointer == NULL) { + ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "failed to get state with _PSC - %s\n", + AcpiFormatException(status))); + pc->ac_state = ACPI_STATE_UNKNOWN; + goto out; + } + + object = (ACPI_OBJECT *)result.Pointer; + pc->ac_state = ACPI_STATE_D0 + object->Integer.Value; + status = AE_OK; + +out: + if (object != NULL) + AcpiOsFree(object); + *state = pc->ac_state; + return_ACPI_STATUS (status); +} + +/* + * Get a power consumer's D-state. + */ +ACPI_STATUS +acpi_pwr_get_state(ACPI_HANDLE consumer, int *state) +{ + ACPI_STATUS res; + + ACPI_SERIAL_BEGIN(powerres); + res = acpi_pwr_get_state_locked(consumer, state); + ACPI_SERIAL_END(powerres); + return (res); +} + +/* + * Set a power consumer to a particular D-state. */ ACPI_STATUS acpi_pwr_switch_consumer(ACPI_HANDLE consumer, int state) @@ -300,6 +533,7 @@ acpi_pwr_switch_consumer(ACPI_HANDLE consumer, int state) ACPI_OBJECT *reslist_object; ACPI_STATUS status; char *method_name, *reslist_name = NULL; + int new_state; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); @@ -501,8 +735,28 @@ acpi_pwr_switch_consumer(ACPI_HANDLE consumer, int state) } } - /* Transition was successful */ - pc->ac_state = state; + /* + * Make sure the transition succeeded. If getting new state failed, + * just assume the new state is what we wanted. This was the behaviour + * before we were checking D-states. + */ + if (ACPI_FAILURE(acpi_pwr_get_state_locked(consumer, &new_state))) { + printf("%s: failed to get new D-state\n", __func__); + pc->ac_state = state; + } else { + if (new_state != state) + printf("%s: new power state %s is not the one requested %s\n", + __func__, acpi_d_state_to_str(new_state), + acpi_d_state_to_str(state)); + pc->ac_state = new_state; + } + + /* + * We consider the transition successful even if the state we got doesn't + * reflect what we set it to. This is because we weren't previously + * checking the new state at all, so there might exist buggy platforms on + * which suspend would otherwise succeed if we failed here. + */ status = AE_OK; out: diff --git a/sys/dev/acpica/acpivar.h b/sys/dev/acpica/acpivar.h index 6887f080311d..7495a010432b 100644 --- a/sys/dev/acpica/acpivar.h +++ b/sys/dev/acpica/acpivar.h @@ -490,6 +490,7 @@ EVENTHANDLER_DECLARE(acpi_video_event, acpi_event_handler_t); /* Device power control. */ ACPI_STATUS acpi_pwr_wake_enable(ACPI_HANDLE consumer, int enable); +ACPI_STATUS acpi_pwr_get_state(ACPI_HANDLE consumer, int *state); ACPI_STATUS acpi_pwr_switch_consumer(ACPI_HANDLE consumer, int state); acpi_pwr_for_sleep_t acpi_device_pwr_for_sleep; int acpi_set_powerstate(device_t child, int state); diff --git a/sys/dev/amdgpio/amdgpio.c b/sys/dev/amdgpio/amdgpio.c index f39006d95805..2bd455c612b8 100644 --- a/sys/dev/amdgpio/amdgpio.c +++ b/sys/dev/amdgpio/amdgpio.c @@ -408,12 +408,13 @@ amdgpio_attach(device_t dev) GPIO_PIN_OUTPUT : GPIO_PIN_INPUT; } - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { device_printf(dev, "could not attach gpiobus\n"); goto err_bus; } + bus_attach_children(dev); return (0); err_bus: diff --git a/sys/dev/bhnd/cores/chipc/chipc_gpio.c b/sys/dev/bhnd/cores/chipc/chipc_gpio.c index a110bdda5fa7..429de0fc1fd8 100644 --- a/sys/dev/bhnd/cores/chipc/chipc_gpio.c +++ b/sys/dev/bhnd/cores/chipc/chipc_gpio.c @@ -173,11 +173,13 @@ chipc_gpio_attach(device_t dev) if (CC_GPIO_QUIRK(sc, NO_GPIOC)) { sc->gpiobus = NULL; } else { - if ((sc->gpiobus = gpiobus_attach_bus(dev)) == NULL) { + if ((sc->gpiobus = gpiobus_add_bus(dev)) == NULL) { device_printf(dev, "failed to attach gpiobus\n"); error = ENXIO; goto failed; } + + bus_attach_children(dev); } /* Register as the bus GPIO provider */ diff --git a/sys/dev/bnxt/bnxt_re/qplib_res.c b/sys/dev/bnxt/bnxt_re/qplib_res.c index 69661c67708c..f527af031176 100644 --- a/sys/dev/bnxt/bnxt_re/qplib_res.c +++ b/sys/dev/bnxt/bnxt_re/qplib_res.c @@ -875,7 +875,7 @@ int bnxt_qplib_alloc_dpi(struct bnxt_qplib_res *res, dpi->umdbr = umaddr; switch (type) { case BNXT_QPLIB_DPI_TYPE_KERNEL: - /* priviledged dbr was already mapped just initialize it. */ + /* privileged dbr was already mapped just initialize it. */ dpi->umdbr = dpit->ucreg.bar_base + dpit->ucreg.offset + bit_num * PAGE_SIZE; dpi->dbr = dpit->priv_db; @@ -1150,7 +1150,7 @@ int bnxt_qplib_map_db_bar(struct bnxt_qplib_res *res) } ucreg->bar_reg = ioremap(ucreg->bar_base, ucreg->len); if (!ucreg->bar_reg) { - dev_err(&res->pdev->dev, "priviledged dpi map failed!\n"); + dev_err(&res->pdev->dev, "privileged dpi map failed!\n"); return -ENOMEM; } diff --git a/sys/dev/e1000/e1000_phy.c b/sys/dev/e1000/e1000_phy.c index c34897e3b31a..634f48171c3e 100644 --- a/sys/dev/e1000/e1000_phy.c +++ b/sys/dev/e1000/e1000_phy.c @@ -1707,10 +1707,9 @@ s32 e1000_setup_copper_link_generic(struct e1000_hw *hw) * autonegotiation. */ ret_val = e1000_copper_link_autoneg(hw); - if (ret_val && !hw->mac.forced_speed_duplex) + if (ret_val) return ret_val; - } - if (!hw->mac.autoneg || (ret_val && hw->mac.forced_speed_duplex)) { + } else { /* PHY will be set to 10H, 10F, 100H or 100F * depending on user settings. */ diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index f0ef6051fab1..9c5ae2806f75 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -2000,18 +2000,7 @@ em_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) (sc->hw.phy.media_type == e1000_media_type_internal_serdes)) { if (sc->hw.mac.type == e1000_82545) fiber_type = IFM_1000_LX; - switch (sc->link_speed) { - case 10: - ifmr->ifm_active |= IFM_10_FL; - break; - case 100: - ifmr->ifm_active |= IFM_100_FX; - break; - case 1000: - default: - ifmr->ifm_active |= fiber_type | IFM_FDX; - break; - } + ifmr->ifm_active |= fiber_type | IFM_FDX; } else { switch (sc->link_speed) { case 10: @@ -2024,12 +2013,11 @@ em_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) ifmr->ifm_active |= IFM_1000_T; break; } + if (sc->link_duplex == FULL_DUPLEX) + ifmr->ifm_active |= IFM_FDX; + else + ifmr->ifm_active |= IFM_HDX; } - - if (sc->link_duplex == FULL_DUPLEX) - ifmr->ifm_active |= IFM_FDX; - else - ifmr->ifm_active |= IFM_HDX; } /********************************************************************* @@ -2063,26 +2051,6 @@ em_if_media_change(if_ctx_t ctx) sc->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: - sc->hw.mac.autoneg = DO_AUTO_NEG; - if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) { - sc->hw.phy.autoneg_advertised = ADVERTISE_100_FULL; - sc->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; - } else { - sc->hw.phy.autoneg_advertised = ADVERTISE_100_HALF; - sc->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; - } - break; - case IFM_10_T: - sc->hw.mac.autoneg = DO_AUTO_NEG; - if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) { - sc->hw.phy.autoneg_advertised = ADVERTISE_10_FULL; - sc->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; - } else { - sc->hw.phy.autoneg_advertised = ADVERTISE_10_HALF; - sc->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; - } - break; - case IFM_100_FX: sc->hw.mac.autoneg = false; sc->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) @@ -2090,7 +2058,7 @@ em_if_media_change(if_ctx_t ctx) else sc->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; - case IFM_10_FL: + case IFM_10_T: sc->hw.mac.autoneg = false; sc->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) diff --git a/sys/dev/fdt/fdt_common.c b/sys/dev/fdt/fdt_common.c index 1fea4c6f1392..f43551c6310e 100644 --- a/sys/dev/fdt/fdt_common.c +++ b/sys/dev/fdt/fdt_common.c @@ -62,8 +62,6 @@ SYSCTL_NODE(_hw, OID_AUTO, fdt, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Flattened Device Tree"); -struct fdt_ic_list fdt_ic_list_head = SLIST_HEAD_INITIALIZER(fdt_ic_list_head); - static int fdt_get_range_by_busaddr(phandle_t node, u_long addr, u_long *base, u_long *size) diff --git a/sys/dev/fdt/fdt_common.h b/sys/dev/fdt/fdt_common.h index ece54290a6ad..f597233f9771 100644 --- a/sys/dev/fdt/fdt_common.h +++ b/sys/dev/fdt/fdt_common.h @@ -59,13 +59,6 @@ struct fdt_fixup_entry { extern struct fdt_fixup_entry fdt_fixup_table[]; #endif -extern SLIST_HEAD(fdt_ic_list, fdt_ic) fdt_ic_list_head; -struct fdt_ic { - SLIST_ENTRY(fdt_ic) fdt_ics; - ihandle_t iph; - device_t dev; -}; - #if defined(FDT_DTB_STATIC) extern u_char fdt_static_dtb; #endif diff --git a/sys/dev/ftgpio/ftgpio.c b/sys/dev/ftgpio/ftgpio.c index 7acfdd5b900e..68787b54bb16 100644 --- a/sys/dev/ftgpio/ftgpio.c +++ b/sys/dev/ftgpio/ftgpio.c @@ -398,12 +398,13 @@ ftgpio_attach(device_t dev) FTGPIO_VERBOSE_PRINTF(sc->dev, "groups GPIO1..GPIO6 enabled\n"); GPIO_UNLOCK(sc); - sc->busdev = gpiobus_attach_bus(dev); + sc->busdev = gpiobus_add_bus(dev); if (sc->busdev == NULL) { GPIO_LOCK_DESTROY(sc); return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/dev/gpio/acpi_gpiobus.c b/sys/dev/gpio/acpi_gpiobus.c index 94f4e5771266..0d2455cab399 100644 --- a/sys/dev/gpio/acpi_gpiobus.c +++ b/sys/dev/gpio/acpi_gpiobus.c @@ -37,6 +37,7 @@ #include <dev/gpio/gpiobusvar.h> #include <dev/gpio/acpi_gpiobusvar.h> #include <dev/gpio/gpiobus_internal.h> +#include <sys/sbuf.h> #include "gpiobus_if.h" @@ -52,12 +53,11 @@ struct acpi_gpiobus_ctx { struct acpi_gpiobus_ivar { - struct gpiobus_ivar gpiobus; /* Must come first */ - ACPI_HANDLE dev_handle; /* ACPI handle for bus */ - uint32_t flags; + struct gpiobus_ivar gpiobus; + ACPI_HANDLE handle; }; -static uint32_t +uint32_t acpi_gpiobus_convflags(ACPI_RESOURCE_GPIO *gpio_res) { uint32_t flags = 0; @@ -150,70 +150,24 @@ acpi_gpiobus_enumerate_res(ACPI_RESOURCE *res, void *context) return (AE_OK); } -static struct acpi_gpiobus_ivar * -acpi_gpiobus_setup_devinfo(device_t bus, device_t child, - ACPI_RESOURCE_GPIO *gpio_res) -{ - struct acpi_gpiobus_ivar *devi; - - devi = malloc(sizeof(*devi), M_DEVBUF, M_NOWAIT | M_ZERO); - if (devi == NULL) - return (NULL); - resource_list_init(&devi->gpiobus.rl); - - devi->flags = acpi_gpiobus_convflags(gpio_res); - if (acpi_quirks & ACPI_Q_AEI_NOPULL) - devi->flags &= ~GPIO_PIN_PULLUP; - - devi->gpiobus.npins = 1; - if (gpiobus_alloc_ivars(&devi->gpiobus) != 0) { - free(devi, M_DEVBUF); - return (NULL); - } - - for (int i = 0; i < devi->gpiobus.npins; i++) - devi->gpiobus.pins[i] = gpio_res->PinTable[i]; - - return (devi); -} - static ACPI_STATUS acpi_gpiobus_enumerate_aei(ACPI_RESOURCE *res, void *context) { ACPI_RESOURCE_GPIO *gpio_res = &res->Data.Gpio; - struct acpi_gpiobus_ctx *ctx = context; - device_t bus = ctx->sc->sc_busdev; - device_t child; - struct acpi_gpiobus_ivar *devi; + uint32_t *npins = context, *pins = npins + 1; - /* Check that we have a GpioInt object. */ + /* + * Check that we have a GpioInt object. + * Note that according to the spec this + * should always be the case. + */ if (res->Type != ACPI_RESOURCE_TYPE_GPIO) return (AE_OK); if (gpio_res->ConnectionType != ACPI_RESOURCE_GPIO_TYPE_INT) return (AE_OK); - /* Add a child. */ - child = device_add_child_ordered(bus, 0, "gpio_aei", DEVICE_UNIT_ANY); - if (child == NULL) - return (AE_OK); - devi = acpi_gpiobus_setup_devinfo(bus, child, gpio_res); - if (devi == NULL) { - device_delete_child(bus, child); - return (AE_OK); - } - device_set_ivars(child, devi); - - for (int i = 0; i < devi->gpiobus.npins; i++) { - if (GPIOBUS_PIN_SETFLAGS(bus, child, 0, devi->flags & - ~GPIO_INTR_MASK)) { - device_delete_child(bus, child); - return (AE_OK); - } - } - - /* Pass ACPI information to children. */ - devi->dev_handle = ctx->dev_handle; - + for (int i = 0; i < gpio_res->PinTableLength; i++) + pins[(*npins)++] = gpio_res->PinTable[i]; return (AE_OK); } @@ -296,6 +250,63 @@ err: return (AE_BAD_PARAMETER); } +static void +acpi_gpiobus_attach_aei(struct acpi_gpiobus_softc *sc, ACPI_HANDLE handle) +{ + struct acpi_gpiobus_ivar *devi; + ACPI_HANDLE aei_handle; + device_t child; + uint32_t *pins; + ACPI_STATUS status; + int err; + + status = AcpiGetHandle(handle, "_AEI", &aei_handle); + if (ACPI_FAILURE(status)) + return; + + /* pins[0] specifies the length of the array. */ + pins = mallocarray(sc->super_sc.sc_npins + 1, + sizeof(uint32_t), M_DEVBUF, M_WAITOK); + pins[0] = 0; + + status = AcpiWalkResources(handle, "_AEI", + acpi_gpiobus_enumerate_aei, pins); + if (ACPI_FAILURE(status)) { + device_printf(sc->super_sc.sc_busdev, + "Failed to enumerate AEI resources\n"); + free(pins, M_DEVBUF); + return; + } + + child = BUS_ADD_CHILD(sc->super_sc.sc_busdev, 0, "gpio_aei", + DEVICE_UNIT_ANY); + if (child == NULL) { + device_printf(sc->super_sc.sc_busdev, + "Failed to add gpio_aei child\n"); + free(pins, M_DEVBUF); + return; + } + + devi = device_get_ivars(child); + devi->gpiobus.npins = pins[0]; + devi->handle = aei_handle; + + err = gpiobus_alloc_ivars(&devi->gpiobus); + if (err != 0) { + device_printf(sc->super_sc.sc_busdev, + "Failed to allocate gpio_aei ivars\n"); + device_delete_child(sc->super_sc.sc_busdev, child); + free(pins, M_DEVBUF); + return; + } + + for (int i = 0; i < pins[0]; i++) + devi->gpiobus.pins[i] = pins[i + 1]; + free(pins, M_DEVBUF); + + bus_attach_children(sc->super_sc.sc_busdev); +} + static int acpi_gpiobus_probe(device_t dev) { @@ -353,13 +364,8 @@ acpi_gpiobus_attach(device_t dev) if (ACPI_FAILURE(status)) device_printf(dev, "Failed to enumerate GPIO resources\n"); - /* Look for AEI children */ - status = AcpiWalkResources(handle, "_AEI", acpi_gpiobus_enumerate_aei, - &ctx); - - if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) - device_printf(dev, "Failed to enumerate AEI resources\n"); - + /* Look for AEI child */ + acpi_gpiobus_attach_aei(sc, handle); return (0); } @@ -383,16 +389,14 @@ acpi_gpiobus_detach(device_t dev) } static int -acpi_gpiobus_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) +acpi_gpiobus_read_ivar(device_t dev, device_t child, int which, + uintptr_t *result) { struct acpi_gpiobus_ivar *devi = device_get_ivars(child); switch (which) { case ACPI_GPIOBUS_IVAR_HANDLE: - *result = (uintptr_t)devi->dev_handle; - break; - case ACPI_GPIOBUS_IVAR_FLAGS: - *result = (uintptr_t)devi->flags; + *result = (uintptr_t)devi->handle; break; default: return (gpiobus_read_ivar(dev, child, which, result)); @@ -401,6 +405,28 @@ acpi_gpiobus_read_ivar(device_t dev, device_t child, int which, uintptr_t *resul return (0); } +static device_t +acpi_gpiobus_add_child(device_t dev, u_int order, const char *name, int unit) +{ + return (gpiobus_add_child_common(dev, order, name, unit, + sizeof(struct acpi_gpiobus_ivar))); +} + +static int +acpi_gpiobus_child_location(device_t bus, device_t child, struct sbuf *sb) +{ + struct acpi_gpiobus_ivar *devi; + int err; + + err = gpiobus_child_location(bus, child, sb); + if (err != 0) + return (err); + + devi = device_get_ivars(child); + sbuf_printf(sb, " handle=%s", acpi_name(devi->handle)); + return (0); +} + static device_method_t acpi_gpiobus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, acpi_gpiobus_probe), @@ -409,6 +435,8 @@ static device_method_t acpi_gpiobus_methods[] = { /* Bus interface */ DEVMETHOD(bus_read_ivar, acpi_gpiobus_read_ivar), + DEVMETHOD(bus_add_child, acpi_gpiobus_add_child), + DEVMETHOD(bus_child_location, acpi_gpiobus_child_location), DEVMETHOD_END }; diff --git a/sys/dev/gpio/acpi_gpiobusvar.h b/sys/dev/gpio/acpi_gpiobusvar.h index f8d502eab9d1..288e8bd0f2af 100644 --- a/sys/dev/gpio/acpi_gpiobusvar.h +++ b/sys/dev/gpio/acpi_gpiobusvar.h @@ -33,16 +33,16 @@ #include <contrib/dev/acpica/include/acpi.h> enum acpi_gpiobus_ivars { - ACPI_GPIOBUS_IVAR_HANDLE = 10600, - ACPI_GPIOBUS_IVAR_FLAGS, + ACPI_GPIOBUS_IVAR_HANDLE = 10600 }; #define ACPI_GPIOBUS_ACCESSOR(var, ivar, type) \ __BUS_ACCESSOR(acpi_gpiobus, var, ACPI_GPIOBUS, ivar, type) ACPI_GPIOBUS_ACCESSOR(handle, HANDLE, ACPI_HANDLE) -ACPI_GPIOBUS_ACCESSOR(flags, FLAGS, uint32_t) #undef ACPI_GPIOBUS_ACCESSOR +uint32_t acpi_gpiobus_convflags(ACPI_RESOURCE_GPIO *); + #endif /* __ACPI_GPIOBUS_H__ */ diff --git a/sys/dev/gpio/bytgpio.c b/sys/dev/gpio/bytgpio.c index f7b2a73ec6cb..5d685c155a03 100644 --- a/sys/dev/gpio/bytgpio.c +++ b/sys/dev/gpio/bytgpio.c @@ -608,7 +608,7 @@ bytgpio_attach(device_t dev) sc->sc_pad_funcs[pin] = val & BYTGPIO_PCONF0_FUNC_MASK; } - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { BYTGPIO_LOCK_DESTROY(sc); bus_release_resource(dev, SYS_RES_MEMORY, @@ -616,6 +616,7 @@ bytgpio_attach(device_t dev) return (ENXIO); } + bus_attach_children(dev); return (0); error: diff --git a/sys/dev/gpio/chvgpio.c b/sys/dev/gpio/chvgpio.c index 199ad4d6f373..3273aad9242b 100644 --- a/sys/dev/gpio/chvgpio.c +++ b/sys/dev/gpio/chvgpio.c @@ -441,7 +441,7 @@ chvgpio_attach(device_t dev) bus_write_4(sc->sc_mem_res, CHVGPIO_INTERRUPT_MASK, 0); bus_write_4(sc->sc_mem_res, CHVGPIO_INTERRUPT_STATUS, 0xffff); - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { CHVGPIO_LOCK_DESTROY(sc); bus_release_resource(dev, SYS_RES_MEMORY, @@ -451,6 +451,7 @@ chvgpio_attach(device_t dev) return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/dev/gpio/dwgpio/dwgpio.c b/sys/dev/gpio/dwgpio/dwgpio.c index 5acb99ca591e..3908113d5fd4 100644 --- a/sys/dev/gpio/dwgpio/dwgpio.c +++ b/sys/dev/gpio/dwgpio/dwgpio.c @@ -167,12 +167,13 @@ dwgpio_attach(device_t dev) snprintf(sc->gpio_pins[i].gp_name, GPIOMAXNAME, "dwgpio%d.%d", device_get_unit(dev), i); } - sc->busdev = gpiobus_attach_bus(dev); + sc->busdev = gpiobus_add_bus(dev); if (sc->busdev == NULL) { mtx_destroy(&sc->sc_mtx); return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/dev/gpio/gpioaei.c b/sys/dev/gpio/gpioaei.c index ecae8ccaf2fa..7b97277aaf61 100644 --- a/sys/dev/gpio/gpioaei.c +++ b/sys/dev/gpio/gpioaei.c @@ -45,13 +45,21 @@ enum gpio_aei_type { ACPI_AEI_TYPE_EVT }; -struct gpio_aei_softc { - ACPI_HANDLE handle; - enum gpio_aei_type type; - int pin; +struct gpio_aei_ctx { + SLIST_ENTRY(gpio_aei_ctx) next; struct resource * intr_res; - int intr_rid; void * intr_cookie; + ACPI_HANDLE handle; + gpio_pin_t gpio; + uint32_t pin; + int intr_rid; + enum gpio_aei_type type; +}; + +struct gpio_aei_softc { + SLIST_HEAD(, gpio_aei_ctx) aei_ctx; + ACPI_HANDLE dev_handle; + device_t dev; }; static int @@ -65,69 +73,157 @@ gpio_aei_probe(device_t dev) static void gpio_aei_intr(void * arg) { - struct gpio_aei_softc * sc = arg; + struct gpio_aei_ctx * ctx = arg; /* Ask ACPI to run the appropriate _EVT, _Exx or _Lxx method. */ - if (sc->type == ACPI_AEI_TYPE_EVT) - acpi_SetInteger(sc->handle, NULL, sc->pin); + if (ctx->type == ACPI_AEI_TYPE_EVT) + acpi_SetInteger(ctx->handle, NULL, ctx->pin); else - AcpiEvaluateObject(sc->handle, NULL, NULL, NULL); + AcpiEvaluateObject(ctx->handle, NULL, NULL, NULL); +} + +static ACPI_STATUS +gpio_aei_enumerate(ACPI_RESOURCE * res, void * context) +{ + ACPI_RESOURCE_GPIO * gpio_res = &res->Data.Gpio; + struct gpio_aei_softc * sc = context; + uint32_t flags, maxpin; + device_t busdev; + int err; + + /* + * Check that we have a GpioInt object. + * Note that according to the spec this + * should always be the case. + */ + if (res->Type != ACPI_RESOURCE_TYPE_GPIO) + return (AE_OK); + if (gpio_res->ConnectionType != ACPI_RESOURCE_GPIO_TYPE_INT) + return (AE_OK); + + flags = acpi_gpiobus_convflags(gpio_res); + if (acpi_quirks & ACPI_Q_AEI_NOPULL) + flags &= ~GPIO_PIN_PULLUP; + + err = GPIO_PIN_MAX(acpi_get_device(sc->dev_handle), &maxpin); + if (err != 0) + return (AE_ERROR); + + busdev = GPIO_GET_BUS(acpi_get_device(sc->dev_handle)); + for (int i = 0; i < gpio_res->PinTableLength; i++) { + struct gpio_aei_ctx * ctx; + uint32_t pin = gpio_res->PinTable[i]; + + if (__predict_false(pin > maxpin)) { + device_printf(sc->dev, + "Invalid pin 0x%x, max: 0x%x (bad ACPI tables?)\n", + pin, maxpin); + continue; + } + + ctx = malloc(sizeof(struct gpio_aei_ctx), M_DEVBUF, M_WAITOK); + ctx->type = ACPI_AEI_TYPE_UNKNOWN; + if (pin <= 255) { + char objname[5]; /* "_EXX" or "_LXX" */ + sprintf(objname, "_%c%02X", + (flags & GPIO_INTR_EDGE_MASK) ? 'E' : 'L', pin); + if (ACPI_SUCCESS(AcpiGetHandle(sc->dev_handle, objname, + &ctx->handle))) + ctx->type = ACPI_AEI_TYPE_ELX; + } + + if (ctx->type == ACPI_AEI_TYPE_UNKNOWN) { + if (ACPI_SUCCESS(AcpiGetHandle(sc->dev_handle, "_EVT", + &ctx->handle))) + ctx->type = ACPI_AEI_TYPE_EVT; + else { + device_printf(sc->dev, + "AEI Device type is unknown for pin 0x%x\n", + pin); + + free(ctx, M_DEVBUF); + continue; + } + } + + err = gpio_pin_get_by_bus_pinnum(busdev, pin, &ctx->gpio); + if (err != 0) { + device_printf(sc->dev, "Cannot acquire pin 0x%x\n", + pin); + + free(ctx, M_DEVBUF); + continue; + } + + err = gpio_pin_setflags(ctx->gpio, flags & ~GPIO_INTR_MASK); + if (err != 0) { + device_printf(sc->dev, + "Cannot set pin flags for pin 0x%x\n", pin); + + gpio_pin_release(ctx->gpio); + free(ctx, M_DEVBUF); + continue; + } + + ctx->intr_rid = 0; + ctx->intr_res = gpio_alloc_intr_resource(sc->dev, + &ctx->intr_rid, RF_ACTIVE, ctx->gpio, + flags & GPIO_INTR_MASK); + if (ctx->intr_res == NULL) { + device_printf(sc->dev, + "Cannot allocate an IRQ for pin 0x%x\n", pin); + + gpio_pin_release(ctx->gpio); + free(ctx, M_DEVBUF); + continue; + } + + err = bus_setup_intr(sc->dev, ctx->intr_res, INTR_TYPE_MISC | + INTR_MPSAFE | INTR_EXCL | INTR_SLEEPABLE, NULL, + gpio_aei_intr, ctx, &ctx->intr_cookie); + if (err != 0) { + device_printf(sc->dev, + "Cannot set up an IRQ for pin 0x%x\n", pin); + + bus_release_resource(sc->dev, ctx->intr_res); + gpio_pin_release(ctx->gpio); + free(ctx, M_DEVBUF); + continue; + } + + ctx->pin = pin; + SLIST_INSERT_HEAD(&sc->aei_ctx, ctx, next); + } + + return (AE_OK); } static int gpio_aei_attach(device_t dev) { struct gpio_aei_softc * sc = device_get_softc(dev); - gpio_pin_t pin; - uint32_t flags; ACPI_HANDLE handle; - int err; + ACPI_STATUS status; /* This is us. */ device_set_desc(dev, "ACPI Event Information Device"); - /* Store parameters needed by gpio_aei_intr. */ handle = acpi_gpiobus_get_handle(dev); - if (gpio_pin_get_by_child_index(dev, 0, &pin) != 0) { - device_printf(dev, "Unable to get the input pin\n"); + status = AcpiGetParent(handle, &sc->dev_handle); + if (ACPI_FAILURE(status)) { + device_printf(dev, "Cannot get parent of %s\n", + acpi_name(handle)); return (ENXIO); } - sc->type = ACPI_AEI_TYPE_UNKNOWN; - sc->pin = pin->pin; - - flags = acpi_gpiobus_get_flags(dev); - if (pin->pin <= 255) { - char objname[5]; /* "_EXX" or "_LXX" */ - sprintf(objname, "_%c%02X", - (flags & GPIO_INTR_EDGE_MASK) ? 'E' : 'L', pin->pin); - if (ACPI_SUCCESS(AcpiGetHandle(handle, objname, &sc->handle))) - sc->type = ACPI_AEI_TYPE_ELX; - } - if (sc->type == ACPI_AEI_TYPE_UNKNOWN) { - if (ACPI_SUCCESS(AcpiGetHandle(handle, "_EVT", &sc->handle))) - sc->type = ACPI_AEI_TYPE_EVT; - } - - if (sc->type == ACPI_AEI_TYPE_UNKNOWN) { - device_printf(dev, "ACPI Event Information Device type is unknown"); - return (ENOTSUP); - } + SLIST_INIT(&sc->aei_ctx); + sc->dev = dev; - /* Set up the interrupt. */ - if ((sc->intr_res = gpio_alloc_intr_resource(dev, &sc->intr_rid, - RF_ACTIVE, pin, flags & GPIO_INTR_MASK)) == NULL) { - device_printf(dev, "Cannot allocate an IRQ\n"); - return (ENOTSUP); - } - err = bus_setup_intr(dev, sc->intr_res, INTR_TYPE_MISC | INTR_MPSAFE | - INTR_EXCL | INTR_SLEEPABLE, NULL, gpio_aei_intr, sc, - &sc->intr_cookie); - if (err != 0) { - device_printf(dev, "Cannot set up IRQ\n"); - bus_release_resource(dev, SYS_RES_IRQ, sc->intr_rid, - sc->intr_res); - return (err); + status = AcpiWalkResources(sc->dev_handle, "_AEI", + gpio_aei_enumerate, sc); + if (ACPI_FAILURE(status)) { + device_printf(dev, "Failed to enumerate AEI resources\n"); + return (ENXIO); } return (0); @@ -137,9 +233,15 @@ static int gpio_aei_detach(device_t dev) { struct gpio_aei_softc * sc = device_get_softc(dev); + struct gpio_aei_ctx * ctx, * tctx; + + SLIST_FOREACH_SAFE(ctx, &sc->aei_ctx, next, tctx) { + bus_teardown_intr(dev, ctx->intr_res, ctx->intr_cookie); + bus_release_resource(dev, ctx->intr_res); + gpio_pin_release(ctx->gpio); + free(ctx, M_DEVBUF); + } - bus_teardown_intr(dev, sc->intr_res, sc->intr_cookie); - bus_release_resource(dev, SYS_RES_IRQ, sc->intr_rid, sc->intr_res); return (0); } diff --git a/sys/dev/gpio/gpiobus.c b/sys/dev/gpio/gpiobus.c index 764bcb7e6ee8..5f1f6532a79b 100644 --- a/sys/dev/gpio/gpiobus.c +++ b/sys/dev/gpio/gpiobus.c @@ -57,7 +57,6 @@ static int gpiobus_suspend(device_t); static int gpiobus_resume(device_t); static void gpiobus_probe_nomatch(device_t, device_t); static int gpiobus_print_child(device_t, device_t); -static int gpiobus_child_location(device_t, device_t, struct sbuf *); static device_t gpiobus_add_child(device_t, u_int, const char *, int); static void gpiobus_hinted_child(device_t, const char *, int); @@ -330,24 +329,6 @@ gpiobus_add_bus(device_t dev) return (busdev); } -/* - * Attach a gpiobus child. - * Note that the controller is expected - * to be fully initialized at this point. - */ -device_t -gpiobus_attach_bus(device_t dev) -{ - device_t busdev; - - busdev = gpiobus_add_bus(dev); - if (busdev == NULL) - return (NULL); - - bus_attach_children(dev); - return (busdev); -} - int gpiobus_detach_bus(device_t dev) { @@ -680,7 +661,7 @@ gpiobus_print_child(device_t dev, device_t child) return (retval); } -static int +int gpiobus_child_location(device_t bus, device_t child, struct sbuf *sb) { struct gpiobus_ivar *devi; @@ -692,16 +673,19 @@ gpiobus_child_location(device_t bus, device_t child, struct sbuf *sb) return (0); } -static device_t -gpiobus_add_child(device_t dev, u_int order, const char *name, int unit) +device_t +gpiobus_add_child_common(device_t dev, u_int order, const char *name, int unit, + size_t ivars_size) { device_t child; struct gpiobus_ivar *devi; + KASSERT(ivars_size >= sizeof(struct gpiobus_ivar), + ("child ivars must include gpiobus_ivar as their first member")); child = device_add_child_ordered(dev, order, name, unit); if (child == NULL) return (child); - devi = malloc(sizeof(struct gpiobus_ivar), M_DEVBUF, M_NOWAIT | M_ZERO); + devi = malloc(ivars_size, M_DEVBUF, M_NOWAIT | M_ZERO); if (devi == NULL) { device_delete_child(dev, child); return (NULL); @@ -712,6 +696,13 @@ gpiobus_add_child(device_t dev, u_int order, const char *name, int unit) return (child); } +static device_t +gpiobus_add_child(device_t dev, u_int order, const char *name, int unit) +{ + return (gpiobus_add_child_common(dev, order, name, unit, + sizeof(struct gpiobus_ivar))); +} + static void gpiobus_child_deleted(device_t dev, device_t child) { diff --git a/sys/dev/gpio/gpiobus_internal.h b/sys/dev/gpio/gpiobus_internal.h index de3f57663132..c198e5f79989 100644 --- a/sys/dev/gpio/gpiobus_internal.h +++ b/sys/dev/gpio/gpiobus_internal.h @@ -42,6 +42,8 @@ void gpiobus_free_ivars(struct gpiobus_ivar *); int gpiobus_read_ivar(device_t, device_t, int, uintptr_t *); int gpiobus_acquire_pin(device_t, uint32_t); void gpiobus_release_pin(device_t, uint32_t); +int gpiobus_child_location(device_t, device_t, struct sbuf *); +device_t gpiobus_add_child_common(device_t, u_int, const char *, int, size_t); extern driver_t gpiobus_driver; #endif diff --git a/sys/dev/gpio/gpiobusvar.h b/sys/dev/gpio/gpiobusvar.h index 7f504236a774..0528efe45525 100644 --- a/sys/dev/gpio/gpiobusvar.h +++ b/sys/dev/gpio/gpiobusvar.h @@ -171,7 +171,6 @@ struct resource *gpio_alloc_intr_resource(device_t consumer_dev, int *rid, int gpio_check_flags(uint32_t, uint32_t); device_t gpiobus_add_bus(device_t); -device_t gpiobus_attach_bus(device_t); int gpiobus_detach_bus(device_t); #endif /* __GPIOBUS_H__ */ diff --git a/sys/dev/gpio/ofw_gpiobus.c b/sys/dev/gpio/ofw_gpiobus.c index fc5fb03d6824..b12b78fac18c 100644 --- a/sys/dev/gpio/ofw_gpiobus.c +++ b/sys/dev/gpio/ofw_gpiobus.c @@ -451,28 +451,22 @@ ofw_gpiobus_add_child(device_t dev, u_int order, const char *name, int unit) device_t child; struct ofw_gpiobus_devinfo *devi; - child = device_add_child_ordered(dev, order, name, unit); + child = gpiobus_add_child_common(dev, order, name, unit, + sizeof(struct ofw_gpiobus_devinfo)); if (child == NULL) - return (child); - devi = malloc(sizeof(struct ofw_gpiobus_devinfo), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (devi == NULL) { - device_delete_child(dev, child); - return (0); - } + return (NULL); /* * NULL all the OFW-related parts of the ivars for non-OFW * children. */ + devi = device_get_ivars(child); devi->opd_obdinfo.obd_node = -1; devi->opd_obdinfo.obd_name = NULL; devi->opd_obdinfo.obd_compat = NULL; devi->opd_obdinfo.obd_type = NULL; devi->opd_obdinfo.obd_model = NULL; - device_set_ivars(child, devi); - return (child); } diff --git a/sys/dev/gpio/pl061.c b/sys/dev/gpio/pl061.c index 87d4310a6396..32109e5982bc 100644 --- a/sys/dev/gpio/pl061.c +++ b/sys/dev/gpio/pl061.c @@ -495,13 +495,14 @@ pl061_attach(device_t dev) goto free_isrc; } - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { device_printf(dev, "couldn't attach gpio bus\n"); PL061_LOCK_DESTROY(sc); goto free_isrc; } + bus_attach_children(dev); return (0); free_isrc: diff --git a/sys/dev/gpio/qoriq_gpio.c b/sys/dev/gpio/qoriq_gpio.c index 8b44cd256c79..d11868a23751 100644 --- a/sys/dev/gpio/qoriq_gpio.c +++ b/sys/dev/gpio/qoriq_gpio.c @@ -379,12 +379,13 @@ qoriq_gpio_attach(device_t dev) OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); - sc->busdev = gpiobus_attach_bus(dev); + sc->busdev = gpiobus_add_bus(dev); if (sc->busdev == NULL) { qoriq_gpio_detach(dev); return (ENOMEM); } + bus_attach_children(dev); return (0); } diff --git a/sys/dev/hid/hidbus.c b/sys/dev/hid/hidbus.c index 96d36c8d191d..683449fca49c 100644 --- a/sys/dev/hid/hidbus.c +++ b/sys/dev/hid/hidbus.c @@ -65,7 +65,7 @@ struct hidbus_ivars { struct mtx *mtx; /* child intr mtx */ hid_intr_t *intr_handler; /* executed under mtx*/ void *intr_ctx; - unsigned int refcnt; /* protected by mtx */ + bool active; /* protected by mtx */ struct epoch_context epoch_ctx; CK_STAILQ_ENTRY(hidbus_ivars) link; }; @@ -398,7 +398,7 @@ hidbus_child_detached(device_t bus, device_t child) struct hidbus_softc *sc = device_get_softc(bus); struct hidbus_ivars *tlc = device_get_ivars(child); - KASSERT(tlc->refcnt == 0, ("Child device is running")); + KASSERT(!tlc->active, ("Child device is running")); tlc->mtx = &sc->mtx; tlc->intr_handler = NULL; tlc->flags &= ~HIDBUS_FLAG_CAN_POLL; @@ -423,7 +423,7 @@ hidbus_child_deleted(device_t bus, device_t child) struct hidbus_ivars *tlc = device_get_ivars(child); sx_xlock(&sc->sx); - KASSERT(tlc->refcnt == 0, ("Child device is running")); + KASSERT(!tlc->active, ("Child device is running")); CK_STAILQ_REMOVE(&sc->tlcs, tlc, hidbus_ivars, link); sx_unlock(&sc->sx); epoch_call(INPUT_EPOCH, hidbus_ivar_dtor, &tlc->epoch_ctx); @@ -572,7 +572,7 @@ hidbus_intr(void *context, void *buf, hid_size_t len) if (!HID_IN_POLLING_MODE()) epoch_enter_preempt(INPUT_EPOCH, &et); CK_STAILQ_FOREACH(tlc, &sc->tlcs, link) { - if (tlc->refcnt == 0 || tlc->intr_handler == NULL) + if (!tlc->active || tlc->intr_handler == NULL) continue; if (HID_IN_POLLING_MODE()) { if ((tlc->flags & HIDBUS_FLAG_CAN_POLL) != 0) @@ -602,21 +602,14 @@ hidbus_intr_start(device_t bus, device_t child) MPASS(bus == device_get_parent(child)); struct hidbus_softc *sc = device_get_softc(bus); struct hidbus_ivars *ivar = device_get_ivars(child); - struct hidbus_ivars *tlc; - bool refcnted = false; int error; if (sx_xlock_sig(&sc->sx) != 0) return (EINTR); - CK_STAILQ_FOREACH(tlc, &sc->tlcs, link) { - refcnted |= (tlc->refcnt != 0); - if (tlc == ivar) { - mtx_lock(tlc->mtx); - ++tlc->refcnt; - mtx_unlock(tlc->mtx); - } - } - error = refcnted ? 0 : hid_intr_start(bus); + mtx_lock(ivar->mtx); + ivar->active = true; + mtx_unlock(ivar->mtx); + error = hid_intr_start(bus); sx_unlock(&sc->sx); return (error); @@ -629,21 +622,17 @@ hidbus_intr_stop(device_t bus, device_t child) struct hidbus_softc *sc = device_get_softc(bus); struct hidbus_ivars *ivar = device_get_ivars(child); struct hidbus_ivars *tlc; - bool refcnted = false; + bool active = false; int error; if (sx_xlock_sig(&sc->sx) != 0) return (EINTR); - CK_STAILQ_FOREACH(tlc, &sc->tlcs, link) { - if (tlc == ivar) { - mtx_lock(tlc->mtx); - MPASS(tlc->refcnt != 0); - --tlc->refcnt; - mtx_unlock(tlc->mtx); - } - refcnted |= (tlc->refcnt != 0); - } - error = refcnted ? 0 : hid_intr_stop(bus); + mtx_lock(ivar->mtx); + ivar->active = false; + mtx_unlock(ivar->mtx); + CK_STAILQ_FOREACH(tlc, &sc->tlcs, link) + active |= tlc->active; + error = active ? 0 : hid_intr_stop(bus); sx_unlock(&sc->sx); return (error); diff --git a/sys/dev/hid/hidquirk.h b/sys/dev/hid/hidquirk.h index 4f8b8acbe201..f6fa9f88c6c9 100644 --- a/sys/dev/hid/hidquirk.h +++ b/sys/dev/hid/hidquirk.h @@ -50,6 +50,7 @@ HQ(IS_XBOX360GP), /* device is XBox 360 GamePad */ \ HQ(NOWRITE), /* device does not support writes */ \ HQ(IICHID_SAMPLING), /* IIC backend runs in sampling mode */ \ + HQ(NO_READAHEAD), /* Disable interrupt after one report */\ \ /* Various quirks */ \ HQ(HID_IGNORE), /* device should be ignored by hid class */ \ diff --git a/sys/dev/hid/hidraw.c b/sys/dev/hid/hidraw.c index 06f70070f61b..4855843cd265 100644 --- a/sys/dev/hid/hidraw.c +++ b/sys/dev/hid/hidraw.c @@ -85,6 +85,12 @@ SYSCTL_INT(_hw_hid_hidraw, OID_AUTO, debug, CTLFLAG_RWTUN, free((buf), M_DEVBUF); \ } +#ifdef HIDRAW_MAKE_UHID_ALIAS +#define HIDRAW_NAME "uhid" +#else +#define HIDRAW_NAME "hidraw" +#endif + struct hidraw_softc { device_t sc_dev; /* base device */ @@ -183,8 +189,8 @@ hidraw_identify(driver_t *driver, device_t parent) { device_t child; - if (device_find_child(parent, "hidraw", DEVICE_UNIT_ANY) == NULL) { - child = BUS_ADD_CHILD(parent, 0, "hidraw", + if (device_find_child(parent, HIDRAW_NAME, DEVICE_UNIT_ANY) == NULL) { + child = BUS_ADD_CHILD(parent, 0, HIDRAW_NAME, device_get_unit(parent)); if (child != NULL) hidbus_set_index(child, HIDRAW_INDEX); @@ -1050,7 +1056,7 @@ static device_method_t hidraw_methods[] = { }; static driver_t hidraw_driver = { - "hidraw", + HIDRAW_NAME, hidraw_methods, sizeof(struct hidraw_softc) }; diff --git a/sys/dev/hid/ietp.c b/sys/dev/hid/ietp.c index 217585a7948b..73a5cb7414d4 100644 --- a/sys/dev/hid/ietp.c +++ b/sys/dev/hid/ietp.c @@ -102,6 +102,7 @@ struct ietp_softc { device_t dev; struct evdev_dev *evdev; + bool open; uint8_t report_id; hid_size_t report_len; @@ -217,13 +218,25 @@ static const struct evdev_methods ietp_evdev_methods = { static int ietp_ev_open(struct evdev_dev *evdev) { - return (hid_intr_start(evdev_get_softc(evdev))); + struct ietp_softc *sc = evdev_get_softc(evdev); + int error; + + error = hid_intr_start(sc->dev); + if (error == 0) + sc->open = true; + return (error); } static int ietp_ev_close(struct evdev_dev *evdev) { - return (hid_intr_stop(evdev_get_softc(evdev))); + struct ietp_softc *sc = evdev_get_softc(evdev); + int error; + + error = hid_intr_stop(sc->dev); + if (error == 0) + sc->open = false; + return (error); } static int @@ -275,7 +288,7 @@ ietp_attach(struct ietp_softc *sc) evdev_set_id(sc->evdev, hw->idBus, hw->idVendor, hw->idProduct, hw->idVersion); evdev_set_serial(sc->evdev, hw->serial); - evdev_set_methods(sc->evdev, sc->dev, &ietp_evdev_methods); + evdev_set_methods(sc->evdev, sc, &ietp_evdev_methods); evdev_set_flag(sc->evdev, EVDEV_FLAG_MT_STCOMPAT); evdev_set_flag(sc->evdev, EVDEV_FLAG_EXT_EPOCH); /* hidbus child */ @@ -584,11 +597,13 @@ ietp_iic_set_absolute_mode(device_t dev, bool enable) * Some ASUS touchpads need to be powered on to enter absolute mode. */ require_wakeup = false; - for (i = 0; i < nitems(special_fw); i++) { - if (sc->ic_type == special_fw[i].ic_type && - sc->product_id == special_fw[i].product_id) { - require_wakeup = true; - break; + if (!sc->open) { + for (i = 0; i < nitems(special_fw); i++) { + if (sc->ic_type == special_fw[i].ic_type && + sc->product_id == special_fw[i].product_id) { + require_wakeup = true; + break; + } } } diff --git a/sys/dev/hid/u2f.c b/sys/dev/hid/u2f.c new file mode 100644 index 000000000000..ac2eba7a499d --- /dev/null +++ b/sys/dev/hid/u2f.c @@ -0,0 +1,590 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2022-2023 Vladimir Kondratyev <wulf@FreeBSD.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "opt_hid.h" + +#include <sys/param.h> +#ifdef COMPAT_FREEBSD32 +#include <sys/abi_compat.h> +#endif +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/fcntl.h> +#include <sys/filio.h> +#include <sys/ioccom.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/poll.h> +#include <sys/priv.h> +#include <sys/proc.h> +#include <sys/selinfo.h> +#include <sys/sysctl.h> +#include <sys/systm.h> +#include <sys/uio.h> + +#include <dev/evdev/input.h> + +#define HID_DEBUG_VAR u2f_debug +#include <dev/hid/hid.h> +#include <dev/hid/hidbus.h> +#include <dev/hid/hidquirk.h> + +#include <dev/usb/usb_ioctl.h> + +#ifdef HID_DEBUG +static int u2f_debug = 0; +static SYSCTL_NODE(_hw_hid, OID_AUTO, u2f, CTLFLAG_RW, 0, + "FIDO/U2F authenticator"); +SYSCTL_INT(_hw_hid_u2f, OID_AUTO, debug, CTLFLAG_RWTUN, + &u2f_debug, 0, "Debug level"); +#endif + +#define U2F_MAX_REPORT_SIZE 64 + +/* A match on these entries will load u2f */ +static const struct hid_device_id u2f_devs[] = { + { HID_BUS(BUS_USB), HID_TLC(HUP_FIDO, HUF_U2FHID) }, +}; + +struct u2f_softc { + device_t sc_dev; /* base device */ + struct cdev *dev; + + struct mtx sc_mtx; /* hidbus private mutex */ + void *sc_rdesc; + hid_size_t sc_rdesc_size; + hid_size_t sc_isize; + hid_size_t sc_osize; + struct selinfo sc_rsel; + struct { /* driver state */ + bool open:1; /* device is open */ + bool aslp:1; /* waiting for device data in read() */ + bool sel:1; /* waiting for device data in poll() */ + bool data:1; /* input report is stored in sc_buf */ + int reserved:28; + } sc_state; + int sc_fflags; /* access mode for open lifetime */ + + uint8_t sc_buf[U2F_MAX_REPORT_SIZE]; +}; + +static d_open_t u2f_open; +static d_read_t u2f_read; +static d_write_t u2f_write; +static d_ioctl_t u2f_ioctl; +static d_poll_t u2f_poll; +static d_kqfilter_t u2f_kqfilter; + +static d_priv_dtor_t u2f_dtor; + +static struct cdevsw u2f_cdevsw = { + .d_version = D_VERSION, + .d_open = u2f_open, + .d_read = u2f_read, + .d_write = u2f_write, + .d_ioctl = u2f_ioctl, + .d_poll = u2f_poll, + .d_kqfilter = u2f_kqfilter, + .d_name = "u2f", +}; + +static hid_intr_t u2f_intr; + +static device_probe_t u2f_probe; +static device_attach_t u2f_attach; +static device_detach_t u2f_detach; + +static int u2f_kqread(struct knote *, long); +static void u2f_kqdetach(struct knote *); +static void u2f_notify(struct u2f_softc *); + +static struct filterops u2f_filterops_read = { + .f_isfd = 1, + .f_detach = u2f_kqdetach, + .f_event = u2f_kqread, +}; + +static int +u2f_probe(device_t dev) +{ + int error; + + error = HIDBUS_LOOKUP_DRIVER_INFO(dev, u2f_devs); + if (error != 0) + return (error); + + hidbus_set_desc(dev, "Authenticator"); + + return (BUS_PROBE_GENERIC); +} + +static int +u2f_attach(device_t dev) +{ + struct u2f_softc *sc = device_get_softc(dev); + struct hid_device_info *hw = __DECONST(struct hid_device_info *, + hid_get_device_info(dev)); + struct make_dev_args mda; + int error; + + sc->sc_dev = dev; + + error = hid_get_report_descr(dev, &sc->sc_rdesc, &sc->sc_rdesc_size); + if (error != 0) + return (ENXIO); + sc->sc_isize = hid_report_size_max(sc->sc_rdesc, sc->sc_rdesc_size, + hid_input, NULL); + if (sc->sc_isize > U2F_MAX_REPORT_SIZE) { + device_printf(dev, "Input report size too large. Truncate.\n"); + sc->sc_isize = U2F_MAX_REPORT_SIZE; + } + sc->sc_osize = hid_report_size_max(sc->sc_rdesc, sc->sc_rdesc_size, + hid_output, NULL); + if (sc->sc_osize > U2F_MAX_REPORT_SIZE) { + device_printf(dev, "Output report size too large. Truncate.\n"); + sc->sc_osize = U2F_MAX_REPORT_SIZE; + } + + mtx_init(&sc->sc_mtx, "u2f lock", NULL, MTX_DEF); + knlist_init_mtx(&sc->sc_rsel.si_note, &sc->sc_mtx); + + make_dev_args_init(&mda); + mda.mda_flags = MAKEDEV_WAITOK; + mda.mda_devsw = &u2f_cdevsw; + mda.mda_uid = UID_ROOT; + mda.mda_gid = GID_U2F; + mda.mda_mode = 0660; + mda.mda_si_drv1 = sc; + + error = make_dev_s(&mda, &sc->dev, "u2f/%d", device_get_unit(dev)); + if (error) { + device_printf(dev, "Can not create character device\n"); + u2f_detach(dev); + return (error); + } +#ifdef U2F_MAKE_UHID_ALIAS + (void)make_dev_alias(sc->dev, "uhid%d", device_get_unit(dev)); +#endif + + hid_add_dynamic_quirk(hw, HQ_NO_READAHEAD); + + hidbus_set_lock(dev, &sc->sc_mtx); + hidbus_set_intr(dev, u2f_intr, sc); + + return (0); +} + +static int +u2f_detach(device_t dev) +{ + struct u2f_softc *sc = device_get_softc(dev); + + DPRINTF("sc=%p\n", sc); + + if (sc->dev != NULL) { + mtx_lock(&sc->sc_mtx); + sc->dev->si_drv1 = NULL; + /* Wake everyone */ + u2f_notify(sc); + mtx_unlock(&sc->sc_mtx); + destroy_dev(sc->dev); + } + + hid_intr_stop(sc->sc_dev); + + knlist_clear(&sc->sc_rsel.si_note, 0); + knlist_destroy(&sc->sc_rsel.si_note); + seldrain(&sc->sc_rsel); + mtx_destroy(&sc->sc_mtx); + + return (0); +} + +void +u2f_intr(void *context, void *buf, hid_size_t len) +{ + struct u2f_softc *sc = context; + + mtx_assert(&sc->sc_mtx, MA_OWNED); + + DPRINTFN(5, "len=%d\n", len); + DPRINTFN(5, "data = %*D\n", len, buf, " "); + + if (sc->sc_state.data) + return; + + if (len > sc->sc_isize) + len = sc->sc_isize; + + bcopy(buf, sc->sc_buf, len); + + /* Make sure we don't process old data */ + if (len < sc->sc_isize) + bzero(sc->sc_buf + len, sc->sc_isize - len); + + sc->sc_state.data = true; + + u2f_notify(sc); +} + +static int +u2f_open(struct cdev *dev, int flag, int mode, struct thread *td) +{ + struct u2f_softc *sc = dev->si_drv1; + int error; + + if (sc == NULL) + return (ENXIO); + + DPRINTF("sc=%p\n", sc); + + mtx_lock(&sc->sc_mtx); + if (sc->sc_state.open) { + mtx_unlock(&sc->sc_mtx); + return (EBUSY); + } + sc->sc_state.open = true; + mtx_unlock(&sc->sc_mtx); + + error = devfs_set_cdevpriv(sc, u2f_dtor); + if (error != 0) { + mtx_lock(&sc->sc_mtx); + sc->sc_state.open = false; + mtx_unlock(&sc->sc_mtx); + return (error); + } + + /* Set up interrupt pipe. */ + sc->sc_state.data = false; + sc->sc_fflags = flag; + + return (0); +} + + +static void +u2f_dtor(void *data) +{ + struct u2f_softc *sc = data; + +#ifdef NOT_YET + /* Disable interrupts. */ + hid_intr_stop(sc->sc_dev); +#endif + + mtx_lock(&sc->sc_mtx); + sc->sc_state.open = false; + mtx_unlock(&sc->sc_mtx); +} + +static int +u2f_read(struct cdev *dev, struct uio *uio, int flag) +{ + uint8_t buf[U2F_MAX_REPORT_SIZE]; + struct u2f_softc *sc = dev->si_drv1; + size_t length = 0; + int error; + + DPRINTFN(1, "\n"); + + if (sc == NULL) + return (EIO); + + if (!sc->sc_state.data) + hid_intr_start(sc->sc_dev); + + mtx_lock(&sc->sc_mtx); + if (dev->si_drv1 == NULL) { + error = EIO; + goto exit; + } + + while (!sc->sc_state.data) { + if (flag & O_NONBLOCK) { + error = EWOULDBLOCK; + goto exit; + } + sc->sc_state.aslp = true; + DPRINTFN(5, "sleep on %p\n", &sc->sc_buf); + error = mtx_sleep(&sc->sc_buf, &sc->sc_mtx, PZERO | PCATCH, + "u2frd", 0); + DPRINTFN(5, "woke, error=%d\n", error); + if (dev->si_drv1 == NULL) + error = EIO; + if (error) { + sc->sc_state.aslp = false; + goto exit; + } + } + + if (sc->sc_state.data && uio->uio_resid > 0) { + length = min(uio->uio_resid, sc->sc_isize); + memcpy(buf, sc->sc_buf, length); + sc->sc_state.data = false; + } +exit: + mtx_unlock(&sc->sc_mtx); + if (length != 0) { + /* Copy the data to the user process. */ + DPRINTFN(5, "got %lu chars\n", (u_long)length); + error = uiomove(buf, length, uio); + } + + return (error); +} + +static int +u2f_write(struct cdev *dev, struct uio *uio, int flag) +{ + uint8_t buf[U2F_MAX_REPORT_SIZE]; + struct u2f_softc *sc = dev->si_drv1; + int error; + + DPRINTFN(1, "\n"); + + if (sc == NULL) + return (EIO); + + if (uio->uio_resid != sc->sc_osize) + return (EINVAL); + error = uiomove(buf, uio->uio_resid, uio); + if (error == 0) + error = hid_write(sc->sc_dev, buf, sc->sc_osize); + + return (error); +} + +#ifdef COMPAT_FREEBSD32 +static void +update_ugd32(const struct usb_gen_descriptor *ugd, + struct usb_gen_descriptor32 *ugd32) +{ + /* Don't update hgd_data pointer */ + CP(*ugd, *ugd32, ugd_lang_id); + CP(*ugd, *ugd32, ugd_maxlen); + CP(*ugd, *ugd32, ugd_actlen); + CP(*ugd, *ugd32, ugd_offset); + CP(*ugd, *ugd32, ugd_config_index); + CP(*ugd, *ugd32, ugd_string_index); + CP(*ugd, *ugd32, ugd_iface_index); + CP(*ugd, *ugd32, ugd_altif_index); + CP(*ugd, *ugd32, ugd_endpt_index); + CP(*ugd, *ugd32, ugd_report_type); + /* Don't update reserved */ +} +#endif + +static int +u2f_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, + struct thread *td) +{ +#ifdef COMPAT_FREEBSD32 + struct usb_gen_descriptor local_ugd; + struct usb_gen_descriptor32 *ugd32 = NULL; +#endif + struct u2f_softc *sc = dev->si_drv1; + struct usb_gen_descriptor *ugd = (struct usb_gen_descriptor *)addr; + uint32_t size; + + DPRINTFN(2, "cmd=%lx\n", cmd); + + if (sc == NULL) + return (EIO); + +#ifdef COMPAT_FREEBSD32 + switch (cmd) { + case USB_GET_REPORT_DESC32: + cmd = _IOC_NEWTYPE(cmd, struct usb_gen_descriptor); + ugd32 = (struct usb_gen_descriptor32 *)addr; + ugd = &local_ugd; + PTRIN_CP(*ugd32, *ugd, ugd_data); + CP(*ugd32, *ugd, ugd_lang_id); + CP(*ugd32, *ugd, ugd_maxlen); + CP(*ugd32, *ugd, ugd_actlen); + CP(*ugd32, *ugd, ugd_offset); + CP(*ugd32, *ugd, ugd_config_index); + CP(*ugd32, *ugd, ugd_string_index); + CP(*ugd32, *ugd, ugd_iface_index); + CP(*ugd32, *ugd, ugd_altif_index); + CP(*ugd32, *ugd, ugd_endpt_index); + CP(*ugd32, *ugd, ugd_report_type); + /* Don't copy reserved */ + break; + } +#endif + + /* fixed-length ioctls handling */ + switch (cmd) { + case FIONBIO: + /* All handled in the upper FS layer. */ + return (0); + + case USB_GET_REPORT_DESC: + size = MIN(sc->sc_rdesc_size, ugd->ugd_maxlen); + ugd->ugd_actlen = size; +#ifdef COMPAT_FREEBSD32 + if (ugd32 != NULL) + update_ugd32(ugd, ugd32); +#endif + if (ugd->ugd_data == NULL) + return (0); /* descriptor length only */ + + return (copyout(sc->sc_rdesc, ugd->ugd_data, size)); + + case USB_GET_DEVICEINFO: + return(hid_ioctl( + sc->sc_dev, USB_GET_DEVICEINFO, (uintptr_t)addr)); + } + + return (EINVAL); +} + +static int +u2f_poll(struct cdev *dev, int events, struct thread *td) +{ + struct u2f_softc *sc = dev->si_drv1; + int revents = 0; + bool start_intr = false; + + if (sc == NULL) + return (POLLHUP); + + if (events & (POLLOUT | POLLWRNORM) && (sc->sc_fflags & FWRITE)) + revents |= events & (POLLOUT | POLLWRNORM); + if (events & (POLLIN | POLLRDNORM) && (sc->sc_fflags & FREAD)) { + mtx_lock(&sc->sc_mtx); + if (sc->sc_state.data) + revents |= events & (POLLIN | POLLRDNORM); + else { + sc->sc_state.sel = true; + start_intr = true; + selrecord(td, &sc->sc_rsel); + } + mtx_unlock(&sc->sc_mtx); + if (start_intr) + hid_intr_start(sc->sc_dev); + } + + return (revents); +} + +static int +u2f_kqfilter(struct cdev *dev, struct knote *kn) +{ + struct u2f_softc *sc = dev->si_drv1; + + if (sc == NULL) + return (ENXIO); + + switch(kn->kn_filter) { + case EVFILT_READ: + if (sc->sc_fflags & FREAD) { + kn->kn_fop = &u2f_filterops_read; + break; + } + /* FALLTHROUGH */ + default: + return(EINVAL); + } + kn->kn_hook = sc; + + knlist_add(&sc->sc_rsel.si_note, kn, 0); + return (0); +} + +static int +u2f_kqread(struct knote *kn, long hint) +{ + struct u2f_softc *sc = kn->kn_hook; + int ret; + + mtx_assert(&sc->sc_mtx, MA_OWNED); + + if (sc->dev->si_drv1 == NULL) { + kn->kn_flags |= EV_EOF; + ret = 1; + } else { + ret = sc->sc_state.data ? 1 : 0; + if (!sc->sc_state.data) + hid_intr_start(sc->sc_dev); + } + + return (ret); +} + +static void +u2f_kqdetach(struct knote *kn) +{ + struct u2f_softc *sc = kn->kn_hook; + + knlist_remove(&sc->sc_rsel.si_note, kn, 0); +} + +static void +u2f_notify(struct u2f_softc *sc) +{ + mtx_assert(&sc->sc_mtx, MA_OWNED); + + if (sc->sc_state.aslp) { + sc->sc_state.aslp = false; + DPRINTFN(5, "waking %p\n", &sc->sc_buf); + wakeup(&sc->sc_buf); + } + if (sc->sc_state.sel) { + sc->sc_state.sel = false; + selwakeuppri(&sc->sc_rsel, PZERO); + } + KNOTE_LOCKED(&sc->sc_rsel.si_note, 0); +} + +static device_method_t u2f_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, u2f_probe), + DEVMETHOD(device_attach, u2f_attach), + DEVMETHOD(device_detach, u2f_detach), + + DEVMETHOD_END +}; + +static driver_t u2f_driver = { +#ifdef U2F_MAKE_UHID_ALIAS + "uhid", +#else + "u2f", +#endif + u2f_methods, + sizeof(struct u2f_softc) +}; + +DRIVER_MODULE(u2f, hidbus, u2f_driver, NULL, NULL); +MODULE_DEPEND(u2f, hidbus, 1, 1, 1); +MODULE_DEPEND(u2f, hid, 1, 1, 1); +MODULE_VERSION(u2f, 1); +HID_PNP_INFO(u2f_devs); diff --git a/sys/dev/ice/ice_bitops.h b/sys/dev/ice/ice_bitops.h index c480900596f4..a623f810c101 100644 --- a/sys/dev/ice/ice_bitops.h +++ b/sys/dev/ice/ice_bitops.h @@ -198,7 +198,7 @@ static inline void ice_zero_bitmap(ice_bitmap_t *bmp, u16 size) * ice_and_bitmap - bitwise AND 2 bitmaps and store result in dst bitmap * @dst: Destination bitmap that receive the result of the operation * @bmp1: The first bitmap to intersect - * @bmp2: The second bitmap to intersect wit the first + * @bmp2: The second bitmap to intersect with the first * @size: Size of the bitmaps in bits * * This function performs a bitwise AND on two "source" bitmaps of the same size @@ -237,7 +237,7 @@ ice_and_bitmap(ice_bitmap_t *dst, const ice_bitmap_t *bmp1, * ice_or_bitmap - bitwise OR 2 bitmaps and store result in dst bitmap * @dst: Destination bitmap that receive the result of the operation * @bmp1: The first bitmap to intersect - * @bmp2: The second bitmap to intersect wit the first + * @bmp2: The second bitmap to intersect with the first * @size: Size of the bitmaps in bits * * This function performs a bitwise OR on two "source" bitmaps of the same size diff --git a/sys/dev/ice/ice_lan_tx_rx.h b/sys/dev/ice/ice_lan_tx_rx.h index 693e0ca5efc6..eedacdab0216 100644 --- a/sys/dev/ice/ice_lan_tx_rx.h +++ b/sys/dev/ice/ice_lan_tx_rx.h @@ -630,7 +630,7 @@ enum ice_rxdid { ICE_RXDID_LAST = 63, }; -/* Recceive Flex descriptor Dword Index */ +/* Receive Flex descriptor Dword Index */ enum ice_flex_word { ICE_RX_FLEX_DWORD_0 = 0, ICE_RX_FLEX_DWORD_1, diff --git a/sys/dev/ice/ice_lib.h b/sys/dev/ice/ice_lib.h index 308b2bda2790..640bdf8fed7b 100644 --- a/sys/dev/ice/ice_lib.h +++ b/sys/dev/ice/ice_lib.h @@ -313,7 +313,7 @@ enum ice_dyn_idx_t { ICE_ITR_NONE = 3 /* ITR_NONE must not be used as an index */ }; -/* By convenction ITR0 is used for RX, and ITR1 is used for TX */ +/* By convention ITR0 is used for RX, and ITR1 is used for TX */ #define ICE_RX_ITR ICE_IDX_ITR0 #define ICE_TX_ITR ICE_IDX_ITR1 diff --git a/sys/dev/ice/ice_protocol_type.h b/sys/dev/ice/ice_protocol_type.h index 300d61bfb5d9..b90c25e6c427 100644 --- a/sys/dev/ice/ice_protocol_type.h +++ b/sys/dev/ice/ice_protocol_type.h @@ -143,7 +143,7 @@ enum ice_prot_id { ICE_PROT_LLDP_OF = 117, ICE_PROT_ARP_OF = 118, ICE_PROT_EAPOL_OF = 120, - ICE_PROT_META_ID = 255, /* when offset == metaddata */ + ICE_PROT_META_ID = 255, /* when offset == metadata */ ICE_PROT_INVALID = 255 /* when offset == ICE_FV_OFFSET_INVAL */ }; diff --git a/sys/dev/iicbus/gpio/pcf8574.c b/sys/dev/iicbus/gpio/pcf8574.c index ab6e2bc07d1f..bf60dec67557 100644 --- a/sys/dev/iicbus/gpio/pcf8574.c +++ b/sys/dev/iicbus/gpio/pcf8574.c @@ -142,12 +142,13 @@ pcf8574_attach(device_t dev) (void)pcf8574_write(sc, 0xff); sx_init(&sc->lock, "pcf8574"); - sc->busdev = gpiobus_attach_bus(dev); + sc->busdev = gpiobus_add_bus(dev); if (sc->busdev == NULL) { device_printf(dev, "Could not create busdev child\n"); sx_destroy(&sc->lock); return (ENXIO); } + bus_attach_children(dev); return (0); } @@ -158,9 +159,7 @@ pcf8574_detach(device_t dev) sc = device_get_softc(dev); - if (sc->busdev != NULL) - gpiobus_detach_bus(sc->busdev); - + gpiobus_detach_bus(dev); sx_destroy(&sc->lock); return (0); } diff --git a/sys/dev/iicbus/gpio/tca64xx.c b/sys/dev/iicbus/gpio/tca64xx.c index cd011ae9be75..ab8fedd3f8fd 100644 --- a/sys/dev/iicbus/gpio/tca64xx.c +++ b/sys/dev/iicbus/gpio/tca64xx.c @@ -262,7 +262,7 @@ tca64xx_attach(device_t dev) mtx_init(&sc->mtx, "tca64xx gpio", "gpio", MTX_DEF); OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); - sc->busdev = gpiobus_attach_bus(dev); + sc->busdev = gpiobus_add_bus(dev); if (sc->busdev == NULL) { device_printf(dev, "Could not create busdev child\n"); return (ENXIO); @@ -281,6 +281,7 @@ tca64xx_attach(device_t dev) } #endif + bus_attach_children(dev); return (0); } @@ -291,9 +292,7 @@ tca64xx_detach(device_t dev) sc = device_get_softc(dev); - if (sc->busdev != NULL) - gpiobus_detach_bus(sc->busdev); - + gpiobus_detach_bus(dev); mtx_destroy(&sc->mtx); return (0); diff --git a/sys/dev/iicbus/iichid.c b/sys/dev/iicbus/iichid.c index 3f1d7a0cefba..fdb4816b8bd9 100644 --- a/sys/dev/iicbus/iichid.c +++ b/sys/dev/iicbus/iichid.c @@ -861,7 +861,8 @@ iichid_intr_start(device_t dev, device_t child __unused) sc = device_get_softc(dev); DPRINTF(sc, "iichid device open\n"); - iichid_set_power_state(sc, IICHID_PS_ON, IICHID_PS_NULL); + if (!sc->open) + iichid_set_power_state(sc, IICHID_PS_ON, IICHID_PS_NULL); return (0); } diff --git a/sys/dev/isci/scil/intel_sata.h b/sys/dev/isci/scil/intel_sata.h index 4cf4adf03e07..fdad5be9b083 100644 --- a/sys/dev/isci/scil/intel_sata.h +++ b/sys/dev/isci/scil/intel_sata.h @@ -61,7 +61,7 @@ * * @brief This file defines all of the SATA releated constants, enumerations, * and types. Please note that this file does not necessarily contain - * an exhaustive list of all contants and commands. + * an exhaustive list of all constants and commands. */ /** diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index 959afa79e7da..73c0fd1ab16f 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ -45,7 +45,7 @@ /************************************************************************ * Driver version ************************************************************************/ -static const char ixgbe_driver_version[] = "4.0.1-k"; +static const char ixgbe_driver_version[] = "5.0.1-k"; /************************************************************************ * PCI Device ID Table @@ -144,6 +144,16 @@ static const pci_vendor_info_t ixgbe_vendor_info_array[] = "Intel(R) X540-T2 (Bypass)"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BYPASS, "Intel(R) X520 82599 (Bypass)"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_E610_BACKPLANE, + "Intel(R) E610 (Backplane)"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_E610_SFP, + "Intel(R) E610 (SFP)"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_E610_2_5G_T, + "Intel(R) E610 (2.5 GbE)"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_E610_10G_T, + "Intel(R) E610 (10 GbE)"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_E610_SGMII, + "Intel(R) E610 (SGMII)"), /* required last entry */ PVID_END }; @@ -253,6 +263,10 @@ static int ixgbe_sysctl_tso_tcp_flags_mask(SYSCTL_HANDLER_ARGS); static void ixgbe_handle_msf(void *); static void ixgbe_handle_mod(void *); static void ixgbe_handle_phy(void *); +static void ixgbe_handle_fw_event(void *); + +static int ixgbe_enable_lse(struct ixgbe_softc *sc); +static int ixgbe_disable_lse(struct ixgbe_softc *sc); /************************************************************************ * FreeBSD Device Interface Entry Points @@ -621,6 +635,7 @@ ixgbe_initialize_rss_mapping(struct ixgbe_softc *sc) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: + case ixgbe_mac_E610: table_size = 512; break; default: @@ -902,6 +917,32 @@ ixgbe_initialize_transmit_units(if_ctx_t ctx) } /* ixgbe_initialize_transmit_units */ +static int +ixgbe_check_fw_api_version(struct ixgbe_softc *sc) +{ + struct ixgbe_hw *hw = &sc->hw; + if (hw->api_maj_ver > IXGBE_FW_API_VER_MAJOR) { + device_printf(sc->dev, + "The driver for the device stopped because the NVM " + "image is newer than expected. You must install the " + "most recent version of the network driver.\n"); + return (EOPNOTSUPP); + } else if (hw->api_maj_ver == IXGBE_FW_API_VER_MAJOR && + hw->api_min_ver > (IXGBE_FW_API_VER_MINOR + 2)) { + device_printf(sc->dev, + "The driver for the device detected a newer version of " + "the NVM image than expected. Please install the most " + "recent version of the network driver.\n"); + } else if (hw->api_maj_ver < IXGBE_FW_API_VER_MAJOR || + hw->api_min_ver < IXGBE_FW_API_VER_MINOR - 2) { + device_printf(sc->dev, + "The driver for the device detected an older version " + "of the NVM image than expected. " + "Please update the NVM image.\n"); + } + return (0); +} + /************************************************************************ * ixgbe_register ************************************************************************/ @@ -970,6 +1011,9 @@ ixgbe_if_attach_pre(if_ctx_t ctx) goto err_pci; } + if (hw->mac.type == ixgbe_mac_E610) + ixgbe_init_aci(hw); + if (hw->mac.ops.fw_recovery_mode && hw->mac.ops.fw_recovery_mode(hw)) { device_printf(dev, @@ -1058,6 +1102,12 @@ ixgbe_if_attach_pre(if_ctx_t ctx) break; } + /* Check the FW API version */ + if (hw->mac.type == ixgbe_mac_E610 && ixgbe_check_fw_api_version(sc)) { + error = EIO; + goto err_pci; + } + /* Most of the iflib initialization... */ iflib_set_mac(ctx, hw->mac.addr); @@ -1111,6 +1161,9 @@ err_pci: IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext); ixgbe_free_pci_resources(ctx); + if (hw->mac.type == ixgbe_mac_E610) + ixgbe_shutdown_aci(hw); + return (error); } /* ixgbe_if_attach_pre */ @@ -1358,6 +1411,10 @@ ixgbe_add_media_types(if_ctx_t ctx) /* Media types with matching FreeBSD media defines */ if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ifmedia_add(sc->media, IFM_ETHER | IFM_10G_T, 0, NULL); + if (layer & IXGBE_PHYSICAL_LAYER_5000BASE_T) + ifmedia_add(sc->media, IFM_ETHER | IFM_5000_T, 0, NULL); + if (layer & IXGBE_PHYSICAL_LAYER_2500BASE_T) + ifmedia_add(sc->media, IFM_ETHER | IFM_2500_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ifmedia_add(sc->media, IFM_ETHER | IFM_1000_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) @@ -1459,6 +1516,7 @@ ixgbe_is_sfp(struct ixgbe_hw *hw) } case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: + case ixgbe_mac_E610: if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) return (true); return (false); @@ -1525,6 +1583,15 @@ ixgbe_config_link(if_ctx_t ctx) IXGBE_LINK_SPEED_5GB_FULL); } + if (hw->mac.type == ixgbe_mac_E610) { + hw->phy.ops.init(hw); + err = ixgbe_enable_lse(sc); + if (err) + device_printf(sc->dev, + "Failed to enable Link Status Event, " + "error: %d", err); + } + if (hw->mac.ops.setup_link) err = hw->mac.ops.setup_link(hw, autoneg, sc->link_up); @@ -2158,14 +2225,15 @@ get_parent_info: ixgbe_set_pci_config_data_generic(hw, link); display: - device_printf(dev, "PCI Express Bus: Speed %s %s\n", - ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s" : + device_printf(dev, "PCI Express Bus: Speed %s Width %s\n", + ((hw->bus.speed == ixgbe_bus_speed_16000) ? "16.0GT/s" : + (hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s" : (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s" : (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s" : "Unknown"), - ((hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" : - (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" : - (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" : + ((hw->bus.width == ixgbe_bus_width_pcie_x8) ? "x8" : + (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "x4" : + (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "x1" : "Unknown")); if (bus_info_valid) { @@ -2372,14 +2440,17 @@ ixgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) ifmr->ifm_status |= IFM_ACTIVE; layer = sc->phy_layer; - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T || - layer & IXGBE_PHYSICAL_LAYER_1000BASE_T || - layer & IXGBE_PHYSICAL_LAYER_100BASE_TX || - layer & IXGBE_PHYSICAL_LAYER_10BASE_T) + if (layer & IXGBE_PHYSICAL_LAYERS_BASE_T_ALL) switch (sc->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_T | IFM_FDX; break; + case IXGBE_LINK_SPEED_5GB_FULL: + ifmr->ifm_active |= IFM_5000_T | IFM_FDX; + break; + case IXGBE_LINK_SPEED_2_5GB_FULL: + ifmr->ifm_active |= IFM_2500_T | IFM_FDX; + break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_T | IFM_FDX; break; @@ -2390,15 +2461,6 @@ ixgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) ifmr->ifm_active |= IFM_10_T | IFM_FDX; break; } - if (hw->mac.type == ixgbe_mac_X550) - switch (sc->link_speed) { - case IXGBE_LINK_SPEED_5GB_FULL: - ifmr->ifm_active |= IFM_5000_T | IFM_FDX; - break; - case IXGBE_LINK_SPEED_2_5GB_FULL: - ifmr->ifm_active |= IFM_2500_T | IFM_FDX; - break; - } if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) switch (sc->link_speed) { @@ -2676,6 +2738,11 @@ ixgbe_msix_link(void *arg) sc->task_requests |= IXGBE_REQUEST_TASK_LSC; } + if (eicr & IXGBE_EICR_FW_EVENT) { + IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FW_EVENT); + sc->task_requests |= IXGBE_REQUEST_TASK_FWEVENT; + } + if (sc->hw.mac.type != ixgbe_mac_82598EB) { if ((sc->feat_en & IXGBE_FEATURE_FDIR) && (eicr & IXGBE_EICR_FLOW_DIR)) { @@ -2734,11 +2801,16 @@ ixgbe_msix_link(void *arg) /* Check for VF message */ if ((sc->feat_en & IXGBE_FEATURE_SRIOV) && - (eicr & IXGBE_EICR_MAILBOX)) + (eicr & IXGBE_EICR_MAILBOX)) { sc->task_requests |= IXGBE_REQUEST_TASK_MBX; + } } - if (ixgbe_is_sfp(hw)) { + /* + * On E610, the firmware handles PHY configuration, so + * there is no need to perform any SFP-specific tasks. + */ + if (hw->mac.type != ixgbe_mac_E610 && ixgbe_is_sfp(hw)) { /* Pluggable optics-related interrupt */ if (hw->mac.type >= ixgbe_mac_X540) eicr_mask = IXGBE_EICR_GPI_SDP0_X540; @@ -2985,7 +3057,13 @@ ixgbe_if_detach(if_ctx_t ctx) callout_drain(&sc->fw_mode_timer); + if (sc->hw.mac.type == ixgbe_mac_E610) { + ixgbe_disable_lse(sc); + ixgbe_shutdown_aci(&sc->hw); + } + ixgbe_free_pci_resources(ctx); + free(sc->mta, M_IXGBE); return (0); @@ -3404,6 +3482,7 @@ ixgbe_set_ivar(struct ixgbe_softc *sc, u8 entry, u8 vector, s8 type) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: + case ixgbe_mac_E610: if (type == -1) { /* MISC IVAR */ index = (entry & 1) * 8; ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC); @@ -3826,6 +3905,96 @@ ixgbe_handle_phy(void *context) } /* ixgbe_handle_phy */ /************************************************************************ + * ixgbe_enable_lse - enable link status events + * + * Sets mask and enables link status events + ************************************************************************/ +s32 ixgbe_enable_lse(struct ixgbe_softc *sc) +{ + s32 error; + + u16 mask = ~((u16)(IXGBE_ACI_LINK_EVENT_UPDOWN | + IXGBE_ACI_LINK_EVENT_MEDIA_NA | + IXGBE_ACI_LINK_EVENT_MODULE_QUAL_FAIL | + IXGBE_ACI_LINK_EVENT_PHY_FW_LOAD_FAIL)); + + error = ixgbe_configure_lse(&sc->hw, TRUE, mask); + if (error) + return (error); + + sc->lse_mask = mask; + return (IXGBE_SUCCESS); +} /* ixgbe_enable_lse */ + +/************************************************************************ + * ixgbe_disable_lse - disable link status events + ************************************************************************/ +s32 ixgbe_disable_lse(struct ixgbe_softc *sc) +{ + s32 error; + + error = ixgbe_configure_lse(&sc->hw, false, sc->lse_mask); + if (error) + return (error); + + sc->lse_mask = 0; + return (IXGBE_SUCCESS); +} /* ixgbe_disable_lse */ + +/************************************************************************ + * ixgbe_handle_fw_event - Tasklet for MSI-X Link Status Event interrupts + ************************************************************************/ +static void +ixgbe_handle_fw_event(void *context) +{ + if_ctx_t ctx = context; + struct ixgbe_softc *sc = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &sc->hw; + struct ixgbe_aci_event event; + bool pending = false; + s32 error; + + event.buf_len = IXGBE_ACI_MAX_BUFFER_SIZE; + event.msg_buf = malloc(event.buf_len, M_IXGBE, M_ZERO | M_NOWAIT); + if (!event.msg_buf) { + device_printf(sc->dev, "Can not allocate buffer for " + "event message\n"); + return; + } + + do { + error = ixgbe_aci_get_event(hw, &event, &pending); + if (error) { + device_printf(sc->dev, "Error getting event from " + "FW:%d\n", error); + break; + } + + switch (le16toh(event.desc.opcode)) { + case ixgbe_aci_opc_get_link_status: + sc->task_requests |= IXGBE_REQUEST_TASK_LSC; + break; + + case ixgbe_aci_opc_temp_tca_event: + if (hw->adapter_stopped == FALSE) + ixgbe_if_stop(ctx); + device_printf(sc->dev, + "CRITICAL: OVER TEMP!! PHY IS SHUT DOWN!!\n"); + device_printf(sc->dev, "System shutdown required!\n"); + break; + + default: + device_printf(sc->dev, + "Unknown FW event captured, opcode=0x%04X\n", + le16toh(event.desc.opcode)); + break; + } + } while (pending); + + free(event.msg_buf, M_IXGBE); +} /* ixgbe_handle_fw_event */ + +/************************************************************************ * ixgbe_if_stop - Stop the hardware * * Disables all traffic on the adapter by issuing a @@ -3899,6 +4068,8 @@ ixgbe_if_update_admin_status(if_ctx_t ctx) } /* Handle task requests from msix_link() */ + if (sc->task_requests & IXGBE_REQUEST_TASK_FWEVENT) + ixgbe_handle_fw_event(ctx); if (sc->task_requests & IXGBE_REQUEST_TASK_MOD) ixgbe_handle_mod(ctx); if (sc->task_requests & IXGBE_REQUEST_TASK_MSF) @@ -3986,6 +4157,9 @@ ixgbe_if_enable_intr(if_ctx_t ctx) mask |= IXGBE_EICR_GPI_SDP0_X540; mask |= IXGBE_EIMS_ECC; break; + case ixgbe_mac_E610: + mask |= IXGBE_EIMS_FW_EVENT; + break; default: break; } @@ -4008,6 +4182,7 @@ ixgbe_if_enable_intr(if_ctx_t ctx) /* Don't autoclear Link */ mask &= ~IXGBE_EIMS_OTHER; mask &= ~IXGBE_EIMS_LSC; + mask &= ~IXGBE_EIMS_FW_EVENT; if (sc->feat_cap & IXGBE_FEATURE_SRIOV) mask &= ~IXGBE_EIMS_MAILBOX; IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask); @@ -4026,7 +4201,7 @@ ixgbe_if_enable_intr(if_ctx_t ctx) } /* ixgbe_if_enable_intr */ /************************************************************************ - * ixgbe_disable_intr + * ixgbe_if_disable_intr ************************************************************************/ static void ixgbe_if_disable_intr(if_ctx_t ctx) @@ -4176,8 +4351,9 @@ ixgbe_intr(void *arg) /* External PHY interrupt */ if ((hw->phy.type == ixgbe_phy_x550em_ext_t) && - (eicr & IXGBE_EICR_GPI_SDP0_X540)) + (eicr & IXGBE_EICR_GPI_SDP0_X540)) { sc->task_requests |= IXGBE_REQUEST_TASK_PHY; + } return (FILTER_SCHEDULE_THREAD); } /* ixgbe_intr */ @@ -4219,7 +4395,7 @@ ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS) int error, fc; sc = (struct ixgbe_softc *)arg1; - fc = sc->hw.fc.current_mode; + fc = sc->hw.fc.requested_mode; error = sysctl_handle_int(oidp, &fc, 0, req); if ((error) || (req->newptr == NULL)) @@ -4248,12 +4424,10 @@ ixgbe_set_flowcntl(struct ixgbe_softc *sc, int fc) case ixgbe_fc_rx_pause: case ixgbe_fc_tx_pause: case ixgbe_fc_full: - sc->hw.fc.requested_mode = fc; if (sc->num_rx_queues > 1) ixgbe_disable_rx_drop(sc); break; case ixgbe_fc_none: - sc->hw.fc.requested_mode = ixgbe_fc_none; if (sc->num_rx_queues > 1) ixgbe_enable_rx_drop(sc); break; @@ -4261,6 +4435,8 @@ ixgbe_set_flowcntl(struct ixgbe_softc *sc, int fc) return (EINVAL); } + sc->hw.fc.requested_mode = fc; + /* Don't autoneg if forcing a value */ sc->hw.fc.disable_fc_autoneg = true; ixgbe_fc_enable(&sc->hw); @@ -4978,6 +5154,9 @@ ixgbe_init_device_features(struct ixgbe_softc *sc) if (sc->hw.device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP) sc->feat_cap &= ~IXGBE_FEATURE_LEGACY_IRQ; break; + case ixgbe_mac_E610: + sc->feat_cap |= IXGBE_FEATURE_RECOVERY_MODE; + break; default: break; } diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c index 54b2c8c1dd68..8a1c1aae041d 100644 --- a/sys/dev/ixgbe/if_ixv.c +++ b/sys/dev/ixgbe/if_ixv.c @@ -68,6 +68,8 @@ static const pci_vendor_info_t ixv_vendor_info_array[] = "Intel(R) X552 Virtual Function"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_VF, "Intel(R) X553 Virtual Function"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_E610_VF, + "Intel(R) E610 Virtual Function"), /* required last entry */ PVID_END }; @@ -1020,6 +1022,9 @@ ixv_identify_hardware(if_ctx_t ctx) case IXGBE_DEV_ID_X550EM_A_VF: hw->mac.type = ixgbe_mac_X550EM_a_vf; break; + case IXGBE_DEV_ID_E610_VF: + hw->mac.type = ixgbe_mac_E610_vf; + break; default: device_printf(dev, "unknown mac type\n"); hw->mac.type = ixgbe_mac_unknown; @@ -1955,6 +1960,7 @@ ixv_init_device_features(struct ixgbe_softc *sc) case ixgbe_mac_X550_vf: case ixgbe_mac_X550EM_x_vf: case ixgbe_mac_X550EM_a_vf: + case ixgbe_mac_E610_vf: sc->feat_cap |= IXGBE_FEATURE_NEEDS_CTXD; sc->feat_cap |= IXGBE_FEATURE_RSS; break; diff --git a/sys/dev/ixgbe/ixgbe.h b/sys/dev/ixgbe/ixgbe.h index 341d4ebfcebc..844064bf8543 100644 --- a/sys/dev/ixgbe/ixgbe.h +++ b/sys/dev/ixgbe/ixgbe.h @@ -86,6 +86,7 @@ #include "ixgbe_phy.h" #include "ixgbe_vf.h" #include "ixgbe_features.h" +#include "ixgbe_e610.h" /* Tunables */ @@ -195,6 +196,15 @@ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) +/* All BASE-T Physical layers */ +#define IXGBE_PHYSICAL_LAYERS_BASE_T_ALL \ + (IXGBE_PHYSICAL_LAYER_10GBASE_T |\ + IXGBE_PHYSICAL_LAYER_5000BASE_T |\ + IXGBE_PHYSICAL_LAYER_2500BASE_T |\ + IXGBE_PHYSICAL_LAYER_1000BASE_T |\ + IXGBE_PHYSICAL_LAYER_100BASE_TX |\ + IXGBE_PHYSICAL_LAYER_10BASE_T) + #define IXGBE_CAPS (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_TSO | \ IFCAP_LRO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | \ IFCAP_VLAN_HWCSUM | IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU | \ @@ -464,6 +474,7 @@ struct ixgbe_softc { /* Feature capable/enabled flags. See ixgbe_features.h */ u32 feat_cap; u32 feat_en; + u16 lse_mask; }; /* Precision Time Sync (IEEE 1588) defines */ diff --git a/sys/dev/ixgbe/ixgbe_api.c b/sys/dev/ixgbe/ixgbe_api.c index 4c50f10ed92e..f11f52a646e4 100644 --- a/sys/dev/ixgbe/ixgbe_api.c +++ b/sys/dev/ixgbe/ixgbe_api.c @@ -112,11 +112,15 @@ s32 ixgbe_init_shared_code(struct ixgbe_hw *hw) case ixgbe_mac_X550EM_a: status = ixgbe_init_ops_X550EM_a(hw); break; + case ixgbe_mac_E610: + status = ixgbe_init_ops_E610(hw); + break; case ixgbe_mac_82599_vf: case ixgbe_mac_X540_vf: case ixgbe_mac_X550_vf: case ixgbe_mac_X550EM_x_vf: case ixgbe_mac_X550EM_a_vf: + case ixgbe_mac_E610_vf: status = ixgbe_init_ops_vf(hw); break; default: @@ -240,6 +244,18 @@ s32 ixgbe_set_mac_type(struct ixgbe_hw *hw) hw->mac.type = ixgbe_mac_X550EM_a_vf; hw->mvals = ixgbe_mvals_X550EM_a; break; + case IXGBE_DEV_ID_E610_BACKPLANE: + case IXGBE_DEV_ID_E610_SFP: + case IXGBE_DEV_ID_E610_10G_T: + case IXGBE_DEV_ID_E610_2_5G_T: + case IXGBE_DEV_ID_E610_SGMII: + hw->mac.type = ixgbe_mac_E610; + hw->mvals = ixgbe_mvals_X550EM_a; + break; + case IXGBE_DEV_ID_E610_VF: + hw->mac.type = ixgbe_mac_E610_vf; + hw->mvals = ixgbe_mvals_X550EM_a; + break; default: ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED; ERROR_REPORT2(IXGBE_ERROR_UNSUPPORTED, diff --git a/sys/dev/ixgbe/ixgbe_api.h b/sys/dev/ixgbe/ixgbe_api.h index b81510dacb95..2b4cec8d110e 100644 --- a/sys/dev/ixgbe/ixgbe_api.h +++ b/sys/dev/ixgbe/ixgbe_api.h @@ -48,6 +48,7 @@ extern s32 ixgbe_init_ops_X550(struct ixgbe_hw *hw); extern s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw); extern s32 ixgbe_init_ops_X550EM_x(struct ixgbe_hw *hw); extern s32 ixgbe_init_ops_X550EM_a(struct ixgbe_hw *hw); +extern s32 ixgbe_init_ops_E610(struct ixgbe_hw *hw); extern s32 ixgbe_init_ops_vf(struct ixgbe_hw *hw); s32 ixgbe_set_mac_type(struct ixgbe_hw *hw); diff --git a/sys/dev/ixgbe/ixgbe_common.c b/sys/dev/ixgbe/ixgbe_common.c index df7ab90e72ab..bff022585a03 100644 --- a/sys/dev/ixgbe/ixgbe_common.c +++ b/sys/dev/ixgbe/ixgbe_common.c @@ -178,6 +178,7 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_A_SFP_N: case IXGBE_DEV_ID_X550EM_A_QSFP: case IXGBE_DEV_ID_X550EM_A_QSFP_N: + case IXGBE_DEV_ID_E610_SFP: supported = false; break; default: @@ -210,6 +211,8 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_A_10G_T: case IXGBE_DEV_ID_X550EM_A_1G_T: case IXGBE_DEV_ID_X550EM_A_1G_T_L: + case IXGBE_DEV_ID_E610_10G_T: + case IXGBE_DEV_ID_E610_2_5G_T: supported = true; break; default: @@ -616,7 +619,8 @@ s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw) } } - if (hw->mac.type == ixgbe_mac_X550 || hw->mac.type == ixgbe_mac_X540) { + if (hw->mac.type == ixgbe_mac_X540 || + hw->mac.type == ixgbe_mac_X550) { if (hw->phy.id == 0) ixgbe_identify_phy(hw); hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECL, @@ -1037,6 +1041,9 @@ void ixgbe_set_pci_config_data_generic(struct ixgbe_hw *hw, u16 link_status) case IXGBE_PCI_LINK_SPEED_8000: hw->bus.speed = ixgbe_bus_speed_8000; break; + case IXGBE_PCI_LINK_SPEED_16000: + hw->bus.speed = ixgbe_bus_speed_16000; + break; default: hw->bus.speed = ixgbe_bus_speed_unknown; break; @@ -1059,7 +1066,9 @@ s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw) DEBUGFUNC("ixgbe_get_bus_info_generic"); /* Get the negotiated link width and speed from PCI config space */ - link_status = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS); + link_status = IXGBE_READ_PCIE_WORD(hw, hw->mac.type == ixgbe_mac_E610 ? + IXGBE_PCI_LINK_STATUS_E610 : + IXGBE_PCI_LINK_STATUS); ixgbe_set_pci_config_data_generic(hw, link_status); @@ -1878,7 +1887,6 @@ static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw) DEBUGFUNC("ixgbe_get_eeprom_semaphore"); - /* Get SMBI software semaphore between device drivers first */ for (i = 0; i < timeout; i++) { /* @@ -3363,7 +3371,6 @@ s32 ixgbe_disable_sec_rx_path_generic(struct ixgbe_hw *hw) DEBUGFUNC("ixgbe_disable_sec_rx_path_generic"); - secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL); secrxreg |= IXGBE_SECRXCTRL_RX_DIS; IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg); @@ -3692,6 +3699,10 @@ u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw) pcie_offset = IXGBE_PCIE_MSIX_82599_CAPS; max_msix_count = IXGBE_MAX_MSIX_VECTORS_82599; break; + case ixgbe_mac_E610: + pcie_offset = IXGBE_PCIE_MSIX_E610_CAPS; + max_msix_count = IXGBE_MAX_MSIX_VECTORS_82599; + break; default: return msix_count; } @@ -4139,7 +4150,6 @@ s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw) return IXGBE_SUCCESS; } - /** * ixgbe_toggle_txdctl_generic - Toggle VF's queues * @hw: pointer to hardware structure @@ -4323,7 +4333,8 @@ s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed, break; case IXGBE_LINKS_SPEED_100_82599: *speed = IXGBE_LINK_SPEED_100_FULL; - if (hw->mac.type == ixgbe_mac_X550) { + if (hw->mac.type == ixgbe_mac_X550 || + hw->mac.type == ixgbe_mac_E610) { if (links_reg & IXGBE_LINKS_SPEED_NON_STD) *speed = IXGBE_LINK_SPEED_5GB_FULL; } @@ -5494,6 +5505,7 @@ void ixgbe_get_nvm_version(struct ixgbe_hw *hw, case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: + case ixgbe_mac_E610: /* version of eeprom section */ if (ixgbe_read_eeprom(hw, NVM_EEP_OFFSET_X540, &word)) word = NVM_VER_INVALID; @@ -5512,6 +5524,7 @@ void ixgbe_get_nvm_version(struct ixgbe_hw *hw, case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: + case ixgbe_mac_E610: /* intel phy firmware version */ if (ixgbe_read_eeprom(hw, NVM_EEP_PHY_OFF_X540, &word)) word = NVM_VER_INVALID; diff --git a/sys/dev/ixgbe/ixgbe_e610.c b/sys/dev/ixgbe/ixgbe_e610.c new file mode 100644 index 000000000000..95c6dca416c6 --- /dev/null +++ b/sys/dev/ixgbe/ixgbe_e610.c @@ -0,0 +1,5567 @@ +/****************************************************************************** + SPDX-License-Identifier: BSD-3-Clause + + Copyright (c) 2025, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +#include "ixgbe_type.h" +#include "ixgbe_e610.h" +#include "ixgbe_x550.h" +#include "ixgbe_common.h" +#include "ixgbe_phy.h" +#include "ixgbe_api.h" + +/** + * ixgbe_init_aci - initialization routine for Admin Command Interface + * @hw: pointer to the hardware structure + * + * Initialize the ACI lock. + */ +void ixgbe_init_aci(struct ixgbe_hw *hw) +{ + ixgbe_init_lock(&hw->aci.lock); +} + +/** + * ixgbe_shutdown_aci - shutdown routine for Admin Command Interface + * @hw: pointer to the hardware structure + * + * Destroy the ACI lock. + */ +void ixgbe_shutdown_aci(struct ixgbe_hw *hw) +{ + ixgbe_destroy_lock(&hw->aci.lock); +} + +/** + * ixgbe_should_retry_aci_send_cmd_execute - decide if ACI command should + * be resent + * @opcode: ACI opcode + * + * Check if ACI command should be sent again depending on the provided opcode. + * + * Return: true if the sending command routine should be repeated, + * otherwise false. + */ +static bool ixgbe_should_retry_aci_send_cmd_execute(u16 opcode) +{ + switch (opcode) { + case ixgbe_aci_opc_disable_rxen: + case ixgbe_aci_opc_get_phy_caps: + case ixgbe_aci_opc_get_link_status: + case ixgbe_aci_opc_get_link_topo: + return true; + } + + return false; +} + +/** + * ixgbe_aci_send_cmd_execute - execute sending FW Admin Command to FW Admin + * Command Interface + * @hw: pointer to the HW struct + * @desc: descriptor describing the command + * @buf: buffer to use for indirect commands (NULL for direct commands) + * @buf_size: size of buffer for indirect commands (0 for direct commands) + * + * Admin Command is sent using CSR by setting descriptor and buffer in specific + * registers. + * + * Return: the exit code of the operation. + * * - IXGBE_SUCCESS - success. + * * - IXGBE_ERR_ACI_DISABLED - CSR mechanism is not enabled. + * * - IXGBE_ERR_ACI_BUSY - CSR mechanism is busy. + * * - IXGBE_ERR_PARAM - buf_size is too big or + * invalid argument buf or buf_size. + * * - IXGBE_ERR_ACI_TIMEOUT - Admin Command X command timeout. + * * - IXGBE_ERR_ACI_ERROR - Admin Command X invalid state of HICR register or + * Admin Command failed because of bad opcode was returned or + * Admin Command failed with error Y. + */ +static s32 +ixgbe_aci_send_cmd_execute(struct ixgbe_hw *hw, struct ixgbe_aci_desc *desc, + void *buf, u16 buf_size) +{ + u32 hicr = 0, tmp_buf_size = 0, i = 0; + u32 *raw_desc = (u32 *)desc; + s32 status = IXGBE_SUCCESS; + bool valid_buf = false; + u32 *tmp_buf = NULL; + u16 opcode = 0; + + do { + hw->aci.last_status = IXGBE_ACI_RC_OK; + + /* It's necessary to check if mechanism is enabled */ + hicr = IXGBE_READ_REG(hw, PF_HICR); + if (!(hicr & PF_HICR_EN)) { + status = IXGBE_ERR_ACI_DISABLED; + break; + } + if (hicr & PF_HICR_C) { + hw->aci.last_status = IXGBE_ACI_RC_EBUSY; + status = IXGBE_ERR_ACI_BUSY; + break; + } + opcode = desc->opcode; + + if (buf_size > IXGBE_ACI_MAX_BUFFER_SIZE) { + status = IXGBE_ERR_PARAM; + break; + } + + if (buf) + desc->flags |= IXGBE_CPU_TO_LE16(IXGBE_ACI_FLAG_BUF); + + /* Check if buf and buf_size are proper params */ + if (desc->flags & IXGBE_CPU_TO_LE16(IXGBE_ACI_FLAG_BUF)) { + if ((buf && buf_size == 0) || + (buf == NULL && buf_size)) { + status = IXGBE_ERR_PARAM; + break; + } + if (buf && buf_size) + valid_buf = true; + } + + if (valid_buf == true) { + if (buf_size % 4 == 0) + tmp_buf_size = buf_size; + else + tmp_buf_size = (buf_size & (u16)(~0x03)) + 4; + + tmp_buf = (u32*)ixgbe_malloc(hw, tmp_buf_size); + if (!tmp_buf) + return IXGBE_ERR_OUT_OF_MEM; + + /* tmp_buf will be firstly filled with 0xFF and after + * that the content of buf will be written into it. + * This approach lets us use valid buf_size and + * prevents us from reading past buf area + * when buf_size mod 4 not equal to 0. + */ + memset(tmp_buf, 0xFF, tmp_buf_size); + memcpy(tmp_buf, buf, buf_size); + + if (tmp_buf_size > IXGBE_ACI_LG_BUF) + desc->flags |= + IXGBE_CPU_TO_LE16(IXGBE_ACI_FLAG_LB); + + desc->datalen = IXGBE_CPU_TO_LE16(buf_size); + + if (desc->flags & IXGBE_CPU_TO_LE16(IXGBE_ACI_FLAG_RD)) { + for (i = 0; i < tmp_buf_size / 4; i++) { + IXGBE_WRITE_REG(hw, PF_HIBA(i), + IXGBE_LE32_TO_CPU(tmp_buf[i])); + } + } + } + + /* Descriptor is written to specific registers */ + for (i = 0; i < IXGBE_ACI_DESC_SIZE_IN_DWORDS; i++) + IXGBE_WRITE_REG(hw, PF_HIDA(i), + IXGBE_LE32_TO_CPU(raw_desc[i])); + + /* SW has to set PF_HICR.C bit and clear PF_HICR.SV and + * PF_HICR_EV + */ + hicr = IXGBE_READ_REG(hw, PF_HICR); + hicr = (hicr | PF_HICR_C) & ~(PF_HICR_SV | PF_HICR_EV); + IXGBE_WRITE_REG(hw, PF_HICR, hicr); + + /* Wait for sync Admin Command response */ + for (i = 0; i < IXGBE_ACI_SYNC_RESPONSE_TIMEOUT; i += 1) { + hicr = IXGBE_READ_REG(hw, PF_HICR); + if ((hicr & PF_HICR_SV) || !(hicr & PF_HICR_C)) + break; + + msec_delay(1); + } + + /* Wait for async Admin Command response */ + if ((hicr & PF_HICR_SV) && (hicr & PF_HICR_C)) { + for (i = 0; i < IXGBE_ACI_ASYNC_RESPONSE_TIMEOUT; + i += 1) { + hicr = IXGBE_READ_REG(hw, PF_HICR); + if ((hicr & PF_HICR_EV) || !(hicr & PF_HICR_C)) + break; + + msec_delay(1); + } + } + + /* Read sync Admin Command response */ + if ((hicr & PF_HICR_SV)) { + for (i = 0; i < IXGBE_ACI_DESC_SIZE_IN_DWORDS; i++) { + raw_desc[i] = IXGBE_READ_REG(hw, PF_HIDA(i)); + raw_desc[i] = IXGBE_CPU_TO_LE32(raw_desc[i]); + } + } + + /* Read async Admin Command response */ + if ((hicr & PF_HICR_EV) && !(hicr & PF_HICR_C)) { + for (i = 0; i < IXGBE_ACI_DESC_SIZE_IN_DWORDS; i++) { + raw_desc[i] = IXGBE_READ_REG(hw, PF_HIDA_2(i)); + raw_desc[i] = IXGBE_CPU_TO_LE32(raw_desc[i]); + } + } + + /* Handle timeout and invalid state of HICR register */ + if (hicr & PF_HICR_C) { + status = IXGBE_ERR_ACI_TIMEOUT; + break; + } else if (!(hicr & PF_HICR_SV) && !(hicr & PF_HICR_EV)) { + status = IXGBE_ERR_ACI_ERROR; + break; + } + + /* For every command other than 0x0014 treat opcode mismatch + * as an error. Response to 0x0014 command read from HIDA_2 + * is a descriptor of an event which is expected to contain + * different opcode than the command. + */ + if (desc->opcode != opcode && + opcode != IXGBE_CPU_TO_LE16(ixgbe_aci_opc_get_fw_event)) { + status = IXGBE_ERR_ACI_ERROR; + break; + } + + if (desc->retval != IXGBE_ACI_RC_OK) { + hw->aci.last_status = (enum ixgbe_aci_err)desc->retval; + status = IXGBE_ERR_ACI_ERROR; + break; + } + + /* Write a response values to a buf */ + if (valid_buf && (desc->flags & + IXGBE_CPU_TO_LE16(IXGBE_ACI_FLAG_BUF))) { + for (i = 0; i < tmp_buf_size / 4; i++) { + tmp_buf[i] = IXGBE_READ_REG(hw, PF_HIBA(i)); + tmp_buf[i] = IXGBE_CPU_TO_LE32(tmp_buf[i]); + } + memcpy(buf, tmp_buf, buf_size); + } + } while (0); + + if (tmp_buf) + ixgbe_free(hw, tmp_buf); + + return status; +} + +/** + * ixgbe_aci_send_cmd - send FW Admin Command to FW Admin Command Interface + * @hw: pointer to the HW struct + * @desc: descriptor describing the command + * @buf: buffer to use for indirect commands (NULL for direct commands) + * @buf_size: size of buffer for indirect commands (0 for direct commands) + * + * Helper function to send FW Admin Commands to the FW Admin Command Interface. + * + * Retry sending the FW Admin Command multiple times to the FW ACI + * if the EBUSY Admin Command error is returned. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_send_cmd(struct ixgbe_hw *hw, struct ixgbe_aci_desc *desc, + void *buf, u16 buf_size) +{ + struct ixgbe_aci_desc desc_cpy; + enum ixgbe_aci_err last_status; + bool is_cmd_for_retry; + u8 *buf_cpy = NULL; + s32 status; + u16 opcode; + u8 idx = 0; + + opcode = IXGBE_LE16_TO_CPU(desc->opcode); + is_cmd_for_retry = ixgbe_should_retry_aci_send_cmd_execute(opcode); + memset(&desc_cpy, 0, sizeof(desc_cpy)); + + if (is_cmd_for_retry) { + if (buf) { + buf_cpy = (u8 *)ixgbe_malloc(hw, buf_size); + if (!buf_cpy) + return IXGBE_ERR_OUT_OF_MEM; + } + memcpy(&desc_cpy, desc, sizeof(desc_cpy)); + } + + do { + ixgbe_acquire_lock(&hw->aci.lock); + status = ixgbe_aci_send_cmd_execute(hw, desc, buf, buf_size); + last_status = hw->aci.last_status; + ixgbe_release_lock(&hw->aci.lock); + + if (!is_cmd_for_retry || status == IXGBE_SUCCESS || + (last_status != IXGBE_ACI_RC_EBUSY && status != IXGBE_ERR_ACI_ERROR)) + break; + + if (buf) + memcpy(buf, buf_cpy, buf_size); + memcpy(desc, &desc_cpy, sizeof(desc_cpy)); + + msec_delay(IXGBE_ACI_SEND_DELAY_TIME_MS); + } while (++idx < IXGBE_ACI_SEND_MAX_EXECUTE); + + if (buf_cpy) + ixgbe_free(hw, buf_cpy); + + return status; +} + +/** + * ixgbe_aci_check_event_pending - check if there are any pending events + * @hw: pointer to the HW struct + * + * Determine if there are any pending events. + * + * Return: true if there are any currently pending events + * otherwise false. + */ +bool ixgbe_aci_check_event_pending(struct ixgbe_hw *hw) +{ + u32 ep_bit_mask; + u32 fwsts; + + ep_bit_mask = hw->bus.func ? GL_FWSTS_EP_PF1 : GL_FWSTS_EP_PF0; + + /* Check state of Event Pending (EP) bit */ + fwsts = IXGBE_READ_REG(hw, GL_FWSTS); + return (fwsts & ep_bit_mask) ? true : false; +} + +/** + * ixgbe_aci_get_event - get an event from ACI + * @hw: pointer to the HW struct + * @e: event information structure + * @pending: optional flag signaling that there are more pending events + * + * Obtain an event from ACI and return its content + * through 'e' using ACI command (0x0014). + * Provide information if there are more events + * to retrieve through 'pending'. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_get_event(struct ixgbe_hw *hw, struct ixgbe_aci_event *e, + bool *pending) +{ + struct ixgbe_aci_desc desc; + s32 status; + + if (!e || (!e->msg_buf && e->buf_len) || (e->msg_buf && !e->buf_len)) + return IXGBE_ERR_PARAM; + + ixgbe_acquire_lock(&hw->aci.lock); + + /* Check if there are any events pending */ + if (!ixgbe_aci_check_event_pending(hw)) { + status = IXGBE_ERR_ACI_NO_EVENTS; + goto aci_get_event_exit; + } + + /* Obtain pending event */ + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_fw_event); + status = ixgbe_aci_send_cmd_execute(hw, &desc, e->msg_buf, e->buf_len); + if (status) + goto aci_get_event_exit; + + /* Returned 0x0014 opcode indicates that no event was obtained */ + if (desc.opcode == IXGBE_CPU_TO_LE16(ixgbe_aci_opc_get_fw_event)) { + status = IXGBE_ERR_ACI_NO_EVENTS; + goto aci_get_event_exit; + } + + /* Determine size of event data */ + e->msg_len = MIN_T(u16, IXGBE_LE16_TO_CPU(desc.datalen), e->buf_len); + /* Write event descriptor to event info structure */ + memcpy(&e->desc, &desc, sizeof(e->desc)); + + /* Check if there are any further events pending */ + if (pending) { + *pending = ixgbe_aci_check_event_pending(hw); + } + +aci_get_event_exit: + ixgbe_release_lock(&hw->aci.lock); + + return status; +} + +/** + * ixgbe_fill_dflt_direct_cmd_desc - fill ACI descriptor with default values. + * @desc: pointer to the temp descriptor (non DMA mem) + * @opcode: the opcode can be used to decide which flags to turn off or on + * + * Helper function to fill the descriptor desc with default values + * and the provided opcode. + */ +void ixgbe_fill_dflt_direct_cmd_desc(struct ixgbe_aci_desc *desc, u16 opcode) +{ + /* zero out the desc */ + memset(desc, 0, sizeof(*desc)); + desc->opcode = IXGBE_CPU_TO_LE16(opcode); + desc->flags = IXGBE_CPU_TO_LE16(IXGBE_ACI_FLAG_SI); +} + +/** + * ixgbe_aci_get_fw_ver - get the firmware version + * @hw: pointer to the HW struct + * + * Get the firmware version using ACI command (0x0001). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_get_fw_ver(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_ver *resp; + struct ixgbe_aci_desc desc; + s32 status; + + resp = &desc.params.get_ver; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_ver); + + status = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + + if (!status) { + hw->fw_branch = resp->fw_branch; + hw->fw_maj_ver = resp->fw_major; + hw->fw_min_ver = resp->fw_minor; + hw->fw_patch = resp->fw_patch; + hw->fw_build = IXGBE_LE32_TO_CPU(resp->fw_build); + hw->api_branch = resp->api_branch; + hw->api_maj_ver = resp->api_major; + hw->api_min_ver = resp->api_minor; + hw->api_patch = resp->api_patch; + } + + return status; +} + +/** + * ixgbe_aci_send_driver_ver - send the driver version to firmware + * @hw: pointer to the HW struct + * @dv: driver's major, minor version + * + * Send the driver version to the firmware + * using the ACI command (0x0002). + * + * Return: the exit code of the operation. + * Returns IXGBE_ERR_PARAM, if dv is NULL. + */ +s32 ixgbe_aci_send_driver_ver(struct ixgbe_hw *hw, struct ixgbe_driver_ver *dv) +{ + struct ixgbe_aci_cmd_driver_ver *cmd; + struct ixgbe_aci_desc desc; + u16 len; + + cmd = &desc.params.driver_ver; + + if (!dv) + return IXGBE_ERR_PARAM; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_driver_ver); + + desc.flags |= IXGBE_CPU_TO_LE16(IXGBE_ACI_FLAG_RD); + cmd->major_ver = dv->major_ver; + cmd->minor_ver = dv->minor_ver; + cmd->build_ver = dv->build_ver; + cmd->subbuild_ver = dv->subbuild_ver; + + len = 0; + while (len < sizeof(dv->driver_string) && + IS_ASCII(dv->driver_string[len]) && dv->driver_string[len]) + len++; + + return ixgbe_aci_send_cmd(hw, &desc, dv->driver_string, len); +} + +/** + * ixgbe_aci_req_res - request a common resource + * @hw: pointer to the HW struct + * @res: resource ID + * @access: access type + * @sdp_number: resource number + * @timeout: the maximum time in ms that the driver may hold the resource + * + * Requests a common resource using the ACI command (0x0008). + * Specifies the maximum time the driver may hold the resource. + * If the requested resource is currently occupied by some other driver, + * a busy return value is returned and the timeout field value indicates the + * maximum time the current owner has to free it. + * + * Return: the exit code of the operation. + */ +static s32 +ixgbe_aci_req_res(struct ixgbe_hw *hw, enum ixgbe_aci_res_ids res, + enum ixgbe_aci_res_access_type access, u8 sdp_number, + u32 *timeout) +{ + struct ixgbe_aci_cmd_req_res *cmd_resp; + struct ixgbe_aci_desc desc; + s32 status; + + cmd_resp = &desc.params.res_owner; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_req_res); + + cmd_resp->res_id = IXGBE_CPU_TO_LE16(res); + cmd_resp->access_type = IXGBE_CPU_TO_LE16(access); + cmd_resp->res_number = IXGBE_CPU_TO_LE32(sdp_number); + cmd_resp->timeout = IXGBE_CPU_TO_LE32(*timeout); + *timeout = 0; + + status = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + + /* The completion specifies the maximum time in ms that the driver + * may hold the resource in the Timeout field. + * If the resource is held by some other driver, the command completes + * with a busy return value and the timeout field indicates the maximum + * time the current owner of the resource has to free it. + */ + if (!status || hw->aci.last_status == IXGBE_ACI_RC_EBUSY) + *timeout = IXGBE_LE32_TO_CPU(cmd_resp->timeout); + + return status; +} + +/** + * ixgbe_aci_release_res - release a common resource using ACI + * @hw: pointer to the HW struct + * @res: resource ID + * @sdp_number: resource number + * + * Release a common resource using ACI command (0x0009). + * + * Return: the exit code of the operation. + */ +static s32 +ixgbe_aci_release_res(struct ixgbe_hw *hw, enum ixgbe_aci_res_ids res, + u8 sdp_number) +{ + struct ixgbe_aci_cmd_req_res *cmd; + struct ixgbe_aci_desc desc; + + cmd = &desc.params.res_owner; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_release_res); + + cmd->res_id = IXGBE_CPU_TO_LE16(res); + cmd->res_number = IXGBE_CPU_TO_LE32(sdp_number); + + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** + * ixgbe_acquire_res - acquire the ownership of a resource + * @hw: pointer to the HW structure + * @res: resource ID + * @access: access type (read or write) + * @timeout: timeout in milliseconds + * + * Make an attempt to acquire the ownership of a resource using + * the ixgbe_aci_req_res to utilize ACI. + * In case if some other driver has previously acquired the resource and + * performed any necessary updates, the IXGBE_ERR_ACI_NO_WORK is returned, + * and the caller does not obtain the resource and has no further work to do. + * If needed, the function will poll until the current lock owner timeouts. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_acquire_res(struct ixgbe_hw *hw, enum ixgbe_aci_res_ids res, + enum ixgbe_aci_res_access_type access, u32 timeout) +{ +#define IXGBE_RES_POLLING_DELAY_MS 10 + u32 delay = IXGBE_RES_POLLING_DELAY_MS; + u32 res_timeout = timeout; + u32 retry_timeout = 0; + s32 status; + + status = ixgbe_aci_req_res(hw, res, access, 0, &res_timeout); + + /* A return code of IXGBE_ERR_ACI_NO_WORK means that another driver has + * previously acquired the resource and performed any necessary updates; + * in this case the caller does not obtain the resource and has no + * further work to do. + */ + if (status == IXGBE_ERR_ACI_NO_WORK) + goto ixgbe_acquire_res_exit; + + /* If necessary, poll until the current lock owner timeouts. + * Set retry_timeout to the timeout value reported by the FW in the + * response to the "Request Resource Ownership" (0x0008) Admin Command + * as it indicates the maximum time the current owner of the resource + * is allowed to hold it. + */ + retry_timeout = res_timeout; + while (status && retry_timeout && res_timeout) { + msec_delay(delay); + retry_timeout = (retry_timeout > delay) ? + retry_timeout - delay : 0; + status = ixgbe_aci_req_res(hw, res, access, 0, &res_timeout); + + if (status == IXGBE_ERR_ACI_NO_WORK) + /* lock free, but no work to do */ + break; + + if (!status) + /* lock acquired */ + break; + } + +ixgbe_acquire_res_exit: + return status; +} + +/** + * ixgbe_release_res - release a common resource + * @hw: pointer to the HW structure + * @res: resource ID + * + * Release a common resource using ixgbe_aci_release_res. + */ +void ixgbe_release_res(struct ixgbe_hw *hw, enum ixgbe_aci_res_ids res) +{ + u32 total_delay = 0; + s32 status; + + status = ixgbe_aci_release_res(hw, res, 0); + + /* There are some rare cases when trying to release the resource + * results in an admin command timeout, so handle them correctly. + */ + while ((status == IXGBE_ERR_ACI_TIMEOUT) && + (total_delay < IXGBE_ACI_RELEASE_RES_TIMEOUT)) { + msec_delay(1); + status = ixgbe_aci_release_res(hw, res, 0); + total_delay++; + } +} + +/** + * ixgbe_parse_common_caps - Parse common device/function capabilities + * @hw: pointer to the HW struct + * @caps: pointer to common capabilities structure + * @elem: the capability element to parse + * @prefix: message prefix for tracing capabilities + * + * Given a capability element, extract relevant details into the common + * capability structure. + * + * Return: true if the capability matches one of the common capability ids, + * false otherwise. + */ +static bool +ixgbe_parse_common_caps(struct ixgbe_hw *hw, struct ixgbe_hw_common_caps *caps, + struct ixgbe_aci_cmd_list_caps_elem *elem, + const char *prefix) +{ + u32 logical_id = IXGBE_LE32_TO_CPU(elem->logical_id); + u32 phys_id = IXGBE_LE32_TO_CPU(elem->phys_id); + u32 number = IXGBE_LE32_TO_CPU(elem->number); + u16 cap = IXGBE_LE16_TO_CPU(elem->cap); + bool found = true; + + UNREFERENCED_1PARAMETER(hw); + + switch (cap) { + case IXGBE_ACI_CAPS_VALID_FUNCTIONS: + caps->valid_functions = number; + break; + case IXGBE_ACI_CAPS_SRIOV: + caps->sr_iov_1_1 = (number == 1); + break; + case IXGBE_ACI_CAPS_VMDQ: + caps->vmdq = (number == 1); + break; + case IXGBE_ACI_CAPS_DCB: + caps->dcb = (number == 1); + caps->active_tc_bitmap = logical_id; + caps->maxtc = phys_id; + break; + case IXGBE_ACI_CAPS_RSS: + caps->rss_table_size = number; + caps->rss_table_entry_width = logical_id; + break; + case IXGBE_ACI_CAPS_RXQS: + caps->num_rxq = number; + caps->rxq_first_id = phys_id; + break; + case IXGBE_ACI_CAPS_TXQS: + caps->num_txq = number; + caps->txq_first_id = phys_id; + break; + case IXGBE_ACI_CAPS_MSIX: + caps->num_msix_vectors = number; + caps->msix_vector_first_id = phys_id; + break; + case IXGBE_ACI_CAPS_NVM_VER: + break; + case IXGBE_ACI_CAPS_NVM_MGMT: + caps->sec_rev_disabled = + (number & IXGBE_NVM_MGMT_SEC_REV_DISABLED) ? + true : false; + caps->update_disabled = + (number & IXGBE_NVM_MGMT_UPDATE_DISABLED) ? + true : false; + caps->nvm_unified_update = + (number & IXGBE_NVM_MGMT_UNIFIED_UPD_SUPPORT) ? + true : false; + caps->netlist_auth = + (number & IXGBE_NVM_MGMT_NETLIST_AUTH_SUPPORT) ? + true : false; + break; + case IXGBE_ACI_CAPS_MAX_MTU: + caps->max_mtu = number; + break; + case IXGBE_ACI_CAPS_PCIE_RESET_AVOIDANCE: + caps->pcie_reset_avoidance = (number > 0); + break; + case IXGBE_ACI_CAPS_POST_UPDATE_RESET_RESTRICT: + caps->reset_restrict_support = (number == 1); + break; + case IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG0: + case IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG1: + case IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG2: + case IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG3: + { + u8 index = cap - IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG0; + + caps->ext_topo_dev_img_ver_high[index] = number; + caps->ext_topo_dev_img_ver_low[index] = logical_id; + caps->ext_topo_dev_img_part_num[index] = + (phys_id & IXGBE_EXT_TOPO_DEV_IMG_PART_NUM_M) >> + IXGBE_EXT_TOPO_DEV_IMG_PART_NUM_S; + caps->ext_topo_dev_img_load_en[index] = + (phys_id & IXGBE_EXT_TOPO_DEV_IMG_LOAD_EN) != 0; + caps->ext_topo_dev_img_prog_en[index] = + (phys_id & IXGBE_EXT_TOPO_DEV_IMG_PROG_EN) != 0; + break; + } + case IXGBE_ACI_CAPS_OROM_RECOVERY_UPDATE: + caps->orom_recovery_update = (number == 1); + break; + case IXGBE_ACI_CAPS_NEXT_CLUSTER_ID: + caps->next_cluster_id_support = (number == 1); + DEBUGOUT2("%s: next_cluster_id_support = %d\n", + prefix, caps->next_cluster_id_support); + break; + default: + /* Not one of the recognized common capabilities */ + found = false; + } + + return found; +} + +/** + * ixgbe_hweight8 - count set bits among the 8 lowest bits + * @w: variable storing set bits to count + * + * Return: the number of set bits among the 8 lowest bits in the provided value. + */ +static u8 ixgbe_hweight8(u32 w) +{ + u8 hweight = 0, i; + + for (i = 0; i < 8; i++) + if (w & (1 << i)) + hweight++; + + return hweight; +} + +/** + * ixgbe_hweight32 - count set bits among the 32 lowest bits + * @w: variable storing set bits to count + * + * Return: the number of set bits among the 32 lowest bits in the + * provided value. + */ +static u8 ixgbe_hweight32(u32 w) +{ + u32 bitMask = 0x1, i; + u8 bitCnt = 0; + + for (i = 0; i < 32; i++) + { + if (w & bitMask) + bitCnt++; + + bitMask = bitMask << 0x1; + } + + return bitCnt; +} + +/** + * ixgbe_parse_valid_functions_cap - Parse IXGBE_ACI_CAPS_VALID_FUNCTIONS caps + * @hw: pointer to the HW struct + * @dev_p: pointer to device capabilities structure + * @cap: capability element to parse + * + * Parse IXGBE_ACI_CAPS_VALID_FUNCTIONS for device capabilities. + */ +static void +ixgbe_parse_valid_functions_cap(struct ixgbe_hw *hw, + struct ixgbe_hw_dev_caps *dev_p, + struct ixgbe_aci_cmd_list_caps_elem *cap) +{ + u32 number = IXGBE_LE32_TO_CPU(cap->number); + + UNREFERENCED_1PARAMETER(hw); + + dev_p->num_funcs = ixgbe_hweight32(number); +} + +/** + * ixgbe_parse_vf_dev_caps - Parse IXGBE_ACI_CAPS_VF device caps + * @hw: pointer to the HW struct + * @dev_p: pointer to device capabilities structure + * @cap: capability element to parse + * + * Parse IXGBE_ACI_CAPS_VF for device capabilities. + */ +static void ixgbe_parse_vf_dev_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_dev_caps *dev_p, + struct ixgbe_aci_cmd_list_caps_elem *cap) +{ + u32 number = IXGBE_LE32_TO_CPU(cap->number); + + UNREFERENCED_1PARAMETER(hw); + + dev_p->num_vfs_exposed = number; +} + +/** + * ixgbe_parse_vsi_dev_caps - Parse IXGBE_ACI_CAPS_VSI device caps + * @hw: pointer to the HW struct + * @dev_p: pointer to device capabilities structure + * @cap: capability element to parse + * + * Parse IXGBE_ACI_CAPS_VSI for device capabilities. + */ +static void ixgbe_parse_vsi_dev_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_dev_caps *dev_p, + struct ixgbe_aci_cmd_list_caps_elem *cap) +{ + u32 number = IXGBE_LE32_TO_CPU(cap->number); + + UNREFERENCED_1PARAMETER(hw); + + dev_p->num_vsi_allocd_to_host = number; +} + +/** + * ixgbe_parse_fdir_dev_caps - Parse IXGBE_ACI_CAPS_FD device caps + * @hw: pointer to the HW struct + * @dev_p: pointer to device capabilities structure + * @cap: capability element to parse + * + * Parse IXGBE_ACI_CAPS_FD for device capabilities. + */ +static void ixgbe_parse_fdir_dev_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_dev_caps *dev_p, + struct ixgbe_aci_cmd_list_caps_elem *cap) +{ + u32 number = IXGBE_LE32_TO_CPU(cap->number); + + UNREFERENCED_1PARAMETER(hw); + + dev_p->num_flow_director_fltr = number; +} + +/** + * ixgbe_parse_dev_caps - Parse device capabilities + * @hw: pointer to the HW struct + * @dev_p: pointer to device capabilities structure + * @buf: buffer containing the device capability records + * @cap_count: the number of capabilities + * + * Helper device to parse device (0x000B) capabilities list. For + * capabilities shared between device and function, this relies on + * ixgbe_parse_common_caps. + * + * Loop through the list of provided capabilities and extract the relevant + * data into the device capabilities structured. + */ +static void ixgbe_parse_dev_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_dev_caps *dev_p, + void *buf, u32 cap_count) +{ + struct ixgbe_aci_cmd_list_caps_elem *cap_resp; + u32 i; + + cap_resp = (struct ixgbe_aci_cmd_list_caps_elem *)buf; + + memset(dev_p, 0, sizeof(*dev_p)); + + for (i = 0; i < cap_count; i++) { + u16 cap = IXGBE_LE16_TO_CPU(cap_resp[i].cap); + bool found; + + found = ixgbe_parse_common_caps(hw, &dev_p->common_cap, + &cap_resp[i], "dev caps"); + + switch (cap) { + case IXGBE_ACI_CAPS_VALID_FUNCTIONS: + ixgbe_parse_valid_functions_cap(hw, dev_p, + &cap_resp[i]); + break; + case IXGBE_ACI_CAPS_VF: + ixgbe_parse_vf_dev_caps(hw, dev_p, &cap_resp[i]); + break; + case IXGBE_ACI_CAPS_VSI: + ixgbe_parse_vsi_dev_caps(hw, dev_p, &cap_resp[i]); + break; + case IXGBE_ACI_CAPS_FD: + ixgbe_parse_fdir_dev_caps(hw, dev_p, &cap_resp[i]); + break; + default: + /* Don't list common capabilities as unknown */ + if (!found) + break; + } + } + +} + +/** + * ixgbe_parse_vf_func_caps - Parse IXGBE_ACI_CAPS_VF function caps + * @hw: pointer to the HW struct + * @func_p: pointer to function capabilities structure + * @cap: pointer to the capability element to parse + * + * Extract function capabilities for IXGBE_ACI_CAPS_VF. + */ +static void ixgbe_parse_vf_func_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_func_caps *func_p, + struct ixgbe_aci_cmd_list_caps_elem *cap) +{ + u32 logical_id = IXGBE_LE32_TO_CPU(cap->logical_id); + u32 number = IXGBE_LE32_TO_CPU(cap->number); + + UNREFERENCED_1PARAMETER(hw); + + func_p->num_allocd_vfs = number; + func_p->vf_base_id = logical_id; +} + +/** + * ixgbe_get_num_per_func - determine number of resources per PF + * @hw: pointer to the HW structure + * @max: value to be evenly split between each PF + * + * Determine the number of valid functions by going through the bitmap returned + * from parsing capabilities and use this to calculate the number of resources + * per PF based on the max value passed in. + * + * Return: the number of resources per PF or 0, if no PH are available. + */ +static u32 ixgbe_get_num_per_func(struct ixgbe_hw *hw, u32 max) +{ + u8 funcs; + +#define IXGBE_CAPS_VALID_FUNCS_M 0xFF + funcs = ixgbe_hweight8(hw->dev_caps.common_cap.valid_functions & + IXGBE_CAPS_VALID_FUNCS_M); + + if (!funcs) + return 0; + + return max / funcs; +} + +/** + * ixgbe_parse_vsi_func_caps - Parse IXGBE_ACI_CAPS_VSI function caps + * @hw: pointer to the HW struct + * @func_p: pointer to function capabilities structure + * @cap: pointer to the capability element to parse + * + * Extract function capabilities for IXGBE_ACI_CAPS_VSI. + */ +static void ixgbe_parse_vsi_func_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_func_caps *func_p, + struct ixgbe_aci_cmd_list_caps_elem *cap) +{ + func_p->guar_num_vsi = ixgbe_get_num_per_func(hw, IXGBE_MAX_VSI); +} + +/** + * ixgbe_parse_func_caps - Parse function capabilities + * @hw: pointer to the HW struct + * @func_p: pointer to function capabilities structure + * @buf: buffer containing the function capability records + * @cap_count: the number of capabilities + * + * Helper function to parse function (0x000A) capabilities list. For + * capabilities shared between device and function, this relies on + * ixgbe_parse_common_caps. + * + * Loop through the list of provided capabilities and extract the relevant + * data into the function capabilities structured. + */ +static void ixgbe_parse_func_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_func_caps *func_p, + void *buf, u32 cap_count) +{ + struct ixgbe_aci_cmd_list_caps_elem *cap_resp; + u32 i; + + cap_resp = (struct ixgbe_aci_cmd_list_caps_elem *)buf; + + memset(func_p, 0, sizeof(*func_p)); + + for (i = 0; i < cap_count; i++) { + u16 cap = IXGBE_LE16_TO_CPU(cap_resp[i].cap); + ixgbe_parse_common_caps(hw, &func_p->common_cap, + &cap_resp[i], "func caps"); + + switch (cap) { + case IXGBE_ACI_CAPS_VF: + ixgbe_parse_vf_func_caps(hw, func_p, &cap_resp[i]); + break; + case IXGBE_ACI_CAPS_VSI: + ixgbe_parse_vsi_func_caps(hw, func_p, &cap_resp[i]); + break; + default: + /* Don't list common capabilities as unknown */ + break; + } + } + +} + +/** + * ixgbe_aci_list_caps - query function/device capabilities + * @hw: pointer to the HW struct + * @buf: a buffer to hold the capabilities + * @buf_size: size of the buffer + * @cap_count: if not NULL, set to the number of capabilities reported + * @opc: capabilities type to discover, device or function + * + * Get the function (0x000A) or device (0x000B) capabilities description from + * firmware and store it in the buffer. + * + * If the cap_count pointer is not NULL, then it is set to the number of + * capabilities firmware will report. Note that if the buffer size is too + * small, it is possible the command will return IXGBE_ERR_OUT_OF_MEM. The + * cap_count will still be updated in this case. It is recommended that the + * buffer size be set to IXGBE_ACI_MAX_BUFFER_SIZE (the largest possible + * buffer that firmware could return) to avoid this. + * + * Return: the exit code of the operation. + * Exit code of IXGBE_ERR_OUT_OF_MEM means the buffer size is too small. + */ +s32 ixgbe_aci_list_caps(struct ixgbe_hw *hw, void *buf, u16 buf_size, + u32 *cap_count, enum ixgbe_aci_opc opc) +{ + struct ixgbe_aci_cmd_list_caps *cmd; + struct ixgbe_aci_desc desc; + s32 status; + + cmd = &desc.params.get_cap; + + if (opc != ixgbe_aci_opc_list_func_caps && + opc != ixgbe_aci_opc_list_dev_caps) + return IXGBE_ERR_PARAM; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, opc); + status = ixgbe_aci_send_cmd(hw, &desc, buf, buf_size); + + if (cap_count) + *cap_count = IXGBE_LE32_TO_CPU(cmd->count); + + return status; +} + +/** + * ixgbe_discover_dev_caps - Read and extract device capabilities + * @hw: pointer to the hardware structure + * @dev_caps: pointer to device capabilities structure + * + * Read the device capabilities and extract them into the dev_caps structure + * for later use. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_discover_dev_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_dev_caps *dev_caps) +{ + u32 status, cap_count = 0; + u8 *cbuf = NULL; + + cbuf = (u8*)ixgbe_malloc(hw, IXGBE_ACI_MAX_BUFFER_SIZE); + if (!cbuf) + return IXGBE_ERR_OUT_OF_MEM; + /* Although the driver doesn't know the number of capabilities the + * device will return, we can simply send a 4KB buffer, the maximum + * possible size that firmware can return. + */ + cap_count = IXGBE_ACI_MAX_BUFFER_SIZE / + sizeof(struct ixgbe_aci_cmd_list_caps_elem); + + status = ixgbe_aci_list_caps(hw, cbuf, IXGBE_ACI_MAX_BUFFER_SIZE, + &cap_count, + ixgbe_aci_opc_list_dev_caps); + if (!status) + ixgbe_parse_dev_caps(hw, dev_caps, cbuf, cap_count); + + if (cbuf) + ixgbe_free(hw, cbuf); + + return status; +} + +/** + * ixgbe_discover_func_caps - Read and extract function capabilities + * @hw: pointer to the hardware structure + * @func_caps: pointer to function capabilities structure + * + * Read the function capabilities and extract them into the func_caps structure + * for later use. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_discover_func_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_func_caps *func_caps) +{ + u32 cap_count = 0; + u8 *cbuf = NULL; + s32 status; + + cbuf = (u8*)ixgbe_malloc(hw, IXGBE_ACI_MAX_BUFFER_SIZE); + if(!cbuf) + return IXGBE_ERR_OUT_OF_MEM; + /* Although the driver doesn't know the number of capabilities the + * device will return, we can simply send a 4KB buffer, the maximum + * possible size that firmware can return. + */ + cap_count = IXGBE_ACI_MAX_BUFFER_SIZE / + sizeof(struct ixgbe_aci_cmd_list_caps_elem); + + status = ixgbe_aci_list_caps(hw, cbuf, IXGBE_ACI_MAX_BUFFER_SIZE, + &cap_count, + ixgbe_aci_opc_list_func_caps); + if (!status) + ixgbe_parse_func_caps(hw, func_caps, cbuf, cap_count); + + if (cbuf) + ixgbe_free(hw, cbuf); + + return status; +} + +/** + * ixgbe_get_caps - get info about the HW + * @hw: pointer to the hardware structure + * + * Retrieve both device and function capabilities. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_get_caps(struct ixgbe_hw *hw) +{ + s32 status; + + status = ixgbe_discover_dev_caps(hw, &hw->dev_caps); + if (status) + return status; + + return ixgbe_discover_func_caps(hw, &hw->func_caps); +} + +/** + * ixgbe_aci_disable_rxen - disable RX + * @hw: pointer to the HW struct + * + * Request a safe disable of Receive Enable using ACI command (0x000C). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_disable_rxen(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_disable_rxen *cmd; + struct ixgbe_aci_desc desc; + + UNREFERENCED_1PARAMETER(hw); + + cmd = &desc.params.disable_rxen; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_disable_rxen); + + cmd->lport_num = (u8)hw->bus.func; + + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** + * ixgbe_aci_get_phy_caps - returns PHY capabilities + * @hw: pointer to the HW struct + * @qual_mods: report qualified modules + * @report_mode: report mode capabilities + * @pcaps: structure for PHY capabilities to be filled + * + * Returns the various PHY capabilities supported on the Port + * using ACI command (0x0600). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_get_phy_caps(struct ixgbe_hw *hw, bool qual_mods, u8 report_mode, + struct ixgbe_aci_cmd_get_phy_caps_data *pcaps) +{ + struct ixgbe_aci_cmd_get_phy_caps *cmd; + u16 pcaps_size = sizeof(*pcaps); + struct ixgbe_aci_desc desc; + s32 status; + + cmd = &desc.params.get_phy; + + if (!pcaps || (report_mode & ~IXGBE_ACI_REPORT_MODE_M)) + return IXGBE_ERR_PARAM; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_phy_caps); + + if (qual_mods) + cmd->param0 |= IXGBE_CPU_TO_LE16(IXGBE_ACI_GET_PHY_RQM); + + cmd->param0 |= IXGBE_CPU_TO_LE16(report_mode); + status = ixgbe_aci_send_cmd(hw, &desc, pcaps, pcaps_size); + + if (status == IXGBE_SUCCESS && + report_mode == IXGBE_ACI_REPORT_TOPO_CAP_MEDIA) { + hw->phy.phy_type_low = IXGBE_LE64_TO_CPU(pcaps->phy_type_low); + hw->phy.phy_type_high = IXGBE_LE64_TO_CPU(pcaps->phy_type_high); + memcpy(hw->link.link_info.module_type, &pcaps->module_type, + sizeof(hw->link.link_info.module_type)); + } + + return status; +} + +/** + * ixgbe_phy_caps_equals_cfg - check if capabilities match the PHY config + * @phy_caps: PHY capabilities + * @phy_cfg: PHY configuration + * + * Helper function to determine if PHY capabilities match PHY + * configuration + * + * Return: true if PHY capabilities match PHY configuration. + */ +bool +ixgbe_phy_caps_equals_cfg(struct ixgbe_aci_cmd_get_phy_caps_data *phy_caps, + struct ixgbe_aci_cmd_set_phy_cfg_data *phy_cfg) +{ + u8 caps_mask, cfg_mask; + + if (!phy_caps || !phy_cfg) + return false; + + /* These bits are not common between capabilities and configuration. + * Do not use them to determine equality. + */ + caps_mask = IXGBE_ACI_PHY_CAPS_MASK & ~(IXGBE_ACI_PHY_AN_MODE | + IXGBE_ACI_PHY_EN_MOD_QUAL); + cfg_mask = IXGBE_ACI_PHY_ENA_VALID_MASK & + ~IXGBE_ACI_PHY_ENA_AUTO_LINK_UPDT; + + if (phy_caps->phy_type_low != phy_cfg->phy_type_low || + phy_caps->phy_type_high != phy_cfg->phy_type_high || + ((phy_caps->caps & caps_mask) != (phy_cfg->caps & cfg_mask)) || + phy_caps->low_power_ctrl_an != phy_cfg->low_power_ctrl_an || + phy_caps->eee_cap != phy_cfg->eee_cap || + phy_caps->eeer_value != phy_cfg->eeer_value || + phy_caps->link_fec_options != phy_cfg->link_fec_opt) + return false; + + return true; +} + +/** + * ixgbe_copy_phy_caps_to_cfg - Copy PHY ability data to configuration data + * @caps: PHY ability structure to copy data from + * @cfg: PHY configuration structure to copy data to + * + * Helper function to copy data from PHY capabilities data structure + * to PHY configuration data structure + */ +void ixgbe_copy_phy_caps_to_cfg(struct ixgbe_aci_cmd_get_phy_caps_data *caps, + struct ixgbe_aci_cmd_set_phy_cfg_data *cfg) +{ + if (!caps || !cfg) + return; + + memset(cfg, 0, sizeof(*cfg)); + cfg->phy_type_low = caps->phy_type_low; + cfg->phy_type_high = caps->phy_type_high; + cfg->caps = caps->caps; + cfg->low_power_ctrl_an = caps->low_power_ctrl_an; + cfg->eee_cap = caps->eee_cap; + cfg->eeer_value = caps->eeer_value; + cfg->link_fec_opt = caps->link_fec_options; + cfg->module_compliance_enforcement = + caps->module_compliance_enforcement; +} + +/** + * ixgbe_aci_set_phy_cfg - set PHY configuration + * @hw: pointer to the HW struct + * @cfg: structure with PHY configuration data to be set + * + * Set the various PHY configuration parameters supported on the Port + * using ACI command (0x0601). + * One or more of the Set PHY config parameters may be ignored in an MFP + * mode as the PF may not have the privilege to set some of the PHY Config + * parameters. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_set_phy_cfg(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_set_phy_cfg_data *cfg) +{ + struct ixgbe_aci_desc desc; + s32 status; + + if (!cfg) + return IXGBE_ERR_PARAM; + + /* Ensure that only valid bits of cfg->caps can be turned on. */ + if (cfg->caps & ~IXGBE_ACI_PHY_ENA_VALID_MASK) { + cfg->caps &= IXGBE_ACI_PHY_ENA_VALID_MASK; + } + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_set_phy_cfg); + desc.flags |= IXGBE_CPU_TO_LE16(IXGBE_ACI_FLAG_RD); + + status = ixgbe_aci_send_cmd(hw, &desc, cfg, sizeof(*cfg)); + + if (!status) + hw->phy.curr_user_phy_cfg = *cfg; + + return status; +} + +/** + * ixgbe_aci_set_link_restart_an - set up link and restart AN + * @hw: pointer to the HW struct + * @ena_link: if true: enable link, if false: disable link + * + * Function sets up the link and restarts the Auto-Negotiation over the link. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_set_link_restart_an(struct ixgbe_hw *hw, bool ena_link) +{ + struct ixgbe_aci_cmd_restart_an *cmd; + struct ixgbe_aci_desc desc; + + cmd = &desc.params.restart_an; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_restart_an); + + cmd->cmd_flags = IXGBE_ACI_RESTART_AN_LINK_RESTART; + if (ena_link) + cmd->cmd_flags |= IXGBE_ACI_RESTART_AN_LINK_ENABLE; + else + cmd->cmd_flags &= ~IXGBE_ACI_RESTART_AN_LINK_ENABLE; + + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** + * ixgbe_is_media_cage_present - check if media cage is present + * @hw: pointer to the HW struct + * + * Identify presence of media cage using the ACI command (0x06E0). + * + * Return: true if media cage is present, else false. If no cage, then + * media type is backplane or BASE-T. + */ +static bool ixgbe_is_media_cage_present(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_link_topo *cmd; + struct ixgbe_aci_desc desc; + + cmd = &desc.params.get_link_topo; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_link_topo); + + cmd->addr.topo_params.node_type_ctx = + (IXGBE_ACI_LINK_TOPO_NODE_CTX_PORT << + IXGBE_ACI_LINK_TOPO_NODE_CTX_S); + + /* set node type */ + cmd->addr.topo_params.node_type_ctx |= + (IXGBE_ACI_LINK_TOPO_NODE_TYPE_M & + IXGBE_ACI_LINK_TOPO_NODE_TYPE_CAGE); + + /* Node type cage can be used to determine if cage is present. If AQC + * returns error (ENOENT), then no cage present. If no cage present then + * connection type is backplane or BASE-T. + */ + return ixgbe_aci_get_netlist_node(hw, cmd, NULL, NULL); +} + +/** + * ixgbe_get_media_type_from_phy_type - Gets media type based on phy type + * @hw: pointer to the HW struct + * + * Try to identify the media type based on the phy type. + * If more than one media type, the ixgbe_media_type_unknown is returned. + * First, phy_type_low is checked, then phy_type_high. + * If none are identified, the ixgbe_media_type_unknown is returned + * + * Return: type of a media based on phy type in form of enum. + */ +static enum ixgbe_media_type +ixgbe_get_media_type_from_phy_type(struct ixgbe_hw *hw) +{ + struct ixgbe_link_status *hw_link_info; + + if (!hw) + return ixgbe_media_type_unknown; + + hw_link_info = &hw->link.link_info; + if (hw_link_info->phy_type_low && hw_link_info->phy_type_high) + /* If more than one media type is selected, report unknown */ + return ixgbe_media_type_unknown; + + if (hw_link_info->phy_type_low) { + /* 1G SGMII is a special case where some DA cable PHYs + * may show this as an option when it really shouldn't + * be since SGMII is meant to be between a MAC and a PHY + * in a backplane. Try to detect this case and handle it + */ + if (hw_link_info->phy_type_low == IXGBE_PHY_TYPE_LOW_1G_SGMII && + (hw_link_info->module_type[IXGBE_ACI_MOD_TYPE_IDENT] == + IXGBE_ACI_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE || + hw_link_info->module_type[IXGBE_ACI_MOD_TYPE_IDENT] == + IXGBE_ACI_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE)) + return ixgbe_media_type_da; + + switch (hw_link_info->phy_type_low) { + case IXGBE_PHY_TYPE_LOW_1000BASE_SX: + case IXGBE_PHY_TYPE_LOW_1000BASE_LX: + case IXGBE_PHY_TYPE_LOW_10GBASE_SR: + case IXGBE_PHY_TYPE_LOW_10GBASE_LR: + return ixgbe_media_type_fiber; + case IXGBE_PHY_TYPE_LOW_10G_SFI_AOC_ACC: + return ixgbe_media_type_fiber; + case IXGBE_PHY_TYPE_LOW_100BASE_TX: + case IXGBE_PHY_TYPE_LOW_1000BASE_T: + case IXGBE_PHY_TYPE_LOW_2500BASE_T: + case IXGBE_PHY_TYPE_LOW_5GBASE_T: + case IXGBE_PHY_TYPE_LOW_10GBASE_T: + return ixgbe_media_type_copper; + case IXGBE_PHY_TYPE_LOW_10G_SFI_DA: + return ixgbe_media_type_da; + case IXGBE_PHY_TYPE_LOW_1000BASE_KX: + case IXGBE_PHY_TYPE_LOW_2500BASE_KX: + case IXGBE_PHY_TYPE_LOW_2500BASE_X: + case IXGBE_PHY_TYPE_LOW_5GBASE_KR: + case IXGBE_PHY_TYPE_LOW_10GBASE_KR_CR1: + case IXGBE_PHY_TYPE_LOW_10G_SFI_C2C: + return ixgbe_media_type_backplane; + } + } else { + switch (hw_link_info->phy_type_high) { + case IXGBE_PHY_TYPE_HIGH_10BASE_T: + return ixgbe_media_type_copper; + } + } + return ixgbe_media_type_unknown; +} + +/** + * ixgbe_update_link_info - update status of the HW network link + * @hw: pointer to the HW struct + * + * Update the status of the HW network link. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_update_link_info(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_phy_caps_data *pcaps; + struct ixgbe_link_status *li; + s32 status; + + if (!hw) + return IXGBE_ERR_PARAM; + + li = &hw->link.link_info; + + status = ixgbe_aci_get_link_info(hw, true, NULL); + if (status) + return status; + + if (li->link_info & IXGBE_ACI_MEDIA_AVAILABLE) { + pcaps = (struct ixgbe_aci_cmd_get_phy_caps_data *) + ixgbe_malloc(hw, sizeof(*pcaps)); + if (!pcaps) + return IXGBE_ERR_OUT_OF_MEM; + + status = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_TOPO_CAP_MEDIA, + pcaps); + + if (status == IXGBE_SUCCESS) + memcpy(li->module_type, &pcaps->module_type, + sizeof(li->module_type)); + + ixgbe_free(hw, pcaps); + } + + return status; +} + +/** + * ixgbe_get_link_status - get status of the HW network link + * @hw: pointer to the HW struct + * @link_up: pointer to bool (true/false = linkup/linkdown) + * + * Variable link_up is true if link is up, false if link is down. + * The variable link_up is invalid if status is non zero. As a + * result of this call, link status reporting becomes enabled + * + * Return: the exit code of the operation. + */ +s32 ixgbe_get_link_status(struct ixgbe_hw *hw, bool *link_up) +{ + s32 status = IXGBE_SUCCESS; + + if (!hw || !link_up) + return IXGBE_ERR_PARAM; + + if (hw->link.get_link_info) { + status = ixgbe_update_link_info(hw); + if (status) { + return status; + } + } + + *link_up = hw->link.link_info.link_info & IXGBE_ACI_LINK_UP; + + return status; +} + +/** + * ixgbe_aci_get_link_info - get the link status + * @hw: pointer to the HW struct + * @ena_lse: enable/disable LinkStatusEvent reporting + * @link: pointer to link status structure - optional + * + * Get the current Link Status using ACI command (0x607). + * The current link can be optionally provided to update + * the status. + * + * Return: the link status of the adapter. + */ +s32 ixgbe_aci_get_link_info(struct ixgbe_hw *hw, bool ena_lse, + struct ixgbe_link_status *link) +{ + struct ixgbe_aci_cmd_get_link_status_data link_data = { 0 }; + struct ixgbe_aci_cmd_get_link_status *resp; + struct ixgbe_link_status *li_old, *li; + struct ixgbe_fc_info *hw_fc_info; + struct ixgbe_aci_desc desc; + bool tx_pause, rx_pause; + u8 cmd_flags; + s32 status; + + if (!hw) + return IXGBE_ERR_PARAM; + + li_old = &hw->link.link_info_old; + li = &hw->link.link_info; + hw_fc_info = &hw->fc; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_link_status); + cmd_flags = (ena_lse) ? IXGBE_ACI_LSE_ENA : IXGBE_ACI_LSE_DIS; + resp = &desc.params.get_link_status; + resp->cmd_flags = cmd_flags; + + status = ixgbe_aci_send_cmd(hw, &desc, &link_data, sizeof(link_data)); + + if (status != IXGBE_SUCCESS) + return status; + + /* save off old link status information */ + *li_old = *li; + + /* update current link status information */ + li->link_speed = IXGBE_LE16_TO_CPU(link_data.link_speed); + li->phy_type_low = IXGBE_LE64_TO_CPU(link_data.phy_type_low); + li->phy_type_high = IXGBE_LE64_TO_CPU(link_data.phy_type_high); + li->link_info = link_data.link_info; + li->link_cfg_err = link_data.link_cfg_err; + li->an_info = link_data.an_info; + li->ext_info = link_data.ext_info; + li->max_frame_size = IXGBE_LE16_TO_CPU(link_data.max_frame_size); + li->fec_info = link_data.cfg & IXGBE_ACI_FEC_MASK; + li->topo_media_conflict = link_data.topo_media_conflict; + li->pacing = link_data.cfg & (IXGBE_ACI_CFG_PACING_M | + IXGBE_ACI_CFG_PACING_TYPE_M); + + /* update fc info */ + tx_pause = !!(link_data.an_info & IXGBE_ACI_LINK_PAUSE_TX); + rx_pause = !!(link_data.an_info & IXGBE_ACI_LINK_PAUSE_RX); + if (tx_pause && rx_pause) + hw_fc_info->current_mode = ixgbe_fc_full; + else if (tx_pause) + hw_fc_info->current_mode = ixgbe_fc_tx_pause; + else if (rx_pause) + hw_fc_info->current_mode = ixgbe_fc_rx_pause; + else + hw_fc_info->current_mode = ixgbe_fc_none; + + li->lse_ena = !!(resp->cmd_flags & IXGBE_ACI_LSE_IS_ENABLED); + + /* save link status information */ + if (link) + *link = *li; + + /* flag cleared so calling functions don't call AQ again */ + hw->link.get_link_info = false; + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_aci_set_event_mask - set event mask + * @hw: pointer to the HW struct + * @port_num: port number of the physical function + * @mask: event mask to be set + * + * Set the event mask using ACI command (0x0613). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_set_event_mask(struct ixgbe_hw *hw, u8 port_num, u16 mask) +{ + struct ixgbe_aci_cmd_set_event_mask *cmd; + struct ixgbe_aci_desc desc; + + cmd = &desc.params.set_event_mask; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_set_event_mask); + + cmd->event_mask = IXGBE_CPU_TO_LE16(mask); + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** + * ixgbe_configure_lse - enable/disable link status events + * @hw: pointer to the HW struct + * @activate: bool value deciding if lse should be enabled nor disabled + * @mask: event mask to be set; a set bit means deactivation of the + * corresponding event + * + * Set the event mask and then enable or disable link status events + * + * Return: the exit code of the operation. + */ +s32 ixgbe_configure_lse(struct ixgbe_hw *hw, bool activate, u16 mask) +{ + s32 rc; + + rc = ixgbe_aci_set_event_mask(hw, (u8)hw->bus.func, mask); + if (rc) { + return rc; + } + + /* Enabling link status events generation by fw */ + rc = ixgbe_aci_get_link_info(hw, activate, NULL); + if (rc) { + return rc; + } + return IXGBE_SUCCESS; +} + +/** + * ixgbe_aci_get_netlist_node - get a node handle + * @hw: pointer to the hw struct + * @cmd: get_link_topo AQ structure + * @node_part_number: output node part number if node found + * @node_handle: output node handle parameter if node found + * + * Get the netlist node and assigns it to + * the provided handle using ACI command (0x06E0). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_get_netlist_node(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_get_link_topo *cmd, + u8 *node_part_number, u16 *node_handle) +{ + struct ixgbe_aci_desc desc; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_link_topo); + desc.params.get_link_topo = *cmd; + + if (ixgbe_aci_send_cmd(hw, &desc, NULL, 0)) + return IXGBE_ERR_NOT_SUPPORTED; + + if (node_handle) + *node_handle = + IXGBE_LE16_TO_CPU(desc.params.get_link_topo.addr.handle); + if (node_part_number) + *node_part_number = desc.params.get_link_topo.node_part_num; + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_find_netlist_node - find a node handle + * @hw: pointer to the hw struct + * @node_type_ctx: type of netlist node to look for + * @node_part_number: node part number to look for + * @node_handle: output parameter if node found - optional + * + * Find and return the node handle for a given node type and part number in the + * netlist. When found IXGBE_SUCCESS is returned, IXGBE_ERR_NOT_SUPPORTED + * otherwise. If @node_handle provided, it would be set to found node handle. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_find_netlist_node(struct ixgbe_hw *hw, u8 node_type_ctx, + u8 node_part_number, u16 *node_handle) +{ + struct ixgbe_aci_cmd_get_link_topo cmd; + u8 rec_node_part_number; + u16 rec_node_handle; + s32 status; + u8 idx; + + for (idx = 0; idx < IXGBE_MAX_NETLIST_SIZE; idx++) { + memset(&cmd, 0, sizeof(cmd)); + + cmd.addr.topo_params.node_type_ctx = + (node_type_ctx << IXGBE_ACI_LINK_TOPO_NODE_TYPE_S); + cmd.addr.topo_params.index = idx; + + status = ixgbe_aci_get_netlist_node(hw, &cmd, + &rec_node_part_number, + &rec_node_handle); + if (status) + return status; + + if (rec_node_part_number == node_part_number) { + if (node_handle) + *node_handle = rec_node_handle; + return IXGBE_SUCCESS; + } + } + + return IXGBE_ERR_NOT_SUPPORTED; +} + +/** + * ixgbe_aci_read_i2c - read I2C register value + * @hw: pointer to the hw struct + * @topo_addr: topology address for a device to communicate with + * @bus_addr: 7-bit I2C bus address + * @addr: I2C memory address (I2C offset) with up to 16 bits + * @params: I2C parameters: bit [7] - Repeated start, + * bits [6:5] data offset size, + * bit [4] - I2C address type, bits [3:0] - data size + * to read (0-16 bytes) + * @data: pointer to data (0 to 16 bytes) to be read from the I2C device + * + * Read the value of the I2C pin register using ACI command (0x06E2). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_read_i2c(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_link_topo_addr topo_addr, + u16 bus_addr, __le16 addr, u8 params, u8 *data) +{ + struct ixgbe_aci_desc desc = { 0 }; + struct ixgbe_aci_cmd_i2c *cmd; + u8 data_size; + s32 status; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_read_i2c); + cmd = &desc.params.read_write_i2c; + + if (!data) + return IXGBE_ERR_PARAM; + + data_size = (params & IXGBE_ACI_I2C_DATA_SIZE_M) >> + IXGBE_ACI_I2C_DATA_SIZE_S; + + cmd->i2c_bus_addr = IXGBE_CPU_TO_LE16(bus_addr); + cmd->topo_addr = topo_addr; + cmd->i2c_params = params; + cmd->i2c_addr = addr; + + status = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + if (!status) { + struct ixgbe_aci_cmd_read_i2c_resp *resp; + u8 i; + + resp = &desc.params.read_i2c_resp; + for (i = 0; i < data_size; i++) { + *data = resp->i2c_data[i]; + data++; + } + } + + return status; +} + +/** + * ixgbe_aci_write_i2c - write a value to I2C register + * @hw: pointer to the hw struct + * @topo_addr: topology address for a device to communicate with + * @bus_addr: 7-bit I2C bus address + * @addr: I2C memory address (I2C offset) with up to 16 bits + * @params: I2C parameters: bit [4] - I2C address type, bits [3:0] - data size + * to write (0-7 bytes) + * @data: pointer to data (0 to 4 bytes) to be written to the I2C device + * + * Write a value to the I2C pin register using ACI command (0x06E3). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_write_i2c(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_link_topo_addr topo_addr, + u16 bus_addr, __le16 addr, u8 params, u8 *data) +{ + struct ixgbe_aci_desc desc = { 0 }; + struct ixgbe_aci_cmd_i2c *cmd; + u8 i, data_size; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_write_i2c); + cmd = &desc.params.read_write_i2c; + + data_size = (params & IXGBE_ACI_I2C_DATA_SIZE_M) >> + IXGBE_ACI_I2C_DATA_SIZE_S; + + /* data_size limited to 4 */ + if (data_size > 4) + return IXGBE_ERR_PARAM; + + cmd->i2c_bus_addr = IXGBE_CPU_TO_LE16(bus_addr); + cmd->topo_addr = topo_addr; + cmd->i2c_params = params; + cmd->i2c_addr = addr; + + for (i = 0; i < data_size; i++) { + cmd->i2c_data[i] = *data; + data++; + } + + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** + * ixgbe_aci_set_port_id_led - set LED value for the given port + * @hw: pointer to the HW struct + * @orig_mode: set LED original mode + * + * Set LED value for the given port (0x06E9) + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_set_port_id_led(struct ixgbe_hw *hw, bool orig_mode) +{ + struct ixgbe_aci_cmd_set_port_id_led *cmd; + struct ixgbe_aci_desc desc; + + cmd = &desc.params.set_port_id_led; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_set_port_id_led); + + cmd->lport_num = (u8)hw->bus.func; + cmd->lport_num_valid = IXGBE_ACI_PORT_ID_PORT_NUM_VALID; + + if (orig_mode) + cmd->ident_mode = IXGBE_ACI_PORT_IDENT_LED_ORIG; + else + cmd->ident_mode = IXGBE_ACI_PORT_IDENT_LED_BLINK; + + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** + * ixgbe_aci_set_gpio - set GPIO pin state + * @hw: pointer to the hw struct + * @gpio_ctrl_handle: GPIO controller node handle + * @pin_idx: IO Number of the GPIO that needs to be set + * @value: SW provide IO value to set in the LSB + * + * Set the GPIO pin state that is a part of the topology + * using ACI command (0x06EC). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_set_gpio(struct ixgbe_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, + bool value) +{ + struct ixgbe_aci_cmd_gpio *cmd; + struct ixgbe_aci_desc desc; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_set_gpio); + cmd = &desc.params.read_write_gpio; + cmd->gpio_ctrl_handle = IXGBE_CPU_TO_LE16(gpio_ctrl_handle); + cmd->gpio_num = pin_idx; + cmd->gpio_val = value ? 1 : 0; + + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** + * ixgbe_aci_get_gpio - get GPIO pin state + * @hw: pointer to the hw struct + * @gpio_ctrl_handle: GPIO controller node handle + * @pin_idx: IO Number of the GPIO that needs to be set + * @value: IO value read + * + * Get the value of a GPIO signal which is part of the topology + * using ACI command (0x06ED). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_get_gpio(struct ixgbe_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, + bool *value) +{ + struct ixgbe_aci_cmd_gpio *cmd; + struct ixgbe_aci_desc desc; + s32 status; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_gpio); + cmd = &desc.params.read_write_gpio; + cmd->gpio_ctrl_handle = IXGBE_CPU_TO_LE16(gpio_ctrl_handle); + cmd->gpio_num = pin_idx; + + status = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + if (status) + return status; + + *value = !!cmd->gpio_val; + return IXGBE_SUCCESS; +} + +/** + * ixgbe_aci_sff_eeprom - read/write SFF EEPROM + * @hw: pointer to the HW struct + * @lport: bits [7:0] = logical port, bit [8] = logical port valid + * @bus_addr: I2C bus address of the eeprom (typically 0xA0, 0=topo default) + * @mem_addr: I2C offset. lower 8 bits for address, 8 upper bits zero padding. + * @page: QSFP page + * @page_bank_ctrl: configuration of SFF/CMIS paging and banking control + * @data: pointer to data buffer to be read/written to the I2C device. + * @length: 1-16 for read, 1 for write. + * @write: 0 read, 1 for write. + * + * Read/write SFF EEPROM using ACI command (0x06EE). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_sff_eeprom(struct ixgbe_hw *hw, u16 lport, u8 bus_addr, + u16 mem_addr, u8 page, u8 page_bank_ctrl, u8 *data, + u8 length, bool write) +{ + struct ixgbe_aci_cmd_sff_eeprom *cmd; + struct ixgbe_aci_desc desc; + s32 status; + + if (!data || (mem_addr & 0xff00)) + return IXGBE_ERR_PARAM; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_sff_eeprom); + cmd = &desc.params.read_write_sff_param; + desc.flags = IXGBE_CPU_TO_LE16(IXGBE_ACI_FLAG_RD); + cmd->lport_num = (u8)(lport & 0xff); + cmd->lport_num_valid = (u8)((lport >> 8) & 0x01); + cmd->i2c_bus_addr = IXGBE_CPU_TO_LE16(((bus_addr >> 1) & + IXGBE_ACI_SFF_I2CBUS_7BIT_M) | + ((page_bank_ctrl << + IXGBE_ACI_SFF_PAGE_BANK_CTRL_S) & + IXGBE_ACI_SFF_PAGE_BANK_CTRL_M)); + cmd->i2c_offset = IXGBE_CPU_TO_LE16(mem_addr & 0xff); + cmd->module_page = page; + if (write) + cmd->i2c_bus_addr |= IXGBE_CPU_TO_LE16(IXGBE_ACI_SFF_IS_WRITE); + + status = ixgbe_aci_send_cmd(hw, &desc, data, length); + return status; +} + +/** + * ixgbe_aci_prog_topo_dev_nvm - program Topology Device NVM + * @hw: pointer to the hardware structure + * @topo_params: pointer to structure storing topology parameters for a device + * + * Program Topology Device NVM using ACI command (0x06F2). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_prog_topo_dev_nvm(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_link_topo_params *topo_params) +{ + struct ixgbe_aci_cmd_prog_topo_dev_nvm *cmd; + struct ixgbe_aci_desc desc; + + cmd = &desc.params.prog_topo_dev_nvm; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_prog_topo_dev_nvm); + + memcpy(&cmd->topo_params, topo_params, sizeof(*topo_params)); + + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** + * ixgbe_aci_read_topo_dev_nvm - read Topology Device NVM + * @hw: pointer to the hardware structure + * @topo_params: pointer to structure storing topology parameters for a device + * @start_address: byte offset in the topology device NVM + * @data: pointer to data buffer + * @data_size: number of bytes to be read from the topology device NVM + * Read Topology Device NVM (0x06F3) + * + * Read Topology of Device NVM using ACI command (0x06F3). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_read_topo_dev_nvm(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_link_topo_params *topo_params, + u32 start_address, u8 *data, u8 data_size) +{ + struct ixgbe_aci_cmd_read_topo_dev_nvm *cmd; + struct ixgbe_aci_desc desc; + s32 status; + + if (!data || data_size == 0 || + data_size > IXGBE_ACI_READ_TOPO_DEV_NVM_DATA_READ_SIZE) + return IXGBE_ERR_PARAM; + + cmd = &desc.params.read_topo_dev_nvm; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_read_topo_dev_nvm); + + desc.datalen = IXGBE_CPU_TO_LE16(data_size); + memcpy(&cmd->topo_params, topo_params, sizeof(*topo_params)); + cmd->start_address = IXGBE_CPU_TO_LE32(start_address); + + status = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + if (status) + return status; + + memcpy(data, cmd->data_read, data_size); + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_acquire_nvm - Generic request for acquiring the NVM ownership + * @hw: pointer to the HW structure + * @access: NVM access type (read or write) + * + * Request NVM ownership. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_acquire_nvm(struct ixgbe_hw *hw, + enum ixgbe_aci_res_access_type access) +{ + u32 fla; + + /* Skip if we are in blank NVM programming mode */ + fla = IXGBE_READ_REG(hw, GLNVM_FLA); + if ((fla & GLNVM_FLA_LOCKED_M) == 0) + return IXGBE_SUCCESS; + + return ixgbe_acquire_res(hw, IXGBE_NVM_RES_ID, access, + IXGBE_NVM_TIMEOUT); +} + +/** + * ixgbe_release_nvm - Generic request for releasing the NVM ownership + * @hw: pointer to the HW structure + * + * Release NVM ownership. + */ +void ixgbe_release_nvm(struct ixgbe_hw *hw) +{ + u32 fla; + + /* Skip if we are in blank NVM programming mode */ + fla = IXGBE_READ_REG(hw, GLNVM_FLA); + if ((fla & GLNVM_FLA_LOCKED_M) == 0) + return; + + ixgbe_release_res(hw, IXGBE_NVM_RES_ID); +} + + +/** + * ixgbe_aci_read_nvm - read NVM + * @hw: pointer to the HW struct + * @module_typeid: module pointer location in words from the NVM beginning + * @offset: byte offset from the module beginning + * @length: length of the section to be read (in bytes from the offset) + * @data: command buffer (size [bytes] = length) + * @last_command: tells if this is the last command in a series + * @read_shadow_ram: tell if this is a shadow RAM read + * + * Read the NVM using ACI command (0x0701). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_read_nvm(struct ixgbe_hw *hw, u16 module_typeid, u32 offset, + u16 length, void *data, bool last_command, + bool read_shadow_ram) +{ + struct ixgbe_aci_desc desc; + struct ixgbe_aci_cmd_nvm *cmd; + + cmd = &desc.params.nvm; + + if (offset > IXGBE_ACI_NVM_MAX_OFFSET) + return IXGBE_ERR_PARAM; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_read); + + if (!read_shadow_ram && module_typeid == IXGBE_ACI_NVM_START_POINT) + cmd->cmd_flags |= IXGBE_ACI_NVM_FLASH_ONLY; + + /* If this is the last command in a series, set the proper flag. */ + if (last_command) + cmd->cmd_flags |= IXGBE_ACI_NVM_LAST_CMD; + cmd->module_typeid = IXGBE_CPU_TO_LE16(module_typeid); + cmd->offset_low = IXGBE_CPU_TO_LE16(offset & 0xFFFF); + cmd->offset_high = (offset >> 16) & 0xFF; + cmd->length = IXGBE_CPU_TO_LE16(length); + + return ixgbe_aci_send_cmd(hw, &desc, data, length); +} + +/** + * ixgbe_aci_erase_nvm - erase NVM sector + * @hw: pointer to the HW struct + * @module_typeid: module pointer location in words from the NVM beginning + * + * Erase the NVM sector using the ACI command (0x0702). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_erase_nvm(struct ixgbe_hw *hw, u16 module_typeid) +{ + struct ixgbe_aci_desc desc; + struct ixgbe_aci_cmd_nvm *cmd; + s32 status; + __le16 len; + + /* read a length value from SR, so module_typeid is equal to 0 */ + /* calculate offset where module size is placed from bytes to words */ + /* set last command and read from SR values to true */ + status = ixgbe_aci_read_nvm(hw, 0, 2 * module_typeid + 2, 2, &len, true, + true); + if (status) + return status; + + cmd = &desc.params.nvm; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_erase); + + cmd->module_typeid = IXGBE_CPU_TO_LE16(module_typeid); + cmd->length = len; + cmd->offset_low = 0; + cmd->offset_high = 0; + + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** + * ixgbe_aci_update_nvm - update NVM + * @hw: pointer to the HW struct + * @module_typeid: module pointer location in words from the NVM beginning + * @offset: byte offset from the module beginning + * @length: length of the section to be written (in bytes from the offset) + * @data: command buffer (size [bytes] = length) + * @last_command: tells if this is the last command in a series + * @command_flags: command parameters + * + * Update the NVM using the ACI command (0x0703). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_update_nvm(struct ixgbe_hw *hw, u16 module_typeid, + u32 offset, u16 length, void *data, + bool last_command, u8 command_flags) +{ + struct ixgbe_aci_desc desc; + struct ixgbe_aci_cmd_nvm *cmd; + + cmd = &desc.params.nvm; + + /* In offset the highest byte must be zeroed. */ + if (offset & 0xFF000000) + return IXGBE_ERR_PARAM; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_write); + + cmd->cmd_flags |= command_flags; + + /* If this is the last command in a series, set the proper flag. */ + if (last_command) + cmd->cmd_flags |= IXGBE_ACI_NVM_LAST_CMD; + cmd->module_typeid = IXGBE_CPU_TO_LE16(module_typeid); + cmd->offset_low = IXGBE_CPU_TO_LE16(offset & 0xFFFF); + cmd->offset_high = (offset >> 16) & 0xFF; + cmd->length = IXGBE_CPU_TO_LE16(length); + + desc.flags |= IXGBE_CPU_TO_LE16(IXGBE_ACI_FLAG_RD); + + return ixgbe_aci_send_cmd(hw, &desc, data, length); +} + +/** + * ixgbe_aci_read_nvm_cfg - read an NVM config block + * @hw: pointer to the HW struct + * @cmd_flags: NVM access admin command bits + * @field_id: field or feature ID + * @data: buffer for result + * @buf_size: buffer size + * @elem_count: pointer to count of elements read by FW + * + * Reads a single or multiple feature/field ID and data using ACI command + * (0x0704). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_read_nvm_cfg(struct ixgbe_hw *hw, u8 cmd_flags, + u16 field_id, void *data, u16 buf_size, + u16 *elem_count) +{ + struct ixgbe_aci_cmd_nvm_cfg *cmd; + struct ixgbe_aci_desc desc; + s32 status; + + cmd = &desc.params.nvm_cfg; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_cfg_read); + + cmd->cmd_flags = cmd_flags; + cmd->id = IXGBE_CPU_TO_LE16(field_id); + + status = ixgbe_aci_send_cmd(hw, &desc, data, buf_size); + if (!status && elem_count) + *elem_count = IXGBE_LE16_TO_CPU(cmd->count); + + return status; +} + +/** + * ixgbe_aci_write_nvm_cfg - write an NVM config block + * @hw: pointer to the HW struct + * @cmd_flags: NVM access admin command bits + * @data: buffer for result + * @buf_size: buffer size + * @elem_count: count of elements to be written + * + * Writes a single or multiple feature/field ID and data using ACI command + * (0x0705). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_write_nvm_cfg(struct ixgbe_hw *hw, u8 cmd_flags, + void *data, u16 buf_size, u16 elem_count) +{ + struct ixgbe_aci_cmd_nvm_cfg *cmd; + struct ixgbe_aci_desc desc; + + cmd = &desc.params.nvm_cfg; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_cfg_write); + desc.flags |= IXGBE_CPU_TO_LE16(IXGBE_ACI_FLAG_RD); + + cmd->count = IXGBE_CPU_TO_LE16(elem_count); + cmd->cmd_flags = cmd_flags; + + return ixgbe_aci_send_cmd(hw, &desc, data, buf_size); +} + +/** + * ixgbe_nvm_validate_checksum - validate checksum + * @hw: pointer to the HW struct + * + * Verify NVM PFA checksum validity using ACI command (0x0706). + * If the checksum verification failed, IXGBE_ERR_NVM_CHECKSUM is returned. + * The function acquires and then releases the NVM ownership. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_nvm_validate_checksum(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_nvm_checksum *cmd; + struct ixgbe_aci_desc desc; + s32 status; + + status = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (status) + return status; + + cmd = &desc.params.nvm_checksum; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_checksum); + cmd->flags = IXGBE_ACI_NVM_CHECKSUM_VERIFY; + + status = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + + ixgbe_release_nvm(hw); + + if (!status) + if (IXGBE_LE16_TO_CPU(cmd->checksum) != + IXGBE_ACI_NVM_CHECKSUM_CORRECT) { + ERROR_REPORT1(IXGBE_ERROR_INVALID_STATE, + "Invalid Shadow Ram checksum"); + status = IXGBE_ERR_NVM_CHECKSUM; + } + + return status; +} + +/** + * ixgbe_nvm_recalculate_checksum - recalculate checksum + * @hw: pointer to the HW struct + * + * Recalculate NVM PFA checksum using ACI command (0x0706). + * The function acquires and then releases the NVM ownership. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_nvm_recalculate_checksum(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_nvm_checksum *cmd; + struct ixgbe_aci_desc desc; + s32 status; + + status = ixgbe_acquire_nvm(hw, IXGBE_RES_WRITE); + if (status) + return status; + + cmd = &desc.params.nvm_checksum; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_checksum); + cmd->flags = IXGBE_ACI_NVM_CHECKSUM_RECALC; + + status = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + + ixgbe_release_nvm(hw); + + return status; +} + +/** + * ixgbe_nvm_write_activate - NVM activate write + * @hw: pointer to the HW struct + * @cmd_flags: flags for write activate command + * @response_flags: response indicators from firmware + * + * Update the control word with the required banks' validity bits + * and dumps the Shadow RAM to flash using ACI command (0x0707). + * + * cmd_flags controls which banks to activate, the preservation level to use + * when activating the NVM bank, and whether an EMP reset is required for + * activation. + * + * Note that the 16bit cmd_flags value is split between two separate 1 byte + * flag values in the descriptor. + * + * On successful return of the firmware command, the response_flags variable + * is updated with the flags reported by firmware indicating certain status, + * such as whether EMP reset is enabled. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_nvm_write_activate(struct ixgbe_hw *hw, u16 cmd_flags, + u8 *response_flags) +{ + struct ixgbe_aci_desc desc; + struct ixgbe_aci_cmd_nvm *cmd; + s32 status; + + cmd = &desc.params.nvm; + ixgbe_fill_dflt_direct_cmd_desc(&desc, + ixgbe_aci_opc_nvm_write_activate); + + cmd->cmd_flags = LO_BYTE(cmd_flags); + cmd->offset_high = HI_BYTE(cmd_flags); + + status = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + if (!status && response_flags) + *response_flags = cmd->cmd_flags; + + return status; +} + +/** + * ixgbe_get_flash_bank_offset - Get offset into requested flash bank + * @hw: pointer to the HW structure + * @bank: whether to read from the active or inactive flash bank + * @module: the module to read from + * + * Based on the module, lookup the module offset from the beginning of the + * flash. + * + * Return: the flash offset. Note that a value of zero is invalid and must be + * treated as an error. + */ +static u32 ixgbe_get_flash_bank_offset(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + u16 module) +{ + struct ixgbe_bank_info *banks = &hw->flash.banks; + enum ixgbe_flash_bank active_bank; + bool second_bank_active; + u32 offset, size; + + switch (module) { + case E610_SR_1ST_NVM_BANK_PTR: + offset = banks->nvm_ptr; + size = banks->nvm_size; + active_bank = banks->nvm_bank; + break; + case E610_SR_1ST_OROM_BANK_PTR: + offset = banks->orom_ptr; + size = banks->orom_size; + active_bank = banks->orom_bank; + break; + case E610_SR_NETLIST_BANK_PTR: + offset = banks->netlist_ptr; + size = banks->netlist_size; + active_bank = banks->netlist_bank; + break; + default: + return 0; + } + + switch (active_bank) { + case IXGBE_1ST_FLASH_BANK: + second_bank_active = false; + break; + case IXGBE_2ND_FLASH_BANK: + second_bank_active = true; + break; + default: + return 0; + } + + /* The second flash bank is stored immediately following the first + * bank. Based on whether the 1st or 2nd bank is active, and whether + * we want the active or inactive bank, calculate the desired offset. + */ + switch (bank) { + case IXGBE_ACTIVE_FLASH_BANK: + return offset + (second_bank_active ? size : 0); + case IXGBE_INACTIVE_FLASH_BANK: + return offset + (second_bank_active ? 0 : size); + } + + return 0; +} + +/** + * ixgbe_read_flash_module - Read a word from one of the main NVM modules + * @hw: pointer to the HW structure + * @bank: which bank of the module to read + * @module: the module to read + * @offset: the offset into the module in bytes + * @data: storage for the word read from the flash + * @length: bytes of data to read + * + * Read data from the specified flash module. The bank parameter indicates + * whether or not to read from the active bank or the inactive bank of that + * module. + * + * The word will be read using flat NVM access, and relies on the + * hw->flash.banks data being setup by ixgbe_determine_active_flash_banks() + * during initialization. + * + * Return: the exit code of the operation. + */ +static s32 ixgbe_read_flash_module(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + u16 module, u32 offset, u8 *data, u32 length) +{ + s32 status; + u32 start; + + start = ixgbe_get_flash_bank_offset(hw, bank, module); + if (!start) { + return IXGBE_ERR_PARAM; + } + + status = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (status) + return status; + + status = ixgbe_read_flat_nvm(hw, start + offset, &length, data, false); + + ixgbe_release_nvm(hw); + + return status; +} + +/** + * ixgbe_read_netlist_module - Read data from the netlist module area + * @hw: pointer to the HW structure + * @bank: whether to read from the active or inactive module + * @offset: offset into the netlist to read from + * @data: storage for returned word value + * + * Read a word from the specified netlist bank. + * + * Return: the exit code of the operation. + */ +static s32 ixgbe_read_netlist_module(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + u32 offset, u16 *data) +{ + __le16 data_local; + s32 status; + + status = ixgbe_read_flash_module(hw, bank, E610_SR_NETLIST_BANK_PTR, + offset * sizeof(u16), + (u8 *)&data_local, + sizeof(u16)); + if (!status) + *data = IXGBE_LE16_TO_CPU(data_local); + + return status; +} + +/** + * ixgbe_read_nvm_module - Read from the active main NVM module + * @hw: pointer to the HW structure + * @bank: whether to read from active or inactive NVM module + * @offset: offset into the NVM module to read, in words + * @data: storage for returned word value + * + * Read the specified word from the active NVM module. This includes the CSS + * header at the start of the NVM module. + * + * Return: the exit code of the operation. + */ +static s32 ixgbe_read_nvm_module(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + u32 offset, u16 *data) +{ + __le16 data_local; + s32 status; + + status = ixgbe_read_flash_module(hw, bank, E610_SR_1ST_NVM_BANK_PTR, + offset * sizeof(u16), + (u8 *)&data_local, + sizeof(u16)); + if (!status) + *data = IXGBE_LE16_TO_CPU(data_local); + + return status; +} + +/** + * ixgbe_get_nvm_css_hdr_len - Read the CSS header length from the + * NVM CSS header + * @hw: pointer to the HW struct + * @bank: whether to read from the active or inactive flash bank + * @hdr_len: storage for header length in words + * + * Read the CSS header length from the NVM CSS header and add the + * Authentication header size, and then convert to words. + * + * Return: the exit code of the operation. + */ +static s32 ixgbe_get_nvm_css_hdr_len(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + u32 *hdr_len) +{ + u16 hdr_len_l, hdr_len_h; + u32 hdr_len_dword; + s32 status; + + status = ixgbe_read_nvm_module(hw, bank, IXGBE_NVM_CSS_HDR_LEN_L, + &hdr_len_l); + if (status) + return status; + + status = ixgbe_read_nvm_module(hw, bank, IXGBE_NVM_CSS_HDR_LEN_H, + &hdr_len_h); + if (status) + return status; + + /* CSS header length is in DWORD, so convert to words and add + * authentication header size + */ + hdr_len_dword = hdr_len_h << 16 | hdr_len_l; + *hdr_len = (hdr_len_dword * 2) + IXGBE_NVM_AUTH_HEADER_LEN; + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_read_nvm_sr_copy - Read a word from the Shadow RAM copy in the NVM bank + * @hw: pointer to the HW structure + * @bank: whether to read from the active or inactive NVM module + * @offset: offset into the Shadow RAM copy to read, in words + * @data: storage for returned word value + * + * Read the specified word from the copy of the Shadow RAM found in the + * specified NVM module. + * + * Return: the exit code of the operation. + */ +static s32 ixgbe_read_nvm_sr_copy(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + u32 offset, u16 *data) +{ + u32 hdr_len; + s32 status; + + status = ixgbe_get_nvm_css_hdr_len(hw, bank, &hdr_len); + if (status) + return status; + + hdr_len = ROUND_UP(hdr_len, 32); + + return ixgbe_read_nvm_module(hw, bank, hdr_len + offset, data); +} + +/** + * ixgbe_get_nvm_minsrevs - Get the minsrevs values from flash + * @hw: pointer to the HW struct + * @minsrevs: structure to store NVM and OROM minsrev values + * + * Read the Minimum Security Revision TLV and extract + * the revision values from the flash image + * into a readable structure for processing. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_get_nvm_minsrevs(struct ixgbe_hw *hw, + struct ixgbe_minsrev_info *minsrevs) +{ + struct ixgbe_aci_cmd_nvm_minsrev data; + s32 status; + u16 valid; + + status = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (status) + return status; + + status = ixgbe_aci_read_nvm(hw, IXGBE_ACI_NVM_MINSREV_MOD_ID, + 0, sizeof(data), &data, + true, false); + + ixgbe_release_nvm(hw); + + if (status) + return status; + + valid = IXGBE_LE16_TO_CPU(data.validity); + + /* Extract NVM minimum security revision */ + if (valid & IXGBE_ACI_NVM_MINSREV_NVM_VALID) { + u16 minsrev_l = IXGBE_LE16_TO_CPU(data.nvm_minsrev_l); + u16 minsrev_h = IXGBE_LE16_TO_CPU(data.nvm_minsrev_h); + + minsrevs->nvm = minsrev_h << 16 | minsrev_l; + minsrevs->nvm_valid = true; + } + + /* Extract the OROM minimum security revision */ + if (valid & IXGBE_ACI_NVM_MINSREV_OROM_VALID) { + u16 minsrev_l = IXGBE_LE16_TO_CPU(data.orom_minsrev_l); + u16 minsrev_h = IXGBE_LE16_TO_CPU(data.orom_minsrev_h); + + minsrevs->orom = minsrev_h << 16 | minsrev_l; + minsrevs->orom_valid = true; + } + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_update_nvm_minsrevs - Update minsrevs TLV data in flash + * @hw: pointer to the HW struct + * @minsrevs: minimum security revision information + * + * Update the NVM or Option ROM minimum security revision fields in the PFA + * area of the flash. Reads the minsrevs->nvm_valid and minsrevs->orom_valid + * fields to determine what update is being requested. If the valid bit is not + * set for that module, then the associated minsrev will be left as is. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_update_nvm_minsrevs(struct ixgbe_hw *hw, + struct ixgbe_minsrev_info *minsrevs) +{ + struct ixgbe_aci_cmd_nvm_minsrev data; + s32 status; + + if (!minsrevs->nvm_valid && !minsrevs->orom_valid) { + return IXGBE_ERR_PARAM; + } + + status = ixgbe_acquire_nvm(hw, IXGBE_RES_WRITE); + if (status) + return status; + + /* Get current data */ + status = ixgbe_aci_read_nvm(hw, IXGBE_ACI_NVM_MINSREV_MOD_ID, 0, + sizeof(data), &data, true, false); + if (status) + goto exit_release_res; + + if (minsrevs->nvm_valid) { + data.nvm_minsrev_l = IXGBE_CPU_TO_LE16(minsrevs->nvm & 0xFFFF); + data.nvm_minsrev_h = IXGBE_CPU_TO_LE16(minsrevs->nvm >> 16); + data.validity |= + IXGBE_CPU_TO_LE16(IXGBE_ACI_NVM_MINSREV_NVM_VALID); + } + + if (minsrevs->orom_valid) { + data.orom_minsrev_l = IXGBE_CPU_TO_LE16(minsrevs->orom & 0xFFFF); + data.orom_minsrev_h = IXGBE_CPU_TO_LE16(minsrevs->orom >> 16); + data.validity |= + IXGBE_CPU_TO_LE16(IXGBE_ACI_NVM_MINSREV_OROM_VALID); + } + + /* Update flash data */ + status = ixgbe_aci_update_nvm(hw, IXGBE_ACI_NVM_MINSREV_MOD_ID, 0, + sizeof(data), &data, false, + IXGBE_ACI_NVM_SPECIAL_UPDATE); + if (status) + goto exit_release_res; + + /* Dump the Shadow RAM to the flash */ + status = ixgbe_nvm_write_activate(hw, 0, NULL); + +exit_release_res: + ixgbe_release_nvm(hw); + + return status; +} + +/** + * ixgbe_get_nvm_srev - Read the security revision from the NVM CSS header + * @hw: pointer to the HW struct + * @bank: whether to read from the active or inactive flash bank + * @srev: storage for security revision + * + * Read the security revision out of the CSS header of the active NVM module + * bank. + * + * Return: the exit code of the operation. + */ +static s32 ixgbe_get_nvm_srev(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, u32 *srev) +{ + u16 srev_l, srev_h; + s32 status; + + status = ixgbe_read_nvm_module(hw, bank, IXGBE_NVM_CSS_SREV_L, &srev_l); + if (status) + return status; + + status = ixgbe_read_nvm_module(hw, bank, IXGBE_NVM_CSS_SREV_H, &srev_h); + if (status) + return status; + + *srev = srev_h << 16 | srev_l; + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_get_nvm_ver_info - Read NVM version information + * @hw: pointer to the HW struct + * @bank: whether to read from the active or inactive flash bank + * @nvm: pointer to NVM info structure + * + * Read the NVM EETRACK ID and map version of the main NVM image bank, filling + * in the nvm info structure. + * + * Return: the exit code of the operation. + */ +static s32 ixgbe_get_nvm_ver_info(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + struct ixgbe_nvm_info *nvm) +{ + u16 eetrack_lo, eetrack_hi, ver; + s32 status; + + status = ixgbe_read_nvm_sr_copy(hw, bank, + E610_SR_NVM_DEV_STARTER_VER, &ver); + if (status) { + return status; + } + + nvm->major = (ver & E610_NVM_VER_HI_MASK) >> E610_NVM_VER_HI_SHIFT; + nvm->minor = (ver & E610_NVM_VER_LO_MASK) >> E610_NVM_VER_LO_SHIFT; + + status = ixgbe_read_nvm_sr_copy(hw, bank, E610_SR_NVM_EETRACK_LO, + &eetrack_lo); + if (status) { + return status; + } + status = ixgbe_read_nvm_sr_copy(hw, bank, E610_SR_NVM_EETRACK_HI, + &eetrack_hi); + if (status) { + return status; + } + + nvm->eetrack = (eetrack_hi << 16) | eetrack_lo; + + status = ixgbe_get_nvm_srev(hw, bank, &nvm->srev); + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_get_inactive_nvm_ver - Read Option ROM version from the inactive bank + * @hw: pointer to the HW structure + * @nvm: storage for Option ROM version information + * + * Reads the NVM EETRACK ID, Map version, and security revision of the + * inactive NVM bank. Used to access version data for a pending update that + * has not yet been activated. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_get_inactive_nvm_ver(struct ixgbe_hw *hw, struct ixgbe_nvm_info *nvm) +{ + return ixgbe_get_nvm_ver_info(hw, IXGBE_INACTIVE_FLASH_BANK, nvm); +} + +/** + * ixgbe_get_active_nvm_ver - Read Option ROM version from the active bank + * @hw: pointer to the HW structure + * @nvm: storage for Option ROM version information + * + * Reads the NVM EETRACK ID, Map version, and security revision of the + * active NVM bank. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_get_active_nvm_ver(struct ixgbe_hw *hw, struct ixgbe_nvm_info *nvm) +{ + return ixgbe_get_nvm_ver_info(hw, IXGBE_ACTIVE_FLASH_BANK, nvm); +} + +/** + * ixgbe_get_netlist_info + * @hw: pointer to the HW struct + * @bank: whether to read from the active or inactive flash bank + * @netlist: pointer to netlist version info structure + * + * Get the netlist version information from the requested bank. Reads the Link + * Topology section to find the Netlist ID block and extract the relevant + * information into the netlist version structure. + * + * Return: the exit code of the operation. + */ +static s32 ixgbe_get_netlist_info(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + struct ixgbe_netlist_info *netlist) +{ + u16 module_id, length, node_count, i; + u16 *id_blk; + s32 status; + + status = ixgbe_read_netlist_module(hw, bank, IXGBE_NETLIST_TYPE_OFFSET, + &module_id); + if (status) + return status; + + if (module_id != IXGBE_NETLIST_LINK_TOPO_MOD_ID) { + return IXGBE_ERR_NVM; + } + + status = ixgbe_read_netlist_module(hw, bank, IXGBE_LINK_TOPO_MODULE_LEN, + &length); + if (status) + return status; + + /* sanity check that we have at least enough words to store the + * netlist ID block + */ + if (length < IXGBE_NETLIST_ID_BLK_SIZE) { + return IXGBE_ERR_NVM; + } + + status = ixgbe_read_netlist_module(hw, bank, IXGBE_LINK_TOPO_NODE_COUNT, + &node_count); + if (status) + return status; + node_count &= IXGBE_LINK_TOPO_NODE_COUNT_M; + + id_blk = (u16 *)ixgbe_calloc(hw, IXGBE_NETLIST_ID_BLK_SIZE, + sizeof(*id_blk)); + if (!id_blk) + return IXGBE_ERR_NO_SPACE; + + /* Read out the entire Netlist ID Block at once. */ + status = ixgbe_read_flash_module(hw, bank, E610_SR_NETLIST_BANK_PTR, + IXGBE_NETLIST_ID_BLK_OFFSET(node_count) * sizeof(u16), + (u8 *)id_blk, + IXGBE_NETLIST_ID_BLK_SIZE * sizeof(u16)); + if (status) + goto exit_error; + + for (i = 0; i < IXGBE_NETLIST_ID_BLK_SIZE; i++) + id_blk[i] = IXGBE_LE16_TO_CPU(((__le16 *)id_blk)[i]); + + netlist->major = id_blk[IXGBE_NETLIST_ID_BLK_MAJOR_VER_HIGH] << 16 | + id_blk[IXGBE_NETLIST_ID_BLK_MAJOR_VER_LOW]; + netlist->minor = id_blk[IXGBE_NETLIST_ID_BLK_MINOR_VER_HIGH] << 16 | + id_blk[IXGBE_NETLIST_ID_BLK_MINOR_VER_LOW]; + netlist->type = id_blk[IXGBE_NETLIST_ID_BLK_TYPE_HIGH] << 16 | + id_blk[IXGBE_NETLIST_ID_BLK_TYPE_LOW]; + netlist->rev = id_blk[IXGBE_NETLIST_ID_BLK_REV_HIGH] << 16 | + id_blk[IXGBE_NETLIST_ID_BLK_REV_LOW]; + netlist->cust_ver = id_blk[IXGBE_NETLIST_ID_BLK_CUST_VER]; + /* Read the left most 4 bytes of SHA */ + netlist->hash = id_blk[IXGBE_NETLIST_ID_BLK_SHA_HASH_WORD(15)] << 16 | + id_blk[IXGBE_NETLIST_ID_BLK_SHA_HASH_WORD(14)]; + +exit_error: + ixgbe_free(hw, id_blk); + + return status; +} + +/** + * ixgbe_get_inactive_netlist_ver + * @hw: pointer to the HW struct + * @netlist: pointer to netlist version info structure + * + * Read the netlist version data from the inactive netlist bank. Used to + * extract version data of a pending flash update in order to display the + * version data. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_get_inactive_netlist_ver(struct ixgbe_hw *hw, + struct ixgbe_netlist_info *netlist) +{ + return ixgbe_get_netlist_info(hw, IXGBE_INACTIVE_FLASH_BANK, netlist); +} + +/** + * ixgbe_read_sr_pointer - Read the value of a Shadow RAM pointer word + * @hw: pointer to the HW structure + * @offset: the word offset of the Shadow RAM word to read + * @pointer: pointer value read from Shadow RAM + * + * Read the given Shadow RAM word, and convert it to a pointer value specified + * in bytes. This function assumes the specified offset is a valid pointer + * word. + * + * Each pointer word specifies whether it is stored in word size or 4KB + * sector size by using the highest bit. The reported pointer value will be in + * bytes, intended for flat NVM reads. + * + * Return: the exit code of the operation. + */ +static s32 ixgbe_read_sr_pointer(struct ixgbe_hw *hw, u16 offset, u32 *pointer) +{ + s32 status; + u16 value; + + status = ixgbe_read_ee_aci_E610(hw, offset, &value); + if (status) + return status; + + /* Determine if the pointer is in 4KB or word units */ + if (value & IXGBE_SR_NVM_PTR_4KB_UNITS) + *pointer = (value & ~IXGBE_SR_NVM_PTR_4KB_UNITS) * 4 * 1024; + else + *pointer = value * 2; + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_read_sr_area_size - Read an area size from a Shadow RAM word + * @hw: pointer to the HW structure + * @offset: the word offset of the Shadow RAM to read + * @size: size value read from the Shadow RAM + * + * Read the given Shadow RAM word, and convert it to an area size value + * specified in bytes. This function assumes the specified offset is a valid + * area size word. + * + * Each area size word is specified in 4KB sector units. This function reports + * the size in bytes, intended for flat NVM reads. + * + * Return: the exit code of the operation. + */ +static s32 ixgbe_read_sr_area_size(struct ixgbe_hw *hw, u16 offset, u32 *size) +{ + s32 status; + u16 value; + + status = ixgbe_read_ee_aci_E610(hw, offset, &value); + if (status) + return status; + + /* Area sizes are always specified in 4KB units */ + *size = value * 4 * 1024; + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_discover_flash_size - Discover the available flash size. + * @hw: pointer to the HW struct + * + * The device flash could be up to 16MB in size. However, it is possible that + * the actual size is smaller. Use bisection to determine the accessible size + * of flash memory. + * + * Return: the exit code of the operation. + */ +static s32 ixgbe_discover_flash_size(struct ixgbe_hw *hw) +{ + u32 min_size = 0, max_size = IXGBE_ACI_NVM_MAX_OFFSET + 1; + s32 status; + + status = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (status) + return status; + + while ((max_size - min_size) > 1) { + u32 offset = (max_size + min_size) / 2; + u32 len = 1; + u8 data; + + status = ixgbe_read_flat_nvm(hw, offset, &len, &data, false); + if (status == IXGBE_ERR_ACI_ERROR && + hw->aci.last_status == IXGBE_ACI_RC_EINVAL) { + status = IXGBE_SUCCESS; + max_size = offset; + } else if (!status) { + min_size = offset; + } else { + /* an unexpected error occurred */ + goto err_read_flat_nvm; + } + } + + hw->flash.flash_size = max_size; + +err_read_flat_nvm: + ixgbe_release_nvm(hw); + + return status; +} + +/** + * ixgbe_determine_active_flash_banks - Discover active bank for each module + * @hw: pointer to the HW struct + * + * Read the Shadow RAM control word and determine which banks are active for + * the NVM, OROM, and Netlist modules. Also read and calculate the associated + * pointer and size. These values are then cached into the ixgbe_flash_info + * structure for later use in order to calculate the correct offset to read + * from the active module. + * + * Return: the exit code of the operation. + */ +static s32 ixgbe_determine_active_flash_banks(struct ixgbe_hw *hw) +{ + struct ixgbe_bank_info *banks = &hw->flash.banks; + u16 ctrl_word; + s32 status; + + status = ixgbe_read_ee_aci_E610(hw, E610_SR_NVM_CTRL_WORD, &ctrl_word); + if (status) { + return status; + } + + /* Check that the control word indicates validity */ + if ((ctrl_word & IXGBE_SR_CTRL_WORD_1_M) >> IXGBE_SR_CTRL_WORD_1_S != + IXGBE_SR_CTRL_WORD_VALID) { + return IXGBE_ERR_CONFIG; + } + + if (!(ctrl_word & IXGBE_SR_CTRL_WORD_NVM_BANK)) + banks->nvm_bank = IXGBE_1ST_FLASH_BANK; + else + banks->nvm_bank = IXGBE_2ND_FLASH_BANK; + + if (!(ctrl_word & IXGBE_SR_CTRL_WORD_OROM_BANK)) + banks->orom_bank = IXGBE_1ST_FLASH_BANK; + else + banks->orom_bank = IXGBE_2ND_FLASH_BANK; + + if (!(ctrl_word & IXGBE_SR_CTRL_WORD_NETLIST_BANK)) + banks->netlist_bank = IXGBE_1ST_FLASH_BANK; + else + banks->netlist_bank = IXGBE_2ND_FLASH_BANK; + + status = ixgbe_read_sr_pointer(hw, E610_SR_1ST_NVM_BANK_PTR, + &banks->nvm_ptr); + if (status) { + return status; + } + + status = ixgbe_read_sr_area_size(hw, E610_SR_NVM_BANK_SIZE, + &banks->nvm_size); + if (status) { + return status; + } + + status = ixgbe_read_sr_pointer(hw, E610_SR_1ST_OROM_BANK_PTR, + &banks->orom_ptr); + if (status) { + return status; + } + + status = ixgbe_read_sr_area_size(hw, E610_SR_OROM_BANK_SIZE, + &banks->orom_size); + if (status) { + return status; + } + + status = ixgbe_read_sr_pointer(hw, E610_SR_NETLIST_BANK_PTR, + &banks->netlist_ptr); + if (status) { + return status; + } + + status = ixgbe_read_sr_area_size(hw, E610_SR_NETLIST_BANK_SIZE, + &banks->netlist_size); + if (status) { + return status; + } + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_init_nvm - initializes NVM setting + * @hw: pointer to the HW struct + * + * Read and populate NVM settings such as Shadow RAM size, + * max_timeout, and blank_nvm_mode + * + * Return: the exit code of the operation. + */ +s32 ixgbe_init_nvm(struct ixgbe_hw *hw) +{ + struct ixgbe_flash_info *flash = &hw->flash; + u32 fla, gens_stat, status; + u8 sr_size; + + /* The SR size is stored regardless of the NVM programming mode + * as the blank mode may be used in the factory line. + */ + gens_stat = IXGBE_READ_REG(hw, GLNVM_GENS); + sr_size = (gens_stat & GLNVM_GENS_SR_SIZE_M) >> GLNVM_GENS_SR_SIZE_S; + + /* Switching to words (sr_size contains power of 2) */ + flash->sr_words = BIT(sr_size) * IXGBE_SR_WORDS_IN_1KB; + + /* Check if we are in the normal or blank NVM programming mode */ + fla = IXGBE_READ_REG(hw, GLNVM_FLA); + if (fla & GLNVM_FLA_LOCKED_M) { /* Normal programming mode */ + flash->blank_nvm_mode = false; + } else { + /* Blank programming mode */ + flash->blank_nvm_mode = true; + return IXGBE_ERR_NVM_BLANK_MODE; + } + + status = ixgbe_discover_flash_size(hw); + if (status) { + return status; + } + + status = ixgbe_determine_active_flash_banks(hw); + if (status) { + return status; + } + + status = ixgbe_get_nvm_ver_info(hw, IXGBE_ACTIVE_FLASH_BANK, + &flash->nvm); + if (status) { + return status; + } + + /* read the netlist version information */ + status = ixgbe_get_netlist_info(hw, IXGBE_ACTIVE_FLASH_BANK, + &flash->netlist); + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_sanitize_operate - Clear the user data + * @hw: pointer to the HW struct + * + * Clear user data from NVM using ACI command (0x070C). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_sanitize_operate(struct ixgbe_hw *hw) +{ + s32 status; + u8 values; + + u8 cmd_flags = IXGBE_ACI_SANITIZE_REQ_OPERATE | + IXGBE_ACI_SANITIZE_OPERATE_SUBJECT_CLEAR; + + status = ixgbe_sanitize_nvm(hw, cmd_flags, &values); + if (status) + return status; + if ((!(values & IXGBE_ACI_SANITIZE_OPERATE_HOST_CLEAN_DONE) && + !(values & IXGBE_ACI_SANITIZE_OPERATE_BMC_CLEAN_DONE)) || + ((values & IXGBE_ACI_SANITIZE_OPERATE_HOST_CLEAN_DONE) && + !(values & IXGBE_ACI_SANITIZE_OPERATE_HOST_CLEAN_SUCCESS)) || + ((values & IXGBE_ACI_SANITIZE_OPERATE_BMC_CLEAN_DONE) && + !(values & IXGBE_ACI_SANITIZE_OPERATE_BMC_CLEAN_SUCCESS))) + return IXGBE_ERR_ACI_ERROR; + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_sanitize_nvm - Sanitize NVM + * @hw: pointer to the HW struct + * @cmd_flags: flag to the ACI command + * @values: values returned from the command + * + * Sanitize NVM using ACI command (0x070C). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_sanitize_nvm(struct ixgbe_hw *hw, u8 cmd_flags, u8 *values) +{ + struct ixgbe_aci_desc desc; + struct ixgbe_aci_cmd_nvm_sanitization *cmd; + s32 status; + + cmd = &desc.params.nvm_sanitization; + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_sanitization); + cmd->cmd_flags = cmd_flags; + + status = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + if (values) + *values = cmd->values; + + return status; +} + +/** + * ixgbe_read_sr_word_aci - Reads Shadow RAM via ACI + * @hw: pointer to the HW structure + * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) + * @data: word read from the Shadow RAM + * + * Reads one 16 bit word from the Shadow RAM using ixgbe_read_flat_nvm. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_read_sr_word_aci(struct ixgbe_hw *hw, u16 offset, u16 *data) +{ + u32 bytes = sizeof(u16); + __le16 data_local; + s32 status; + + status = ixgbe_read_flat_nvm(hw, offset * sizeof(u16), &bytes, + (u8 *)&data_local, true); + if (status) + return status; + + *data = IXGBE_LE16_TO_CPU(data_local); + return IXGBE_SUCCESS; +} + +/** + * ixgbe_read_sr_buf_aci - Reads Shadow RAM buf via ACI + * @hw: pointer to the HW structure + * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) + * @words: (in) number of words to read; (out) number of words actually read + * @data: words read from the Shadow RAM + * + * Reads 16 bit words (data buf) from the Shadow RAM. Ownership of the NVM is + * taken before reading the buffer and later released. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_read_sr_buf_aci(struct ixgbe_hw *hw, u16 offset, u16 *words, + u16 *data) +{ + u32 bytes = *words * 2, i; + s32 status; + + status = ixgbe_read_flat_nvm(hw, offset * 2, &bytes, (u8 *)data, true); + + *words = bytes / 2; + + for (i = 0; i < *words; i++) + data[i] = IXGBE_LE16_TO_CPU(((__le16 *)data)[i]); + + return status; +} + +/** + * ixgbe_read_flat_nvm - Read portion of NVM by flat offset + * @hw: pointer to the HW struct + * @offset: offset from beginning of NVM + * @length: (in) number of bytes to read; (out) number of bytes actually read + * @data: buffer to return data in (sized to fit the specified length) + * @read_shadow_ram: if true, read from shadow RAM instead of NVM + * + * Reads a portion of the NVM, as a flat memory space. This function correctly + * breaks read requests across Shadow RAM sectors, prevents Shadow RAM size + * from being exceeded in case of Shadow RAM read requests and ensures that no + * single read request exceeds the maximum 4KB read for a single admin command. + * + * Returns a status code on failure. Note that the data pointer may be + * partially updated if some reads succeed before a failure. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_read_flat_nvm(struct ixgbe_hw *hw, u32 offset, u32 *length, + u8 *data, bool read_shadow_ram) +{ + u32 inlen = *length; + u32 bytes_read = 0; + bool last_cmd; + s32 status; + + *length = 0; + + /* Verify the length of the read if this is for the Shadow RAM */ + if (read_shadow_ram && ((offset + inlen) > + (hw->eeprom.word_size * 2u))) { + return IXGBE_ERR_PARAM; + } + + do { + u32 read_size, sector_offset; + + /* ixgbe_aci_read_nvm cannot read more than 4KB at a time. + * Additionally, a read from the Shadow RAM may not cross over + * a sector boundary. Conveniently, the sector size is also 4KB. + */ + sector_offset = offset % IXGBE_ACI_MAX_BUFFER_SIZE; + read_size = MIN_T(u32, + IXGBE_ACI_MAX_BUFFER_SIZE - sector_offset, + inlen - bytes_read); + + last_cmd = !(bytes_read + read_size < inlen); + + /* ixgbe_aci_read_nvm takes the length as a u16. Our read_size + * is calculated using a u32, but the IXGBE_ACI_MAX_BUFFER_SIZE + * maximum size guarantees that it will fit within the 2 bytes. + */ + status = ixgbe_aci_read_nvm(hw, IXGBE_ACI_NVM_START_POINT, + offset, (u16)read_size, + data + bytes_read, last_cmd, + read_shadow_ram); + if (status) + break; + + bytes_read += read_size; + offset += read_size; + } while (!last_cmd); + + *length = bytes_read; + return status; +} + +/** + * ixgbe_check_sr_access_params - verify params for Shadow RAM R/W operations. + * @hw: pointer to the HW structure + * @offset: offset in words from module start + * @words: number of words to access + * + * Check if all the parameters are valid + * before performing any Shadow RAM read/write operations. + * + * Return: the exit code of the operation. + * * - IXGBE_SUCCESS - success. + * * - IXGBE_ERR_PARAM - NVM error: offset beyond SR limit or + * NVM error: tried to access more words then the set limit or + * NVM error: cannot spread over two sectors. + */ +static s32 ixgbe_check_sr_access_params(struct ixgbe_hw *hw, u32 offset, + u16 words) +{ + if ((offset + words) > hw->eeprom.word_size) { + return IXGBE_ERR_PARAM; + } + + if (words > IXGBE_SR_SECTOR_SIZE_IN_WORDS) { + /* We can access only up to 4KB (one sector), + * in one Admin Command write + */ + return IXGBE_ERR_PARAM; + } + + if (((offset + (words - 1)) / IXGBE_SR_SECTOR_SIZE_IN_WORDS) != + (offset / IXGBE_SR_SECTOR_SIZE_IN_WORDS)) { + /* A single access cannot spread over two sectors */ + return IXGBE_ERR_PARAM; + } + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_write_sr_word_aci - Writes Shadow RAM word + * @hw: pointer to the HW structure + * @offset: offset of the Shadow RAM word to write + * @data: word to write to the Shadow RAM + * + * Writes a 16 bit word to the Shadow RAM using the admin command. + * NVM ownership must be acquired before calling this function and released + * by a caller. To commit SR to NVM update checksum function should be called. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_write_sr_word_aci(struct ixgbe_hw *hw, u32 offset, const u16 *data) +{ + __le16 data_local = IXGBE_CPU_TO_LE16(*data); + s32 status; + + status = ixgbe_check_sr_access_params(hw, offset, 1); + if (!status) + status = ixgbe_aci_update_nvm(hw, 0, BYTES_PER_WORD * offset, + BYTES_PER_WORD, &data_local, + false, 0); + + return status; +} + +/** + * ixgbe_write_sr_buf_aci - Writes Shadow RAM buf + * @hw: pointer to the HW structure + * @offset: offset of the Shadow RAM buffer to write + * @words: number of words to write + * @data: words to write to the Shadow RAM + * + * Writes a 16 bit word to the Shadow RAM using the admin command. + * NVM ownership must be acquired before calling this function and released + * by a caller. To commit SR to NVM update checksum function should be called. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_write_sr_buf_aci(struct ixgbe_hw *hw, u32 offset, u16 words, + const u16 *data) +{ + __le16 *data_local; + s32 status; + void *vmem; + u32 i; + + vmem = ixgbe_calloc(hw, words, sizeof(u16)); + if (!vmem) + return IXGBE_ERR_OUT_OF_MEM; + data_local = (__le16 *)vmem; + + for (i = 0; i < words; i++) + data_local[i] = IXGBE_CPU_TO_LE16(data[i]); + + /* Here we will only write one buffer as the size of the modules + * mirrored in the Shadow RAM is always less than 4K. + */ + status = ixgbe_check_sr_access_params(hw, offset, words); + if (!status) + status = ixgbe_aci_update_nvm(hw, 0, BYTES_PER_WORD * offset, + BYTES_PER_WORD * words, + data_local, false, 0); + + ixgbe_free(hw, vmem); + + return status; +} + +/** + * ixgbe_aci_alternate_write - write to alternate structure + * @hw: pointer to the hardware structure + * @reg_addr0: address of first dword to be written + * @reg_val0: value to be written under 'reg_addr0' + * @reg_addr1: address of second dword to be written + * @reg_val1: value to be written under 'reg_addr1' + * + * Write one or two dwords to alternate structure using ACI command (0x0900). + * Fields are indicated by 'reg_addr0' and 'reg_addr1' register numbers. + * + * Return: 0 on success and error code on failure. + */ +s32 ixgbe_aci_alternate_write(struct ixgbe_hw *hw, u32 reg_addr0, + u32 reg_val0, u32 reg_addr1, u32 reg_val1) +{ + struct ixgbe_aci_cmd_read_write_alt_direct *cmd; + struct ixgbe_aci_desc desc; + s32 status; + + cmd = &desc.params.read_write_alt_direct; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_write_alt_direct); + cmd->dword0_addr = IXGBE_CPU_TO_LE32(reg_addr0); + cmd->dword1_addr = IXGBE_CPU_TO_LE32(reg_addr1); + cmd->dword0_value = IXGBE_CPU_TO_LE32(reg_val0); + cmd->dword1_value = IXGBE_CPU_TO_LE32(reg_val1); + + status = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + + return status; +} + +/** + * ixgbe_aci_alternate_read - read from alternate structure + * @hw: pointer to the hardware structure + * @reg_addr0: address of first dword to be read + * @reg_val0: pointer for data read from 'reg_addr0' + * @reg_addr1: address of second dword to be read + * @reg_val1: pointer for data read from 'reg_addr1' + * + * Read one or two dwords from alternate structure using ACI command (0x0902). + * Fields are indicated by 'reg_addr0' and 'reg_addr1' register numbers. + * If 'reg_val1' pointer is not passed then only register at 'reg_addr0' + * is read. + * + * Return: 0 on success and error code on failure. + */ +s32 ixgbe_aci_alternate_read(struct ixgbe_hw *hw, u32 reg_addr0, + u32 *reg_val0, u32 reg_addr1, u32 *reg_val1) +{ + struct ixgbe_aci_cmd_read_write_alt_direct *cmd; + struct ixgbe_aci_desc desc; + s32 status; + + cmd = &desc.params.read_write_alt_direct; + + if (!reg_val0) + return IXGBE_ERR_PARAM; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_read_alt_direct); + cmd->dword0_addr = IXGBE_CPU_TO_LE32(reg_addr0); + cmd->dword1_addr = IXGBE_CPU_TO_LE32(reg_addr1); + + status = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + + if (status == IXGBE_SUCCESS) { + *reg_val0 = IXGBE_LE32_TO_CPU(cmd->dword0_value); + + if (reg_val1) + *reg_val1 = IXGBE_LE32_TO_CPU(cmd->dword1_value); + } + + return status; +} + +/** + * ixgbe_aci_alternate_write_done - check if writing to alternate structure + * is done + * @hw: pointer to the HW structure. + * @bios_mode: indicates whether the command is executed by UEFI or legacy BIOS + * @reset_needed: indicates the SW should trigger GLOBAL reset + * + * Indicates to the FW that alternate structures have been changed. + * + * Return: 0 on success and error code on failure. + */ +s32 ixgbe_aci_alternate_write_done(struct ixgbe_hw *hw, u8 bios_mode, + bool *reset_needed) +{ + struct ixgbe_aci_cmd_done_alt_write *cmd; + struct ixgbe_aci_desc desc; + s32 status; + + cmd = &desc.params.done_alt_write; + + if (!reset_needed) + return IXGBE_ERR_PARAM; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_done_alt_write); + cmd->flags = bios_mode; + + status = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + if (!status) + *reset_needed = (IXGBE_LE16_TO_CPU(cmd->flags) & + IXGBE_ACI_RESP_RESET_NEEDED) != 0; + + return status; +} + +/** + * ixgbe_aci_alternate_clear - clear alternate structure + * @hw: pointer to the HW structure. + * + * Clear the alternate structures of the port from which the function + * is called. + * + * Return: 0 on success and error code on failure. + */ +s32 ixgbe_aci_alternate_clear(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_desc desc; + s32 status; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, + ixgbe_aci_opc_clear_port_alt_write); + + status = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + + return status; +} + +/** + * ixgbe_aci_get_internal_data - get internal FW/HW data + * @hw: pointer to the hardware structure + * @cluster_id: specific cluster to dump + * @table_id: table ID within cluster + * @start: index of line in the block to read + * @buf: dump buffer + * @buf_size: dump buffer size + * @ret_buf_size: return buffer size (returned by FW) + * @ret_next_cluster: next cluster to read (returned by FW) + * @ret_next_table: next block to read (returned by FW) + * @ret_next_index: next index to read (returned by FW) + * + * Get internal FW/HW data using ACI command (0xFF08) for debug purposes. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_get_internal_data(struct ixgbe_hw *hw, u16 cluster_id, + u16 table_id, u32 start, void *buf, + u16 buf_size, u16 *ret_buf_size, + u16 *ret_next_cluster, u16 *ret_next_table, + u32 *ret_next_index) +{ + struct ixgbe_aci_cmd_debug_dump_internals *cmd; + struct ixgbe_aci_desc desc; + s32 status; + + cmd = &desc.params.debug_dump; + + if (buf_size == 0 || !buf) + return IXGBE_ERR_PARAM; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, + ixgbe_aci_opc_debug_dump_internals); + + cmd->cluster_id = IXGBE_CPU_TO_LE16(cluster_id); + cmd->table_id = IXGBE_CPU_TO_LE16(table_id); + cmd->idx = IXGBE_CPU_TO_LE32(start); + + status = ixgbe_aci_send_cmd(hw, &desc, buf, buf_size); + + if (!status) { + if (ret_buf_size) + *ret_buf_size = IXGBE_LE16_TO_CPU(desc.datalen); + if (ret_next_cluster) + *ret_next_cluster = IXGBE_LE16_TO_CPU(cmd->cluster_id); + if (ret_next_table) + *ret_next_table = IXGBE_LE16_TO_CPU(cmd->table_id); + if (ret_next_index) + *ret_next_index = IXGBE_LE32_TO_CPU(cmd->idx); + } + + return status; +} + +/** + * ixgbe_validate_nvm_rw_reg - Check that an NVM access request is valid + * @cmd: NVM access command structure + * + * Validates that an NVM access structure is request to read or write a valid + * register offset. First validates that the module and flags are correct, and + * then ensures that the register offset is one of the accepted registers. + * + * Return: 0 if the register access is valid, out of range error code otherwise. + */ +static s32 +ixgbe_validate_nvm_rw_reg(struct ixgbe_nvm_access_cmd *cmd) +{ + u16 i; + + switch (cmd->offset) { + case GL_HICR: + case GL_HICR_EN: /* Note, this register is read only */ + case GL_FWSTS: + case GL_MNG_FWSM: + case GLNVM_GENS: + case GLNVM_FLA: + case GL_FWRESETCNT: + return 0; + default: + break; + } + + for (i = 0; i <= GL_HIDA_MAX_INDEX; i++) + if (cmd->offset == (u32)GL_HIDA(i)) + return 0; + + for (i = 0; i <= GL_HIBA_MAX_INDEX; i++) + if (cmd->offset == (u32)GL_HIBA(i)) + return 0; + + /* All other register offsets are not valid */ + return IXGBE_ERR_OUT_OF_RANGE; +} + +/** + * ixgbe_nvm_access_read - Handle an NVM read request + * @hw: pointer to the HW struct + * @cmd: NVM access command to process + * @data: storage for the register value read + * + * Process an NVM access request to read a register. + * + * Return: 0 if the register read is valid and successful, + * out of range error code otherwise. + */ +static s32 ixgbe_nvm_access_read(struct ixgbe_hw *hw, + struct ixgbe_nvm_access_cmd *cmd, + struct ixgbe_nvm_access_data *data) +{ + s32 status; + + /* Always initialize the output data, even on failure */ + memset(&data->regval, 0, cmd->data_size); + + /* Make sure this is a valid read/write access request */ + status = ixgbe_validate_nvm_rw_reg(cmd); + if (status) + return status; + + DEBUGOUT1("NVM access: reading register %08x\n", cmd->offset); + + /* Read the register and store the contents in the data field */ + data->regval = IXGBE_READ_REG(hw, cmd->offset); + + return 0; +} + +/** + * ixgbe_nvm_access_write - Handle an NVM write request + * @hw: pointer to the HW struct + * @cmd: NVM access command to process + * @data: NVM access data to write + * + * Process an NVM access request to write a register. + * + * Return: 0 if the register write is valid and successful, + * out of range error code otherwise. + */ +static s32 ixgbe_nvm_access_write(struct ixgbe_hw *hw, + struct ixgbe_nvm_access_cmd *cmd, + struct ixgbe_nvm_access_data *data) +{ + s32 status; + + /* Make sure this is a valid read/write access request */ + status = ixgbe_validate_nvm_rw_reg(cmd); + if (status) + return status; + + /* Reject requests to write to read-only registers */ + switch (cmd->offset) { + case GL_HICR_EN: + return IXGBE_ERR_OUT_OF_RANGE; + default: + break; + } + + DEBUGOUT2("NVM access: writing register %08x with value %08x\n", + cmd->offset, data->regval); + + /* Write the data field to the specified register */ + IXGBE_WRITE_REG(hw, cmd->offset, data->regval); + + return 0; +} + +/** + * ixgbe_handle_nvm_access - Handle an NVM access request + * @hw: pointer to the HW struct + * @cmd: NVM access command info + * @data: pointer to read or return data + * + * Process an NVM access request. Read the command structure information and + * determine if it is valid. If not, report an error indicating the command + * was invalid. + * + * For valid commands, perform the necessary function, copying the data into + * the provided data buffer. + * + * Return: 0 if the nvm access request is valid and successful, + * error code otherwise. + */ +s32 ixgbe_handle_nvm_access(struct ixgbe_hw *hw, + struct ixgbe_nvm_access_cmd *cmd, + struct ixgbe_nvm_access_data *data) +{ + switch (cmd->command) { + case IXGBE_NVM_CMD_READ: + return ixgbe_nvm_access_read(hw, cmd, data); + case IXGBE_NVM_CMD_WRITE: + return ixgbe_nvm_access_write(hw, cmd, data); + default: + return IXGBE_ERR_PARAM; + } +} + +/** + * ixgbe_aci_set_health_status_config - Configure FW health events + * @hw: pointer to the HW struct + * @event_source: type of diagnostic events to enable + * + * Configure the health status event types that the firmware will send to this + * PF using ACI command (0xFF20). The supported event types are: PF-specific, + * all PFs, and global. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_aci_set_health_status_config(struct ixgbe_hw *hw, u8 event_source) +{ + struct ixgbe_aci_cmd_set_health_status_config *cmd; + struct ixgbe_aci_desc desc; + + cmd = &desc.params.set_health_status_config; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, + ixgbe_aci_opc_set_health_status_config); + + cmd->event_source = event_source; + + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** + * ixgbe_init_ops_E610 - Inits func ptrs and MAC type + * @hw: pointer to hardware structure + * + * Initialize the function pointers and assign the MAC type for E610. + * Does not touch the hardware. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_init_ops_E610(struct ixgbe_hw *hw) +{ + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; + s32 ret_val; + + ret_val = ixgbe_init_ops_X550(hw); + + /* MAC */ + mac->ops.reset_hw = ixgbe_reset_hw_E610; + mac->ops.start_hw = ixgbe_start_hw_E610; + mac->ops.get_media_type = ixgbe_get_media_type_E610; + mac->ops.get_supported_physical_layer = + ixgbe_get_supported_physical_layer_E610; + mac->ops.get_san_mac_addr = NULL; + mac->ops.set_san_mac_addr = NULL; + mac->ops.get_wwn_prefix = NULL; + mac->ops.setup_link = ixgbe_setup_link_E610; + mac->ops.check_link = ixgbe_check_link_E610; + mac->ops.get_link_capabilities = ixgbe_get_link_capabilities_E610; + mac->ops.setup_fc = ixgbe_setup_fc_E610; + mac->ops.fc_autoneg = ixgbe_fc_autoneg_E610; + mac->ops.set_fw_drv_ver = ixgbe_set_fw_drv_ver_E610; + mac->ops.disable_rx = ixgbe_disable_rx_E610; + mac->ops.setup_eee = ixgbe_setup_eee_E610; + mac->ops.fw_recovery_mode = ixgbe_fw_recovery_mode_E610; + mac->ops.fw_rollback_mode = ixgbe_fw_rollback_mode_E610; + mac->ops.get_fw_tsam_mode = ixgbe_get_fw_tsam_mode_E610; + mac->ops.get_fw_version = ixgbe_aci_get_fw_ver; + mac->ops.get_nvm_version = ixgbe_get_active_nvm_ver; + mac->ops.get_thermal_sensor_data = NULL; + mac->ops.init_thermal_sensor_thresh = NULL; + + /* PHY */ + phy->ops.init = ixgbe_init_phy_ops_E610; + phy->ops.identify = ixgbe_identify_phy_E610; + phy->eee_speeds_supported = IXGBE_LINK_SPEED_10_FULL | + IXGBE_LINK_SPEED_100_FULL | + IXGBE_LINK_SPEED_1GB_FULL; + phy->eee_speeds_advertised = phy->eee_speeds_supported; + + /* Additional ops overrides for e610 to go here */ + eeprom->ops.init_params = ixgbe_init_eeprom_params_E610; + eeprom->ops.read = ixgbe_read_ee_aci_E610; + eeprom->ops.read_buffer = ixgbe_read_ee_aci_buffer_E610; + eeprom->ops.write = ixgbe_write_ee_aci_E610; + eeprom->ops.write_buffer = ixgbe_write_ee_aci_buffer_E610; + eeprom->ops.calc_checksum = ixgbe_calc_eeprom_checksum_E610; + eeprom->ops.update_checksum = ixgbe_update_eeprom_checksum_E610; + eeprom->ops.validate_checksum = ixgbe_validate_eeprom_checksum_E610; + eeprom->ops.read_pba_string = ixgbe_read_pba_string_E610; + + /* Initialize bus function number */ + hw->mac.ops.set_lan_id(hw); + + return ret_val; +} + +/** + * ixgbe_reset_hw_E610 - Perform hardware reset + * @hw: pointer to hardware structure + * + * Resets the hardware by resetting the transmit and receive units, masks + * and clears all interrupts, and perform a reset. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_reset_hw_E610(struct ixgbe_hw *hw) +{ + u32 swfw_mask = hw->phy.phy_semaphore_mask; + u32 ctrl, i; + s32 status; + + DEBUGFUNC("ixgbe_reset_hw_E610"); + + /* Call adapter stop to disable tx/rx and clear interrupts */ + status = hw->mac.ops.stop_adapter(hw); + if (status != IXGBE_SUCCESS) + goto reset_hw_out; + + /* flush pending Tx transactions */ + ixgbe_clear_tx_pending(hw); + + status = hw->phy.ops.init(hw); + if (status != IXGBE_SUCCESS) + DEBUGOUT1("Failed to initialize PHY ops, STATUS = %d\n", + status); +mac_reset_top: + status = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask); + if (status != IXGBE_SUCCESS) { + ERROR_REPORT2(IXGBE_ERROR_CAUTION, + "semaphore failed with %d", status); + return IXGBE_ERR_SWFW_SYNC; + } + ctrl = IXGBE_CTRL_RST; + ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); + IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); + IXGBE_WRITE_FLUSH(hw); + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + + /* Poll for reset bit to self-clear indicating reset is complete */ + for (i = 0; i < 10; i++) { + usec_delay(1); + ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); + if (!(ctrl & IXGBE_CTRL_RST_MASK)) + break; + } + + if (ctrl & IXGBE_CTRL_RST_MASK) { + status = IXGBE_ERR_RESET_FAILED; + ERROR_REPORT1(IXGBE_ERROR_POLLING, + "Reset polling failed to complete.\n"); + } + msec_delay(100); + + /* + * Double resets are required for recovery from certain error + * conditions. Between resets, it is necessary to stall to allow time + * for any pending HW events to complete. + */ + if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) { + hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; + goto mac_reset_top; + } + + /* Set the Rx packet buffer size. */ + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(0), 384 << IXGBE_RXPBSIZE_SHIFT); + + /* Store the permanent mac address */ + hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); + + /* + * Store MAC address from RAR0, clear receive address registers, and + * clear the multicast table. Also reset num_rar_entries to 128, + * since we modify this value when programming the SAN MAC address. + */ + hw->mac.num_rar_entries = 128; + hw->mac.ops.init_rx_addrs(hw); + +reset_hw_out: + return status; +} + +/** + * ixgbe_start_hw_E610 - Prepare hardware for Tx/Rx + * @hw: pointer to hardware structure + * + * Gets firmware version and if API version matches it + * starts the hardware using the generic start_hw function + * and the generation start_hw function. + * Then performs revision-specific operations, if any. + **/ +s32 ixgbe_start_hw_E610(struct ixgbe_hw *hw) +{ + s32 ret_val = IXGBE_SUCCESS; + + ret_val = hw->mac.ops.get_fw_version(hw); + if (ret_val) + goto out; + + ret_val = ixgbe_start_hw_generic(hw); + if (ret_val != IXGBE_SUCCESS) + goto out; + + ixgbe_start_hw_gen2(hw); + +out: + return ret_val; +} + +/** + * ixgbe_get_media_type_E610 - Gets media type + * @hw: pointer to the HW struct + * + * In order to get the media type, the function gets PHY + * capabilities and later on use them to identify the PHY type + * checking phy_type_high and phy_type_low. + * + * Return: the type of media in form of ixgbe_media_type enum + * or ixgbe_media_type_unknown in case of an error. + */ +enum ixgbe_media_type ixgbe_get_media_type_E610(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_phy_caps_data pcaps; + u64 phy_mask = 0; + s32 rc; + u8 i; + + rc = ixgbe_update_link_info(hw); + if (rc) { + return ixgbe_media_type_unknown; + } + + /* If there is no link but PHY (dongle) is available SW should use + * Get PHY Caps admin command instead of Get Link Status, find most + * significant bit that is set in PHY types reported by the command + * and use it to discover media type. + */ + if (!(hw->link.link_info.link_info & IXGBE_ACI_LINK_UP) && + (hw->link.link_info.link_info & IXGBE_ACI_MEDIA_AVAILABLE)) { + /* Get PHY Capabilities */ + rc = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_TOPO_CAP_MEDIA, + &pcaps); + if (rc) { + return ixgbe_media_type_unknown; + } + + /* Check if there is some bit set in phy_type_high */ + for (i = 64; i > 0; i--) { + phy_mask = (u64)((u64)1 << (i - 1)); + if ((pcaps.phy_type_high & phy_mask) != 0) { + /* If any bit is set treat it as PHY type */ + hw->link.link_info.phy_type_high = phy_mask; + hw->link.link_info.phy_type_low = 0; + break; + } + phy_mask = 0; + } + + /* If nothing found in phy_type_high search in phy_type_low */ + if (phy_mask == 0) { + for (i = 64; i > 0; i--) { + phy_mask = (u64)((u64)1 << (i - 1)); + if ((pcaps.phy_type_low & phy_mask) != 0) { + /* If any bit is set treat it as PHY type */ + hw->link.link_info.phy_type_high = 0; + hw->link.link_info.phy_type_low = phy_mask; + break; + } + } + } + + } + + /* Based on link status or search above try to discover media type */ + hw->phy.media_type = ixgbe_get_media_type_from_phy_type(hw); + + return hw->phy.media_type; +} + +/** + * ixgbe_get_supported_physical_layer_E610 - Returns physical layer type + * @hw: pointer to hardware structure + * + * Determines physical layer capabilities of the current configuration. + * + * Return: the exit code of the operation. + **/ +u64 ixgbe_get_supported_physical_layer_E610(struct ixgbe_hw *hw) +{ + u64 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; + struct ixgbe_aci_cmd_get_phy_caps_data pcaps; + u64 phy_type; + s32 rc; + + rc = ixgbe_aci_get_phy_caps(hw, false, IXGBE_ACI_REPORT_TOPO_CAP_MEDIA, + &pcaps); + if (rc) + return IXGBE_PHYSICAL_LAYER_UNKNOWN; + + phy_type = IXGBE_LE64_TO_CPU(pcaps.phy_type_low); + if(phy_type & IXGBE_PHY_TYPE_LOW_10GBASE_T) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T; + if(phy_type & IXGBE_PHY_TYPE_LOW_1000BASE_T) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; + if(phy_type & IXGBE_PHY_TYPE_LOW_100BASE_TX) + physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX; + if(phy_type & IXGBE_PHY_TYPE_LOW_10GBASE_LR) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_LR; + if(phy_type & IXGBE_PHY_TYPE_LOW_10GBASE_SR) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_SR; + if(phy_type & IXGBE_PHY_TYPE_LOW_1000BASE_KX) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_KX; + if(phy_type & IXGBE_PHY_TYPE_LOW_10GBASE_KR_CR1) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KR; + if(phy_type & IXGBE_PHY_TYPE_LOW_1000BASE_SX) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_SX; + if(phy_type & IXGBE_PHY_TYPE_LOW_2500BASE_KX) + physical_layer |= IXGBE_PHYSICAL_LAYER_2500BASE_KX; + if(phy_type & IXGBE_PHY_TYPE_LOW_2500BASE_T) + physical_layer |= IXGBE_PHYSICAL_LAYER_2500BASE_T; + if(phy_type & IXGBE_PHY_TYPE_LOW_5GBASE_T) + physical_layer |= IXGBE_PHYSICAL_LAYER_5000BASE_T; + + phy_type = IXGBE_LE64_TO_CPU(pcaps.phy_type_high); + if(phy_type & IXGBE_PHY_TYPE_HIGH_10BASE_T) + physical_layer |= IXGBE_PHYSICAL_LAYER_10BASE_T; + + return physical_layer; +} + +/** + * ixgbe_setup_link_E610 - Set up link + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg_wait: true when waiting for completion is needed + * + * Set up the link with the specified speed. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_setup_link_E610(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg_wait) +{ + /* Simply request FW to perform proper PHY setup */ + return hw->phy.ops.setup_link_speed(hw, speed, autoneg_wait); +} + +/** + * ixgbe_check_link_E610 - Determine link and speed status + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @link_up: true when link is up + * @link_up_wait_to_complete: bool used to wait for link up or not + * + * Determine if the link is up and the current link speed + * using ACI command (0x0607). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_check_link_E610(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up, bool link_up_wait_to_complete) +{ + s32 rc; + u32 i; + + if (!speed || !link_up) + return IXGBE_ERR_PARAM; + + /* Set get_link_info flag to ensure that fresh + * link information will be obtained from FW + * by sending Get Link Status admin command. */ + hw->link.get_link_info = true; + + /* Update link information in adapter context. */ + rc = ixgbe_get_link_status(hw, link_up); + if (rc) + return rc; + + /* Wait for link up if it was requested. */ + if (link_up_wait_to_complete && *link_up == false) { + for (i = 0; i < hw->mac.max_link_up_time; i++) { + msec_delay(100); + hw->link.get_link_info = true; + rc = ixgbe_get_link_status(hw, link_up); + if (rc) + return rc; + if (*link_up) + break; + } + } + + /* Use link information in adapter context updated by the call + * to ixgbe_get_link_status() to determine current link speed. + * Link speed information is valid only when link up was + * reported by FW. */ + if (*link_up) { + switch (hw->link.link_info.link_speed) { + case IXGBE_ACI_LINK_SPEED_10MB: + *speed = IXGBE_LINK_SPEED_10_FULL; + break; + case IXGBE_ACI_LINK_SPEED_100MB: + *speed = IXGBE_LINK_SPEED_100_FULL; + break; + case IXGBE_ACI_LINK_SPEED_1000MB: + *speed = IXGBE_LINK_SPEED_1GB_FULL; + break; + case IXGBE_ACI_LINK_SPEED_2500MB: + *speed = IXGBE_LINK_SPEED_2_5GB_FULL; + break; + case IXGBE_ACI_LINK_SPEED_5GB: + *speed = IXGBE_LINK_SPEED_5GB_FULL; + break; + case IXGBE_ACI_LINK_SPEED_10GB: + *speed = IXGBE_LINK_SPEED_10GB_FULL; + break; + default: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + break; + } + } else { + *speed = IXGBE_LINK_SPEED_UNKNOWN; + } + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_get_link_capabilities_E610 - Determine link capabilities + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @autoneg: true when autoneg or autotry is enabled + * + * Determine speed and AN parameters of a link. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_get_link_capabilities_E610(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *autoneg) +{ + if (!speed || !autoneg) + return IXGBE_ERR_PARAM; + + *autoneg = true; + *speed = hw->phy.speeds_supported; + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_cfg_phy_fc - Configure PHY Flow Control (FC) data based on FC mode + * @hw: pointer to hardware structure + * @cfg: PHY configuration data to set FC mode + * @req_mode: FC mode to configure + * + * Configures PHY Flow Control according to the provided configuration. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_cfg_phy_fc(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_set_phy_cfg_data *cfg, + enum ixgbe_fc_mode req_mode) +{ + struct ixgbe_aci_cmd_get_phy_caps_data* pcaps = NULL; + s32 status = IXGBE_SUCCESS; + u8 pause_mask = 0x0; + + if (!cfg) + return IXGBE_ERR_PARAM; + + switch (req_mode) { + case ixgbe_fc_auto: + { + pcaps = (struct ixgbe_aci_cmd_get_phy_caps_data *) + ixgbe_malloc(hw, sizeof(*pcaps)); + if (!pcaps) { + status = IXGBE_ERR_OUT_OF_MEM; + goto out; + } + + /* Query the value of FC that both the NIC and the attached + * media can do. */ + status = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_TOPO_CAP_MEDIA, pcaps); + if (status) + goto out; + + pause_mask |= pcaps->caps & IXGBE_ACI_PHY_EN_TX_LINK_PAUSE; + pause_mask |= pcaps->caps & IXGBE_ACI_PHY_EN_RX_LINK_PAUSE; + + break; + } + case ixgbe_fc_full: + pause_mask |= IXGBE_ACI_PHY_EN_TX_LINK_PAUSE; + pause_mask |= IXGBE_ACI_PHY_EN_RX_LINK_PAUSE; + break; + case ixgbe_fc_rx_pause: + pause_mask |= IXGBE_ACI_PHY_EN_RX_LINK_PAUSE; + break; + case ixgbe_fc_tx_pause: + pause_mask |= IXGBE_ACI_PHY_EN_TX_LINK_PAUSE; + break; + default: + break; + } + + /* clear the old pause settings */ + cfg->caps &= ~(IXGBE_ACI_PHY_EN_TX_LINK_PAUSE | + IXGBE_ACI_PHY_EN_RX_LINK_PAUSE); + + /* set the new capabilities */ + cfg->caps |= pause_mask; + +out: + if (pcaps) + ixgbe_free(hw, pcaps); + return status; +} + +/** + * ixgbe_setup_fc_E610 - Set up flow control + * @hw: pointer to hardware structure + * + * Set up flow control. This has to be done during init time. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_setup_fc_E610(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_phy_caps_data pcaps = { 0 }; + struct ixgbe_aci_cmd_set_phy_cfg_data cfg = { 0 }; + s32 status; + + /* Get the current PHY config */ + status = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_ACTIVE_CFG, &pcaps); + if (status) + return status; + + ixgbe_copy_phy_caps_to_cfg(&pcaps, &cfg); + + /* Configure the set PHY data */ + status = ixgbe_cfg_phy_fc(hw, &cfg, hw->fc.requested_mode); + if (status) + return status; + + /* If the capabilities have changed, then set the new config */ + if (cfg.caps != pcaps.caps) { + cfg.caps |= IXGBE_ACI_PHY_ENA_AUTO_LINK_UPDT; + + status = ixgbe_aci_set_phy_cfg(hw, &cfg); + if (status) + return status; + } + + return status; +} + +/** + * ixgbe_fc_autoneg_E610 - Configure flow control + * @hw: pointer to hardware structure + * + * Configure Flow Control. + */ +void ixgbe_fc_autoneg_E610(struct ixgbe_hw *hw) +{ + s32 status; + + /* Get current link status. + * Current FC mode will be stored in the hw context. */ + status = ixgbe_aci_get_link_info(hw, false, NULL); + if (status) { + goto out; + } + + /* Check if the link is up */ + if (!(hw->link.link_info.link_info & IXGBE_ACI_LINK_UP)) { + status = IXGBE_ERR_FC_NOT_NEGOTIATED; + goto out; + } + + /* Check if auto-negotiation has completed */ + if (!(hw->link.link_info.an_info & IXGBE_ACI_AN_COMPLETED)) { + status = IXGBE_ERR_FC_NOT_NEGOTIATED; + goto out; + } + +out: + if (status == IXGBE_SUCCESS) { + hw->fc.fc_was_autonegged = true; + } else { + hw->fc.fc_was_autonegged = false; + hw->fc.current_mode = hw->fc.requested_mode; + } +} + +/** + * ixgbe_set_fw_drv_ver_E610 - Send driver version to FW + * @hw: pointer to the HW structure + * @maj: driver version major number + * @minor: driver version minor number + * @build: driver version build number + * @sub: driver version sub build number + * @len: length of driver_ver string + * @driver_ver: driver string + * + * Send driver version number to Firmware using ACI command (0x0002). + * + * Return: the exit code of the operation. + * IXGBE_SUCCESS - OK + * IXGBE_ERR_PARAM - incorrect parameters were given + * IXGBE_ERR_ACI_ERROR - encountered an error during sending the command + * IXGBE_ERR_ACI_TIMEOUT - a timeout occurred + * IXGBE_ERR_OUT_OF_MEM - ran out of memory + */ +s32 ixgbe_set_fw_drv_ver_E610(struct ixgbe_hw *hw, u8 maj, u8 minor, u8 build, + u8 sub, u16 len, const char *driver_ver) +{ + size_t limited_len = min(len, (u16)IXGBE_DRV_VER_STR_LEN_E610); + struct ixgbe_driver_ver dv; + + DEBUGFUNC("ixgbe_set_fw_drv_ver_E610"); + + if (!len || !driver_ver) + return IXGBE_ERR_PARAM; + + dv.major_ver = maj; + dv.minor_ver = minor; + dv.build_ver = build; + dv.subbuild_ver = sub; + + memset(dv.driver_string, 0, IXGBE_DRV_VER_STR_LEN_E610); + memcpy(dv.driver_string, driver_ver, limited_len); + + return ixgbe_aci_send_driver_ver(hw, &dv); +} + +/** + * ixgbe_disable_rx_E610 - Disable RX unit + * @hw: pointer to hardware structure + * + * Disable RX DMA unit on E610 with use of ACI command (0x000C). + * + * Return: the exit code of the operation. + */ +void ixgbe_disable_rx_E610(struct ixgbe_hw *hw) +{ + u32 rxctrl; + + DEBUGFUNC("ixgbe_disable_rx_E610"); + + rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); + if (rxctrl & IXGBE_RXCTRL_RXEN) { + u32 pfdtxgswc; + s32 status; + + pfdtxgswc = IXGBE_READ_REG(hw, IXGBE_PFDTXGSWC); + if (pfdtxgswc & IXGBE_PFDTXGSWC_VT_LBEN) { + pfdtxgswc &= ~IXGBE_PFDTXGSWC_VT_LBEN; + IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, pfdtxgswc); + hw->mac.set_lben = true; + } else { + hw->mac.set_lben = false; + } + + status = ixgbe_aci_disable_rxen(hw); + + /* If we fail - disable RX using register write */ + if (status) { + rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); + if (rxctrl & IXGBE_RXCTRL_RXEN) { + rxctrl &= ~IXGBE_RXCTRL_RXEN; + IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl); + } + } + } +} + +/** + * ixgbe_setup_eee_E610 - Enable/disable EEE support + * @hw: pointer to the HW structure + * @enable_eee: boolean flag to enable EEE + * + * Enables/disable EEE based on enable_eee flag. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_setup_eee_E610(struct ixgbe_hw *hw, bool enable_eee) +{ + struct ixgbe_aci_cmd_get_phy_caps_data phy_caps = { 0 }; + struct ixgbe_aci_cmd_set_phy_cfg_data phy_cfg = { 0 }; + u16 eee_cap = 0; + s32 status; + + status = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_ACTIVE_CFG, &phy_caps); + if (status != IXGBE_SUCCESS) + return status; + + ixgbe_copy_phy_caps_to_cfg(&phy_caps, &phy_cfg); + + phy_cfg.caps |= IXGBE_ACI_PHY_ENA_LINK; + phy_cfg.caps |= IXGBE_ACI_PHY_ENA_AUTO_LINK_UPDT; + + if (enable_eee) { + if (phy_caps.phy_type_low & IXGBE_PHY_TYPE_LOW_100BASE_TX) + eee_cap |= IXGBE_ACI_PHY_EEE_EN_100BASE_TX; + if (phy_caps.phy_type_low & IXGBE_PHY_TYPE_LOW_1000BASE_T) + eee_cap |= IXGBE_ACI_PHY_EEE_EN_1000BASE_T; + if (phy_caps.phy_type_low & IXGBE_PHY_TYPE_LOW_1000BASE_KX) + eee_cap |= IXGBE_ACI_PHY_EEE_EN_1000BASE_KX; + if (phy_caps.phy_type_low & IXGBE_PHY_TYPE_LOW_10GBASE_T) + eee_cap |= IXGBE_ACI_PHY_EEE_EN_10GBASE_T; + if (phy_caps.phy_type_low & IXGBE_PHY_TYPE_LOW_10GBASE_KR_CR1) + eee_cap |= IXGBE_ACI_PHY_EEE_EN_10GBASE_KR; + if (phy_caps.phy_type_high & IXGBE_PHY_TYPE_HIGH_10BASE_T) + eee_cap |= IXGBE_ACI_PHY_EEE_EN_10BASE_T; + } + + /* Set EEE capability for particular PHY types */ + phy_cfg.eee_cap = IXGBE_CPU_TO_LE16(eee_cap); + + status = ixgbe_aci_set_phy_cfg(hw, &phy_cfg); + + return status; +} + +/** + * ixgbe_fw_recovery_mode_E610 - Check FW NVM recovery mode + * @hw: pointer to hardware structure + * + * Checks FW NVM recovery mode by + * reading the value of the dedicated register. + * + * Return: true if FW is in recovery mode, otherwise false. + */ +bool ixgbe_fw_recovery_mode_E610(struct ixgbe_hw *hw) +{ + u32 fwsm = IXGBE_READ_REG(hw, GL_MNG_FWSM); + + return !!(fwsm & GL_MNG_FWSM_FW_MODES_RECOVERY_M); +} + +/** + * ixgbe_fw_rollback_mode_E610 - Check FW NVM Rollback + * @hw: pointer to hardware structure + * + * Checks FW NVM Rollback mode by reading the + * value of the dedicated register. + * + * Return: true if FW is in Rollback mode, otherwise false. + */ +bool ixgbe_fw_rollback_mode_E610(struct ixgbe_hw *hw) +{ + u32 fwsm = IXGBE_READ_REG(hw, GL_MNG_FWSM); + + return !!(fwsm & GL_MNG_FWSM_FW_MODES_ROLLBACK_M); +} + +/** + * ixgbe_get_fw_tsam_mode_E610 - Check FW NVM Thermal Sensor Autonomous Mode + * @hw: pointer to hardware structure + * + * Checks Thermal Sensor Autonomous Mode by reading the + * value of the dedicated register. + * + * Return: true if FW is in TSAM, otherwise false. + */ +bool ixgbe_get_fw_tsam_mode_E610(struct ixgbe_hw *hw) +{ + u32 fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM_X550EM_a); + + return !!(fwsm & IXGBE_FWSM_TS_ENABLED); +} + +/** + * ixgbe_init_phy_ops_E610 - PHY specific init + * @hw: pointer to hardware structure + * + * Initialize any function pointers that were not able to be + * set during init_shared_code because the PHY type was not known. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_init_phy_ops_E610(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; + s32 ret_val; + + phy->ops.identify_sfp = ixgbe_identify_module_E610; + phy->ops.read_reg = NULL; /* PHY reg access is not required */ + phy->ops.write_reg = NULL; + phy->ops.read_reg_mdi = NULL; + phy->ops.write_reg_mdi = NULL; + phy->ops.setup_link = ixgbe_setup_phy_link_E610; + phy->ops.get_firmware_version = ixgbe_get_phy_firmware_version_E610; + phy->ops.read_i2c_byte = NULL; /* disabled for E610 */ + phy->ops.write_i2c_byte = NULL; /* disabled for E610 */ + phy->ops.read_i2c_sff8472 = ixgbe_read_i2c_sff8472_E610; + phy->ops.read_i2c_eeprom = ixgbe_read_i2c_eeprom_E610; + phy->ops.write_i2c_eeprom = ixgbe_write_i2c_eeprom_E610; + phy->ops.i2c_bus_clear = NULL; /* do not use generic implementation */ + phy->ops.check_overtemp = ixgbe_check_overtemp_E610; + if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) + phy->ops.set_phy_power = ixgbe_set_phy_power_E610; + else + phy->ops.set_phy_power = NULL; + phy->ops.enter_lplu = ixgbe_enter_lplu_E610; + phy->ops.handle_lasi = NULL; /* no implementation for E610 */ + phy->ops.read_i2c_byte_unlocked = NULL; /* disabled for E610 */ + phy->ops.write_i2c_byte_unlocked = NULL; /* disabled for E610 */ + + /* TODO: Set functions pointers based on device ID */ + + /* Identify the PHY */ + ret_val = phy->ops.identify(hw); + if (ret_val != IXGBE_SUCCESS) + return ret_val; + + /* TODO: Set functions pointers based on PHY type */ + + return ret_val; +} + +/** + * ixgbe_identify_phy_E610 - Identify PHY + * @hw: pointer to hardware structure + * + * Determine PHY type, supported speeds and PHY ID. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_identify_phy_E610(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_phy_caps_data pcaps; + s32 rc; + + /* Set PHY type */ + hw->phy.type = ixgbe_phy_fw; + + rc = ixgbe_aci_get_phy_caps(hw, false, IXGBE_ACI_REPORT_TOPO_CAP_MEDIA, + &pcaps); + if (rc) + return rc; + + if (!(pcaps.module_compliance_enforcement & + IXGBE_ACI_MOD_ENFORCE_STRICT_MODE)) { + /* Handle lenient mode */ + rc = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_TOPO_CAP_NO_MEDIA, + &pcaps); + if (rc) + return rc; + } + + /* Determine supported speeds */ + hw->phy.speeds_supported = IXGBE_LINK_SPEED_UNKNOWN; + + if (pcaps.phy_type_high & IXGBE_PHY_TYPE_HIGH_10BASE_T || + pcaps.phy_type_high & IXGBE_PHY_TYPE_HIGH_10M_SGMII) + hw->phy.speeds_supported |= IXGBE_LINK_SPEED_10_FULL; + if (pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_100BASE_TX || + pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_100M_SGMII || + pcaps.phy_type_high & IXGBE_PHY_TYPE_HIGH_100M_USXGMII) + hw->phy.speeds_supported |= IXGBE_LINK_SPEED_100_FULL; + if (pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_1000BASE_T || + pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_1000BASE_SX || + pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_1000BASE_LX || + pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_1000BASE_KX || + pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_1G_SGMII || + pcaps.phy_type_high & IXGBE_PHY_TYPE_HIGH_1G_USXGMII) + hw->phy.speeds_supported |= IXGBE_LINK_SPEED_1GB_FULL; + if (pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_10GBASE_T || + pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_10G_SFI_DA || + pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_10GBASE_SR || + pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_10GBASE_LR || + pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_10GBASE_KR_CR1 || + pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_10G_SFI_AOC_ACC || + pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_10G_SFI_C2C || + pcaps.phy_type_high & IXGBE_PHY_TYPE_HIGH_10G_USXGMII) + hw->phy.speeds_supported |= IXGBE_LINK_SPEED_10GB_FULL; + + /* 2.5 and 5 Gbps link speeds must be excluded from the + * auto-negotiation set used during driver initialization due to + * compatibility issues with certain switches. Those issues do not + * exist in case of E610 2.5G SKU device (0x57b1). + */ + if (!hw->phy.autoneg_advertised && + hw->device_id != IXGBE_DEV_ID_E610_2_5G_T) + hw->phy.autoneg_advertised = hw->phy.speeds_supported; + + if (pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_2500BASE_T || + pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_2500BASE_X || + pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_2500BASE_KX || + pcaps.phy_type_high & IXGBE_PHY_TYPE_HIGH_2500M_SGMII || + pcaps.phy_type_high & IXGBE_PHY_TYPE_HIGH_2500M_USXGMII) + hw->phy.speeds_supported |= IXGBE_LINK_SPEED_2_5GB_FULL; + + if (!hw->phy.autoneg_advertised && + hw->device_id == IXGBE_DEV_ID_E610_2_5G_T) + hw->phy.autoneg_advertised = hw->phy.speeds_supported; + + if (pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_5GBASE_T || + pcaps.phy_type_low & IXGBE_PHY_TYPE_LOW_5GBASE_KR || + pcaps.phy_type_high & IXGBE_PHY_TYPE_HIGH_5G_USXGMII) + hw->phy.speeds_supported |= IXGBE_LINK_SPEED_5GB_FULL; + + /* Set PHY ID */ + memcpy(&hw->phy.id, pcaps.phy_id_oui, sizeof(u32)); + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_identify_module_E610 - Identify SFP module type + * @hw: pointer to hardware structure + * + * Identify the SFP module type. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_identify_module_E610(struct ixgbe_hw *hw) +{ + bool media_available; + u8 module_type; + s32 rc; + + rc = ixgbe_update_link_info(hw); + if (rc) + goto err; + + media_available = + (hw->link.link_info.link_info & + IXGBE_ACI_MEDIA_AVAILABLE) ? true : false; + + if (media_available) { + hw->phy.sfp_type = ixgbe_sfp_type_unknown; + + /* Get module type from hw context updated by ixgbe_update_link_info() */ + module_type = hw->link.link_info.module_type[IXGBE_ACI_MOD_TYPE_IDENT]; + + if ((module_type & IXGBE_ACI_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE) || + (module_type & IXGBE_ACI_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE)) { + hw->phy.sfp_type = ixgbe_sfp_type_da_cu; + } else if (module_type & IXGBE_ACI_MOD_TYPE_BYTE1_10G_BASE_SR) { + hw->phy.sfp_type = ixgbe_sfp_type_sr; + } else if ((module_type & IXGBE_ACI_MOD_TYPE_BYTE1_10G_BASE_LR) || + (module_type & IXGBE_ACI_MOD_TYPE_BYTE1_10G_BASE_LRM)) { + hw->phy.sfp_type = ixgbe_sfp_type_lr; + } + rc = IXGBE_SUCCESS; + } else { + hw->phy.sfp_type = ixgbe_sfp_type_not_present; + rc = IXGBE_ERR_SFP_NOT_PRESENT; + } +err: + return rc; +} + +/** + * ixgbe_setup_phy_link_E610 - Sets up firmware-controlled PHYs + * @hw: pointer to hardware structure + * + * Set the parameters for the firmware-controlled PHYs. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_setup_phy_link_E610(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_phy_caps_data pcaps; + struct ixgbe_aci_cmd_set_phy_cfg_data pcfg; + u8 rmode = IXGBE_ACI_REPORT_TOPO_CAP_MEDIA; + u64 sup_phy_type_low, sup_phy_type_high; + s32 rc; + + rc = ixgbe_aci_get_link_info(hw, false, NULL); + if (rc) { + goto err; + } + + /* If media is not available get default config */ + if (!(hw->link.link_info.link_info & IXGBE_ACI_MEDIA_AVAILABLE)) + rmode = IXGBE_ACI_REPORT_DFLT_CFG; + + rc = ixgbe_aci_get_phy_caps(hw, false, rmode, &pcaps); + if (rc) { + goto err; + } + + sup_phy_type_low = pcaps.phy_type_low; + sup_phy_type_high = pcaps.phy_type_high; + + /* Get Active configuration to avoid unintended changes */ + rc = ixgbe_aci_get_phy_caps(hw, false, IXGBE_ACI_REPORT_ACTIVE_CFG, + &pcaps); + if (rc) { + goto err; + } + ixgbe_copy_phy_caps_to_cfg(&pcaps, &pcfg); + + /* Set default PHY types for a given speed */ + pcfg.phy_type_low = 0; + pcfg.phy_type_high = 0; + + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10_FULL) { + pcfg.phy_type_high |= IXGBE_PHY_TYPE_HIGH_10BASE_T; + pcfg.phy_type_high |= IXGBE_PHY_TYPE_HIGH_10M_SGMII; + } + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) { + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_100BASE_TX; + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_100M_SGMII; + pcfg.phy_type_high |= IXGBE_PHY_TYPE_HIGH_100M_USXGMII; + } + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) { + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_1000BASE_T; + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_1000BASE_SX; + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_1000BASE_LX; + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_1000BASE_KX; + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_1G_SGMII; + pcfg.phy_type_high |= IXGBE_PHY_TYPE_HIGH_1G_USXGMII; + } + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_2_5GB_FULL) { + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_2500BASE_T; + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_2500BASE_X; + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_2500BASE_KX; + pcfg.phy_type_high |= IXGBE_PHY_TYPE_HIGH_2500M_SGMII; + pcfg.phy_type_high |= IXGBE_PHY_TYPE_HIGH_2500M_USXGMII; + } + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_5GB_FULL) { + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_5GBASE_T; + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_5GBASE_KR; + pcfg.phy_type_high |= IXGBE_PHY_TYPE_HIGH_5G_USXGMII; + } + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) { + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_10GBASE_T; + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_10G_SFI_DA; + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_10GBASE_SR; + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_10GBASE_LR; + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_10GBASE_KR_CR1; + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_10G_SFI_AOC_ACC; + pcfg.phy_type_low |= IXGBE_PHY_TYPE_LOW_10G_SFI_C2C; + pcfg.phy_type_high |= IXGBE_PHY_TYPE_HIGH_10G_USXGMII; + } + + /* Mask the set values to avoid requesting unsupported link types */ + pcfg.phy_type_low &= sup_phy_type_low; + pcfg.phy_type_high &= sup_phy_type_high; + + if (pcfg.phy_type_high != pcaps.phy_type_high || + pcfg.phy_type_low != pcaps.phy_type_low || + pcfg.caps != pcaps.caps) { + pcfg.caps |= IXGBE_ACI_PHY_ENA_LINK; + pcfg.caps |= IXGBE_ACI_PHY_ENA_AUTO_LINK_UPDT; + + rc = ixgbe_aci_set_phy_cfg(hw, &pcfg); + } + +err: + return rc; +} + +/** + * ixgbe_get_phy_firmware_version_E610 - Gets the PHY Firmware Version + * @hw: pointer to hardware structure + * @firmware_version: pointer to the PHY Firmware Version + * + * Determines PHY FW version based on response to Get PHY Capabilities + * admin command (0x0600). + * + * Return: the exit code of the operation. + */ +s32 ixgbe_get_phy_firmware_version_E610(struct ixgbe_hw *hw, + u16 *firmware_version) +{ + struct ixgbe_aci_cmd_get_phy_caps_data pcaps; + s32 status; + + if (!firmware_version) + return IXGBE_ERR_PARAM; + + status = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_ACTIVE_CFG, + &pcaps); + if (status) + return status; + + /* TODO: determine which bytes of the 8-byte phy_fw_ver + * field should be written to the 2-byte firmware_version + * output argument. */ + memcpy(firmware_version, pcaps.phy_fw_ver, sizeof(u16)); + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_read_i2c_sff8472_E610 - Reads 8 bit word over I2C interface + * @hw: pointer to hardware structure + * @byte_offset: byte offset at address 0xA2 + * @sff8472_data: value read + * + * Performs byte read operation from SFP module's SFF-8472 data over I2C. + * + * Return: the exit code of the operation. + **/ +s32 ixgbe_read_i2c_sff8472_E610(struct ixgbe_hw *hw, u8 byte_offset, + u8 *sff8472_data) +{ + return ixgbe_aci_sff_eeprom(hw, 0, IXGBE_I2C_EEPROM_DEV_ADDR2, + byte_offset, 0, + IXGBE_ACI_SFF_NO_PAGE_BANK_UPDATE, + sff8472_data, 1, false); +} + +/** + * ixgbe_read_i2c_eeprom_E610 - Reads 8 bit EEPROM word over I2C interface + * @hw: pointer to hardware structure + * @byte_offset: EEPROM byte offset to read + * @eeprom_data: value read + * + * Performs byte read operation from SFP module's EEPROM over I2C interface. + * + * Return: the exit code of the operation. + **/ +s32 ixgbe_read_i2c_eeprom_E610(struct ixgbe_hw *hw, u8 byte_offset, + u8 *eeprom_data) +{ + return ixgbe_aci_sff_eeprom(hw, 0, IXGBE_I2C_EEPROM_DEV_ADDR, + byte_offset, 0, + IXGBE_ACI_SFF_NO_PAGE_BANK_UPDATE, + eeprom_data, 1, false); +} + +/** + * ixgbe_write_i2c_eeprom_E610 - Writes 8 bit EEPROM word over I2C interface + * @hw: pointer to hardware structure + * @byte_offset: EEPROM byte offset to write + * @eeprom_data: value to write + * + * Performs byte write operation to SFP module's EEPROM over I2C interface. + * + * Return: the exit code of the operation. + **/ +s32 ixgbe_write_i2c_eeprom_E610(struct ixgbe_hw *hw, u8 byte_offset, + u8 eeprom_data) +{ + return ixgbe_aci_sff_eeprom(hw, 0, IXGBE_I2C_EEPROM_DEV_ADDR, + byte_offset, 0, + IXGBE_ACI_SFF_NO_PAGE_BANK_UPDATE, + &eeprom_data, 1, true); +} + +/** + * ixgbe_check_overtemp_E610 - Check firmware-controlled PHYs for overtemp + * @hw: pointer to hardware structure + * + * Get the link status and check if the PHY temperature alarm detected. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_check_overtemp_E610(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_link_status_data link_data = { 0 }; + struct ixgbe_aci_cmd_get_link_status *resp; + struct ixgbe_aci_desc desc; + s32 status = IXGBE_SUCCESS; + + if (!hw) + return IXGBE_ERR_PARAM; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_link_status); + resp = &desc.params.get_link_status; + resp->cmd_flags = IXGBE_CPU_TO_LE16(IXGBE_ACI_LSE_NOP); + + status = ixgbe_aci_send_cmd(hw, &desc, &link_data, sizeof(link_data)); + if (status != IXGBE_SUCCESS) + return status; + + if (link_data.ext_info & IXGBE_ACI_LINK_PHY_TEMP_ALARM) { + ERROR_REPORT1(IXGBE_ERROR_CAUTION, + "PHY Temperature Alarm detected"); + status = IXGBE_ERR_OVERTEMP; + } + + return status; +} + +/** + * ixgbe_set_phy_power_E610 - Control power for copper PHY + * @hw: pointer to hardware structure + * @on: true for on, false for off + * + * Set the power on/off of the PHY + * by getting its capabilities and setting the appropriate + * configuration parameters. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_set_phy_power_E610(struct ixgbe_hw *hw, bool on) +{ + struct ixgbe_aci_cmd_get_phy_caps_data phy_caps = { 0 }; + struct ixgbe_aci_cmd_set_phy_cfg_data phy_cfg = { 0 }; + s32 status; + + status = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_ACTIVE_CFG, &phy_caps); + if (status != IXGBE_SUCCESS) + return status; + + ixgbe_copy_phy_caps_to_cfg(&phy_caps, &phy_cfg); + + if (on) { + phy_cfg.caps &= ~IXGBE_ACI_PHY_ENA_LOW_POWER; + } else { + phy_cfg.caps |= IXGBE_ACI_PHY_ENA_LOW_POWER; + } + + /* PHY is already in requested power mode */ + if (phy_caps.caps == phy_cfg.caps) + return IXGBE_SUCCESS; + + phy_cfg.caps |= IXGBE_ACI_PHY_ENA_LINK; + phy_cfg.caps |= IXGBE_ACI_PHY_ENA_AUTO_LINK_UPDT; + + status = ixgbe_aci_set_phy_cfg(hw, &phy_cfg); + + return status; +} + +/** + * ixgbe_enter_lplu_E610 - Transition to low power states + * @hw: pointer to hardware structure + * + * Configures Low Power Link Up on transition to low power states + * (from D0 to non-D0). Link is required to enter LPLU so avoid resetting the + * X557 PHY immediately prior to entering LPLU. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_enter_lplu_E610(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_phy_caps_data phy_caps = { 0 }; + struct ixgbe_aci_cmd_set_phy_cfg_data phy_cfg = { 0 }; + s32 status; + + status = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_ACTIVE_CFG, &phy_caps); + if (status != IXGBE_SUCCESS) + return status; + + ixgbe_copy_phy_caps_to_cfg(&phy_caps, &phy_cfg); + + phy_cfg.low_power_ctrl_an |= IXGBE_ACI_PHY_EN_D3COLD_LOW_POWER_AUTONEG; + + status = ixgbe_aci_set_phy_cfg(hw, &phy_cfg); + + return status; +} + +/** + * ixgbe_init_eeprom_params_E610 - Initialize EEPROM params + * @hw: pointer to hardware structure + * + * Initializes the EEPROM parameters ixgbe_eeprom_info within the + * ixgbe_hw struct in order to set up EEPROM access. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_init_eeprom_params_E610(struct ixgbe_hw *hw) +{ + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + u32 gens_stat; + u8 sr_size; + + if (eeprom->type == ixgbe_eeprom_uninitialized) { + eeprom->type = ixgbe_flash; + + gens_stat = IXGBE_READ_REG(hw, GLNVM_GENS); + sr_size = (gens_stat & GLNVM_GENS_SR_SIZE_M) >> + GLNVM_GENS_SR_SIZE_S; + + /* Switching to words (sr_size contains power of 2) */ + eeprom->word_size = BIT(sr_size) * IXGBE_SR_WORDS_IN_1KB; + + DEBUGOUT2("Eeprom params: type = %d, size = %d\n", + eeprom->type, eeprom->word_size); + } + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_read_ee_aci_E610 - Read EEPROM word using the admin command. + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to read + * @data: word read from the EEPROM + * + * Reads a 16 bit word from the EEPROM using the ACI. + * If the EEPROM params are not initialized, the function + * initialize them before proceeding with reading. + * The function acquires and then releases the NVM ownership. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_read_ee_aci_E610(struct ixgbe_hw *hw, u16 offset, u16 *data) +{ + s32 status; + + if (hw->eeprom.type == ixgbe_eeprom_uninitialized) { + status = ixgbe_init_eeprom_params(hw); + if (status) + return status; + } + + status = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (status) + return status; + + status = ixgbe_read_sr_word_aci(hw, offset, data); + ixgbe_release_nvm(hw); + + return status; +} + +/** + * ixgbe_read_ee_aci_buffer_E610- Read EEPROM word(s) using admin commands. + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to read + * @words: number of words + * @data: word(s) read from the EEPROM + * + * Reads a 16 bit word(s) from the EEPROM using the ACI. + * If the EEPROM params are not initialized, the function + * initialize them before proceeding with reading. + * The function acquires and then releases the NVM ownership. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_read_ee_aci_buffer_E610(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + s32 status; + + if (hw->eeprom.type == ixgbe_eeprom_uninitialized) { + status = ixgbe_init_eeprom_params(hw); + if (status) + return status; + } + + status = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (status) + return status; + + status = ixgbe_read_sr_buf_aci(hw, offset, &words, data); + ixgbe_release_nvm(hw); + + return status; +} + +/** + * ixgbe_write_ee_aci_E610 - Write EEPROM word using the admin command. + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to write + * @data: word write to the EEPROM + * + * Write a 16 bit word to the EEPROM using the ACI. + * If the EEPROM params are not initialized, the function + * initialize them before proceeding with writing. + * The function acquires and then releases the NVM ownership. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_write_ee_aci_E610(struct ixgbe_hw *hw, u16 offset, u16 data) +{ + s32 status; + + if (hw->eeprom.type == ixgbe_eeprom_uninitialized) { + status = ixgbe_init_eeprom_params(hw); + if (status) + return status; + } + + status = ixgbe_acquire_nvm(hw, IXGBE_RES_WRITE); + if (status) + return status; + + status = ixgbe_write_sr_word_aci(hw, (u32)offset, &data); + ixgbe_release_nvm(hw); + + return status; +} + +/** + * ixgbe_write_ee_aci_buffer_E610 - Write EEPROM word(s) using admin commands. + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to write + * @words: number of words + * @data: word(s) write to the EEPROM + * + * Write a 16 bit word(s) to the EEPROM using the ACI. + * If the EEPROM params are not initialized, the function + * initialize them before proceeding with writing. + * The function acquires and then releases the NVM ownership. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_write_ee_aci_buffer_E610(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + s32 status; + + if (hw->eeprom.type == ixgbe_eeprom_uninitialized) { + status = ixgbe_init_eeprom_params(hw); + if (status) + return status; + } + + status = ixgbe_acquire_nvm(hw, IXGBE_RES_WRITE); + if (status) + return status; + + status = ixgbe_write_sr_buf_aci(hw, (u32)offset, words, data); + ixgbe_release_nvm(hw); + + return status; +} + +/** + * ixgbe_calc_eeprom_checksum_E610 - Calculates and returns the checksum + * @hw: pointer to hardware structure + * + * Calculate SW Checksum that covers the whole 64kB shadow RAM + * except the VPD and PCIe ALT Auto-load modules. The structure and size of VPD + * is customer specific and unknown. Therefore, this function skips all maximum + * possible size of VPD (1kB). + * If the EEPROM params are not initialized, the function + * initializes them before proceeding. + * The function acquires and then releases the NVM ownership. + * + * Return: the negative error code on error, or the 16-bit checksum + */ +s32 ixgbe_calc_eeprom_checksum_E610(struct ixgbe_hw *hw) +{ + bool nvm_acquired = false; + u16 pcie_alt_module = 0; + u16 checksum_local = 0; + u16 checksum = 0; + u16 vpd_module; + void *vmem; + s32 status; + u16 *data; + u16 i; + + if (hw->eeprom.type == ixgbe_eeprom_uninitialized) { + status = ixgbe_init_eeprom_params(hw); + if (status) + return status; + } + + vmem = ixgbe_calloc(hw, IXGBE_SR_SECTOR_SIZE_IN_WORDS, sizeof(u16)); + if (!vmem) + return IXGBE_ERR_OUT_OF_MEM; + data = (u16 *)vmem; + status = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (status) + goto ixgbe_calc_sr_checksum_exit; + nvm_acquired = true; + + /* read pointer to VPD area */ + status = ixgbe_read_sr_word_aci(hw, E610_SR_VPD_PTR, &vpd_module); + if (status) + goto ixgbe_calc_sr_checksum_exit; + + /* read pointer to PCIe Alt Auto-load module */ + status = ixgbe_read_sr_word_aci(hw, E610_SR_PCIE_ALT_AUTO_LOAD_PTR, + &pcie_alt_module); + if (status) + goto ixgbe_calc_sr_checksum_exit; + + /* Calculate SW checksum that covers the whole 64kB shadow RAM + * except the VPD and PCIe ALT Auto-load modules + */ + for (i = 0; i < hw->eeprom.word_size; i++) { + /* Read SR page */ + if ((i % IXGBE_SR_SECTOR_SIZE_IN_WORDS) == 0) { + u16 words = IXGBE_SR_SECTOR_SIZE_IN_WORDS; + + status = ixgbe_read_sr_buf_aci(hw, i, &words, data); + if (status != IXGBE_SUCCESS) + goto ixgbe_calc_sr_checksum_exit; + } + + /* Skip Checksum word */ + if (i == E610_SR_SW_CHECKSUM_WORD) + continue; + /* Skip VPD module (convert byte size to word count) */ + if (i >= (u32)vpd_module && + i < ((u32)vpd_module + E610_SR_VPD_SIZE_WORDS)) + continue; + /* Skip PCIe ALT module (convert byte size to word count) */ + if (i >= (u32)pcie_alt_module && + i < ((u32)pcie_alt_module + E610_SR_PCIE_ALT_SIZE_WORDS)) + continue; + + checksum_local += data[i % IXGBE_SR_SECTOR_SIZE_IN_WORDS]; + } + + checksum = (u16)IXGBE_SR_SW_CHECKSUM_BASE - checksum_local; + +ixgbe_calc_sr_checksum_exit: + if(nvm_acquired) + ixgbe_release_nvm(hw); + ixgbe_free(hw, vmem); + + if(!status) + return (s32)checksum; + else + return status; +} + +/** + * ixgbe_update_eeprom_checksum_E610 - Updates the EEPROM checksum and flash + * @hw: pointer to hardware structure + * + * After writing EEPROM to Shadow RAM, software sends the admin command + * to recalculate and update EEPROM checksum and instructs the hardware + * to update the flash. + * If the EEPROM params are not initialized, the function + * initialize them before proceeding. + * The function acquires and then releases the NVM ownership. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_update_eeprom_checksum_E610(struct ixgbe_hw *hw) +{ + s32 status; + + if (hw->eeprom.type == ixgbe_eeprom_uninitialized) { + status = ixgbe_init_eeprom_params(hw); + if (status) + return status; + } + + status = ixgbe_nvm_recalculate_checksum(hw); + if (status) + return status; + status = ixgbe_acquire_nvm(hw, IXGBE_RES_WRITE); + if (status) + return status; + + status = ixgbe_nvm_write_activate(hw, IXGBE_ACI_NVM_ACTIV_REQ_EMPR, + NULL); + ixgbe_release_nvm(hw); + + return status; +} + +/** + * ixgbe_validate_eeprom_checksum_E610 - Validate EEPROM checksum + * @hw: pointer to hardware structure + * @checksum_val: calculated checksum + * + * Performs checksum calculation and validates the EEPROM checksum. If the + * caller does not need checksum_val, the value can be NULL. + * If the EEPROM params are not initialized, the function + * initialize them before proceeding. + * The function acquires and then releases the NVM ownership. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_validate_eeprom_checksum_E610(struct ixgbe_hw *hw, u16 *checksum_val) +{ + u32 status; + + if (hw->eeprom.type == ixgbe_eeprom_uninitialized) { + status = ixgbe_init_eeprom_params(hw); + if (status) + return status; + } + + status = ixgbe_nvm_validate_checksum(hw); + + if (status) + return status; + + if (checksum_val) { + u16 tmp_checksum; + status = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (status) + return status; + + status = ixgbe_read_sr_word_aci(hw, E610_SR_SW_CHECKSUM_WORD, + &tmp_checksum); + ixgbe_release_nvm(hw); + + if (!status) + *checksum_val = tmp_checksum; + } + + return status; +} + +/** + * ixgbe_get_pfa_module_tlv - Reads sub module TLV from NVM PFA + * @hw: pointer to hardware structure + * @module_tlv: pointer to module TLV to return + * @module_tlv_len: pointer to module TLV length to return + * @module_type: module type requested + * + * Finds the requested sub module TLV type from the Preserved Field + * Area (PFA) and returns the TLV pointer and length. The caller can + * use these to read the variable length TLV value. + * + * Return: the exit code of the operation. + */ +static s32 ixgbe_get_pfa_module_tlv(struct ixgbe_hw *hw, u16 *module_tlv, + u16 *module_tlv_len, u16 module_type) +{ + u16 pfa_len, pfa_ptr, pfa_end_ptr; + u16 next_tlv; + s32 status; + + status = ixgbe_read_ee_aci_E610(hw, E610_SR_PFA_PTR, &pfa_ptr); + if (status != IXGBE_SUCCESS) { + return status; + } + status = ixgbe_read_ee_aci_E610(hw, pfa_ptr, &pfa_len); + if (status != IXGBE_SUCCESS) { + return status; + } + /* Starting with first TLV after PFA length, iterate through the list + * of TLVs to find the requested one. + */ + next_tlv = pfa_ptr + 1; + pfa_end_ptr = pfa_ptr + pfa_len; + while (next_tlv < pfa_end_ptr) { + u16 tlv_sub_module_type, tlv_len; + + /* Read TLV type */ + status = ixgbe_read_ee_aci_E610(hw, next_tlv, + &tlv_sub_module_type); + if (status != IXGBE_SUCCESS) { + break; + } + /* Read TLV length */ + status = ixgbe_read_ee_aci_E610(hw, next_tlv + 1, &tlv_len); + if (status != IXGBE_SUCCESS) { + break; + } + if (tlv_sub_module_type == module_type) { + if (tlv_len) { + *module_tlv = next_tlv; + *module_tlv_len = tlv_len; + return IXGBE_SUCCESS; + } + return IXGBE_ERR_INVAL_SIZE; + } + /* Check next TLV, i.e. current TLV pointer + length + 2 words + * (for current TLV's type and length) + */ + next_tlv = next_tlv + tlv_len + 2; + } + /* Module does not exist */ + return IXGBE_ERR_DOES_NOT_EXIST; +} + +/** + * ixgbe_read_pba_string_E610 - Reads part number string from NVM + * @hw: pointer to hardware structure + * @pba_num: stores the part number string from the NVM + * @pba_num_size: part number string buffer length + * + * Reads the part number string from the NVM. + * + * Return: the exit code of the operation. + */ +s32 ixgbe_read_pba_string_E610(struct ixgbe_hw *hw, u8 *pba_num, + u32 pba_num_size) +{ + u16 pba_tlv, pba_tlv_len; + u16 pba_word, pba_size; + s32 status; + u16 i; + + status = ixgbe_get_pfa_module_tlv(hw, &pba_tlv, &pba_tlv_len, + E610_SR_PBA_BLOCK_PTR); + if (status != IXGBE_SUCCESS) { + return status; + } + + /* pba_size is the next word */ + status = ixgbe_read_ee_aci_E610(hw, (pba_tlv + 2), &pba_size); + if (status != IXGBE_SUCCESS) { + return status; + } + + if (pba_tlv_len < pba_size) { + return IXGBE_ERR_INVAL_SIZE; + } + + /* Subtract one to get PBA word count (PBA Size word is included in + * total size) + */ + pba_size--; + if (pba_num_size < (((u32)pba_size * 2) + 1)) { + return IXGBE_ERR_PARAM; + } + + for (i = 0; i < pba_size; i++) { + status = ixgbe_read_ee_aci_E610(hw, (pba_tlv + 2 + 1) + i, + &pba_word); + if (status != IXGBE_SUCCESS) { + return status; + } + + pba_num[(i * 2)] = (pba_word >> 8) & 0xFF; + pba_num[(i * 2) + 1] = pba_word & 0xFF; + } + pba_num[(pba_size * 2)] = '\0'; + + return status; +} diff --git a/sys/dev/ixgbe/ixgbe_e610.h b/sys/dev/ixgbe/ixgbe_e610.h new file mode 100644 index 000000000000..94e600139499 --- /dev/null +++ b/sys/dev/ixgbe/ixgbe_e610.h @@ -0,0 +1,224 @@ +/****************************************************************************** + SPDX-License-Identifier: BSD-3-Clause + + Copyright (c) 2025, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +#ifndef _IXGBE_E610_H_ +#define _IXGBE_E610_H_ + +#include "ixgbe_type.h" + +void ixgbe_init_aci(struct ixgbe_hw *hw); +void ixgbe_shutdown_aci(struct ixgbe_hw *hw); +s32 ixgbe_aci_send_cmd(struct ixgbe_hw *hw, struct ixgbe_aci_desc *desc, + void *buf, u16 buf_size); +bool ixgbe_aci_check_event_pending(struct ixgbe_hw *hw); +s32 ixgbe_aci_get_event(struct ixgbe_hw *hw, struct ixgbe_aci_event *e, + bool *pending); + +void ixgbe_fill_dflt_direct_cmd_desc(struct ixgbe_aci_desc *desc, u16 opcode); + +s32 ixgbe_aci_get_fw_ver(struct ixgbe_hw *hw); +s32 ixgbe_aci_send_driver_ver(struct ixgbe_hw *hw, struct ixgbe_driver_ver *dv); +s32 ixgbe_aci_set_pf_context(struct ixgbe_hw *hw, u8 pf_id); + +s32 ixgbe_acquire_res(struct ixgbe_hw *hw, enum ixgbe_aci_res_ids res, + enum ixgbe_aci_res_access_type access, u32 timeout); +void ixgbe_release_res(struct ixgbe_hw *hw, enum ixgbe_aci_res_ids res); +s32 ixgbe_aci_list_caps(struct ixgbe_hw *hw, void *buf, u16 buf_size, + u32 *cap_count, enum ixgbe_aci_opc opc); +s32 ixgbe_discover_dev_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_dev_caps *dev_caps); +s32 ixgbe_discover_func_caps(struct ixgbe_hw* hw, + struct ixgbe_hw_func_caps* func_caps); +s32 ixgbe_get_caps(struct ixgbe_hw *hw); +s32 ixgbe_aci_disable_rxen(struct ixgbe_hw *hw); +s32 ixgbe_aci_get_phy_caps(struct ixgbe_hw *hw, bool qual_mods, u8 report_mode, + struct ixgbe_aci_cmd_get_phy_caps_data *pcaps); +bool ixgbe_phy_caps_equals_cfg(struct ixgbe_aci_cmd_get_phy_caps_data *caps, + struct ixgbe_aci_cmd_set_phy_cfg_data *cfg); +void ixgbe_copy_phy_caps_to_cfg(struct ixgbe_aci_cmd_get_phy_caps_data *caps, + struct ixgbe_aci_cmd_set_phy_cfg_data *cfg); +s32 ixgbe_aci_set_phy_cfg(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_set_phy_cfg_data *cfg); +s32 ixgbe_aci_set_link_restart_an(struct ixgbe_hw *hw, bool ena_link); +s32 ixgbe_update_link_info(struct ixgbe_hw *hw); +s32 ixgbe_get_link_status(struct ixgbe_hw *hw, bool *link_up); +s32 ixgbe_aci_get_link_info(struct ixgbe_hw *hw, bool ena_lse, + struct ixgbe_link_status *link); +s32 ixgbe_aci_set_event_mask(struct ixgbe_hw *hw, u8 port_num, u16 mask); +s32 ixgbe_configure_lse(struct ixgbe_hw *hw, bool activate, u16 mask); + +s32 ixgbe_aci_get_netlist_node(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_get_link_topo *cmd, + u8 *node_part_number, u16 *node_handle); +s32 ixgbe_find_netlist_node(struct ixgbe_hw *hw, u8 node_type_ctx, + u8 node_part_number, u16 *node_handle); +s32 ixgbe_aci_read_i2c(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_link_topo_addr topo_addr, + u16 bus_addr, __le16 addr, u8 params, u8 *data); +s32 ixgbe_aci_write_i2c(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_link_topo_addr topo_addr, + u16 bus_addr, __le16 addr, u8 params, u8 *data); + +s32 ixgbe_aci_set_port_id_led(struct ixgbe_hw *hw, bool orig_mode); +s32 ixgbe_aci_set_gpio(struct ixgbe_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, + bool value); +s32 ixgbe_aci_get_gpio(struct ixgbe_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, + bool *value); +s32 ixgbe_aci_sff_eeprom(struct ixgbe_hw *hw, u16 lport, u8 bus_addr, + u16 mem_addr, u8 page, u8 page_bank_ctrl, u8 *data, + u8 length, bool write); +s32 ixgbe_aci_prog_topo_dev_nvm(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_link_topo_params *topo_params); +s32 ixgbe_aci_read_topo_dev_nvm(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_link_topo_params *topo_params, + u32 start_address, u8 *data, u8 data_size); + +s32 ixgbe_acquire_nvm(struct ixgbe_hw *hw, + enum ixgbe_aci_res_access_type access); +void ixgbe_release_nvm(struct ixgbe_hw *hw); + +s32 ixgbe_aci_read_nvm(struct ixgbe_hw *hw, u16 module_typeid, u32 offset, + u16 length, void *data, bool last_command, + bool read_shadow_ram); + +s32 ixgbe_aci_erase_nvm(struct ixgbe_hw *hw, u16 module_typeid); +s32 ixgbe_aci_update_nvm(struct ixgbe_hw *hw, u16 module_typeid, + u32 offset, u16 length, void *data, + bool last_command, u8 command_flags); + +s32 ixgbe_aci_read_nvm_cfg(struct ixgbe_hw *hw, u8 cmd_flags, + u16 field_id, void *data, u16 buf_size, + u16 *elem_count); +s32 ixgbe_aci_write_nvm_cfg(struct ixgbe_hw *hw, u8 cmd_flags, + void *data, u16 buf_size, u16 elem_count); + +s32 ixgbe_nvm_validate_checksum(struct ixgbe_hw *hw); +s32 ixgbe_nvm_recalculate_checksum(struct ixgbe_hw *hw); + +s32 ixgbe_nvm_write_activate(struct ixgbe_hw *hw, u16 cmd_flags, + u8 *response_flags); + +s32 ixgbe_get_nvm_minsrevs(struct ixgbe_hw *hw, struct ixgbe_minsrev_info *minsrevs); +s32 ixgbe_update_nvm_minsrevs(struct ixgbe_hw *hw, struct ixgbe_minsrev_info *minsrevs); + +s32 ixgbe_get_inactive_nvm_ver(struct ixgbe_hw *hw, struct ixgbe_nvm_info *nvm); +s32 ixgbe_get_active_nvm_ver(struct ixgbe_hw *hw, struct ixgbe_nvm_info *nvm); + +s32 ixgbe_get_inactive_netlist_ver(struct ixgbe_hw *hw, struct ixgbe_netlist_info *netlist); +s32 ixgbe_init_nvm(struct ixgbe_hw *hw); + +s32 ixgbe_sanitize_operate(struct ixgbe_hw *hw); +s32 ixgbe_sanitize_nvm(struct ixgbe_hw *hw, u8 cmd_flags, u8 *values); + +s32 ixgbe_read_sr_word_aci(struct ixgbe_hw *hw, u16 offset, u16 *data); +s32 ixgbe_read_sr_buf_aci(struct ixgbe_hw *hw, u16 offset, u16 *words, u16 *data); +s32 ixgbe_read_flat_nvm(struct ixgbe_hw *hw, u32 offset, u32 *length, + u8 *data, bool read_shadow_ram); + +s32 ixgbe_write_sr_word_aci(struct ixgbe_hw *hw, u32 offset, const u16 *data); +s32 ixgbe_write_sr_buf_aci(struct ixgbe_hw *hw, u32 offset, u16 words, const u16 *data); + +s32 ixgbe_aci_alternate_write(struct ixgbe_hw *hw, u32 reg_addr0, + u32 reg_val0, u32 reg_addr1, u32 reg_val1); +s32 ixgbe_aci_alternate_read(struct ixgbe_hw *hw, u32 reg_addr0, + u32 *reg_val0, u32 reg_addr1, u32 *reg_val1); +s32 ixgbe_aci_alternate_write_done(struct ixgbe_hw *hw, u8 bios_mode, + bool *reset_needed); +s32 ixgbe_aci_alternate_clear(struct ixgbe_hw *hw); + +s32 ixgbe_aci_get_internal_data(struct ixgbe_hw *hw, u16 cluster_id, + u16 table_id, u32 start, void *buf, + u16 buf_size, u16 *ret_buf_size, + u16 *ret_next_cluster, u16 *ret_next_table, + u32 *ret_next_index); + +s32 ixgbe_handle_nvm_access(struct ixgbe_hw *hw, + struct ixgbe_nvm_access_cmd *cmd, + struct ixgbe_nvm_access_data *data); + +s32 ixgbe_aci_set_health_status_config(struct ixgbe_hw *hw, u8 event_source); + +/* E610 operations */ +s32 ixgbe_init_ops_E610(struct ixgbe_hw *hw); +s32 ixgbe_reset_hw_E610(struct ixgbe_hw *hw); +s32 ixgbe_start_hw_E610(struct ixgbe_hw *hw); +enum ixgbe_media_type ixgbe_get_media_type_E610(struct ixgbe_hw *hw); +u64 ixgbe_get_supported_physical_layer_E610(struct ixgbe_hw *hw); +s32 ixgbe_setup_link_E610(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg_wait); +s32 ixgbe_check_link_E610(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up, bool link_up_wait_to_complete); +s32 ixgbe_get_link_capabilities_E610(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *autoneg); +s32 ixgbe_cfg_phy_fc(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_set_phy_cfg_data *cfg, + enum ixgbe_fc_mode req_mode); +s32 ixgbe_setup_fc_E610(struct ixgbe_hw *hw); +void ixgbe_fc_autoneg_E610(struct ixgbe_hw *hw); +s32 ixgbe_set_fw_drv_ver_E610(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, + u8 sub, u16 len, const char *driver_ver); +void ixgbe_disable_rx_E610(struct ixgbe_hw *hw); +s32 ixgbe_setup_eee_E610(struct ixgbe_hw *hw, bool enable_eee); +bool ixgbe_fw_recovery_mode_E610(struct ixgbe_hw *hw); +bool ixgbe_fw_rollback_mode_E610(struct ixgbe_hw *hw); +bool ixgbe_get_fw_tsam_mode_E610(struct ixgbe_hw *hw); +s32 ixgbe_init_phy_ops_E610(struct ixgbe_hw *hw); +s32 ixgbe_identify_phy_E610(struct ixgbe_hw *hw); +s32 ixgbe_identify_module_E610(struct ixgbe_hw *hw); +s32 ixgbe_setup_phy_link_E610(struct ixgbe_hw *hw); +s32 ixgbe_get_phy_firmware_version_E610(struct ixgbe_hw *hw, + u16 *firmware_version); +s32 ixgbe_read_i2c_sff8472_E610(struct ixgbe_hw *hw, u8 byte_offset, + u8 *sff8472_data); +s32 ixgbe_read_i2c_eeprom_E610(struct ixgbe_hw *hw, u8 byte_offset, + u8 *eeprom_data); +s32 ixgbe_write_i2c_eeprom_E610(struct ixgbe_hw *hw, u8 byte_offset, + u8 eeprom_data); +s32 ixgbe_check_overtemp_E610(struct ixgbe_hw *hw); +s32 ixgbe_set_phy_power_E610(struct ixgbe_hw *hw, bool on); +s32 ixgbe_enter_lplu_E610(struct ixgbe_hw *hw); +s32 ixgbe_init_eeprom_params_E610(struct ixgbe_hw *hw); +s32 ixgbe_read_ee_aci_E610(struct ixgbe_hw *hw, u16 offset, u16 *data); +s32 ixgbe_read_ee_aci_buffer_E610(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +s32 ixgbe_write_ee_aci_E610(struct ixgbe_hw *hw, u16 offset, u16 data); +s32 ixgbe_write_ee_aci_buffer_E610(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +s32 ixgbe_calc_eeprom_checksum_E610(struct ixgbe_hw *hw); +s32 ixgbe_update_eeprom_checksum_E610(struct ixgbe_hw *hw); +s32 ixgbe_validate_eeprom_checksum_E610(struct ixgbe_hw *hw, u16 *checksum_val); +s32 ixgbe_read_pba_string_E610(struct ixgbe_hw *hw, u8 *pba_num, u32 pba_num_size); + +#endif /* _IXGBE_E610_H_ */ diff --git a/sys/dev/ixgbe/ixgbe_osdep.c b/sys/dev/ixgbe/ixgbe_osdep.c index 892924712c38..9bd9ce63b786 100644 --- a/sys/dev/ixgbe/ixgbe_osdep.c +++ b/sys/dev/ixgbe/ixgbe_osdep.c @@ -114,3 +114,29 @@ ixgbe_link_speed_to_baudrate(ixgbe_link_speed speed) return baudrate; } + +void +ixgbe_init_lock(struct ixgbe_lock *lock) +{ + mtx_init(&lock->mutex, "mutex", + "ixgbe ACI lock", MTX_DEF | MTX_DUPOK); +} + +void +ixgbe_acquire_lock(struct ixgbe_lock *lock) +{ + mtx_lock(&lock->mutex); +} + +void +ixgbe_release_lock(struct ixgbe_lock *lock) +{ + mtx_unlock(&lock->mutex); +} + +void +ixgbe_destroy_lock(struct ixgbe_lock *lock) +{ + if (mtx_initialized(&lock->mutex)) + mtx_destroy(&lock->mutex); +} diff --git a/sys/dev/ixgbe/ixgbe_osdep.h b/sys/dev/ixgbe/ixgbe_osdep.h index cf7c578fd684..8cf1d13736ce 100644 --- a/sys/dev/ixgbe/ixgbe_osdep.h +++ b/sys/dev/ixgbe/ixgbe_osdep.h @@ -133,7 +133,9 @@ enum { /* XXX these need to be revisited */ #define IXGBE_CPU_TO_LE16 htole16 #define IXGBE_CPU_TO_LE32 htole32 +#define IXGBE_LE16_TO_CPU le16toh #define IXGBE_LE32_TO_CPU le32toh +#define IXGBE_LE64_TO_CPU le64toh #define IXGBE_LE32_TO_CPUS(x) *(x) = le32dec(x) #define IXGBE_CPU_TO_BE16 htobe16 #define IXGBE_CPU_TO_BE32 htobe32 @@ -146,6 +148,7 @@ typedef int16_t s16; typedef uint32_t u32; typedef int32_t s32; typedef uint64_t u64; +typedef int64_t s64; #ifndef __bool_true_false_are_defined typedef boolean_t bool; #endif @@ -195,6 +198,11 @@ struct ixgbe_osdep bus_space_handle_t mem_bus_space_handle; }; +struct ixgbe_lock +{ + struct mtx mutex; +}; + /* These routines need struct ixgbe_hw declared */ struct ixgbe_hw; device_t ixgbe_dev_from_hw(struct ixgbe_hw *hw); @@ -222,4 +230,27 @@ extern void ixgbe_write_reg_array(struct ixgbe_hw *, u32, u32, u32); #define IXGBE_WRITE_REG_ARRAY(a, reg, offset, val) \ ixgbe_write_reg_array(a, reg, offset, val) +void ixgbe_init_lock(struct ixgbe_lock *); +void ixgbe_destroy_lock(struct ixgbe_lock *); +void ixgbe_acquire_lock(struct ixgbe_lock *); +void ixgbe_release_lock(struct ixgbe_lock *); + +static inline void * +ixgbe_calloc(struct ixgbe_hw __unused *hw, size_t count, size_t size) +{ + return (malloc(count * size, M_DEVBUF, M_ZERO | M_NOWAIT)); +} + +static inline void * +ixgbe_malloc(struct ixgbe_hw __unused *hw, size_t size) +{ + return (malloc(size, M_DEVBUF, M_ZERO | M_NOWAIT)); +} + +static inline void +ixgbe_free(struct ixgbe_hw __unused *hw, void *addr) +{ + free(addr, M_DEVBUF); +} + #endif /* _IXGBE_OSDEP_H_ */ diff --git a/sys/dev/ixgbe/ixgbe_type.h b/sys/dev/ixgbe/ixgbe_type.h index 91b46da72c75..0bbe7806d41d 100644 --- a/sys/dev/ixgbe/ixgbe_type.h +++ b/sys/dev/ixgbe/ixgbe_type.h @@ -74,6 +74,7 @@ */ #include "ixgbe_osdep.h" +#include "ixgbe_type_e610.h" /* Override this by setting IOMEM in your ixgbe_osdep.h header */ #define IOMEM @@ -150,12 +151,19 @@ #define IXGBE_DEV_ID_X550EM_X_10G_T 0x15AD #define IXGBE_DEV_ID_X550EM_X_1G_T 0x15AE #define IXGBE_DEV_ID_X550EM_X_XFI 0x15B0 +#define IXGBE_DEV_ID_E610_BACKPLANE 0x57AE +#define IXGBE_DEV_ID_E610_SFP 0x57AF +#define IXGBE_DEV_ID_E610_10G_T 0x57B0 +#define IXGBE_DEV_ID_E610_2_5G_T 0x57B1 +#define IXGBE_DEV_ID_E610_SGMII 0x57B2 #define IXGBE_DEV_ID_X550_VF_HV 0x1564 #define IXGBE_DEV_ID_X550_VF 0x1565 #define IXGBE_DEV_ID_X550EM_A_VF 0x15C5 #define IXGBE_DEV_ID_X550EM_A_VF_HV 0x15B4 #define IXGBE_DEV_ID_X550EM_X_VF 0x15A8 #define IXGBE_DEV_ID_X550EM_X_VF_HV 0x15A9 +#define IXGBE_DEV_ID_E610_VF 0x57AD +#define IXGBE_SUBDEV_ID_E610_VF_HV 0x0001 #define IXGBE_CAT(r, m) IXGBE_##r##m @@ -1969,6 +1977,7 @@ enum { #define IXGBE_EICR_MAILBOX 0x00080000 /* VF to PF Mailbox Interrupt */ #define IXGBE_EICR_LSC 0x00100000 /* Link Status Change */ #define IXGBE_EICR_LINKSEC 0x00200000 /* PN Threshold */ +#define IXGBE_EICR_FW_EVENT 0x00200000 /* Async FW event */ #define IXGBE_EICR_MNG 0x00400000 /* Manageability Event Interrupt */ #define IXGBE_EICR_TS 0x00800000 /* Thermal Sensor Event */ #define IXGBE_EICR_TIMESYNC 0x01000000 /* Timesync Event */ @@ -2004,6 +2013,7 @@ enum { #define IXGBE_EICS_PCI IXGBE_EICR_PCI /* PCI Exception */ #define IXGBE_EICS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ #define IXGBE_EICS_LSC IXGBE_EICR_LSC /* Link Status Change */ +#define IXGBE_EICS_FW_EVENT IXGBE_EICR_FW_EVENT /* Async FW event */ #define IXGBE_EICS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ #define IXGBE_EICS_TIMESYNC IXGBE_EICR_TIMESYNC /* Timesync Event */ #define IXGBE_EICS_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */ @@ -2025,6 +2035,7 @@ enum { #define IXGBE_EIMS_PCI IXGBE_EICR_PCI /* PCI Exception */ #define IXGBE_EIMS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ #define IXGBE_EIMS_LSC IXGBE_EICR_LSC /* Link Status Change */ +#define IXGBE_EIMS_FW_EVENT IXGBE_EICR_FW_EVENT /* Async FW event */ #define IXGBE_EIMS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ #define IXGBE_EIMS_TS IXGBE_EICR_TS /* Thermal Sensor Event */ #define IXGBE_EIMS_TIMESYNC IXGBE_EICR_TIMESYNC /* Timesync Event */ @@ -2047,6 +2058,7 @@ enum { #define IXGBE_EIMC_PCI IXGBE_EICR_PCI /* PCI Exception */ #define IXGBE_EIMC_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ #define IXGBE_EIMC_LSC IXGBE_EICR_LSC /* Link Status Change */ +#define IXGBE_EIMC_FW_EVENT IXGBE_EICR_FW_EVENT /* Async FW event */ #define IXGBE_EIMC_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ #define IXGBE_EIMC_TIMESYNC IXGBE_EICR_TIMESYNC /* Timesync Event */ #define IXGBE_EIMC_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */ @@ -2454,6 +2466,7 @@ enum { #define IXGBE_82599_SERIAL_NUMBER_MAC_ADDR 0x11 #define IXGBE_X550_SERIAL_NUMBER_MAC_ADDR 0x04 +#define IXGBE_PCIE_MSIX_E610_CAPS 0xB2 #define IXGBE_PCIE_MSIX_82599_CAPS 0x72 #define IXGBE_MAX_MSIX_VECTORS_82599 0x40 #define IXGBE_PCIE_MSIX_82598_CAPS 0x62 @@ -2571,6 +2584,7 @@ enum { #define IXGBE_PCI_DEVICE_STATUS 0xAA #define IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING 0x0020 #define IXGBE_PCI_LINK_STATUS 0xB2 +#define IXGBE_PCI_LINK_STATUS_E610 0x82 #define IXGBE_PCI_DEVICE_CONTROL2 0xC8 #define IXGBE_PCI_LINK_WIDTH 0x3F0 #define IXGBE_PCI_LINK_WIDTH_1 0x10 @@ -2581,6 +2595,7 @@ enum { #define IXGBE_PCI_LINK_SPEED_2500 0x1 #define IXGBE_PCI_LINK_SPEED_5000 0x2 #define IXGBE_PCI_LINK_SPEED_8000 0x3 +#define IXGBE_PCI_LINK_SPEED_16000 0x4 #define IXGBE_PCI_HEADER_TYPE_REGISTER 0x0E #define IXGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80 #define IXGBE_PCI_DEVICE_CONTROL2_16ms 0x0005 @@ -3743,6 +3758,8 @@ enum ixgbe_mac_type { ixgbe_mac_X550_vf, ixgbe_mac_X550EM_x_vf, ixgbe_mac_X550EM_a_vf, + ixgbe_mac_E610, + ixgbe_mac_E610_vf, ixgbe_num_macs }; @@ -3822,7 +3839,9 @@ enum ixgbe_media_type { ixgbe_media_type_copper, ixgbe_media_type_backplane, ixgbe_media_type_cx4, - ixgbe_media_type_virtual + ixgbe_media_type_virtual, + ixgbe_media_type_da, + ixgbe_media_type_aui }; /* Flow Control Settings */ @@ -3831,6 +3850,7 @@ enum ixgbe_fc_mode { ixgbe_fc_rx_pause, ixgbe_fc_tx_pause, ixgbe_fc_full, + ixgbe_fc_auto, ixgbe_fc_default }; @@ -3863,6 +3883,7 @@ enum ixgbe_bus_speed { ixgbe_bus_speed_2500 = 2500, ixgbe_bus_speed_5000 = 5000, ixgbe_bus_speed_8000 = 8000, + ixgbe_bus_speed_16000 = 16000, ixgbe_bus_speed_reserved }; @@ -4007,6 +4028,7 @@ struct ixgbe_eeprom_operations { s32 (*validate_checksum)(struct ixgbe_hw *, u16 *); s32 (*update_checksum)(struct ixgbe_hw *); s32 (*calc_checksum)(struct ixgbe_hw *); + s32 (*read_pba_string)(struct ixgbe_hw *, u8 *, u32); }; struct ixgbe_mac_operations { @@ -4118,6 +4140,10 @@ struct ixgbe_mac_operations { void (*mdd_event)(struct ixgbe_hw *hw, u32 *vf_bitmap); void (*restore_mdd_vf)(struct ixgbe_hw *hw, u32 vf); bool (*fw_recovery_mode)(struct ixgbe_hw *hw); + bool (*fw_rollback_mode)(struct ixgbe_hw *hw); + bool (*get_fw_tsam_mode)(struct ixgbe_hw *hw); + s32 (*get_fw_version)(struct ixgbe_hw *hw); + s32 (*get_nvm_version)(struct ixgbe_hw *hw, struct ixgbe_nvm_info *nvm); }; struct ixgbe_phy_operations { @@ -4162,6 +4188,9 @@ struct ixgbe_link_operations { struct ixgbe_link_info { struct ixgbe_link_operations ops; u8 addr; + struct ixgbe_link_status link_info; + struct ixgbe_link_status link_info_old; + u8 get_link_info; }; struct ixgbe_eeprom_info { @@ -4233,6 +4262,9 @@ struct ixgbe_phy_info { bool reset_if_overtemp; bool qsfp_shared_i2c_bus; u32 nw_mng_if_sel; + u64 phy_type_low; + u64 phy_type_high; + struct ixgbe_aci_cmd_set_phy_cfg_data curr_user_phy_cfg; }; #include "ixgbe_mbx.h" @@ -4261,6 +4293,22 @@ struct ixgbe_hw { bool wol_enabled; bool need_crosstalk_fix; u32 fw_rst_cnt; + u8 api_branch; + u8 api_maj_ver; + u8 api_min_ver; + u8 api_patch; + u8 fw_branch; + u8 fw_maj_ver; + u8 fw_min_ver; + u8 fw_patch; + u32 fw_build; + struct ixgbe_aci_info aci; + struct ixgbe_flash_info flash; + struct ixgbe_hw_dev_caps dev_caps; + struct ixgbe_hw_func_caps func_caps; + struct ixgbe_fwlog_cfg fwlog_cfg; + bool fwlog_support_ena; + struct ixgbe_fwlog_ring fwlog_ring; }; #define ixgbe_call_func(hw, func, params, error) \ @@ -4312,6 +4360,24 @@ struct ixgbe_hw { #define IXGBE_ERR_MBX_NOMSG -42 #define IXGBE_ERR_TIMEOUT -43 +#define IXGBE_ERR_NOT_SUPPORTED -45 +#define IXGBE_ERR_OUT_OF_RANGE -46 + +#define IXGBE_ERR_NVM -50 +#define IXGBE_ERR_NVM_CHECKSUM -51 +#define IXGBE_ERR_BUF_TOO_SHORT -52 +#define IXGBE_ERR_NVM_BLANK_MODE -53 +#define IXGBE_ERR_INVAL_SIZE -54 +#define IXGBE_ERR_DOES_NOT_EXIST -55 + +#define IXGBE_ERR_ACI_ERROR -100 +#define IXGBE_ERR_ACI_DISABLED -101 +#define IXGBE_ERR_ACI_TIMEOUT -102 +#define IXGBE_ERR_ACI_BUSY -103 +#define IXGBE_ERR_ACI_NO_WORK -104 +#define IXGBE_ERR_ACI_NO_EVENTS -105 +#define IXGBE_ERR_FW_API_VER -106 + #define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF @@ -4540,5 +4606,6 @@ struct ixgbe_bypass_eeprom { #define IXGBE_REQUEST_TASK_FDIR 0x08 #define IXGBE_REQUEST_TASK_PHY 0x10 #define IXGBE_REQUEST_TASK_LSC 0x20 +#define IXGBE_REQUEST_TASK_FWEVENT 0x40 #endif /* _IXGBE_TYPE_H_ */ diff --git a/sys/dev/ixgbe/ixgbe_type_e610.h b/sys/dev/ixgbe/ixgbe_type_e610.h new file mode 100644 index 000000000000..e300030c3ba4 --- /dev/null +++ b/sys/dev/ixgbe/ixgbe_type_e610.h @@ -0,0 +1,2278 @@ +/****************************************************************************** + SPDX-License-Identifier: BSD-3-Clause + + Copyright (c) 2025, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +#ifndef _IXGBE_TYPE_E610_H_ +#define _IXGBE_TYPE_E610_H_ + + +/* Generic defines */ +#ifndef BIT +#define BIT(a) (1UL << (a)) +#endif /* !BIT */ +#ifndef BIT_ULL +#define BIT_ULL(a) (1ULL << (a)) +#endif /* !BIT_ULL */ +#ifndef BITS_PER_BYTE +#define BITS_PER_BYTE 8 +#endif /* !BITS_PER_BYTE */ +#ifndef DIVIDE_AND_ROUND_UP +#define DIVIDE_AND_ROUND_UP(a, b) (((a) + (b) - 1) / (b)) +#endif /* !DIVIDE_AND_ROUND_UP */ + +#ifndef ROUND_UP +/** + * ROUND_UP - round up to next arbitrary multiple (not a power of 2) + * @a: value to round up + * @b: arbitrary multiple + * + * Round up to the next multiple of the arbitrary b. + */ +#define ROUND_UP(a, b) ((b) * DIVIDE_AND_ROUND_UP((a), (b))) +#endif /* !ROUND_UP */ + +#define MAKEMASK(mask, shift) (mask << shift) + +#define BYTES_PER_WORD 2 +#define BYTES_PER_DWORD 4 + +#ifndef BITS_PER_LONG +#define BITS_PER_LONG 64 +#endif /* !BITS_PER_LONG */ +#ifndef BITS_PER_LONG_LONG +#define BITS_PER_LONG_LONG 64 +#endif /* !BITS_PER_LONG_LONG */ +#undef GENMASK +#define GENMASK(h, l) \ + (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) +#undef GENMASK_ULL +#define GENMASK_ULL(h, l) \ + (((~0ULL) << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) + +/* Data type manipulation macros. */ +#define HI_DWORD(x) ((u32)((((x) >> 16) >> 16) & 0xFFFFFFFF)) +#define LO_DWORD(x) ((u32)((x) & 0xFFFFFFFF)) +#define HI_WORD(x) ((u16)(((x) >> 16) & 0xFFFF)) +#define LO_WORD(x) ((u16)((x) & 0xFFFF)) +#define HI_BYTE(x) ((u8)(((x) >> 8) & 0xFF)) +#define LO_BYTE(x) ((u8)((x) & 0xFF)) + +#ifndef MIN_T +#define MIN_T(_t, _a, _b) min((_t)(_a), (_t)(_b)) +#endif + +#define IS_ASCII(_ch) ((_ch) < 0x80) + +/** + * ixgbe_struct_size - size of struct with C99 flexible array member + * @ptr: pointer to structure + * @field: flexible array member (last member of the structure) + * @num: number of elements of that flexible array member + */ +#define ixgbe_struct_size(ptr, field, num) \ + (sizeof(*(ptr)) + sizeof(*(ptr)->field) * (num)) + +/* General E610 defines */ +#define IXGBE_MAX_VSI 768 + +/* Auxiliary field, mask and shift definition for Shadow RAM and NVM Flash */ +#define E610_SR_VPD_SIZE_WORDS 512 +#define E610_SR_PCIE_ALT_SIZE_WORDS 512 + +/* Checksum and Shadow RAM pointers */ +#define E610_SR_NVM_DEV_STARTER_VER 0x18 +#define E610_NVM_VER_LO_SHIFT 0 +#define E610_NVM_VER_LO_MASK (0xff << E610_NVM_VER_LO_SHIFT) +#define E610_NVM_VER_HI_SHIFT 12 +#define E610_NVM_VER_HI_MASK (0xf << E610_NVM_VER_HI_SHIFT) +#define E610_SR_NVM_MAP_VER 0x29 +#define E610_SR_NVM_EETRACK_LO 0x2D +#define E610_SR_NVM_EETRACK_HI 0x2E +#define E610_SR_VPD_PTR 0x2F +#define E610_SR_PCIE_ALT_AUTO_LOAD_PTR 0x3E +#define E610_SR_SW_CHECKSUM_WORD 0x3F +#define E610_SR_PFA_PTR 0x40 +#define E610_SR_1ST_NVM_BANK_PTR 0x42 +#define E610_SR_NVM_BANK_SIZE 0x43 +#define E610_SR_1ST_OROM_BANK_PTR 0x44 +#define E610_SR_OROM_BANK_SIZE 0x45 +#define E610_SR_NETLIST_BANK_PTR 0x46 +#define E610_SR_NETLIST_BANK_SIZE 0x47 +#define E610_SR_POINTER_TYPE_BIT BIT(15) +#define E610_SR_POINTER_MASK 0x7fff +#define E610_SR_HALF_4KB_SECTOR_UNITS 2048 +#define E610_GET_PFA_POINTER_IN_WORDS(offset) \ + ((offset & E610_SR_POINTER_TYPE_BIT) == E610_SR_POINTER_TYPE_BIT) ? \ + ((offset & E610_SR_POINTER_MASK) * E610_SR_HALF_4KB_SECTOR_UNITS) : \ + (offset & E610_SR_POINTER_MASK) + +/* Checksum and Shadow RAM pointers */ +#define E610_SR_NVM_CTRL_WORD 0x00 +#define E610_SR_PBA_BLOCK_PTR 0x16 + +/* The Orom version topology */ +#define IXGBE_OROM_VER_PATCH_SHIFT 0 +#define IXGBE_OROM_VER_PATCH_MASK (0xff << IXGBE_OROM_VER_PATCH_SHIFT) +#define IXGBE_OROM_VER_BUILD_SHIFT 8 +#define IXGBE_OROM_VER_BUILD_MASK (0xffff << IXGBE_OROM_VER_BUILD_SHIFT) +#define IXGBE_OROM_VER_SHIFT 24 +#define IXGBE_OROM_VER_MASK (0xff << IXGBE_OROM_VER_SHIFT) + +/* CSS Header words */ +#define IXGBE_NVM_CSS_HDR_LEN_L 0x02 +#define IXGBE_NVM_CSS_HDR_LEN_H 0x03 +#define IXGBE_NVM_CSS_SREV_L 0x14 +#define IXGBE_NVM_CSS_SREV_H 0x15 + +/* Length of Authentication header section in words */ +#define IXGBE_NVM_AUTH_HEADER_LEN 0x08 + +/* The Netlist ID Block is located after all of the Link Topology nodes. */ +#define IXGBE_NETLIST_ID_BLK_SIZE 0x30 +#define IXGBE_NETLIST_ID_BLK_OFFSET(n) IXGBE_NETLIST_LINK_TOPO_OFFSET(0x0004 + 2 * (n)) + +/* netlist ID block field offsets (word offsets) */ +#define IXGBE_NETLIST_ID_BLK_MAJOR_VER_LOW 0x02 +#define IXGBE_NETLIST_ID_BLK_MAJOR_VER_HIGH 0x03 +#define IXGBE_NETLIST_ID_BLK_MINOR_VER_LOW 0x04 +#define IXGBE_NETLIST_ID_BLK_MINOR_VER_HIGH 0x05 +#define IXGBE_NETLIST_ID_BLK_TYPE_LOW 0x06 +#define IXGBE_NETLIST_ID_BLK_TYPE_HIGH 0x07 +#define IXGBE_NETLIST_ID_BLK_REV_LOW 0x08 +#define IXGBE_NETLIST_ID_BLK_REV_HIGH 0x09 +#define IXGBE_NETLIST_ID_BLK_SHA_HASH_WORD(n) (0x0A + (n)) +#define IXGBE_NETLIST_ID_BLK_CUST_VER 0x2F + +/* The Link Topology Netlist section is stored as a series of words. It is + * stored in the NVM as a TLV, with the first two words containing the type + * and length. + */ +#define IXGBE_NETLIST_LINK_TOPO_MOD_ID 0x011B +#define IXGBE_NETLIST_TYPE_OFFSET 0x0000 +#define IXGBE_NETLIST_LEN_OFFSET 0x0001 + +/* The Link Topology section follows the TLV header. When reading the netlist + * using ixgbe_read_netlist_module, we need to account for the 2-word TLV + * header. + */ +#define IXGBE_NETLIST_LINK_TOPO_OFFSET(n) ((n) + 2) +#define IXGBE_LINK_TOPO_MODULE_LEN IXGBE_NETLIST_LINK_TOPO_OFFSET(0x0000) +#define IXGBE_LINK_TOPO_NODE_COUNT IXGBE_NETLIST_LINK_TOPO_OFFSET(0x0001) +#define IXGBE_LINK_TOPO_NODE_COUNT_M MAKEMASK(0x3FF, 0) + +/* Auxiliary field, mask and shift definition for Shadow RAM and NVM Flash */ +#define IXGBE_SR_CTRL_WORD_1_S 0x06 +#define IXGBE_SR_CTRL_WORD_1_M (0x03 << IXGBE_SR_CTRL_WORD_1_S) +#define IXGBE_SR_CTRL_WORD_VALID 0x1 +#define IXGBE_SR_CTRL_WORD_OROM_BANK BIT(3) +#define IXGBE_SR_CTRL_WORD_NETLIST_BANK BIT(4) +#define IXGBE_SR_CTRL_WORD_NVM_BANK BIT(5) +#define IXGBE_SR_NVM_PTR_4KB_UNITS BIT(15) + +/* These macros strip from NVM Image Revision the particular part of NVM ver: + major ver, minor ver and image id */ +#define E610_NVM_MAJOR_VER(x) ((x & 0xF000) >> 12) +#define E610_NVM_MINOR_VER(x) (x & 0x00FF) + +/* Shadow RAM related */ +#define IXGBE_SR_SECTOR_SIZE_IN_WORDS 0x800 +#define IXGBE_SR_WORDS_IN_1KB 512 +/* Checksum should be calculated such that after adding all the words, + * including the checksum word itself, the sum should be 0xBABA. + */ +#define IXGBE_SR_SW_CHECKSUM_BASE 0xBABA + +/* Netlist */ +#define IXGBE_MAX_NETLIST_SIZE 10 + +/* General registers */ + +/* Firmware Status Register (GL_FWSTS) */ +#define GL_FWSTS 0x00083048 /* Reset Source: POR */ +#define GL_FWSTS_FWS0B_S 0 +#define GL_FWSTS_FWS0B_M MAKEMASK(0xFF, 0) +#define GL_FWSTS_FWROWD_S 8 +#define GL_FWSTS_FWROWD_M BIT(8) +#define GL_FWSTS_FWRI_S 9 +#define GL_FWSTS_FWRI_M BIT(9) +#define GL_FWSTS_FWS1B_S 16 +#define GL_FWSTS_FWS1B_M MAKEMASK(0xFF, 16) +#define GL_FWSTS_EP_PF0 BIT(24) +#define GL_FWSTS_EP_PF1 BIT(25) + +/* Recovery mode values of Firmware Status 1 Byte (FWS1B) bitfield */ +#define GL_FWSTS_FWS1B_RECOVERY_MODE_CORER_LEGACY 0x0B +#define GL_FWSTS_FWS1B_RECOVERY_MODE_GLOBR_LEGACY 0x0C +#define GL_FWSTS_FWS1B_RECOVERY_MODE_CORER 0x30 +#define GL_FWSTS_FWS1B_RECOVERY_MODE_GLOBR 0x31 +#define GL_FWSTS_FWS1B_RECOVERY_MODE_TRANSITION 0x32 +#define GL_FWSTS_FWS1B_RECOVERY_MODE_NVM 0x33 + +/* Firmware Status (GL_MNG_FWSM) */ +#define GL_MNG_FWSM 0x000B6134 /* Reset Source: POR */ +#define GL_MNG_FWSM_FW_MODES_S 0 +#define GL_MNG_FWSM_FW_MODES_M MAKEMASK(0x7, 0) +#define GL_MNG_FWSM_RSV0_S 2 +#define GL_MNG_FWSM_RSV0_M MAKEMASK(0xFF, 2) +#define GL_MNG_FWSM_EEP_RELOAD_IND_S 10 +#define GL_MNG_FWSM_EEP_RELOAD_IND_M BIT(10) +#define GL_MNG_FWSM_RSV1_S 11 +#define GL_MNG_FWSM_RSV1_M MAKEMASK(0xF, 11) +#define GL_MNG_FWSM_RSV2_S 15 +#define GL_MNG_FWSM_RSV2_M BIT(15) +#define GL_MNG_FWSM_PCIR_AL_FAILURE_S 16 +#define GL_MNG_FWSM_PCIR_AL_FAILURE_M BIT(16) +#define GL_MNG_FWSM_POR_AL_FAILURE_S 17 +#define GL_MNG_FWSM_POR_AL_FAILURE_M BIT(17) +#define GL_MNG_FWSM_RSV3_S 18 +#define GL_MNG_FWSM_RSV3_M BIT(18) +#define GL_MNG_FWSM_EXT_ERR_IND_S 19 +#define GL_MNG_FWSM_EXT_ERR_IND_M MAKEMASK(0x3F, 19) +#define GL_MNG_FWSM_RSV4_S 25 +#define GL_MNG_FWSM_RSV4_M BIT(25) +#define GL_MNG_FWSM_RESERVED_11_S 26 +#define GL_MNG_FWSM_RESERVED_11_M MAKEMASK(0xF, 26) +#define GL_MNG_FWSM_RSV5_S 30 +#define GL_MNG_FWSM_RSV5_M MAKEMASK(0x3, 30) + +/* FW mode indications */ +#define GL_MNG_FWSM_FW_MODES_DEBUG_M BIT(0) +#define GL_MNG_FWSM_FW_MODES_RECOVERY_M BIT(1) +#define GL_MNG_FWSM_FW_MODES_ROLLBACK_M BIT(2) + +/* Global NVM General Status Register */ +#define GLNVM_GENS 0x000B6100 /* Reset Source: POR */ +#define GLNVM_GENS_NVM_PRES_S 0 +#define GLNVM_GENS_NVM_PRES_M BIT(0) +#define GLNVM_GENS_SR_SIZE_S 5 +#define GLNVM_GENS_SR_SIZE_M MAKEMASK(0x7, 5) +#define GLNVM_GENS_BANK1VAL_S 8 +#define GLNVM_GENS_BANK1VAL_M BIT(8) +#define GLNVM_GENS_ALT_PRST_S 23 +#define GLNVM_GENS_ALT_PRST_M BIT(23) +#define GLNVM_GENS_FL_AUTO_RD_S 25 +#define GLNVM_GENS_FL_AUTO_RD_M BIT(25) + +/* Flash Access Register */ +#define GLNVM_FLA 0x000B6108 /* Reset Source: POR */ +#define GLNVM_FLA_LOCKED_S 6 +#define GLNVM_FLA_LOCKED_M BIT(6) + +/* Bit Bang registers */ +#define RDASB_MSGCTL 0x000B6820 +#define RDASB_MSGCTL_HDR_DWS_S 0 +#define RDASB_MSGCTL_EXP_RDW_S 8 +#define RDASB_MSGCTL_CMDV_M BIT(31) +#define RDASB_RSPCTL 0x000B6824 +#define RDASB_RSPCTL_BAD_LENGTH_M BIT(30) +#define RDASB_RSPCTL_NOT_SUCCESS_M BIT(31) +#define RDASB_WHDR0 0x000B68F4 +#define RDASB_WHDR1 0x000B68F8 +#define RDASB_WHDR2 0x000B68FC +#define RDASB_WHDR3 0x000B6900 +#define RDASB_WHDR4 0x000B6904 +#define RDASB_RHDR0 0x000B6AFC +#define RDASB_RHDR0_RESPONSE_S 27 +#define RDASB_RHDR0_RESPONSE_M MAKEMASK(0x7, 27) +#define RDASB_RDATA0 0x000B6B00 +#define RDASB_RDATA1 0x000B6B04 + +/* SPI Registers */ +#define SPISB_MSGCTL 0x000B7020 +#define SPISB_MSGCTL_HDR_DWS_S 0 +#define SPISB_MSGCTL_EXP_RDW_S 8 +#define SPISB_MSGCTL_MSG_MODE_S 26 +#define SPISB_MSGCTL_TOKEN_MODE_S 28 +#define SPISB_MSGCTL_BARCLR_S 30 +#define SPISB_MSGCTL_CMDV_S 31 +#define SPISB_MSGCTL_CMDV_M BIT(31) +#define SPISB_RSPCTL 0x000B7024 +#define SPISB_RSPCTL_BAD_LENGTH_M BIT(30) +#define SPISB_RSPCTL_NOT_SUCCESS_M BIT(31) +#define SPISB_WHDR0 0x000B70F4 +#define SPISB_WHDR0_DEST_SEL_S 12 +#define SPISB_WHDR0_OPCODE_SEL_S 16 +#define SPISB_WHDR0_TAG_S 24 +#define SPISB_WHDR1 0x000B70F8 +#define SPISB_WHDR2 0x000B70FC +#define SPISB_RDATA 0x000B7300 +#define SPISB_WDATA 0x000B7100 + +/* Firmware Reset Count register */ +#define GL_FWRESETCNT 0x00083100 /* Reset Source: POR */ +#define GL_FWRESETCNT_FWRESETCNT_S 0 +#define GL_FWRESETCNT_FWRESETCNT_M MAKEMASK(0xFFFFFFFF, 0) + +/* Admin Command Interface (ACI) registers */ +#define PF_HIDA(_i) (0x00085000 + ((_i) * 4)) +#define PF_HIDA_2(_i) (0x00085020 + ((_i) * 4)) +#define PF_HIBA(_i) (0x00084000 + ((_i) * 4)) +#define PF_HICR 0x00082048 + +#define PF_HIDA_MAX_INDEX 15 +#define PF_HIBA_MAX_INDEX 1023 + +#define PF_HICR_EN BIT(0) +#define PF_HICR_C BIT(1) +#define PF_HICR_SV BIT(2) +#define PF_HICR_EV BIT(3) + +#define GL_HIDA(_i) (0x00082000 + ((_i) * 4)) +#define GL_HIDA_2(_i) (0x00082020 + ((_i) * 4)) +#define GL_HIBA(_i) (0x00081000 + ((_i) * 4)) +#define GL_HICR 0x00082040 + +#define GL_HIDA_MAX_INDEX 15 +#define GL_HIBA_MAX_INDEX 1023 + +#define GL_HICR_C BIT(1) +#define GL_HICR_SV BIT(2) +#define GL_HICR_EV BIT(3) + +#define GL_HICR_EN 0x00082044 + +#define GL_HICR_EN_CHECK BIT(0) + +/* Admin Command Interface (ACI) defines */ +/* Defines that help manage the driver vs FW API checks. + */ +#define IXGBE_FW_API_VER_BRANCH 0x00 +#define IXGBE_FW_API_VER_MAJOR 0x01 +#define IXGBE_FW_API_VER_MINOR 0x07 +#define IXGBE_FW_API_VER_DIFF_ALLOWED 0x02 + +#define IXGBE_ACI_DESC_SIZE 32 +#define IXGBE_ACI_DESC_SIZE_IN_DWORDS IXGBE_ACI_DESC_SIZE / BYTES_PER_DWORD + +#define IXGBE_ACI_MAX_BUFFER_SIZE 4096 /* Size in bytes */ +#define IXGBE_ACI_DESC_COOKIE_L_DWORD_OFFSET 3 +#define IXGBE_ACI_SEND_DELAY_TIME_MS 10 +#define IXGBE_ACI_SEND_MAX_EXECUTE 3 +/* [ms] timeout of waiting for sync response */ +#define IXGBE_ACI_SYNC_RESPONSE_TIMEOUT 100000 +/* [ms] timeout of waiting for async response */ +#define IXGBE_ACI_ASYNC_RESPONSE_TIMEOUT 150000 +/* [ms] timeout of waiting for resource release */ +#define IXGBE_ACI_RELEASE_RES_TIMEOUT 10000 + +/* Timestamp spacing for Tools ACI: queue is active if spacing is within the range [LO..HI] */ +#define IXGBE_TOOLS_ACI_ACTIVE_STAMP_SPACING_LO 0 +#define IXGBE_TOOLS_ACI_ACTIVE_STAMP_SPACING_HI 200 + +/* Timestamp spacing for Tools ACI: queue is expired if spacing is outside the range [LO..HI] */ +#define IXGBE_TOOLS_ACI_EXPIRED_STAMP_SPACING_LO -5 +#define IXGBE_TOOLS_ACI_EXPIRED_STAMP_SPACING_HI 205 + +/* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */ +#define IXGBE_ACI_LG_BUF 512 + +/* Flags sub-structure + * |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 | + * |DD |CMP|ERR|VFE| * * RESERVED * * |LB |RD |VFC|BUF|SI |EI |FE | + */ + +/* command flags and offsets */ +#define IXGBE_ACI_FLAG_DD_S 0 +#define IXGBE_ACI_FLAG_CMP_S 1 +#define IXGBE_ACI_FLAG_ERR_S 2 +#define IXGBE_ACI_FLAG_VFE_S 3 +#define IXGBE_ACI_FLAG_LB_S 9 +#define IXGBE_ACI_FLAG_RD_S 10 +#define IXGBE_ACI_FLAG_VFC_S 11 +#define IXGBE_ACI_FLAG_BUF_S 12 +#define IXGBE_ACI_FLAG_SI_S 13 +#define IXGBE_ACI_FLAG_EI_S 14 +#define IXGBE_ACI_FLAG_FE_S 15 + +#define IXGBE_ACI_FLAG_DD BIT(IXGBE_ACI_FLAG_DD_S) /* 0x1 */ +#define IXGBE_ACI_FLAG_CMP BIT(IXGBE_ACI_FLAG_CMP_S) /* 0x2 */ +#define IXGBE_ACI_FLAG_ERR BIT(IXGBE_ACI_FLAG_ERR_S) /* 0x4 */ +#define IXGBE_ACI_FLAG_VFE BIT(IXGBE_ACI_FLAG_VFE_S) /* 0x8 */ +#define IXGBE_ACI_FLAG_LB BIT(IXGBE_ACI_FLAG_LB_S) /* 0x200 */ +#define IXGBE_ACI_FLAG_RD BIT(IXGBE_ACI_FLAG_RD_S) /* 0x400 */ +#define IXGBE_ACI_FLAG_VFC BIT(IXGBE_ACI_FLAG_VFC_S) /* 0x800 */ +#define IXGBE_ACI_FLAG_BUF BIT(IXGBE_ACI_FLAG_BUF_S) /* 0x1000 */ +#define IXGBE_ACI_FLAG_SI BIT(IXGBE_ACI_FLAG_SI_S) /* 0x2000 */ +#define IXGBE_ACI_FLAG_EI BIT(IXGBE_ACI_FLAG_EI_S) /* 0x4000 */ +#define IXGBE_ACI_FLAG_FE BIT(IXGBE_ACI_FLAG_FE_S) /* 0x8000 */ + +/* Admin Command Interface (ACI) error codes */ +enum ixgbe_aci_err { + IXGBE_ACI_RC_OK = 0, /* Success */ + IXGBE_ACI_RC_EPERM = 1, /* Operation not permitted */ + IXGBE_ACI_RC_ENOENT = 2, /* No such element */ + IXGBE_ACI_RC_ESRCH = 3, /* Bad opcode */ + IXGBE_ACI_RC_EINTR = 4, /* Operation interrupted */ + IXGBE_ACI_RC_EIO = 5, /* I/O error */ + IXGBE_ACI_RC_ENXIO = 6, /* No such resource */ + IXGBE_ACI_RC_E2BIG = 7, /* Arg too long */ + IXGBE_ACI_RC_EAGAIN = 8, /* Try again */ + IXGBE_ACI_RC_ENOMEM = 9, /* Out of memory */ + IXGBE_ACI_RC_EACCES = 10, /* Permission denied */ + IXGBE_ACI_RC_EFAULT = 11, /* Bad address */ + IXGBE_ACI_RC_EBUSY = 12, /* Device or resource busy */ + IXGBE_ACI_RC_EEXIST = 13, /* Object already exists */ + IXGBE_ACI_RC_EINVAL = 14, /* Invalid argument */ + IXGBE_ACI_RC_ENOTTY = 15, /* Not a typewriter */ + IXGBE_ACI_RC_ENOSPC = 16, /* No space left or allocation failure */ + IXGBE_ACI_RC_ENOSYS = 17, /* Function not implemented */ + IXGBE_ACI_RC_ERANGE = 18, /* Parameter out of range */ + IXGBE_ACI_RC_EFLUSHED = 19, /* Cmd flushed due to prev cmd error */ + IXGBE_ACI_RC_BAD_ADDR = 20, /* Descriptor contains a bad pointer */ + IXGBE_ACI_RC_EMODE = 21, /* Op not allowed in current dev mode */ + IXGBE_ACI_RC_EFBIG = 22, /* File too big */ + IXGBE_ACI_RC_ESBCOMP = 23, /* SB-IOSF completion unsuccessful */ + IXGBE_ACI_RC_ENOSEC = 24, /* Missing security manifest */ + IXGBE_ACI_RC_EBADSIG = 25, /* Bad RSA signature */ + IXGBE_ACI_RC_ESVN = 26, /* SVN number prohibits this package */ + IXGBE_ACI_RC_EBADMAN = 27, /* Manifest hash mismatch */ + IXGBE_ACI_RC_EBADBUF = 28, /* Buffer hash mismatches manifest */ + IXGBE_ACI_RC_EACCES_BMCU = 29, /* BMC Update in progress */ +}; + +/* Admin Command Interface (ACI) opcodes */ +enum ixgbe_aci_opc { + ixgbe_aci_opc_get_ver = 0x0001, + ixgbe_aci_opc_driver_ver = 0x0002, + ixgbe_aci_opc_get_exp_err = 0x0005, + + /* resource ownership */ + ixgbe_aci_opc_req_res = 0x0008, + ixgbe_aci_opc_release_res = 0x0009, + + /* device/function capabilities */ + ixgbe_aci_opc_list_func_caps = 0x000A, + ixgbe_aci_opc_list_dev_caps = 0x000B, + + /* safe disable of RXEN */ + ixgbe_aci_opc_disable_rxen = 0x000C, + + /* FW events */ + ixgbe_aci_opc_get_fw_event = 0x0014, + + /* PHY commands */ + ixgbe_aci_opc_get_phy_caps = 0x0600, + ixgbe_aci_opc_set_phy_cfg = 0x0601, + ixgbe_aci_opc_restart_an = 0x0605, + ixgbe_aci_opc_get_link_status = 0x0607, + ixgbe_aci_opc_set_event_mask = 0x0613, + ixgbe_aci_opc_get_link_topo = 0x06E0, + ixgbe_aci_opc_read_i2c = 0x06E2, + ixgbe_aci_opc_write_i2c = 0x06E3, + ixgbe_aci_opc_read_mdio = 0x06E4, + ixgbe_aci_opc_write_mdio = 0x06E5, + ixgbe_aci_opc_set_gpio_by_func = 0x06E6, + ixgbe_aci_opc_get_gpio_by_func = 0x06E7, + ixgbe_aci_opc_set_port_id_led = 0x06E9, + ixgbe_aci_opc_set_gpio = 0x06EC, + ixgbe_aci_opc_get_gpio = 0x06ED, + ixgbe_aci_opc_sff_eeprom = 0x06EE, + ixgbe_aci_opc_prog_topo_dev_nvm = 0x06F2, + ixgbe_aci_opc_read_topo_dev_nvm = 0x06F3, + + /* NVM commands */ + ixgbe_aci_opc_nvm_read = 0x0701, + ixgbe_aci_opc_nvm_erase = 0x0702, + ixgbe_aci_opc_nvm_write = 0x0703, + ixgbe_aci_opc_nvm_cfg_read = 0x0704, + ixgbe_aci_opc_nvm_cfg_write = 0x0705, + ixgbe_aci_opc_nvm_checksum = 0x0706, + ixgbe_aci_opc_nvm_write_activate = 0x0707, + ixgbe_aci_opc_nvm_sr_dump = 0x0707, + ixgbe_aci_opc_nvm_save_factory_settings = 0x0708, + ixgbe_aci_opc_nvm_update_empr = 0x0709, + ixgbe_aci_opc_nvm_pkg_data = 0x070A, + ixgbe_aci_opc_nvm_pass_component_tbl = 0x070B, + ixgbe_aci_opc_nvm_sanitization = 0x070C, + + /* Alternate Structure Commands */ + ixgbe_aci_opc_write_alt_direct = 0x0900, + ixgbe_aci_opc_write_alt_indirect = 0x0901, + ixgbe_aci_opc_read_alt_direct = 0x0902, + ixgbe_aci_opc_read_alt_indirect = 0x0903, + ixgbe_aci_opc_done_alt_write = 0x0904, + ixgbe_aci_opc_clear_port_alt_write = 0x0906, + + ixgbe_aci_opc_temp_tca_event = 0x0C94, + + /* debug commands */ + ixgbe_aci_opc_debug_dump_internals = 0xFF08, + + /* SystemDiagnostic commands */ + ixgbe_aci_opc_set_health_status_config = 0xFF20, + ixgbe_aci_opc_get_supported_health_status_codes = 0xFF21, + ixgbe_aci_opc_get_health_status = 0xFF22, + ixgbe_aci_opc_clear_health_status = 0xFF23, + + /* FW Logging Commands */ + ixgbe_aci_opc_fw_logs_config = 0xFF30, + ixgbe_aci_opc_fw_logs_register = 0xFF31, + ixgbe_aci_opc_fw_logs_query = 0xFF32, + ixgbe_aci_opc_fw_logs_event = 0xFF33, + ixgbe_aci_opc_fw_logs_get = 0xFF34, + ixgbe_aci_opc_fw_logs_clear = 0xFF35 +}; + +/* This macro is used to generate a compilation error if a structure + * is not exactly the correct length. It gives a divide by zero error if the + * structure is not of the correct size, otherwise it creates an enum that is + * never used. + */ +#define IXGBE_CHECK_STRUCT_LEN(n, X) enum ixgbe_static_assert_enum_##X \ + { ixgbe_static_assert_##X = (n) / ((sizeof(struct X) == (n)) ? 1 : 0) } + +/* This macro is used to generate a compilation error if a variable-length + * structure is not exactly the correct length assuming a single element of + * the variable-length object as the last element of the structure. It gives + * a divide by zero error if the structure is not of the correct size, + * otherwise it creates an enum that is never used. + */ +#define IXGBE_CHECK_VAR_LEN_STRUCT_LEN(n, X, T) enum ixgbe_static_assert_enum_##X \ + { ixgbe_static_assert_##X = (n) / \ + (((sizeof(struct X) + sizeof(T)) == (n)) ? 1 : 0) } + +/* This macro is used to ensure that parameter structures (i.e. structures + * in the params union member of struct ixgbe_aci_desc) are 16 bytes in length. + * + * NOT intended to be used to check the size of an indirect command/response + * additional data buffer (e.g. struct foo) which should just happen to be 16 + * bytes (instead, use IXGBE_CHECK_STRUCT_LEN(16, foo) for that). + */ +#define IXGBE_CHECK_PARAM_LEN(X) IXGBE_CHECK_STRUCT_LEN(16, X) + +struct ixgbe_aci_cmd_generic { + __le32 param0; + __le32 param1; + __le32 addr_high; + __le32 addr_low; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_generic); + +/* Get version (direct 0x0001) */ +struct ixgbe_aci_cmd_get_ver { + __le32 rom_ver; + __le32 fw_build; + u8 fw_branch; + u8 fw_major; + u8 fw_minor; + u8 fw_patch; + u8 api_branch; + u8 api_major; + u8 api_minor; + u8 api_patch; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_get_ver); + +#define IXGBE_DRV_VER_STR_LEN_E610 32 + +struct ixgbe_driver_ver { + u8 major_ver; + u8 minor_ver; + u8 build_ver; + u8 subbuild_ver; + u8 driver_string[IXGBE_DRV_VER_STR_LEN_E610]; +}; + +/* Send driver version (indirect 0x0002) */ +struct ixgbe_aci_cmd_driver_ver { + u8 major_ver; + u8 minor_ver; + u8 build_ver; + u8 subbuild_ver; + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_driver_ver); + +/* Get Expanded Error Code (0x0005, direct) */ +struct ixgbe_aci_cmd_get_exp_err { + __le32 reason; +#define IXGBE_ACI_EXPANDED_ERROR_NOT_PROVIDED 0xFFFFFFFF + __le32 identifier; + u8 rsvd[8]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_get_exp_err); + +/* FW update timeout definitions are in milliseconds */ +#define IXGBE_NVM_TIMEOUT 180000 +#define IXGBE_CHANGE_LOCK_TIMEOUT 1000 +#define IXGBE_GLOBAL_CFG_LOCK_TIMEOUT 3000 + +enum ixgbe_aci_res_access_type { + IXGBE_RES_READ = 1, + IXGBE_RES_WRITE +}; + +enum ixgbe_aci_res_ids { + IXGBE_NVM_RES_ID = 1, + IXGBE_SPD_RES_ID, + IXGBE_CHANGE_LOCK_RES_ID, + IXGBE_GLOBAL_CFG_LOCK_RES_ID +}; + +/* Request resource ownership (direct 0x0008) + * Release resource ownership (direct 0x0009) + */ +struct ixgbe_aci_cmd_req_res { + __le16 res_id; +#define IXGBE_ACI_RES_ID_NVM 1 +#define IXGBE_ACI_RES_ID_SDP 2 +#define IXGBE_ACI_RES_ID_CHNG_LOCK 3 +#define IXGBE_ACI_RES_ID_GLBL_LOCK 4 + __le16 access_type; +#define IXGBE_ACI_RES_ACCESS_READ 1 +#define IXGBE_ACI_RES_ACCESS_WRITE 2 + + /* Upon successful completion, FW writes this value and driver is + * expected to release resource before timeout. This value is provided + * in milliseconds. + */ + __le32 timeout; +#define IXGBE_ACI_RES_NVM_READ_DFLT_TIMEOUT_MS 3000 +#define IXGBE_ACI_RES_NVM_WRITE_DFLT_TIMEOUT_MS 180000 +#define IXGBE_ACI_RES_CHNG_LOCK_DFLT_TIMEOUT_MS 1000 +#define IXGBE_ACI_RES_GLBL_LOCK_DFLT_TIMEOUT_MS 3000 + /* For SDP: pin ID of the SDP */ + __le32 res_number; + /* Status is only used for IXGBE_ACI_RES_ID_GLBL_LOCK */ + __le16 status; +#define IXGBE_ACI_RES_GLBL_SUCCESS 0 +#define IXGBE_ACI_RES_GLBL_IN_PROG 1 +#define IXGBE_ACI_RES_GLBL_DONE 2 + u8 reserved[2]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_req_res); + +/* Get function capabilities (indirect 0x000A) + * Get device capabilities (indirect 0x000B) + */ +struct ixgbe_aci_cmd_list_caps { + u8 cmd_flags; + u8 pf_index; + u8 reserved[2]; + __le32 count; + __le32 addr_high; + __le32 addr_low; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_list_caps); + +/* Device/Function buffer entry, repeated per reported capability */ +struct ixgbe_aci_cmd_list_caps_elem { + __le16 cap; +#define IXGBE_ACI_CAPS_VALID_FUNCTIONS 0x0005 +#define IXGBE_ACI_MAX_VALID_FUNCTIONS 0x8 +#define IXGBE_ACI_CAPS_SRIOV 0x0012 +#define IXGBE_ACI_CAPS_VF 0x0013 +#define IXGBE_ACI_CAPS_VMDQ 0x0014 +#define IXGBE_ACI_CAPS_VSI 0x0017 +#define IXGBE_ACI_CAPS_DCB 0x0018 +#define IXGBE_ACI_CAPS_RSS 0x0040 +#define IXGBE_ACI_CAPS_RXQS 0x0041 +#define IXGBE_ACI_CAPS_TXQS 0x0042 +#define IXGBE_ACI_CAPS_MSIX 0x0043 +#define IXGBE_ACI_CAPS_FD 0x0045 +#define IXGBE_ACI_CAPS_MAX_MTU 0x0047 +#define IXGBE_ACI_CAPS_NVM_VER 0x0048 +#define IXGBE_ACI_CAPS_INLINE_IPSEC 0x0070 +#define IXGBE_ACI_CAPS_NUM_ENABLED_PORTS 0x0072 +#define IXGBE_ACI_CAPS_PCIE_RESET_AVOIDANCE 0x0076 +#define IXGBE_ACI_CAPS_POST_UPDATE_RESET_RESTRICT 0x0077 +#define IXGBE_ACI_CAPS_NVM_MGMT 0x0080 +#define IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG0 0x0081 +#define IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG1 0x0082 +#define IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG2 0x0083 +#define IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG3 0x0084 +#define IXGBE_ACI_CAPS_OROM_RECOVERY_UPDATE 0x0090 +#define IXGBE_ACI_CAPS_NEXT_CLUSTER_ID 0x0096 + u8 major_ver; + u8 minor_ver; + /* Number of resources described by this capability */ + __le32 number; + /* Only meaningful for some types of resources */ + __le32 logical_id; + /* Only meaningful for some types of resources */ + __le32 phys_id; + __le64 rsvd1; + __le64 rsvd2; +}; + +IXGBE_CHECK_STRUCT_LEN(32, ixgbe_aci_cmd_list_caps_elem); + +/* Disable RXEN (direct 0x000C) */ +struct ixgbe_aci_cmd_disable_rxen { + u8 lport_num; + u8 reserved[15]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_disable_rxen); + +/* Get FW Event (indirect 0x0014) */ +struct ixgbe_aci_cmd_get_fw_event { + __le16 fw_buf_status; +#define IXGBE_ACI_GET_FW_EVENT_STATUS_OBTAINED BIT(0) +#define IXGBE_ACI_GET_FW_EVENT_STATUS_PENDING BIT(1) + u8 rsvd[14]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_get_fw_event); + +/* Get PHY capabilities (indirect 0x0600) */ +struct ixgbe_aci_cmd_get_phy_caps { + u8 lport_num; + u8 reserved; + __le16 param0; + /* 18.0 - Report qualified modules */ +#define IXGBE_ACI_GET_PHY_RQM BIT(0) + /* 18.1 - 18.3 : Report mode + * 000b - Report topology capabilities, without media + * 001b - Report topology capabilities, with media + * 010b - Report Active configuration + * 011b - Report PHY Type and FEC mode capabilities + * 100b - Report Default capabilities + */ +#define IXGBE_ACI_REPORT_MODE_S 1 +#define IXGBE_ACI_REPORT_MODE_M (7 << IXGBE_ACI_REPORT_MODE_S) +#define IXGBE_ACI_REPORT_TOPO_CAP_NO_MEDIA 0 +#define IXGBE_ACI_REPORT_TOPO_CAP_MEDIA BIT(1) +#define IXGBE_ACI_REPORT_ACTIVE_CFG BIT(2) +#define IXGBE_ACI_REPORT_DFLT_CFG BIT(3) + __le32 reserved1; + __le32 addr_high; + __le32 addr_low; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_get_phy_caps); + +/* This is #define of PHY type (Extended): + * The first set of defines is for phy_type_low. + */ +#define IXGBE_PHY_TYPE_LOW_100BASE_TX BIT_ULL(0) +#define IXGBE_PHY_TYPE_LOW_100M_SGMII BIT_ULL(1) +#define IXGBE_PHY_TYPE_LOW_1000BASE_T BIT_ULL(2) +#define IXGBE_PHY_TYPE_LOW_1000BASE_SX BIT_ULL(3) +#define IXGBE_PHY_TYPE_LOW_1000BASE_LX BIT_ULL(4) +#define IXGBE_PHY_TYPE_LOW_1000BASE_KX BIT_ULL(5) +#define IXGBE_PHY_TYPE_LOW_1G_SGMII BIT_ULL(6) +#define IXGBE_PHY_TYPE_LOW_2500BASE_T BIT_ULL(7) +#define IXGBE_PHY_TYPE_LOW_2500BASE_X BIT_ULL(8) +#define IXGBE_PHY_TYPE_LOW_2500BASE_KX BIT_ULL(9) +#define IXGBE_PHY_TYPE_LOW_5GBASE_T BIT_ULL(10) +#define IXGBE_PHY_TYPE_LOW_5GBASE_KR BIT_ULL(11) +#define IXGBE_PHY_TYPE_LOW_10GBASE_T BIT_ULL(12) +#define IXGBE_PHY_TYPE_LOW_10G_SFI_DA BIT_ULL(13) +#define IXGBE_PHY_TYPE_LOW_10GBASE_SR BIT_ULL(14) +#define IXGBE_PHY_TYPE_LOW_10GBASE_LR BIT_ULL(15) +#define IXGBE_PHY_TYPE_LOW_10GBASE_KR_CR1 BIT_ULL(16) +#define IXGBE_PHY_TYPE_LOW_10G_SFI_AOC_ACC BIT_ULL(17) +#define IXGBE_PHY_TYPE_LOW_10G_SFI_C2C BIT_ULL(18) +#define IXGBE_PHY_TYPE_LOW_MAX_INDEX 18 +/* The second set of defines is for phy_type_high. */ +#define IXGBE_PHY_TYPE_HIGH_10BASE_T BIT_ULL(1) +#define IXGBE_PHY_TYPE_HIGH_10M_SGMII BIT_ULL(2) +#define IXGBE_PHY_TYPE_HIGH_2500M_SGMII BIT_ULL(56) +#define IXGBE_PHY_TYPE_HIGH_100M_USXGMII BIT_ULL(57) +#define IXGBE_PHY_TYPE_HIGH_1G_USXGMII BIT_ULL(58) +#define IXGBE_PHY_TYPE_HIGH_2500M_USXGMII BIT_ULL(59) +#define IXGBE_PHY_TYPE_HIGH_5G_USXGMII BIT_ULL(60) +#define IXGBE_PHY_TYPE_HIGH_10G_USXGMII BIT_ULL(61) +#define IXGBE_PHY_TYPE_HIGH_MAX_INDEX 61 + +struct ixgbe_aci_cmd_get_phy_caps_data { + __le64 phy_type_low; /* Use values from IXGBE_PHY_TYPE_LOW_* */ + __le64 phy_type_high; /* Use values from IXGBE_PHY_TYPE_HIGH_* */ + u8 caps; +#define IXGBE_ACI_PHY_EN_TX_LINK_PAUSE BIT(0) +#define IXGBE_ACI_PHY_EN_RX_LINK_PAUSE BIT(1) +#define IXGBE_ACI_PHY_LOW_POWER_MODE BIT(2) +#define IXGBE_ACI_PHY_EN_LINK BIT(3) +#define IXGBE_ACI_PHY_AN_MODE BIT(4) +#define IXGBE_ACI_PHY_EN_MOD_QUAL BIT(5) +#define IXGBE_ACI_PHY_EN_LESM BIT(6) +#define IXGBE_ACI_PHY_EN_AUTO_FEC BIT(7) +#define IXGBE_ACI_PHY_CAPS_MASK MAKEMASK(0xff, 0) + u8 low_power_ctrl_an; +#define IXGBE_ACI_PHY_EN_D3COLD_LOW_POWER_AUTONEG BIT(0) +#define IXGBE_ACI_PHY_AN_EN_CLAUSE28 BIT(1) +#define IXGBE_ACI_PHY_AN_EN_CLAUSE73 BIT(2) +#define IXGBE_ACI_PHY_AN_EN_CLAUSE37 BIT(3) + __le16 eee_cap; +#define IXGBE_ACI_PHY_EEE_EN_100BASE_TX BIT(0) +#define IXGBE_ACI_PHY_EEE_EN_1000BASE_T BIT(1) +#define IXGBE_ACI_PHY_EEE_EN_10GBASE_T BIT(2) +#define IXGBE_ACI_PHY_EEE_EN_1000BASE_KX BIT(3) +#define IXGBE_ACI_PHY_EEE_EN_10GBASE_KR BIT(4) +#define IXGBE_ACI_PHY_EEE_EN_25GBASE_KR BIT(5) +#define IXGBE_ACI_PHY_EEE_EN_10BASE_T BIT(11) + __le16 eeer_value; + u8 phy_id_oui[4]; /* PHY/Module ID connected on the port */ + u8 phy_fw_ver[8]; + u8 link_fec_options; +#define IXGBE_ACI_PHY_FEC_10G_KR_40G_KR4_EN BIT(0) +#define IXGBE_ACI_PHY_FEC_10G_KR_40G_KR4_REQ BIT(1) +#define IXGBE_ACI_PHY_FEC_25G_RS_528_REQ BIT(2) +#define IXGBE_ACI_PHY_FEC_25G_KR_REQ BIT(3) +#define IXGBE_ACI_PHY_FEC_25G_RS_544_REQ BIT(4) +#define IXGBE_ACI_PHY_FEC_25G_RS_CLAUSE91_EN BIT(6) +#define IXGBE_ACI_PHY_FEC_25G_KR_CLAUSE74_EN BIT(7) +#define IXGBE_ACI_PHY_FEC_MASK MAKEMASK(0xdf, 0) + u8 module_compliance_enforcement; +#define IXGBE_ACI_MOD_ENFORCE_STRICT_MODE BIT(0) + u8 extended_compliance_code; +#define IXGBE_ACI_MODULE_TYPE_TOTAL_BYTE 3 + u8 module_type[IXGBE_ACI_MODULE_TYPE_TOTAL_BYTE]; +#define IXGBE_ACI_MOD_TYPE_BYTE0_SFP_PLUS 0xA0 +#define IXGBE_ACI_MOD_TYPE_BYTE0_QSFP_PLUS 0x80 +#define IXGBE_ACI_MOD_TYPE_IDENT 1 +#define IXGBE_ACI_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE BIT(0) +#define IXGBE_ACI_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE BIT(1) +#define IXGBE_ACI_MOD_TYPE_BYTE1_10G_BASE_SR BIT(4) +#define IXGBE_ACI_MOD_TYPE_BYTE1_10G_BASE_LR BIT(5) +#define IXGBE_ACI_MOD_TYPE_BYTE1_10G_BASE_LRM BIT(6) +#define IXGBE_ACI_MOD_TYPE_BYTE1_10G_BASE_ER BIT(7) +#define IXGBE_ACI_MOD_TYPE_BYTE2_SFP_PLUS 0xA0 +#define IXGBE_ACI_MOD_TYPE_BYTE2_QSFP_PLUS 0x86 + u8 qualified_module_count; + u8 rsvd2[7]; /* Bytes 47:41 reserved */ +#define IXGBE_ACI_QUAL_MOD_COUNT_MAX 16 + struct { + u8 v_oui[3]; + u8 rsvd3; + u8 v_part[16]; + __le32 v_rev; + __le64 rsvd4; + } qual_modules[IXGBE_ACI_QUAL_MOD_COUNT_MAX]; +}; + +IXGBE_CHECK_STRUCT_LEN(560, ixgbe_aci_cmd_get_phy_caps_data); + +/* Set PHY capabilities (direct 0x0601) + * NOTE: This command must be followed by setup link and restart auto-neg + */ +struct ixgbe_aci_cmd_set_phy_cfg { + u8 reserved[8]; + __le32 addr_high; + __le32 addr_low; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_set_phy_cfg); + +/* Set PHY config command data structure */ +struct ixgbe_aci_cmd_set_phy_cfg_data { + __le64 phy_type_low; /* Use values from IXGBE_PHY_TYPE_LOW_* */ + __le64 phy_type_high; /* Use values from IXGBE_PHY_TYPE_HIGH_* */ + u8 caps; +#define IXGBE_ACI_PHY_ENA_VALID_MASK MAKEMASK(0xef, 0) +#define IXGBE_ACI_PHY_ENA_TX_PAUSE_ABILITY BIT(0) +#define IXGBE_ACI_PHY_ENA_RX_PAUSE_ABILITY BIT(1) +#define IXGBE_ACI_PHY_ENA_LOW_POWER BIT(2) +#define IXGBE_ACI_PHY_ENA_LINK BIT(3) +#define IXGBE_ACI_PHY_ENA_AUTO_LINK_UPDT BIT(5) +#define IXGBE_ACI_PHY_ENA_LESM BIT(6) +#define IXGBE_ACI_PHY_ENA_AUTO_FEC BIT(7) + u8 low_power_ctrl_an; + __le16 eee_cap; /* Value from ixgbe_aci_get_phy_caps */ + __le16 eeer_value; /* Use defines from ixgbe_aci_get_phy_caps */ + u8 link_fec_opt; /* Use defines from ixgbe_aci_get_phy_caps */ + u8 module_compliance_enforcement; +}; + +IXGBE_CHECK_STRUCT_LEN(24, ixgbe_aci_cmd_set_phy_cfg_data); + +/* Restart AN command data structure (direct 0x0605) + * Also used for response, with only the lport_num field present. + */ +struct ixgbe_aci_cmd_restart_an { + u8 reserved[2]; + u8 cmd_flags; +#define IXGBE_ACI_RESTART_AN_LINK_RESTART BIT(1) +#define IXGBE_ACI_RESTART_AN_LINK_ENABLE BIT(2) + u8 reserved2[13]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_restart_an); + +#pragma pack(1) +/* Get link status (indirect 0x0607), also used for Link Status Event */ +struct ixgbe_aci_cmd_get_link_status { + u8 reserved[2]; + u8 cmd_flags; +#define IXGBE_ACI_LSE_M 0x3 +#define IXGBE_ACI_LSE_NOP 0x0 +#define IXGBE_ACI_LSE_DIS 0x2 +#define IXGBE_ACI_LSE_ENA 0x3 + /* only response uses this flag */ +#define IXGBE_ACI_LSE_IS_ENABLED 0x1 + u8 reserved2[5]; + __le32 addr_high; + __le32 addr_low; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_get_link_status); + +/* Get link status response data structure, also used for Link Status Event */ +struct ixgbe_aci_cmd_get_link_status_data { + u8 topo_media_conflict; +#define IXGBE_ACI_LINK_TOPO_CONFLICT BIT(0) +#define IXGBE_ACI_LINK_MEDIA_CONFLICT BIT(1) +#define IXGBE_ACI_LINK_TOPO_CORRUPT BIT(2) +#define IXGBE_ACI_LINK_TOPO_UNREACH_PRT BIT(4) +#define IXGBE_ACI_LINK_TOPO_UNDRUTIL_PRT BIT(5) +#define IXGBE_ACI_LINK_TOPO_UNDRUTIL_MEDIA BIT(6) +#define IXGBE_ACI_LINK_TOPO_UNSUPP_MEDIA BIT(7) + u8 link_cfg_err; +#define IXGBE_ACI_LINK_CFG_ERR BIT(0) +#define IXGBE_ACI_LINK_CFG_COMPLETED BIT(1) +#define IXGBE_ACI_LINK_ACT_PORT_OPT_INVAL BIT(2) +#define IXGBE_ACI_LINK_FEAT_ID_OR_CONFIG_ID_INVAL BIT(3) +#define IXGBE_ACI_LINK_TOPO_CRITICAL_SDP_ERR BIT(4) +#define IXGBE_ACI_LINK_MODULE_POWER_UNSUPPORTED BIT(5) +#define IXGBE_ACI_LINK_EXTERNAL_PHY_LOAD_FAILURE BIT(6) +#define IXGBE_ACI_LINK_INVAL_MAX_POWER_LIMIT BIT(7) + u8 link_info; +#define IXGBE_ACI_LINK_UP BIT(0) /* Link Status */ +#define IXGBE_ACI_LINK_FAULT BIT(1) +#define IXGBE_ACI_LINK_FAULT_TX BIT(2) +#define IXGBE_ACI_LINK_FAULT_RX BIT(3) +#define IXGBE_ACI_LINK_FAULT_REMOTE BIT(4) +#define IXGBE_ACI_LINK_UP_PORT BIT(5) /* External Port Link Status */ +#define IXGBE_ACI_MEDIA_AVAILABLE BIT(6) +#define IXGBE_ACI_SIGNAL_DETECT BIT(7) + u8 an_info; +#define IXGBE_ACI_AN_COMPLETED BIT(0) +#define IXGBE_ACI_LP_AN_ABILITY BIT(1) +#define IXGBE_ACI_PD_FAULT BIT(2) /* Parallel Detection Fault */ +#define IXGBE_ACI_FEC_EN BIT(3) +#define IXGBE_ACI_PHY_LOW_POWER BIT(4) /* Low Power State */ +#define IXGBE_ACI_LINK_PAUSE_TX BIT(5) +#define IXGBE_ACI_LINK_PAUSE_RX BIT(6) +#define IXGBE_ACI_QUALIFIED_MODULE BIT(7) + u8 ext_info; +#define IXGBE_ACI_LINK_PHY_TEMP_ALARM BIT(0) +#define IXGBE_ACI_LINK_EXCESSIVE_ERRORS BIT(1) /* Excessive Link Errors */ + /* Port Tx Suspended */ +#define IXGBE_ACI_LINK_TX_S 2 +#define IXGBE_ACI_LINK_TX_M (0x03 << IXGBE_ACI_LINK_TX_S) +#define IXGBE_ACI_LINK_TX_ACTIVE 0 +#define IXGBE_ACI_LINK_TX_DRAINED 1 +#define IXGBE_ACI_LINK_TX_FLUSHED 3 + u8 lb_status; +#define IXGBE_ACI_LINK_LB_PHY_LCL BIT(0) +#define IXGBE_ACI_LINK_LB_PHY_RMT BIT(1) +#define IXGBE_ACI_LINK_LB_MAC_LCL BIT(2) +#define IXGBE_ACI_LINK_LB_PHY_IDX_S 3 +#define IXGBE_ACI_LINK_LB_PHY_IDX_M (0x7 << IXGBE_ACI_LB_PHY_IDX_S) + __le16 max_frame_size; + u8 cfg; +#define IXGBE_ACI_LINK_25G_KR_FEC_EN BIT(0) +#define IXGBE_ACI_LINK_25G_RS_528_FEC_EN BIT(1) +#define IXGBE_ACI_LINK_25G_RS_544_FEC_EN BIT(2) +#define IXGBE_ACI_FEC_MASK MAKEMASK(0x7, 0) + /* Pacing Config */ +#define IXGBE_ACI_CFG_PACING_S 3 +#define IXGBE_ACI_CFG_PACING_M (0xF << IXGBE_ACI_CFG_PACING_S) +#define IXGBE_ACI_CFG_PACING_TYPE_M BIT(7) +#define IXGBE_ACI_CFG_PACING_TYPE_AVG 0 +#define IXGBE_ACI_CFG_PACING_TYPE_FIXED IXGBE_ACI_CFG_PACING_TYPE_M + /* External Device Power Ability */ + u8 power_desc; +#define IXGBE_ACI_PWR_CLASS_M 0x3F +#define IXGBE_ACI_LINK_PWR_BASET_LOW_HIGH 0 +#define IXGBE_ACI_LINK_PWR_BASET_HIGH 1 +#define IXGBE_ACI_LINK_PWR_QSFP_CLASS_1 0 +#define IXGBE_ACI_LINK_PWR_QSFP_CLASS_2 1 +#define IXGBE_ACI_LINK_PWR_QSFP_CLASS_3 2 +#define IXGBE_ACI_LINK_PWR_QSFP_CLASS_4 3 + __le16 link_speed; +#define IXGBE_ACI_LINK_SPEED_M 0x7FF +#define IXGBE_ACI_LINK_SPEED_10MB BIT(0) +#define IXGBE_ACI_LINK_SPEED_100MB BIT(1) +#define IXGBE_ACI_LINK_SPEED_1000MB BIT(2) +#define IXGBE_ACI_LINK_SPEED_2500MB BIT(3) +#define IXGBE_ACI_LINK_SPEED_5GB BIT(4) +#define IXGBE_ACI_LINK_SPEED_10GB BIT(5) +#define IXGBE_ACI_LINK_SPEED_20GB BIT(6) +#define IXGBE_ACI_LINK_SPEED_25GB BIT(7) +#define IXGBE_ACI_LINK_SPEED_40GB BIT(8) +#define IXGBE_ACI_LINK_SPEED_50GB BIT(9) +#define IXGBE_ACI_LINK_SPEED_100GB BIT(10) +#define IXGBE_ACI_LINK_SPEED_200GB BIT(11) +#define IXGBE_ACI_LINK_SPEED_UNKNOWN BIT(15) + __le16 reserved3; /* Aligns next field to 8-byte boundary */ + u8 ext_fec_status; +#define IXGBE_ACI_LINK_RS_272_FEC_EN BIT(0) /* RS 272 FEC enabled */ + u8 reserved4; + __le64 phy_type_low; /* Use values from IXGBE_PHY_TYPE_LOW_* */ + __le64 phy_type_high; /* Use values from IXGBE_PHY_TYPE_HIGH_* */ + /* Get link status version 2 link partner data */ + __le64 lp_phy_type_low; /* Use values from IXGBE_PHY_TYPE_LOW_* */ + __le64 lp_phy_type_high; /* Use values from IXGBE_PHY_TYPE_HIGH_* */ + u8 lp_fec_adv; +#define IXGBE_ACI_LINK_LP_10G_KR_FEC_CAP BIT(0) +#define IXGBE_ACI_LINK_LP_25G_KR_FEC_CAP BIT(1) +#define IXGBE_ACI_LINK_LP_RS_528_FEC_CAP BIT(2) +#define IXGBE_ACI_LINK_LP_50G_KR_272_FEC_CAP BIT(3) +#define IXGBE_ACI_LINK_LP_100G_KR_272_FEC_CAP BIT(4) +#define IXGBE_ACI_LINK_LP_200G_KR_272_FEC_CAP BIT(5) + u8 lp_fec_req; +#define IXGBE_ACI_LINK_LP_10G_KR_FEC_REQ BIT(0) +#define IXGBE_ACI_LINK_LP_25G_KR_FEC_REQ BIT(1) +#define IXGBE_ACI_LINK_LP_RS_528_FEC_REQ BIT(2) +#define IXGBE_ACI_LINK_LP_KR_272_FEC_REQ BIT(3) + u8 lp_flowcontrol; +#define IXGBE_ACI_LINK_LP_PAUSE_ADV BIT(0) +#define IXGBE_ACI_LINK_LP_ASM_DIR_ADV BIT(1) + u8 reserved5[5]; +}; +#pragma pack() + +IXGBE_CHECK_STRUCT_LEN(56, ixgbe_aci_cmd_get_link_status_data); + +/* Set event mask command (direct 0x0613) */ +struct ixgbe_aci_cmd_set_event_mask { + u8 reserved[8]; + __le16 event_mask; +#define IXGBE_ACI_LINK_EVENT_UPDOWN BIT(1) +#define IXGBE_ACI_LINK_EVENT_MEDIA_NA BIT(2) +#define IXGBE_ACI_LINK_EVENT_LINK_FAULT BIT(3) +#define IXGBE_ACI_LINK_EVENT_PHY_TEMP_ALARM BIT(4) +#define IXGBE_ACI_LINK_EVENT_EXCESSIVE_ERRORS BIT(5) +#define IXGBE_ACI_LINK_EVENT_SIGNAL_DETECT BIT(6) +#define IXGBE_ACI_LINK_EVENT_AN_COMPLETED BIT(7) +#define IXGBE_ACI_LINK_EVENT_MODULE_QUAL_FAIL BIT(8) +#define IXGBE_ACI_LINK_EVENT_PORT_TX_SUSPENDED BIT(9) +#define IXGBE_ACI_LINK_EVENT_TOPO_CONFLICT BIT(10) +#define IXGBE_ACI_LINK_EVENT_MEDIA_CONFLICT BIT(11) +#define IXGBE_ACI_LINK_EVENT_PHY_FW_LOAD_FAIL BIT(12) + u8 reserved1[6]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_set_event_mask); + +struct ixgbe_aci_cmd_link_topo_params { + u8 lport_num; + u8 lport_num_valid; +#define IXGBE_ACI_LINK_TOPO_PORT_NUM_VALID BIT(0) + u8 node_type_ctx; +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_S 0 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_M (0xF << IXGBE_ACI_LINK_TOPO_NODE_TYPE_S) +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_PHY 0 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_GPIO_CTRL 1 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_MUX_CTRL 2 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_LED_CTRL 3 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_LED 4 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_THERMAL 5 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_CAGE 6 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_MEZZ 7 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_ID_EEPROM 8 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_GPS 11 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_S 4 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_M \ + (0xF << IXGBE_ACI_LINK_TOPO_NODE_CTX_S) +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_GLOBAL 0 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_BOARD 1 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_PORT 2 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_NODE 3 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_NODE_HANDLE 4 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_DIRECT_BUS_ACCESS 5 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_NODE_HANDLE_BUS_ADDRESS 6 + u8 index; +}; + +IXGBE_CHECK_STRUCT_LEN(4, ixgbe_aci_cmd_link_topo_params); + +struct ixgbe_aci_cmd_link_topo_addr { + struct ixgbe_aci_cmd_link_topo_params topo_params; + __le16 handle; +#define IXGBE_ACI_LINK_TOPO_HANDLE_S 0 +#define IXGBE_ACI_LINK_TOPO_HANDLE_M (0x3FF << IXGBE_ACI_LINK_TOPO_HANDLE_S) +/* Used to decode the handle field */ +#define IXGBE_ACI_LINK_TOPO_HANDLE_BRD_TYPE_M BIT(9) +#define IXGBE_ACI_LINK_TOPO_HANDLE_BRD_TYPE_LOM BIT(9) +#define IXGBE_ACI_LINK_TOPO_HANDLE_BRD_TYPE_MEZZ 0 +#define IXGBE_ACI_LINK_TOPO_HANDLE_NODE_S 0 +/* In case of a Mezzanine type */ +#define IXGBE_ACI_LINK_TOPO_HANDLE_MEZZ_NODE_M \ + (0x3F << IXGBE_ACI_LINK_TOPO_HANDLE_NODE_S) +#define IXGBE_ACI_LINK_TOPO_HANDLE_MEZZ_S 6 +#define IXGBE_ACI_LINK_TOPO_HANDLE_MEZZ_M \ + (0x7 << IXGBE_ACI_LINK_TOPO_HANDLE_MEZZ_S) +/* In case of a LOM type */ +#define IXGBE_ACI_LINK_TOPO_HANDLE_LOM_NODE_M \ + (0x1FF << IXGBE_ACI_LINK_TOPO_HANDLE_NODE_S) +}; + +IXGBE_CHECK_STRUCT_LEN(6, ixgbe_aci_cmd_link_topo_addr); + +/* Get Link Topology Handle (direct, 0x06E0) */ +struct ixgbe_aci_cmd_get_link_topo { + struct ixgbe_aci_cmd_link_topo_addr addr; + u8 node_part_num; +#define IXGBE_ACI_GET_LINK_TOPO_NODE_NR_PCA9575 0x21 +#define IXGBE_ACI_GET_LINK_TOPO_NODE_NR_GEN_GPS 0x48 +#define IXGBE_ACI_GET_LINK_TOPO_NODE_NR_E610_PTC 0x49 + u8 rsvd[9]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_get_link_topo); + +/* Read/Write I2C (direct, 0x06E2/0x06E3) */ +struct ixgbe_aci_cmd_i2c { + struct ixgbe_aci_cmd_link_topo_addr topo_addr; + __le16 i2c_addr; + u8 i2c_params; +#define IXGBE_ACI_I2C_DATA_SIZE_S 0 +#define IXGBE_ACI_I2C_DATA_SIZE_M (0xF << IXGBE_ACI_I2C_DATA_SIZE_S) +#define IXGBE_ACI_I2C_ADDR_TYPE_M BIT(4) +#define IXGBE_ACI_I2C_ADDR_TYPE_7BIT 0 +#define IXGBE_ACI_I2C_ADDR_TYPE_10BIT IXGBE_ACI_I2C_ADDR_TYPE_M +#define IXGBE_ACI_I2C_DATA_OFFSET_S 5 +#define IXGBE_ACI_I2C_DATA_OFFSET_M (0x3 << IXGBE_ACI_I2C_DATA_OFFSET_S) +#define IXGBE_ACI_I2C_USE_REPEATED_START BIT(7) + u8 rsvd; + __le16 i2c_bus_addr; +#define IXGBE_ACI_I2C_ADDR_7BIT_MASK 0x7F +#define IXGBE_ACI_I2C_ADDR_10BIT_MASK 0x3FF + u8 i2c_data[4]; /* Used only by write command, reserved in read. */ +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_i2c); + +/* Read I2C Response (direct, 0x06E2) */ +struct ixgbe_aci_cmd_read_i2c_resp { + u8 i2c_data[16]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_read_i2c_resp); + +/* Read/Write MDIO (direct, 0x06E4/0x06E5) */ +struct ixgbe_aci_cmd_mdio { + struct ixgbe_aci_cmd_link_topo_addr topo_addr; + u8 mdio_device_addr; +#define IXGBE_ACI_MDIO_DEV_S 0 +#define IXGBE_ACI_MDIO_DEV_M (0x1F << IXGBE_ACI_MDIO_DEV_S) +#define IXGBE_ACI_MDIO_CLAUSE_22 BIT(5) +#define IXGBE_ACI_MDIO_CLAUSE_45 BIT(6) + u8 mdio_bus_address; +#define IXGBE_ACI_MDIO_BUS_ADDR_S 0 +#define IXGBE_ACI_MDIO_BUS_ADDR_M (0x1F << IXGBE_ACI_MDIO_BUS_ADDR_S) + __le16 offset; + __le16 data; /* Input in write cmd, output in read cmd. */ + u8 rsvd1[4]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_mdio); + +/* Set/Get GPIO By Function (direct, 0x06E6/0x06E7) */ +struct ixgbe_aci_cmd_gpio_by_func { + struct ixgbe_aci_cmd_link_topo_addr topo_addr; + u8 io_func_num; +#define IXGBE_ACI_GPIO_FUNC_S 0 +#define IXGBE_ACI_GPIO_FUNC_M (0x1F << IXGBE_ACI_GPIO_IO_FUNC_NUM_S) + u8 io_value; /* Input in write cmd, output in read cmd. */ +#define IXGBE_ACI_GPIO_ON BIT(0) +#define IXGBE_ACI_GPIO_OFF 0 + u8 rsvd[8]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_gpio_by_func); + +/* Set Port Identification LED (direct, 0x06E9) */ +struct ixgbe_aci_cmd_set_port_id_led { + u8 lport_num; + u8 lport_num_valid; +#define IXGBE_ACI_PORT_ID_PORT_NUM_VALID BIT(0) + u8 ident_mode; +#define IXGBE_ACI_PORT_IDENT_LED_BLINK BIT(0) +#define IXGBE_ACI_PORT_IDENT_LED_ORIG 0 + u8 rsvd[13]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_set_port_id_led); + +/* Set/Get GPIO (direct, 0x06EC/0x06ED) */ +struct ixgbe_aci_cmd_gpio { + __le16 gpio_ctrl_handle; +#define IXGBE_ACI_GPIO_HANDLE_S 0 +#define IXGBE_ACI_GPIO_HANDLE_M (0x3FF << IXGBE_ACI_GPIO_HANDLE_S) + u8 gpio_num; + u8 gpio_val; + u8 rsvd[12]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_gpio); + +/* Read/Write SFF EEPROM command (indirect 0x06EE) */ +struct ixgbe_aci_cmd_sff_eeprom { + u8 lport_num; + u8 lport_num_valid; +#define IXGBE_ACI_SFF_PORT_NUM_VALID BIT(0) + __le16 i2c_bus_addr; +#define IXGBE_ACI_SFF_I2CBUS_7BIT_M 0x7F +#define IXGBE_ACI_SFF_I2CBUS_10BIT_M 0x3FF +#define IXGBE_ACI_SFF_I2CBUS_TYPE_M BIT(10) +#define IXGBE_ACI_SFF_I2CBUS_TYPE_7BIT 0 +#define IXGBE_ACI_SFF_I2CBUS_TYPE_10BIT IXGBE_ACI_SFF_I2CBUS_TYPE_M +#define IXGBE_ACI_SFF_PAGE_BANK_CTRL_S 11 +#define IXGBE_ACI_SFF_PAGE_BANK_CTRL_M (0x3 << IXGBE_ACI_SFF_PAGE_BANK_CTRL_S) +#define IXGBE_ACI_SFF_NO_PAGE_BANK_UPDATE 0 +#define IXGBE_ACI_SFF_UPDATE_PAGE 1 +#define IXGBE_ACI_SFF_UPDATE_BANK 2 +#define IXGBE_ACI_SFF_UPDATE_PAGE_BANK 3 +#define IXGBE_ACI_SFF_IS_WRITE BIT(15) + __le16 i2c_offset; + u8 module_bank; + u8 module_page; + __le32 addr_high; + __le32 addr_low; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_sff_eeprom); + +/* Program Topology Device NVM (direct, 0x06F2) */ +struct ixgbe_aci_cmd_prog_topo_dev_nvm { + struct ixgbe_aci_cmd_link_topo_params topo_params; + u8 rsvd[12]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_prog_topo_dev_nvm); + +/* Read Topology Device NVM (direct, 0x06F3) */ +struct ixgbe_aci_cmd_read_topo_dev_nvm { + struct ixgbe_aci_cmd_link_topo_params topo_params; + __le32 start_address; +#define IXGBE_ACI_READ_TOPO_DEV_NVM_DATA_READ_SIZE 8 + u8 data_read[IXGBE_ACI_READ_TOPO_DEV_NVM_DATA_READ_SIZE]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_read_topo_dev_nvm); + +/* NVM Read command (indirect 0x0701) + * NVM Erase commands (direct 0x0702) + * NVM Write commands (indirect 0x0703) + * NVM Write Activate commands (direct 0x0707) + * NVM Shadow RAM Dump commands (direct 0x0707) + */ +struct ixgbe_aci_cmd_nvm { +#define IXGBE_ACI_NVM_MAX_OFFSET 0xFFFFFF + __le16 offset_low; + u8 offset_high; /* For Write Activate offset_high is used as flags2 */ + u8 cmd_flags; +#define IXGBE_ACI_NVM_LAST_CMD BIT(0) +#define IXGBE_ACI_NVM_PCIR_REQ BIT(0) /* Used by NVM Write reply */ +#define IXGBE_ACI_NVM_PRESERVATION_S 1 /* Used by NVM Write Activate only */ +#define IXGBE_ACI_NVM_PRESERVATION_M (3 << IXGBE_ACI_NVM_PRESERVATION_S) +#define IXGBE_ACI_NVM_NO_PRESERVATION (0 << IXGBE_ACI_NVM_PRESERVATION_S) +#define IXGBE_ACI_NVM_PRESERVE_ALL BIT(1) +#define IXGBE_ACI_NVM_FACTORY_DEFAULT (2 << IXGBE_ACI_NVM_PRESERVATION_S) +#define IXGBE_ACI_NVM_PRESERVE_SELECTED (3 << IXGBE_ACI_NVM_PRESERVATION_S) +#define IXGBE_ACI_NVM_ACTIV_SEL_NVM BIT(3) /* Write Activate/SR Dump only */ +#define IXGBE_ACI_NVM_ACTIV_SEL_OROM BIT(4) +#define IXGBE_ACI_NVM_ACTIV_SEL_NETLIST BIT(5) +#define IXGBE_ACI_NVM_SPECIAL_UPDATE BIT(6) +#define IXGBE_ACI_NVM_REVERT_LAST_ACTIV BIT(6) /* Write Activate only */ +#define IXGBE_ACI_NVM_ACTIV_SEL_MASK MAKEMASK(0x7, 3) +#define IXGBE_ACI_NVM_FLASH_ONLY BIT(7) +#define IXGBE_ACI_NVM_RESET_LVL_M MAKEMASK(0x3, 0) /* Write reply only */ +#define IXGBE_ACI_NVM_POR_FLAG 0 +#define IXGBE_ACI_NVM_PERST_FLAG 1 +#define IXGBE_ACI_NVM_EMPR_FLAG 2 +#define IXGBE_ACI_NVM_EMPR_ENA BIT(0) /* Write Activate reply only */ + /* For Write Activate, several flags are sent as part of a separate + * flags2 field using a separate byte. For simplicity of the software + * interface, we pass the flags as a 16 bit value so these flags are + * all offset by 8 bits + */ +#define IXGBE_ACI_NVM_ACTIV_REQ_EMPR BIT(8) /* NVM Write Activate only */ + __le16 module_typeid; + __le16 length; +#define IXGBE_ACI_NVM_ERASE_LEN 0xFFFF + __le32 addr_high; + __le32 addr_low; +}; + +/* NVM Module_Type ID, needed offset and read_len for struct ixgbe_aci_cmd_nvm. */ +#define IXGBE_ACI_NVM_SECTOR_UNIT 4096 /* In Bytes */ +#define IXGBE_ACI_NVM_WORD_UNIT 2 /* In Bytes */ + +#define IXGBE_ACI_NVM_START_POINT 0 +#define IXGBE_ACI_NVM_EMP_SR_PTR_OFFSET 0x90 +#define IXGBE_ACI_NVM_EMP_SR_PTR_RD_LEN 2 /* In Bytes */ +#define IXGBE_ACI_NVM_EMP_SR_PTR_M MAKEMASK(0x7FFF, 0) +#define IXGBE_ACI_NVM_EMP_SR_PTR_TYPE_S 15 +#define IXGBE_ACI_NVM_EMP_SR_PTR_TYPE_M BIT(15) +#define IXGBE_ACI_NVM_EMP_SR_PTR_TYPE_SECTOR 1 + +#define IXGBE_ACI_NVM_LLDP_CFG_PTR_OFFSET 0x46 +#define IXGBE_ACI_NVM_LLDP_CFG_HEADER_LEN 2 /* In Bytes */ +#define IXGBE_ACI_NVM_LLDP_CFG_PTR_RD_LEN 2 /* In Bytes */ + +#define IXGBE_ACI_NVM_LLDP_PRESERVED_MOD_ID 0x129 +#define IXGBE_ACI_NVM_CUR_LLDP_PERSIST_RD_OFFSET 2 /* In Bytes */ +#define IXGBE_ACI_NVM_LLDP_STATUS_M MAKEMASK(0xF, 0) +#define IXGBE_ACI_NVM_LLDP_STATUS_M_LEN 4 /* In Bits */ +#define IXGBE_ACI_NVM_LLDP_STATUS_RD_LEN 4 /* In Bytes */ + +#define IXGBE_ACI_NVM_MINSREV_MOD_ID 0x130 + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_nvm); + +/* Used for reading and writing MinSRev using 0x0701 and 0x0703. Note that the + * type field is excluded from the section when reading and writing from + * a module using the module_typeid field with these AQ commands. + */ +struct ixgbe_aci_cmd_nvm_minsrev { + __le16 length; + __le16 validity; +#define IXGBE_ACI_NVM_MINSREV_NVM_VALID BIT(0) +#define IXGBE_ACI_NVM_MINSREV_OROM_VALID BIT(1) + __le16 nvm_minsrev_l; + __le16 nvm_minsrev_h; + __le16 orom_minsrev_l; + __le16 orom_minsrev_h; +}; + +IXGBE_CHECK_STRUCT_LEN(12, ixgbe_aci_cmd_nvm_minsrev); + +/* Used for 0x0704 as well as for 0x0705 commands */ +struct ixgbe_aci_cmd_nvm_cfg { + u8 cmd_flags; +#define IXGBE_ACI_ANVM_MULTIPLE_ELEMS BIT(0) +#define IXGBE_ACI_ANVM_IMMEDIATE_FIELD BIT(1) +#define IXGBE_ACI_ANVM_NEW_CFG BIT(2) + u8 reserved; + __le16 count; + __le16 id; + u8 reserved1[2]; + __le32 addr_high; + __le32 addr_low; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_nvm_cfg); + +struct ixgbe_aci_cmd_nvm_cfg_data { + __le16 field_id; + __le16 field_options; + __le16 field_value; +}; + +IXGBE_CHECK_STRUCT_LEN(6, ixgbe_aci_cmd_nvm_cfg_data); + +/* NVM Checksum Command (direct, 0x0706) */ +struct ixgbe_aci_cmd_nvm_checksum { + u8 flags; +#define IXGBE_ACI_NVM_CHECKSUM_VERIFY BIT(0) +#define IXGBE_ACI_NVM_CHECKSUM_RECALC BIT(1) + u8 rsvd; + __le16 checksum; /* Used only by response */ +#define IXGBE_ACI_NVM_CHECKSUM_CORRECT 0xBABA + u8 rsvd2[12]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_nvm_checksum); + +/* Used for NVM Sanitization command - 0x070C */ +struct ixgbe_aci_cmd_nvm_sanitization { + u8 cmd_flags; +#define IXGBE_ACI_SANITIZE_REQ_READ 0 +#define IXGBE_ACI_SANITIZE_REQ_OPERATE BIT(0) + +#define IXGBE_ACI_SANITIZE_READ_SUBJECT_NVM_BITS 0 +#define IXGBE_ACI_SANITIZE_READ_SUBJECT_NVM_STATE BIT(1) +#define IXGBE_ACI_SANITIZE_OPERATE_SUBJECT_CLEAR 0 + u8 values; +#define IXGBE_ACI_SANITIZE_NVM_BITS_HOST_CLEAN_SUPPORT BIT(0) +#define IXGBE_ACI_SANITIZE_NVM_BITS_BMC_CLEAN_SUPPORT BIT(2) +#define IXGBE_ACI_SANITIZE_NVM_STATE_HOST_CLEAN_DONE BIT(0) +#define IXGBE_ACI_SANITIZE_NVM_STATE_HOST_CLEAN_SUCCESS BIT(1) +#define IXGBE_ACI_SANITIZE_NVM_STATE_BMC_CLEAN_DONE BIT(2) +#define IXGBE_ACI_SANITIZE_NVM_STATE_BMC_CLEAN_SUCCESS BIT(3) +#define IXGBE_ACI_SANITIZE_OPERATE_HOST_CLEAN_DONE BIT(0) +#define IXGBE_ACI_SANITIZE_OPERATE_HOST_CLEAN_SUCCESS BIT(1) +#define IXGBE_ACI_SANITIZE_OPERATE_BMC_CLEAN_DONE BIT(2) +#define IXGBE_ACI_SANITIZE_OPERATE_BMC_CLEAN_SUCCESS BIT(3) + u8 reserved[14]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_nvm_sanitization); + +/* Write/Read Alternate - Direct (direct 0x0900/0x0902) */ +struct ixgbe_aci_cmd_read_write_alt_direct { + __le32 dword0_addr; + __le32 dword0_value; + __le32 dword1_addr; + __le32 dword1_value; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_read_write_alt_direct); + +/* Write/Read Alternate - Indirect (indirect 0x0901/0x0903) */ +struct ixgbe_aci_cmd_read_write_alt_indirect { + __le32 base_dword_addr; + __le32 num_dwords; + __le32 addr_high; + __le32 addr_low; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_read_write_alt_indirect); + +/* Done Alternate Write (direct 0x0904) */ +struct ixgbe_aci_cmd_done_alt_write { + u8 flags; +#define IXGBE_ACI_CMD_UEFI_BIOS_MODE BIT(0) +#define IXGBE_ACI_RESP_RESET_NEEDED BIT(1) + u8 reserved[15]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_done_alt_write); + +/* Clear Port Alternate Write (direct 0x0906) */ +struct ixgbe_aci_cmd_clear_port_alt_write { + u8 reserved[16]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_clear_port_alt_write); + +/* Get CGU abilities command response data structure (indirect 0x0C61) */ +struct ixgbe_aci_cmd_get_cgu_abilities { + u8 num_inputs; + u8 num_outputs; + u8 pps_dpll_idx; + u8 synce_dpll_idx; + __le32 max_in_freq; + __le32 max_in_phase_adj; + __le32 max_out_freq; + __le32 max_out_phase_adj; + u8 cgu_part_num; + u8 rsvd[3]; +}; + +IXGBE_CHECK_STRUCT_LEN(24, ixgbe_aci_cmd_get_cgu_abilities); + +#define IXGBE_ACI_NODE_HANDLE_VALID BIT(10) +#define IXGBE_ACI_NODE_HANDLE MAKEMASK(0x3FF, 0) +#define IXGBE_ACI_DRIVING_CLK_NUM_SHIFT 10 +#define IXGBE_ACI_DRIVING_CLK_NUM MAKEMASK(0x3F, IXGBE_ACI_DRIVING_CLK_NUM_SHIFT) + +/* Set CGU input config (direct 0x0C62) */ +struct ixgbe_aci_cmd_set_cgu_input_config { + u8 input_idx; + u8 flags1; +#define IXGBE_ACI_SET_CGU_IN_CFG_FLG1_UPDATE_FREQ BIT(6) +#define IXGBE_ACI_SET_CGU_IN_CFG_FLG1_UPDATE_DELAY BIT(7) + u8 flags2; +#define IXGBE_ACI_SET_CGU_IN_CFG_FLG2_INPUT_EN BIT(5) +#define IXGBE_ACI_SET_CGU_IN_CFG_FLG2_ESYNC_EN BIT(6) + u8 rsvd; + __le32 freq; + __le32 phase_delay; + u8 rsvd2[2]; + __le16 node_handle; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_set_cgu_input_config); + +/* Get CGU input config response descriptor structure (direct 0x0C63) */ +struct ixgbe_aci_cmd_get_cgu_input_config { + u8 input_idx; + u8 status; +#define IXGBE_ACI_GET_CGU_IN_CFG_STATUS_LOS BIT(0) +#define IXGBE_ACI_GET_CGU_IN_CFG_STATUS_SCM_FAIL BIT(1) +#define IXGBE_ACI_GET_CGU_IN_CFG_STATUS_CFM_FAIL BIT(2) +#define IXGBE_ACI_GET_CGU_IN_CFG_STATUS_GST_FAIL BIT(3) +#define IXGBE_ACI_GET_CGU_IN_CFG_STATUS_PFM_FAIL BIT(4) +#define IXGBE_ACI_GET_CGU_IN_CFG_STATUS_ESYNC_FAIL BIT(6) +#define IXGBE_ACI_GET_CGU_IN_CFG_STATUS_ESYNC_CAP BIT(7) + u8 type; +#define IXGBE_ACI_GET_CGU_IN_CFG_TYPE_READ_ONLY BIT(0) +#define IXGBE_ACI_GET_CGU_IN_CFG_TYPE_GPS BIT(4) +#define IXGBE_ACI_GET_CGU_IN_CFG_TYPE_EXTERNAL BIT(5) +#define IXGBE_ACI_GET_CGU_IN_CFG_TYPE_PHY BIT(6) + u8 flags1; +#define IXGBE_ACI_GET_CGU_IN_CFG_FLG1_PHASE_DELAY_SUPP BIT(0) +#define IXGBE_ACI_GET_CGU_IN_CFG_FLG1_1PPS_SUPP BIT(2) +#define IXGBE_ACI_GET_CGU_IN_CFG_FLG1_10MHZ_SUPP BIT(3) +#define IXGBE_ACI_GET_CGU_IN_CFG_FLG1_ANYFREQ BIT(7) + __le32 freq; + __le32 phase_delay; + u8 flags2; +#define IXGBE_ACI_GET_CGU_IN_CFG_FLG2_INPUT_EN BIT(5) +#define IXGBE_ACI_GET_CGU_IN_CFG_FLG2_ESYNC_EN BIT(6) + u8 rsvd[1]; + __le16 node_handle; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_get_cgu_input_config); + +/* Set CGU output config (direct 0x0C64) */ +struct ixgbe_aci_cmd_set_cgu_output_config { + u8 output_idx; + u8 flags; +#define IXGBE_ACI_SET_CGU_OUT_CFG_OUT_EN BIT(0) +#define IXGBE_ACI_SET_CGU_OUT_CFG_ESYNC_EN BIT(1) +#define IXGBE_ACI_SET_CGU_OUT_CFG_UPDATE_FREQ BIT(2) +#define IXGBE_ACI_SET_CGU_OUT_CFG_UPDATE_PHASE BIT(3) +#define IXGBE_ACI_SET_CGU_OUT_CFG_UPDATE_SRC_SEL BIT(4) + u8 src_sel; +#define IXGBE_ACI_SET_CGU_OUT_CFG_DPLL_SRC_SEL MAKEMASK(0x1F, 0) + u8 rsvd; + __le32 freq; + __le32 phase_delay; + u8 rsvd2[2]; + __le16 node_handle; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_set_cgu_output_config); + +/* Get CGU output config (direct 0x0C65) */ +struct ixgbe_aci_cmd_get_cgu_output_config { + u8 output_idx; + u8 flags; +#define IXGBE_ACI_GET_CGU_OUT_CFG_OUT_EN BIT(0) +#define IXGBE_ACI_GET_CGU_OUT_CFG_ESYNC_EN BIT(1) +#define IXGBE_ACI_GET_CGU_OUT_CFG_ESYNC_ABILITY BIT(2) + u8 src_sel; +#define IXGBE_ACI_GET_CGU_OUT_CFG_DPLL_SRC_SEL_SHIFT 0 +#define IXGBE_ACI_GET_CGU_OUT_CFG_DPLL_SRC_SEL \ + MAKEMASK(0x1F, IXGBE_ACI_GET_CGU_OUT_CFG_DPLL_SRC_SEL_SHIFT) +#define IXGBE_ACI_GET_CGU_OUT_CFG_DPLL_MODE_SHIFT 5 +#define IXGBE_ACI_GET_CGU_OUT_CFG_DPLL_MODE \ + MAKEMASK(0x7, IXGBE_ACI_GET_CGU_OUT_CFG_DPLL_MODE_SHIFT) + u8 rsvd; + __le32 freq; + __le32 src_freq; + u8 rsvd2[2]; + __le16 node_handle; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_get_cgu_output_config); + +/* Get CGU DPLL status (direct 0x0C66) */ +struct ixgbe_aci_cmd_get_cgu_dpll_status { + u8 dpll_num; + u8 ref_state; +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_REF_SW_LOS BIT(0) +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_REF_SW_SCM BIT(1) +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_REF_SW_CFM BIT(2) +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_REF_SW_GST BIT(3) +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_REF_SW_PFM BIT(4) +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_FAST_LOCK_EN BIT(5) +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_REF_SW_ESYNC BIT(6) + __le16 dpll_state; +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_STATE_LOCK BIT(0) +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_STATE_HO BIT(1) +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_STATE_HO_READY BIT(2) +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_STATE_FLHIT BIT(5) +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_STATE_PSLHIT BIT(7) +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_STATE_CLK_REF_SHIFT 8 +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_STATE_CLK_REF_SEL \ + MAKEMASK(0x1F, IXGBE_ACI_GET_CGU_DPLL_STATUS_STATE_CLK_REF_SHIFT) +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_STATE_MODE_SHIFT 13 +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_STATE_MODE \ + MAKEMASK(0x7, IXGBE_ACI_GET_CGU_DPLL_STATUS_STATE_MODE_SHIFT) + __le32 phase_offset_h; + __le32 phase_offset_l; + u8 eec_mode; +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_EEC_MODE_1 0xA +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_EEC_MODE_2 0xB +#define IXGBE_ACI_GET_CGU_DPLL_STATUS_EEC_MODE_UNKNOWN 0xF + u8 rsvd[1]; + __le16 node_handle; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_get_cgu_dpll_status); + +/* Set CGU DPLL config (direct 0x0C67) */ +struct ixgbe_aci_cmd_set_cgu_dpll_config { + u8 dpll_num; + u8 ref_state; +#define IXGBE_ACI_SET_CGU_DPLL_CONFIG_REF_SW_LOS BIT(0) +#define IXGBE_ACI_SET_CGU_DPLL_CONFIG_REF_SW_SCM BIT(1) +#define IXGBE_ACI_SET_CGU_DPLL_CONFIG_REF_SW_CFM BIT(2) +#define IXGBE_ACI_SET_CGU_DPLL_CONFIG_REF_SW_GST BIT(3) +#define IXGBE_ACI_SET_CGU_DPLL_CONFIG_REF_SW_PFM BIT(4) +#define IXGBE_ACI_SET_CGU_DPLL_CONFIG_REF_FLOCK_EN BIT(5) +#define IXGBE_ACI_SET_CGU_DPLL_CONFIG_REF_SW_ESYNC BIT(6) + u8 rsvd; + u8 config; +#define IXGBE_ACI_SET_CGU_DPLL_CONFIG_CLK_REF_SEL MAKEMASK(0x1F, 0) +#define IXGBE_ACI_SET_CGU_DPLL_CONFIG_MODE MAKEMASK(0x7, 5) + u8 rsvd2[8]; + u8 eec_mode; + u8 rsvd3[1]; + __le16 node_handle; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_set_cgu_dpll_config); + +/* Set CGU reference priority (direct 0x0C68) */ +struct ixgbe_aci_cmd_set_cgu_ref_prio { + u8 dpll_num; + u8 ref_idx; + u8 ref_priority; + u8 rsvd[11]; + __le16 node_handle; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_set_cgu_ref_prio); + +/* Get CGU reference priority (direct 0x0C69) */ +struct ixgbe_aci_cmd_get_cgu_ref_prio { + u8 dpll_num; + u8 ref_idx; + u8 ref_priority; /* Valid only in response */ + u8 rsvd[13]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_get_cgu_ref_prio); + +/* Get CGU info (direct 0x0C6A) */ +struct ixgbe_aci_cmd_get_cgu_info { + __le32 cgu_id; + __le32 cgu_cfg_ver; + __le32 cgu_fw_ver; + u8 node_part_num; + u8 dev_rev; + __le16 node_handle; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_get_cgu_info); + +struct ixgbe_aci_cmd_temp_tca_event { + u8 event_desc; +#define IXGBE_TEMP_TCA_EVENT_DESC_SUBJ_SHIFT 0 +#define IXGBE_TEMP_TCA_EVENT_DESC_SUBJ_NVM 0 +#define IXGBE_TEMP_TCA_EVENT_DESC_SUBJ_EVENT_STATE 1 +#define IXGBE_TEMP_TCA_EVENT_DESC_SUBJ_ALL 2 + +#define IXGBE_TEMP_TCA_EVENT_DESC_ALARM_SHIFT 2 +#define IXGBE_TEMP_TCA_EVENT_DESC_WARNING_CLEARED 0 +#define IXGBE_TEMP_TCA_EVENT_DESC_ALARM_CLEARED 1 +#define IXGBE_TEMP_TCA_EVENT_DESC_WARNING_RAISED 2 +#define IXGBE_TEMP_TCA_EVENT_DESC_ALARM_RAISED 3 + + u8 reserved; + __le16 temperature; + __le16 thermal_sensor_max_value; + __le16 thermal_sensor_min_value; + __le32 addr_high; + __le32 addr_low; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_temp_tca_event); + +/* Debug Dump Internal Data (indirect 0xFF08) */ +struct ixgbe_aci_cmd_debug_dump_internals { + __le16 cluster_id; /* Expresses next cluster ID in response */ +#define IXGBE_ACI_DBG_DUMP_CLUSTER_ID_LINK 0 +#define IXGBE_ACI_DBG_DUMP_CLUSTER_ID_FULL_CSR_SPACE 1 + __le16 table_id; /* Used only for non-memory clusters */ + __le32 idx; /* In table entries for tables, in bytes for memory */ + __le32 addr_high; + __le32 addr_low; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_debug_dump_internals); + +/* Set Health Status (direct 0xFF20) */ +struct ixgbe_aci_cmd_set_health_status_config { + u8 event_source; +#define IXGBE_ACI_HEALTH_STATUS_SET_PF_SPECIFIC_MASK BIT(0) +#define IXGBE_ACI_HEALTH_STATUS_SET_ALL_PF_MASK BIT(1) +#define IXGBE_ACI_HEALTH_STATUS_SET_GLOBAL_MASK BIT(2) + u8 reserved[15]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_set_health_status_config); + +#define IXGBE_ACI_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT 0x101 +#define IXGBE_ACI_HEALTH_STATUS_ERR_MOD_TYPE 0x102 +#define IXGBE_ACI_HEALTH_STATUS_ERR_MOD_QUAL 0x103 +#define IXGBE_ACI_HEALTH_STATUS_ERR_MOD_COMM 0x104 +#define IXGBE_ACI_HEALTH_STATUS_ERR_MOD_CONFLICT 0x105 +#define IXGBE_ACI_HEALTH_STATUS_ERR_MOD_NOT_PRESENT 0x106 +#define IXGBE_ACI_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED 0x107 +#define IXGBE_ACI_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT 0x108 +#define IXGBE_ACI_HEALTH_STATUS_ERR_MOD_DIAGNOSTIC_FEATURE 0x109 +#define IXGBE_ACI_HEALTH_STATUS_ERR_INVALID_LINK_CFG 0x10B +#define IXGBE_ACI_HEALTH_STATUS_ERR_PORT_ACCESS 0x10C +#define IXGBE_ACI_HEALTH_STATUS_ERR_PORT_UNREACHABLE 0x10D +#define IXGBE_ACI_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED 0x10F +#define IXGBE_ACI_HEALTH_STATUS_ERR_PARALLEL_FAULT 0x110 +#define IXGBE_ACI_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED 0x111 +#define IXGBE_ACI_HEALTH_STATUS_ERR_NETLIST_TOPO 0x112 +#define IXGBE_ACI_HEALTH_STATUS_ERR_NETLIST 0x113 +#define IXGBE_ACI_HEALTH_STATUS_ERR_TOPO_CONFLICT 0x114 +#define IXGBE_ACI_HEALTH_STATUS_ERR_LINK_HW_ACCESS 0x115 +#define IXGBE_ACI_HEALTH_STATUS_ERR_LINK_RUNTIME 0x116 +#define IXGBE_ACI_HEALTH_STATUS_ERR_DNL_INIT 0x117 +#define IXGBE_ACI_HEALTH_STATUS_ERR_PHY_NVM_PROG 0x120 +#define IXGBE_ACI_HEALTH_STATUS_ERR_PHY_FW_LOAD 0x121 +#define IXGBE_ACI_HEALTH_STATUS_INFO_RECOVERY 0x500 +#define IXGBE_ACI_HEALTH_STATUS_ERR_FLASH_ACCESS 0x501 +#define IXGBE_ACI_HEALTH_STATUS_ERR_NVM_AUTH 0x502 +#define IXGBE_ACI_HEALTH_STATUS_ERR_OROM_AUTH 0x503 +#define IXGBE_ACI_HEALTH_STATUS_ERR_DDP_AUTH 0x504 +#define IXGBE_ACI_HEALTH_STATUS_ERR_NVM_COMPAT 0x505 +#define IXGBE_ACI_HEALTH_STATUS_ERR_OROM_COMPAT 0x506 +#define IXGBE_ACI_HEALTH_STATUS_ERR_NVM_SEC_VIOLATION 0x507 +#define IXGBE_ACI_HEALTH_STATUS_ERR_OROM_SEC_VIOLATION 0x508 +#define IXGBE_ACI_HEALTH_STATUS_ERR_DCB_MIB 0x509 +#define IXGBE_ACI_HEALTH_STATUS_ERR_MNG_TIMEOUT 0x50A +#define IXGBE_ACI_HEALTH_STATUS_ERR_BMC_RESET 0x50B +#define IXGBE_ACI_HEALTH_STATUS_ERR_LAST_MNG_FAIL 0x50C +#define IXGBE_ACI_HEALTH_STATUS_ERR_RESOURCE_ALLOC_FAIL 0x50D +#define IXGBE_ACI_HEALTH_STATUS_ERR_FW_LOOP 0x1000 +#define IXGBE_ACI_HEALTH_STATUS_ERR_FW_PFR_FAIL 0x1001 +#define IXGBE_ACI_HEALTH_STATUS_ERR_LAST_FAIL_AQ 0x1002 + +/* Get Health Status codes (indirect 0xFF21) */ +struct ixgbe_aci_cmd_get_supported_health_status_codes { + __le16 health_code_count; + u8 reserved[6]; + __le32 addr_high; + __le32 addr_low; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_get_supported_health_status_codes); + +/* Get Health Status (indirect 0xFF22) */ +struct ixgbe_aci_cmd_get_health_status { + __le16 health_status_count; + u8 reserved[6]; + __le32 addr_high; + __le32 addr_low; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_get_health_status); + +/* Get Health Status event buffer entry, (0xFF22) + * repeated per reported health status + */ +struct ixgbe_aci_cmd_health_status_elem { + __le16 health_status_code; + __le16 event_source; +#define IXGBE_ACI_HEALTH_STATUS_PF (0x1) +#define IXGBE_ACI_HEALTH_STATUS_PORT (0x2) +#define IXGBE_ACI_HEALTH_STATUS_GLOBAL (0x3) + __le32 internal_data1; +#define IXGBE_ACI_HEALTH_STATUS_UNDEFINED_DATA (0xDEADBEEF) + __le32 internal_data2; +}; + +IXGBE_CHECK_STRUCT_LEN(12, ixgbe_aci_cmd_health_status_elem); + +/* Clear Health Status (direct 0xFF23) */ +struct ixgbe_aci_cmd_clear_health_status { + __le32 reserved[4]; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_clear_health_status); + +enum ixgbe_aci_fw_logging_mod { + IXGBE_ACI_FW_LOG_ID_GENERAL = 0, + IXGBE_ACI_FW_LOG_ID_CTRL = 1, + IXGBE_ACI_FW_LOG_ID_LINK = 2, + IXGBE_ACI_FW_LOG_ID_LINK_TOPO = 3, + IXGBE_ACI_FW_LOG_ID_DNL = 4, + IXGBE_ACI_FW_LOG_ID_I2C = 5, + IXGBE_ACI_FW_LOG_ID_SDP = 6, + IXGBE_ACI_FW_LOG_ID_MDIO = 7, + IXGBE_ACI_FW_LOG_ID_ADMINQ = 8, + IXGBE_ACI_FW_LOG_ID_HDMA = 9, + IXGBE_ACI_FW_LOG_ID_LLDP = 10, + IXGBE_ACI_FW_LOG_ID_DCBX = 11, + IXGBE_ACI_FW_LOG_ID_DCB = 12, + IXGBE_ACI_FW_LOG_ID_XLR = 13, + IXGBE_ACI_FW_LOG_ID_NVM = 14, + IXGBE_ACI_FW_LOG_ID_AUTH = 15, + IXGBE_ACI_FW_LOG_ID_VPD = 16, + IXGBE_ACI_FW_LOG_ID_IOSF = 17, + IXGBE_ACI_FW_LOG_ID_PARSER = 18, + IXGBE_ACI_FW_LOG_ID_SW = 19, + IXGBE_ACI_FW_LOG_ID_SCHEDULER = 20, + IXGBE_ACI_FW_LOG_ID_TXQ = 21, + IXGBE_ACI_FW_LOG_ID_ACL = 22, + IXGBE_ACI_FW_LOG_ID_POST = 23, + IXGBE_ACI_FW_LOG_ID_WATCHDOG = 24, + IXGBE_ACI_FW_LOG_ID_TASK_DISPATCH = 25, + IXGBE_ACI_FW_LOG_ID_MNG = 26, + IXGBE_ACI_FW_LOG_ID_SYNCE = 27, + IXGBE_ACI_FW_LOG_ID_HEALTH = 28, + IXGBE_ACI_FW_LOG_ID_TSDRV = 29, + IXGBE_ACI_FW_LOG_ID_PFREG = 30, + IXGBE_ACI_FW_LOG_ID_MDLVER = 31, + IXGBE_ACI_FW_LOG_ID_MAX = 32, +}; + +/* Only a single log level should be set and all log levels under the set value + * are enabled, e.g. if log level is set to IXGBE_FWLOG_LEVEL_VERBOSE, then all + * other log levels are included (except IXGBE_FWLOG_LEVEL_NONE) + */ +enum ixgbe_fwlog_level { + IXGBE_FWLOG_LEVEL_NONE = 0, + IXGBE_FWLOG_LEVEL_ERROR = 1, + IXGBE_FWLOG_LEVEL_WARNING = 2, + IXGBE_FWLOG_LEVEL_NORMAL = 3, + IXGBE_FWLOG_LEVEL_VERBOSE = 4, + IXGBE_FWLOG_LEVEL_INVALID, /* all values >= this entry are invalid */ +}; + +struct ixgbe_fwlog_module_entry { + /* module ID for the corresponding firmware logging event */ + u16 module_id; + /* verbosity level for the module_id */ + u8 log_level; +}; + +struct ixgbe_fwlog_cfg { + /* list of modules for configuring log level */ + struct ixgbe_fwlog_module_entry module_entries[IXGBE_ACI_FW_LOG_ID_MAX]; +#define IXGBE_FWLOG_OPTION_ARQ_ENA BIT(0) +#define IXGBE_FWLOG_OPTION_UART_ENA BIT(1) + /* set before calling ixgbe_fwlog_init() so the PF registers for firmware + * logging on initialization + */ +#define IXGBE_FWLOG_OPTION_REGISTER_ON_INIT BIT(2) + /* set in the ixgbe_fwlog_get() response if the PF is registered for FW + * logging events over ARQ + */ +#define IXGBE_FWLOG_OPTION_IS_REGISTERED BIT(3) + /* options used to configure firmware logging */ + u16 options; + /* minimum number of log events sent per Admin Receive Queue event */ + u8 log_resolution; +}; + +struct ixgbe_fwlog_data { + u16 data_size; + u8 *data; +}; + +struct ixgbe_fwlog_ring { + struct ixgbe_fwlog_data *rings; + u16 size; + u16 head; + u16 tail; +}; + +#define IXGBE_FWLOG_RING_SIZE_DFLT 256 +#define IXGBE_FWLOG_RING_SIZE_MAX 512 + +/* Set FW Logging configuration (indirect 0xFF30) + * Register for FW Logging (indirect 0xFF31) + * Query FW Logging (indirect 0xFF32) + * FW Log Event (indirect 0xFF33) + * Get FW Log (indirect 0xFF34) + * Clear FW Log (indirect 0xFF35) + */ +struct ixgbe_aci_cmd_fw_log { + u8 cmd_flags; +#define IXGBE_ACI_FW_LOG_CONF_UART_EN BIT(0) +#define IXGBE_ACI_FW_LOG_CONF_AQ_EN BIT(1) +#define IXGBE_ACI_FW_LOG_QUERY_REGISTERED BIT(2) +#define IXGBE_ACI_FW_LOG_CONF_SET_VALID BIT(3) +#define IXGBE_ACI_FW_LOG_AQ_REGISTER BIT(0) +#define IXGBE_ACI_FW_LOG_AQ_QUERY BIT(2) +#define IXGBE_ACI_FW_LOG_PERSISTENT BIT(0) + u8 rsp_flag; +#define IXGBE_ACI_FW_LOG_MORE_DATA BIT(1) + __le16 fw_rt_msb; + union { + struct { + __le32 fw_rt_lsb; + } sync; + struct { + __le16 log_resolution; +#define IXGBE_ACI_FW_LOG_MIN_RESOLUTION (1) +#define IXGBE_ACI_FW_LOG_MAX_RESOLUTION (128) + __le16 mdl_cnt; + } cfg; + } ops; + __le32 addr_high; + __le32 addr_low; +}; + +IXGBE_CHECK_PARAM_LEN(ixgbe_aci_cmd_fw_log); + +/* Response Buffer for: + * Set Firmware Logging Configuration (0xFF30) + * Query FW Logging (0xFF32) + */ +struct ixgbe_aci_cmd_fw_log_cfg_resp { + __le16 module_identifier; + u8 log_level; + u8 rsvd0; +}; + +IXGBE_CHECK_STRUCT_LEN(4, ixgbe_aci_cmd_fw_log_cfg_resp); + +/** + * struct ixgbe_aq_desc - Admin Command (AC) descriptor + * @flags: IXGBE_ACI_FLAG_* flags + * @opcode: Admin command opcode + * @datalen: length in bytes of indirect/external data buffer + * @retval: return value from firmware + * @cookie_high: opaque data high-half + * @cookie_low: opaque data low-half + * @params: command-specific parameters + * + * Descriptor format for commands the driver posts via the Admin Command Interface + * (ACI). The firmware writes back onto the command descriptor and returns + * the result of the command. Asynchronous events that are not an immediate + * result of the command are written to the Admin Command Interface (ACI) using + * the same descriptor format. Descriptors are in little-endian notation with + * 32-bit words. + */ +struct ixgbe_aci_desc { + __le16 flags; + __le16 opcode; + __le16 datalen; + __le16 retval; + __le32 cookie_high; + __le32 cookie_low; + union { + u8 raw[16]; + struct ixgbe_aci_cmd_generic generic; + struct ixgbe_aci_cmd_get_ver get_ver; + struct ixgbe_aci_cmd_driver_ver driver_ver; + struct ixgbe_aci_cmd_get_exp_err exp_err; + struct ixgbe_aci_cmd_req_res res_owner; + struct ixgbe_aci_cmd_list_caps get_cap; + struct ixgbe_aci_cmd_disable_rxen disable_rxen; + struct ixgbe_aci_cmd_get_fw_event get_fw_event; + struct ixgbe_aci_cmd_get_phy_caps get_phy; + struct ixgbe_aci_cmd_set_phy_cfg set_phy; + struct ixgbe_aci_cmd_restart_an restart_an; + struct ixgbe_aci_cmd_get_link_status get_link_status; + struct ixgbe_aci_cmd_set_event_mask set_event_mask; + struct ixgbe_aci_cmd_get_link_topo get_link_topo; + struct ixgbe_aci_cmd_i2c read_write_i2c; + struct ixgbe_aci_cmd_read_i2c_resp read_i2c_resp; + struct ixgbe_aci_cmd_mdio read_write_mdio; + struct ixgbe_aci_cmd_mdio read_mdio; + struct ixgbe_aci_cmd_mdio write_mdio; + struct ixgbe_aci_cmd_set_port_id_led set_port_id_led; + struct ixgbe_aci_cmd_gpio_by_func read_write_gpio_by_func; + struct ixgbe_aci_cmd_gpio read_write_gpio; + struct ixgbe_aci_cmd_sff_eeprom read_write_sff_param; + struct ixgbe_aci_cmd_prog_topo_dev_nvm prog_topo_dev_nvm; + struct ixgbe_aci_cmd_read_topo_dev_nvm read_topo_dev_nvm; + struct ixgbe_aci_cmd_nvm nvm; + struct ixgbe_aci_cmd_nvm_cfg nvm_cfg; + struct ixgbe_aci_cmd_nvm_checksum nvm_checksum; + struct ixgbe_aci_cmd_read_write_alt_direct read_write_alt_direct; + struct ixgbe_aci_cmd_read_write_alt_indirect read_write_alt_indirect; + struct ixgbe_aci_cmd_done_alt_write done_alt_write; + struct ixgbe_aci_cmd_clear_port_alt_write clear_port_alt_write; + struct ixgbe_aci_cmd_debug_dump_internals debug_dump; + struct ixgbe_aci_cmd_set_health_status_config + set_health_status_config; + struct ixgbe_aci_cmd_get_supported_health_status_codes + get_supported_health_status_codes; + struct ixgbe_aci_cmd_get_health_status get_health_status; + struct ixgbe_aci_cmd_clear_health_status clear_health_status; + struct ixgbe_aci_cmd_fw_log fw_log; + struct ixgbe_aci_cmd_nvm_sanitization nvm_sanitization; + } params; +}; + +/* E610-specific adapter context structures */ + +struct ixgbe_link_status { + /* Refer to ixgbe_aci_phy_type for bits definition */ + u64 phy_type_low; + u64 phy_type_high; + u8 topo_media_conflict; + u16 max_frame_size; + u16 link_speed; + u16 req_speeds; + u8 link_cfg_err; + u8 lse_ena; /* Link Status Event notification */ + u8 link_info; + u8 an_info; + u8 ext_info; + u8 fec_info; + u8 pacing; + /* Refer to #define from module_type[IXGBE_ACI_MODULE_TYPE_TOTAL_BYTE] of + * ixgbe_aci_get_phy_caps structure + */ + u8 module_type[IXGBE_ACI_MODULE_TYPE_TOTAL_BYTE]; +}; + +/* Common HW capabilities for SW use */ +struct ixgbe_hw_common_caps { + /* Write CSR protection */ + u64 wr_csr_prot; + u32 switching_mode; + /* switching mode supported - EVB switching (including cloud) */ +#define IXGBE_NVM_IMAGE_TYPE_EVB 0x0 + + /* Manageability mode & supported protocols over MCTP */ + u32 mgmt_mode; +#define IXGBE_MGMT_MODE_PASS_THRU_MODE_M 0xF +#define IXGBE_MGMT_MODE_CTL_INTERFACE_M 0xF0 +#define IXGBE_MGMT_MODE_REDIR_SB_INTERFACE_M 0xF00 + + u32 mgmt_protocols_mctp; +#define IXGBE_MGMT_MODE_PROTO_RSVD BIT(0) +#define IXGBE_MGMT_MODE_PROTO_PLDM BIT(1) +#define IXGBE_MGMT_MODE_PROTO_OEM BIT(2) +#define IXGBE_MGMT_MODE_PROTO_NC_SI BIT(3) + + u32 os2bmc; + u32 valid_functions; + /* DCB capabilities */ + u32 active_tc_bitmap; + u32 maxtc; + + /* RSS related capabilities */ + u32 rss_table_size; /* 512 for PFs and 64 for VFs */ + u32 rss_table_entry_width; /* RSS Entry width in bits */ + + /* Tx/Rx queues */ + u32 num_rxq; /* Number/Total Rx queues */ + u32 rxq_first_id; /* First queue ID for Rx queues */ + u32 num_txq; /* Number/Total Tx queues */ + u32 txq_first_id; /* First queue ID for Tx queues */ + + /* MSI-X vectors */ + u32 num_msix_vectors; + u32 msix_vector_first_id; + + /* Max MTU for function or device */ + u32 max_mtu; + + /* WOL related */ + u32 num_wol_proxy_fltr; + u32 wol_proxy_vsi_seid; + + /* LED/SDP pin count */ + u32 led_pin_num; + u32 sdp_pin_num; + + /* LED/SDP - Supports up to 12 LED pins and 8 SDP signals */ +#define IXGBE_MAX_SUPPORTED_GPIO_LED 12 +#define IXGBE_MAX_SUPPORTED_GPIO_SDP 8 + u8 led[IXGBE_MAX_SUPPORTED_GPIO_LED]; + u8 sdp[IXGBE_MAX_SUPPORTED_GPIO_SDP]; + /* SR-IOV virtualization */ + u8 sr_iov_1_1; /* SR-IOV enabled */ + /* VMDQ */ + u8 vmdq; /* VMDQ supported */ + + /* EVB capabilities */ + u8 evb_802_1_qbg; /* Edge Virtual Bridging */ + u8 evb_802_1_qbh; /* Bridge Port Extension */ + + u8 dcb; + u8 iscsi; + u8 mgmt_cem; + + /* WoL and APM support */ +#define IXGBE_WOL_SUPPORT_M BIT(0) +#define IXGBE_ACPI_PROG_MTHD_M BIT(1) +#define IXGBE_PROXY_SUPPORT_M BIT(2) + u8 apm_wol_support; + u8 acpi_prog_mthd; + u8 proxy_support; + bool sec_rev_disabled; + bool update_disabled; + bool nvm_unified_update; + bool netlist_auth; +#define IXGBE_NVM_MGMT_SEC_REV_DISABLED BIT(0) +#define IXGBE_NVM_MGMT_UPDATE_DISABLED BIT(1) +#define IXGBE_NVM_MGMT_UNIFIED_UPD_SUPPORT BIT(3) +#define IXGBE_NVM_MGMT_NETLIST_AUTH_SUPPORT BIT(5) + bool no_drop_policy_support; + /* PCIe reset avoidance */ + bool pcie_reset_avoidance; /* false: not supported, true: supported */ + /* Post update reset restriction */ + bool reset_restrict_support; /* false: not supported, true: supported */ + + /* External topology device images within the NVM */ +#define IXGBE_EXT_TOPO_DEV_IMG_COUNT 4 + u32 ext_topo_dev_img_ver_high[IXGBE_EXT_TOPO_DEV_IMG_COUNT]; + u32 ext_topo_dev_img_ver_low[IXGBE_EXT_TOPO_DEV_IMG_COUNT]; + u8 ext_topo_dev_img_part_num[IXGBE_EXT_TOPO_DEV_IMG_COUNT]; +#define IXGBE_EXT_TOPO_DEV_IMG_PART_NUM_S 8 +#define IXGBE_EXT_TOPO_DEV_IMG_PART_NUM_M \ + MAKEMASK(0xFF, IXGBE_EXT_TOPO_DEV_IMG_PART_NUM_S) + bool ext_topo_dev_img_load_en[IXGBE_EXT_TOPO_DEV_IMG_COUNT]; +#define IXGBE_EXT_TOPO_DEV_IMG_LOAD_EN BIT(0) + bool ext_topo_dev_img_prog_en[IXGBE_EXT_TOPO_DEV_IMG_COUNT]; +#define IXGBE_EXT_TOPO_DEV_IMG_PROG_EN BIT(1) + /* Support for OROM update in Recovery Mode. */ + bool orom_recovery_update; + bool next_cluster_id_support; +}; + +#pragma pack(1) +struct ixgbe_orom_civd_info { + u8 signature[4]; /* Must match ASCII '$CIV' characters */ + u8 checksum; /* Simple modulo 256 sum of all structure bytes must equal 0 */ + __le32 combo_ver; /* Combo Image Version number */ + u8 combo_name_len; /* Length of the unicode combo image version string, max of 32 */ + __le16 combo_name[32]; /* Unicode string representing the Combo Image version */ +}; +#pragma pack() + +/* Function specific capabilities */ +struct ixgbe_hw_func_caps { + struct ixgbe_hw_common_caps common_cap; + u32 num_allocd_vfs; /* Number of allocated VFs */ + u32 vf_base_id; /* Logical ID of the first VF */ + u32 guar_num_vsi; + bool no_drop_policy_ena; +}; + +/* Device wide capabilities */ +struct ixgbe_hw_dev_caps { + struct ixgbe_hw_common_caps common_cap; + u32 num_vfs_exposed; /* Total number of VFs exposed */ + u32 num_vsi_allocd_to_host; /* Excluding EMP VSI */ + u32 num_flow_director_fltr; /* Number of FD filters available */ + u32 num_funcs; +}; + +/* ACI event information */ +struct ixgbe_aci_event { + struct ixgbe_aci_desc desc; + u16 msg_len; + u16 buf_len; + u8 *msg_buf; +}; + +struct ixgbe_aci_info { + enum ixgbe_aci_err last_status; /* last status of sent admin command */ + struct ixgbe_lock lock; /* admin command interface lock */ +}; + +/* Minimum Security Revision information */ +struct ixgbe_minsrev_info { + u32 nvm; + u32 orom; + u8 nvm_valid : 1; + u8 orom_valid : 1; +}; + +/* Enumeration of which flash bank is desired to read from, either the active + * bank or the inactive bank. Used to abstract 1st and 2nd bank notion from + * code which just wants to read the active or inactive flash bank. + */ +enum ixgbe_bank_select { + IXGBE_ACTIVE_FLASH_BANK, + IXGBE_INACTIVE_FLASH_BANK, +}; + +/* Option ROM version information */ +struct ixgbe_orom_info { + u8 major; /* Major version of OROM */ + u8 patch; /* Patch version of OROM */ + u16 build; /* Build version of OROM */ + u32 srev; /* Security revision */ +}; + +/* NVM version information */ +struct ixgbe_nvm_info { + u32 eetrack; + u32 srev; + u8 major; + u8 minor; +}; + +/* netlist version information */ +struct ixgbe_netlist_info { + u32 major; /* major high/low */ + u32 minor; /* minor high/low */ + u32 type; /* type high/low */ + u32 rev; /* revision high/low */ + u32 hash; /* SHA-1 hash word */ + u16 cust_ver; /* customer version */ +}; + +/* Enumeration of possible flash banks for the NVM, OROM, and Netlist modules + * of the flash image. + */ +enum ixgbe_flash_bank { + IXGBE_INVALID_FLASH_BANK, + IXGBE_1ST_FLASH_BANK, + IXGBE_2ND_FLASH_BANK, +}; + +/* information for accessing NVM, OROM, and Netlist flash banks */ +struct ixgbe_bank_info { + u32 nvm_ptr; /* Pointer to 1st NVM bank */ + u32 nvm_size; /* Size of NVM bank */ + u32 orom_ptr; /* Pointer to 1st OROM bank */ + u32 orom_size; /* Size of OROM bank */ + u32 netlist_ptr; /* Pointer to 1st Netlist bank */ + u32 netlist_size; /* Size of Netlist bank */ + enum ixgbe_flash_bank nvm_bank; /* Active NVM bank */ + enum ixgbe_flash_bank orom_bank; /* Active OROM bank */ + enum ixgbe_flash_bank netlist_bank; /* Active Netlist bank */ +}; + +/* Flash Chip Information */ +struct ixgbe_flash_info { + struct ixgbe_orom_info orom; /* Option ROM version info */ + struct ixgbe_nvm_info nvm; /* NVM version information */ + struct ixgbe_netlist_info netlist; /* Netlist version info */ + struct ixgbe_bank_info banks; /* Flash Bank information */ + u16 sr_words; /* Shadow RAM size in words */ + u32 flash_size; /* Size of available flash in bytes */ + u8 blank_nvm_mode; /* is NVM empty (no FW present) */ +}; + +#define IXGBE_NVM_CMD_READ 0x0000000B +#define IXGBE_NVM_CMD_WRITE 0x0000000C + +/* NVM Access command */ +struct ixgbe_nvm_access_cmd { + u32 command; /* NVM command: READ or WRITE */ + u32 offset; /* Offset to read/write, in bytes */ + u32 data_size; /* Size of data field, in bytes */ +}; + +/* NVM Access data */ +struct ixgbe_nvm_access_data { + u32 regval; /* Storage for register value */ +}; + +#endif /* _IXGBE_TYPE_E610_H_ */ diff --git a/sys/dev/ixgbe/ixgbe_vf.c b/sys/dev/ixgbe/ixgbe_vf.c index cac3c6b5e5e7..4e48f7f33c9d 100644 --- a/sys/dev/ixgbe/ixgbe_vf.c +++ b/sys/dev/ixgbe/ixgbe_vf.c @@ -656,7 +656,8 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw, ixgbe_link_speed *speed, break; case IXGBE_LINKS_SPEED_100_82599: *speed = IXGBE_LINK_SPEED_100_FULL; - if (hw->mac.type == ixgbe_mac_X550_vf) { + if (hw->mac.type == ixgbe_mac_X550_vf || + hw->mac.type == ixgbe_mac_E610_vf) { if (links_reg & IXGBE_LINKS_SPEED_NON_STD) *speed = IXGBE_LINK_SPEED_5GB_FULL; } diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c index 60e66aeaf579..43c3af056b67 100644 --- a/sys/dev/ixl/if_ixl.c +++ b/sys/dev/ixl/if_ixl.c @@ -1151,13 +1151,20 @@ ixl_if_enable_intr(if_ctx_t ctx) struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; - struct ixl_rx_queue *que = vsi->rx_queues; + struct ixl_rx_queue *rx_que = vsi->rx_queues; ixl_enable_intr0(hw); /* Enable queue interrupts */ - for (int i = 0; i < vsi->num_rx_queues; i++, que++) - /* TODO: Queue index parameter is probably wrong */ - ixl_enable_queue(hw, que->rxr.me); + if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { + for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) + ixl_enable_queue(hw, rx_que->rxr.me); + } else { + /* + * Set PFINT_LNKLST0 FIRSTQ_INDX to 0x0 to enable + * triggering interrupts by queues. + */ + wr32(hw, I40E_PFINT_LNKLST0, 0x0); + } } /* @@ -1175,11 +1182,13 @@ ixl_if_disable_intr(if_ctx_t ctx) if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) - ixl_disable_queue(hw, rx_que->msix - 1); + ixl_disable_queue(hw, rx_que->rxr.me); } else { - // Set PFINT_LNKLST0 FIRSTQ_INDX to 0x7FF - // stops queues from triggering interrupts - wr32(hw, I40E_PFINT_LNKLST0, 0x7FF); + /* + * Set PFINT_LNKLST0 FIRSTQ_INDX to End of List (0x7FF) + * to stop queues from triggering interrupts. + */ + wr32(hw, I40E_PFINT_LNKLST0, IXL_QUEUE_EOL); } } diff --git a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_offload.c b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_offload.c index 978e5f25ceaf..cc0bc1f3fcd2 100644 --- a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_offload.c +++ b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_offload.c @@ -120,7 +120,7 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, switch (attrs->dir) { case IPSEC_DIR_OUTBOUND: - if (attrs->replay_esn.replay_window != 0) + if (attrs->replay_esn.trigger) MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_INC_SN); else MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_MODE); diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c index 6e24395b5577..c45f02cdaf42 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c @@ -1783,8 +1783,8 @@ mlx5e_add_vxlan_rule(struct mlx5e_priv *priv, sa_family_t family, u_int port) el->refcount++; if (el->installed) return (0); - } - el = mlx5e_vxlan_alloc_db_el(priv, proto, port); + } else + el = mlx5e_vxlan_alloc_db_el(priv, proto, port); if ((if_getcapenable(priv->ifp) & IFCAP_VXLAN_HWCSUM) != 0) { err = mlx5e_add_vxlan_rule_from_db(priv, el); diff --git a/sys/dev/nctgpio/nctgpio.c b/sys/dev/nctgpio/nctgpio.c index 75ea1fbdba17..ddc2ceef7dfb 100644 --- a/sys/dev/nctgpio/nctgpio.c +++ b/sys/dev/nctgpio/nctgpio.c @@ -1258,13 +1258,14 @@ nct_attach(device_t dev) GPIO_UNLOCK(sc); - sc->busdev = gpiobus_attach_bus(dev); + sc->busdev = gpiobus_add_bus(dev); if (sc->busdev == NULL) { device_printf(dev, "failed to attach to gpiobus\n"); GPIO_LOCK_DESTROY(sc); return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/dev/netmap/if_ptnet.c b/sys/dev/netmap/if_ptnet.c index bf14bfdb73ea..9c06f7fec530 100644 --- a/sys/dev/netmap/if_ptnet.c +++ b/sys/dev/netmap/if_ptnet.c @@ -27,8 +27,9 @@ /* Driver for ptnet paravirtualized network device. */ #include <sys/cdefs.h> +#include "opt_inet.h" +#include "opt_inet6.h" -#include <sys/types.h> #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> @@ -75,9 +76,6 @@ #include <dev/pci/pcivar.h> #include <dev/pci/pcireg.h> -#include "opt_inet.h" -#include "opt_inet6.h" - #include <sys/selinfo.h> #include <net/netmap.h> #include <dev/netmap/netmap_kern.h> diff --git a/sys/dev/null/null.c b/sys/dev/null/null.c index 7ffc618e63ee..8525eb9543c3 100644 --- a/sys/dev/null/null.c +++ b/sys/dev/null/null.c @@ -4,6 +4,7 @@ * Copyright (c) 2000 Mark R. V. Murray & Jeroen C. van Gelderen * Copyright (c) 2001-2004 Mark R. V. Murray * Copyright (c) 2014 Eitan Adler + * Copyright (c) 2025 Pietro Cerutti * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,6 +40,7 @@ #include <sys/disk.h> #include <sys/bus.h> #include <sys/filio.h> +#include <sys/event.h> #include <machine/bus.h> #include <machine/vmparam.h> @@ -53,12 +55,26 @@ static d_write_t null_write; static d_ioctl_t null_ioctl; static d_ioctl_t zero_ioctl; static d_read_t zero_read; +static d_kqfilter_t kqfilter; +static int one_ev(struct knote *kn, long hint); +static int zero_ev(struct knote *kn, long hint); + +static const struct filterops one_fop = { + .f_isfd = 1, + .f_event = one_ev +}; + +static const struct filterops zero_fop = { + .f_isfd = 1, + .f_event = zero_ev +}; static struct cdevsw full_cdevsw = { .d_version = D_VERSION, .d_read = zero_read, .d_write = full_write, .d_ioctl = zero_ioctl, + .d_kqfilter = kqfilter, .d_name = "full", }; @@ -67,6 +83,7 @@ static struct cdevsw null_cdevsw = { .d_read = (d_read_t *)nullop, .d_write = null_write, .d_ioctl = null_ioctl, + .d_kqfilter = kqfilter, .d_name = "null", }; @@ -75,6 +92,7 @@ static struct cdevsw zero_cdevsw = { .d_read = zero_read, .d_write = null_write, .d_ioctl = zero_ioctl, + .d_kqfilter = kqfilter, .d_name = "zero", .d_flags = D_MMAP_ANON, }; @@ -197,5 +215,35 @@ null_modevent(module_t mod __unused, int type, void *data __unused) return (0); } +static int +one_ev(struct knote *kn, long hint) +{ + + return (1); +} + +static int +zero_ev(struct knote *kn, long hint) +{ + + return (0); +} + +static int +kqfilter(struct cdev *dev, struct knote *kn) +{ + + switch (kn->kn_filter) { + case EVFILT_READ: + kn->kn_fop = dev->si_devsw == &null_cdevsw ? &zero_fop : &one_fop; + return (0); + case EVFILT_WRITE: + kn->kn_fop = dev->si_devsw == &full_cdevsw ? &zero_fop : &one_fop; + return (0); + default: + return (EOPNOTSUPP); + } +} + DEV_MODULE(null, null_modevent, NULL); MODULE_VERSION(null, 1); diff --git a/sys/dev/p2sb/lewisburg_gpio.c b/sys/dev/p2sb/lewisburg_gpio.c index b45d7767602c..3be777ab9524 100644 --- a/sys/dev/p2sb/lewisburg_gpio.c +++ b/sys/dev/p2sb/lewisburg_gpio.c @@ -217,10 +217,11 @@ lbggpio_attach(device_t dev) } /* support gpio */ - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) return (ENXIO); + bus_attach_children(dev); return (0); } diff --git a/sys/dev/psci/smccc_trng.c b/sys/dev/psci/smccc_trng.c index ab98837d3841..8a2e5508ef48 100644 --- a/sys/dev/psci/smccc_trng.c +++ b/sys/dev/psci/smccc_trng.c @@ -58,7 +58,7 @@ static device_attach_t trng_attach; static unsigned trng_read(void *, unsigned); -static struct random_source random_trng = { +static const struct random_source random_trng = { .rs_ident = "Arm SMCCC TRNG", .rs_source = RANDOM_PURE_ARM_TRNG, .rs_read = trng_read, diff --git a/sys/dev/qcom_rnd/qcom_rnd.c b/sys/dev/qcom_rnd/qcom_rnd.c index fdd0b553523e..a5ece7e00f28 100644 --- a/sys/dev/qcom_rnd/qcom_rnd.c +++ b/sys/dev/qcom_rnd/qcom_rnd.c @@ -63,7 +63,7 @@ static int qcom_rnd_detach(device_t); static int qcom_rnd_harvest(struct qcom_rnd_softc *, void *, size_t *); static unsigned qcom_rnd_read(void *, unsigned); -static struct random_source random_qcom_rnd = { +static const struct random_source random_qcom_rnd = { .rs_ident = "Qualcomm Entropy Adapter", .rs_source = RANDOM_PURE_QUALCOMM, .rs_read = qcom_rnd_read, diff --git a/sys/dev/qcom_tlmm/qcom_tlmm_ipq4018.c b/sys/dev/qcom_tlmm/qcom_tlmm_ipq4018.c index 2d390cd449af..50f54b896748 100644 --- a/sys/dev/qcom_tlmm/qcom_tlmm_ipq4018.c +++ b/sys/dev/qcom_tlmm/qcom_tlmm_ipq4018.c @@ -346,13 +346,14 @@ qcom_tlmm_ipq4018_attach(device_t dev) fdt_pinctrl_register(dev, NULL); fdt_pinctrl_configure_by_name(dev, "default"); - sc->busdev = gpiobus_attach_bus(dev); + sc->busdev = gpiobus_add_bus(dev); if (sc->busdev == NULL) { device_printf(dev, "%s: failed to attach bus\n", __func__); qcom_tlmm_ipq4018_detach(dev); return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/dev/random/armv8rng.c b/sys/dev/random/armv8rng.c index 61698bfff820..524d80317681 100644 --- a/sys/dev/random/armv8rng.c +++ b/sys/dev/random/armv8rng.c @@ -44,7 +44,7 @@ static u_int random_rndr_read(void *, u_int); static bool has_rndr; -static struct random_source random_armv8_rndr = { +static const struct random_source random_armv8_rndr = { .rs_ident = "Armv8 rndr RNG", .rs_source = RANDOM_PURE_ARMV8, .rs_read = random_rndr_read, diff --git a/sys/dev/random/darn.c b/sys/dev/random/darn.c index a66754e095fb..9bb4991df82f 100644 --- a/sys/dev/random/darn.c +++ b/sys/dev/random/darn.c @@ -56,7 +56,7 @@ static u_int random_darn_read(void *, u_int); -static struct random_source random_darn = { +static const struct random_source random_darn = { .rs_ident = "PowerISA DARN random number generator", .rs_source = RANDOM_PURE_DARN, .rs_read = random_darn_read diff --git a/sys/dev/random/ivy.c b/sys/dev/random/ivy.c index 05474d977276..fa1e4831f1b9 100644 --- a/sys/dev/random/ivy.c +++ b/sys/dev/random/ivy.c @@ -51,7 +51,7 @@ static bool has_rdrand, has_rdseed; static u_int random_ivy_read(void *, u_int); -static struct random_source random_ivy = { +static const struct random_source random_ivy = { .rs_ident = "Intel Secure Key RNG", .rs_source = RANDOM_PURE_RDRAND, .rs_read = random_ivy_read diff --git a/sys/dev/random/nehemiah.c b/sys/dev/random/nehemiah.c index f76071290b8f..56f144169dae 100644 --- a/sys/dev/random/nehemiah.c +++ b/sys/dev/random/nehemiah.c @@ -44,7 +44,7 @@ static u_int random_nehemiah_read(void *, u_int); -static struct random_source random_nehemiah = { +static const struct random_source random_nehemiah = { .rs_ident = "VIA Nehemiah Padlock RNG", .rs_source = RANDOM_PURE_NEHEMIAH, .rs_read = random_nehemiah_read diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c index c7762967c4fb..84ec174bd08e 100644 --- a/sys/dev/random/random_harvestq.c +++ b/sys/dev/random/random_harvestq.c @@ -110,7 +110,7 @@ __read_frequently u_int hc_source_mask; struct random_sources { CK_LIST_ENTRY(random_sources) rrs_entries; - struct random_source *rrs_source; + const struct random_source *rrs_source; }; static CK_LIST_HEAD(sources_head, random_sources) source_list = @@ -493,9 +493,9 @@ random_healthtest_init(enum random_entropy_source source) * The RCT limit comes from the formula in section 4.4.1. * * The APT cutoff is calculated using the formula in section 4.4.2 - * footnote 10 with the window size changed from 512 to 511, since the - * test as written counts the number of samples equal to the first - * sample in the window, and thus tests W-1 samples. + * footnote 10 with the number of Bernoulli trials changed from W to + * W-1, since the test as written counts the number of samples equal to + * the first sample in the window, and thus tests W-1 samples. */ ht->ht_rct_limit = 35; ht->ht_apt_cutoff = 330; @@ -849,7 +849,7 @@ random_harvest_deregister_source(enum random_entropy_source source) } void -random_source_register(struct random_source *rsource) +random_source_register(const struct random_source *rsource) { struct random_sources *rrs; @@ -868,7 +868,7 @@ random_source_register(struct random_source *rsource) } void -random_source_deregister(struct random_source *rsource) +random_source_deregister(const struct random_source *rsource) { struct random_sources *rrs = NULL; diff --git a/sys/dev/random/randomdev.h b/sys/dev/random/randomdev.h index e1c9ac7b680d..6d742447ea8b 100644 --- a/sys/dev/random/randomdev.h +++ b/sys/dev/random/randomdev.h @@ -103,8 +103,8 @@ struct random_source { random_source_read_t *rs_read; }; -void random_source_register(struct random_source *); -void random_source_deregister(struct random_source *); +void random_source_register(const struct random_source *); +void random_source_deregister(const struct random_source *); #endif /* _KERNEL */ diff --git a/sys/dev/rccgpio/rccgpio.c b/sys/dev/rccgpio/rccgpio.c index b2b775b879ad..dafd0b511fa9 100644 --- a/sys/dev/rccgpio/rccgpio.c +++ b/sys/dev/rccgpio/rccgpio.c @@ -308,7 +308,7 @@ rcc_gpio_attach(device_t dev) RCC_WRITE(sc, RCC_GPIO_GP_LVL, sc->sc_output); /* Attach the gpiobus. */ - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { bus_release_resource(dev, SYS_RES_IOPORT, sc->sc_io_rid, sc->sc_io_res); @@ -316,6 +316,7 @@ rcc_gpio_attach(device_t dev) return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/dev/uart/uart_cpu_acpi.c b/sys/dev/uart/uart_cpu_acpi.c index 7382c47a8db6..da77603f0093 100644 --- a/sys/dev/uart/uart_cpu_acpi.c +++ b/sys/dev/uart/uart_cpu_acpi.c @@ -44,23 +44,15 @@ #include <contrib/dev/acpica/include/accommon.h> #include <contrib/dev/acpica/include/actables.h> -static struct acpi_uart_compat_data * +static struct acpi_spcr_compat_data * uart_cpu_acpi_scan(uint8_t interface_type) { - struct acpi_uart_compat_data **cd, *curcd; + struct acpi_spcr_compat_data **cd, *curcd; int i; - SET_FOREACH(cd, uart_acpi_class_and_device_set) { + SET_FOREACH(cd, uart_acpi_spcr_class_set) { curcd = *cd; - for (i = 0; curcd[i].cd_hid != NULL; i++) { - if (curcd[i].cd_port_subtype == interface_type) - return (&curcd[i]); - } - } - - SET_FOREACH(cd, uart_acpi_class_set) { - curcd = *cd; - for (i = 0; curcd[i].cd_hid != NULL; i++) { + for (i = 0; curcd[i].cd_class != NULL; i++) { if (curcd[i].cd_port_subtype == interface_type) return (&curcd[i]); } @@ -143,7 +135,7 @@ uart_cpu_acpi_spcr(int devtype, struct uart_devinfo *di) { vm_paddr_t spcr_physaddr; ACPI_TABLE_SPCR *spcr; - struct acpi_uart_compat_data *cd; + struct acpi_spcr_compat_data *cd; struct uart_class *class; int error = ENXIO; @@ -237,7 +229,7 @@ uart_cpu_acpi_dbg2(struct uart_devinfo *di) ACPI_TABLE_DBG2 *dbg2; ACPI_DBG2_DEVICE *dbg2_dev; ACPI_GENERIC_ADDRESS *base_address; - struct acpi_uart_compat_data *cd; + struct acpi_spcr_compat_data *cd; struct uart_class *class; int error; bool found; diff --git a/sys/dev/uart/uart_cpu_acpi.h b/sys/dev/uart/uart_cpu_acpi.h index 94329e1f1349..218f643c7621 100644 --- a/sys/dev/uart/uart_cpu_acpi.h +++ b/sys/dev/uart/uart_cpu_acpi.h @@ -35,11 +35,18 @@ struct uart_class; +struct acpi_spcr_compat_data { + struct uart_class *cd_class; + uint16_t cd_port_subtype; +}; +SET_DECLARE(uart_acpi_spcr_class_set, struct acpi_spcr_compat_data); +#define UART_ACPI_SPCR_CLASS(data) \ + DATA_SET(uart_acpi_spcr_class_set, data) + struct acpi_uart_compat_data { const char *cd_hid; struct uart_class *cd_class; - uint16_t cd_port_subtype; int cd_regshft; int cd_regiowidth; int cd_rclk; @@ -56,14 +63,6 @@ SET_DECLARE(uart_acpi_class_and_device_set, struct acpi_uart_compat_data); #define UART_ACPI_CLASS_AND_DEVICE(data) \ DATA_SET(uart_acpi_class_and_device_set, data) -/* - * If your UART driver implements uart_class and custom device layer, - * then use UART_ACPI_CLASS for its declaration - */ -SET_DECLARE(uart_acpi_class_set, struct acpi_uart_compat_data); -#define UART_ACPI_CLASS(data) \ - DATA_SET(uart_acpi_class_set, data) - /* Try to initialize UART device from ACPI tables */ int uart_cpu_acpi_setup(int devtype, struct uart_devinfo *di); diff --git a/sys/dev/uart/uart_dev_ns8250.c b/sys/dev/uart/uart_dev_ns8250.c index 0f19ede6d9df..c38d50e54ad8 100644 --- a/sys/dev/uart/uart_dev_ns8250.c +++ b/sys/dev/uart/uart_dev_ns8250.c @@ -492,24 +492,32 @@ UART_CLASS(uart_ns8250_class); * XXX -- refactor out ACPI and FDT ifdefs */ #ifdef DEV_ACPI +static struct acpi_spcr_compat_data acpi_spcr_compat_data[] = { + { &uart_ns8250_class, ACPI_DBG2_16550_COMPATIBLE }, + { &uart_ns8250_class, ACPI_DBG2_16550_SUBSET }, + { &uart_ns8250_class, ACPI_DBG2_16550_WITH_GAS }, + { NULL, 0 }, +}; +UART_ACPI_SPCR_CLASS(acpi_spcr_compat_data); + static struct acpi_uart_compat_data acpi_compat_data[] = { - {"AMD0020", &uart_ns8250_class, 0, 2, 0, 48000000, UART_F_BUSY_DETECT, "AMD / Synopsys Designware UART"}, - {"AMDI0020", &uart_ns8250_class, 0, 2, 0, 48000000, UART_F_BUSY_DETECT, "AMD / Synopsys Designware UART"}, - {"APMC0D08", &uart_ns8250_class, ACPI_DBG2_16550_COMPATIBLE, 2, 4, 0, 0, "APM compatible UART"}, - {"MRVL0001", &uart_ns8250_class, ACPI_DBG2_16550_SUBSET, 2, 0, 200000000, UART_F_BUSY_DETECT, "Marvell / Synopsys Designware UART"}, - {"SCX0006", &uart_ns8250_class, 0, 2, 0, 62500000, UART_F_BUSY_DETECT, "SynQuacer / Synopsys Designware UART"}, - {"HISI0031", &uart_ns8250_class, 0, 2, 0, 200000000, UART_F_BUSY_DETECT, "HiSilicon / Synopsys Designware UART"}, - {"INTC1006", &uart_ns8250_class, 0, 2, 0, 25000000, 0, "Intel ARM64 UART"}, - {"NXP0018", &uart_ns8250_class, 0, 0, 0, 350000000, UART_F_BUSY_DETECT, "NXP / Synopsys Designware UART"}, - {"PNP0500", &uart_ns8250_class, 0, 0, 0, 0, 0, "Standard PC COM port"}, - {"PNP0501", &uart_ns8250_class, 0, 0, 0, 0, 0, "16550A-compatible COM port"}, - {"PNP0502", &uart_ns8250_class, 0, 0, 0, 0, 0, "Multiport serial device (non-intelligent 16550)"}, - {"PNP0510", &uart_ns8250_class, 0, 0, 0, 0, 0, "Generic IRDA-compatible device"}, - {"PNP0511", &uart_ns8250_class, 0, 0, 0, 0, 0, "Generic IRDA-compatible device"}, - {"WACF004", &uart_ns8250_class, 0, 0, 0, 0, 0, "Wacom Tablet PC Screen"}, - {"WACF00E", &uart_ns8250_class, 0, 0, 0, 0, 0, "Wacom Tablet PC Screen 00e"}, - {"FUJ02E5", &uart_ns8250_class, 0, 0, 0, 0, 0, "Wacom Tablet at FuS Lifebook T"}, - {NULL, NULL, 0, 0 , 0, 0, 0, NULL}, + {"AMD0020", &uart_ns8250_class, 2, 0, 48000000, UART_F_BUSY_DETECT, "AMD / Synopsys Designware UART"}, + {"AMDI0020", &uart_ns8250_class, 2, 0, 48000000, UART_F_BUSY_DETECT, "AMD / Synopsys Designware UART"}, + {"APMC0D08", &uart_ns8250_class, 2, 4, 0, 0, "APM compatible UART"}, + {"MRVL0001", &uart_ns8250_class, 2, 0, 200000000, UART_F_BUSY_DETECT, "Marvell / Synopsys Designware UART"}, + {"SCX0006", &uart_ns8250_class, 2, 0, 62500000, UART_F_BUSY_DETECT, "SynQuacer / Synopsys Designware UART"}, + {"HISI0031", &uart_ns8250_class, 2, 0, 200000000, UART_F_BUSY_DETECT, "HiSilicon / Synopsys Designware UART"}, + {"INTC1006", &uart_ns8250_class, 2, 0, 25000000, 0, "Intel ARM64 UART"}, + {"NXP0018", &uart_ns8250_class, 0, 0, 350000000, UART_F_BUSY_DETECT, "NXP / Synopsys Designware UART"}, + {"PNP0500", &uart_ns8250_class, 0, 0, 0, 0, "Standard PC COM port"}, + {"PNP0501", &uart_ns8250_class, 0, 0, 0, 0, "16550A-compatible COM port"}, + {"PNP0502", &uart_ns8250_class, 0, 0, 0, 0, "Multiport serial device (non-intelligent 16550)"}, + {"PNP0510", &uart_ns8250_class, 0, 0, 0, 0, "Generic IRDA-compatible device"}, + {"PNP0511", &uart_ns8250_class, 0, 0, 0, 0, "Generic IRDA-compatible device"}, + {"WACF004", &uart_ns8250_class, 0, 0, 0, 0, "Wacom Tablet PC Screen"}, + {"WACF00E", &uart_ns8250_class, 0, 0, 0, 0, "Wacom Tablet PC Screen 00e"}, + {"FUJ02E5", &uart_ns8250_class, 0, 0, 0, 0, "Wacom Tablet at FuS Lifebook T"}, + {NULL, NULL, 0 , 0, 0, 0, NULL}, }; UART_ACPI_CLASS_AND_DEVICE(acpi_compat_data); #endif diff --git a/sys/dev/uart/uart_dev_pl011.c b/sys/dev/uart/uart_dev_pl011.c index a0d5a5b1c7e2..6afc693cd347 100644 --- a/sys/dev/uart/uart_dev_pl011.c +++ b/sys/dev/uart/uart_dev_pl011.c @@ -391,11 +391,19 @@ UART_FDT_CLASS_AND_DEVICE(fdt_compat_data); #endif #ifdef DEV_ACPI +static struct acpi_spcr_compat_data acpi_spcr_compat_data[] = { + { &uart_pl011_class, ACPI_DBG2_ARM_PL011 }, + { &uart_pl011_class, ACPI_DBG2_ARM_SBSA_GENERIC }, + { &uart_pl011_class, ACPI_DBG2_ARM_SBSA_32BIT }, + { NULL, 0 }, +}; +UART_ACPI_SPCR_CLASS(acpi_spcr_compat_data); + static struct acpi_uart_compat_data acpi_compat_data[] = { - {"ARMH0011", &uart_pl011_class, ACPI_DBG2_ARM_PL011, 2, 0, 0, 0, "uart pl011"}, - {"ARMHB000", &uart_pl011_class, ACPI_DBG2_ARM_SBSA_GENERIC, 2, 0, 0, 0, "uart pl011"}, - {"ARMHB000", &uart_pl011_class, ACPI_DBG2_ARM_SBSA_32BIT, 2, 0, 0, 0, "uart pl011"}, - {NULL, NULL, 0, 0, 0, 0, 0, NULL}, + {"ARMH0011", &uart_pl011_class, 2, 0, 0, 0, "uart pl011"}, + {"ARMHB000", &uart_pl011_class, 2, 0, 0, 0, "uart pl011"}, + {"ARMHB000", &uart_pl011_class, 2, 0, 0, 0, "uart pl011"}, + {NULL, NULL, 0, 0, 0, 0, NULL}, }; UART_ACPI_CLASS_AND_DEVICE(acpi_compat_data); #endif diff --git a/sys/dev/ufshci/ufshci.h b/sys/dev/ufshci/ufshci.h index 9f0faaadeb57..b96d82ff836e 100644 --- a/sys/dev/ufshci/ufshci.h +++ b/sys/dev/ufshci/ufshci.h @@ -160,19 +160,19 @@ enum ufshci_data_direction { UFSHCI_DATA_DIRECTION_RESERVED = 0b11, }; -enum ufshci_overall_command_status { - UFSHCI_OCS_SUCCESS = 0x0, - UFSHCI_OCS_INVALID_COMMAND_TABLE_ATTRIBUTES = 0x01, - UFSHCI_OCS_INVALID_PRDT_ATTRIBUTES = 0x02, - UFSHCI_OCS_MISMATCH_DATA_BUFFER_SIZE = 0x03, - UFSHCI_OCS_MISMATCH_RESPONSE_UPIU_SIZE = 0x04, - UFSHCI_OCS_COMMUNICATION_FAILURE_WITHIN_UIC_LAYERS = 0x05, - UFSHCI_OCS_ABORTED = 0x06, - UFSHCI_OCS_HOST_CONTROLLER_FATAL_ERROR = 0x07, - UFSHCI_OCS_DEVICE_FATAL_ERROR = 0x08, - UFSHCI_OCS_INVALID_CRYPTO_CONFIGURATION = 0x09, - UFSHCI_OCS_GENERAL_CRYPTO_ERROR = 0x0A, - UFSHCI_OCS_INVALID = 0xF, +enum ufshci_utr_overall_command_status { + UFSHCI_UTR_OCS_SUCCESS = 0x0, + UFSHCI_UTR_OCS_INVALID_COMMAND_TABLE_ATTRIBUTES = 0x01, + UFSHCI_UTR_OCS_INVALID_PRDT_ATTRIBUTES = 0x02, + UFSHCI_UTR_OCS_MISMATCH_DATA_BUFFER_SIZE = 0x03, + UFSHCI_UTR_OCS_MISMATCH_RESPONSE_UPIU_SIZE = 0x04, + UFSHCI_UTR_OCS_COMMUNICATION_FAILURE_WITHIN_UIC_LAYERS = 0x05, + UFSHCI_UTR_OCS_ABORTED = 0x06, + UFSHCI_UTR_OCS_HOST_CONTROLLER_FATAL_ERROR = 0x07, + UFSHCI_UTR_OCS_DEVICE_FATAL_ERROR = 0x08, + UFSHCI_UTR_OCS_INVALID_CRYPTO_CONFIGURATION = 0x09, + UFSHCI_UTR_OCS_GENERAL_CRYPTO_ERROR = 0x0A, + UFSHCI_UTR_OCS_INVALID = 0xF, }; struct ufshci_utp_xfer_req_desc { @@ -271,6 +271,18 @@ _Static_assert(sizeof(struct ufshci_utp_cmd_desc) == #define UFSHCI_UTP_TASK_MGMT_REQ_SIZE 32 #define UFSHCI_UTP_TASK_MGMT_RESP_SIZE 32 +enum ufshci_utmr_overall_command_status { + UFSHCI_UTMR_OCS_SUCCESS = 0x0, + UFSHCI_UTMR_OCS_INVALID_TASK_MANAGEMENT_FUNCTION_ATTRIBUTES = 0x01, + UFSHCI_UTMR_OCS_MISMATCH_TASK_MANAGEMENT_REQUEST_SIZE = 0x02, + UFSHCI_UTMR_OCS_MISMATCH_TASK_MANAGEMENT_RESPONSE_SIZE = 0x03, + UFSHCI_UTMR_OCS_PEER_COMMUNICATION_FAILURE = 0x04, + UFSHCI_UTMR_OCS_ABORTED = 0x05, + UFSHCI_UTMR_OCS_FATAL_ERROR = 0x06, + UFSHCI_UTMR_OCS_DEVICE_FATAL_ERROR = 0x07, + UFSHCI_UTMR_OCS_INVALID = 0xF, +}; + /* UFSHCI spec 4.1, section 6.3.1 "UTP Task Management Request Descriptor" */ struct ufshci_utp_task_mgmt_req_desc { /* dword 0 */ @@ -356,6 +368,7 @@ struct ufshci_upiu { _Static_assert(sizeof(struct ufshci_upiu) == 512, "ufshci_upiu must be 512 bytes"); +/* UFS Spec 4.1, section 10.7.1 "COMMAND UPIU" */ struct ufshci_cmd_command_upiu { /* dword 0-2 */ struct ufshci_upiu_header header; @@ -376,6 +389,7 @@ _Static_assert(sizeof(struct ufshci_cmd_command_upiu) % UFSHCI_UPIU_ALIGNMENT == 0, "UPIU requires 64-bit alignment"); +/* UFS Spec 4.1, section 10.7.2 "RESPONSE UPIU" */ struct ufshci_cmd_response_upiu { /* dword 0-2 */ struct ufshci_upiu_header header; @@ -403,6 +417,69 @@ _Static_assert(sizeof(struct ufshci_cmd_response_upiu) % 0, "UPIU requires 64-bit alignment"); +enum task_management_function { + UFSHCI_TASK_MGMT_FUNCTION_ABORT_TASK = 0x01, + UFSHCI_TASK_MGMT_FUNCTION_ABORT_TASK_SET = 0x02, + UFSHCI_TASK_MGMT_FUNCTION_CLEAR_TASK_SET = 0x04, + UFSHCI_TASK_MGMT_FUNCTION_LOGICAL_UNIT_RESET = 0x08, + UFSHCI_TASK_MGMT_FUNCTION_QUERY_TASK = 0x80, + UFSHCI_TASK_MGMT_FUNCTION_QUERY_TASKSET = 0x81, +}; + +/* UFS Spec 4.1, section 10.7.6 "TASK MANAGEMENT REQUEST UPIU" */ +struct ufshci_task_mgmt_request_upiu { + /* dword 0-2 */ + struct ufshci_upiu_header header; + /* dword 3 */ + uint32_t input_param1; /* (Big-endian) */ + /* dword 4 */ + uint32_t input_param2; /* (Big-endian) */ + /* dword 5 */ + uint32_t input_param3; /* (Big-endian) */ + /* dword 6-7 */ + uint8_t reserved[8]; +} __packed __aligned(4); + +_Static_assert(sizeof(struct ufshci_task_mgmt_request_upiu) == 32, + "bad size for ufshci_task_mgmt_request_upiu"); +_Static_assert(sizeof(struct ufshci_task_mgmt_request_upiu) <= + UFSHCI_UTP_XFER_RESP_SIZE, + "bad size for ufshci_task_mgmt_request_upiu"); +_Static_assert(sizeof(struct ufshci_task_mgmt_request_upiu) % + UFSHCI_UPIU_ALIGNMENT == + 0, + "UPIU requires 64-bit alignment"); + +enum task_management_service_response { + UFSHCI_TASK_MGMT_SERVICE_RESPONSE_FUNCTION_COMPLETE = 0x00, + UFSHCI_TASK_MGMT_SERVICE_RESPONSE_FUNCTION_NOT_SUPPORTED = 0x04, + UFSHCI_TASK_MGMT_SERVICE_RESPONSE_FUNCTION_FAILED = 0x05, + UFSHCI_TASK_MGMT_SERVICE_RESPONSE_FUNCTION_SUCCEEDED = 0x08, + UFSHCI_TASK_MGMT_SERVICE_RESPONSE_INCORRECT_LUN = 0x09, +}; + +/* UFS Spec 4.1, section 10.7.7 "TASK MANAGEMENT RESPONSE UPIU" */ +struct ufshci_task_mgmt_response_upiu { + /* dword 0-2 */ + struct ufshci_upiu_header header; + /* dword 3 */ + uint32_t output_param1; /* (Big-endian) */ + /* dword 4 */ + uint32_t output_param2; /* (Big-endian) */ + /* dword 5-7 */ + uint8_t reserved[12]; +} __packed __aligned(4); + +_Static_assert(sizeof(struct ufshci_task_mgmt_response_upiu) == 32, + "bad size for ufshci_task_mgmt_response_upiu"); +_Static_assert(sizeof(struct ufshci_task_mgmt_response_upiu) <= + UFSHCI_UTP_XFER_RESP_SIZE, + "bad size for ufshci_task_mgmt_response_upiu"); +_Static_assert(sizeof(struct ufshci_task_mgmt_response_upiu) % + UFSHCI_UPIU_ALIGNMENT == + 0, + "UPIU requires 64-bit alignment"); + /* UFS Spec 4.1, section 10.7.8 "QUERY REQUEST UPIU" */ enum ufshci_query_function { UFSHCI_QUERY_FUNC_STANDARD_READ_REQUEST = 0x01, @@ -554,6 +631,7 @@ union ufshci_reponse_upiu { struct ufshci_upiu_header header; struct ufshci_cmd_response_upiu cmd_response_upiu; struct ufshci_query_response_upiu query_response_upiu; + struct ufshci_task_mgmt_response_upiu task_mgmt_response_upiu; struct ufshci_nop_in_upiu nop_in_upiu; }; diff --git a/sys/dev/ufshci/ufshci_ctrlr.c b/sys/dev/ufshci/ufshci_ctrlr.c index 55d8363d3287..37bd32665b2b 100644 --- a/sys/dev/ufshci/ufshci_ctrlr.c +++ b/sys/dev/ufshci/ufshci_ctrlr.c @@ -154,12 +154,12 @@ ufshci_ctrlr_construct(struct ufshci_controller *ctrlr, device_t dev) /* TODO: Initialize interrupt Aggregation Control Register (UTRIACR) */ /* Allocate and initialize UTP Task Management Request List. */ - error = ufshci_utm_req_queue_construct(ctrlr); + error = ufshci_utmr_req_queue_construct(ctrlr); if (error) return (error); /* Allocate and initialize UTP Transfer Request List or SQ/CQ. */ - error = ufshci_ut_req_queue_construct(ctrlr); + error = ufshci_utr_req_queue_construct(ctrlr); if (error) return (error); @@ -179,8 +179,8 @@ ufshci_ctrlr_destruct(struct ufshci_controller *ctrlr, device_t dev) /* TODO: Flush In-flight IOs */ /* Release resources */ - ufshci_utm_req_queue_destroy(ctrlr); - ufshci_ut_req_queue_destroy(ctrlr); + ufshci_utmr_req_queue_destroy(ctrlr); + ufshci_utr_req_queue_destroy(ctrlr); if (ctrlr->tag) bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag); @@ -215,8 +215,8 @@ ufshci_ctrlr_reset(struct ufshci_controller *ctrlr) ufshci_mmio_write_4(ctrlr, ie, 0); /* Release resources */ - ufshci_utm_req_queue_destroy(ctrlr); - ufshci_ut_req_queue_destroy(ctrlr); + ufshci_utmr_req_queue_destroy(ctrlr); + ufshci_utr_req_queue_destroy(ctrlr); /* Reset Host Controller */ error = ufshci_ctrlr_enable_host_ctrlr(ctrlr); @@ -232,12 +232,12 @@ ufshci_ctrlr_reset(struct ufshci_controller *ctrlr) ufshci_mmio_write_4(ctrlr, ie, ie); /* Allocate and initialize UTP Task Management Request List. */ - error = ufshci_utm_req_queue_construct(ctrlr); + error = ufshci_utmr_req_queue_construct(ctrlr); if (error) return (error); /* Allocate and initialize UTP Transfer Request List or SQ/CQ. */ - error = ufshci_ut_req_queue_construct(ctrlr); + error = ufshci_utr_req_queue_construct(ctrlr); if (error) return (error); @@ -245,6 +245,15 @@ ufshci_ctrlr_reset(struct ufshci_controller *ctrlr) } int +ufshci_ctrlr_submit_task_mgmt_request(struct ufshci_controller *ctrlr, + struct ufshci_request *req) +{ + return ( + ufshci_req_queue_submit_request(&ctrlr->task_mgmt_req_queue, req, + /*is_admin*/ false)); +} + +int ufshci_ctrlr_submit_admin_request(struct ufshci_controller *ctrlr, struct ufshci_request *req) { @@ -360,8 +369,8 @@ ufshci_ctrlr_start_config_hook(void *arg) TSENTER(); - if (ufshci_utm_req_queue_enable(ctrlr) == 0 && - ufshci_ut_req_queue_enable(ctrlr) == 0) + if (ufshci_utmr_req_queue_enable(ctrlr) == 0 && + ufshci_utr_req_queue_enable(ctrlr) == 0) ufshci_ctrlr_start(ctrlr); else ufshci_ctrlr_fail(ctrlr, false); @@ -445,9 +454,9 @@ ufshci_ctrlr_poll(struct ufshci_controller *ctrlr) } /* UTP Task Management Request Completion Status */ if (is & UFSHCIM(UFSHCI_IS_REG_UTMRCS)) { - ufshci_printf(ctrlr, "TODO: Implement UTMR completion\n"); ufshci_mmio_write_4(ctrlr, is, UFSHCIM(UFSHCI_IS_REG_UTMRCS)); - /* TODO: Implement UTMR completion */ + ufshci_req_queue_process_completions( + &ctrlr->task_mgmt_req_queue); } /* UTP Transfer Request Completion Status */ if (is & UFSHCIM(UFSHCI_IS_REG_UTRCS)) { diff --git a/sys/dev/ufshci/ufshci_ctrlr_cmd.c b/sys/dev/ufshci/ufshci_ctrlr_cmd.c index ddf28c58fa88..71d163d998af 100644 --- a/sys/dev/ufshci/ufshci_ctrlr_cmd.c +++ b/sys/dev/ufshci/ufshci_ctrlr_cmd.c @@ -8,6 +8,32 @@ #include "ufshci_private.h" void +ufshci_ctrlr_cmd_send_task_mgmt_request(struct ufshci_controller *ctrlr, + ufshci_cb_fn_t cb_fn, void *cb_arg, uint8_t function, uint8_t lun, + uint8_t task_tag, uint8_t iid) +{ + struct ufshci_request *req; + struct ufshci_task_mgmt_request_upiu *upiu; + + req = ufshci_allocate_request_vaddr(NULL, 0, M_WAITOK, cb_fn, cb_arg); + + req->request_size = sizeof(struct ufshci_task_mgmt_request_upiu); + req->response_size = sizeof(struct ufshci_task_mgmt_response_upiu); + + upiu = (struct ufshci_task_mgmt_request_upiu *)&req->request_upiu; + memset(upiu, 0, req->request_size); + upiu->header.trans_type = + UFSHCI_UPIU_TRANSACTION_CODE_TASK_MANAGEMENT_REQUEST; + upiu->header.lun = lun; + upiu->header.ext_iid_or_function = function; + upiu->input_param1 = lun; + upiu->input_param2 = task_tag; + upiu->input_param3 = iid; + + ufshci_ctrlr_submit_task_mgmt_request(ctrlr, req); +} + +void ufshci_ctrlr_cmd_send_nop(struct ufshci_controller *ctrlr, ufshci_cb_fn_t cb_fn, void *cb_arg) { diff --git a/sys/dev/ufshci/ufshci_private.h b/sys/dev/ufshci/ufshci_private.h index ac58d44102a0..1a2742ae2e80 100644 --- a/sys/dev/ufshci/ufshci_private.h +++ b/sys/dev/ufshci/ufshci_private.h @@ -125,6 +125,8 @@ struct ufshci_qops { struct ufshci_tracker **tr); void (*ring_doorbell)(struct ufshci_controller *ctrlr, struct ufshci_tracker *tr); + bool (*is_doorbell_cleared)(struct ufshci_controller *ctrlr, + uint8_t slot); void (*clear_cpl_ntf)(struct ufshci_controller *ctrlr, struct ufshci_tracker *tr); bool (*process_cpl)(struct ufshci_req_queue *req_queue); @@ -143,7 +145,10 @@ struct ufshci_hw_queue { int domain; int cpu; - struct ufshci_utp_xfer_req_desc *utrd; + union { + struct ufshci_utp_xfer_req_desc *utrd; + struct ufshci_utp_task_mgmt_req_desc *utmrd; + }; bus_dma_tag_t dma_tag_queue; bus_dmamap_t queuemem_map; @@ -333,6 +338,8 @@ int ufshci_ctrlr_reset(struct ufshci_controller *ctrlr); void ufshci_ctrlr_start_config_hook(void *arg); void ufshci_ctrlr_poll(struct ufshci_controller *ctrlr); +int ufshci_ctrlr_submit_task_mgmt_request(struct ufshci_controller *ctrlr, + struct ufshci_request *req); int ufshci_ctrlr_submit_admin_request(struct ufshci_controller *ctrlr, struct ufshci_request *req); int ufshci_ctrlr_submit_io_request(struct ufshci_controller *ctrlr, @@ -351,6 +358,9 @@ int ufshci_dev_init_ufs_power_mode(struct ufshci_controller *ctrlr); int ufshci_dev_get_descriptor(struct ufshci_controller *ctrlr); /* Controller Command */ +void ufshci_ctrlr_cmd_send_task_mgmt_request(struct ufshci_controller *ctrlr, + ufshci_cb_fn_t cb_fn, void *cb_arg, uint8_t function, uint8_t lun, + uint8_t task_tag, uint8_t iid); void ufshci_ctrlr_cmd_send_nop(struct ufshci_controller *ctrlr, ufshci_cb_fn_t cb_fn, void *cb_arg); void ufshci_ctrlr_cmd_send_query_request(struct ufshci_controller *ctrlr, @@ -361,12 +371,12 @@ void ufshci_ctrlr_cmd_send_scsi_command(struct ufshci_controller *ctrlr, /* Request Queue */ bool ufshci_req_queue_process_completions(struct ufshci_req_queue *req_queue); -int ufshci_utm_req_queue_construct(struct ufshci_controller *ctrlr); -int ufshci_ut_req_queue_construct(struct ufshci_controller *ctrlr); -void ufshci_utm_req_queue_destroy(struct ufshci_controller *ctrlr); -void ufshci_ut_req_queue_destroy(struct ufshci_controller *ctrlr); -int ufshci_utm_req_queue_enable(struct ufshci_controller *ctrlr); -int ufshci_ut_req_queue_enable(struct ufshci_controller *ctrlr); +int ufshci_utmr_req_queue_construct(struct ufshci_controller *ctrlr); +int ufshci_utr_req_queue_construct(struct ufshci_controller *ctrlr); +void ufshci_utmr_req_queue_destroy(struct ufshci_controller *ctrlr); +void ufshci_utr_req_queue_destroy(struct ufshci_controller *ctrlr); +int ufshci_utmr_req_queue_enable(struct ufshci_controller *ctrlr); +int ufshci_utr_req_queue_enable(struct ufshci_controller *ctrlr); void ufshci_req_queue_fail(struct ufshci_controller *ctrlr, struct ufshci_hw_queue *hwq); int ufshci_req_queue_submit_request(struct ufshci_req_queue *req_queue, @@ -385,9 +395,17 @@ int ufshci_req_sdb_enable(struct ufshci_controller *ctrlr, struct ufshci_req_queue *req_queue); int ufshci_req_sdb_reserve_slot(struct ufshci_req_queue *req_queue, struct ufshci_tracker **tr); -void ufshci_req_sdb_ring_doorbell(struct ufshci_controller *ctrlr, +void ufshci_req_sdb_utmr_ring_doorbell(struct ufshci_controller *ctrlr, + struct ufshci_tracker *tr); +void ufshci_req_sdb_utr_ring_doorbell(struct ufshci_controller *ctrlr, + struct ufshci_tracker *tr); +bool ufshci_req_sdb_utmr_is_doorbell_cleared(struct ufshci_controller *ctrlr, + uint8_t slot); +bool ufshci_req_sdb_utr_is_doorbell_cleared(struct ufshci_controller *ctrlr, + uint8_t slot); +void ufshci_req_sdb_utmr_clear_cpl_ntf(struct ufshci_controller *ctrlr, struct ufshci_tracker *tr); -void ufshci_req_sdb_clear_cpl_ntf(struct ufshci_controller *ctrlr, +void ufshci_req_sdb_utr_clear_cpl_ntf(struct ufshci_controller *ctrlr, struct ufshci_tracker *tr); bool ufshci_req_sdb_process_cpl(struct ufshci_req_queue *req_queue); int ufshci_req_sdb_get_inflight_io(struct ufshci_controller *ctrlr); diff --git a/sys/dev/ufshci/ufshci_req_queue.c b/sys/dev/ufshci/ufshci_req_queue.c index cc9a2ddae768..bb6efa6d2ccc 100644 --- a/sys/dev/ufshci/ufshci_req_queue.c +++ b/sys/dev/ufshci/ufshci_req_queue.c @@ -19,21 +19,36 @@ static void ufshci_req_queue_submit_tracker(struct ufshci_req_queue *req_queue, struct ufshci_tracker *tr, enum ufshci_data_direction data_direction); -static const struct ufshci_qops sdb_qops = { +static const struct ufshci_qops sdb_utmr_qops = { .construct = ufshci_req_sdb_construct, .destroy = ufshci_req_sdb_destroy, .get_hw_queue = ufshci_req_sdb_get_hw_queue, .enable = ufshci_req_sdb_enable, .reserve_slot = ufshci_req_sdb_reserve_slot, .reserve_admin_slot = ufshci_req_sdb_reserve_slot, - .ring_doorbell = ufshci_req_sdb_ring_doorbell, - .clear_cpl_ntf = ufshci_req_sdb_clear_cpl_ntf, + .ring_doorbell = ufshci_req_sdb_utmr_ring_doorbell, + .is_doorbell_cleared = ufshci_req_sdb_utmr_is_doorbell_cleared, + .clear_cpl_ntf = ufshci_req_sdb_utmr_clear_cpl_ntf, + .process_cpl = ufshci_req_sdb_process_cpl, + .get_inflight_io = ufshci_req_sdb_get_inflight_io, +}; + +static const struct ufshci_qops sdb_utr_qops = { + .construct = ufshci_req_sdb_construct, + .destroy = ufshci_req_sdb_destroy, + .get_hw_queue = ufshci_req_sdb_get_hw_queue, + .enable = ufshci_req_sdb_enable, + .reserve_slot = ufshci_req_sdb_reserve_slot, + .reserve_admin_slot = ufshci_req_sdb_reserve_slot, + .ring_doorbell = ufshci_req_sdb_utr_ring_doorbell, + .is_doorbell_cleared = ufshci_req_sdb_utr_is_doorbell_cleared, + .clear_cpl_ntf = ufshci_req_sdb_utr_clear_cpl_ntf, .process_cpl = ufshci_req_sdb_process_cpl, .get_inflight_io = ufshci_req_sdb_get_inflight_io, }; int -ufshci_utm_req_queue_construct(struct ufshci_controller *ctrlr) +ufshci_utmr_req_queue_construct(struct ufshci_controller *ctrlr) { struct ufshci_req_queue *req_queue; int error; @@ -44,7 +59,7 @@ ufshci_utm_req_queue_construct(struct ufshci_controller *ctrlr) */ req_queue = &ctrlr->task_mgmt_req_queue; req_queue->queue_mode = UFSHCI_Q_MODE_SDB; - req_queue->qops = sdb_qops; + req_queue->qops = sdb_utmr_qops; error = req_queue->qops.construct(ctrlr, req_queue, UFSHCI_UTRM_ENTRIES, /*is_task_mgmt*/ true); @@ -53,21 +68,21 @@ ufshci_utm_req_queue_construct(struct ufshci_controller *ctrlr) } void -ufshci_utm_req_queue_destroy(struct ufshci_controller *ctrlr) +ufshci_utmr_req_queue_destroy(struct ufshci_controller *ctrlr) { ctrlr->task_mgmt_req_queue.qops.destroy(ctrlr, &ctrlr->task_mgmt_req_queue); } int -ufshci_utm_req_queue_enable(struct ufshci_controller *ctrlr) +ufshci_utmr_req_queue_enable(struct ufshci_controller *ctrlr) { return (ctrlr->task_mgmt_req_queue.qops.enable(ctrlr, &ctrlr->task_mgmt_req_queue)); } int -ufshci_ut_req_queue_construct(struct ufshci_controller *ctrlr) +ufshci_utr_req_queue_construct(struct ufshci_controller *ctrlr) { struct ufshci_req_queue *req_queue; int error; @@ -79,7 +94,7 @@ ufshci_ut_req_queue_construct(struct ufshci_controller *ctrlr) */ req_queue = &ctrlr->transfer_req_queue; req_queue->queue_mode = UFSHCI_Q_MODE_SDB; - req_queue->qops = sdb_qops; + req_queue->qops = sdb_utr_qops; error = req_queue->qops.construct(ctrlr, req_queue, UFSHCI_UTR_ENTRIES, /*is_task_mgmt*/ false); @@ -88,14 +103,14 @@ ufshci_ut_req_queue_construct(struct ufshci_controller *ctrlr) } void -ufshci_ut_req_queue_destroy(struct ufshci_controller *ctrlr) +ufshci_utr_req_queue_destroy(struct ufshci_controller *ctrlr) { ctrlr->transfer_req_queue.qops.destroy(ctrlr, &ctrlr->transfer_req_queue); } int -ufshci_ut_req_queue_enable(struct ufshci_controller *ctrlr) +ufshci_utr_req_queue_enable(struct ufshci_controller *ctrlr) { return (ctrlr->transfer_req_queue.qops.enable(ctrlr, &ctrlr->transfer_req_queue)); @@ -213,20 +228,30 @@ ufshci_req_queue_complete_tracker(struct ufshci_tracker *tr) struct ufshci_req_queue *req_queue = tr->req_queue; struct ufshci_request *req = tr->req; struct ufshci_completion cpl; - struct ufshci_utp_xfer_req_desc *desc; uint8_t ocs; bool retry, error, retriable; mtx_assert(&tr->hwq->qlock, MA_NOTOWNED); - bus_dmamap_sync(req_queue->dma_tag_ucd, req_queue->ucdmem_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); + /* Copy the response from the Request Descriptor or UTP Command + * Descriptor. */ + if (req_queue->is_task_mgmt) { + cpl.size = tr->response_size; + memcpy(&cpl.response_upiu, + (void *)tr->hwq->utmrd[tr->slot_num].response_upiu, + cpl.size); - cpl.size = tr->response_size; - memcpy(&cpl.response_upiu, (void *)tr->ucd->response_upiu, cpl.size); + ocs = tr->hwq->utmrd[tr->slot_num].overall_command_status; + } else { + bus_dmamap_sync(req_queue->dma_tag_ucd, req_queue->ucdmem_map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - desc = &tr->hwq->utrd[tr->slot_num]; - ocs = desc->overall_command_status; + cpl.size = tr->response_size; + memcpy(&cpl.response_upiu, (void *)tr->ucd->response_upiu, + cpl.size); + + ocs = tr->hwq->utrd[tr->slot_num].overall_command_status; + } error = ufshci_req_queue_response_is_error(req_queue, ocs, &cpl.response_upiu); @@ -358,7 +383,19 @@ ufshci_req_queue_prepare_prdt(struct ufshci_tracker *tr) } static void -ufshci_req_queue_fill_descriptor(struct ufshci_utp_xfer_req_desc *desc, +ufshci_req_queue_fill_utmr_descriptor( + struct ufshci_utp_task_mgmt_req_desc *desc, struct ufshci_request *req) +{ + memset(desc, 0, sizeof(struct ufshci_utp_task_mgmt_req_desc)); + desc->interrupt = true; + /* Set the initial value to Invalid. */ + desc->overall_command_status = UFSHCI_UTMR_OCS_INVALID; + + memcpy(desc->request_upiu, &req->request_upiu, req->request_size); +} + +static void +ufshci_req_queue_fill_utr_descriptor(struct ufshci_utp_xfer_req_desc *desc, uint8_t data_direction, const uint64_t paddr, const uint16_t response_off, const uint16_t response_len, const uint16_t prdt_off, const uint16_t prdt_entry_cnt) @@ -378,7 +415,7 @@ ufshci_req_queue_fill_descriptor(struct ufshci_utp_xfer_req_desc *desc, desc->data_direction = data_direction; desc->interrupt = true; /* Set the initial value to Invalid. */ - desc->overall_command_status = UFSHCI_OCS_INVALID; + desc->overall_command_status = UFSHCI_UTR_OCS_INVALID; desc->utp_command_descriptor_base_address = (uint32_t)(paddr & 0xffffffff); desc->utp_command_descriptor_base_address_upper = (uint32_t)(paddr >> @@ -407,26 +444,32 @@ ufshci_req_queue_submit_tracker(struct ufshci_req_queue *req_queue, /* TODO: Check timeout */ - request_len = req->request_size; - response_off = UFSHCI_UTP_XFER_REQ_SIZE; - response_len = req->response_size; - - /* Prepare UTP Command Descriptor */ - memcpy(tr->ucd, &req->request_upiu, request_len); - memset((uint8_t *)tr->ucd + response_off, 0, response_len); - - /* Prepare PRDT */ - if (req->payload_valid) - ufshci_req_queue_prepare_prdt(tr); - - bus_dmamap_sync(req_queue->dma_tag_ucd, req_queue->ucdmem_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* Prepare UTP Transfer Request Descriptor. */ - ucd_paddr = tr->ucd_bus_addr; - ufshci_req_queue_fill_descriptor(&tr->hwq->utrd[slot_num], - data_direction, ucd_paddr, response_off, response_len, tr->prdt_off, - tr->prdt_entry_cnt); + if (req_queue->is_task_mgmt) { + /* Prepare UTP Task Management Request Descriptor. */ + ufshci_req_queue_fill_utmr_descriptor(&tr->hwq->utmrd[slot_num], + req); + } else { + request_len = req->request_size; + response_off = UFSHCI_UTP_XFER_REQ_SIZE; + response_len = req->response_size; + + /* Prepare UTP Command Descriptor */ + memcpy(tr->ucd, &req->request_upiu, request_len); + memset((uint8_t *)tr->ucd + response_off, 0, response_len); + + /* Prepare PRDT */ + if (req->payload_valid) + ufshci_req_queue_prepare_prdt(tr); + + /* Prepare UTP Transfer Request Descriptor. */ + ucd_paddr = tr->ucd_bus_addr; + ufshci_req_queue_fill_utr_descriptor(&tr->hwq->utrd[slot_num], + data_direction, ucd_paddr, response_off, response_len, + tr->prdt_off, tr->prdt_entry_cnt); + + bus_dmamap_sync(req_queue->dma_tag_ucd, req_queue->ucdmem_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + } bus_dmamap_sync(tr->hwq->dma_tag_queue, tr->hwq->queuemem_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); diff --git a/sys/dev/ufshci/ufshci_req_sdb.c b/sys/dev/ufshci/ufshci_req_sdb.c index b1f303afaef5..834a459d48e3 100644 --- a/sys/dev/ufshci/ufshci_req_sdb.c +++ b/sys/dev/ufshci/ufshci_req_sdb.c @@ -26,12 +26,6 @@ ufshci_req_sdb_cmd_desc_destroy(struct ufshci_req_queue *req_queue) tr = hwq->act_tr[i]; bus_dmamap_destroy(req_queue->dma_tag_payload, tr->payload_dma_map); - free(tr, M_UFSHCI); - } - - if (hwq->act_tr) { - free(hwq->act_tr, M_UFSHCI); - hwq->act_tr = NULL; } if (req_queue->ucd) { @@ -76,7 +70,6 @@ ufshci_req_sdb_cmd_desc_construct(struct ufshci_req_queue *req_queue, uint32_t num_entries, struct ufshci_controller *ctrlr) { struct ufshci_hw_queue *hwq = &req_queue->hwq[UFSHCI_SDB_Q]; - struct ufshci_tracker *tr; size_t ucd_allocsz, payload_allocsz; uint8_t *ucdmem; int i, error; @@ -134,27 +127,14 @@ ufshci_req_sdb_cmd_desc_construct(struct ufshci_req_queue *req_queue, goto out; } - hwq->act_tr = malloc_domainset(sizeof(struct ufshci_tracker *) * - req_queue->num_entries, - M_UFSHCI, DOMAINSET_PREF(req_queue->domain), M_ZERO | M_WAITOK); - for (i = 0; i < req_queue->num_trackers; i++) { - tr = malloc_domainset(sizeof(struct ufshci_tracker), M_UFSHCI, - DOMAINSET_PREF(req_queue->domain), M_ZERO | M_WAITOK); - bus_dmamap_create(req_queue->dma_tag_payload, 0, - &tr->payload_dma_map); + &hwq->act_tr[i]->payload_dma_map); - tr->req_queue = req_queue; - tr->slot_num = i; - tr->slot_state = UFSHCI_SLOT_STATE_FREE; - - tr->ucd = (struct ufshci_utp_cmd_desc *)ucdmem; - tr->ucd_bus_addr = hwq->ucd_bus_addr[i]; + hwq->act_tr[i]->ucd = (struct ufshci_utp_cmd_desc *)ucdmem; + hwq->act_tr[i]->ucd_bus_addr = hwq->ucd_bus_addr[i]; ucdmem += sizeof(struct ufshci_utp_cmd_desc); - - hwq->act_tr[i] = tr; } return (0); @@ -163,25 +143,16 @@ out: return (ENOMEM); } -static bool -ufshci_req_sdb_is_doorbell_cleared(struct ufshci_controller *ctrlr, - uint8_t slot) -{ - uint32_t utrldbr; - - utrldbr = ufshci_mmio_read_4(ctrlr, utrldbr); - return (!(utrldbr & (1 << slot))); -} - int ufshci_req_sdb_construct(struct ufshci_controller *ctrlr, struct ufshci_req_queue *req_queue, uint32_t num_entries, bool is_task_mgmt) { struct ufshci_hw_queue *hwq; - size_t allocsz; + size_t desc_size, alloc_size; uint64_t queuemem_phys; uint8_t *queuemem; - int error; + struct ufshci_tracker *tr; + int i, error; req_queue->ctrlr = ctrlr; req_queue->is_task_mgmt = is_task_mgmt; @@ -209,10 +180,13 @@ ufshci_req_sdb_construct(struct ufshci_controller *ctrlr, * Descriptor (UTRD) or UTP Task Management Request Descriptor (UTMRD)) * Note: UTRD/UTMRD format is restricted to 1024-byte alignment. */ - allocsz = num_entries * sizeof(struct ufshci_utp_xfer_req_desc); + desc_size = is_task_mgmt ? + sizeof(struct ufshci_utp_task_mgmt_req_desc) : + sizeof(struct ufshci_utp_xfer_req_desc); + alloc_size = num_entries * desc_size; error = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), 1024, ctrlr->page_size, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, - allocsz, 1, allocsz, 0, NULL, NULL, &hwq->dma_tag_queue); + alloc_size, 1, alloc_size, 0, NULL, NULL, &hwq->dma_tag_queue); if (error != 0) { ufshci_printf(ctrlr, "request queue tag create failed %d\n", error); @@ -227,7 +201,7 @@ ufshci_req_sdb_construct(struct ufshci_controller *ctrlr, } if (bus_dmamap_load(hwq->dma_tag_queue, hwq->queuemem_map, queuemem, - allocsz, ufshci_single_map, &queuemem_phys, 0) != 0) { + alloc_size, ufshci_single_map, &queuemem_phys, 0) != 0) { ufshci_printf(ctrlr, "failed to load request queue memory\n"); bus_dmamem_free(hwq->dma_tag_queue, hwq->utrd, hwq->queuemem_map); @@ -238,13 +212,30 @@ ufshci_req_sdb_construct(struct ufshci_controller *ctrlr, hwq->num_intr_handler_calls = 0; hwq->num_retries = 0; hwq->num_failures = 0; - hwq->utrd = (struct ufshci_utp_xfer_req_desc *)queuemem; hwq->req_queue_addr = queuemem_phys; + /* Allocate trackers */ + hwq->act_tr = malloc_domainset(sizeof(struct ufshci_tracker *) * + req_queue->num_entries, + M_UFSHCI, DOMAINSET_PREF(req_queue->domain), M_ZERO | M_WAITOK); + + for (i = 0; i < req_queue->num_trackers; i++) { + tr = malloc_domainset(sizeof(struct ufshci_tracker), M_UFSHCI, + DOMAINSET_PREF(req_queue->domain), M_ZERO | M_WAITOK); + + tr->req_queue = req_queue; + tr->slot_num = i; + tr->slot_state = UFSHCI_SLOT_STATE_FREE; + + hwq->act_tr[i] = tr; + } + if (is_task_mgmt) { /* UTP Task Management Request (UTMR) */ uint32_t utmrlba, utmrlbau; + hwq->utmrd = (struct ufshci_utp_task_mgmt_req_desc *)queuemem; + utmrlba = hwq->req_queue_addr & 0xffffffff; utmrlbau = hwq->req_queue_addr >> 32; ufshci_mmio_write_4(ctrlr, utmrlba, utmrlba); @@ -253,6 +244,8 @@ ufshci_req_sdb_construct(struct ufshci_controller *ctrlr, /* UTP Transfer Request (UTR) */ uint32_t utrlba, utrlbau; + hwq->utrd = (struct ufshci_utp_xfer_req_desc *)queuemem; + /* * Allocate physical memory for the command descriptor. * UTP Transfer Request (UTR) requires memory for a separate @@ -284,10 +277,22 @@ ufshci_req_sdb_destroy(struct ufshci_controller *ctrlr, struct ufshci_req_queue *req_queue) { struct ufshci_hw_queue *hwq = &req_queue->hwq[UFSHCI_SDB_Q]; + struct ufshci_tracker *tr; + int i; if (!req_queue->is_task_mgmt) ufshci_req_sdb_cmd_desc_destroy(&ctrlr->transfer_req_queue); + for (i = 0; i < req_queue->num_trackers; i++) { + tr = hwq->act_tr[i]; + free(tr, M_UFSHCI); + } + + if (hwq->act_tr) { + free(hwq->act_tr, M_UFSHCI); + hwq->act_tr = NULL; + } + if (hwq->utrd != NULL) { bus_dmamap_unload(hwq->dma_tag_queue, hwq->queuemem_map); bus_dmamem_free(hwq->dma_tag_queue, hwq->utrd, @@ -389,7 +394,18 @@ ufshci_req_sdb_reserve_slot(struct ufshci_req_queue *req_queue, } void -ufshci_req_sdb_clear_cpl_ntf(struct ufshci_controller *ctrlr, +ufshci_req_sdb_utmr_clear_cpl_ntf(struct ufshci_controller *ctrlr, + struct ufshci_tracker *tr) +{ + /* + * NOP + * UTP Task Management does not have a Completion Notification + * Register. + */ +} + +void +ufshci_req_sdb_utr_clear_cpl_ntf(struct ufshci_controller *ctrlr, struct ufshci_tracker *tr) { uint32_t utrlcnr; @@ -399,7 +415,19 @@ ufshci_req_sdb_clear_cpl_ntf(struct ufshci_controller *ctrlr, } void -ufshci_req_sdb_ring_doorbell(struct ufshci_controller *ctrlr, +ufshci_req_sdb_utmr_ring_doorbell(struct ufshci_controller *ctrlr, + struct ufshci_tracker *tr) +{ + uint32_t utmrldbr = 0; + + utmrldbr |= 1 << tr->slot_num; + ufshci_mmio_write_4(ctrlr, utmrldbr, utmrldbr); + + tr->req_queue->hwq[UFSHCI_SDB_Q].num_cmds++; +} + +void +ufshci_req_sdb_utr_ring_doorbell(struct ufshci_controller *ctrlr, struct ufshci_tracker *tr) { uint32_t utrldbr = 0; @@ -408,9 +436,26 @@ ufshci_req_sdb_ring_doorbell(struct ufshci_controller *ctrlr, ufshci_mmio_write_4(ctrlr, utrldbr, utrldbr); tr->req_queue->hwq[UFSHCI_SDB_Q].num_cmds++; +} + +bool +ufshci_req_sdb_utmr_is_doorbell_cleared(struct ufshci_controller *ctrlr, + uint8_t slot) +{ + uint32_t utmrldbr; + + utmrldbr = ufshci_mmio_read_4(ctrlr, utmrldbr); + return (!(utmrldbr & (1 << slot))); +} - // utrldbr = ufshci_mmio_read_4(ctrlr, utrldbr); - // printf("DB=0x%08x\n", utrldbr); +bool +ufshci_req_sdb_utr_is_doorbell_cleared(struct ufshci_controller *ctrlr, + uint8_t slot) +{ + uint32_t utrldbr; + + utrldbr = ufshci_mmio_read_4(ctrlr, utrldbr); + return (!(utrldbr & (1 << slot))); } bool @@ -435,7 +480,7 @@ ufshci_req_sdb_process_cpl(struct ufshci_req_queue *req_queue) * is cleared. */ if (tr->slot_state == UFSHCI_SLOT_STATE_SCHEDULED && - ufshci_req_sdb_is_doorbell_cleared(req_queue->ctrlr, + req_queue->qops.is_doorbell_cleared(req_queue->ctrlr, slot)) { ufshci_req_queue_complete_tracker(tr); done = true; diff --git a/sys/dev/usb/input/uhid.c b/sys/dev/usb/input/uhid.c index a31081663f0c..e2b97f5accac 100644 --- a/sys/dev/usb/input/uhid.c +++ b/sys/dev/usb/input/uhid.c @@ -40,8 +40,6 @@ * HID spec: http://www.usb.org/developers/devclass_docs/HID1_11.pdf */ -#include "opt_hid.h" - #include <sys/stdint.h> #include <sys/stddef.h> #include <sys/param.h> @@ -928,11 +926,7 @@ static device_method_t uhid_methods[] = { }; static driver_t uhid_driver = { -#ifdef HIDRAW_MAKE_UHID_ALIAS - .name = "hidraw", -#else .name = "uhid", -#endif .methods = uhid_methods, .size = sizeof(struct uhid_softc), }; diff --git a/sys/dev/usb/input/usbhid.c b/sys/dev/usb/input/usbhid.c index df810012b3f8..cba3f34053e5 100644 --- a/sys/dev/usb/input/usbhid.c +++ b/sys/dev/usb/input/usbhid.c @@ -114,6 +114,7 @@ struct usbhid_xfer_ctx { void *cb_ctx; int waiters; bool influx; + bool no_readahead; }; struct usbhid_softc { @@ -272,7 +273,7 @@ usbhid_intr_handler_cb(struct usbhid_xfer_ctx *xfer_ctx) sc->sc_intr_handler(sc->sc_intr_ctx, xfer_ctx->buf, xfer_ctx->req.intr.actlen); - return (0); + return (xfer_ctx->no_readahead ? ECANCELED : 0); } static int @@ -430,6 +431,7 @@ usbhid_intr_start(device_t dev, device_t child __unused) .cb = usbhid_intr_handler_cb, .cb_ctx = sc, .buf = sc->sc_intr_buf, + .no_readahead = hid_test_quirk(&sc->sc_hw, HQ_NO_READAHEAD), }; sc->sc_xfer_ctx[POLL_XFER(USBHID_INTR_IN_DT)] = (struct usbhid_xfer_ctx) { .req.intr.maxlen = @@ -705,6 +707,10 @@ usbhid_ioctl(device_t dev, device_t child __unused, unsigned long cmd, if (error == 0) ucr->ucr_actlen = UGETW(req.ctrl.wLength); break; + case USB_GET_DEVICEINFO: + error = usbd_fill_deviceinfo(sc->sc_udev, + (struct usb_device_info *)data); + break; default: error = EINVAL; } diff --git a/sys/dev/usb/misc/cp2112.c b/sys/dev/usb/misc/cp2112.c index d4776ca342cb..201a3ec51ce4 100644 --- a/sys/dev/usb/misc/cp2112.c +++ b/sys/dev/usb/misc/cp2112.c @@ -708,11 +708,12 @@ cp2112gpio_attach(device_t dev) } } - sc->busdev = gpiobus_attach_bus(dev); + sc->busdev = gpiobus_add_bus(dev); if (sc->busdev == NULL) { - device_printf(dev, "gpiobus_attach_bus failed\n"); + device_printf(dev, "gpiobus_add_bus failed\n"); goto detach; } + bus_attach_children(dev); return (0); detach: diff --git a/sys/dev/usb/net/if_ipheth.c b/sys/dev/usb/net/if_ipheth.c index f70113c53eb4..cfa800707391 100644 --- a/sys/dev/usb/net/if_ipheth.c +++ b/sys/dev/usb/net/if_ipheth.c @@ -55,6 +55,7 @@ #include <net/if_var.h> #include <dev/usb/usb.h> +#include <dev/usb/usb_cdc.h> #include <dev/usb/usbdi.h> #include <dev/usb/usbdi_util.h> #include "usbdevs.h" @@ -81,6 +82,9 @@ static uether_fn_t ipheth_start; static uether_fn_t ipheth_setmulti; static uether_fn_t ipheth_setpromisc; +static ipheth_consumer_t ipheth_consume_read; +static ipheth_consumer_t ipheth_consume_read_ncm; + #ifdef USB_DEBUG static int ipheth_debug = 0; @@ -96,7 +100,31 @@ static const struct usb_config ipheth_config[IPHETH_N_TRANSFER] = { .direction = UE_DIR_RX, .frames = IPHETH_RX_FRAMES_MAX, .bufsize = (IPHETH_RX_FRAMES_MAX * MCLBYTES), - .flags = {.short_frames_ok = 1,.short_xfer_ok = 1,.ext_buffer = 1,}, + .flags = {.short_frames_ok = 1, .short_xfer_ok = 1, .ext_buffer = 1,}, + .callback = ipheth_bulk_read_callback, + .timeout = 0, /* no timeout */ + }, + + [IPHETH_BULK_TX] = { + .type = UE_BULK, + .endpoint = UE_ADDR_ANY, + .direction = UE_DIR_TX, + .frames = IPHETH_TX_FRAMES_MAX, + .bufsize = (IPHETH_TX_FRAMES_MAX * IPHETH_BUF_SIZE), + .flags = {.force_short_xfer = 1,}, + .callback = ipheth_bulk_write_callback, + .timeout = IPHETH_TX_TIMEOUT, + }, +}; + +static const struct usb_config ipheth_config_ncm[IPHETH_N_TRANSFER] = { + [IPHETH_BULK_RX] = { + .type = UE_BULK, + .endpoint = UE_ADDR_ANY, + .direction = UE_DIR_RX, + .frames = 1, + .bufsize = IPHETH_RX_NCM_BUF_SIZE, + .flags = {.short_frames_ok = 1, .short_xfer_ok = 1,}, .callback = ipheth_bulk_read_callback, .timeout = 0, /* no timeout */ }, @@ -204,6 +232,21 @@ ipheth_get_mac_addr(struct ipheth_softc *sc) return (0); } +static bool +ipheth_enable_ncm(struct ipheth_softc *sc) +{ + struct usb_device_request req; + + req.bmRequestType = UT_WRITE_VENDOR_INTERFACE; + req.bRequest = IPHETH_CMD_ENABLE_NCM; + USETW(req.wValue, 0); + req.wIndex[0] = sc->sc_iface_no; + req.wIndex[1] = 0; + USETW(req.wLength, 0); + + return (usbd_do_request(sc->sc_ue.ue_udev, NULL, &req, NULL) == 0); +} + static int ipheth_probe(device_t dev) { @@ -221,6 +264,7 @@ ipheth_attach(device_t dev) struct ipheth_softc *sc = device_get_softc(dev); struct usb_ether *ue = &sc->sc_ue; struct usb_attach_arg *uaa = device_get_ivars(dev); + const struct usb_config *config; int error; sc->sc_iface_no = uaa->info.bIfaceIndex; @@ -235,18 +279,29 @@ ipheth_attach(device_t dev) device_printf(dev, "Cannot set alternate setting\n"); goto detach; } - error = usbd_transfer_setup(uaa->device, &sc->sc_iface_no, - sc->sc_xfer, ipheth_config, IPHETH_N_TRANSFER, sc, &sc->sc_mtx); - if (error) { - device_printf(dev, "Cannot setup USB transfers\n"); - goto detach; - } + ue->ue_sc = sc; ue->ue_dev = dev; ue->ue_udev = uaa->device; ue->ue_mtx = &sc->sc_mtx; ue->ue_methods = &ipheth_ue_methods; + if (ipheth_enable_ncm(sc)) { + config = ipheth_config_ncm; + sc->is_ncm = true; + sc->consume = &ipheth_consume_read_ncm; + } else { + config = ipheth_config; + sc->consume = &ipheth_consume_read; + } + + error = usbd_transfer_setup(uaa->device, &sc->sc_iface_no, sc->sc_xfer, + config, IPHETH_N_TRANSFER, sc, &sc->sc_mtx); + if (error) { + device_printf(dev, "Cannot setup USB transfers\n"); + goto detach; + } + error = ipheth_get_mac_addr(sc); if (error) { device_printf(dev, "Cannot get MAC address\n"); @@ -389,12 +444,9 @@ ipheth_bulk_write_callback(struct usb_xfer *xfer, usb_error_t error) int actlen; int aframes; - usbd_xfer_status(xfer, &actlen, NULL, &aframes, NULL); - - DPRINTFN(1, "\n"); - switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: + usbd_xfer_status(xfer, &actlen, NULL, &aframes, NULL); DPRINTFN(11, "transfer complete: %u bytes in %u frames\n", actlen, aframes); @@ -471,53 +523,40 @@ ipheth_bulk_read_callback(struct usb_xfer *xfer, usb_error_t error) uint8_t x; int actlen; int aframes; - int len; - - usbd_xfer_status(xfer, &actlen, NULL, &aframes, NULL); switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: - + usbd_xfer_status(xfer, &actlen, NULL, &aframes, NULL); DPRINTF("received %u bytes in %u frames\n", actlen, aframes); - for (x = 0; x != aframes; x++) { - m = sc->sc_rx_buf[x]; - sc->sc_rx_buf[x] = NULL; - len = usbd_xfer_frame_len(xfer, x); - - if (len < (int)(sizeof(struct ether_header) + - IPHETH_RX_ADJ)) { - m_freem(m); - continue; - } - - m_adj(m, IPHETH_RX_ADJ); - - /* queue up mbuf */ - uether_rxmbuf(&sc->sc_ue, m, len - IPHETH_RX_ADJ); - } + for (x = 0; x != aframes; x++) + sc->consume(xfer, x); /* FALLTHROUGH */ case USB_ST_SETUP: - - for (x = 0; x != IPHETH_RX_FRAMES_MAX; x++) { - if (sc->sc_rx_buf[x] == NULL) { - m = uether_newbuf(); - if (m == NULL) - goto tr_stall; - - /* cancel alignment for ethernet */ - m_adj(m, ETHER_ALIGN); - - sc->sc_rx_buf[x] = m; - } else { - m = sc->sc_rx_buf[x]; + if (!sc->is_ncm) { + for (x = 0; x != IPHETH_RX_FRAMES_MAX; x++) { + if (sc->sc_rx_buf[x] == NULL) { + m = uether_newbuf(); + if (m == NULL) + goto tr_stall; + + /* cancel alignment for ethernet */ + m_adj(m, ETHER_ALIGN); + + sc->sc_rx_buf[x] = m; + } else { + m = sc->sc_rx_buf[x]; + } + usbd_xfer_set_frame_data(xfer, x, m->m_data, m->m_len); } - - usbd_xfer_set_frame_data(xfer, x, m->m_data, m->m_len); + usbd_xfer_set_frames(xfer, x); + } else { + usbd_xfer_set_frame_len(xfer, 0, + IPHETH_RX_NCM_BUF_SIZE); + usbd_xfer_set_frames(xfer, 1); } - /* set number of frames and start hardware */ - usbd_xfer_set_frames(xfer, x); + usbd_transfer_submit(xfer); /* flush any received frames */ uether_rxflush(&sc->sc_ue); @@ -539,3 +578,86 @@ ipheth_bulk_read_callback(struct usb_xfer *xfer, usb_error_t error) break; } } + +static void +ipheth_consume_read(struct usb_xfer *xfer, int x) +{ + struct ipheth_softc *sc = usbd_xfer_softc(xfer); + struct mbuf *m = sc->sc_rx_buf[x]; + int len; + + sc->sc_rx_buf[x] = NULL; + len = usbd_xfer_frame_len(xfer, x); + + if (len < (int)(sizeof(struct ether_header) + IPHETH_RX_ADJ)) { + m_freem(m); + return; + } + + m_adj(m, IPHETH_RX_ADJ); + + /* queue up mbuf */ + uether_rxmbuf(&sc->sc_ue, m, len - IPHETH_RX_ADJ); +} + +static void +ipheth_consume_read_ncm(struct usb_xfer *xfer, int x) +{ + struct ipheth_softc *sc = usbd_xfer_softc(xfer); + struct usb_page_cache *pc = usbd_xfer_get_frame(xfer, 0); + struct ncm_data_cache ncm; + if_t ifp = uether_getifp(&sc->sc_ue); + struct mbuf *new_buf; + int i, actlen; + uint16_t dp_offset, dp_len; + + usbd_xfer_status(xfer, &actlen, NULL, NULL, NULL); + + if (actlen < IPHETH_NCM_HEADER_SIZE) + return; + + usbd_copy_out(pc, 0, &ncm.hdr, sizeof(ncm.hdr)); + + if (UGETDW(ncm.hdr.dwSignature) != 0x484D434E) + return; + + /* Dpt follows the hdr on iOS */ + if (UGETW(ncm.hdr.wDptIndex) != (int)(sizeof(struct usb_ncm16_hdr))) + return; + + usbd_copy_out(pc, UGETW(ncm.hdr.wDptIndex), &ncm.dpt, sizeof(ncm.dpt)); + + if (UGETDW(ncm.dpt.dwSignature) != 0x304D434E) + return; + + usbd_copy_out(pc, UGETW(ncm.hdr.wDptIndex) + sizeof(ncm.dpt), &ncm.dp, + sizeof(ncm.dp)); + + for (i = 0; i < IPHETH_NCM_DPT_DP_NUM; ++i) { + dp_offset = UGETW(ncm.dp[i].wFrameIndex); + dp_len = UGETW(ncm.dp[i].wFrameLength); + + /* (3.3.1 USB CDC NCM spec v1.0) */ + if (dp_offset == 0 && dp_len == 0) + break; + + if (dp_offset < IPHETH_NCM_HEADER_SIZE || dp_offset >= actlen || + actlen < (dp_len + dp_offset) || + dp_len < sizeof(struct ether_header)) { + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); + continue; + } + if (dp_len > (MCLBYTES - ETHER_ALIGN)) { + if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); + continue; + } + + new_buf = uether_newbuf(); + if (new_buf == NULL) { + if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); + continue; + } + usbd_copy_out(pc, dp_offset, new_buf->m_data, dp_len); + uether_rxmbuf(&sc->sc_ue, new_buf, dp_len); + } +} diff --git a/sys/dev/usb/net/if_iphethvar.h b/sys/dev/usb/net/if_iphethvar.h index 203bb96b6f22..d637e8f67d01 100644 --- a/sys/dev/usb/net/if_iphethvar.h +++ b/sys/dev/usb/net/if_iphethvar.h @@ -41,6 +41,7 @@ #define IPHETH_BUF_SIZE 1514 #define IPHETH_TX_TIMEOUT 5000 /* ms */ +#define IPHETH_RX_NCM_BUF_SIZE 65536 #define IPHETH_RX_FRAMES_MAX 1 #define IPHETH_TX_FRAMES_MAX 8 @@ -55,10 +56,20 @@ #define IPHETH_CTRL_TIMEOUT 5000 /* ms */ #define IPHETH_CMD_GET_MACADDR 0x00 +#define IPHETH_CMD_ENABLE_NCM 0x04 #define IPHETH_CMD_CARRIER_CHECK 0x45 #define IPHETH_CARRIER_ON 0x04 +#define IPHETH_NCM_DPT_DP_NUM 22 +#define IPHETH_NCM_DPT_HEADER_SIZE \ + (sizeof(struct usb_ncm16_dpt) + \ + IPHETH_NCM_DPT_DP_NUM * sizeof(struct usb_ncm16_dp)) +#define IPHETH_NCM_HEADER_SIZE \ + (sizeof(struct usb_ncm16_hdr) + IPHETH_NCM_DPT_HEADER_SIZE) + +typedef void (ipheth_consumer_t)(struct usb_xfer *xfer, int idx); + enum { IPHETH_BULK_TX, IPHETH_BULK_RX, @@ -76,6 +87,16 @@ struct ipheth_softc { uint8_t sc_data[IPHETH_CTRL_BUF_SIZE]; uint8_t sc_iface_no; uint8_t sc_carrier_on; + + bool is_ncm; + + ipheth_consumer_t *consume; +}; + +struct ncm_data_cache { + struct usb_ncm16_hdr hdr; + struct usb_ncm16_dpt dpt; + struct usb_ncm16_dp dp[IPHETH_NCM_DPT_DP_NUM]; }; #define IPHETH_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) diff --git a/sys/dev/usb/usb_device.c b/sys/dev/usb/usb_device.c index 60c2d6745b3f..f0989972f49f 100644 --- a/sys/dev/usb/usb_device.c +++ b/sys/dev/usb/usb_device.c @@ -3111,3 +3111,51 @@ usbd_get_endpoint_mode(struct usb_device *udev, struct usb_endpoint *ep) { return (ep->ep_mode); } + +/*------------------------------------------------------------------------* + * usbd_fill_deviceinfo + * + * This function dumps information about an USB device to the + * structure pointed to by the "di" argument. + * + * Returns: + * 0: Success + * Else: Failure + *------------------------------------------------------------------------*/ +int +usbd_fill_deviceinfo(struct usb_device *udev, struct usb_device_info *di) +{ + struct usb_device *hub; + + bzero(di, sizeof(di[0])); + + di->udi_bus = device_get_unit(udev->bus->bdev); + di->udi_addr = udev->address; + di->udi_index = udev->device_index; + strlcpy(di->udi_serial, usb_get_serial(udev), sizeof(di->udi_serial)); + strlcpy(di->udi_vendor, usb_get_manufacturer(udev), sizeof(di->udi_vendor)); + strlcpy(di->udi_product, usb_get_product(udev), sizeof(di->udi_product)); + usb_printbcd(di->udi_release, sizeof(di->udi_release), + UGETW(udev->ddesc.bcdDevice)); + di->udi_vendorNo = UGETW(udev->ddesc.idVendor); + di->udi_productNo = UGETW(udev->ddesc.idProduct); + di->udi_releaseNo = UGETW(udev->ddesc.bcdDevice); + di->udi_class = udev->ddesc.bDeviceClass; + di->udi_subclass = udev->ddesc.bDeviceSubClass; + di->udi_protocol = udev->ddesc.bDeviceProtocol; + di->udi_config_no = udev->curr_config_no; + di->udi_config_index = udev->curr_config_index; + di->udi_power = udev->flags.self_powered ? 0 : udev->power; + di->udi_speed = udev->speed; + di->udi_mode = udev->flags.usb_mode; + di->udi_power_mode = udev->power_mode; + di->udi_suspended = udev->flags.peer_suspended; + + hub = udev->parent_hub; + if (hub) { + di->udi_hubaddr = hub->address; + di->udi_hubindex = hub->device_index; + di->udi_hubport = udev->port_no; + } + return (0); +} diff --git a/sys/dev/usb/usb_generic.c b/sys/dev/usb/usb_generic.c index c0af27d77e5d..ccb0b2184ec4 100644 --- a/sys/dev/usb/usb_generic.c +++ b/sys/dev/usb/usb_generic.c @@ -831,42 +831,7 @@ ugen_get_iface_driver(struct usb_fifo *f, struct usb_gen_descriptor *ugd) int ugen_fill_deviceinfo(struct usb_fifo *f, struct usb_device_info *di) { - struct usb_device *udev; - struct usb_device *hub; - - udev = f->udev; - - bzero(di, sizeof(di[0])); - - di->udi_bus = device_get_unit(udev->bus->bdev); - di->udi_addr = udev->address; - di->udi_index = udev->device_index; - strlcpy(di->udi_serial, usb_get_serial(udev), sizeof(di->udi_serial)); - strlcpy(di->udi_vendor, usb_get_manufacturer(udev), sizeof(di->udi_vendor)); - strlcpy(di->udi_product, usb_get_product(udev), sizeof(di->udi_product)); - usb_printbcd(di->udi_release, sizeof(di->udi_release), - UGETW(udev->ddesc.bcdDevice)); - di->udi_vendorNo = UGETW(udev->ddesc.idVendor); - di->udi_productNo = UGETW(udev->ddesc.idProduct); - di->udi_releaseNo = UGETW(udev->ddesc.bcdDevice); - di->udi_class = udev->ddesc.bDeviceClass; - di->udi_subclass = udev->ddesc.bDeviceSubClass; - di->udi_protocol = udev->ddesc.bDeviceProtocol; - di->udi_config_no = udev->curr_config_no; - di->udi_config_index = udev->curr_config_index; - di->udi_power = udev->flags.self_powered ? 0 : udev->power; - di->udi_speed = udev->speed; - di->udi_mode = udev->flags.usb_mode; - di->udi_power_mode = udev->power_mode; - di->udi_suspended = udev->flags.peer_suspended; - - hub = udev->parent_hub; - if (hub) { - di->udi_hubaddr = hub->address; - di->udi_hubindex = hub->device_index; - di->udi_hubport = udev->port_no; - } - return (0); + return (usbd_fill_deviceinfo(f->udev, di)); } int diff --git a/sys/dev/usb/usbdi.h b/sys/dev/usb/usbdi.h index 08d130aa2868..0826d9f078c4 100644 --- a/sys/dev/usb/usbdi.h +++ b/sys/dev/usb/usbdi.h @@ -38,6 +38,7 @@ struct usb_process; struct usb_proc_msg; struct usb_mbuf; struct usb_fs_privdata; +struct usb_device_info; struct mbuf; typedef enum { /* keep in sync with usb_errstr_table */ @@ -587,6 +588,8 @@ usb_error_t usbd_set_endpoint_mode(struct usb_device *udev, struct usb_endpoint *ep, uint8_t ep_mode); uint8_t usbd_get_endpoint_mode(struct usb_device *udev, struct usb_endpoint *ep); +int usbd_fill_deviceinfo(struct usb_device *udev, + struct usb_device_info *di); const struct usb_device_id *usbd_lookup_id_by_info( const struct usb_device_id *id, usb_size_t sizeof_id, diff --git a/sys/dev/virtio/mmio/virtio_mmio.c b/sys/dev/virtio/mmio/virtio_mmio.c index 5a81c8a24779..fe531fced998 100644 --- a/sys/dev/virtio/mmio/virtio_mmio.c +++ b/sys/dev/virtio/mmio/virtio_mmio.c @@ -53,7 +53,6 @@ #include <dev/virtio/virtqueue.h> #include <dev/virtio/mmio/virtio_mmio.h> -#include "virtio_mmio_if.h" #include "virtio_bus_if.h" #include "virtio_if.h" @@ -79,7 +78,6 @@ static int vtmmio_alloc_virtqueues(device_t, int, struct vq_alloc_info *); static int vtmmio_setup_intr(device_t, enum intr_type); static void vtmmio_stop(device_t); -static void vtmmio_poll(device_t); static int vtmmio_reinit(device_t, uint64_t); static void vtmmio_reinit_complete(device_t); static void vtmmio_notify_virtqueue(device_t, uint16_t, bus_size_t); @@ -104,29 +102,11 @@ static void vtmmio_vq_intr(void *); * I/O port read/write wrappers. */ #define vtmmio_write_config_1(sc, o, v) \ -do { \ - if (sc->platform != NULL) \ - VIRTIO_MMIO_PREWRITE(sc->platform, (o), (v)); \ - bus_write_1((sc)->res[0], (o), (v)); \ - if (sc->platform != NULL) \ - VIRTIO_MMIO_NOTE(sc->platform, (o), (v)); \ -} while (0) + bus_write_1((sc)->res[0], (o), (v)) #define vtmmio_write_config_2(sc, o, v) \ -do { \ - if (sc->platform != NULL) \ - VIRTIO_MMIO_PREWRITE(sc->platform, (o), (v)); \ - bus_write_2((sc)->res[0], (o), (v)); \ - if (sc->platform != NULL) \ - VIRTIO_MMIO_NOTE(sc->platform, (o), (v)); \ -} while (0) + bus_write_2((sc)->res[0], (o), (v)) #define vtmmio_write_config_4(sc, o, v) \ -do { \ - if (sc->platform != NULL) \ - VIRTIO_MMIO_PREWRITE(sc->platform, (o), (v)); \ - bus_write_4((sc)->res[0], (o), (v)); \ - if (sc->platform != NULL) \ - VIRTIO_MMIO_NOTE(sc->platform, (o), (v)); \ -} while (0) + bus_write_4((sc)->res[0], (o), (v)) #define vtmmio_read_config_1(sc, o) \ bus_read_1((sc)->res[0], (o)) @@ -157,7 +137,6 @@ static device_method_t vtmmio_methods[] = { DEVMETHOD(virtio_bus_alloc_virtqueues, vtmmio_alloc_virtqueues), DEVMETHOD(virtio_bus_setup_intr, vtmmio_setup_intr), DEVMETHOD(virtio_bus_stop, vtmmio_stop), - DEVMETHOD(virtio_bus_poll, vtmmio_poll), DEVMETHOD(virtio_bus_reinit, vtmmio_reinit), DEVMETHOD(virtio_bus_reinit_complete, vtmmio_reinit_complete), DEVMETHOD(virtio_bus_notify_vq, vtmmio_notify_virtqueue), @@ -220,19 +199,9 @@ vtmmio_setup_intr(device_t dev, enum intr_type type) { struct vtmmio_softc *sc; int rid; - int err; sc = device_get_softc(dev); - if (sc->platform != NULL) { - err = VIRTIO_MMIO_SETUP_INTR(sc->platform, sc->dev, - vtmmio_vq_intr, sc); - if (err == 0) { - /* Okay we have backend-specific interrupts */ - return (0); - } - } - rid = 0; sc->res[1] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); @@ -597,17 +566,6 @@ vtmmio_stop(device_t dev) vtmmio_reset(device_get_softc(dev)); } -static void -vtmmio_poll(device_t dev) -{ - struct vtmmio_softc *sc; - - sc = device_get_softc(dev); - - if (sc->platform != NULL) - VIRTIO_MMIO_POLL(sc->platform); -} - static int vtmmio_reinit(device_t dev, uint64_t features) { diff --git a/sys/dev/virtio/mmio/virtio_mmio.h b/sys/dev/virtio/mmio/virtio_mmio.h index ac6a96c1c7fe..edcbf0519acc 100644 --- a/sys/dev/virtio/mmio/virtio_mmio.h +++ b/sys/dev/virtio/mmio/virtio_mmio.h @@ -37,7 +37,6 @@ struct vtmmio_virtqueue; struct vtmmio_softc { device_t dev; - device_t platform; struct resource *res[2]; uint64_t vtmmio_features; diff --git a/sys/dev/virtio/mmio/virtio_mmio_fdt.c b/sys/dev/virtio/mmio/virtio_mmio_fdt.c index 7fba8aad8db8..bb9ea8efbaeb 100644 --- a/sys/dev/virtio/mmio/virtio_mmio_fdt.c +++ b/sys/dev/virtio/mmio/virtio_mmio_fdt.c @@ -63,12 +63,10 @@ #include <dev/virtio/mmio/virtio_mmio.h> static int vtmmio_fdt_probe(device_t); -static int vtmmio_fdt_attach(device_t); static device_method_t vtmmio_fdt_methods[] = { /* Device interface. */ DEVMETHOD(device_probe, vtmmio_fdt_probe), - DEVMETHOD(device_attach, vtmmio_fdt_attach), DEVMETHOD_END }; @@ -93,48 +91,3 @@ vtmmio_fdt_probe(device_t dev) return (vtmmio_probe(dev)); } - -static int -vtmmio_setup_platform(device_t dev, struct vtmmio_softc *sc) -{ - phandle_t platform_node; - struct fdt_ic *ic; - phandle_t xref; - phandle_t node; - - sc->platform = NULL; - - if ((node = ofw_bus_get_node(dev)) == -1) - return (ENXIO); - - if (OF_searchencprop(node, "platform", &xref, - sizeof(xref)) == -1) { - return (ENXIO); - } - - platform_node = OF_node_from_xref(xref); - - SLIST_FOREACH(ic, &fdt_ic_list_head, fdt_ics) { - if (ic->iph == platform_node) { - sc->platform = ic->dev; - break; - } - } - - if (sc->platform == NULL) { - /* No platform-specific device. Ignore it. */ - } - - return (0); -} - -static int -vtmmio_fdt_attach(device_t dev) -{ - struct vtmmio_softc *sc; - - sc = device_get_softc(dev); - vtmmio_setup_platform(dev, sc); - - return (vtmmio_attach(dev)); -} diff --git a/sys/dev/virtio/mmio/virtio_mmio_if.m b/sys/dev/virtio/mmio/virtio_mmio_if.m deleted file mode 100644 index baebbd9a0b1c..000000000000 --- a/sys/dev/virtio/mmio/virtio_mmio_if.m +++ /dev/null @@ -1,99 +0,0 @@ -#- -# Copyright (c) 2014 Ruslan Bukin <br@bsdpad.com> -# All rights reserved. -# -# This software was developed by SRI International and the University of -# Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) -# ("CTSRD"), as part of the DARPA CRASH research programme. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. -# -# - -#include <sys/types.h> - -# -# This is optional interface to virtio mmio backend. -# Useful when backend is implemented not by the hardware but software, e.g. -# by using another cpu core. -# - -INTERFACE virtio_mmio; - -CODE { - static int - virtio_mmio_prewrite(device_t dev, size_t offset, int val) - { - - return (1); - } - - static int - virtio_mmio_note(device_t dev, size_t offset, int val) - { - - return (1); - } - - static int - virtio_mmio_setup_intr(device_t dev, device_t mmio_dev, - void *handler, void *ih_user) - { - - return (1); - } -}; - -# -# Inform backend we are going to write data at offset. -# -METHOD int prewrite { - device_t dev; - size_t offset; - int val; -} DEFAULT virtio_mmio_prewrite; - -# -# Inform backend we have data wrotten to offset. -# -METHOD int note { - device_t dev; - size_t offset; - int val; -} DEFAULT virtio_mmio_note; - -# -# Inform backend we are going to poll virtqueue. -# -METHOD int poll { - device_t dev; -}; - -# -# Setup backend-specific interrupts. -# -METHOD int setup_intr { - device_t dev; - device_t mmio_dev; - void *handler; - void *ih_user; -} DEFAULT virtio_mmio_setup_intr; diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c index 2ff9be9680b8..ecb3dbb370e5 100644 --- a/sys/dev/virtio/network/if_vtnet.c +++ b/sys/dev/virtio/network/if_vtnet.c @@ -28,6 +28,9 @@ /* Driver for VirtIO network devices. */ +#include "opt_inet.h" +#include "opt_inet6.h" + #include <sys/param.h> #include <sys/eventhandler.h> #include <sys/systm.h> @@ -82,9 +85,6 @@ #include <dev/virtio/network/if_vtnetvar.h> #include "virtio_if.h" -#include "opt_inet.h" -#include "opt_inet6.h" - #if defined(INET) || defined(INET6) #include <machine/in_cksum.h> #endif diff --git a/sys/dev/virtio/random/virtio_random.c b/sys/dev/virtio/random/virtio_random.c index f938ba99ae53..3f30c8b68f4c 100644 --- a/sys/dev/virtio/random/virtio_random.c +++ b/sys/dev/virtio/random/virtio_random.c @@ -77,7 +77,7 @@ static struct virtio_feature_desc vtrnd_feature_desc[] = { { 0, NULL } }; -static struct random_source random_vtrnd = { +static const struct random_source random_vtrnd = { .rs_ident = "VirtIO Entropy Adapter", .rs_source = RANDOM_PURE_VIRTIO, .rs_read = vtrnd_read, diff --git a/sys/dev/virtio/virtio_bus_if.m b/sys/dev/virtio/virtio_bus_if.m index 57ae90bdc917..4181b641faad 100644 --- a/sys/dev/virtio/virtio_bus_if.m +++ b/sys/dev/virtio/virtio_bus_if.m @@ -109,7 +109,3 @@ METHOD void write_device_config { int len; }; -METHOD void poll { - device_t dev; -}; - diff --git a/sys/dev/virtio/virtqueue.c b/sys/dev/virtio/virtqueue.c index 8cc3326dc08e..cc7a233d60ee 100644 --- a/sys/dev/virtio/virtqueue.c +++ b/sys/dev/virtio/virtqueue.c @@ -605,10 +605,8 @@ virtqueue_poll(struct virtqueue *vq, uint32_t *len) { void *cookie; - VIRTIO_BUS_POLL(vq->vq_dev); while ((cookie = virtqueue_dequeue(vq, len)) == NULL) { cpu_spinwait(); - VIRTIO_BUS_POLL(vq->vq_dev); } return (cookie); diff --git a/sys/dev/watchdog/watchdog.c b/sys/dev/watchdog/watchdog.c index e6b6dc1eac70..e1b2e08c3f10 100644 --- a/sys/dev/watchdog/watchdog.c +++ b/sys/dev/watchdog/watchdog.c @@ -50,11 +50,20 @@ #include <sys/syscallsubr.h> /* kern_clock_gettime() */ -static int wd_set_pretimeout(int newtimeout, int disableiftoolong); +#ifdef COMPAT_FREEBSD14 +#define WDIOCPATPAT_14 _IOW('W', 42, u_int) /* pat the watchdog */ +#define WDIOC_SETTIMEOUT_14 _IOW('W', 43, int) /* set/reset the timer */ +#define WDIOC_GETTIMEOUT_14 _IOR('W', 44, int) /* get total timeout */ +#define WDIOC_GETTIMELEFT_14 _IOR('W', 45, int) /* get time left */ +#define WDIOC_GETPRETIMEOUT_14 _IOR('W', 46, int) /* get the pre-timeout */ +#define WDIOC_SETPRETIMEOUT_14 _IOW('W', 47, int) /* set the pre-timeout */ +#endif + +static int wd_set_pretimeout(sbintime_t newtimeout, int disableiftoolong); static void wd_timeout_cb(void *arg); static struct callout wd_pretimeo_handle; -static int wd_pretimeout; +static sbintime_t wd_pretimeout; static int wd_pretimeout_act = WD_SOFT_LOG; static struct callout wd_softtimeo_handle; @@ -63,6 +72,8 @@ static int wd_softtimer; /* true = use softtimer instead of hardware static int wd_softtimeout_act = WD_SOFT_LOG; /* action for the software timeout */ static struct cdev *wd_dev; +static volatile sbintime_t wd_last_sbt; /* last timeout value (sbt) */ +static sbintime_t wd_last_sbt_sysctl; /* last timeout value (sbt) */ static volatile u_int wd_last_u; /* last timeout value set by kern_do_pat */ static u_int wd_last_u_sysctl; /* last timeout value set by kern_do_pat */ static u_int wd_last_u_sysctl_secs; /* wd_last_u in seconds */ @@ -73,6 +84,8 @@ SYSCTL_UINT(_hw_watchdog, OID_AUTO, wd_last_u, CTLFLAG_RD, &wd_last_u_sysctl, 0, "Watchdog last update time"); SYSCTL_UINT(_hw_watchdog, OID_AUTO, wd_last_u_secs, CTLFLAG_RD, &wd_last_u_sysctl_secs, 0, "Watchdog last update time"); +SYSCTL_SBINTIME_MSEC(_hw_watchdog, OID_AUTO, wd_last_msecs, CTLFLAG_RD, + &wd_last_sbt_sysctl, "Watchdog last update time (milliseconds)"); static int wd_lastpat_valid = 0; static time_t wd_lastpat = 0; /* when the watchdog was last patted */ @@ -80,105 +93,94 @@ static time_t wd_lastpat = 0; /* when the watchdog was last patted */ /* Hook for external software watchdog to register for use if needed */ void (*wdog_software_attach)(void); -static void -pow2ns_to_ts(int pow2ns, struct timespec *ts) +/* Legacy interface to watchdog. */ +int +wdog_kern_pat(u_int utim) { - uint64_t ns; + sbintime_t sbt; - ns = 1ULL << pow2ns; - ts->tv_sec = ns / 1000000000ULL; - ts->tv_nsec = ns % 1000000000ULL; -} + if ((utim & WD_LASTVAL) != 0 && (utim & WD_INTERVAL) > 0) + return (EINVAL); -static int -pow2ns_to_ticks(int pow2ns) -{ - struct timeval tv; - struct timespec ts; + if ((utim & WD_LASTVAL) != 0) { + return (wdog_control(WD_CTRL_RESET)); + } - pow2ns_to_ts(pow2ns, &ts); - TIMESPEC_TO_TIMEVAL(&tv, &ts); - return (tvtohz(&tv)); + utim &= WD_INTERVAL; + if (utim == WD_TO_NEVER) + sbt = 0; + else + sbt = nstosbt(1 << utim); + + return (wdog_kern_pat_sbt(sbt)); } -static int -seconds_to_pow2ns(int seconds) +int +wdog_control(int ctrl) { - uint64_t power; - uint64_t ns; - uint64_t shifted; - - ns = ((uint64_t)seconds) * 1000000000ULL; - power = flsll(ns); - shifted = 1ULL << power; - if (shifted <= ns) { - power++; + /* Disable takes precedence */ + if (ctrl == WD_CTRL_DISABLE) { + wdog_kern_pat(0); } - return (power); + + if ((ctrl & WD_CTRL_RESET) != 0) { + wdog_kern_pat_sbt(wd_last_sbt); + } else if ((ctrl & WD_CTRL_ENABLE) != 0) { + wdog_kern_pat_sbt(wd_last_sbt); + } + + return (0); } int -wdog_kern_pat(u_int utim) +wdog_kern_pat_sbt(sbintime_t sbt) { - int error; - static int first = 1; - - if ((utim & WD_LASTVAL) != 0 && (utim & WD_INTERVAL) > 0) - return (EINVAL); - - if ((utim & WD_LASTVAL) != 0) { - /* - * if WD_LASTVAL is set, fill in the bits for timeout - * from the saved value in wd_last_u. - */ - MPASS((wd_last_u & ~WD_INTERVAL) == 0); - utim &= ~WD_LASTVAL; - utim |= wd_last_u; - } else { - /* - * Otherwise save the new interval. - * This can be zero (to disable the watchdog) - */ - wd_last_u = (utim & WD_INTERVAL); + sbintime_t error_sbt = 0; + int pow2ns = 0; + int error = 0; + static bool first = true; + + /* legacy uses power-of-2-nanoseconds time. */ + if (sbt != 0) { + pow2ns = flsl(sbttons(sbt)); + } + if (wd_last_sbt != sbt) { + wd_last_u = pow2ns; wd_last_u_sysctl = wd_last_u; - wd_last_u_sysctl_secs = pow2ns_to_ticks(wd_last_u) / hz; + wd_last_u_sysctl_secs = sbt / SBT_1S; + + wd_last_sbt = sbt; } - if ((utim & WD_INTERVAL) == WD_TO_NEVER) { - utim = 0; - /* Assume all is well; watchdog signals failure. */ - error = 0; - } else { - /* Assume no watchdog available; watchdog flags success */ + if (sbt != 0) error = EOPNOTSUPP; - } + if (wd_softtimer) { - if (utim == 0) { + if (sbt == 0) { callout_stop(&wd_softtimeo_handle); } else { - (void) callout_reset(&wd_softtimeo_handle, - pow2ns_to_ticks(utim), wd_timeout_cb, "soft"); + (void) callout_reset_sbt(&wd_softtimeo_handle, + sbt, 0, wd_timeout_cb, "soft", 0); } error = 0; } else { - EVENTHANDLER_INVOKE(watchdog_list, utim, &error); + EVENTHANDLER_INVOKE(watchdog_sbt_list, sbt, &error_sbt, &error); + EVENTHANDLER_INVOKE(watchdog_list, pow2ns, &error); } /* - * If we no hardware watchdog responded, we have not tried to + * If no hardware watchdog responded, we have not tried to * attach an external software watchdog, and one is available, * attach it now and retry. */ - if (error == EOPNOTSUPP && first && *wdog_software_attach != NULL) { + if (error == EOPNOTSUPP && first && wdog_software_attach != NULL) { (*wdog_software_attach)(); - EVENTHANDLER_INVOKE(watchdog_list, utim, &error); + EVENTHANDLER_INVOKE(watchdog_sbt_list, sbt, &error_sbt, &error); + EVENTHANDLER_INVOKE(watchdog_list, pow2ns, &error); } - first = 0; + first = false; + /* TODO: Print a (rate limited?) warning if error_sbt is too far away */ wd_set_pretimeout(wd_pretimeout, true); - /* - * If we were able to arm/strobe the watchdog, then - * update the last time it was strobed for WDIOC_GETTIMELEFT - */ if (!error) { struct timespec ts; @@ -189,6 +191,7 @@ wdog_kern_pat(u_int utim) wd_lastpat_valid = 1; } } + return (error); } @@ -265,16 +268,14 @@ wd_timeout_cb(void *arg) * current actual watchdog timeout. */ static int -wd_set_pretimeout(int newtimeout, int disableiftoolong) +wd_set_pretimeout(sbintime_t newtimeout, int disableiftoolong) { - u_int utime; - struct timespec utime_ts; - int timeout_ticks; + sbintime_t utime; + sbintime_t timeout_left; - utime = wdog_kern_last_timeout(); - pow2ns_to_ts(utime, &utime_ts); + utime = wdog_kern_last_timeout_sbt(); /* do not permit a pre-timeout >= than the timeout. */ - if (newtimeout >= utime_ts.tv_sec) { + if (newtimeout >= utime) { /* * If 'disableiftoolong' then just fall through * so as to disable the pre-watchdog @@ -292,7 +293,7 @@ wd_set_pretimeout(int newtimeout, int disableiftoolong) return 0; } - timeout_ticks = pow2ns_to_ticks(utime) - (hz*newtimeout); + timeout_left = utime - newtimeout; #if 0 printf("wd_set_pretimeout: " "newtimeout: %d, " @@ -306,8 +307,8 @@ wd_set_pretimeout(int newtimeout, int disableiftoolong) #endif /* We determined the value is sane, so reset the callout */ - (void) callout_reset(&wd_pretimeo_handle, - timeout_ticks, wd_timeout_cb, "pre"); + (void) callout_reset_sbt(&wd_pretimeo_handle, + timeout_left, 0, wd_timeout_cb, "pre", 0); wd_pretimeout = newtimeout; return 0; } @@ -316,6 +317,7 @@ static int wd_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, int flags __unused, struct thread *td) { + sbintime_t sb; u_int u; time_t timeleft; int error; @@ -351,29 +353,55 @@ wd_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, error = EINVAL; } break; - case WDIOC_GETPRETIMEOUT: - *(int *)data = (int)wd_pretimeout; +#ifdef COMPAT_FREEBSD14 + case WDIOC_GETPRETIMEOUT_14: + *(int *)data = (int)(wd_pretimeout / SBT_1S); break; - case WDIOC_SETPRETIMEOUT: - error = wd_set_pretimeout(*(int *)data, false); + case WDIOC_SETPRETIMEOUT_14: + error = wd_set_pretimeout(*(int *)data * SBT_1S, false); break; - case WDIOC_GETTIMELEFT: + case WDIOC_GETTIMELEFT_14: error = wd_get_time_left(td, &timeleft); if (error) break; *(int *)data = (int)timeleft; break; - case WDIOC_SETTIMEOUT: + case WDIOC_SETTIMEOUT_14: u = *(u_int *)data; - error = wdog_kern_pat(seconds_to_pow2ns(u)); + error = wdog_kern_pat_sbt(mstosbt(u * 1000ULL)); break; - case WDIOC_GETTIMEOUT: + case WDIOC_GETTIMEOUT_14: u = wdog_kern_last_timeout(); *(u_int *)data = u; break; - case WDIOCPATPAT: + case WDIOCPATPAT_14: error = wd_ioctl_patpat(data); break; +#endif + + /* New API */ + case WDIOC_CONTROL: + wdog_control(*(int *)data); + break; + case WDIOC_SETTIMEOUT: + sb = *(sbintime_t *)data; + error = wdog_kern_pat_sbt(sb); + break; + case WDIOC_GETTIMEOUT: + *(sbintime_t *)data = wdog_kern_last_timeout_sbt(); + break; + case WDIOC_GETTIMELEFT: + error = wd_get_time_left(td, &timeleft); + if (error) + break; + *(sbintime_t *)data = (sbintime_t)timeleft * SBT_1S; + break; + case WDIOC_GETPRETIMEOUT: + *(sbintime_t *)data = wd_pretimeout; + break; + case WDIOC_SETPRETIMEOUT: + error = wd_set_pretimeout(*(sbintime_t *)data, false); + break; default: error = ENOIOCTL; break; @@ -392,6 +420,12 @@ wdog_kern_last_timeout(void) return (wd_last_u); } +sbintime_t +wdog_kern_last_timeout_sbt(void) +{ + return (wd_last_sbt); +} + static struct cdevsw wd_cdevsw = { .d_version = D_VERSION, .d_ioctl = wd_ioctl, diff --git a/sys/fs/cd9660/cd9660_vnops.c b/sys/fs/cd9660/cd9660_vnops.c index c4d0e6ba7b30..a496b41fcf6e 100644 --- a/sys/fs/cd9660/cd9660_vnops.c +++ b/sys/fs/cd9660/cd9660_vnops.c @@ -124,7 +124,7 @@ cd9660_access(struct vop_access_args *ap) uid_t uid; gid_t gid; - if (vp->v_type == VCHR || vp->v_type == VBLK) + if (VN_ISDEV(vp)) return (EOPNOTSUPP); /* @@ -162,7 +162,7 @@ cd9660_open(struct vop_open_args *ap) struct vnode *vp = ap->a_vp; struct iso_node *ip = VTOI(vp); - if (vp->v_type == VCHR || vp->v_type == VBLK) + if (VN_ISDEV(vp)) return (EOPNOTSUPP); vnode_create_vobject(vp, ip->i_size, ap->a_td); @@ -242,7 +242,7 @@ cd9660_ioctl(struct vop_ioctl_args *ap) VOP_UNLOCK(vp); return (EBADF); } - if (vp->v_type == VCHR || vp->v_type == VBLK) { + if (VN_ISDEV(vp)) { VOP_UNLOCK(vp); return (EOPNOTSUPP); } @@ -280,7 +280,7 @@ cd9660_read(struct vop_read_args *ap) int seqcount; long size, n, on; - if (vp->v_type == VCHR || vp->v_type == VBLK) + if (VN_ISDEV(vp)) return (EOPNOTSUPP); seqcount = ap->a_ioflag >> IO_SEQSHIFT; @@ -711,7 +711,7 @@ cd9660_strategy(struct vop_strategy_args *ap) struct bufobj *bo; ip = VTOI(vp); - if (vp->v_type == VBLK || vp->v_type == VCHR) + if (VN_ISDEV(vp)) panic("cd9660_strategy: spec"); if (bp->b_blkno == bp->b_lblkno) { bp->b_blkno = (ip->iso_start + bp->b_lblkno) << @@ -818,7 +818,7 @@ cd9660_getpages(struct vop_getpages_args *ap) struct vnode *vp; vp = ap->a_vp; - if (vp->v_type == VCHR || vp->v_type == VBLK) + if (VN_ISDEV(vp)) return (EOPNOTSUPP); if (use_buf_pager) diff --git a/sys/fs/ext2fs/ext2_vnops.c b/sys/fs/ext2fs/ext2_vnops.c index 064c10bd18b2..00389c927087 100644 --- a/sys/fs/ext2fs/ext2_vnops.c +++ b/sys/fs/ext2fs/ext2_vnops.c @@ -222,7 +222,7 @@ ext2_itimes_locked(struct vnode *vp) ip = VTOI(vp); if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) return; - if ((vp->v_type == VBLK || vp->v_type == VCHR)) + if (VN_ISDEV(vp)) ip->i_flag |= IN_LAZYMOD; else ip->i_flag |= IN_MODIFIED; @@ -276,7 +276,7 @@ static int ext2_open(struct vop_open_args *ap) { - if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) + if (VN_ISDEV(ap->a_vp)) return (EOPNOTSUPP); /* @@ -360,7 +360,7 @@ ext2_getattr(struct vop_getattr_args *ap) vap->va_nlink = ip->i_nlink; vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; - vap->va_rdev = ip->i_rdev; + vap->va_rdev = VN_ISDEV(vp) ? ip->i_rdev : NODEV; vap->va_size = ip->i_size; vap->va_atime.tv_sec = ip->i_atime; vap->va_atime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_atimensec : 0; @@ -1571,7 +1571,7 @@ ext2_strategy(struct vop_strategy_args *ap) daddr_t blkno; int error; - if (vp->v_type == VBLK || vp->v_type == VCHR) + if (VN_ISDEV(vp)) panic("ext2_strategy: spec"); if (bp->b_blkno == bp->b_lblkno) { if (VTOI(ap->a_vp)->i_flag & IN_E4EXTENTS) @@ -1733,7 +1733,7 @@ ext2_deleteextattr(struct vop_deleteextattr_args *ap) if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) return (EOPNOTSUPP); - if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + if (VN_ISDEV(ap->a_vp)) return (EOPNOTSUPP); error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, @@ -1771,7 +1771,7 @@ ext2_getextattr(struct vop_getextattr_args *ap) if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) return (EOPNOTSUPP); - if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + if (VN_ISDEV(ap->a_vp)) return (EOPNOTSUPP); error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, @@ -1814,7 +1814,7 @@ ext2_listextattr(struct vop_listextattr_args *ap) if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) return (EOPNOTSUPP); - if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + if (VN_ISDEV(ap->a_vp)) return (EOPNOTSUPP); error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, @@ -1855,7 +1855,7 @@ ext2_setextattr(struct vop_setextattr_args *ap) if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) return (EOPNOTSUPP); - if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + if (VN_ISDEV(ap->a_vp)) return (EOPNOTSUPP); error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index b782146b7278..de60a4717dd4 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -1748,7 +1748,7 @@ fuse_vnop_open(struct vop_open_args *ap) if (fuse_isdeadfs(vp)) return (EXTERROR(ENXIO, "This FUSE session is about " "to be closed")); - if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO) + if (VN_ISDEV(vp) || vp->v_type == VFIFO) return (EXTERROR(EOPNOTSUPP, "Unsupported vnode type", vp->v_type)); if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0) diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c index 0ae3b94bef89..1e4e8506790f 100644 --- a/sys/fs/nfs/nfs_commonkrpc.c +++ b/sys/fs/nfs/nfs_commonkrpc.c @@ -239,6 +239,7 @@ static bool nfscl_use_gss[NFSV42_NPROCS] = { true, true, true, + true, }; /* diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index 67e33193ecec..7f5b29ca2085 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -187,7 +187,7 @@ struct nfsv4_opflag nfsv4_opflag[NFSV42_NOPS] = { { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Read Plus */ { 0, 1, 0, 0, LK_SHARED, 1, 0 }, /* Seek */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Write Same */ - { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Clone */ + { 2, 1, 1, 0, LK_SHARED, 1, 0 }, /* Clone */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Getxattr */ { 0, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Setxattr */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Listxattrs */ @@ -219,7 +219,7 @@ NFSD_VNET_DEFINE_STATIC(u_char *, nfsrv_dnsname) = NULL; static int nfs_bigreply[NFSV42_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 }; + 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }; /* local functions */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep); @@ -310,6 +310,7 @@ static struct { { NFSV4OP_LAYOUTERROR, 1, "LayoutError", 11, }, { NFSV4OP_VERIFY, 3, "AppendWrite", 11, }, { NFSV4OP_OPENATTR, 3, "OpenAttr", 8, }, + { NFSV4OP_SAVEFH, 5, "Clone", 5, }, }; /* @@ -319,7 +320,7 @@ static int nfs_bigrequest[NFSV42_NPROCS] = { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 1, 0 + 0, 1, 0, 0 }; /* @@ -648,7 +649,7 @@ nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap, NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE); (void) nfsv4_fillattr(nd, vp->v_mount, vp, NULL, vap, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL, - false, false, false); + false, false, false, 0); break; } } @@ -1302,7 +1303,7 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsv3_pathconf *pc, struct statfs *sbp, struct nfsstatfs *sfp, struct nfsfsinfo *fsp, NFSACL_T *aclp, int compare, int *retcmpp, u_int32_t *leasep, u_int32_t *rderrp, bool *has_namedattrp, - NFSPROC_T *p, struct ucred *cred) + uint32_t *clone_blksizep, NFSPROC_T *p, struct ucred *cred) { u_int32_t *tl; int i = 0, j, k, l = 0, m, bitpos, attrsum = 0; @@ -1437,6 +1438,13 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_SYSTEM); } + /* Some filesystems do not support block cloning */ + if (vp == NULL || VOP_PATHCONF(vp, + _PC_CLONE_BLKSIZE, &has_pathconf) != 0) + has_pathconf = 0; + if (has_pathconf == 0) + NFSCLRBIT_ATTRBIT(&checkattrbits, + NFSATTRBIT_CLONEBLKSIZE); if (!NFSEQUAL_ATTRBIT(&retattrbits, &checkattrbits) || retnotsup) *retcmpp = NFSERR_NOTSAME; @@ -2374,6 +2382,23 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, if (compare && !(*retcmpp) && i != nfs_srvmaxio) *retcmpp = NFSERR_NOTSAME; break; + case NFSATTRBIT_CLONEBLKSIZE: + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + if (compare) { + if (!(*retcmpp)) { + if (vp == NULL || VOP_PATHCONF(vp, + _PC_CLONE_BLKSIZE, &has_pathconf) + != 0) + has_pathconf = 0; + if (has_pathconf != + fxdr_unsigned(uint32_t, *tl)) + *retcmpp = NFSERR_NOTSAME; + } + } else if (clone_blksizep != NULL) { + *clone_blksizep = fxdr_unsigned(uint32_t, *tl); + } + attrsum += NFSX_UNSIGNED; + break; case NFSATTRBIT_CHANGEATTRTYPE: NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (compare) { @@ -2648,7 +2673,7 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, nfsattrbit_t *attrbitp, struct ucred *cred, NFSPROC_T *p, int isdgram, int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno, struct statfs *pnfssf, bool xattrsupp, bool has_hiddensystem, - bool has_namedattr) + bool has_namedattr, uint32_t clone_blksize) { int bitpos, retnum = 0; u_int32_t *tl; @@ -2771,6 +2796,9 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN); NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM); } + if (clone_blksize == 0) + NFSCLRBIT_ATTRBIT(&attrbits, + NFSATTRBIT_CLONEBLKSIZE); retnum += nfsrv_putattrbit(nd, &attrbits); break; case NFSATTRBIT_TYPE: @@ -3249,6 +3277,11 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, } retnum += NFSX_UNSIGNED; break; + case NFSATTRBIT_CLONEBLKSIZE: + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(clone_blksize); + retnum += NFSX_UNSIGNED; + break; default: printf("EEK! Bad V4 attribute bitpos=%d\n", bitpos); } diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h index 54f60a753c50..61083ecf2d66 100644 --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -286,6 +286,8 @@ int nfsrvd_deallocate(struct nfsrv_descript *, int, vnode_t, struct nfsexstuff *); int nfsrvd_copy_file_range(struct nfsrv_descript *, int, vnode_t, vnode_t, struct nfsexstuff *, struct nfsexstuff *); +int nfsrvd_clone(struct nfsrv_descript *, int, + vnode_t, vnode_t, struct nfsexstuff *, struct nfsexstuff *); int nfsrvd_seek(struct nfsrv_descript *, int, vnode_t, struct nfsexstuff *); int nfsrvd_getxattr(struct nfsrv_descript *, int, @@ -341,7 +343,8 @@ int nfsv4_loadattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, struct nfsfh **, fhandle_t *, int, struct nfsv3_pathconf *, struct statfs *, struct nfsstatfs *, struct nfsfsinfo *, NFSACL_T *, - int, int *, u_int32_t *, u_int32_t *, bool *, NFSPROC_T *, struct ucred *); + int, int *, u_int32_t *, u_int32_t *, bool *, uint32_t *, NFSPROC_T *, + struct ucred *); int nfsv4_lock(struct nfsv4lock *, int, int *, struct mtx *, struct mount *); void nfsv4_unlock(struct nfsv4lock *, int); void nfsv4_relref(struct nfsv4lock *); @@ -397,7 +400,7 @@ void nfsrv_wcc(struct nfsrv_descript *, int, struct nfsvattr *, int, int nfsv4_fillattr(struct nfsrv_descript *, struct mount *, vnode_t, NFSACL_T *, struct vattr *, fhandle_t *, int, nfsattrbit_t *, struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t, struct statfs *, bool, bool, - bool); + bool, uint32_t); void nfsrv_fillattr(struct nfsrv_descript *, struct nfsvattr *); struct mbuf *nfsrv_adj(struct mbuf *, int, int); void nfsrv_postopattr(struct nfsrv_descript *, int, struct nfsvattr *); @@ -517,10 +520,10 @@ int nfsrpc_lock(struct nfsrv_descript *, struct nfsmount *, vnode_t, u_int8_t *, int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short, struct ucred *, NFSPROC_T *, int); int nfsrpc_statfs(vnode_t, struct nfsstatfs *, struct nfsfsinfo *, uint32_t *, - struct ucred *, NFSPROC_T *, struct nfsvattr *, int *); + uint32_t *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *); int nfsrpc_fsinfo(vnode_t, struct nfsfsinfo *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *); -int nfsrpc_pathconf(vnode_t, struct nfsv3_pathconf *, bool *, +int nfsrpc_pathconf(vnode_t, struct nfsv3_pathconf *, bool *, uint32_t *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *); int nfsrpc_renew(struct nfsclclient *, struct nfsclds *, struct ucred *, NFSPROC_T *); @@ -562,6 +565,8 @@ int nfsrpc_deallocate(vnode_t, off_t, off_t, struct nfsvattr *, int *, int nfsrpc_copy_file_range(vnode_t, off_t *, vnode_t, off_t *, size_t *, unsigned int, int *, struct nfsvattr *, int *, struct nfsvattr *, struct ucred *, bool, bool *); +int nfsrpc_clone(vnode_t, off_t *, vnode_t, off_t *, size_t *, bool, + int *, struct nfsvattr *, int *, struct nfsvattr *, struct ucred *); int nfsrpc_seek(vnode_t, off_t *, bool *, int, struct ucred *, struct nfsvattr *, int *); int nfsrpc_getextattr(vnode_t, const char *, struct uio *, ssize_t *, @@ -668,7 +673,7 @@ int nfscl_nget(mount_t, vnode_t, struct nfsfh *, NFSPROC_T *nfscl_getparent(NFSPROC_T *); void nfscl_start_renewthread(struct nfsclclient *); void nfscl_loadsbinfo(struct nfsmount *, struct nfsstatfs *, void *); -void nfscl_loadfsinfo (struct nfsmount *, struct nfsfsinfo *); +void nfscl_loadfsinfo(struct nfsmount *, struct nfsfsinfo *, uint32_t); void nfscl_delegreturn(struct nfscldeleg *, int, struct nfsmount *, struct ucred *, NFSPROC_T *); void nfsrvd_cbinit(int); @@ -737,7 +742,7 @@ int nfsvno_updfilerev(vnode_t, struct nfsvattr *, struct nfsrv_descript *, int nfsvno_fillattr(struct nfsrv_descript *, struct mount *, vnode_t, struct nfsvattr *, fhandle_t *, int, nfsattrbit_t *, struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t, bool, bool, - bool); + bool, uint32_t); int nfsrv_sattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, nfsattrbit_t *, NFSACL_T *, NFSPROC_T *); int nfsv4_sattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, nfsattrbit_t *, diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h index c30b46261df0..5a019b4989cf 100644 --- a/sys/fs/nfs/nfsport.h +++ b/sys/fs/nfs/nfsport.h @@ -442,10 +442,13 @@ /* Do a NFSv4 Openattr. */ #define NFSPROC_OPENATTR 70 +/* Do a NFSv4.2 Clone. */ +#define NFSPROC_CLONE 71 + /* * Must be defined as one higher than the last NFSv4.2 Proc# above. */ -#define NFSV42_NPROCS 71 +#define NFSV42_NPROCS 72 /* Value of NFSV42_NPROCS for old nfsstats structure. (Always 69) */ #define NFSV42_OLDNPROCS 69 @@ -477,7 +480,7 @@ struct nfsstatsv1 { uint64_t readlink_bios; uint64_t biocache_readdirs; uint64_t readdir_bios; - uint64_t rpccnt[NFSV42_NPROCS + 9]; + uint64_t rpccnt[NFSV42_NPROCS + 8]; uint64_t rpcretries; uint64_t srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15]; uint64_t srvlayouts; @@ -906,15 +909,6 @@ int nfsmsleep(void *, void *, int, const char *, struct timespec *); #define NFSBZERO(s, l) bzero((s), (l)) /* - * Some queue.h files don't have these dfined in them. - */ -#ifndef LIST_END -#define LIST_END(head) NULL -#define SLIST_END(head) NULL -#define TAILQ_END(head) NULL -#endif - -/* * This must be defined to be a global variable that increments once * per second, but never stops or goes backwards, even when a "date" * command changes the TOD clock. It is used for delta times for @@ -1035,9 +1029,6 @@ void ncl_copy_vattr(struct vattr *dst, struct vattr *src); #define NFSDECRGLOBAL(a) ((a)--) /* - * Assorted funky stuff to make things work under Darwin8. - */ -/* * These macros checks for a field in vattr being set. */ #define NFSATTRISSET(t, v, a) ((v)->a != (t)VNOVAL) diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h index cb5a80e8df73..d628108bdc1a 100644 --- a/sys/fs/nfs/nfsproto.h +++ b/sys/fs/nfs/nfsproto.h @@ -411,10 +411,13 @@ /* Do a NFSv4 Openattr. */ #define NFSPROC_OPENATTR 70 +/* Do a NFSv4.2 Clone. */ +#define NFSPROC_CLONE 71 + /* * Must be defined as one higher than the last NFSv4.2 Proc# above. */ -#define NFSV42_NPROCS 71 +#define NFSV42_NPROCS 72 /* Value of NFSV42_NPROCS for old nfsstats structure. (Always 69) */ #define NFSV42_OLDNPROCS 69 @@ -1194,6 +1197,7 @@ struct nfsv3_sattr { NFSATTRBM_LAYOUTBLKSIZE | \ NFSATTRBM_LAYOUTALIGNMENT | \ NFSATTRBM_SUPPATTREXCLCREAT | \ + NFSATTRBM_CLONEBLKSIZE | \ NFSATTRBM_CHANGEATTRTYPE | \ NFSATTRBM_XATTRSUPPORT) @@ -1242,7 +1246,8 @@ struct nfsv3_sattr { * NFSATTRBIT_NFSV42 - Attributes only supported by NFSv4.2. */ #define NFSATTRBIT_NFSV42_2 \ - (NFSATTRBM_CHANGEATTRTYPE | \ + (NFSATTRBM_CLONEBLKSIZE | \ + NFSATTRBM_CHANGEATTRTYPE | \ NFSATTRBM_XATTRSUPPORT | \ NFSATTRBM_MODEUMASK) @@ -1415,7 +1420,7 @@ struct nfsv3_sattr { /* * NFSGETATTRBIT_STATFS2 - bits 64<->95 */ -#define NFSGETATTRBIT_STATFS2 0 +#define NFSGETATTRBIT_STATFS2 (NFSATTRBM_CLONEBLKSIZE) /* * Set of attributes for the equivalent of an nfsv3 pathconf rpc. @@ -1438,7 +1443,7 @@ struct nfsv3_sattr { /* * NFSGETATTRBIT_PATHCONF2 - bits 64<->95 */ -#define NFSGETATTRBIT_PATHCONF2 0 +#define NFSGETATTRBIT_PATHCONF2 (NFSATTRBM_CLONEBLKSIZE) /* * Sets of attributes required by readdir and readdirplus. diff --git a/sys/fs/nfsclient/nfs_clcomsubs.c b/sys/fs/nfsclient/nfs_clcomsubs.c index bca0bdcd0df1..05963074e53d 100644 --- a/sys/fs/nfsclient/nfs_clcomsubs.c +++ b/sys/fs/nfsclient/nfs_clcomsubs.c @@ -272,7 +272,7 @@ nfsm_loadattr(struct nfsrv_descript *nd, struct nfsvattr *nap) if (nd->nd_flag & ND_NFSV4) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, - NULL); + NULL, NULL); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(fp, struct nfs_fattr *, NFSX_V3FATTR); nap->na_type = nfsv34tov_type(fp->fa_type); diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index b25d967982a1..704aeeeabdf2 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -828,7 +828,7 @@ nfscl_wcc_data(struct nfsrv_descript *nd, struct vnode *vp, == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, - NULL, NULL, NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL, NULL, NULL); if (error) return (error); /* @@ -963,7 +963,8 @@ nfscl_loadsbinfo(struct nfsmount *nmp, struct nfsstatfs *sfp, void *statfs) * Use the fsinfo stuff to update the mount point. */ void -nfscl_loadfsinfo(struct nfsmount *nmp, struct nfsfsinfo *fsp) +nfscl_loadfsinfo(struct nfsmount *nmp, struct nfsfsinfo *fsp, + uint32_t clone_blksize) { if ((nmp->nm_wsize == 0 || fsp->fs_wtpref < nmp->nm_wsize) && @@ -1003,6 +1004,14 @@ nfscl_loadfsinfo(struct nfsmount *nmp, struct nfsfsinfo *fsp) fsp->fs_maxfilesize < nmp->nm_maxfilesize) nmp->nm_maxfilesize = fsp->fs_maxfilesize; nmp->nm_mountp->mnt_stat.f_iosize = newnfs_iosize(nmp); + + /* + * Although ZFS reports a clone_blksize of 16Mbytes, + * 128Kbytes usually works, so set it to that. + */ + if (clone_blksize > 128 * 1024) + clone_blksize = 128 * 1024; + nmp->nm_cloneblksize = clone_blksize; nmp->nm_state |= NFSSTA_GOTFSINFO; } diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index 920fcf7b8c61..4ec621de2eff 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -225,6 +225,9 @@ static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *, static int nfsrpc_copyrpc(vnode_t, off_t, vnode_t, off_t, size_t *, nfsv4stateid_t *, nfsv4stateid_t *, struct nfsvattr *, int *, struct nfsvattr *, int *, bool, int *, struct ucred *, NFSPROC_T *); +static int nfsrpc_clonerpc(vnode_t, off_t, vnode_t, off_t, size_t *, bool, + nfsv4stateid_t *, nfsv4stateid_t *, struct nfsvattr *, int *, + struct nfsvattr *, int *, struct ucred *, NFSPROC_T *); static int nfsrpc_seekrpc(vnode_t, off_t *, nfsv4stateid_t *, bool *, int, struct nfsvattr *, int *, struct ucred *); static struct mbuf *nfsm_split(struct mbuf *, uint64_t); @@ -696,7 +699,7 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, ("nfsrpc_openrpc: Getattr repstat")); error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, - NULL, NULL, NULL, NULL, p, cred); + NULL, NULL, NULL, NULL, NULL, p, cred); if (error) goto nfsmout; } @@ -1355,7 +1358,7 @@ nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred, if ((nd->nd_flag & ND_NFSV4) != 0) error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL, - NULL, NULL, NULL); + NULL, NULL, NULL, NULL); else error = nfsm_loadattr(nd, nap); } else @@ -3597,7 +3600,7 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, nfsva.na_mntonfileno = UINT64_MAX; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, - NULL, NULL, NULL, NULL, p, cred); + NULL, NULL, NULL, NULL, NULL, p, cred); if (error) { dotdotfileid = dotfileid; } else if (gotmnton) { @@ -3847,7 +3850,7 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, nfsva.na_mntonfileno = UINT64_MAX; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, - NULL, NULL, &rderr, NULL, p, cred); + NULL, NULL, &rderr, NULL, NULL, p, cred); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); @@ -4072,7 +4075,7 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, nfsva.na_mntonfileno = UINT64_MAX; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, - NULL, NULL, NULL, NULL, p, cred); + NULL, NULL, NULL, NULL, NULL, p, cred); if (error) { dotdotfileid = dotfileid; } else if (gotmnton) { @@ -4346,7 +4349,7 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, - NULL, NULL, &rderr, NULL, p, cred); + NULL, NULL, &rderr, NULL, NULL, p, cred); if (error) goto nfsmout; } @@ -4981,8 +4984,8 @@ nfsmout: */ int nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp, - uint32_t *leasep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, - int *attrflagp) + uint32_t *leasep, uint32_t *cloneblksizep, struct ucred *cred, NFSPROC_T *p, + struct nfsvattr *nap, int *attrflagp) { u_int32_t *tl = NULL; struct nfsrv_descript nfsd, *nd = &nfsd; @@ -4991,6 +4994,8 @@ nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp, int error; *attrflagp = 0; + if (cloneblksizep != NULL) + *cloneblksizep = 0; nmp = VFSTONFS(vp->v_mount); if (NFSHASNFSV4(nmp)) { /* @@ -5009,7 +5014,7 @@ nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp, if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, sbp, fsp, NULL, 0, NULL, leasep, NULL, - NULL, p, cred); + NULL, cloneblksizep, p, cred); if (!error) { nmp->nm_fsid[0] = nap->na_filesid[0]; nmp->nm_fsid[1] = nap->na_filesid[1]; @@ -5063,7 +5068,8 @@ nfsmout: */ int nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc, bool *has_namedattrp, - struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp) + uint32_t *clone_blksizep, struct ucred *cred, NFSPROC_T *p, + struct nfsvattr *nap, int *attrflagp) { struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; @@ -5074,6 +5080,7 @@ nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc, bool *has_namedattrp, *has_namedattrp = false; *attrflagp = 0; + *clone_blksizep = 0; nmp = VFSTONFS(vp->v_mount); if (NFSHASNFSV4(nmp)) { np = VTONFS(vp); @@ -5100,7 +5107,7 @@ nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc, bool *has_namedattrp, if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, - has_namedattrp, p, cred); + has_namedattrp, clone_blksizep, p, cred); if (!error) *attrflagp = 1; } else { @@ -5395,7 +5402,8 @@ nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp) return (error); if (!nd->nd_repstat) error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL, - NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, NULL, p, cred); + NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, NULL, NULL, p, + cred); else error = nd->nd_repstat; m_freem(nd->nd_mrep); @@ -5437,7 +5445,7 @@ nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p, NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); (void) nfsv4_fillattr(nd, vp->v_mount, vp, aclp, NULL, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL, false, false, - false); + false, 0); error = nfscl_request(nd, vp, p, cred); if (error) return (error); @@ -8496,7 +8504,7 @@ nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, if (*++tl == 0) { error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, - NULL, NULL, NULL, NULL, p, cred); + NULL, NULL, NULL, NULL, NULL, p, cred); if (error != 0) goto nfsmout; if (ndp != NULL) { @@ -9168,6 +9176,199 @@ nfsmout: } /* + * nfs clone operation. + */ +int +nfsrpc_clone(vnode_t invp, off_t *inoffp, vnode_t outvp, + off_t *outoffp, size_t *lenp, bool toeof, int *inattrflagp, + struct nfsvattr *innap, int *outattrflagp, struct nfsvattr *outnap, + struct ucred *cred) +{ + int error, expireret = 0, retrycnt; + uint32_t clidrev = 0; + struct nfsmount *nmp = VFSTONFS(invp->v_mount); + struct nfsfh *innfhp = NULL, *outnfhp = NULL; + nfsv4stateid_t instateid, outstateid; + void *inlckp, *outlckp; + + if (nmp->nm_clp != NULL) + clidrev = nmp->nm_clp->nfsc_clientidrev; + innfhp = VTONFS(invp)->n_fhp; + outnfhp = VTONFS(outvp)->n_fhp; + retrycnt = 0; + do { + /* Get both stateids. */ + inlckp = NULL; + nfscl_getstateid(invp, innfhp->nfh_fh, innfhp->nfh_len, + NFSV4OPEN_ACCESSREAD, 0, NULL, curthread, &instateid, + &inlckp); + outlckp = NULL; + nfscl_getstateid(outvp, outnfhp->nfh_fh, outnfhp->nfh_len, + NFSV4OPEN_ACCESSWRITE, 0, NULL, curthread, &outstateid, + &outlckp); + + error = nfsrpc_clonerpc(invp, *inoffp, outvp, *outoffp, lenp, + toeof, &instateid, &outstateid, innap, inattrflagp, outnap, + outattrflagp, cred, curthread); + if (error == 0) { + *inoffp += *lenp; + *outoffp += *lenp; + } else if (error == NFSERR_STALESTATEID) + nfscl_initiate_recovery(nmp->nm_clp); + if (inlckp != NULL) + nfscl_lockderef(inlckp); + if (outlckp != NULL) + nfscl_lockderef(outlckp); + if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { + (void) nfs_catnap(PZERO, error, "nfs_cfr"); + } else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) && + error == NFSERR_BADSTATEID)) && clidrev != 0) { + expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, + curthread); + } else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) { + error = EIO; + } + retrycnt++; + } while (error == NFSERR_GRACE || error == NFSERR_DELAY || + error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || + error == NFSERR_STALEDONTRECOVER || + (error == NFSERR_OLDSTATEID && retrycnt < 20) || + ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && + expireret == 0 && clidrev != 0 && retrycnt < 4)); + if (error != 0 && (retrycnt >= 4 || + error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || + error == NFSERR_STALEDONTRECOVER)) + error = EIO; + return (error); +} + +/* + * The clone RPC. + */ +static int +nfsrpc_clonerpc(vnode_t invp, off_t inoff, vnode_t outvp, off_t outoff, + size_t *lenp, bool toeof, nfsv4stateid_t *instateidp, + nfsv4stateid_t *outstateidp, struct nfsvattr *innap, int *inattrflagp, + struct nfsvattr *outnap, int *outattrflagp, struct ucred *cred, + NFSPROC_T *p) +{ + uint32_t *tl, *opcntp; + int error; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + struct nfsmount *nmp; + nfsattrbit_t attrbits; + struct vattr va; + uint64_t len; + + nmp = VFSTONFS(invp->v_mount); + *inattrflagp = *outattrflagp = 0; + len = *lenp; + if (len == 0) + return (0); + if (toeof) + len = 0; + nfscl_reqstart(nd, NFSPROC_CLONE, nmp, VTONFS(invp)->n_fhp->nfh_fh, + VTONFS(invp)->n_fhp->nfh_len, &opcntp, NULL, 0, 0, cred); + /* + * First do a Setattr of atime to the server's clock + * time. The FreeBSD "collective" was of the opinion + * that setting atime was necessary for this syscall. + * Do the Setattr before the Clone, so that it can be + * handled well if the server replies NFSERR_DELAY to + * the Setattr operation. + */ + if ((nmp->nm_mountp->mnt_flag & MNT_NOATIME) == 0) { + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_SETATTR); + nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID); + VATTR_NULL(&va); + va.va_atime.tv_sec = va.va_atime.tv_nsec = 0; + va.va_vaflags = VA_UTIMES_NULL; + nfscl_fillsattr(nd, &va, invp, 0, 0); + /* Bump opcnt from 7 to 8. */ + *opcntp = txdr_unsigned(8); + } + + /* Now Getattr the invp attributes. */ + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + + /* Set outvp. */ + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_PUTFH); + (void)nfsm_fhtom(nmp, nd, VTONFS(outvp)->n_fhp->nfh_fh, + VTONFS(outvp)->n_fhp->nfh_len, 0); + + /* Do the Clone. */ + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_CLONE); + nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID); + nfsm_stateidtom(nd, outstateidp, NFSSTATEID_PUTSTATEID); + NFSM_BUILD(tl, uint32_t *, 3 * NFSX_HYPER + NFSX_UNSIGNED); + txdr_hyper(inoff, tl); tl += 2; + txdr_hyper(outoff, tl); tl += 2; + txdr_hyper(len, tl); tl += 2; + + /* Get the outvp attributes. */ + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSWRITEGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + + error = nfscl_request(nd, invp, p, cred); + if (error != 0) + return (error); + /* Skip over the Setattr reply. */ + if ((nd->nd_flag & ND_NOMOREDATA) == 0 && + (nmp->nm_mountp->mnt_flag & MNT_NOATIME) == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + if (*(tl + 1) == 0) { + error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); + if (error != 0) + goto nfsmout; + } else + nd->nd_flag |= ND_NOMOREDATA; + } + if ((nd->nd_flag & ND_NOMOREDATA) == 0) { + /* Get the input file's attributes. */ + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + if (*(tl + 1) == 0) { + error = nfsm_loadattr(nd, innap); + if (error != 0) + goto nfsmout; + *inattrflagp = 1; + } else + nd->nd_flag |= ND_NOMOREDATA; + } + /* Skip over return stat for PutFH. */ + if ((nd->nd_flag & ND_NOMOREDATA) == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + if (*++tl != 0) + nd->nd_flag |= ND_NOMOREDATA; + } + /* Skip over return stat for Clone. */ + if ((nd->nd_flag & ND_NOMOREDATA) == 0) + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + error = nfsm_loadattr(nd, outnap); + if (error == 0) + *outattrflagp = NFS_LATTR_NOSHRINK; + } else { + *lenp = 0; + } + if (error == 0) + error = nd->nd_repstat; +nfsmout: + m_freem(nd->nd_mrep); + return (error); +} + +/* * Seek operation. */ int @@ -9724,13 +9925,13 @@ nfscl_statfs(struct vnode *vp, struct ucred *cred, NFSPROC_T *td) struct nfsstatfs sb; struct mount *mp; struct nfsmount *nmp; - uint32_t lease; + uint32_t clone_blksize, lease; int attrflag, error; mp = vp->v_mount; nmp = VFSTONFS(mp); - error = nfsrpc_statfs(vp, &sb, &fs, &lease, cred, td, &nfsva, - &attrflag); + error = nfsrpc_statfs(vp, &sb, &fs, &lease, &clone_blksize, cred, td, + &nfsva, &attrflag); if (attrflag != 0) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); if (error == 0) { @@ -9739,7 +9940,7 @@ nfscl_statfs(struct vnode *vp, struct ucred *cred, NFSPROC_T *td) nmp->nm_clp->nfsc_renew = NFSCL_RENEW(lease); NFSUNLOCKCLSTATE(); mtx_lock(&nmp->nm_mtx); - nfscl_loadfsinfo(nmp, &fs); + nfscl_loadfsinfo(nmp, &fs, clone_blksize); nfscl_loadsbinfo(nmp, &sb, &mp->mnt_stat); mp->mnt_stat.f_iosize = newnfs_iosize(nmp); mtx_unlock(&nmp->nm_mtx); diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c index 99a781640c53..aa9d01fc4632 100644 --- a/sys/fs/nfsclient/nfs_clstate.c +++ b/sys/fs/nfsclient/nfs_clstate.c @@ -3701,7 +3701,7 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) if (!error) (void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va, NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0, - (uint64_t)0, NULL, false, false, false); + (uint64_t)0, NULL, false, false, false, 0); break; case NFSV4OP_CBRECALL: NFSCL_DEBUG(4, "cbrecall\n"); diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index 0bd05c03885b..5ea7eab07632 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -292,8 +292,10 @@ nfs_statfs(struct mount *mp, struct statfs *sbp) int error = 0, attrflag, gotfsinfo = 0, ret; struct nfsnode *np; char *fakefh; + uint32_t clone_blksize; td = curthread; + clone_blksize = 0; error = vfs_busy(mp, MBF_NOWAIT); if (error) @@ -337,8 +339,8 @@ nfs_statfs(struct mount *mp, struct statfs *sbp) } else mtx_unlock(&nmp->nm_mtx); if (!error) - error = nfsrpc_statfs(vp, &sb, &fs, NULL, td->td_ucred, td, - &nfsva, &attrflag); + error = nfsrpc_statfs(vp, &sb, &fs, NULL, &clone_blksize, + td->td_ucred, td, &nfsva, &attrflag); if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 && error == NFSERR_WRONGSEC) { /* Cannot get new stats, so return what is in mnt_stat. */ @@ -375,7 +377,7 @@ nfs_statfs(struct mount *mp, struct statfs *sbp) if (!error) { mtx_lock(&nmp->nm_mtx); if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4)) - nfscl_loadfsinfo(nmp, &fs); + nfscl_loadfsinfo(nmp, &fs, clone_blksize); nfscl_loadsbinfo(nmp, &sb, sbp); sbp->f_iosize = newnfs_iosize(nmp); mtx_unlock(&nmp->nm_mtx); @@ -408,7 +410,7 @@ ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred, if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); mtx_lock(&nmp->nm_mtx); - nfscl_loadfsinfo(nmp, &fs); + nfscl_loadfsinfo(nmp, &fs, 0); mtx_unlock(&nmp->nm_mtx); } return (error); diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index fa451887e73e..52f72dc43c3f 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -1782,7 +1782,7 @@ nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int error = 0, attrflag, dattrflag; u_int32_t rdev; - if (vap->va_type == VCHR || vap->va_type == VBLK) + if (VATTR_ISDEV(vap)) rdev = vap->va_rdev; else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = 0xffffffff; @@ -4027,31 +4027,51 @@ nfs_copy_file_range(struct vop_copy_file_range_args *ap) struct vattr va, *vap; struct uio io; struct nfsmount *nmp; + struct nfsnode *np; size_t len, len2; ssize_t r; int error, inattrflag, outattrflag, ret, ret2, invp_lock; off_t inoff, outoff; - bool consecutive, must_commit, tryoutcred; + bool consecutive, must_commit, onevp, toeof, tryclone, tryoutcred; + bool mustclone; /* * NFSv4.2 Copy is not permitted for infile == outfile. + * The NFSv4.2 Clone operation does work on non-overlapping + * byte ranges in the same file, but only if offsets + * (and len if not to EOF) are aligned properly. * TODO: copy_file_range() between multiple NFS mountpoints + * --> This is not possible now, since each mount appears to + * the NFSv4.n server as a separate client. */ - if (invp == outvp || invp->v_mount != outvp->v_mount) { + if ((invp == outvp && (ap->a_flags & COPY_FILE_RANGE_CLONE) == 0) || + (invp != outvp && invp->v_mount != outvp->v_mount)) { generic_copy: return (ENOSYS); } - - invp_lock = LK_SHARED; + if (invp == outvp) { + onevp = true; + invp_lock = LK_EXCLUSIVE; + } else { + onevp = false; + invp_lock = LK_SHARED; + } + mustclone = false; + if (onevp || (ap->a_flags & COPY_FILE_RANGE_CLONE) != 0) + mustclone = true; relock: + inoff = *ap->a_inoffp; + outoff = *ap->a_outoffp; - /* Lock both vnodes, avoiding risk of deadlock. */ + /* Lock vnode(s), avoiding risk of deadlock. */ do { mp = NULL; error = vn_start_write(outvp, &mp, V_WAIT); if (error == 0) { error = vn_lock(outvp, LK_EXCLUSIVE); if (error == 0) { + if (onevp) + break; error = vn_lock(invp, invp_lock | LK_NOWAIT); if (error == 0) break; @@ -4071,16 +4091,24 @@ relock: return (error); /* - * More reasons to avoid nfs copy: not NFSv4.2, or explicitly - * disabled. + * More reasons to avoid nfs copy/clone: not NFSv4.2, explicitly + * disabled or requires cloning and unable to clone. + * Only clone if the clone_blksize attribute is supported + * and the clone_blksize is greater than 0. + * Alignment of offsets and length will be checked later. */ nmp = VFSTONFS(invp->v_mount); + np = VTONFS(invp); mtx_lock(&nmp->nm_mtx); + if ((nmp->nm_privflag & NFSMNTP_NOCOPY) != 0) + mustclone = true; if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || - (nmp->nm_privflag & NFSMNTP_NOCOPY) != 0) { + (mustclone && (!NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, + NFSATTRBIT_CLONEBLKSIZE) || nmp->nm_cloneblksize == 0))) { mtx_unlock(&nmp->nm_mtx); VOP_UNLOCK(invp); - VOP_UNLOCK(outvp); + if (!onevp) + VOP_UNLOCK(outvp); /* For onevp, same as invp. */ if (mp != NULL) vn_finished_write(mp); goto generic_copy; @@ -4111,6 +4139,8 @@ relock: invp_obj = invp->v_object; if (invp_obj != NULL && vm_object_mightbedirty(invp_obj)) { if (invp_lock != LK_EXCLUSIVE) { + KASSERT(!onevp, ("nfs_copy_file_range: " + "invp_lock LK_SHARED for onevp")); invp_lock = LK_EXCLUSIVE; VOP_UNLOCK(invp); VOP_UNLOCK(outvp); @@ -4134,10 +4164,10 @@ relock: else consecutive = false; mtx_unlock(&nmp->nm_mtx); - inoff = *ap->a_inoffp; - outoff = *ap->a_outoffp; tryoutcred = true; must_commit = false; + toeof = false; + if (error == 0) { vap = &VTONFS(invp)->n_vattr.na_vattr; error = VOP_GETATTR(invp, vap, ap->a_incred); @@ -4169,29 +4199,63 @@ relock: if (error == 0 && ret != 0) error = ret; } - } else if (inoff + len > vap->va_size) + } else if (inoff + len >= vap->va_size) { + toeof = true; *ap->a_lenp = len = vap->va_size - inoff; + } } else error = 0; } /* + * For cloning, the offsets must be clone blksize aligned and + * the len must be blksize aligned unless it goes to EOF on + * the input file. + */ + tryclone = false; + if (len > 0) { + if (error == 0 && NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, + NFSATTRBIT_CLONEBLKSIZE) && nmp->nm_cloneblksize != 0 && + (inoff % nmp->nm_cloneblksize) == 0 && + (outoff % nmp->nm_cloneblksize) == 0 && + (toeof || (len % nmp->nm_cloneblksize) == 0)) + tryclone = true; + else if (mustclone) + error = ENOSYS; + } + + /* * len will be set to 0 upon a successful Copy RPC. - * As such, this only loops when the Copy RPC needs to be retried. + * As such, this only loops when the Copy/Clone RPC needs to be retried. */ while (len > 0 && error == 0) { inattrflag = outattrflag = 0; len2 = len; - if (tryoutcred) - error = nfsrpc_copy_file_range(invp, ap->a_inoffp, - outvp, ap->a_outoffp, &len2, ap->a_flags, - &inattrflag, &innfsva, &outattrflag, &outnfsva, - ap->a_outcred, consecutive, &must_commit); - else - error = nfsrpc_copy_file_range(invp, ap->a_inoffp, - outvp, ap->a_outoffp, &len2, ap->a_flags, - &inattrflag, &innfsva, &outattrflag, &outnfsva, - ap->a_incred, consecutive, &must_commit); + if (tryclone) { + if (tryoutcred) + error = nfsrpc_clone(invp, ap->a_inoffp, outvp, + ap->a_outoffp, &len2, toeof, &inattrflag, + &innfsva, &outattrflag, &outnfsva, + ap->a_outcred); + else + error = nfsrpc_clone(invp, ap->a_inoffp, outvp, + ap->a_outoffp, &len2, toeof, &inattrflag, + &innfsva, &outattrflag, &outnfsva, + ap->a_incred); + } else { + if (tryoutcred) + error = nfsrpc_copy_file_range(invp, + ap->a_inoffp, outvp, ap->a_outoffp, &len2, + ap->a_flags, &inattrflag, &innfsva, + &outattrflag, &outnfsva, + ap->a_outcred, consecutive, &must_commit); + else + error = nfsrpc_copy_file_range(invp, + ap->a_inoffp, outvp, ap->a_outoffp, &len2, + ap->a_flags, &inattrflag, &innfsva, + &outattrflag, &outnfsva, + ap->a_incred, consecutive, &must_commit); + } if (inattrflag != 0) ret = nfscl_loadattrcache(&invp, &innfsva, NULL, 0, 1); if (outattrflag != 0) @@ -4230,6 +4294,13 @@ relock: /* Try again with incred. */ tryoutcred = false; error = 0; + } else if (tryclone && error != 0) { + if (mustclone) { + error = ENOSYS; + } else { + tryclone = false; + error = 0; + } } if (error == NFSERR_STALEWRITEVERF) { /* @@ -4243,11 +4314,12 @@ relock: } } VOP_UNLOCK(invp); - VOP_UNLOCK(outvp); + if (!onevp) + VOP_UNLOCK(outvp); /* For onevp, same as invp. */ if (mp != NULL) vn_finished_write(mp); if (error == NFSERR_NOTSUPP || error == NFSERR_OFFLOADNOREQS || - error == NFSERR_ACCES) { + error == NFSERR_ACCES || error == ENOSYS) { /* * Unlike the NFSv4.2 Copy, vn_generic_copy_file_range() can * use a_incred for the read and a_outcred for the write, so @@ -4255,7 +4327,7 @@ relock: * For NFSERR_NOTSUPP and NFSERR_OFFLOADNOREQS, the Copy can * never succeed, so disable it. */ - if (error != NFSERR_ACCES) { + if (error != NFSERR_ACCES && error != ENOSYS) { /* Can never do Copy on this mount. */ mtx_lock(&nmp->nm_mtx); nmp->nm_privflag |= NFSMNTP_NOCOPY; @@ -4596,6 +4668,7 @@ nfs_pathconf(struct vop_pathconf_args *ap) struct nfsmount *nmp; struct thread *td = curthread; off_t off; + uint32_t clone_blksize; bool eof, has_namedattr, named_enabled; int attrflag, error; struct nfsnode *np; @@ -4604,19 +4677,22 @@ nfs_pathconf(struct vop_pathconf_args *ap) np = VTONFS(vp); named_enabled = false; has_namedattr = false; + clone_blksize = 0; if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX || ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED || ap->a_name == _PC_NO_TRUNC)) || (NFS_ISV4(vp) && (ap->a_name == _PC_ACL_NFS4 || - ap->a_name == _PC_HAS_NAMEDATTR))) { + ap->a_name == _PC_HAS_NAMEDATTR || + ap->a_name == _PC_CLONE_BLKSIZE))) { /* * Since only the above 4 a_names are returned by the NFSv3 * Pathconf RPC, there is no point in doing it for others. * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can - * be used for _PC_ACL_NFS4 and _PC_HAS_NAMEDATTR as well. + * be used for _PC_ACL_NFS4, _PC_HAS_NAMEDATTR and + * _PC_CLONE_BLKSIZE as well. */ - error = nfsrpc_pathconf(vp, &pc, &has_namedattr, td->td_ucred, - td, &nfsva, &attrflag); + error = nfsrpc_pathconf(vp, &pc, &has_namedattr, &clone_blksize, + td->td_ucred, td, &nfsva, &attrflag); if (attrflag != 0) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); if (error != 0) @@ -4771,6 +4847,9 @@ nfs_pathconf(struct vop_pathconf_args *ap) else *ap->a_retval = 0; break; + case _PC_CLONE_BLKSIZE: + *ap->a_retval = clone_blksize; + break; default: error = vop_stdpathconf(ap); diff --git a/sys/fs/nfsclient/nfsmount.h b/sys/fs/nfsclient/nfsmount.h index 37b84a015dab..ef876dd30e59 100644 --- a/sys/fs/nfsclient/nfsmount.h +++ b/sys/fs/nfsclient/nfsmount.h @@ -87,6 +87,7 @@ struct nfsmount { /* unclipped, wraps to 0 */ struct __rpc_client *nm_aconn[NFS_MAXNCONN - 1]; /* Additional nconn */ /* Locked via nm_sockreq.nr_mtx */ + uint32_t nm_cloneblksize; /* Block cloning alignment */ u_int16_t nm_krbnamelen; /* Krb5 host principal, if any */ u_int16_t nm_dirpathlen; /* and mount dirpath, for V4 */ u_int16_t nm_srvkrbnamelen; /* and the server's target name */ diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index 8c427c66c156..b2966934f9b7 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -2113,7 +2113,8 @@ nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, int isdgram, int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno, - bool xattrsupp, bool has_hiddensystem, bool has_namedattr) + bool xattrsupp, bool has_hiddensystem, bool has_namedattr, + uint32_t clone_blksize) { struct statfs *sf; int error; @@ -2130,9 +2131,11 @@ nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, sf = NULL; } } + error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, - mounted_on_fileno, sf, xattrsupp, has_hiddensystem, has_namedattr); + mounted_on_fileno, sf, xattrsupp, has_hiddensystem, has_namedattr, + clone_blksize); free(sf, M_TEMP); NFSEXITCODE2(0, nd); return (error); @@ -2441,7 +2444,7 @@ nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, struct vnode *vp, struct nfsexstuff *exp) { struct dirent *dp; - u_int32_t *tl; + uint32_t clone_blksize, *tl; int dirlen; char *cpos, *cend, *rbuf; struct vnode *nvp; @@ -2943,6 +2946,7 @@ again: xattrsupp = false; has_hiddensystem = false; has_namedattr = false; + clone_blksize = 0; if (nvp != NULL) { supports_nfsv4acls = nfs_supportsnfsv4acls(nvp); @@ -2966,6 +2970,11 @@ again: &pathval) != 0) pathval = 0; has_namedattr = pathval > 0; + pathval = 0; + if (VOP_PATHCONF(nvp, _PC_CLONE_BLKSIZE, + &pathval) != 0) + pathval = 0; + clone_blksize = pathval; NFSVOPUNLOCK(nvp); } else supports_nfsv4acls = 0; @@ -2986,14 +2995,16 @@ again: nd->nd_cred, p, isdgram, 0, supports_nfsv4acls, at_root, mounted_on_fileno, xattrsupp, - has_hiddensystem, has_namedattr); + has_hiddensystem, has_namedattr, + clone_blksize); } else { dirlen += nfsvno_fillattr(nd, new_mp, nvp, nvap, &nfh, r, &attrbits, nd->nd_cred, p, isdgram, 0, supports_nfsv4acls, at_root, mounted_on_fileno, xattrsupp, - has_hiddensystem, has_namedattr); + has_hiddensystem, has_namedattr, + clone_blksize); } if (nvp != NULL) vrele(nvp); @@ -3740,6 +3751,7 @@ nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, nfsexargp->fspec); if ((error = namei(&nd)) != 0) goto out; + NDFREE_PNBUF(&nd); error = nfsvno_getfh(nd.ni_vp, &fh, p); vrele(nd.ni_vp); if (!error) { @@ -5690,7 +5702,8 @@ nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len, if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, - NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL); + NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, + NULL); NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error); if (error != 0) goto nfsmout; @@ -5721,7 +5734,8 @@ nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len, if (error == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, - NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL); + NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, + NULL); } NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error); nfsmout: @@ -5887,7 +5901,8 @@ nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, - NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL); + NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, + NULL); } else error = nd->nd_repstat; NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error); @@ -6054,7 +6069,8 @@ nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, - NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL); + NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, + NULL); NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: wcc attr=%d\n", error); if (error != 0) goto nfsmout; @@ -6068,7 +6084,8 @@ nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, - NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL); + NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, + NULL); } else error = nd->nd_repstat; NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft loadattr=%d\n", error); @@ -6216,7 +6233,8 @@ nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, - NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, + NULL, NULL); NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); if (error != 0) goto nfsmout; @@ -6241,7 +6259,7 @@ nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, - NULL); + NULL, NULL); } NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); nfsmout: @@ -6386,7 +6404,7 @@ nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, * the same type (VREG). */ nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, - NULL, 0, 0, 0, 0, 0, NULL, false, false, false); + NULL, 0, 0, 0, 0, 0, NULL, false, false, false, 0); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { @@ -6530,7 +6548,7 @@ nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, - NULL, NULL, NULL); + NULL, NULL, NULL, NULL); /* * We can only save the updated values in the extended * attribute if the vp is exclusively locked. diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c index 9eebcda548c6..a881968b03be 100644 --- a/sys/fs/nfsserver/nfs_nfsdserv.c +++ b/sys/fs/nfsserver/nfs_nfsdserv.c @@ -253,6 +253,7 @@ nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram, size_t atsiz; long pathval; bool has_hiddensystem, has_namedattr, xattrsupp; + uint32_t clone_blksize; if (nd->nd_repstat) goto out; @@ -330,6 +331,11 @@ nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram, &pathval) != 0) pathval = 0; has_namedattr = pathval > 0; + pathval = 0; + if (VOP_PATHCONF(vp, _PC_CLONE_BLKSIZE, + &pathval) != 0) + pathval = 0; + clone_blksize = pathval; mp = vp->v_mount; if (nfsrv_enable_crossmntpt != 0 && vp->v_type == VDIR && @@ -365,7 +371,7 @@ nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram, isdgram, 1, supports_nfsv4acls, at_root, mounted_on_fileno, xattrsupp, has_hiddensystem, - has_namedattr); + has_namedattr, clone_blksize); vfs_unbusy(mp); } vrele(vp); @@ -4347,7 +4353,7 @@ nfsrvd_verify(struct nfsrv_descript *nd, int isdgram, if (!nd->nd_repstat) { nfsvno_getfs(&fs, isdgram); error = nfsv4_loadattr(nd, vp, &nva, NULL, &fh, fhsize, NULL, - sf, NULL, &fs, NULL, 1, &ret, NULL, NULL, NULL, p, + sf, NULL, &fs, NULL, 1, &ret, NULL, NULL, NULL, NULL, p, nd->nd_cred); if (!error) { if (nd->nd_procnum == NFSV4OP_NVERIFY) { @@ -6011,6 +6017,212 @@ nfsmout: } /* + * nfs clone service + */ +int +nfsrvd_clone(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, vnode_t tovp, struct nfsexstuff *exp, struct nfsexstuff *toexp) +{ + uint32_t *tl; + struct nfsvattr at; + int error = 0, ret; + off_t inoff, outoff; + uint64_t len; + size_t xfer; + struct nfsstate inst, outst, *instp = &inst, *outstp = &outst; + struct nfslock inlo, outlo, *inlop = &inlo, *outlop = &outlo; + nfsquad_t clientid; + nfsv4stateid_t stateid; + nfsattrbit_t attrbits; + void *rl_rcookie, *rl_wcookie; + long pathval; + + rl_rcookie = rl_wcookie = NULL; + pathval = 0; + if (nfsrv_maxcopyrange == 0 || nfsrv_devidcnt > 0 || + VOP_PATHCONF(vp, _PC_CLONE_BLKSIZE, &pathval) != 0 || + pathval == 0) { + /* + * For a pNFS server, reply NFSERR_NOTSUPP so that the client + * will not do the clone and will do I/O on the DS(s). + * If vfs.nfsd.maxcopyrange set to 0, disable Clone. + */ + nd->nd_repstat = NFSERR_NOTSUPP; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_STATEID + 3 * NFSX_HYPER); + instp->ls_flags = (NFSLCK_CHECK | NFSLCK_READACCESS); + inlop->lo_flags = NFSLCK_READ; + instp->ls_ownerlen = 0; + instp->ls_op = NULL; + instp->ls_uid = nd->nd_cred->cr_uid; + instp->ls_stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + clientid.lval[0] = instp->ls_stateid.other[0] = *tl++; + clientid.lval[1] = instp->ls_stateid.other[1] = *tl++; + if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) + clientid.qval = nd->nd_clientid.qval; + instp->ls_stateid.other[2] = *tl++; + outstp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); + outlop->lo_flags = NFSLCK_WRITE; + outstp->ls_ownerlen = 0; + outstp->ls_op = NULL; + outstp->ls_uid = nd->nd_cred->cr_uid; + outstp->ls_stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + outstp->ls_stateid.other[0] = *tl++; + outstp->ls_stateid.other[1] = *tl++; + outstp->ls_stateid.other[2] = *tl++; + inoff = fxdr_hyper(tl); tl += 2; + inlop->lo_first = inoff; + outoff = fxdr_hyper(tl); tl += 2; + outlop->lo_first = outoff; + len = fxdr_hyper(tl); + if (len == 0) { + /* len == 0 means to EOF. */ + inlop->lo_end = OFF_MAX; + outlop->lo_end = OFF_MAX; + } else { + inlop->lo_end = inlop->lo_first + len; + outlop->lo_end = outlop->lo_first + len; + } + + if ((inoff > OFF_MAX || outoff > OFF_MAX || + inlop->lo_end > OFF_MAX || outlop->lo_end > OFF_MAX || + inlop->lo_end < inlop->lo_first || outlop->lo_end < + outlop->lo_first)) + nd->nd_repstat = NFSERR_INVAL; + + if (nd->nd_repstat == 0 && vp->v_type != VREG) + nd->nd_repstat = NFSERR_WRONGTYPE; + + /* Check permissions for the input file. */ + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); + ret = nfsvno_getattr(vp, &at, nd, curthread, 1, &attrbits); + if (nd->nd_repstat == 0) + nd->nd_repstat = ret; + if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid || + NFSVNO_EXSTRICTACCESS(exp))) + nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, + curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, + NULL); + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsrv_lockctrl(vp, &instp, &inlop, NULL, + clientid, &stateid, exp, nd, curthread); + if (vp != tovp) { + NFSVOPUNLOCK(vp); + if (nd->nd_repstat != 0) + goto out; + + error = NFSVOPLOCK(tovp, LK_SHARED); + if (error != 0) + goto out; + pathval = 0; + if (VOP_PATHCONF(tovp, _PC_CLONE_BLKSIZE, &pathval) != 0 || + pathval == 0) + nd->nd_repstat = NFSERR_NOTSUPP; + else if (tovp->v_type != VREG) + nd->nd_repstat = NFSERR_WRONGTYPE; + } + + /* For the output file, we only need the Owner attribute. */ + ret = nfsvno_getattr(tovp, &at, nd, curthread, 1, &attrbits); + if (nd->nd_repstat == 0) + nd->nd_repstat = ret; + if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid || + NFSVNO_EXSTRICTACCESS(exp))) + nd->nd_repstat = nfsvno_accchk(tovp, VWRITE, nd->nd_cred, toexp, + curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, + NULL); + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsrv_lockctrl(tovp, &outstp, &outlop, NULL, + clientid, &stateid, toexp, nd, curthread); + NFSVOPUNLOCK(tovp); + + /* Range lock the byte ranges for both invp and outvp. */ + if (nd->nd_repstat == 0) { + for (;;) { + if (len == 0) + rl_wcookie = vn_rangelock_wlock(tovp, outoff, + OFF_MAX); + else + rl_wcookie = vn_rangelock_wlock(tovp, outoff, + outoff + len); + if (vp != tovp) { + if (len == 0) + rl_rcookie = vn_rangelock_tryrlock(vp, + inoff, OFF_MAX); + else + rl_rcookie = vn_rangelock_tryrlock(vp, + inoff, inoff + len); + if (rl_rcookie != NULL) + break; + } else { + rl_rcookie = NULL; + break; + } + vn_rangelock_unlock(tovp, rl_wcookie); + if (len == 0) + rl_rcookie = vn_rangelock_rlock(vp, inoff, + OFF_MAX); + else + rl_rcookie = vn_rangelock_rlock(vp, inoff, + inoff + len); + vn_rangelock_unlock(vp, rl_rcookie); + } + + error = NFSVOPLOCK(vp, LK_SHARED); + if (error == 0) { + ret = nfsvno_getattr(vp, &at, nd, curthread, 1, NULL); + if (ret == 0) { + /* + * Since invp is range locked, na_size should + * not change. + */ + if (len == 0 && at.na_size > inoff) + len = SSIZE_MAX; /* To EOF. */ + else if (inoff + len > at.na_size) + nd->nd_repstat = NFSERR_INVAL; + } + NFSVOPUNLOCK(vp); + if (ret != 0 && nd->nd_repstat == 0) + nd->nd_repstat = ret; + } else if (nd->nd_repstat == 0) + nd->nd_repstat = error; + } + + /* + * Do the actual copy to an upper limit of vfs.nfsd.maxcopyrange. + * This size limit can be set to limit the time a copy RPC will + * take. + */ + xfer = len; + if (nd->nd_repstat == 0) { + nd->nd_repstat = vn_copy_file_range(vp, &inoff, tovp, &outoff, + &xfer, COPY_FILE_RANGE_CLONE, nd->nd_cred, nd->nd_cred, + NULL); + if (nd->nd_repstat == ENOSYS) + nd->nd_repstat = NFSERR_INVAL; + } + + /* Unlock the ranges. */ + if (rl_rcookie != NULL) + vn_rangelock_unlock(vp, rl_rcookie); + if (rl_wcookie != NULL) + vn_rangelock_unlock(tovp, rl_wcookie); + +out: + vrele(vp); + vrele(tovp); + NFSEXITCODE2(error, nd); + return (error); +nfsmout: + vput(vp); + vrele(tovp); + NFSEXITCODE2(error, nd); + return (error); +} + +/* * nfs seek service */ int diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c b/sys/fs/nfsserver/nfs_nfsdsocket.c index d6832b4f74be..636c4735a131 100644 --- a/sys/fs/nfsserver/nfs_nfsdsocket.c +++ b/sys/fs/nfsserver/nfs_nfsdsocket.c @@ -371,7 +371,7 @@ int (*nfsrv4_ops2[NFSV42_NOPS])(struct nfsrv_descript *, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, - (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + nfsrvd_clone, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c index 2e27817389dd..111b0f26d0b5 100644 --- a/sys/fs/nfsserver/nfs_nfsdstate.c +++ b/sys/fs/nfsserver/nfs_nfsdstate.c @@ -4675,7 +4675,7 @@ errout: } else if (error == 0 && procnum == NFSV4OP_CBGETATTR) error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, - NULL, p, NULL); + NULL, NULL, p, NULL); m_freem(nd->nd_mrep); } NFSLOCKSTATE(); @@ -7731,6 +7731,7 @@ nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p, NFSD_DEBUG(4, "lookup=%d\n", error); if (error != 0) return (error); + NDFREE_PNBUF(&nd); if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); NFSD_DEBUG(4, "dspath not dir\n"); @@ -7767,6 +7768,7 @@ nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p, NFSD_DEBUG(4, "dsdirpath=%s lookup=%d\n", dsdirpath, error); if (error != 0) break; + NDFREE_PNBUF(&nd); if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); error = ENOTDIR; @@ -7795,6 +7797,7 @@ nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p, NFSD_DEBUG(4, "mds lookup=%d\n", error); if (error != 0) goto out; + NDFREE_PNBUF(&nd); if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); error = ENOTDIR; @@ -8654,6 +8657,7 @@ nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf, NFSD_DEBUG(4, "lookup=%d\n", error); if (error != 0) return (error); + NDFREE_PNBUF(&nd); if (nd.ni_vp->v_type != VREG) { vput(nd.ni_vp); NFSD_DEBUG(4, "mdspath not reg\n"); @@ -8675,6 +8679,7 @@ nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf, vput(vp); return (error); } + NDFREE_PNBUF(&nd); if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); vput(vp); @@ -8717,6 +8722,7 @@ nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf, vput(curvp); return (error); } + NDFREE_PNBUF(&nd); if (nd.ni_vp->v_type != VDIR || nd.ni_vp == curvp) { vput(nd.ni_vp); vput(vp); diff --git a/sys/fs/p9fs/p9fs_vnops.c b/sys/fs/p9fs/p9fs_vnops.c index 227e2b93883e..acb73973d93b 100644 --- a/sys/fs/p9fs/p9fs_vnops.c +++ b/sys/fs/p9fs/p9fs_vnops.c @@ -1326,7 +1326,7 @@ p9fs_read(struct vop_read_args *ap) np = P9FS_VTON(vp); error = 0; - if (vp->v_type == VCHR || vp->v_type == VBLK) + if (VN_ISDEV(vp)) return (EOPNOTSUPP); if (vp->v_type != VREG) return (EISDIR); diff --git a/sys/fs/tarfs/tarfs_vnops.c b/sys/fs/tarfs/tarfs_vnops.c index afb8e05f5929..acf18de5ab51 100644 --- a/sys/fs/tarfs/tarfs_vnops.c +++ b/sys/fs/tarfs/tarfs_vnops.c @@ -208,8 +208,7 @@ tarfs_getattr(struct vop_getattr_args *ap) vap->va_birthtime = tnp->birthtime; vap->va_gen = tnp->gen; vap->va_flags = tnp->flags; - vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ? - tnp->rdev : NODEV; + vap->va_rdev = VN_ISDEV(vp) ? tnp->rdev : NODEV; vap->va_bytes = round_page(tnp->physize); vap->va_filerev = 0; @@ -515,7 +514,7 @@ tarfs_read(struct vop_read_args *ap) uiop = ap->a_uio; vp = ap->a_vp; - if (vp->v_type == VCHR || vp->v_type == VBLK) + if (VN_ISDEV(vp)) return (EOPNOTSUPP); if (vp->v_type != VREG) diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index 1237f6b92cdb..dd281d18d87d 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -551,7 +551,7 @@ tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, __enum_uint8(vtype) MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); MPASS((type == VLNK) ^ (target == NULL)); - MPASS((type == VBLK || type == VCHR) ^ (rdev == VNOVAL)); + MPASS(VTYPE_ISDEV(type) ^ (rdev == VNOVAL)); if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) return (ENOSPC); diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c index 79b6c8b2e6a1..0f4ea2fdc28c 100644 --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -280,8 +280,7 @@ tmpfs_mknod(struct vop_mknod_args *v) struct componentname *cnp = v->a_cnp; struct vattr *vap = v->a_vap; - if (vap->va_type != VBLK && vap->va_type != VCHR && - vap->va_type != VFIFO) + if (!VATTR_ISDEV(vap) && vap->va_type != VFIFO) return (EINVAL); return (tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL)); @@ -462,8 +461,7 @@ tmpfs_stat(struct vop_stat_args *v) sb->st_nlink = node->tn_links; sb->st_uid = node->tn_uid; sb->st_gid = node->tn_gid; - sb->st_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ? - node->tn_rdev : NODEV; + sb->st_rdev = VN_ISDEV(vp) ? node->tn_rdev : NODEV; sb->st_size = node->tn_size; sb->st_atim.tv_sec = node->tn_atime.tv_sec; sb->st_atim.tv_nsec = node->tn_atime.tv_nsec; @@ -521,8 +519,7 @@ tmpfs_getattr(struct vop_getattr_args *v) vap->va_birthtime = node->tn_birthtime; vap->va_gen = node->tn_gen; vap->va_flags = node->tn_flags; - vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ? - node->tn_rdev : NODEV; + vap->va_rdev = VN_ISDEV(vp) ? node->tn_rdev : NODEV; if (vp->v_type == VREG) { #ifdef __ILP32__ vm_object_t obj = node->tn_reg.tn_aobj; @@ -1918,7 +1915,7 @@ tmpfs_deleteextattr(struct vop_deleteextattr_args *ap) node = VP_TO_TMPFS_NODE(vp); tmp = VFS_TO_TMPFS(vp->v_mount); - if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + if (VN_ISDEV(ap->a_vp)) return (EOPNOTSUPP); error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VWRITE); @@ -1956,7 +1953,7 @@ tmpfs_getextattr(struct vop_getextattr_args *ap) int error; node = VP_TO_TMPFS_NODE(vp); - if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + if (VN_ISDEV(ap->a_vp)) return (EOPNOTSUPP); error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VREAD); @@ -1993,7 +1990,7 @@ tmpfs_listextattr(struct vop_listextattr_args *ap) int error; node = VP_TO_TMPFS_NODE(vp); - if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + if (VN_ISDEV(ap->a_vp)) return (EOPNOTSUPP); error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VREAD); @@ -2037,7 +2034,7 @@ tmpfs_setextattr(struct vop_setextattr_args *ap) tmp = VFS_TO_TMPFS(vp->v_mount); attr_size = ap->a_uio->uio_resid; diff = 0; - if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + if (VN_ISDEV(ap->a_vp)) return (EOPNOTSUPP); error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VWRITE); diff --git a/sys/geom/part/g_part.c b/sys/geom/part/g_part.c index 41125f6478ac..88e44b335b29 100644 --- a/sys/geom/part/g_part.c +++ b/sys/geom/part/g_part.c @@ -1046,7 +1046,7 @@ g_part_ctl_create(struct gctl_req *req, struct g_part_parms *gpp) /* * Synthesize a disk geometry. Some partitioning schemes * depend on it and since some file systems need it even - * when the partitition scheme doesn't, we do it here in + * when the partition scheme doesn't, we do it here in * scheme-independent code. */ g_part_geometry(table, cp, pp->mediasize / pp->sectorsize); @@ -1539,7 +1539,7 @@ g_part_ctl_undo(struct gctl_req *req, struct g_part_parms *gpp) /* * Synthesize a disk geometry. Some partitioning schemes * depend on it and since some file systems need it even - * when the partitition scheme doesn't, we do it here in + * when the partition scheme doesn't, we do it here in * scheme-independent code. */ pp = cp->provider; @@ -2023,7 +2023,7 @@ g_part_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) /* * Synthesize a disk geometry. Some partitioning schemes * depend on it and since some file systems need it even - * when the partitition scheme doesn't, we do it here in + * when the partition scheme doesn't, we do it here in * scheme-independent code. */ g_part_geometry(table, cp, pp->mediasize / pp->sectorsize); diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index f577cd07ac7c..88b8967cd693 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -343,3 +343,4 @@ options HID_DEBUG # enable debug msgs device hid # Generic HID support device hidbus # Generic HID Bus options IICHID_SAMPLING # Workaround missing GPIO INTR support +options U2F_MAKE_UHID_ALIAS # install /dev/uhid alias for /dev/u2f/ diff --git a/sys/isa/isareg.h b/sys/isa/isareg.h index e89136c7e1e5..8b2d55608078 100644 --- a/sys/isa/isareg.h +++ b/sys/isa/isareg.h @@ -49,7 +49,7 @@ #define IO_RTC 0x070 /* RTC */ #define IO_ICU2 0x0A0 /* 8259A Interrupt Controller #2 */ -#define IO_MDA 0x3B0 /* Monochome Adapter */ +#define IO_MDA 0x3B0 /* Monochrome Adapter */ #define IO_VGA 0x3C0 /* E/VGA Ports */ #define IO_CGA 0x3D0 /* CGA Ports */ diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index c0a5479c9634..fcd232cde21e 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -145,8 +145,8 @@ struct sysent sysent[] = { { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 76 = obsolete vhangup */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 77 = obsolete vlimit */ { .sy_narg = AS(mincore_args), .sy_call = (sy_call_t *)sys_mincore, .sy_auevent = AUE_MINCORE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 78 = mincore */ - { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 79 = getgroups */ - { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 80 = setgroups */ + { compat14(AS(freebsd14_getgroups_args),getgroups), .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 79 = freebsd14 getgroups */ + { compat14(AS(freebsd14_setgroups_args),setgroups), .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 80 = freebsd14 setgroups */ { .sy_narg = 0, .sy_call = (sy_call_t *)sys_getpgrp, .sy_auevent = AUE_GETPGRP, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 81 = getpgrp */ { .sy_narg = AS(setpgid_args), .sy_call = (sy_call_t *)sys_setpgid, .sy_auevent = AUE_SETPGRP, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 82 = setpgid */ { .sy_narg = AS(setitimer_args), .sy_call = (sy_call_t *)sys_setitimer, .sy_auevent = AUE_SETITIMER, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 83 = setitimer */ @@ -661,4 +661,6 @@ struct sysent sysent[] = { { .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */ { .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */ { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ + { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */ + { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */ }; diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c index 2cd5b7069023..0ca42d640767 100644 --- a/sys/kern/kern_prot.c +++ b/sys/kern/kern_prot.c @@ -310,6 +310,39 @@ sys_getegid(struct thread *td, struct getegid_args *uap) return (0); } +#ifdef COMPAT_FREEBSD14 +int +freebsd14_getgroups(struct thread *td, struct freebsd14_getgroups_args *uap) +{ + struct ucred *cred; + int ngrp, error; + + cred = td->td_ucred; + + /* + * For FreeBSD < 15.0, we account for the egid being placed at the + * beginning of the group list prior to all supplementary groups. + */ + ngrp = cred->cr_ngroups + 1; + if (uap->gidsetsize == 0) { + error = 0; + goto out; + } else if (uap->gidsetsize < ngrp) { + return (EINVAL); + } + + error = copyout(&cred->cr_gid, uap->gidset, sizeof(gid_t)); + if (error == 0) + error = copyout(cred->cr_groups, uap->gidset + 1, + (ngrp - 1) * sizeof(gid_t)); + +out: + td->td_retval[0] = ngrp; + return (error); + +} +#endif /* COMPAT_FREEBSD14 */ + #ifndef _SYS_SYSPROTO_H_ struct getgroups_args { int gidsetsize; @@ -320,18 +353,11 @@ int sys_getgroups(struct thread *td, struct getgroups_args *uap) { struct ucred *cred; - gid_t *ugidset; int ngrp, error; cred = td->td_ucred; - /* - * cr_gid has been moved out of cr_groups, but we'll continue exporting - * the egid as groups[0] for the time being until we audit userland for - * any surprises. - */ - ngrp = cred->cr_ngroups + 1; - + ngrp = cred->cr_ngroups; if (uap->gidsetsize == 0) { error = 0; goto out; @@ -339,14 +365,7 @@ sys_getgroups(struct thread *td, struct getgroups_args *uap) if (uap->gidsetsize < ngrp) return (EINVAL); - ugidset = uap->gidset; - error = copyout(&cred->cr_gid, ugidset, sizeof(*ugidset)); - if (error != 0) - goto out; - - if (ngrp > 1) - error = copyout(cred->cr_groups, ugidset + 1, - (ngrp - 1) * sizeof(*ugidset)); + error = copyout(cred->cr_groups, uap->gidset, ngrp * sizeof(gid_t)); out: td->td_retval[0] = ngrp; return (error); @@ -1186,6 +1205,44 @@ fail: return (error); } +#ifdef COMPAT_FREEBSD14 +int +freebsd14_setgroups(struct thread *td, struct freebsd14_setgroups_args *uap) +{ + gid_t smallgroups[CRED_SMALLGROUPS_NB]; + gid_t *groups; + int gidsetsize, error; + + /* + * Before FreeBSD 15.0, we allow one more group to be supplied to + * account for the egid appearing before the supplementary groups. This + * may technically allow one more supplementary group for systems that + * did use the default NGROUPS_MAX if we round it back up to 1024. + */ + gidsetsize = uap->gidsetsize; + if (gidsetsize > ngroups_max + 1 || gidsetsize < 0) + return (EINVAL); + + if (gidsetsize > CRED_SMALLGROUPS_NB) + groups = malloc(gidsetsize * sizeof(gid_t), M_TEMP, M_WAITOK); + else + groups = smallgroups; + + error = copyin(uap->gidset, groups, gidsetsize * sizeof(gid_t)); + if (error == 0) { + int ngroups = gidsetsize > 0 ? gidsetsize - 1 /* egid */ : 0; + + error = kern_setgroups(td, &ngroups, groups + 1); + if (error == 0 && gidsetsize > 0) + td->td_proc->p_ucred->cr_gid = groups[0]; + } + + if (groups != smallgroups) + free(groups, M_TEMP); + return (error); +} +#endif /* COMPAT_FREEBSD14 */ + #ifndef _SYS_SYSPROTO_H_ struct setgroups_args { int gidsetsize; @@ -1210,8 +1267,7 @@ sys_setgroups(struct thread *td, struct setgroups_args *uap) * setgroups() differ. */ gidsetsize = uap->gidsetsize; - /* XXXKE Limit to ngroups_max when we change the userland interface. */ - if (gidsetsize > ngroups_max + 1 || gidsetsize < 0) + if (gidsetsize > ngroups_max || gidsetsize < 0) return (EINVAL); if (gidsetsize > CRED_SMALLGROUPS_NB) @@ -1238,35 +1294,17 @@ kern_setgroups(struct thread *td, int *ngrpp, gid_t *groups) struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; int ngrp, error; - gid_t egid; ngrp = *ngrpp; /* Sanity check size. */ - /* XXXKE Limit to ngroups_max when we change the userland interface. */ - if (ngrp < 0 || ngrp > ngroups_max + 1) + if (ngrp < 0 || ngrp > ngroups_max) return (EINVAL); AUDIT_ARG_GROUPSET(groups, ngrp); - /* - * setgroups(0, NULL) is a legitimate way of clearing the groups vector - * on non-BSD systems (which generally do not have the egid in the - * groups[0]). We risk security holes when running non-BSD software if - * we do not do the same. So we allow and treat 0 for 'ngrp' specially - * below (twice). - */ - if (ngrp != 0) { - /* - * To maintain userland compat for now, we use the first group - * as our egid and we'll use the rest as our supplemental - * groups. - */ - egid = groups[0]; - ngrp--; - groups++; - groups_normalize(&ngrp, groups); - *ngrpp = ngrp; - } + groups_normalize(&ngrp, groups); + *ngrpp = ngrp; + newcred = crget(); crextend(newcred, ngrp); PROC_LOCK(p); @@ -1289,15 +1327,7 @@ kern_setgroups(struct thread *td, int *ngrpp, gid_t *groups) if (error) goto fail; - /* - * If some groups were passed, the first one is currently the desired - * egid. This code is to be removed (along with some commented block - * above) when setgroups() is changed to take only supplementary groups. - */ - if (ngrp != 0) - newcred->cr_gid = egid; crsetgroups_internal(newcred, ngrp, groups); - setsugid(p); proc_set_cred(p, newcred); PROC_UNLOCK(p); @@ -2891,7 +2921,8 @@ crextend(struct ucred *cr, int n) * Normalizes a set of groups to be applied to a 'struct ucred'. * * Normalization ensures that the supplementary groups are sorted in ascending - * order and do not contain duplicates. + * order and do not contain duplicates. This allows group_is_supplementary + * to do a binary search. */ static void groups_normalize(int *ngrp, gid_t *groups) diff --git a/sys/kern/sys_timerfd.c b/sys/kern/sys_timerfd.c index ab7e048a2ab1..565ab3ad6ee6 100644 --- a/sys/kern/sys_timerfd.c +++ b/sys/kern/sys_timerfd.c @@ -206,7 +206,6 @@ retry: mtx_unlock(&tfd->tfd_lock); return (EAGAIN); } - td->td_rtcgen = atomic_load_acq_int(&rtc_generation); error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "tfdrd", 0); if (error == 0) { diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index 09bf4d519927..4122f9261871 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -84,8 +84,8 @@ const char *syscallnames[] = { "obs_vhangup", /* 76 = obsolete vhangup */ "obs_vlimit", /* 77 = obsolete vlimit */ "mincore", /* 78 = mincore */ - "getgroups", /* 79 = getgroups */ - "setgroups", /* 80 = setgroups */ + "compat14.getgroups", /* 79 = freebsd14 getgroups */ + "compat14.setgroups", /* 80 = freebsd14 setgroups */ "getpgrp", /* 81 = getpgrp */ "setpgid", /* 82 = setpgid */ "setitimer", /* 83 = setitimer */ @@ -600,4 +600,6 @@ const char *syscallnames[] = { "exterrctl", /* 592 = exterrctl */ "inotify_add_watch_at", /* 593 = inotify_add_watch_at */ "inotify_rm_watch", /* 594 = inotify_rm_watch */ + "getgroups", /* 595 = getgroups */ + "setgroups", /* 596 = setgroups */ }; diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 53b5d3cbbba9..fa64597d14a5 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -552,13 +552,13 @@ _Out_writes_bytes_(len/PAGE_SIZE) char *vec ); } -79 AUE_GETGROUPS STD|CAPENABLED { +79 AUE_GETGROUPS STD|CAPENABLED|COMPAT14 { int getgroups( int gidsetsize, _Out_writes_opt_(gidsetsize) gid_t *gidset ); } -80 AUE_SETGROUPS STD { +80 AUE_SETGROUPS STD|COMPAT14 { int setgroups( int gidsetsize, _In_reads_(gidsetsize) const gid_t *gidset @@ -3371,5 +3371,17 @@ int wd ); } +595 AUE_GETGROUPS STD|CAPENABLED { + int getgroups( + int gidsetsize, + _Out_writes_opt_(gidsetsize) gid_t *gidset + ); + } +596 AUE_SETGROUPS STD { + int setgroups( + int gidsetsize, + _In_reads_(gidsetsize) const gid_t *gidset + ); + } ; vim: syntax=off diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c index 4dfc63924da9..2b1ea9eed8d4 100644 --- a/sys/kern/systrace_args.c +++ b/sys/kern/systrace_args.c @@ -454,22 +454,6 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 3; break; } - /* getgroups */ - case 79: { - struct getgroups_args *p = params; - iarg[a++] = p->gidsetsize; /* int */ - uarg[a++] = (intptr_t)p->gidset; /* gid_t * */ - *n_args = 2; - break; - } - /* setgroups */ - case 80: { - struct setgroups_args *p = params; - iarg[a++] = p->gidsetsize; /* int */ - uarg[a++] = (intptr_t)p->gidset; /* const gid_t * */ - *n_args = 2; - break; - } /* getpgrp */ case 81: { *n_args = 0; @@ -3500,6 +3484,22 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 2; break; } + /* getgroups */ + case 595: { + struct getgroups_args *p = params; + iarg[a++] = p->gidsetsize; /* int */ + uarg[a++] = (intptr_t)p->gidset; /* gid_t * */ + *n_args = 2; + break; + } + /* setgroups */ + case 596: { + struct setgroups_args *p = params; + iarg[a++] = p->gidsetsize; /* int */ + uarg[a++] = (intptr_t)p->gidset; /* const gid_t * */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -4199,32 +4199,6 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; - /* getgroups */ - case 79: - switch (ndx) { - case 0: - p = "int"; - break; - case 1: - p = "userland gid_t *"; - break; - default: - break; - }; - break; - /* setgroups */ - case 80: - switch (ndx) { - case 0: - p = "int"; - break; - case 1: - p = "userland const gid_t *"; - break; - default: - break; - }; - break; /* getpgrp */ case 81: break; @@ -9367,6 +9341,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* getgroups */ + case 595: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "userland gid_t *"; + break; + default: + break; + }; + break; + /* setgroups */ + case 596: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "userland const gid_t *"; + break; + default: + break; + }; + break; default: break; }; @@ -9633,16 +9633,6 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; - /* getgroups */ - case 79: - if (ndx == 0 || ndx == 1) - p = "int"; - break; - /* setgroups */ - case 80: - if (ndx == 0 || ndx == 1) - p = "int"; - break; /* getpgrp */ case 81: /* setpgid */ @@ -11365,6 +11355,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* getgroups */ + case 595: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* setgroups */ + case 596: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c index 85fe48ddd466..eb1327f7f2de 100644 --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -1160,7 +1160,8 @@ kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode, if ((flags & O_ACCMODE) != O_RDONLY && (flags & O_ACCMODE) != O_RDWR) return (EINVAL); - if ((flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC)) != 0) + if ((flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC | + O_CLOFORK)) != 0) return (EINVAL); largepage = (shmflags & SHM_LARGEPAGE) != 0; diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index fa655c43d155..19c39e42bafa 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -5170,7 +5170,7 @@ bufstrategy(struct bufobj *bo, struct buf *bp) vp = bp->b_vp; KASSERT(vp == bo->bo_private, ("Inconsistent vnode bufstrategy")); - KASSERT(vp->v_type != VCHR && vp->v_type != VBLK, + KASSERT(!VN_ISDEV(vp), ("Wrong vnode in bufstrategy(bp=%p, vp=%p)", bp, vp)); i = VOP_STRATEGY(vp, bp); KASSERT(i == 0, ("VOP_STRATEGY failed bp=%p vp=%p", bp, bp->b_vp)); diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c index 746a5a39208e..b265a5ff3a62 100644 --- a/sys/kern/vfs_inotify.c +++ b/sys/kern/vfs_inotify.c @@ -801,6 +801,7 @@ vn_inotify_add_watch(struct vnode *vp, struct inotify_softc *sc, uint32_t mask, vn_lock(vp, LK_SHARED | LK_RETRY); if (error != 0) break; + NDFREE_PNBUF(&nd); vn_irflag_set_cond(nd.ni_vp, VIRF_INOTIFY_PARENT); vrele(nd.ni_vp); } diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index c64618036733..bf3ed9d515dc 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -2839,7 +2839,7 @@ setfflags(struct thread *td, struct vnode *vp, u_long flags) * if they are allowed to set flags and programs assume that * chown can't fail when done as root. */ - if (vp->v_type == VCHR || vp->v_type == VBLK) { + if (VN_ISDEV(vp)) { error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); if (error != 0) return (error); @@ -5050,11 +5050,12 @@ kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, size_t retlen; void *rl_rcookie, *rl_wcookie; off_t inoff, outoff, savinoff, savoutoff; - bool foffsets_locked; + bool foffsets_locked, foffsets_set; infp = outfp = NULL; rl_rcookie = rl_wcookie = NULL; foffsets_locked = false; + foffsets_set = false; error = 0; retlen = 0; @@ -5122,6 +5123,8 @@ kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, } foffset_lock_pair(infp1, &inoff, outfp1, &outoff, 0); foffsets_locked = true; + } else { + foffsets_set = true; } savinoff = inoff; savoutoff = outoff; @@ -5180,11 +5183,12 @@ out: vn_rangelock_unlock(invp, rl_rcookie); if (rl_wcookie != NULL) vn_rangelock_unlock(outvp, rl_wcookie); + if ((foffsets_locked || foffsets_set) && + (error == EINTR || error == ERESTART)) { + inoff = savinoff; + outoff = savoutoff; + } if (foffsets_locked) { - if (error == EINTR || error == ERESTART) { - inoff = savinoff; - outoff = savoutoff; - } if (inoffp == NULL) foffset_unlock(infp, inoff, 0); else @@ -5193,6 +5197,9 @@ out: foffset_unlock(outfp, outoff, 0); else *outoffp = outoff; + } else if (foffsets_set) { + *inoffp = inoff; + *outoffp = outoff; } if (outfp != NULL) fdrop(outfp, td); diff --git a/sys/libkern/qsort.c b/sys/libkern/qsort.c index 0255a3d64d76..342b1525dd8a 100644 --- a/sys/libkern/qsort.c +++ b/sys/libkern/qsort.c @@ -114,11 +114,10 @@ qsort(void *a, size_t n, size_t es, cmp_t *cmp) char *pa, *pb, *pc, *pd, *pl, *pm, *pn; size_t d1, d2; int cmp_result; - int swaptype_long, swaptype_int, swap_cnt; + int swaptype_long, swaptype_int; loop: SWAPINIT(long, a, es); SWAPINIT(int, a, es); - swap_cnt = 0; if (n < 7) { for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) for (pl = pm; @@ -147,7 +146,6 @@ loop: SWAPINIT(long, a, es); for (;;) { while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) { if (cmp_result == 0) { - swap_cnt = 1; swap(pa, pb); pa += es; } @@ -155,7 +153,6 @@ loop: SWAPINIT(long, a, es); } while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) { if (cmp_result == 0) { - swap_cnt = 1; swap(pc, pd); pd -= es; } @@ -164,18 +161,9 @@ loop: SWAPINIT(long, a, es); if (pb > pc) break; swap(pb, pc); - swap_cnt = 1; pb += es; pc -= es; } - if (swap_cnt == 0) { /* Switch to insertion sort */ - for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) - for (pl = pm; - pl > (char *)a && CMP(thunk, pl - es, pl) > 0; - pl -= es) - swap(pl, pl - es); - return; - } pn = (char *)a + n * es; d1 = MIN(pa - (char *)a, pb - pa); diff --git a/sys/modules/hid/Makefile b/sys/modules/hid/Makefile index 56c3267d8684..10720570deb7 100644 --- a/sys/modules/hid/Makefile +++ b/sys/modules/hid/Makefile @@ -17,6 +17,7 @@ SUBDIR += \ hsctrl \ ietp \ ps4dshock \ + u2f \ xb360gp .include <bsd.subdir.mk> diff --git a/sys/modules/hid/u2f/Makefile b/sys/modules/hid/u2f/Makefile new file mode 100644 index 000000000000..227e7154035b --- /dev/null +++ b/sys/modules/hid/u2f/Makefile @@ -0,0 +1,8 @@ +.PATH: ${SRCTOP}/sys/dev/hid + +KMOD= u2f +SRCS= u2f.c +SRCS+= opt_hid.h opt_usb.h +SRCS+= bus_if.h device_if.h + +.include <bsd.kmod.mk> diff --git a/sys/modules/ix/Makefile b/sys/modules/ix/Makefile index ad9f36e054e3..aec6eaabffdd 100644 --- a/sys/modules/ix/Makefile +++ b/sys/modules/ix/Makefile @@ -7,7 +7,7 @@ SRCS += if_ix.c if_bypass.c if_fdir.c if_sriov.c ix_txrx.c ixgbe_osdep.c # Shared source SRCS += ixgbe_common.c ixgbe_api.c ixgbe_phy.c ixgbe_mbx.c ixgbe_vf.c SRCS += ixgbe_dcb.c ixgbe_dcb_82598.c ixgbe_dcb_82599.c -SRCS += ixgbe_82598.c ixgbe_82599.c ixgbe_x540.c ixgbe_x550.c +SRCS += ixgbe_82598.c ixgbe_82599.c ixgbe_x540.c ixgbe_x550.c ixgbe_e610.c CFLAGS+= -I${SRCTOP}/sys/dev/ixgbe .include <bsd.kmod.mk> diff --git a/sys/modules/ixv/Makefile b/sys/modules/ixv/Makefile index 1b4431ac11cd..e7066bb7829b 100644 --- a/sys/modules/ixv/Makefile +++ b/sys/modules/ixv/Makefile @@ -7,7 +7,7 @@ SRCS += if_ixv.c if_fdir.c ix_txrx.c ixgbe_osdep.c # Shared source SRCS += ixgbe_common.c ixgbe_api.c ixgbe_phy.c ixgbe_mbx.c ixgbe_vf.c SRCS += ixgbe_dcb.c ixgbe_dcb_82598.c ixgbe_dcb_82599.c -SRCS += ixgbe_82598.c ixgbe_82599.c ixgbe_x540.c ixgbe_x550.c +SRCS += ixgbe_82598.c ixgbe_82599.c ixgbe_x540.c ixgbe_x550.c ixgbe_e610.c CFLAGS+= -I${SRCTOP}/sys/dev/ixgbe .include <bsd.kmod.mk> diff --git a/sys/modules/ossl/Makefile b/sys/modules/ossl/Makefile index 7a92742d6b36..ac2c752e922e 100644 --- a/sys/modules/ossl/Makefile +++ b/sys/modules/ossl/Makefile @@ -29,6 +29,7 @@ SRCS.arm= \ SRCS.aarch64= \ chacha-armv8.S \ + chacha-armv8-sve.S \ poly1305-armv8.S \ sha1-armv8.S \ sha256-armv8.S \ diff --git a/sys/modules/rtw88/Makefile b/sys/modules/rtw88/Makefile index 9739ede11073..822be639da43 100644 --- a/sys/modules/rtw88/Makefile +++ b/sys/modules/rtw88/Makefile @@ -43,6 +43,7 @@ SRCS+= ${LINUXKPI_GENSRCS} SRCS+= opt_wlan.h opt_inet6.h opt_inet.h CFLAGS+= -DKBUILD_MODNAME='"rtw88"' +CFLAGS+= -DLINUXKPI_VERSION=61400 CFLAGS+= -I${DEVRTW88DIR} CFLAGS+= ${LINUXKPI_INCLUDES} diff --git a/sys/modules/rtw89/Makefile b/sys/modules/rtw89/Makefile index 09580f288c62..e66f85c3ac17 100644 --- a/sys/modules/rtw89/Makefile +++ b/sys/modules/rtw89/Makefile @@ -39,6 +39,7 @@ SRCS+= ${LINUXKPI_GENSRCS} SRCS+= opt_wlan.h opt_inet6.h opt_inet.h opt_acpi.h CFLAGS+= -DKBUILD_MODNAME='"rtw89"' +CFLAGS+= -DLINUXKPI_VERSION=61400 CFLAGS+= -DLINUXKPI_WANT_LINUX_ACPI CFLAGS+= -I${DEVRTW89DIR} diff --git a/sys/modules/zfs/Makefile b/sys/modules/zfs/Makefile index 2dd9e2be3f56..ec531ed646a7 100644 --- a/sys/modules/zfs/Makefile +++ b/sys/modules/zfs/Makefile @@ -15,6 +15,7 @@ KMOD= zfs ${SRCDIR}/icp/asm-ppc64/sha2 \ ${SRCDIR}/icp/asm-ppc64/blake3 \ ${SRCDIR}/icp/asm-x86_64/blake3 \ + ${SRCDIR}/icp/asm-x86_64/modes \ ${SRCDIR}/icp/asm-x86_64/sha2 \ ${SRCDIR}/os/freebsd/spl \ ${SRCDIR}/os/freebsd/zfs \ @@ -40,7 +41,8 @@ CFLAGS+= -D__KERNEL__ -DFREEBSD_NAMECACHE -DBUILDING_ZFS \ .if ${MACHINE_ARCH} == "amd64" CFLAGS+= -D__x86_64 -DHAVE_SSE2 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 \ - -DHAVE_AVX -DHAVE_AVX2 -DHAVE_AVX512F -DHAVE_AVX512VL -DHAVE_AVX512BW + -DHAVE_AVX -DHAVE_AVX2 -DHAVE_AVX512F -DHAVE_AVX512VL -DHAVE_AVX512BW \ + -DHAVE_VAES -DHAVE_VPCLMULQDQ .endif .if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" || \ @@ -82,6 +84,9 @@ SRCS+= blake3_avx2.S \ blake3_avx512.S \ blake3_sse2.S \ blake3_sse41.S + +#icp/asm-x86_64/modes +SRCS+= aesni-gcm-avx2-vaes.S .endif #icp/algs/sha2 diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index 6561d62b5e26..12274bcceea1 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -704,6 +704,11 @@ /* iops->setattr() takes struct user_namespace* */ /* #undef HAVE_USERNS_IOPS_SETATTR */ +#ifdef __amd64__ +/* Define if host toolchain supports VAES */ +#define HAVE_VAES 1 +#endif + /* fops->clone_file_range() is available */ /* #undef HAVE_VFS_CLONE_FILE_RANGE */ @@ -743,6 +748,11 @@ /* __vmalloc page flags exists */ /* #undef HAVE_VMALLOC_PAGE_KERNEL */ +#ifdef __amd64__ +/* Define if host toolchain supports VPCLMULQDQ */ +#define HAVE_VPCLMULQDQ 1 +#endif + /* int (*writepage_t)() takes struct folio* */ /* #undef HAVE_WRITEPAGE_T_FOLIO */ @@ -830,7 +840,7 @@ /* #undef ZFS_DEVICE_MINOR */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.3.99-515-FreeBSD_g8302b6e32" +#define ZFS_META_ALIAS "zfs-2.3.99-571-FreeBSD_ga9410ccbd" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -839,7 +849,7 @@ /* #undef ZFS_META_DATA */ /* Define the maximum compatible kernel version. */ -#define ZFS_META_KVER_MAX "6.15" +#define ZFS_META_KVER_MAX "6.16" /* Define the minimum compatible kernel version. */ #define ZFS_META_KVER_MIN "4.18" @@ -860,7 +870,7 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "515-FreeBSD_g8302b6e32" +#define ZFS_META_RELEASE "571-FreeBSD_ga9410ccbd" /* Define the project version. */ #define ZFS_META_VERSION "2.3.99" diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 2f1ffe504350..5c265cf5b08e 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.3.99-515-g8302b6e32" +#define ZFS_META_GITREV "zfs-2.3.99-571-ga9410ccbd" diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 1e444be93e9f..66555fd1feb5 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -1500,8 +1500,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER; bif->bif_savedcaps = ifs->if_capenable; bif->bif_vlanproto = ETHERTYPE_VLAN; - if (sc->sc_flags & IFBRF_VLANFILTER) - bif->bif_pvid = sc->sc_defpvid; + bif->bif_pvid = sc->sc_defpvid; if (sc->sc_flags & IFBRF_DEFQINQ) bif->bif_flags |= IFBIF_QINQ; @@ -1970,9 +1969,6 @@ bridge_ioctl_sifpvid(struct bridge_softc *sc, void *arg) struct ifbreq *req = arg; struct bridge_iflist *bif; - if ((sc->sc_flags & IFBRF_VLANFILTER) == 0) - return (EXTERROR(EINVAL, "VLAN filtering not enabled")); - bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (EXTERROR(ENOENT, "Interface is not a bridge member")); @@ -2410,12 +2406,10 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m, mflags = m->m_flags; /* - * If VLAN filtering is enabled, and the native VLAN ID of the - * outgoing interface matches the VLAN ID of the frame, remove - * the VLAN header. + * If the native VLAN ID of the outgoing interface matches the + * VLAN ID of the frame, remove the VLAN tag. */ - if ((sc->sc_flags & IFBRF_VLANFILTER) && - bif->bif_pvid != DOT1Q_VID_NULL && + if (bif->bif_pvid != DOT1Q_VID_NULL && VLANTAGOF(m) == bif->bif_pvid) { m->m_flags &= ~M_VLANTAG; m->m_pkthdr.ether_vtag = 0; @@ -3296,9 +3290,19 @@ bridge_vfilter_in(const struct bridge_iflist *sbif, struct mbuf *m) if (vlan > DOT1Q_VID_MAX) return (false); - /* If VLAN filtering isn't enabled, pass everything. */ - if ((sbif->bif_sc->sc_flags & IFBRF_VLANFILTER) == 0) + /* + * If VLAN filtering isn't enabled, pass everything, but add a tag + * if the port has a pvid configured. + */ + if ((sbif->bif_sc->sc_flags & IFBRF_VLANFILTER) == 0) { + if (vlan == DOT1Q_VID_NULL && + sbif->bif_pvid != DOT1Q_VID_NULL) { + m->m_pkthdr.ether_vtag = sbif->bif_pvid; + m->m_flags |= M_VLANTAG; + } + return (true); + } /* If Q-in-Q is disabled, check for stacked tags. */ if ((sbif->bif_flags & IFBIF_QINQ) == 0) { diff --git a/sys/net/if_clone.h b/sys/net/if_clone.h index 5a74ffa1cc2f..d780e49af25f 100644 --- a/sys/net/if_clone.h +++ b/sys/net/if_clone.h @@ -153,7 +153,7 @@ int if_clone_destroy(const char *); int if_clone_list(struct if_clonereq *); void if_clone_restoregroup(struct ifnet *); -/* The below interfaces are used only by epair(4). */ +/* The below interfaces are used only by epair(4) and tun(4)/tap(4). */ void if_clone_addif(struct if_clone *, struct ifnet *); int if_clone_destroyif(struct if_clone *, struct ifnet *); diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h index 1efc220aa8e1..e99df0b85ccf 100644 --- a/sys/net/if_pfsync.h +++ b/sys/net/if_pfsync.h @@ -160,8 +160,8 @@ struct pfsync_ins_ack { struct pfsync_upd_c { u_int64_t id; - struct pfsync_state_peer src; - struct pfsync_state_peer dst; + struct pf_state_peer_export src; + struct pf_state_peer_export dst; u_int32_t creatorid; u_int32_t expire; u_int8_t timeout; diff --git a/sys/net/if_tap.h b/sys/net/if_tap.h index d84cd2eba6f3..8297b8d9e3d2 100644 --- a/sys/net/if_tap.h +++ b/sys/net/if_tap.h @@ -57,6 +57,8 @@ #define TAPGIFNAME TUNGIFNAME #define TAPSVNETHDR _IOW('t', 91, int) #define TAPGVNETHDR _IOR('t', 94, int) +#define TAPSTRANSIENT TUNSTRANSIENT +#define TAPGTRANSIENT TUNGTRANSIENT /* VMware ioctl's */ #define VMIO_SIOCSIFFLAGS _IOWINT('V', 0) diff --git a/sys/net/if_tun.h b/sys/net/if_tun.h index a8fb61db45a2..ccdc25944823 100644 --- a/sys/net/if_tun.h +++ b/sys/net/if_tun.h @@ -43,5 +43,7 @@ struct tuninfo { #define TUNSIFPID _IO('t', 95) #define TUNSIFHEAD _IOW('t', 96, int) #define TUNGIFHEAD _IOR('t', 97, int) +#define TUNSTRANSIENT _IOW('t', 98, int) +#define TUNGTRANSIENT _IOR('t', 99, int) #endif /* !_NET_IF_TUN_H_ */ diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c index 5e6f65c04b2f..c8dbb6aa8893 100644 --- a/sys/net/if_tuntap.c +++ b/sys/net/if_tuntap.c @@ -132,6 +132,7 @@ struct tuntap_softc { #define TUN_DYING 0x0200 #define TUN_L2 0x0400 #define TUN_VMNET 0x0800 +#define TUN_TRANSIENT 0x1000 #define TUN_DRIVER_IDENT_MASK (TUN_L2 | TUN_VMNET) #define TUN_READY (TUN_OPEN | TUN_INITED) @@ -443,6 +444,18 @@ tuntap_name2info(const char *name, int *outunit, int *outflags) return (0); } +static struct if_clone * +tuntap_cloner_from_flags(int tun_flags) +{ + + for (u_int i = 0; i < NDRV; i++) + if ((tun_flags & TUN_DRIVER_IDENT_MASK) == + tuntap_drivers[i].ident_flags) + return (V_tuntap_driver_cloners[i]); + + return (NULL); +} + /* * Get driver information from a set of flags specified. Masks the identifying * part of the flags and compares it against all of the available @@ -615,19 +628,39 @@ out: CURVNET_RESTORE(); } -static void -tun_destroy(struct tuntap_softc *tp) +static int +tun_destroy(struct tuntap_softc *tp, bool may_intr) { + int error; TUN_LOCK(tp); + + /* + * Transient tunnels may have set TUN_DYING if we're being destroyed as + * a result of the last close, which we'll allow. + */ + MPASS((tp->tun_flags & (TUN_DYING | TUN_TRANSIENT)) != TUN_DYING); tp->tun_flags |= TUN_DYING; - if (tp->tun_busy != 0) - cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx); - else - TUN_UNLOCK(tp); + error = 0; + while (tp->tun_busy != 0) { + if (may_intr) + error = cv_wait_sig(&tp->tun_cv, &tp->tun_mtx); + else + cv_wait(&tp->tun_cv, &tp->tun_mtx); + if (error != 0) { + tp->tun_flags &= ~TUN_DYING; + TUN_UNLOCK(tp); + return (error); + } + } + TUN_UNLOCK(tp); CURVNET_SET(TUN2IFP(tp)->if_vnet); + mtx_lock(&tunmtx); + TAILQ_REMOVE(&tunhead, tp, tun_list); + mtx_unlock(&tunmtx); + /* destroy_dev will take care of any alias. */ destroy_dev(tp->tun_dev); seldrain(&tp->tun_rsel); @@ -648,6 +681,8 @@ tun_destroy(struct tuntap_softc *tp) cv_destroy(&tp->tun_cv); free(tp, M_TUN); CURVNET_RESTORE(); + + return (0); } static int @@ -655,12 +690,7 @@ tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp, uint32_t fla { struct tuntap_softc *tp = ifp->if_softc; - mtx_lock(&tunmtx); - TAILQ_REMOVE(&tunhead, tp, tun_list); - mtx_unlock(&tunmtx); - tun_destroy(tp); - - return (0); + return (tun_destroy(tp, true)); } static void @@ -702,9 +732,9 @@ tun_uninit(const void *unused __unused) mtx_lock(&tunmtx); while ((tp = TAILQ_FIRST(&tunhead)) != NULL) { - TAILQ_REMOVE(&tunhead, tp, tun_list); mtx_unlock(&tunmtx); - tun_destroy(tp); + /* tun_destroy() will remove it from the tailq. */ + tun_destroy(tp, false); mtx_lock(&tunmtx); } mtx_unlock(&tunmtx); @@ -1217,6 +1247,23 @@ out: tun_vnethdr_set(ifp, 0); tun_unbusy_locked(tp); + if ((tp->tun_flags & TUN_TRANSIENT) != 0) { + struct if_clone *cloner; + int error __diagused; + + /* Mark it busy so that nothing can re-open it. */ + tp->tun_flags |= TUN_DYING; + TUN_UNLOCK(tp); + + CURVNET_SET_QUIET(ifp->if_home_vnet); + cloner = tuntap_cloner_from_flags(tp->tun_flags); + CURVNET_RESTORE(); + + error = if_clone_destroyif(cloner, ifp); + MPASS(error == 0 || error == EINTR || error == ERESTART); + return; + } + TUN_UNLOCK(tp); } @@ -1668,6 +1715,19 @@ tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, case TUNGDEBUG: *(int *)data = tundebug; break; + case TUNSTRANSIENT: + TUN_LOCK(tp); + if (*(int *)data) + tp->tun_flags |= TUN_TRANSIENT; + else + tp->tun_flags &= ~TUN_TRANSIENT; + TUN_UNLOCK(tp); + break; + case TUNGTRANSIENT: + TUN_LOCK(tp); + *(int *)data = (tp->tun_flags & TUN_TRANSIENT) != 0; + TUN_UNLOCK(tp); + break; case FIONBIO: break; case FIOASYNC: diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 2eca81d54f99..98c59e5de988 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -142,6 +142,7 @@ struct iflib_ctx; static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); static void iflib_timer(void *arg); static void iflib_tqg_detach(if_ctx_t ctx); +static int iflib_simple_transmit(if_t ifp, struct mbuf *m); typedef struct iflib_filter_info { driver_filter_t *ifi_filter; @@ -198,6 +199,7 @@ struct iflib_ctx { uint8_t ifc_sysctl_use_logical_cores; uint16_t ifc_sysctl_extra_msix_vectors; bool ifc_cpus_are_physical_cores; + bool ifc_sysctl_simple_tx; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; @@ -725,6 +727,7 @@ static void iflib_free_intr_mem(if_ctx_t ctx); #ifndef __NO_STRICT_ALIGNMENT static struct mbuf *iflib_fixup_rx(struct mbuf *m); #endif +static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh); static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets = SLIST_HEAD_INITIALIZER(cpu_offsets); @@ -2624,8 +2627,10 @@ iflib_stop(if_ctx_t ctx) #endif /* DEV_NETMAP */ CALLOUT_UNLOCK(txq); - /* clean any enqueued buffers */ - iflib_ifmp_purge(txq); + if (!ctx->ifc_sysctl_simple_tx) { + /* clean any enqueued buffers */ + iflib_ifmp_purge(txq); + } /* Free any existing tx buffers. */ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); @@ -2890,51 +2895,6 @@ iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) return (m); } -#if defined(INET6) || defined(INET) -static void -iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6) -{ - CURVNET_SET(if_getvnet(lc->ifp)); -#if defined(INET6) - *v6 = V_ip6_forwarding; -#endif -#if defined(INET) - *v4 = V_ipforwarding; -#endif - CURVNET_RESTORE(); -} - -/* - * Returns true if it's possible this packet could be LROed. - * if it returns false, it is guaranteed that tcp_lro_rx() - * would not return zero. - */ -static bool -iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding) -{ - struct ether_header *eh; - - eh = mtod(m, struct ether_header *); - switch (eh->ether_type) { -#if defined(INET6) - case htons(ETHERTYPE_IPV6): - return (!v6_forwarding); -#endif -#if defined(INET) - case htons(ETHERTYPE_IP): - return (!v4_forwarding); -#endif - } - - return (false); -} -#else -static void -iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused) -{ -} -#endif - static void _task_fn_rx_watchdog(void *context) { @@ -2955,19 +2915,19 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; +#if defined(INET6) || defined(INET) int lro_enabled; - bool v4_forwarding, v6_forwarding, lro_possible; +#endif uint8_t retval = 0; /* * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ - struct mbuf *m, *mh, *mt, *mf; + struct mbuf *m, *mh, *mt; NET_EPOCH_ASSERT(); - lro_possible = v4_forwarding = v6_forwarding = false; ifp = ctx->ifc_ifp; mh = mt = NULL; MPASS(budget > 0); @@ -2983,6 +2943,10 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) return (retval); } +#if defined(INET6) || defined(INET) + lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); +#endif + /* pfil needs the vnet to be set */ CURVNET_SET_QUIET(if_getvnet(ifp)); for (budget_left = budget; budget_left > 0 && avail > 0;) { @@ -3027,7 +2991,17 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) if (__predict_false(m == NULL)) continue; - /* imm_pkt: -- cxgb */ +#ifndef __NO_STRICT_ALIGNMENT + if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) + continue; +#endif +#if defined(INET6) || defined(INET) + if (lro_enabled) { + tcp_lro_queue_mbuf(&rxq->ifr_lc, m); + continue; + } +#endif + if (mh == NULL) mh = mt = m; else { @@ -3040,49 +3014,8 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) retval |= iflib_fl_refill_all(ctx, fl); - lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); - if (lro_enabled) - iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding); - mt = mf = NULL; - while (mh != NULL) { - m = mh; - mh = mh->m_nextpkt; - m->m_nextpkt = NULL; -#ifndef __NO_STRICT_ALIGNMENT - if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) - continue; -#endif -#if defined(INET6) || defined(INET) - if (lro_enabled) { - if (!lro_possible) { - lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding); - if (lro_possible && mf != NULL) { - if_input(ifp, mf); - DBG_COUNTER_INC(rx_if_input); - mt = mf = NULL; - } - } - if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC | CSUM_L4_VALID)) == - (CSUM_L4_CALC | CSUM_L4_VALID)) { - if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) - continue; - } - } -#endif - if (lro_possible) { - if_input(ifp, m); - DBG_COUNTER_INC(rx_if_input); - continue; - } - - if (mf == NULL) - mf = m; - if (mt != NULL) - mt->m_nextpkt = m; - mt = m; - } - if (mf != NULL) { - if_input(ifp, mf); + if (mh != NULL) { + if_input(ifp, mh); DBG_COUNTER_INC(rx_if_input); } @@ -3707,13 +3640,16 @@ defrag: * cxgb */ if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { - txq->ift_no_desc_avail++; - bus_dmamap_unload(buf_tag, map); - DBG_COUNTER_INC(encap_txq_avail_fail); - DBG_COUNTER_INC(encap_txd_encap_fail); - if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) - GROUPTASK_ENQUEUE(&txq->ift_task); - return (ENOBUFS); + (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); + if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { + txq->ift_no_desc_avail++; + bus_dmamap_unload(buf_tag, map); + DBG_COUNTER_INC(encap_txq_avail_fail); + DBG_COUNTER_INC(encap_txd_encap_fail); + if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) + GROUPTASK_ENQUEUE(&txq->ift_task); + return (ENOBUFS); + } } /* * On Intel cards we can greatly reduce the number of TX interrupts @@ -4086,6 +4022,12 @@ _task_fn_tx(void *context) netmap_tx_irq(ifp, txq->ift_id)) goto skip_ifmp; #endif + if (ctx->ifc_sysctl_simple_tx) { + mtx_lock(&txq->ift_mtx); + (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); + mtx_unlock(&txq->ift_mtx); + goto skip_ifmp; + } #ifdef ALTQ if (if_altq_is_enabled(ifp)) iflib_altq_if_start(ifp); @@ -4099,9 +4041,8 @@ _task_fn_tx(void *context) */ if (abdicate) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); -#ifdef DEV_NETMAP + skip_ifmp: -#endif if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else @@ -5203,7 +5144,14 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; - + if (ctx->ifc_sysctl_simple_tx) { +#ifndef ALTQ + if_settransmitfn(ifp, iflib_simple_transmit); + device_printf(dev, "using simple if_transmit\n"); +#else + device_printf(dev, "ALTQ prevents using simple if_transmit\n"); +#endif + } iflib_reset_qvalues(ctx); IFNET_WLOCK(); CTX_LOCK(ctx); @@ -6838,6 +6786,9 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx) SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, "driver version"); + SYSCTL_ADD_BOOL(ctx_list, oid_list, OID_AUTO, "simple_tx", + CTLFLAG_RDTUN, &ctx->ifc_sysctl_simple_tx, 0, + "use simple tx ring"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); @@ -7160,3 +7111,48 @@ iflib_debugnet_poll(if_t ifp, int count) return (0); } #endif /* DEBUGNET */ + + +static inline iflib_txq_t +iflib_simple_select_queue(if_ctx_t ctx, struct mbuf *m) +{ + int qidx; + + if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m)) + qidx = QIDX(ctx, m); + else + qidx = NTXQSETS(ctx) + FIRST_QSET(ctx) - 1; + return (&ctx->ifc_txqs[qidx]); +} + +static int +iflib_simple_transmit(if_t ifp, struct mbuf *m) +{ + if_ctx_t ctx; + iflib_txq_t txq; + int error; + int bytes_sent = 0, pkt_sent = 0, mcast_sent = 0; + + + ctx = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return (EBUSY); + txq = iflib_simple_select_queue(ctx, m); + mtx_lock(&txq->ift_mtx); + error = iflib_encap(txq, &m); + if (error == 0) { + pkt_sent++; + bytes_sent += m->m_pkthdr.len; + mcast_sent += !!(m->m_flags & M_MCAST); + (void)iflib_txd_db_check(txq, true); + } + (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); + mtx_unlock(&txq->ift_mtx); + if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); + if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); + if (mcast_sent) + if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); + + return (error); +} diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index c397f0b67896..d6c13470f2eb 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1020,7 +1020,7 @@ struct pf_state_scrub_export { #define PF_SCRUB_FLAG_VALID 0x01 uint8_t scrub_flag; uint32_t pfss_ts_mod; /* timestamp modulation */ -}; +} __packed; struct pf_state_key_export { struct pf_addr addr[2]; @@ -1037,7 +1037,7 @@ struct pf_state_peer_export { uint8_t state; /* active state level */ uint8_t wscale; /* window scaling factor */ uint8_t dummy[6]; -}; +} __packed; _Static_assert(sizeof(struct pf_state_peer_export) == 32, "size incorrect"); struct pf_state_export { @@ -1179,26 +1179,6 @@ struct pf_test_ctx { * Unified state structures for pulling states out of the kernel * used by pfsync(4) and the pf(4) ioctl. */ -struct pfsync_state_scrub { - u_int16_t pfss_flags; - u_int8_t pfss_ttl; /* stashed TTL */ -#define PFSYNC_SCRUB_FLAG_VALID 0x01 - u_int8_t scrub_flag; - u_int32_t pfss_ts_mod; /* timestamp modulation */ -} __packed; - -struct pfsync_state_peer { - struct pfsync_state_scrub scrub; /* state is scrubbed */ - u_int32_t seqlo; /* Max sequence number sent */ - u_int32_t seqhi; /* Max the other end ACKd + win */ - u_int32_t seqdiff; /* Sequence number modulator */ - u_int16_t max_win; /* largest window (pre scaling) */ - u_int16_t mss; /* Maximum segment size option */ - u_int8_t state; /* active state level */ - u_int8_t wscale; /* window scaling factor */ - u_int8_t pad[6]; -} __packed; - struct pfsync_state_key { struct pf_addr addr[2]; u_int16_t port[2]; @@ -1208,8 +1188,8 @@ struct pfsync_state_1301 { u_int64_t id; char ifname[IFNAMSIZ]; struct pfsync_state_key key[2]; - struct pfsync_state_peer src; - struct pfsync_state_peer dst; + struct pf_state_peer_export src; + struct pf_state_peer_export dst; struct pf_addr rt_addr; u_int32_t rule; u_int32_t anchor; @@ -1235,8 +1215,8 @@ struct pfsync_state_1400 { u_int64_t id; char ifname[IFNAMSIZ]; struct pfsync_state_key key[2]; - struct pfsync_state_peer src; - struct pfsync_state_peer dst; + struct pf_state_peer_export src; + struct pf_state_peer_export dst; struct pf_addr rt_addr; u_int32_t rule; u_int32_t anchor; @@ -1323,39 +1303,10 @@ extern pflog_packet_t *pflog_packet_ptr; /* for copies to/from network byte order */ /* ioctl interface also uses network byte order */ -#define pf_state_peer_hton(s,d) do { \ - (d)->seqlo = htonl((s)->seqlo); \ - (d)->seqhi = htonl((s)->seqhi); \ - (d)->seqdiff = htonl((s)->seqdiff); \ - (d)->max_win = htons((s)->max_win); \ - (d)->mss = htons((s)->mss); \ - (d)->state = (s)->state; \ - (d)->wscale = (s)->wscale; \ - if ((s)->scrub) { \ - (d)->scrub.pfss_flags = \ - htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ - (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ - (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ - (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ - } \ -} while (0) - -#define pf_state_peer_ntoh(s,d) do { \ - (d)->seqlo = ntohl((s)->seqlo); \ - (d)->seqhi = ntohl((s)->seqhi); \ - (d)->seqdiff = ntohl((s)->seqdiff); \ - (d)->max_win = ntohs((s)->max_win); \ - (d)->mss = ntohs((s)->mss); \ - (d)->state = (s)->state; \ - (d)->wscale = (s)->wscale; \ - if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ - (d)->scrub != NULL) { \ - (d)->scrub->pfss_flags = \ - ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ - (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ - (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ - } \ -} while (0) +void pf_state_peer_hton(const struct pf_state_peer *, + struct pf_state_peer_export *); +void pf_state_peer_ntoh(const struct pf_state_peer_export *, + struct pf_state_peer *); #define pf_state_counter_hton(s,d) do { \ d[0] = htonl((s>>32)&0xffffffff); \ diff --git a/sys/netinet/icmp6.h b/sys/netinet/icmp6.h index 7845b682f3e4..2ca5b3433e47 100644 --- a/sys/netinet/icmp6.h +++ b/sys/netinet/icmp6.h @@ -713,9 +713,6 @@ void icmp6_redirect_input(struct mbuf *, int); void icmp6_redirect_output(struct mbuf *, struct nhop_object *); int icmp6_ratelimit(const struct in6_addr *, const int, const int); -struct ip6ctlparam; -void icmp6_mtudisc_update(struct ip6ctlparam *, int); - /* XXX: is this the right place for these macros? */ #define icmp6_ifstat_inc(ifp, tag) \ do { \ diff --git a/sys/netinet/icmp_var.h b/sys/netinet/icmp_var.h index d6b75e482e35..b39479565bd6 100644 --- a/sys/netinet/icmp_var.h +++ b/sys/netinet/icmp_var.h @@ -100,7 +100,6 @@ void kmod_icmpstat_inc(int statnum); SYSCTL_DECL(_net_inet_icmp); extern int badport_bandlim(int); -#define BANDLIM_UNLIMITED -1 #define BANDLIM_ICMP_UNREACH 0 #define BANDLIM_ICMP_ECHO 1 #define BANDLIM_ICMP_TSTAMP 2 diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index 71b75d18efd0..fc0848b2c944 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -391,7 +391,6 @@ stdreply: icmpelen = max(8, min(V_icmp_quotelen, ntohs(oip->ip_len) - nip->ip_hl = 5; nip->ip_p = IPPROTO_ICMP; nip->ip_tos = 0; - nip->ip_off = 0; if (V_error_keeptags) m_tag_copy_chain(m, n, M_NOWAIT); @@ -872,6 +871,8 @@ match: mac_netinet_icmp_replyinplace(m); #endif ip->ip_src = t; + /* ip->ip_tos will be reflected. */ + ip->ip_off = htons(0); ip->ip_ttl = V_ip_defttl; if (optlen > 0) { @@ -1181,7 +1182,7 @@ badport_bandlim(int which) { int64_t pps; - if (V_icmplim == 0 || which == BANDLIM_UNLIMITED) + if (V_icmplim == 0) return (0); KASSERT(which >= 0 && which < BANDLIM_MAX, diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index d392cbe09950..6492495dc583 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -609,7 +609,6 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) int tlen = 0, off; int drop_hdrlen; int thflags; - int rstreason = 0; /* For badport_bandlim accounting purposes */ int lookupflag; uint8_t iptos; struct m_tag *fwd_tag = NULL; @@ -905,23 +904,22 @@ findpcb: * XXX MRT Send RST using which routing table? */ if (inp == NULL) { - if (rstreason != 0) { + if ((lookupflag & INPLOOKUP_WILDCARD) == 0) { /* We came here after second (safety) lookup. */ - MPASS((lookupflag & INPLOOKUP_WILDCARD) == 0); - goto dropwithreset; - } - /* - * Log communication attempts to ports that are not - * in use. - */ - if ((V_tcp_log_in_vain == 1 && (thflags & TH_SYN)) || - V_tcp_log_in_vain == 2) { - if ((s = tcp_log_vain(NULL, th, (void *)ip, ip6))) + MPASS(!closed_port); + } else { + /* + * Log communication attempts to ports that are not + * in use. + */ + if (((V_tcp_log_in_vain == 1 && (thflags & TH_SYN)) || + V_tcp_log_in_vain == 2) && + (s = tcp_log_vain(NULL, th, (void *)ip, ip6))) { log(LOG_INFO, "%s; %s: Connection attempt " "to closed port\n", s, __func__); + } + closed_port = true; } - rstreason = BANDLIM_TCP_RST; - closed_port = true; goto dropwithreset; } INP_LOCK_ASSERT(inp); @@ -1012,13 +1010,11 @@ findpcb: * down or it is in the CLOSED state. Either way we drop the * segment and send an appropriate response. */ - rstreason = BANDLIM_TCP_RST; closed_port = true; goto dropwithreset; } if ((tp->t_port != port) && (tp->t_state > TCPS_LISTEN)) { - rstreason = BANDLIM_TCP_RST; closed_port = true; goto dropwithreset; } @@ -1102,7 +1098,8 @@ findpcb: * don't want to sent RST for the second ACK, * so we perform second lookup without wildcard * match, hoping to find the new socket. If - * the ACK is stray indeed, rstreason would + * the ACK is stray indeed, the missing + * INPLOOKUP_WILDCARD flag in lookupflag would * hint the above code that the lookup was a * second attempt. * @@ -1110,7 +1107,6 @@ findpcb: * of the failure cause. */ INP_WUNLOCK(inp); - rstreason = BANDLIM_TCP_RST; lookupflag &= ~INPLOOKUP_WILDCARD; goto findpcb; } @@ -1134,7 +1130,6 @@ tfo_socket_result: V_tcp_sc_rst_sock_fail ? "sending RST" : "try again"); if (V_tcp_sc_rst_sock_fail) { - rstreason = BANDLIM_TCP_RST; goto dropwithreset; } else goto dropunlock; @@ -1201,7 +1196,6 @@ tfo_socket_result: s, __func__); syncache_badack(&inc, port); /* XXX: Not needed! */ TCPSTAT_INC(tcps_badsyn); - rstreason = BANDLIM_TCP_RST; goto dropwithreset; } /* @@ -1277,7 +1271,6 @@ tfo_socket_result: "Connection attempt to deprecated " "IPv6 address rejected\n", s, __func__); - rstreason = BANDLIM_TCP_RST; goto dropwithreset; } } @@ -1398,8 +1391,7 @@ dropwithreset: * When blackholing do not respond with a RST but * completely ignore the segment and drop it. */ - if (rstreason == BANDLIM_TCP_RST && - ((!closed_port && V_blackhole == 3) || + if (((!closed_port && V_blackhole == 3) || (closed_port && ((V_blackhole == 1 && (thflags & TH_SYN)) || V_blackhole > 1))) && (V_blackhole_local || ( @@ -1414,7 +1406,7 @@ dropwithreset: ))) goto dropunlock; TCP_PROBE5(receive, NULL, tp, m, tp, th); - tcp_dropwithreset(m, th, tp, tlen, rstreason); + tcp_dropwithreset(m, th, tp, tlen); m = NULL; /* mbuf chain got consumed. */ dropunlock: @@ -1523,7 +1515,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, uint16_t thflags; int acked, ourfinisacked, needoutput = 0; sackstatus_t sack_changed; - int rstreason, todrop, win, incforsyn = 0; + int todrop, win, incforsyn = 0; uint32_t tiwin; uint16_t nsegs; char *s; @@ -1568,7 +1560,6 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, */ if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { - rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } @@ -1984,7 +1975,6 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { - rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } @@ -1997,7 +1987,6 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, * FIN, or a RST. */ if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) { - rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } else if (thflags & TH_SYN) { @@ -2264,7 +2253,6 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, * for the "LAND" DoS attack. */ if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) { - rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } @@ -2346,7 +2334,6 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST); tp = tcp_close(tp); TCPSTAT_INC(tcps_rcvafterclose); - rstreason = BANDLIM_TCP_RST; goto dropwithreset; } @@ -3443,7 +3430,6 @@ dropafterack: if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) && (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max)) ) { - rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } @@ -3455,11 +3441,10 @@ dropafterack: return; dropwithreset: + tcp_dropwithreset(m, th, NULL, tlen); if (tp != NULL) { - tcp_dropwithreset(m, th, tp, tlen, rstreason); INP_WUNLOCK(inp); - } else - tcp_dropwithreset(m, th, NULL, tlen, rstreason); + } return; drop: @@ -3479,8 +3464,7 @@ drop: * tp may be NULL. */ void -tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, - int tlen, int rstreason) +tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int tlen) { #ifdef INET struct ip *ip; @@ -3520,7 +3504,7 @@ tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, #endif /* Perform bandwidth limiting. */ - if (badport_bandlim(rstreason) < 0) + if (badport_bandlim(BANDLIM_TCP_RST) < 0) goto drop; /* tcp_respond consumes the mbuf chain. */ diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index f2960ab9c636..fed259f4d8e1 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -7863,7 +7863,7 @@ nothing_left: /* tcp_close will kill the inp pre-log the Reset */ tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST); tp = tcp_close(tp); - ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset(m, tp, th, tlen); BBR_STAT_INC(bbr_dropped_af_data); return (1); } @@ -8763,7 +8763,7 @@ bbr_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset(m, tp, th, tlen); return (1); } if ((thflags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) { @@ -8965,7 +8965,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset(m, tp, th, tlen); return (1); } if (tp->t_flags & TF_FASTOPEN) { @@ -8977,7 +8977,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset(m, tp, th, tlen); return (1); } else if (thflags & TH_SYN) { /* non-initial SYN is ignored */ @@ -9010,7 +9010,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if (SEQ_LT(th->th_seq, tp->irs)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset(m, tp, th, tlen); return (1); } if (ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { @@ -9288,7 +9288,7 @@ bbr_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset_conn(m, tp, th, tlen); return (1); } } @@ -9385,7 +9385,7 @@ bbr_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset_conn(m, tp, th, tlen); return (1); } } @@ -9405,7 +9405,7 @@ close_now: tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST); tp = tcp_close(tp); KMOD_TCPSTAT_INC(tcps_rcvafterclose); - ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, (*tlen)); + ctf_do_dropwithreset(m, tp, th, *tlen); return (1); } if (sbavail(&so->so_snd) == 0) @@ -9535,7 +9535,7 @@ bbr_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset_conn(m, tp, th, tlen); return (1); } } @@ -9637,7 +9637,7 @@ bbr_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset_conn(m, tp, th, tlen); return (1); } } @@ -9739,7 +9739,7 @@ bbr_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset_conn(m, tp, th, tlen); return (1); } } @@ -9848,7 +9848,7 @@ bbr_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset_conn(m, tp, th, tlen); return (1); } } @@ -11510,7 +11510,7 @@ bbr_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset_conn(m, tp, th, tlen); return (1); } if (tiwin > bbr->r_ctl.rc_high_rwnd) diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 2dfcad84ad99..71dd4de6baf9 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -12038,7 +12038,7 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, /* tcp_close will kill the inp pre-log the Reset */ tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST); tp = tcp_close(tp); - ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset(m, tp, th, tlen); return (1); } } @@ -12876,7 +12876,7 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset(m, tp, th, tlen); return (1); } if ((thflags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) { @@ -13090,7 +13090,7 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset(m, tp, th, tlen); return (1); } if (tp->t_flags & TF_FASTOPEN) { @@ -13103,7 +13103,7 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset(m, tp, th, tlen); return (1); } else if (thflags & TH_SYN) { /* non-initial SYN is ignored */ @@ -13137,7 +13137,7 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if (SEQ_LT(th->th_seq, tp->irs)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset(m, tp, th, tlen); return (1); } if (ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { @@ -13400,7 +13400,7 @@ rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event(rack, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset_conn(m, tp, th, tlen); return (1); } } @@ -13496,7 +13496,7 @@ rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset_conn(m, tp, th, tlen); return (1); } } @@ -13518,7 +13518,7 @@ rack_check_data_after_close(struct mbuf *m, tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST); tp = tcp_close(tp); KMOD_TCPSTAT_INC(tcps_rcvafterclose); - ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, (*tlen)); + ctf_do_dropwithreset(m, tp, th, *tlen); return (1); } if (sbavail(&so->so_snd) == 0) @@ -13646,7 +13646,7 @@ rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset_conn(m, tp, th, tlen); return (1); } } @@ -13747,7 +13747,7 @@ rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset_conn(m, tp, th, tlen); return (1); } } @@ -13849,7 +13849,7 @@ rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset_conn(m, tp, th, tlen); return (1); } } @@ -13953,7 +13953,7 @@ rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset_conn(m, tp, th, tlen); return (1); } } @@ -16653,7 +16653,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset(m, tp, th, tlen); #ifdef TCP_ACCOUNTING sched_unpin(); #endif diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c index fb013d3d17f0..fc12672a45f7 100644 --- a/sys/netinet/tcp_stacks/rack_bbr_common.c +++ b/sys/netinet/tcp_stacks/rack_bbr_common.c @@ -507,9 +507,9 @@ ctf_flight_size(struct tcpcb *tp, uint32_t rc_sacked) void ctf_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, - int32_t rstreason, int32_t tlen) + int32_t tlen) { - tcp_dropwithreset(m, th, tp, tlen, rstreason); + tcp_dropwithreset(m, th, tp, tlen); if (tp != NULL) INP_WUNLOCK(tptoinpcb(tp)); } @@ -670,7 +670,7 @@ ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max))) { *ret_val = 1; - ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); + ctf_do_dropwithreset(m, tp, th, tlen); return; } else *ret_val = 0; @@ -864,10 +864,10 @@ ctf_calc_rwin(struct socket *so, struct tcpcb *tp) void ctf_do_dropwithreset_conn(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, - int32_t rstreason, int32_t tlen) + int32_t tlen) { - tcp_dropwithreset(m, th, tp, tlen, rstreason); + tcp_dropwithreset(m, th, tp, tlen); tp = tcp_drop(tp, ETIMEDOUT); if (tp) INP_WUNLOCK(tptoinpcb(tp)); diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.h b/sys/netinet/tcp_stacks/rack_bbr_common.h index 6a8a056d89b0..cd33cb8ce50b 100644 --- a/sys/netinet/tcp_stacks/rack_bbr_common.h +++ b/sys/netinet/tcp_stacks/rack_bbr_common.h @@ -101,7 +101,7 @@ ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp, void ctf_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, - struct tcphdr *th, int32_t rstreason, int32_t tlen); + struct tcphdr *th, int32_t tlen); void ctf_do_drop(struct mbuf *m, struct tcpcb *tp); @@ -125,7 +125,7 @@ ctf_calc_rwin(struct socket *so, struct tcpcb *tp); void ctf_do_dropwithreset_conn(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, - int32_t rstreason, int32_t tlen); + int32_t tlen); uint32_t ctf_fixed_maxseg(struct tcpcb *tp); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 1fce7c591639..2e039ebbfdd2 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -3205,7 +3205,7 @@ tcp6_next_pmtu(const struct icmp6_hdr *icmp6) * small, set to the min. */ if (mtu < IPV6_MMTU) - mtu = IPV6_MMTU - 8; /* XXXNP: what is the adjustment for? */ + mtu = IPV6_MMTU; return (mtu); } diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index c095fc8f7765..ce63fcf9ffc0 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -101,7 +101,7 @@ sysctl_net_inet_tcp_nolocaltimewait(SYSCTL_HANDLER_ARGS) if (error == 0 && req->newptr) { V_nolocaltimewait = new; gone_in(16, "net.inet.tcp.nolocaltimewait is obsolete." - " Use net.inet.tcp.local_msl instead.\n"); + " Use net.inet.tcp.msl_local instead.\n"); } return (error); } diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index b90f65e83cb1..53856bae9a66 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1379,8 +1379,7 @@ int tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *, void tcp_reass_global_init(void); void tcp_reass_flush(struct tcpcb *); void tcp_dooptions(struct tcpopt *, u_char *, int, int); -void tcp_dropwithreset(struct mbuf *, struct tcphdr *, - struct tcpcb *, int, int); +void tcp_dropwithreset(struct mbuf *, struct tcphdr *, struct tcpcb *, int); void tcp_pulloutofband(struct socket *, struct tcphdr *, struct mbuf *, int); void tcp_xmit_timer(struct tcpcb *, int); diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index df8f293f9426..3e6519118a40 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -448,7 +448,7 @@ udp_multi_input(struct mbuf *m, int proto, struct sockaddr_in *udp_in) /* * No matching pcb found; discard datagram. (No need * to send an ICMP Port Unreachable for a broadcast - * or multicast datgram.) + * or multicast datagram.) */ UDPSTAT_INC(udps_noport); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) @@ -654,7 +654,11 @@ udp_input(struct mbuf **mp, int *offp, int proto) else UDP_PROBE(receive, NULL, NULL, ip, NULL, uh); UDPSTAT_INC(udps_noport); - if (m->m_flags & (M_BCAST | M_MCAST)) { + if (m->m_flags & M_MCAST) { + UDPSTAT_INC(udps_noportmcast); + goto badunlocked; + } + if (m->m_flags & M_BCAST) { UDPSTAT_INC(udps_noportbcast); goto badunlocked; } diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index d89515d7eda5..f98381499b2d 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -159,6 +159,7 @@ static int ni6_addrs(struct icmp6_nodeinfo *, struct mbuf *, static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, struct ifnet *, int); static int icmp6_notify_error(struct mbuf **, int, int); +static void icmp6_mtudisc_update(struct ip6ctlparam *ip6cp); /* * Kernel module interface for updating icmp6stat. The argument is an index @@ -1115,7 +1116,7 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len) if (icmp6type == ICMP6_PACKET_TOO_BIG) { notifymtu = ntohl(icmp6->icmp6_mtu); ip6cp.ip6c_cmdarg = (void *)¬ifymtu; - icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/ + icmp6_mtudisc_update(&ip6cp); } if (ip6_ctlprotox[nxt] != NULL) @@ -1130,47 +1131,18 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len) return (-1); } -void -icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated) +static void +icmp6_mtudisc_update(struct ip6ctlparam *ip6cp) { struct in6_addr *dst = &ip6cp->ip6c_finaldst->sin6_addr; struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6; - struct mbuf *m = ip6cp->ip6c_m; /* will be necessary for scope issue */ + struct mbuf *m = ip6cp->ip6c_m; u_int mtu = ntohl(icmp6->icmp6_mtu); struct in_conninfo inc; uint32_t max_mtu; -#if 0 - /* - * RFC2460 section 5, last paragraph. - * even though minimum link MTU for IPv6 is IPV6_MMTU, - * we may see ICMPv6 too big with mtu < IPV6_MMTU - * due to packet translator in the middle. - * see ip6_output() and ip6_getpmtu() "alwaysfrag" case for - * special handling. - */ if (mtu < IPV6_MMTU) return; -#endif - - /* - * we reject ICMPv6 too big with abnormally small value. - * XXX what is the good definition of "abnormally small"? - */ - if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8) - return; - - if (!validated) - return; - - /* - * In case the suggested mtu is less than IPV6_MMTU, we - * only need to remember that it was for above mentioned - * "alwaysfrag" case. - * Try to be as close to the spec as possible. - */ - if (mtu < IPV6_MMTU) - mtu = IPV6_MMTU - 8; bzero(&inc, sizeof(inc)); inc.inc_fibnum = M_GETFIB(m); diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index ed71c58fffbe..6299ce6e146b 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -145,11 +145,10 @@ static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, struct ip6_frag **); static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); -static int ip6_getpmtu(struct route_in6 *, int, - struct ifnet *, const struct in6_addr *, u_long *, int *, u_int, - u_int); -static int ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long, - u_long *, int *, u_int); +static void ip6_getpmtu(struct route_in6 *, int, + struct ifnet *, const struct in6_addr *, u_long *, u_int, u_int); +static void ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long, + u_long *, u_int); static int ip6_getpmtu_ctl(u_int, const struct in6_addr *, u_long *); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); @@ -418,7 +417,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, int vlan_pcp = -1; struct in6_ifaddr *ia = NULL; u_long mtu; - int alwaysfrag, dontfrag; + int dontfrag; u_int32_t optlen, plen = 0, unfragpartlen; struct ip6_exthdrs exthdrs; struct in6_addr src0, dst0; @@ -939,12 +938,10 @@ nonh6lookup: *ifpp = ifp; /* Determine path MTU. */ - if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst, - &mtu, &alwaysfrag, fibnum, *nexthdrp)) != 0) - goto bad; - KASSERT(mtu > 0, ("%s:%d: mtu %ld, ro_pmtu %p ro %p ifp %p " - "alwaysfrag %d fibnum %u\n", __func__, __LINE__, mtu, ro_pmtu, ro, - ifp, alwaysfrag, fibnum)); + ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst, &mtu, fibnum, + *nexthdrp); + KASSERT(mtu > 0, ("%s:%d: mtu %ld, ro_pmtu %p ro %p ifp %p fibnum %u", + __func__, __LINE__, mtu, ro_pmtu, ro, ifp, fibnum)); /* * The caller of this function may specify to use the minimum MTU @@ -1121,20 +1118,13 @@ passout: * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. * - * The logic here is rather complex: - * 1: normal case (dontfrag == 0, alwaysfrag == 0) + * 1: normal case (dontfrag == 0) * 1-a: send as is if tlen <= path mtu * 1-b: fragment if tlen > path mtu * * 2: if user asks us not to fragment (dontfrag == 1) * 2-a: send as is if tlen <= interface mtu * 2-b: error if tlen > interface mtu - * - * 3: if we always need to attach fragment header (alwaysfrag == 1) - * always fragment - * - * 4: if dontfrag == 1 && alwaysfrag == 1 - * error, as we cannot handle this conflicting request. */ sw_csum = m->m_pkthdr.csum_flags; if (!hdrsplit) { @@ -1157,14 +1147,9 @@ passout: dontfrag = 1; else dontfrag = 0; - if (dontfrag && alwaysfrag) { /* Case 4. */ - /* Conflicting request - can't transmit. */ - error = EMSGSIZE; - goto bad; - } if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) { /* Case 2-b. */ /* - * Even if the DONTFRAG option is specified, we cannot send the + * If the DONTFRAG option is specified, we cannot send the * packet when the data length is larger than the MTU of the * outgoing interface. * Notify the error by sending IPV6_PATHMTU ancillary data if @@ -1178,7 +1163,7 @@ passout: } /* Transmit packet without fragmentation. */ - if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* Cases 1-a and 2-a. */ + if (dontfrag || tlen <= mtu) { /* Cases 1-a and 2-a. */ struct in6_ifaddr *ia6; ip6 = mtod(m, struct ip6_hdr *); @@ -1194,7 +1179,7 @@ passout: goto done; } - /* Try to fragment the packet. Cases 1-b and 3. */ + /* Try to fragment the packet. Case 1-b. */ if (mtu < IPV6_MMTU) { /* Path MTU cannot be less than IPV6_MMTU. */ error = EMSGSIZE; @@ -1478,9 +1463,10 @@ ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup) NET_EPOCH_ENTER(et); nh = fib6_lookup(fibnum, &kdst, scopeid, NHR_NONE, 0); - if (nh != NULL) - error = ip6_calcmtu(nh->nh_ifp, dst, nh->nh_mtu, mtup, NULL, 0); - else + if (nh != NULL) { + ip6_calcmtu(nh->nh_ifp, dst, nh->nh_mtu, mtup, 0); + error = 0; + } else error = EHOSTUNREACH; NET_EPOCH_EXIT(et); @@ -1494,13 +1480,12 @@ ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup) * inside @ro_pmtu to avoid subsequent route lookups after packet * filter processing. * - * Stores mtu and always-frag value into @mtup and @alwaysfragp. - * Returns 0 on success. + * Stores mtu into @mtup. */ -static int +static void ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup, struct ifnet *ifp, const struct in6_addr *dst, u_long *mtup, - int *alwaysfragp, u_int fibnum, u_int proto) + u_int fibnum, u_int proto) { struct nhop_object *nh; struct in6_addr kdst; @@ -1544,65 +1529,41 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup, if (ro_pmtu != NULL && ro_pmtu->ro_nh != NULL) mtu = ro_pmtu->ro_nh->nh_mtu; - return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto)); + ip6_calcmtu(ifp, dst, mtu, mtup, proto); } /* * Calculate MTU based on transmit @ifp, route mtu @rt_mtu and * hostcache data for @dst. - * Stores mtu and always-frag value into @mtup and @alwaysfragp. - * - * Returns 0 on success. + * Stores mtu into @mtup. */ -static int +static void ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu, - u_long *mtup, int *alwaysfragp, u_int proto) + u_long *mtup, u_int proto) { u_long mtu = 0; - int alwaysfrag = 0; - int error = 0; if (rt_mtu > 0) { - u_int32_t ifmtu; - struct in_conninfo inc; - - bzero(&inc, sizeof(inc)); - inc.inc_flags |= INC_ISIPV6; - inc.inc6_faddr = *dst; - - ifmtu = IN6_LINKMTU(ifp); + /* Skip the hostcache if the protocol handles PMTU changes. */ + if (proto != IPPROTO_TCP && proto != IPPROTO_SCTP) { + struct in_conninfo inc = { + .inc_flags = INC_ISIPV6, + .inc6_faddr = *dst, + }; - /* TCP is known to react to pmtu changes so skip hc */ - if (proto != IPPROTO_TCP) mtu = tcp_hc_getmtu(&inc); + } if (mtu) mtu = min(mtu, rt_mtu); else mtu = rt_mtu; - if (mtu == 0) - mtu = ifmtu; - else if (mtu < IPV6_MMTU) { - /* - * RFC2460 section 5, last paragraph: - * if we record ICMPv6 too big message with - * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU - * or smaller, with framgent header attached. - * (fragment header is needed regardless from the - * packet size, for translators to identify packets) - */ - alwaysfrag = 1; - mtu = IPV6_MMTU; - } - } else if (ifp) { + } + + if (mtu == 0) mtu = IN6_LINKMTU(ifp); - } else - error = EHOSTUNREACH; /* XXX */ *mtup = mtu; - if (alwaysfragp) - *alwaysfragp = alwaysfrag; - return (error); } /* diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index 0027cf3bd230..1a32365f5d1d 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -341,7 +341,7 @@ udp6_multi_input(struct mbuf *m, int off, int proto, /* * No matching pcb found; discard datagram. (No need * to send an ICMP Port Unreachable for a broadcast - * or multicast datgram.) + * or multicast datagram.) */ UDPSTAT_INC(udps_noport); UDPSTAT_INC(udps_noportmcast); diff --git a/sys/netlink/route/iface_drivers.c b/sys/netlink/route/iface_drivers.c index 4bf913d9c978..5f605b05f7b8 100644 --- a/sys/netlink/route/iface_drivers.c +++ b/sys/netlink/route/iface_drivers.c @@ -82,9 +82,12 @@ _nl_modify_ifp_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs, } } - if ((lattrs->ifi_change & IFF_UP) && (lattrs->ifi_flags & IFF_UP) == 0) { - /* Request to down the interface */ - if_down(ifp); + if ((lattrs->ifi_change & IFF_UP) != 0 || lattrs->ifi_change == 0) { + /* Request to up or down the interface */ + if (lattrs->ifi_flags & IFF_UP) + if_up(ifp); + else + if_down(ifp); } if (lattrs->ifla_mtu > 0) { @@ -97,7 +100,8 @@ _nl_modify_ifp_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs, } } - if (lattrs->ifi_change & IFF_PROMISC) { + if ((lattrs->ifi_change & IFF_PROMISC) != 0 || + lattrs->ifi_change == 0) { error = ifpromisc(ifp, lattrs->ifi_flags & IFF_PROMISC); if (error != 0) { nlmsg_report_err_msg(npt, "unable to set promisc"); diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index e34c08c8c4db..585c196391c0 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -123,8 +123,8 @@ union inet_template { sizeof(struct pfsync_header) + \ sizeof(struct pfsync_subheader) ) -static int pfsync_upd_tcp(struct pf_kstate *, struct pfsync_state_peer *, - struct pfsync_state_peer *); +static int pfsync_upd_tcp(struct pf_kstate *, struct pf_state_peer_export *, + struct pf_state_peer_export *); static int pfsync_in_clr(struct mbuf *, int, int, int, int); static int pfsync_in_ins(struct mbuf *, int, int, int, int); static int pfsync_in_iack(struct mbuf *, int, int, int, int); @@ -330,7 +330,7 @@ SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW, static int pfsync_clone_create(struct if_clone *, int, caddr_t); static void pfsync_clone_destroy(struct ifnet *); -static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, +static int pfsync_alloc_scrub_memory(struct pf_state_peer_export *, struct pf_state_peer *); static int pfsyncoutput(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); @@ -502,7 +502,7 @@ pfsync_clone_destroy(struct ifnet *ifp) } static int -pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, +pfsync_alloc_scrub_memory(struct pf_state_peer_export *s, struct pf_state_peer *d) { if (s->scrub.scrub_flag && d->scrub == NULL) { @@ -1172,8 +1172,8 @@ pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action) } static int -pfsync_upd_tcp(struct pf_kstate *st, struct pfsync_state_peer *src, - struct pfsync_state_peer *dst) +pfsync_upd_tcp(struct pf_kstate *st, struct pf_state_peer_export *src, + struct pf_state_peer_export *dst) { int sync = 0; diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 3fa7789efcfe..8cd4fff95b15 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -1347,6 +1347,8 @@ pf_cleanup(void) uma_zdestroy(V_pf_state_z); uma_zdestroy(V_pf_state_key_z); uma_zdestroy(V_pf_udp_mapping_z); + uma_zdestroy(V_pf_anchor_z); + uma_zdestroy(V_pf_eth_anchor_z); } static int @@ -2067,6 +2069,44 @@ pf_find_state_all_exists(const struct pf_state_key_cmp *key, u_int dir) return (false); } +void +pf_state_peer_hton(const struct pf_state_peer *s, struct pf_state_peer_export *d) +{ + d->seqlo = htonl(s->seqlo); + d->seqhi = htonl(s->seqhi); + d->seqdiff = htonl(s->seqdiff); + d->max_win = htons(s->max_win); + d->mss = htons(s->mss); + d->state = s->state; + d->wscale = s->wscale; + if (s->scrub) { + d->scrub.pfss_flags = htons( + s->scrub->pfss_flags & PFSS_TIMESTAMP); + d->scrub.pfss_ttl = (s)->scrub->pfss_ttl; + d->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod); + d->scrub.scrub_flag = PF_SCRUB_FLAG_VALID; + } +} + +void +pf_state_peer_ntoh(const struct pf_state_peer_export *s, struct pf_state_peer *d) +{ + d->seqlo = ntohl(s->seqlo); + d->seqhi = ntohl(s->seqhi); + d->seqdiff = ntohl(s->seqdiff); + d->max_win = ntohs(s->max_win); + d->mss = ntohs(s->mss); + d->state = s->state; + d->wscale = s->wscale; + if (s->scrub.scrub_flag == PF_SCRUB_FLAG_VALID && + d->scrub != NULL) { + d->scrub->pfss_flags = ntohs(s->scrub.pfss_flags) & + PFSS_TIMESTAMP; + d->scrub->pfss_ttl = s->scrub.pfss_ttl; + d->scrub->pfss_ts_mod = ntohl(s->scrub.pfss_ts_mod); + } +} + struct pf_udp_mapping * pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_port, struct pf_addr *nat_addr, uint16_t nat_port) diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index b6f5d74b5b42..e5da05a958f6 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -6444,19 +6444,14 @@ shutdown_pf(void) int error = 0; u_int32_t t[5]; char nn = '\0'; - struct pf_kanchor *anchor; - struct pf_keth_anchor *eth_anchor; + struct pf_kanchor *anchor, *tmp_anchor; + struct pf_keth_anchor *eth_anchor, *tmp_eth_anchor; int rs_num; do { /* Unlink rules of all user defined anchors */ - RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors) { - /* Wildcard based anchors may not have a respective - * explicit anchor rule or they may be left empty - * without rules. It leads to anchor.refcnt=0, and the - * rest of the logic does not expect it. */ - if (anchor->refcnt == 0) - anchor->refcnt = 1; + RB_FOREACH_SAFE(anchor, pf_kanchor_global, &V_pf_anchors, + tmp_anchor) { for (rs_num = 0; rs_num < PF_RULESET_MAX; ++rs_num) { if ((error = pf_begin_rules(&t[rs_num], rs_num, anchor->path)) != 0) { @@ -6474,14 +6469,8 @@ shutdown_pf(void) } /* Unlink rules of all user defined ether anchors */ - RB_FOREACH(eth_anchor, pf_keth_anchor_global, - &V_pf_keth_anchors) { - /* Wildcard based anchors may not have a respective - * explicit anchor rule or they may be left empty - * without rules. It leads to anchor.refcnt=0, and the - * rest of the logic does not expect it. */ - if (eth_anchor->refcnt == 0) - eth_anchor->refcnt = 1; + RB_FOREACH_SAFE(eth_anchor, pf_keth_anchor_global, + &V_pf_keth_anchors, tmp_eth_anchor) { if ((error = pf_begin_eth(&t[0], eth_anchor->path)) != 0) { DPFPRINTF(PF_DEBUG_MISC, "%s: eth " diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c index 09754359ec2d..45b5b8dd5fef 100644 --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -118,7 +118,7 @@ dump_state_peer(struct nl_writer *nw, int attr, const struct pf_state_peer *peer nlattr_add_u16(nw, PF_STP_PFSS_FLAGS, pfss_flags); nlattr_add_u32(nw, PF_STP_PFSS_TS_MOD, sc->pfss_ts_mod); nlattr_add_u8(nw, PF_STP_PFSS_TTL, sc->pfss_ttl); - nlattr_add_u8(nw, PF_STP_SCRUB_FLAG, PFSYNC_SCRUB_FLAG_VALID); + nlattr_add_u8(nw, PF_STP_SCRUB_FLAG, PF_SCRUB_FLAG_VALID); } nlattr_set_len(nw, off); diff --git a/sys/powerpc/conf/GENERIC64 b/sys/powerpc/conf/GENERIC64 index 85711c8fc3ff..630c88b97dd7 100644 --- a/sys/powerpc/conf/GENERIC64 +++ b/sys/powerpc/conf/GENERIC64 @@ -293,3 +293,4 @@ device virtio_balloon # VirtIO Memory Balloon device options HID_DEBUG # enable debug msgs device hid # Generic HID support device hidbus # Generic HID Bus +options U2F_MAKE_UHID_ALIAS # install /dev/uhid alias for /dev/u2f/ diff --git a/sys/powerpc/conf/GENERIC64LE b/sys/powerpc/conf/GENERIC64LE index a56feb6574a4..eb9a9441425d 100644 --- a/sys/powerpc/conf/GENERIC64LE +++ b/sys/powerpc/conf/GENERIC64LE @@ -274,3 +274,4 @@ device virtio_balloon # VirtIO Memory Balloon device options HID_DEBUG # enable debug msgs device hid # Generic HID support device hidbus # Generic HID Bus +options U2F_MAKE_UHID_ALIAS # install /dev/uhid alias for /dev/u2f/ diff --git a/sys/powerpc/mpc85xx/mpc85xx_gpio.c b/sys/powerpc/mpc85xx/mpc85xx_gpio.c index cb96d768adef..7353ed7bac7b 100644 --- a/sys/powerpc/mpc85xx/mpc85xx_gpio.c +++ b/sys/powerpc/mpc85xx/mpc85xx_gpio.c @@ -228,12 +228,13 @@ mpc85xx_gpio_attach(device_t dev) OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); - sc->busdev = gpiobus_attach_bus(dev); + sc->busdev = gpiobus_add_bus(dev); if (sc->busdev == NULL) { mpc85xx_gpio_detach(dev); return (ENOMEM); } + bus_attach_children(dev); return (0); } diff --git a/sys/powerpc/powerpc/busdma_machdep.c b/sys/powerpc/powerpc/busdma_machdep.c index 65f90aa4affa..65a07c7ebc39 100644 --- a/sys/powerpc/powerpc/busdma_machdep.c +++ b/sys/powerpc/powerpc/busdma_machdep.c @@ -648,6 +648,7 @@ _bus_dmamap_load_buffer(bus_dma_tag_t dmat, sgsize = MIN(buflen, PAGE_SIZE - (curaddr & PAGE_MASK)); if (map->pagesneeded != 0 && must_bounce(dmat, curaddr)) { sgsize = roundup2(sgsize, dmat->alignment); + sgsize = MIN(sgsize, buflen); curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, sgsize); } diff --git a/sys/riscv/conf/GENERIC b/sys/riscv/conf/GENERIC index a8500fe80019..2ff711e80127 100644 --- a/sys/riscv/conf/GENERIC +++ b/sys/riscv/conf/GENERIC @@ -132,6 +132,7 @@ device umass # Disks/Mass storage - Requires scbus and da options HID_DEBUG # enable debug msgs device hid # Generic HID support device hidbus # Generic HID Bus +options U2F_MAKE_UHID_ALIAS # install /dev/uhid alias for /dev/u2f/ # Serial (COM) ports device uart # Generic UART driver diff --git a/sys/riscv/riscv/busdma_bounce.c b/sys/riscv/riscv/busdma_bounce.c index f652f08bf5dc..9d9556fc72f9 100644 --- a/sys/riscv/riscv/busdma_bounce.c +++ b/sys/riscv/riscv/busdma_bounce.c @@ -672,6 +672,7 @@ bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, map->pagesneeded != 0 && addr_needs_bounce(dmat, curaddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); + sgsize = MIN(sgsize, buflen); curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, sgsize); } else if ((dmat->bounce_flags & BF_COHERENT) == 0) { diff --git a/sys/riscv/sifive/sifive_gpio.c b/sys/riscv/sifive/sifive_gpio.c index ef68d2b39da3..98bff2f72082 100644 --- a/sys/riscv/sifive/sifive_gpio.c +++ b/sys/riscv/sifive/sifive_gpio.c @@ -157,13 +157,14 @@ sfgpio_attach(device_t dev) sc->gpio_pins[i].gp_name[GPIOMAXNAME - 1] = '\0'; } - sc->busdev = gpiobus_attach_bus(dev); + sc->busdev = gpiobus_add_bus(dev); if (sc->busdev == NULL) { device_printf(dev, "Cannot attach gpiobus\n"); error = ENXIO; goto fail; } + bus_attach_children(dev); return (0); fail: diff --git a/sys/riscv/starfive/jh7110_gpio.c b/sys/riscv/starfive/jh7110_gpio.c index 452a3306b4a1..1ed7d9f42259 100644 --- a/sys/riscv/starfive/jh7110_gpio.c +++ b/sys/riscv/starfive/jh7110_gpio.c @@ -321,13 +321,14 @@ jh7110_gpio_attach(device_t dev) JH7110_GPIO_WRITE(sc, GPIOE_1, 0); JH7110_GPIO_WRITE(sc, GPIOEN, 1); - sc->busdev = gpiobus_attach_bus(dev); + sc->busdev = gpiobus_add_bus(dev); if (sc->busdev == NULL) { device_printf(dev, "Cannot attach gpiobus\n"); jh7110_gpio_detach(dev); return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/riscv/starfive/jh7110_pcie.c b/sys/riscv/starfive/jh7110_pcie.c index 2d0a4be69b2c..5181252ab2dc 100644 --- a/sys/riscv/starfive/jh7110_pcie.c +++ b/sys/riscv/starfive/jh7110_pcie.c @@ -483,6 +483,16 @@ jh7110_pcie_msi_enable_intr(device_t dev, struct intr_irqsrc *isrc) } static void +jh7110_pcie_msi_post_filter(device_t dev, struct intr_irqsrc *isrc) +{ +} + +static void +jh7110_pcie_msi_post_ithread(device_t dev, struct intr_irqsrc *isrc) +{ +} + +static void jh7110_pcie_msi_pre_ithread(device_t dev, struct intr_irqsrc *isrc) { struct jh7110_pcie_softc *sc; @@ -1008,6 +1018,8 @@ static device_method_t jh7110_pcie_methods[] = { /* Interrupt controller interface */ DEVMETHOD(pic_enable_intr, jh7110_pcie_msi_enable_intr), DEVMETHOD(pic_disable_intr, jh7110_pcie_msi_disable_intr), + DEVMETHOD(pic_post_filter, jh7110_pcie_msi_post_filter), + DEVMETHOD(pic_post_ithread, jh7110_pcie_msi_post_ithread), DEVMETHOD(pic_pre_ithread, jh7110_pcie_msi_pre_ithread), /* OFW bus interface */ diff --git a/sys/security/mac_do/mac_do.c b/sys/security/mac_do/mac_do.c index 8856be5fa1a3..7a5ac2e01f75 100644 --- a/sys/security/mac_do/mac_do.c +++ b/sys/security/mac_do/mac_do.c @@ -44,7 +44,7 @@ SYSCTL_INT(_security_mac_do, OID_AUTO, print_parse_error, CTLFLAG_RWTUN, &print_parse_error, 0, "Print parse errors on setting rules " "(via sysctl(8))."); -static MALLOC_DEFINE(M_DO, "do_rule", "Rules for mac_do"); +static MALLOC_DEFINE(M_MAC_DO, "mac_do", "mac_do(4) security module"); #define MAC_RULE_STRING_LEN 1024 @@ -319,17 +319,17 @@ toast_rules(struct rules *const rules) struct rule *rule, *rule_next; STAILQ_FOREACH_SAFE(rule, head, r_entries, rule_next) { - free(rule->uids, M_DO); - free(rule->gids, M_DO); - free(rule, M_DO); + free(rule->uids, M_MAC_DO); + free(rule->gids, M_MAC_DO); + free(rule, M_MAC_DO); } - free(rules, M_DO); + free(rules, M_MAC_DO); } static struct rules * alloc_rules(void) { - struct rules *const rules = malloc(sizeof(*rules), M_DO, M_WAITOK); + struct rules *const rules = malloc(sizeof(*rules), M_MAC_DO, M_WAITOK); _Static_assert(MAC_RULE_STRING_LEN > 0, "MAC_RULE_STRING_LEN <= 0!"); rules->string[0] = 0; @@ -433,7 +433,7 @@ static void make_parse_error(struct parse_error **const parse_error, const size_t pos, const char *const fmt, ...) { - struct parse_error *const err = malloc(sizeof(*err), M_DO, M_WAITOK); + struct parse_error *const err = malloc(sizeof(*err), M_MAC_DO, M_WAITOK); va_list ap; err->pos = pos; @@ -448,7 +448,7 @@ make_parse_error(struct parse_error **const parse_error, const size_t pos, static void free_parse_error(struct parse_error *const parse_error) { - free(parse_error, M_DO); + free(parse_error, M_MAC_DO); } static int @@ -733,7 +733,7 @@ parse_target_clause(char *to, struct rule *const rule, "Too many target clauses of type '%s'.", to_type); return (EOVERFLOW); } - ie = malloc(sizeof(*ie), M_DO, M_WAITOK); + ie = malloc(sizeof(*ie), M_MAC_DO, M_WAITOK); ie->spec = is; STAILQ_INSERT_TAIL(list, ie, ie_entries); check_type_and_id_spec(type, &is); @@ -784,7 +784,7 @@ pour_list_into_rule(const id_type_t type, struct id_list *const list, STAILQ_FOREACH_SAFE(ie, list, ie_entries, ie_next) { MPASS(idx < *nb); array[idx] = ie->spec; - free(ie, M_DO); + free(ie, M_MAC_DO); ++idx; } MPASS(idx == *nb); @@ -874,7 +874,7 @@ parse_single_rule(char *rule, struct rules *const rules, STAILQ_INIT(&gid_list); /* Freed when the 'struct rules' container is freed. */ - new = malloc(sizeof(*new), M_DO, M_WAITOK | M_ZERO); + new = malloc(sizeof(*new), M_MAC_DO, M_WAITOK | M_ZERO); from_type = strsep_noblanks(&rule, "="); MPASS(from_type != NULL); /* Because 'rule' was not NULL. */ @@ -933,7 +933,7 @@ parse_single_rule(char *rule, struct rules *const rules, } while (to_list != NULL); if (new->uids_nb != 0) { - new->uids = malloc(sizeof(*new->uids) * new->uids_nb, M_DO, + new->uids = malloc(sizeof(*new->uids) * new->uids_nb, M_MAC_DO, M_WAITOK); error = pour_list_into_rule(IT_UID, &uid_list, new->uids, &new->uids_nb, parse_error); @@ -949,7 +949,7 @@ parse_single_rule(char *rule, struct rules *const rules, } if (new->gids_nb != 0) { - new->gids = malloc(sizeof(*new->gids) * new->gids_nb, M_DO, + new->gids = malloc(sizeof(*new->gids) * new->gids_nb, M_MAC_DO, M_WAITOK); error = pour_list_into_rule(IT_GID, &gid_list, new->gids, &new->gids_nb, parse_error); @@ -969,13 +969,13 @@ parse_single_rule(char *rule, struct rules *const rules, return (0); einval: - free(new->gids, M_DO); - free(new->uids, M_DO); - free(new, M_DO); + free(new->gids, M_MAC_DO); + free(new->uids, M_MAC_DO); + free(new, M_MAC_DO); STAILQ_FOREACH_SAFE(ie, &gid_list, ie_entries, ie_next) - free(ie, M_DO); + free(ie, M_MAC_DO); STAILQ_FOREACH_SAFE(ie, &uid_list, ie_entries, ie_next) - free(ie, M_DO); + free(ie, M_MAC_DO); MPASS(*parse_error != NULL); return (EINVAL); } @@ -1028,7 +1028,7 @@ parse_rules(const char *const string, struct rules **const rulesp, bcopy(string, rules->string, len + 1); MPASS(rules->string[len] == '\0'); /* Catch some races. */ - copy = malloc(len + 1, M_DO, M_WAITOK); + copy = malloc(len + 1, M_MAC_DO, M_WAITOK); bcopy(string, copy, len + 1); MPASS(copy[len] == '\0'); /* Catch some races. */ @@ -1046,7 +1046,7 @@ parse_rules(const char *const string, struct rules **const rulesp, *rulesp = rules; out: - free(copy, M_DO); + free(copy, M_MAC_DO); return (error); } @@ -1226,7 +1226,7 @@ parse_and_set_rules(struct prison *const pr, const char *rules_string, static int mac_do_sysctl_rules(SYSCTL_HANDLER_ARGS) { - char *const buf = malloc(MAC_RULE_STRING_LEN, M_DO, M_WAITOK); + char *const buf = malloc(MAC_RULE_STRING_LEN, M_MAC_DO, M_WAITOK); struct prison *const td_pr = req->td->td_ucred->cr_prison; struct prison *pr; struct rules *rules; @@ -1250,7 +1250,7 @@ mac_do_sysctl_rules(SYSCTL_HANDLER_ARGS) free_parse_error(parse_error); } out: - free(buf, M_DO); + free(buf, M_MAC_DO); return (error); } @@ -1573,7 +1573,7 @@ set_data_header(void *const data, const size_t size, const int priv, static void * alloc_data(void *const data, const size_t size) { - struct mac_do_data_header *const hdr = realloc(data, size, M_DO, + struct mac_do_data_header *const hdr = realloc(data, size, M_MAC_DO, M_WAITOK); MPASS(size >= sizeof(struct mac_do_data_header)); @@ -1602,7 +1602,7 @@ alloc_data(void *const data, const size_t size) static void dealloc_thread_osd(void *const value) { - free(value, M_DO); + free(value, M_MAC_DO); } /* diff --git a/sys/sys/conf.h b/sys/sys/conf.h index 1646aa108701..a830c9d4c622 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -159,6 +159,7 @@ typedef int dumper_hdr_t(struct dumperinfo *di, struct kerneldumpheader *kdh); #define GID_RT_PRIO 47 #define GID_ID_PRIO 48 #define GID_DIALER 68 +#define GID_U2F 116 #define GID_NOGROUP 65533 #define GID_NOBODY 65534 diff --git a/sys/sys/param.h b/sys/sys/param.h index b9942c7bc2fd..fc2a78883f1e 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -74,7 +74,7 @@ * cannot include sys/param.h and should only be updated here. */ #undef __FreeBSD_version -#define __FreeBSD_version 1500059 +#define __FreeBSD_version 1500063 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h index d703a11fda01..2d6903967e15 100644 --- a/sys/sys/syscall.h +++ b/sys/sys/syscall.h @@ -85,8 +85,8 @@ /* 76 is obsolete vhangup */ /* 77 is obsolete vlimit */ #define SYS_mincore 78 -#define SYS_getgroups 79 -#define SYS_setgroups 80 +#define SYS_freebsd14_getgroups 79 +#define SYS_freebsd14_setgroups 80 #define SYS_getpgrp 81 #define SYS_setpgid 82 #define SYS_setitimer 83 @@ -533,4 +533,6 @@ #define SYS_exterrctl 592 #define SYS_inotify_add_watch_at 593 #define SYS_inotify_rm_watch 594 -#define SYS_MAXSYSCALL 595 +#define SYS_getgroups 595 +#define SYS_setgroups 596 +#define SYS_MAXSYSCALL 597 diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk index b7ded62cacb4..d1172c2dc7bf 100644 --- a/sys/sys/syscall.mk +++ b/sys/sys/syscall.mk @@ -65,8 +65,8 @@ MIASM = \ mprotect.o \ madvise.o \ mincore.o \ - getgroups.o \ - setgroups.o \ + freebsd14_getgroups.o \ + freebsd14_setgroups.o \ getpgrp.o \ setpgid.o \ setitimer.o \ @@ -436,4 +436,6 @@ MIASM = \ setcred.o \ exterrctl.o \ inotify_add_watch_at.o \ - inotify_rm_watch.o + inotify_rm_watch.o \ + getgroups.o \ + setgroups.o diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h index 8d666f9c8ee9..98311a6dbf94 100644 --- a/sys/sys/sysproto.h +++ b/sys/sys/sysproto.h @@ -273,14 +273,6 @@ struct mincore_args { char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)]; char vec_l_[PADL_(char *)]; char * vec; char vec_r_[PADR_(char *)]; }; -struct getgroups_args { - char gidsetsize_l_[PADL_(int)]; int gidsetsize; char gidsetsize_r_[PADR_(int)]; - char gidset_l_[PADL_(gid_t *)]; gid_t * gidset; char gidset_r_[PADR_(gid_t *)]; -}; -struct setgroups_args { - char gidsetsize_l_[PADL_(int)]; int gidsetsize; char gidsetsize_r_[PADR_(int)]; - char gidset_l_[PADL_(const gid_t *)]; const gid_t * gidset; char gidset_r_[PADR_(const gid_t *)]; -}; struct getpgrp_args { syscallarg_t dummy; }; @@ -1901,6 +1893,14 @@ struct inotify_rm_watch_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; char wd_l_[PADL_(int)]; int wd; char wd_r_[PADR_(int)]; }; +struct getgroups_args { + char gidsetsize_l_[PADL_(int)]; int gidsetsize; char gidsetsize_r_[PADR_(int)]; + char gidset_l_[PADL_(gid_t *)]; gid_t * gidset; char gidset_r_[PADR_(gid_t *)]; +}; +struct setgroups_args { + char gidsetsize_l_[PADL_(int)]; int gidsetsize; char gidsetsize_r_[PADR_(int)]; + char gidset_l_[PADL_(const gid_t *)]; const gid_t * gidset; char gidset_r_[PADR_(const gid_t *)]; +}; int sys__exit(struct thread *, struct _exit_args *); int sys_fork(struct thread *, struct fork_args *); int sys_read(struct thread *, struct read_args *); @@ -1957,8 +1957,6 @@ int sys_munmap(struct thread *, struct munmap_args *); int sys_mprotect(struct thread *, struct mprotect_args *); int sys_madvise(struct thread *, struct madvise_args *); int sys_mincore(struct thread *, struct mincore_args *); -int sys_getgroups(struct thread *, struct getgroups_args *); -int sys_setgroups(struct thread *, struct setgroups_args *); int sys_getpgrp(struct thread *, struct getpgrp_args *); int sys_setpgid(struct thread *, struct setpgid_args *); int sys_setitimer(struct thread *, struct setitimer_args *); @@ -2305,6 +2303,8 @@ int sys_setcred(struct thread *, struct setcred_args *); int sys_exterrctl(struct thread *, struct exterrctl_args *); int sys_inotify_add_watch_at(struct thread *, struct inotify_add_watch_at_args *); int sys_inotify_rm_watch(struct thread *, struct inotify_rm_watch_args *); +int sys_getgroups(struct thread *, struct getgroups_args *); +int sys_setgroups(struct thread *, struct setgroups_args *); #ifdef COMPAT_43 @@ -2799,6 +2799,16 @@ int freebsd13_swapoff(struct thread *, struct freebsd13_swapoff_args *); #ifdef COMPAT_FREEBSD14 +struct freebsd14_getgroups_args { + char gidsetsize_l_[PADL_(int)]; int gidsetsize; char gidsetsize_r_[PADR_(int)]; + char gidset_l_[PADL_(gid_t *)]; gid_t * gidset; char gidset_r_[PADR_(gid_t *)]; +}; +struct freebsd14_setgroups_args { + char gidsetsize_l_[PADL_(int)]; int gidsetsize; char gidsetsize_r_[PADR_(int)]; + char gidset_l_[PADL_(const gid_t *)]; const gid_t * gidset; char gidset_r_[PADR_(const gid_t *)]; +}; +int freebsd14_getgroups(struct thread *, struct freebsd14_getgroups_args *); +int freebsd14_setgroups(struct thread *, struct freebsd14_setgroups_args *); #endif /* COMPAT_FREEBSD14 */ @@ -2873,8 +2883,8 @@ int freebsd13_swapoff(struct thread *, struct freebsd13_swapoff_args *); #define SYS_AUE_mprotect AUE_MPROTECT #define SYS_AUE_madvise AUE_MADVISE #define SYS_AUE_mincore AUE_MINCORE -#define SYS_AUE_getgroups AUE_GETGROUPS -#define SYS_AUE_setgroups AUE_SETGROUPS +#define SYS_AUE_freebsd14_getgroups AUE_GETGROUPS +#define SYS_AUE_freebsd14_setgroups AUE_SETGROUPS #define SYS_AUE_getpgrp AUE_GETPGRP #define SYS_AUE_setpgid AUE_SETPGRP #define SYS_AUE_setitimer AUE_SETITIMER @@ -3289,6 +3299,8 @@ int freebsd13_swapoff(struct thread *, struct freebsd13_swapoff_args *); #define SYS_AUE_exterrctl AUE_NULL #define SYS_AUE_inotify_add_watch_at AUE_INOTIFY #define SYS_AUE_inotify_rm_watch AUE_INOTIFY +#define SYS_AUE_getgroups AUE_GETGROUPS +#define SYS_AUE_setgroups AUE_SETGROUPS #undef PAD_ #undef PADL_ diff --git a/sys/sys/types.h b/sys/sys/types.h index fd375139a092..8311c1901b7e 100644 --- a/sys/sys/types.h +++ b/sys/sys/types.h @@ -297,9 +297,11 @@ typedef struct vm_page *vm_page_t; #if defined(_KERNEL) || defined(_STANDALONE) #if !defined(__bool_true_false_are_defined) && !defined(__cplusplus) #define __bool_true_false_are_defined 1 +#if __STDC_VERSION__ < 202311L #define false 0 #define true 1 typedef _Bool bool; +#endif /* __STDC_VERSION__ < 202311L */ #endif /* !__bool_true_false_are_defined && !__cplusplus */ #endif /* KERNEL || _STANDALONE */ diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h index 7ab2f021e408..5743dc1c8033 100644 --- a/sys/sys/unistd.h +++ b/sys/sys/unistd.h @@ -216,6 +216,15 @@ #define CLOSE_RANGE_CLOEXEC (1<<2) #define CLOSE_RANGE_CLOFORK (1<<3) +/* + * copy_file_range flags visible to user space. + * High order 8 bits reserved for kernel flags. + * Allocate from bit 23 down, to try and avoid conflicts with + * future Linux flags. + */ +#define COPY_FILE_RANGE_CLONE 0x00800000 /* Require cloning. */ +#define COPY_FILE_RANGE_USERFLAGS (COPY_FILE_RANGE_CLONE) + #endif /* __BSD_VISIBLE */ #endif /* !_SYS_UNISTD_H_ */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 8080e9edd8c3..6ef9bbec9446 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -67,6 +67,11 @@ __enum_uint8_decl(vtype) { VLASTTYPE = VMARKER, }; +/* + * We frequently need to test is something is a device node. + */ +#define VTYPE_ISDEV(vtype) ((vtype) == VCHR || (vtype) == VBLK) + __enum_uint8_decl(vstate) { VSTATE_UNINITIALIZED, VSTATE_CONSTRUCTED, @@ -199,6 +204,8 @@ struct vnode { int v_seqc_users; /* i modifications pending */ }; +#define VN_ISDEV(vp) VTYPE_ISDEV((vp)->v_type) + #ifndef DEBUG_LOCKS #ifdef _LP64 /* @@ -309,6 +316,8 @@ struct vattr { long va_spare; /* remain quad aligned */ }; +#define VATTR_ISDEV(vap) VTYPE_ISDEV((vap)->va_type) + /* * Flags for va_vaflags. */ @@ -397,21 +406,8 @@ struct vattr { */ #define VLKTIMEOUT (hz / 20 + 1) -/* copy_file_range flags */ -#define COPY_FILE_RANGE_KFLAGS 0xff000000 - -/* - * copy_file_range flags visible to user space. - * Allocate high bits first, to try and avoid conflicting with Linux. - */ -#define COPY_FILE_RANGE_CLONE 0x00800000 /* Require cloning. */ -#define COPY_FILE_RANGE_USERFLAGS (COPY_FILE_RANGE_CLONE) - #ifdef _KERNEL -/* copy_file_range flags only usable in the kernel */ -#define COPY_FILE_RANGE_TIMEO1SEC 0x01000000 /* Return after 1sec. */ - #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_VNODE); #endif @@ -634,6 +630,10 @@ typedef void vop_getpages_iodone_t(void *, vm_page_t *, int, int); #define VN_OPEN_INVFS 0x00000008 #define VN_OPEN_WANTIOCTLCAPS 0x00000010 +/* copy_file_range kernel flags */ +#define COPY_FILE_RANGE_KFLAGS 0xff000000 +#define COPY_FILE_RANGE_TIMEO1SEC 0x01000000 /* Return after 1sec. */ + /* * Public vnode manipulation functions. */ diff --git a/sys/sys/watchdog.h b/sys/sys/watchdog.h index 4a16b18509f5..8401d343a6b7 100644 --- a/sys/sys/watchdog.h +++ b/sys/sys/watchdog.h @@ -32,15 +32,16 @@ #define _SYS_WATCHDOG_H #include <sys/ioccom.h> +#include <sys/_types.h> #define _PATH_WATCHDOG "fido" -#define WDIOCPATPAT _IOW('W', 42, u_int) /* pat the watchdog */ -#define WDIOC_SETTIMEOUT _IOW('W', 43, int) /* set/reset the timer */ -#define WDIOC_GETTIMEOUT _IOR('W', 44, int) /* get total timeout */ -#define WDIOC_GETTIMELEFT _IOR('W', 45, int) /* get time left */ -#define WDIOC_GETPRETIMEOUT _IOR('W', 46, int) /* get the pre-timeout */ -#define WDIOC_SETPRETIMEOUT _IOW('W', 47, int) /* set the pre-timeout */ +#define WDIOC_PATPAT _IOW('W', 52, sbintime_t) /* pat the watchdog */ +#define WDIOC_SETTIMEOUT _IOW('W', 53, sbintime_t) /* set/reset the timer */ +#define WDIOC_GETTIMEOUT _IOR('W', 54, sbintime_t) /* get total timeout */ +#define WDIOC_GETTIMELEFT _IOR('W', 55, sbintime_t) /* get time left */ +#define WDIOC_GETPRETIMEOUT _IOR('W', 56, sbintime_t) /* get the pre-timeout */ +#define WDIOC_SETPRETIMEOUT _IOW('W', 57, sbintime_t) /* set the pre-timeout */ /* set the action when a pre-timeout occurs see: WD_SOFT_* */ #define WDIOC_SETPRETIMEOUTACT _IOW('W', 48, int) @@ -48,6 +49,8 @@ #define WDIOC_SETSOFT _IOW('W', 49, int) #define WDIOC_SETSOFTTIMEOUTACT _IOW('W', 50, int) +#define WDIOC_CONTROL _IOW('W', 51, int) /* configure watchdog */ + #define WD_ACTIVE 0x8000000 /* * Watchdog reset, timeout set to value in WD_INTERVAL field. @@ -93,6 +96,11 @@ #define WD_TO_64SEC 36 #define WD_TO_128SEC 37 +/* Control options for WDIOC_CONTROL */ +#define WD_CTRL_DISABLE 0x00000000 +#define WD_CTRL_ENABLE 0x00000001 +#define WD_CTRL_RESET 0x00000002 + /* action on pre-timeout trigger */ #define WD_SOFT_PANIC 0x01 /* panic */ #define WD_SOFT_DDB 0x02 /* enter debugger */ @@ -105,11 +113,16 @@ #include <sys/_eventhandler.h> typedef void (*watchdog_fn)(void *, u_int, int *); +typedef void (*watchdog_sbt_fn)(void *, sbintime_t, sbintime_t *, int *); EVENTHANDLER_DECLARE(watchdog_list, watchdog_fn); +EVENTHANDLER_DECLARE(watchdog_sbt_list, watchdog_sbt_fn); u_int wdog_kern_last_timeout(void); int wdog_kern_pat(u_int utim); +sbintime_t wdog_kern_last_timeout_sbt(void); +int wdog_kern_pat_sbt(sbintime_t utim); +int wdog_control(int ctrl); /* * The following function pointer is used to attach a software watchdog diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 897a21032907..c7e2b3f4b8e6 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -1550,7 +1550,7 @@ ffs_openextattr( } */ *ap) { - if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + if (VN_ISDEV(ap->a_vp)) return (EOPNOTSUPP); return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td)); @@ -1572,7 +1572,7 @@ ffs_closeextattr( struct vnode *vp; vp = ap->a_vp; - if (vp->v_type == VCHR || vp->v_type == VBLK) + if (VN_ISDEV(vp)) return (EOPNOTSUPP); if (ap->a_commit && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) return (EROFS); @@ -1610,7 +1610,7 @@ ffs_deleteextattr( vp = ap->a_vp; ip = VTOI(vp); - if (vp->v_type == VCHR || vp->v_type == VBLK) + if (VN_ISDEV(vp)) return (EOPNOTSUPP); if (strlen(ap->a_name) == 0) return (EINVAL); @@ -1688,7 +1688,7 @@ ffs_getextattr( ip = VTOI(ap->a_vp); - if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + if (VN_ISDEV(ap->a_vp)) return (EOPNOTSUPP); error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, @@ -1738,7 +1738,7 @@ ffs_listextattr( ip = VTOI(ap->a_vp); - if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + if (VN_ISDEV(ap->a_vp)) return (EOPNOTSUPP); error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, @@ -1803,7 +1803,7 @@ ffs_setextattr( ip = VTOI(vp); fs = ITOFS(ip); - if (vp->v_type == VCHR || vp->v_type == VBLK) + if (VN_ISDEV(vp)) return (EOPNOTSUPP); if (strlen(ap->a_name) == 0) return (EINVAL); diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index ffc993aef9fc..0921eee92b9d 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -156,30 +156,30 @@ ufs_itimes_locked(struct vnode *vp) if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) return; - if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp)) + if (VN_ISDEV(vp) && !DOINGSOFTDEP(vp)) UFS_INODE_SET_FLAG(ip, IN_LAZYMOD); else if (((vp->v_mount->mnt_kern_flag & - (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || - (ip->i_flag & (IN_CHANGE | IN_UPDATE))) + (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || + (ip->i_flag & (IN_CHANGE | IN_UPDATE)) != 0) UFS_INODE_SET_FLAG(ip, IN_MODIFIED); - else if (ip->i_flag & IN_ACCESS) + else if ((ip->i_flag & IN_ACCESS) != 0) UFS_INODE_SET_FLAG(ip, IN_LAZYACCESS); vfs_timestamp(&ts); - if (ip->i_flag & IN_ACCESS) { + if ((ip->i_flag & IN_ACCESS) != 0) { DIP_SET(ip, i_atime, ts.tv_sec); DIP_SET(ip, i_atimensec, ts.tv_nsec); } - if (ip->i_flag & IN_UPDATE) { + if ((ip->i_flag & IN_UPDATE) != 0) { DIP_SET(ip, i_mtime, ts.tv_sec); DIP_SET(ip, i_mtimensec, ts.tv_nsec); } - if (ip->i_flag & IN_CHANGE) { + if ((ip->i_flag & IN_CHANGE) != 0) { DIP_SET(ip, i_ctime, ts.tv_sec); DIP_SET(ip, i_ctimensec, ts.tv_nsec); DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1); } - out: +out: ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); } @@ -319,7 +319,7 @@ ufs_open(struct vop_open_args *ap) struct vnode *vp = ap->a_vp; struct inode *ip; - if (vp->v_type == VCHR || vp->v_type == VBLK) + if (VN_ISDEV(vp)) return (EOPNOTSUPP); ip = VTOI(vp); @@ -540,7 +540,7 @@ ufs_stat(struct vop_stat_args *ap) sb->st_uid = ip->i_uid; sb->st_gid = ip->i_gid; if (I_IS_UFS1(ip)) { - sb->st_rdev = ip->i_din1->di_rdev; + sb->st_rdev = VN_ISDEV(vp) ? ip->i_din1->di_rdev : NODEV; sb->st_size = ip->i_din1->di_size; sb->st_mtim.tv_sec = ip->i_din1->di_mtime; sb->st_mtim.tv_nsec = ip->i_din1->di_mtimensec; @@ -551,7 +551,7 @@ ufs_stat(struct vop_stat_args *ap) sb->st_blocks = dbtob((uint64_t)ip->i_din1->di_blocks) / S_BLKSIZE; sb->st_filerev = ip->i_din1->di_modrev; } else { - sb->st_rdev = ip->i_din2->di_rdev; + sb->st_rdev = VN_ISDEV(vp) ? ip->i_din2->di_rdev : NODEV; sb->st_size = ip->i_din2->di_size; sb->st_mtim.tv_sec = ip->i_din2->di_mtime; sb->st_mtim.tv_nsec = ip->i_din2->di_mtimensec; @@ -603,7 +603,7 @@ ufs_getattr( vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; if (I_IS_UFS1(ip)) { - vap->va_rdev = ip->i_din1->di_rdev; + vap->va_rdev = VN_ISDEV(vp) ? ip->i_din1->di_rdev : NODEV; vap->va_size = ip->i_din1->di_size; vap->va_mtime.tv_sec = ip->i_din1->di_mtime; vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec; @@ -612,7 +612,7 @@ ufs_getattr( vap->va_bytes = dbtob((uint64_t)ip->i_din1->di_blocks); vap->va_filerev = ip->i_din1->di_modrev; } else { - vap->va_rdev = ip->i_din2->di_rdev; + vap->va_rdev = VN_ISDEV(vp) ? ip->i_din2->di_rdev : NODEV; vap->va_size = ip->i_din2->di_size; vap->va_mtime.tv_sec = ip->i_din2->di_mtime; vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec; diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index de8a6c52c08f..244aa31ea703 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -901,8 +901,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count, int error, before, after, rbehind, rahead, poff, i; int bytecount, secmask; - KASSERT(vp->v_type != VCHR && vp->v_type != VBLK, - ("%s does not support devices", __func__)); + KASSERT(!VN_ISDEV(vp), ("%s does not support devices", __func__)); if (VN_IS_DOOMED(vp)) return (VM_PAGER_BAD); diff --git a/sys/x86/include/apicreg.h b/sys/x86/include/apicreg.h index d610d7f11a1c..1252647fbab3 100644 --- a/sys/x86/include/apicreg.h +++ b/sys/x86/include/apicreg.h @@ -296,6 +296,8 @@ typedef struct IOAPIC ioapic_t; /* constants relating to APIC ID registers */ #define APIC_ID_MASK 0xff000000 #define APIC_ID_SHIFT 24 +#define APIC_EXT_ID_MASK 0x00fe0000 +#define APIC_EXT_ID_SHIFT 17 #define APIC_ID_CLUSTER 0xf0 #define APIC_ID_CLUSTER_ID 0x0f #define APIC_MAX_CLUSTER 0xe diff --git a/sys/x86/x86/busdma_bounce.c b/sys/x86/x86/busdma_bounce.c index 040174113104..e86279aa9c98 100644 --- a/sys/x86/x86/busdma_bounce.c +++ b/sys/x86/x86/busdma_bounce.c @@ -726,6 +726,7 @@ bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, map->pagesneeded != 0 && must_bounce(dmat, curaddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); + sgsize = MIN(sgsize, buflen); curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, 0, sgsize); } |