about | summary | refs | log | tree | commit | diff
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/conf/GENERIC4
-rw-r--r--sys/amd64/include/vmm.h1
-rw-r--r--sys/arm/arm/generic_timer.c22
-rw-r--r--sys/arm/conf/TEGRA1242
-rw-r--r--sys/arm64/arm64/cpu_feat.c29
-rw-r--r--sys/arm64/arm64/elf32_machdep.c2
-rw-r--r--sys/arm64/arm64/identcpu.c51
-rw-r--r--sys/arm64/arm64/machdep.c23
-rw-r--r--sys/arm64/arm64/pmap.c140
-rw-r--r--sys/arm64/arm64/ptrauth.c23
-rw-r--r--sys/arm64/arm64/trap.c1
-rw-r--r--sys/arm64/conf/std.arm641
-rw-r--r--sys/arm64/include/cpu.h26
-rw-r--r--sys/arm64/include/cpu_feat.h46
-rw-r--r--sys/arm64/include/pmap.h3
-rw-r--r--sys/arm64/include/proc.h1
-rw-r--r--sys/arm64/include/vmm.h1
-rw-r--r--sys/arm64/vmm/vmm.c6
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c11
-rw-r--r--sys/compat/freebsd32/freebsd32_syscall.h4
-rw-r--r--sys/compat/freebsd32/freebsd32_syscalls.c2
-rw-r--r--sys/compat/freebsd32/freebsd32_sysent.c2
-rw-r--r--sys/compat/freebsd32/freebsd32_systrace_args.c44
-rw-r--r--sys/compat/lindebugfs/lindebugfs.c8
-rw-r--r--sys/compat/linprocfs/linprocfs.c256
-rw-r--r--sys/compat/linsysfs/linsysfs.c159
-rw-r--r--sys/compat/linsysfs/linsysfs_net.c24
-rw-r--r--sys/compat/linuxkpi/common/include/acpi/acpi.h2
-rw-r--r--sys/compat/linuxkpi/common/include/kunit/static_stub.h15
-rw-r--r--sys/compat/linuxkpi/common/include/linux/cleanup.h49
-rw-r--r--sys/compat/linuxkpi/common/include/linux/compiler.h6
-rw-r--r--sys/compat/linuxkpi/common/include/linux/device.h5
-rw-r--r--sys/compat/linuxkpi/common/include/linux/ieee80211.h28
-rw-r--r--sys/compat/linuxkpi/common/include/linux/math.h2
-rw-r--r--sys/compat/linuxkpi/common/include/linux/math64.h6
-rw-r--r--sys/compat/linuxkpi/common/include/linux/overflow.h180
-rw-r--r--sys/compat/linuxkpi/common/include/linux/pci.h17
-rw-r--r--sys/compat/linuxkpi/common/include/linux/rcupdate.h5
-rw-r--r--sys/compat/linuxkpi/common/include/linux/skbuff.h18
-rw-r--r--sys/compat/linuxkpi/common/include/linux/slab.h4
-rw-r--r--sys/compat/linuxkpi/common/include/linux/string_helpers.h2
-rw-r--r--sys/compat/linuxkpi/common/include/linux/timer.h21
-rw-r--r--sys/compat/linuxkpi/common/include/net/mac80211.h2
-rw-r--r--sys/compat/linuxkpi/common/src/linux_80211.c583
-rw-r--r--sys/compat/linuxkpi/common/src/linux_80211.h21
-rw-r--r--sys/compat/linuxkpi/common/src/linux_compat.c11
-rw-r--r--sys/compat/linuxkpi/common/src/linux_devres.c26
-rw-r--r--sys/compat/linuxkpi/common/src/linux_pci.c128
-rw-r--r--sys/compat/linuxkpi/dummy/include/kunit/skbuff.h0
-rw-r--r--sys/compat/linuxkpi/dummy/include/kunit/test-bug.h0
-rw-r--r--sys/compat/linuxkpi/dummy/include/kunit/test.h0
-rw-r--r--sys/conf/NOTES10
-rw-r--r--sys/conf/files1
-rw-r--r--sys/conf/kern.pre.mk4
-rw-r--r--sys/conf/newvers.sh4
-rw-r--r--sys/conf/options2
-rw-r--r--sys/contrib/dev/acpica/components/executer/extrace.c2
-rw-r--r--sys/contrib/dev/rtw88/main.c57
-rw-r--r--sys/contrib/dev/rtw89/fw.c4
-rw-r--r--sys/contrib/libnv/nvlist.c10
-rwxr-xr-xsys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py2
-rw-r--r--sys/contrib/openzfs/.github/workflows/zfs-qemu.yml9
-rw-r--r--sys/contrib/openzfs/META4
-rw-r--r--sys/contrib/openzfs/Makefile.am4
-rw-r--r--sys/contrib/openzfs/cmd/Makefile.am6
-rwxr-xr-xsys/contrib/openzfs/cmd/arc_summary7
-rwxr-xr-xsys/contrib/openzfs/cmd/arcstat.in9
-rw-r--r--sys/contrib/openzfs/cmd/zdb/zdb.c2
-rw-r--r--sys/contrib/openzfs/cmd/zhack.c4
-rw-r--r--sys/contrib/openzfs/cmd/zpool/Makefile.am5
-rw-r--r--sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.448
-rw-r--r--sys/contrib/openzfs/cmd/zstream/Makefile.am5
-rw-r--r--sys/contrib/openzfs/config/always-arch.m417
-rw-r--r--sys/contrib/openzfs/config/always-compiler-options.m462
-rw-r--r--sys/contrib/openzfs/config/kernel-blkdev.m49
-rw-r--r--sys/contrib/openzfs/config/kernel-dentry-operations.m412
-rw-r--r--sys/contrib/openzfs/config/kernel.m42
-rw-r--r--sys/contrib/openzfs/config/user-statx.m46
-rw-r--r--sys/contrib/openzfs/config/zfs-build.m42
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install2
-rwxr-xr-xsys/contrib/openzfs/contrib/debian/rules.in2
-rw-r--r--sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in9
-rw-r--r--sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c4
-rw-r--r--sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h26
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/stat.h2
-rw-r--r--sys/contrib/openzfs/include/sys/zio.h3
-rw-r--r--sys/contrib/openzfs/include/sys/zvol.h2
-rw-r--r--sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h2
-rw-r--r--sys/contrib/openzfs/man/man1/arcstat.14
-rw-r--r--sys/contrib/openzfs/man/man1/cstyle.12
-rw-r--r--sys/contrib/openzfs/man/man1/zhack.12
-rw-r--r--sys/contrib/openzfs/man/man1/ztest.12
-rw-r--r--sys/contrib/openzfs/man/man4/spl.42
-rw-r--r--sys/contrib/openzfs/man/man4/zfs.42
-rw-r--r--sys/contrib/openzfs/man/man5/vdev_id.conf.52
-rw-r--r--sys/contrib/openzfs/man/man7/dracut.zfs.72
-rw-r--r--sys/contrib/openzfs/man/man7/vdevprops.72
-rw-r--r--sys/contrib/openzfs/man/man7/zfsconcepts.72
-rw-r--r--sys/contrib/openzfs/man/man7/zfsprops.72
-rw-r--r--sys/contrib/openzfs/man/man7/zpool-features.72
-rw-r--r--sys/contrib/openzfs/man/man7/zpoolconcepts.72
-rw-r--r--sys/contrib/openzfs/man/man7/zpoolprops.72
-rw-r--r--sys/contrib/openzfs/man/man8/zdb.82
-rw-r--r--sys/contrib/openzfs/man/man8/zed.8.in2
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-allow.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-bookmark.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-clone.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-create.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-destroy.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-diff.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-hold.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-jail.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-list.847
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-load-key.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in2
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-mount.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-project.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-promote.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-rename.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-rewrite.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-send.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-set.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-share.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-snapshot.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-upgrade.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-userspace.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-wait.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-zone.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs_ids_to_path.82
-rw-r--r--sys/contrib/openzfs/man/man8/zgenhostid.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-attach.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-checkpoint.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-clear.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-create.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-destroy.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-detach.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-export.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-get.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-history.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-import.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-initialize.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-iostat.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-labelclear.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-list.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-offline.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-reguid.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-remove.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-reopen.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-replace.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-resilver.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-scrub.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-split.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-status.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-sync.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-trim.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-upgrade.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-wait.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool.82
-rw-r--r--sys/contrib/openzfs/man/man8/zstream.82
-rw-r--r--sys/contrib/openzfs/module/Kbuild.in2
-rw-r--r--sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c3
-rw-r--r--sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c3
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c244
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c19
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c55
-rw-r--r--sys/contrib/openzfs/module/zfs/arc.c32
-rw-r--r--sys/contrib/openzfs/module/zfs/ddt.c8
-rw-r--r--sys/contrib/openzfs/module/zfs/ddt_log.c12
-rw-r--r--sys/contrib/openzfs/module/zfs/dmu_zfetch.c10
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev.c2
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_queue.c2
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_removal.c4
-rw-r--r--sys/contrib/openzfs/module/zfs/zfeature.c5
-rw-r--r--sys/contrib/openzfs/module/zfs/zfs_ioctl.c4
-rw-r--r--sys/contrib/openzfs/module/zfs/zvol.c23
-rw-r--r--sys/contrib/openzfs/rpm/generic/zfs.spec.in2
-rw-r--r--sys/contrib/openzfs/tests/runfiles/linux.run2
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg2
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg4
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am1
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh2
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/mount/mount_loopback.ksh111
-rw-r--r--sys/ddb/db_ps.c11
-rw-r--r--sys/dev/ahci/ahci_pci.c1
-rw-r--r--sys/dev/ath/ath_rate/sample/sample.c8
-rw-r--r--sys/dev/ath/if_ath_tx_ht.c6
-rw-r--r--sys/dev/axgbe/if_axgbe_pci.c3
-rw-r--r--sys/dev/cxgbe/t4_main.c2
-rw-r--r--sys/dev/cyapa/cyapa.c95
-rw-r--r--sys/dev/e1000/if_em.c4
-rw-r--r--sys/dev/enetc/if_enetc.c3
-rw-r--r--sys/dev/gpio/gpio_if.m26
-rw-r--r--sys/dev/gpio/gpiobus.c101
-rw-r--r--sys/dev/gpio/gpiobus_if.m30
-rw-r--r--sys/dev/gpio/gpiobus_internal.h1
-rw-r--r--sys/dev/gpio/gpioc.c157
-rw-r--r--sys/dev/gpio/gpioled.c108
-rw-r--r--sys/dev/gpio/ofw_gpiobus.c3
-rw-r--r--sys/dev/hwpmc/hwpmc_mod.c21
-rw-r--r--sys/dev/hwt/hwt_ioctl.c9
-rw-r--r--sys/dev/ice/ice_lib.c6
-rw-r--r--sys/dev/ichsmb/ichsmb_pci.c3
-rw-r--r--sys/dev/igc/if_igc.c4
-rw-r--r--sys/dev/iommu/busdma_iommu.c54
-rw-r--r--sys/dev/iommu/iommu.h2
-rw-r--r--sys/dev/irdma/irdma_cm.c2
-rw-r--r--sys/dev/irdma/irdma_utils.c4
-rw-r--r--sys/dev/ixgbe/if_ix.c36
-rw-r--r--sys/dev/ixgbe/ixgbe_e610.c34
-rw-r--r--sys/dev/ixl/if_ixl.c4
-rw-r--r--sys/dev/mpr/mpr.c10
-rw-r--r--sys/dev/mpr/mpr_mapping.c18
-rw-r--r--sys/dev/mpr/mprvar.h1
-rw-r--r--sys/dev/mwl/if_mwl.c2
-rw-r--r--sys/dev/nvme/nvme.c1
-rw-r--r--sys/dev/nvme/nvme_ahci.c1
-rw-r--r--sys/dev/nvme/nvme_ctrlr.c117
-rw-r--r--sys/dev/nvme/nvme_ctrlr_cmd.c3
-rw-r--r--sys/dev/nvme/nvme_ns.c3
-rw-r--r--sys/dev/nvme/nvme_pci.c1
-rw-r--r--sys/dev/nvme/nvme_private.h3
-rw-r--r--sys/dev/nvme/nvme_qpair.c3
-rw-r--r--sys/dev/nvme/nvme_sim.c1
-rw-r--r--sys/dev/nvme/nvme_sysctl.c1
-rw-r--r--sys/dev/nvme/nvme_util.c23
-rw-r--r--sys/dev/pci/pci_user.c121
-rw-r--r--sys/dev/qat/qat_common/adf_gen4_timer.c2
-rw-r--r--sys/dev/qlnx/qlnxe/ecore_dev.c6
-rw-r--r--sys/dev/qlnx/qlnxe/ecore_mcp.c2
-rw-r--r--sys/dev/qlnx/qlnxe/qlnx_def.h16
-rw-r--r--sys/dev/qlnx/qlnxe/qlnx_os.c25
-rw-r--r--sys/dev/random/random_harvestq.c148
-rw-r--r--sys/dev/random/randomdev.h3
-rw-r--r--sys/dev/re/if_re.c3
-rw-r--r--sys/dev/rtwn/if_rtwn.c3
-rw-r--r--sys/dev/rtwn/if_rtwn_tx.c8
-rw-r--r--sys/dev/rtwn/rtl8192c/r92c_tx.c11
-rw-r--r--sys/dev/rtwn/rtl8812a/r12a_tx.c16
-rw-r--r--sys/dev/sound/pci/hda/hdac.c11
-rw-r--r--sys/dev/sound/pci/hda/hdac.h1
-rw-r--r--sys/dev/tpm/tpm20.c13
-rw-r--r--sys/dev/tpm/tpm_tis_core.c7
-rw-r--r--sys/dev/ufshci/ufshci.h69
-rw-r--r--sys/dev/ufshci/ufshci_ctrlr.c45
-rw-r--r--sys/dev/ufshci/ufshci_dev.c355
-rw-r--r--sys/dev/ufshci/ufshci_pci.c3
-rw-r--r--sys/dev/ufshci/ufshci_private.h15
-rw-r--r--sys/dev/ufshci/ufshci_reg.h2
-rw-r--r--sys/dev/ufshci/ufshci_sysctl.c20
-rw-r--r--sys/dev/ufshci/ufshci_uic_cmd.c19
-rw-r--r--sys/dev/usb/controller/xhci.c85
-rw-r--r--sys/dev/usb/controller/xhci_pci.c2
-rw-r--r--sys/dev/usb/controller/xhcireg.h5
-rw-r--r--sys/dev/usb/net/if_umb.c4
-rw-r--r--sys/dev/usb/usb_hub.c3
-rw-r--r--sys/dev/usb/wlan/if_rsu.c66
-rw-r--r--sys/dev/usb/wlan/if_rsureg.h9
-rw-r--r--sys/dev/usb/wlan/if_run.c14
-rw-r--r--sys/dev/virtio/network/if_vtnet.c443
-rw-r--r--sys/dev/virtio/network/if_vtnetvar.h2
-rw-r--r--sys/dev/vmgenc/vmgenc_acpi.c8
-rw-r--r--sys/dev/vmm/vmm_dev.c1
-rw-r--r--sys/fs/nfs/nfs_commonport.c2
-rw-r--r--sys/fs/nfsclient/nfs_clport.c7
-rw-r--r--sys/fs/nfsclient/nfs_clvnops.c7
-rw-r--r--sys/fs/nfsserver/nfs_nfsdport.c3
-rw-r--r--sys/fs/procfs/procfs.c68
-rw-r--r--sys/fs/procfs/procfs_status.c8
-rw-r--r--sys/fs/pseudofs/pseudofs.c69
-rw-r--r--sys/fs/pseudofs/pseudofs.h19
-rw-r--r--sys/fs/tarfs/tarfs_vnops.c4
-rw-r--r--sys/geom/cache/g_cache.c4
-rw-r--r--sys/geom/concat/g_concat.c4
-rw-r--r--sys/geom/eli/g_eli.c2
-rw-r--r--sys/geom/gate/g_gate.c2
-rw-r--r--sys/geom/geom.h5
-rw-r--r--sys/geom/geom_dev.c2
-rw-r--r--sys/geom/geom_event.c9
-rw-r--r--sys/geom/geom_slice.c2
-rw-r--r--sys/geom/geom_subr.c33
-rw-r--r--sys/geom/journal/g_journal.c2
-rw-r--r--sys/geom/label/g_label.c2
-rw-r--r--sys/geom/linux_lvm/g_linux_lvm.c4
-rw-r--r--sys/geom/mirror/g_mirror.c4
-rw-r--r--sys/geom/mirror/g_mirror_ctl.c2
-rw-r--r--sys/geom/mountver/g_mountver.c2
-rw-r--r--sys/geom/multipath/g_multipath.c4
-rw-r--r--sys/geom/nop/g_nop.c2
-rw-r--r--sys/geom/part/g_part.c4
-rw-r--r--sys/geom/raid/g_raid.c4
-rw-r--r--sys/geom/raid3/g_raid3.c4
-rw-r--r--sys/geom/raid3/g_raid3_ctl.c2
-rw-r--r--sys/geom/shsec/g_shsec.c4
-rw-r--r--sys/geom/stripe/g_stripe.c10
-rw-r--r--sys/geom/union/g_union.c3
-rw-r--r--sys/geom/virstor/g_virstor.c4
-rw-r--r--sys/geom/zero/g_zero.c2
-rw-r--r--sys/i386/conf/GENERIC2
-rw-r--r--sys/kern/imgact_elf.c4
-rw-r--r--sys/kern/init_main.c9
-rw-r--r--sys/kern/init_sysent.c2
-rw-r--r--sys/kern/kern_descrip.c34
-rw-r--r--sys/kern/kern_environment.c32
-rw-r--r--sys/kern/kern_event.c141
-rw-r--r--sys/kern/kern_jail.c406
-rw-r--r--sys/kern/kern_jaildesc.c278
-rw-r--r--sys/kern/kern_malloc.c11
-rw-r--r--sys/kern/kern_mutex.c4
-rw-r--r--sys/kern/kern_proc.c16
-rw-r--r--sys/kern/kern_rmlock.c4
-rw-r--r--sys/kern/kern_rwlock.c4
-rw-r--r--sys/kern/kern_sx.c4
-rw-r--r--sys/kern/kern_thr.c11
-rw-r--r--sys/kern/kern_thread.c6
-rw-r--r--sys/kern/kern_tslog.c10
-rw-r--r--sys/kern/subr_bus.c13
-rw-r--r--sys/kern/subr_witness.c134
-rw-r--r--sys/kern/sys_procdesc.c4
-rw-r--r--sys/kern/syscalls.c2
-rw-r--r--sys/kern/syscalls.master10
-rw-r--r--sys/kern/systrace_args.c44
-rw-r--r--sys/kern/uipc_usrreq.c4
-rw-r--r--sys/kern/vfs_init.c37
-rw-r--r--sys/kern/vfs_mount.c7
-rw-r--r--sys/kern/vfs_subr.c8
-rw-r--r--sys/libkern/arm64/crc32c_armv8.S8
-rw-r--r--sys/modules/dtb/rockchip/Makefile3
-rw-r--r--sys/modules/irdma/Makefile6
-rw-r--r--sys/modules/sound/driver/hda/Makefile4
-rw-r--r--sys/modules/zfs/zfs_config.h12
-rw-r--r--sys/modules/zfs/zfs_gitrev.h2
-rw-r--r--sys/net/if.c27
-rw-r--r--sys/net/if_bridge.c34
-rw-r--r--sys/net/if_epair.c61
-rw-r--r--sys/net/if_ethersubr.c2
-rw-r--r--sys/net/iflib.c28
-rw-r--r--sys/net/pfvar.h11
-rw-r--r--sys/net80211/ieee80211_ddb.c2
-rw-r--r--sys/net80211/ieee80211_freebsd.h11
-rw-r--r--sys/net80211/ieee80211_ht.c18
-rw-r--r--sys/net80211/ieee80211_node.c4
-rw-r--r--sys/net80211/ieee80211_node.h34
-rw-r--r--sys/net80211/ieee80211_output.c6
-rw-r--r--sys/net80211/ieee80211_phy.c30
-rw-r--r--sys/net80211/ieee80211_phy.h8
-rw-r--r--sys/net80211/ieee80211_sta.c2
-rw-r--r--sys/net80211/ieee80211_var.h3
-rw-r--r--sys/net80211/ieee80211_vht.c20
-rw-r--r--sys/net80211/ieee80211_vht.h4
-rw-r--r--sys/netgraph/ng_parse.c4
-rw-r--r--sys/netinet/in.c15
-rw-r--r--sys/netinet/tcp_log_buf.c3
-rw-r--r--sys/netinet/tcp_sack.c21
-rw-r--r--sys/netinet/tcp_subr.c6
-rw-r--r--sys/netinet/tcp_syncache.c104
-rw-r--r--sys/netinet/udp_usrreq.c45
-rw-r--r--sys/netinet6/in6.c21
-rw-r--r--sys/netinet6/in6_ifattach.c7
-rw-r--r--sys/netinet6/in6_ifattach.h1
-rw-r--r--sys/netinet6/nd6_rtr.c54
-rw-r--r--sys/netpfil/pf/if_pfsync.c16
-rw-r--r--sys/netpfil/pf/pf.c189
-rw-r--r--sys/netpfil/pf/pf.h1
-rw-r--r--sys/netpfil/pf/pf_ioctl.c180
-rw-r--r--sys/netpfil/pf/pf_lb.c151
-rw-r--r--sys/netpfil/pf/pf_nl.c10
-rw-r--r--sys/netpfil/pf/pf_nl.h2
-rw-r--r--sys/netpfil/pf/pf_norm.c12
-rw-r--r--sys/netpfil/pf/pf_nv.c7
-rw-r--r--sys/netpfil/pf/pf_osfp.c2
-rw-r--r--sys/netpfil/pf/pf_ruleset.c10
-rw-r--r--sys/powerpc/conf/GENERIC642
-rw-r--r--sys/powerpc/conf/GENERIC64LE2
-rw-r--r--sys/riscv/include/vmm.h1
-rw-r--r--sys/riscv/vmm/vmm.c6
-rw-r--r--sys/sys/cpu.h22
-rw-r--r--sys/sys/event.h19
-rw-r--r--sys/sys/exterrvar.h2
-rw-r--r--sys/sys/file.h1
-rw-r--r--sys/sys/jail.h20
-rw-r--r--sys/sys/jaildesc.h83
-rw-r--r--sys/sys/kernel.h27
-rw-r--r--sys/sys/mount.h1
-rw-r--r--sys/sys/mutex.h2
-rw-r--r--sys/sys/param.h2
-rw-r--r--sys/sys/pciio.h5
-rw-r--r--sys/sys/power.h2
-rw-r--r--sys/sys/random.h3
-rw-r--r--sys/sys/rmlock.h2
-rw-r--r--sys/sys/rwlock.h2
-rw-r--r--sys/sys/sx.h2
-rw-r--r--sys/sys/syscall.h4
-rw-r--r--sys/sys/syscall.mk4
-rw-r--r--sys/sys/sysproto.h10
-rw-r--r--sys/sys/ttycom.h4
-rw-r--r--sys/sys/user.h4
-rw-r--r--sys/tools/makeobjops.awk4
-rw-r--r--sys/vm/uma_core.c10
-rw-r--r--sys/vm/vm_domainset.c265
-rw-r--r--sys/vm/vm_domainset.h15
-rw-r--r--sys/vm/vm_extern.h2
-rw-r--r--sys/vm/vm_fault.c81
-rw-r--r--sys/vm/vm_glue.c2
-rw-r--r--sys/vm/vm_kern.c12
-rw-r--r--sys/vm/vm_page.c21
-rw-r--r--sys/x86/iommu/amd_intrmap.c14
-rw-r--r--sys/x86/iommu/intel_intrmap.c8
-rw-r--r--sys/x86/x86/identcpu.c4
409 files changed, 6487 insertions, 2553 deletions
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index 81427b5b18b6..d5a1adb2dc65 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -26,7 +26,7 @@ makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support
options SCHED_ULE # ULE scheduler
options NUMA # Non-Uniform Memory Architecture support
options PREEMPTION # Enable kernel thread preemption
-options BLOAT_KERNEL_WITH_EXTERR
+options EXTERR_STRINGS
options VIMAGE # Subsystem virtualization, e.g. VNET
options INET # InterNETworking
options INET6 # IPv6 communications protocols
@@ -287,9 +287,9 @@ device wlan # 802.11 support
options IEEE80211_DEBUG # enable debug msgs
options IEEE80211_SUPPORT_MESH # enable 802.11s draft support
device wlan_wep # 802.11 WEP support
+device wlan_tkip # 802.11 TKIP support
device wlan_ccmp # 802.11 CCMP support
device wlan_gcmp # 802.11 GCMP support
-device wlan_tkip # 802.11 TKIP support
device wlan_amrr # AMRR transmit rate control algorithm
device ath # Atheros CardBus/PCI NICs
device ath_hal # Atheros CardBus/PCI chip support
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 0b3daed4f69e..e35119af8572 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -46,6 +46,7 @@ enum vm_suspend_how {
VM_SUSPEND_POWEROFF,
VM_SUSPEND_HALT,
VM_SUSPEND_TRIPLEFAULT,
+ VM_SUSPEND_DESTROY,
VM_SUSPEND_LAST
};
diff --git a/sys/arm/arm/generic_timer.c b/sys/arm/arm/generic_timer.c
index a8c779dcba6d..97976408c943 100644
--- a/sys/arm/arm/generic_timer.c
+++ b/sys/arm/arm/generic_timer.c
@@ -882,32 +882,32 @@ DELAY(int usec)
TSEXIT();
}
-static bool
+static cpu_feat_en
wfxt_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t id_aa64isar2;
if (!get_kernel_reg(ID_AA64ISAR2_EL1, &id_aa64isar2))
- return (false);
- return (ID_AA64ISAR2_WFxT_VAL(id_aa64isar2) != ID_AA64ISAR2_WFxT_NONE);
+ return (FEAT_ALWAYS_DISABLE);
+ /* Present only when the WFxT field differs from NONE, as before this change */
+ if (ID_AA64ISAR2_WFxT_VAL(id_aa64isar2) != ID_AA64ISAR2_WFxT_NONE)
+ return (FEAT_DEFAULT_ENABLE);
+
+ return (FEAT_ALWAYS_DISABLE);
}
-static void
+static bool
wfxt_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
u_int errata_count __unused)
{
/* will be called if wfxt_check returns true */
enable_wfxt = true;
+ return (true);
}
-static struct cpu_feat feat_wfxt = {
- .feat_name = "FEAT_WFXT",
- .feat_check = wfxt_check,
- .feat_enable = wfxt_enable,
- .feat_flags = CPU_FEAT_AFTER_DEV | CPU_FEAT_SYSTEM,
-};
-DATA_SET(cpu_feat_set, feat_wfxt);
+CPU_FEAT(feat_wfxt, "WFE and WFI instructions with timeout",
+ wfxt_check, NULL, wfxt_enable,
+ CPU_FEAT_AFTER_DEV | CPU_FEAT_SYSTEM);
#endif
static uint32_t
diff --git a/sys/arm/conf/TEGRA124 b/sys/arm/conf/TEGRA124
index ad5532427eda..ff23e63f77bd 100644
--- a/sys/arm/conf/TEGRA124
+++ b/sys/arm/conf/TEGRA124
@@ -107,9 +107,9 @@ device ums # USB mouse
# Wireless NIC cards
#device wlan # 802.11 support
#device wlan_wep # 802.11 WEP support
+#device wlan_tkip # 802.11 TKIP support
#device wlan_ccmp # 802.11 CCMP support
#device wlan_gcmp # 802.11 GCMP support
-#device wlan_tkip # 802.11 TKIP support
#device wlan_amrr # AMRR transmit rate control algorithm
# PCI
diff --git a/sys/arm64/arm64/cpu_feat.c b/sys/arm64/arm64/cpu_feat.c
index cc262394913d..986d5079e980 100644
--- a/sys/arm64/arm64/cpu_feat.c
+++ b/sys/arm64/arm64/cpu_feat.c
@@ -32,16 +32,21 @@
#include <machine/cpu.h>
#include <machine/cpu_feat.h>
+SYSCTL_NODE(_hw, OID_AUTO, feat, CTLFLAG_RD, 0, "CPU features/errata");
+
/* TODO: Make this a list if we ever grow a callback other than smccc_errata */
static cpu_feat_errata_check_fn cpu_feat_check_cb = NULL;
void
enable_cpu_feat(uint32_t stage)
{
+ char tunable[32];
struct cpu_feat **featp, *feat;
uint32_t midr;
u_int errata_count, *errata_list;
cpu_feat_errata errata_status;
+ cpu_feat_en check_status;
+ bool val;
MPASS((stage & ~CPU_FEAT_STAGE_MASK) == 0);
@@ -58,9 +63,26 @@ enable_cpu_feat(uint32_t stage)
PCPU_GET(cpuid) != 0)
continue;
- if (feat->feat_check != NULL && !feat->feat_check(feat, midr))
+ /* The callback is invoked unconditionally below, so skip entries without one */
+ if (feat->feat_check == NULL)
+ continue;
+
+ check_status = feat->feat_check(feat, midr);
+ /* Ignore features that are not present */
+ if (check_status == FEAT_ALWAYS_DISABLE)
continue;
+ snprintf(tunable, sizeof(tunable), "hw.feat.%s",
+ feat->feat_name);
+ if (TUNABLE_BOOL_FETCH(tunable, &val)) {
+ /* Is the feature disabled by the tunable? */
+ if (!val)
+ continue;
+ /* If enabled by the tunable then enable it */
+ } else if (check_status == FEAT_DEFAULT_DISABLE) {
+ /* No tunable set and disabled by default */
+ continue;
+ }
+
/*
* Check if the feature has any errata that may need a
* workaround applied (or it is to install the workaround for
@@ -97,8 +119,9 @@ enable_cpu_feat(uint32_t stage)
/* Shouldn't be possible */
MPASS(errata_status != ERRATA_UNKNOWN);
- feat->feat_enable(feat, errata_status, errata_list,
- errata_count);
+ if (feat->feat_enable(feat, errata_status, errata_list,
+ errata_count))
+ feat->feat_enabled = true;
}
}
diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c
index 5c81c6cdce3d..8f8a934ad520 100644
--- a/sys/arm64/arm64/elf32_machdep.c
+++ b/sys/arm64/arm64/elf32_machdep.c
@@ -225,7 +225,7 @@ freebsd32_fetch_syscall_args(struct thread *td)
sa->args[i] = ap[i];
if (narg > nap) {
if (narg - nap > nitems(args))
- panic("Too many system call arguiments");
+ panic("Too many system call arguments");
error = copyin((void *)td->td_frame->tf_x[13], args,
(narg - nap) * sizeof(int));
if (error != 0)
diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c
index bcacea43ad2f..a001be200518 100644
--- a/sys/arm64/arm64/identcpu.c
+++ b/sys/arm64/arm64/identcpu.c
@@ -2272,37 +2272,25 @@ static const struct mrs_user_reg user_regs[] = {
static bool
user_ctr_has_neoverse_n1_1542419(uint32_t midr, uint64_t ctr)
{
- /* Skip non-Neoverse-N1 */
- if (!CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_ARM,
- CPU_PART_NEOVERSE_N1, 0, 0))
- return (false);
-
- switch (CPU_VAR(midr)) {
- default:
- break;
- case 4:
- /* Fixed in r4p1 */
- if (CPU_REV(midr) > 0)
- break;
- /* FALLTHROUGH */
- case 3:
- /* If DIC is enabled (coherent icache) then we are affected */
- return (CTR_DIC_VAL(ctr) != 0);
- }
-
- return (false);
+ /*
+ * Neoverse-N1 erratum 1542419
+ * Present in r3p0 - r4p0
+ * Fixed in r4p1
+ */
+ return (midr_check_var_part_range(midr, CPU_IMPL_ARM,
+ CPU_PART_NEOVERSE_N1, 3, 0, 4, 0) && CTR_DIC_VAL(ctr) != 0);
}
-static bool
-user_ctr_check(const struct cpu_feat *feat __unused, u_int midr __unused)
+static cpu_feat_en
+user_ctr_check(const struct cpu_feat *feat __unused, u_int midr)
{
if (emulate_ctr)
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
if (user_ctr_has_neoverse_n1_1542419(midr, READ_SPECIALREG(ctr_el0)))
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
- return (false);
+ return (FEAT_ALWAYS_DISABLE);
}
static bool
@@ -2320,7 +2308,7 @@ user_ctr_has_errata(const struct cpu_feat *feat __unused, u_int midr,
return (false);
}
-static void
+static bool
user_ctr_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status, u_int *errata_list, u_int errata_count)
{
@@ -2356,16 +2344,13 @@ user_ctr_enable(const struct cpu_feat *feat __unused,
WRITE_SPECIALREG(sctlr_el1,
READ_SPECIALREG(sctlr_el1) & ~SCTLR_UCT);
isb();
+
+ return (true);
}
-static struct cpu_feat user_ctr = {
- .feat_name = "Trap CTR_EL0",
- .feat_check = user_ctr_check,
- .feat_has_errata = user_ctr_has_errata,
- .feat_enable = user_ctr_enable,
- .feat_flags = CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU,
-};
-DATA_SET(cpu_feat_set, user_ctr);
+CPU_FEAT(trap_ctr, "Trap CTR_EL0",
+ user_ctr_check, user_ctr_has_errata, user_ctr_enable,
+ CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
static bool
user_ctr_handler(uint64_t esr, struct trapframe *frame)
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
index 53856dd90cae..47c701e8588c 100644
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -173,16 +173,19 @@ SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
-static bool
+static cpu_feat_en
pan_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t id_aa64mfr1;
id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
- return (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE);
+ if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) == ID_AA64MMFR1_PAN_NONE)
+ return (FEAT_ALWAYS_DISABLE);
+
+ return (FEAT_DEFAULT_ENABLE);
}
-static void
+static bool
pan_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
u_int errata_count __unused)
@@ -200,15 +203,13 @@ pan_enable(const struct cpu_feat *feat __unused,
".arch_extension pan \n"
"msr pan, #1 \n"
".arch_extension nopan \n");
+
+ return (true);
}
-static struct cpu_feat feat_pan = {
- .feat_name = "FEAT_PAN",
- .feat_check = pan_check,
- .feat_enable = pan_enable,
- .feat_flags = CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU,
-};
-DATA_SET(cpu_feat_set, feat_pan);
+CPU_FEAT(feat_pan, "Privileged access never",
+ pan_check, NULL, pan_enable,
+ CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU);
bool
has_hyp(void)
@@ -857,7 +858,7 @@ initarm(struct arm64_bootparams *abp)
cninit();
set_ttbr0(abp->kern_ttbr0);
- cpu_tlb_flushID();
+ pmap_s1_invalidate_all_kernel();
if (!valid)
panic("Invalid bus configuration: %s",
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index ec89c4573799..8a4395aa1c89 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -190,6 +190,8 @@ pt_entry_t __read_mostly pmap_gp_attr;
#define PMAP_SAN_PTE_BITS (ATTR_AF | ATTR_S1_XN | pmap_sh_attr | \
ATTR_KERN_GP | ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | ATTR_S1_AP(ATTR_S1_AP_RW))
+static bool __read_mostly pmap_multiple_tlbi = false;
+
struct pmap_large_md_page {
struct rwlock pv_lock;
struct md_page pv_page;
@@ -1297,7 +1299,7 @@ pmap_bootstrap_dmap(vm_size_t kernlen)
}
}
- cpu_tlb_flushID();
+ pmap_s1_invalidate_all_kernel();
bs_state.dmap_valid = true;
@@ -1399,7 +1401,7 @@ pmap_bootstrap(void)
/* And the l3 tables for the early devmap */
pmap_bootstrap_l3(VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE));
- cpu_tlb_flushID();
+ pmap_s1_invalidate_all_kernel();
#define alloc_pages(var, np) \
(var) = bs_state.freemempos; \
@@ -1656,14 +1658,17 @@ pmap_init_pv_table(void)
}
}
-static bool
+static cpu_feat_en
pmap_dbm_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t id_aa64mmfr1;
id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
- return (ID_AA64MMFR1_HAFDBS_VAL(id_aa64mmfr1) >=
- ID_AA64MMFR1_HAFDBS_AF_DBS);
+ if (ID_AA64MMFR1_HAFDBS_VAL(id_aa64mmfr1) >=
+ ID_AA64MMFR1_HAFDBS_AF_DBS)
+ return (FEAT_DEFAULT_ENABLE);
+
+ return (FEAT_ALWAYS_DISABLE);
}
static bool
@@ -1671,8 +1676,8 @@ pmap_dbm_has_errata(const struct cpu_feat *feat __unused, u_int midr,
u_int **errata_list, u_int *errata_count)
{
/* Disable on Cortex-A55 for erratum 1024718 - all revisions */
- if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_ARM,
- CPU_PART_CORTEX_A55, 0, 0)) {
+ if (CPU_IMPL(midr) == CPU_IMPL_ARM &&
+ CPU_PART(midr) == CPU_PART_CORTEX_A55) {
static u_int errata_id = 1024718;
*errata_list = &errata_id;
@@ -1681,21 +1686,19 @@ pmap_dbm_has_errata(const struct cpu_feat *feat __unused, u_int midr,
}
/* Disable on Cortex-A510 for erratum 2051678 - r0p0 to r0p2 */
- if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK | CPU_VAR_MASK,
- CPU_IMPL_ARM, CPU_PART_CORTEX_A510, 0, 0)) {
- if (CPU_REV(PCPU_GET(midr)) < 3) {
- static u_int errata_id = 2051678;
+ if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A510,
+ 0, 0, 0, 2)) {
+ static u_int errata_id = 2051678;
- *errata_list = &errata_id;
- *errata_count = 1;
- return (true);
- }
+ *errata_list = &errata_id;
+ *errata_count = 1;
+ return (true);
}
return (false);
}
-static void
+static bool
pmap_dbm_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status, u_int *errata_list __unused,
u_int errata_count)
@@ -1704,7 +1707,7 @@ pmap_dbm_enable(const struct cpu_feat *feat __unused,
/* Skip if there is an erratum affecting DBM */
if (errata_status != ERRATA_NONE)
- return;
+ return (false);
tcr = READ_SPECIALREG(tcr_el1) | TCR_HD;
WRITE_SPECIALREG(tcr_el1, tcr);
@@ -1714,16 +1717,58 @@ pmap_dbm_enable(const struct cpu_feat *feat __unused,
__asm __volatile("tlbi vmalle1");
dsb(nsh);
isb();
+
+ return (true);
}
-static struct cpu_feat feat_dbm = {
- .feat_name = "FEAT_HAFDBS (DBM)",
- .feat_check = pmap_dbm_check,
- .feat_has_errata = pmap_dbm_has_errata,
- .feat_enable = pmap_dbm_enable,
- .feat_flags = CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU,
-};
-DATA_SET(cpu_feat_set, feat_dbm);
+CPU_FEAT(feat_hafdbs, "Hardware management of the Access flag and dirty state",
+ pmap_dbm_check, pmap_dbm_has_errata, pmap_dbm_enable,
+ CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
+
+static cpu_feat_en
+pmap_multiple_tlbi_check(const struct cpu_feat *feat __unused, u_int midr)
+{
+ /*
+ * Cortex-A55 erratum 2441007 (Cat B rare)
+ * Present in all revisions
+ */
+ if (CPU_IMPL(midr) == CPU_IMPL_ARM &&
+ CPU_PART(midr) == CPU_PART_CORTEX_A55)
+ return (FEAT_DEFAULT_DISABLE);
+
+ /*
+ * Cortex-A76 erratum 1286807 (Cat B rare)
+ * Present in r0p0 - r3p0
+ * Fixed in r3p1
+ */
+ if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A76,
+ 0, 0, 3, 0))
+ return (FEAT_DEFAULT_DISABLE);
+
+ /*
+ * Cortex-A510 erratum 2441009 (Cat B rare)
+ * Present in r0p0 - r1p1
+ * Fixed in r1p2
+ */
+ if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A510,
+ 0, 0, 1, 1))
+ return (FEAT_DEFAULT_DISABLE);
+
+ return (FEAT_ALWAYS_DISABLE);
+}
+
+static bool
+pmap_multiple_tlbi_enable(const struct cpu_feat *feat __unused,
+ cpu_feat_errata errata_status, u_int *errata_list __unused,
+ u_int errata_count __unused)
+{
+ pmap_multiple_tlbi = true;
+ return (true);
+}
+
+CPU_FEAT(errata_multi_tlbi, "Multiple TLBI errata",
+ pmap_multiple_tlbi_check, NULL, pmap_multiple_tlbi_enable,
+ CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU);
/*
* Initialize the pmap module.
@@ -1878,9 +1923,17 @@ pmap_s1_invalidate_page(pmap_t pmap, vm_offset_t va, bool final_only)
r = TLBI_VA(va);
if (pmap == kernel_pmap) {
pmap_s1_invalidate_kernel(r, final_only);
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ pmap_s1_invalidate_kernel(r, final_only);
+ }
} else {
r |= ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
pmap_s1_invalidate_user(r, final_only);
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ pmap_s1_invalidate_user(r, final_only);
+ }
}
dsb(ish);
isb();
@@ -1922,12 +1975,24 @@ pmap_s1_invalidate_strided(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
end = TLBI_VA(eva);
for (r = start; r < end; r += TLBI_VA(stride))
pmap_s1_invalidate_kernel(r, final_only);
+
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ for (r = start; r < end; r += TLBI_VA(stride))
+ pmap_s1_invalidate_kernel(r, final_only);
+ }
} else {
start = end = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
start |= TLBI_VA(sva);
end |= TLBI_VA(eva);
for (r = start; r < end; r += TLBI_VA(stride))
pmap_s1_invalidate_user(r, final_only);
+
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ for (r = start; r < end; r += TLBI_VA(stride))
+ pmap_s1_invalidate_user(r, final_only);
+ }
}
dsb(ish);
isb();
@@ -1963,6 +2028,19 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
pmap_s2_invalidate_range(pmap, sva, eva, final_only);
}
+void
+pmap_s1_invalidate_all_kernel(void)
+{
+ dsb(ishst); /* barrier issued ahead of the TLB invalidate */
+ __asm __volatile("tlbi vmalle1is"); /* same op pmap_s1_invalidate_all() uses for kernel_pmap */
+ dsb(ish); /* barrier after the first invalidate */
+ if (pmap_multiple_tlbi) { /* errata workaround on: invalidate a second time */
+ __asm __volatile("tlbi vmalle1is");
+ dsb(ish);
+ }
+ isb();
+}
+
/*
* Invalidates all cached intermediate- and final-level TLB entries for the
* given virtual address space.
@@ -1977,9 +2055,17 @@ pmap_s1_invalidate_all(pmap_t pmap)
dsb(ishst);
if (pmap == kernel_pmap) {
__asm __volatile("tlbi vmalle1is");
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ __asm __volatile("tlbi vmalle1is");
+ }
} else {
r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
__asm __volatile("tlbi aside1is, %0" : : "r" (r));
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ __asm __volatile("tlbi aside1is, %0" : : "r" (r));
+ }
}
dsb(ish);
isb();
@@ -7967,7 +8053,7 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size)
pa += L2_SIZE;
}
if ((old_l2e & ATTR_DESCR_VALID) != 0)
- pmap_s1_invalidate_all(kernel_pmap);
+ pmap_s1_invalidate_all_kernel();
else {
/*
* Because the old entries were invalid and the new
@@ -8058,7 +8144,7 @@ pmap_unmapbios(void *p, vm_size_t size)
}
}
if (preinit_map) {
- pmap_s1_invalidate_all(kernel_pmap);
+ pmap_s1_invalidate_all_kernel();
return;
}
diff --git a/sys/arm64/arm64/ptrauth.c b/sys/arm64/arm64/ptrauth.c
index dbe0c69b8d60..fdab5414e24c 100644
--- a/sys/arm64/arm64/ptrauth.c
+++ b/sys/arm64/arm64/ptrauth.c
@@ -82,7 +82,7 @@ ptrauth_disable(void)
return (false);
}
-static bool
+static cpu_feat_en
ptrauth_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t isar;
@@ -116,14 +116,14 @@ ptrauth_check(const struct cpu_feat *feat __unused, u_int midr __unused)
if (get_kernel_reg(ID_AA64ISAR1_EL1, &isar)) {
if (ID_AA64ISAR1_APA_VAL(isar) > 0 ||
ID_AA64ISAR1_API_VAL(isar) > 0) {
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
}
}
/* The QARMA3 algorithm is reported in ID_AA64ISAR2_EL1. */
if (get_kernel_reg(ID_AA64ISAR2_EL1, &isar)) {
if (ID_AA64ISAR2_APA3_VAL(isar) > 0) {
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
}
}
@@ -138,10 +138,10 @@ out:
ID_AA64ISAR1_GPI_MASK, 0);
update_special_reg(ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_MASK, 0);
- return (false);
+ return (FEAT_ALWAYS_DISABLE);
}
-static void
+static bool
ptrauth_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
u_int errata_count __unused)
@@ -153,16 +153,13 @@ ptrauth_enable(const struct cpu_feat *feat __unused,
elf64_addr_mask_14.code |= PAC_ADDR_MASK_14;
elf64_addr_mask_14.data |= PAC_ADDR_MASK_14;
#endif
-}
+ return (true);
+}
-static struct cpu_feat feat_pauth = {
- .feat_name = "FEAT_PAuth",
- .feat_check = ptrauth_check,
- .feat_enable = ptrauth_enable,
- .feat_flags = CPU_FEAT_EARLY_BOOT | CPU_FEAT_SYSTEM,
-};
-DATA_SET(cpu_feat_set, feat_pauth);
+CPU_FEAT(feat_pauth, "Pointer Authentication",
+ ptrauth_check, NULL, ptrauth_enable,
+ CPU_FEAT_EARLY_BOOT | CPU_FEAT_SYSTEM);
/* Copy the keys when forking a new process */
void
diff --git a/sys/arm64/arm64/trap.c b/sys/arm64/arm64/trap.c
index bed58095201a..75c9b5f87892 100644
--- a/sys/arm64/arm64/trap.c
+++ b/sys/arm64/arm64/trap.c
@@ -246,6 +246,7 @@ external_abort(struct thread *td, struct trapframe *frame, uint64_t esr,
print_registers(frame);
print_gp_register("far", far);
+ printf(" esr: 0x%.16lx\n", esr);
panic("Unhandled external data abort");
}
diff --git a/sys/arm64/conf/std.arm64 b/sys/arm64/conf/std.arm64
index c83e98c17a33..a0568466cfaf 100644
--- a/sys/arm64/conf/std.arm64
+++ b/sys/arm64/conf/std.arm64
@@ -7,6 +7,7 @@ makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support
options SCHED_ULE # ULE scheduler
options NUMA # Non-Uniform Memory Architecture support
options PREEMPTION # Enable kernel thread preemption
+options EXTERR_STRINGS
options VIMAGE # Subsystem virtualization, e.g. VNET
options INET # InterNETworking
options INET6 # IPv6 communications protocols
diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h
index 59cda36f275e..07a783138f42 100644
--- a/sys/arm64/include/cpu.h
+++ b/sys/arm64/include/cpu.h
@@ -193,8 +193,30 @@
(((mask) & PCPU_GET(midr)) == \
((mask) & CPU_ID_RAW((impl), (part), (var), (rev))))
-#define CPU_MATCH_RAW(mask, devid) \
- (((mask) & PCPU_GET(midr)) == ((mask) & (devid)))
+#if !defined(__ASSEMBLER__)
+/*
+ * Check whether midr names the given implementer and part at a revision
+ * inside the inclusive range r<var_low>p<part_low> - r<var_high>p<part_high>.
+ *
+ * Despite their names, part_low and part_high bound the MIDR revision
+ * field (the "pM" in rNpM), not the part number: callers pass values such
+ * as (0, 0, 3, 0) to describe r0p0 - r3p0.
+ *
+ * Returns true when midr falls inside the range, false otherwise.
+ */
+static inline bool
+midr_check_var_part_range(u_int midr, u_int impl, u_int part, u_int var_low,
+    u_int part_low, u_int var_high, u_int part_high)
+{
+	/* Check for the correct part */
+	if (CPU_IMPL(midr) != impl || CPU_PART(midr) != part)
+		return (false);
+
+	/* Check if the variant is between var_low and var_high inclusive */
+	if (CPU_VAR(midr) < var_low || CPU_VAR(midr) > var_high)
+		return (false);
+
+	/*
+	 * If the variant is the low value, check if the revision is high
+	 * enough.  The revision field must be used here: CPU_PART(midr) is
+	 * already known to equal part, so comparing it against a revision
+	 * bound would never reject anything.
+	 */
+	if (CPU_VAR(midr) == var_low && CPU_REV(midr) < part_low)
+		return (false);
+
+	/*
+	 * If the variant is the high value, check if the revision is low
+	 * enough.  Comparing the part number instead would reject every
+	 * CPU at var_high, e.g. an r3p0 core for a range ending at r3p0.
+	 */
+	if (CPU_VAR(midr) == var_high && CPU_REV(midr) > part_high)
+		return (false);
+
+	return (true);
+}
+#endif
/*
* Chip-specific errata. This defines are intended to be
diff --git a/sys/arm64/include/cpu_feat.h b/sys/arm64/include/cpu_feat.h
index 9fe6a9dd95d9..6a554b6baedf 100644
--- a/sys/arm64/include/cpu_feat.h
+++ b/sys/arm64/include/cpu_feat.h
@@ -29,6 +29,7 @@
#define _MACHINE_CPU_FEAT_H_
#include <sys/linker_set.h>
+#include <sys/sysctl.h>
typedef enum {
ERRATA_UNKNOWN, /* Unknown erratum */
@@ -39,6 +40,31 @@ typedef enum {
/* kernel component. */
} cpu_feat_errata;
+typedef enum {
+ /*
+ * Don't implement the feature or erratum workaround,
+ * e.g. the feature is not implemented or erratum is
+ * for another CPU.
+ */
+ FEAT_ALWAYS_DISABLE,
+
+ /*
+ * Disable by default, but allow the user to enable,
+ * e.g. for a rare erratum with a workaround, Arm
+ * Category B (rare) or similar.
+ */
+ FEAT_DEFAULT_DISABLE,
+
+ /*
+ * Enabled by default, but allow the user to disable,
+ * e.g. for a common erratum with a workaround, Arm
+ * Category A or B or similar.
+ */
+ FEAT_DEFAULT_ENABLE,
+
+ /* We could add FEAT_ALWAYS_ENABLE if a need was found. */
+} cpu_feat_en;
+
#define CPU_FEAT_STAGE_MASK 0x00000001
#define CPU_FEAT_EARLY_BOOT 0x00000000
#define CPU_FEAT_AFTER_DEV 0x00000001
@@ -49,10 +75,10 @@ typedef enum {
struct cpu_feat;
-typedef bool (cpu_feat_check)(const struct cpu_feat *, u_int);
+typedef cpu_feat_en (cpu_feat_check)(const struct cpu_feat *, u_int);
typedef bool (cpu_feat_has_errata)(const struct cpu_feat *, u_int,
u_int **, u_int *);
-typedef void (cpu_feat_enable)(const struct cpu_feat *, cpu_feat_errata,
+typedef bool (cpu_feat_enable)(const struct cpu_feat *, cpu_feat_errata,
u_int *, u_int);
struct cpu_feat {
@@ -61,9 +87,25 @@ struct cpu_feat {
cpu_feat_has_errata *feat_has_errata;
cpu_feat_enable *feat_enable;
uint32_t feat_flags;
+ bool feat_enabled;
};
SET_DECLARE(cpu_feat_set, struct cpu_feat);
+SYSCTL_DECL(_hw_feat);
+
+#define CPU_FEAT(name, descr, check, has_errata, enable, flags) \
+static struct cpu_feat name = { /* one static descriptor per invocation */ \
+ .feat_name = #name, /* the identifier itself, stringified */ \
+ .feat_check = check, \
+ .feat_has_errata = has_errata, \
+ .feat_enable = enable, \
+ .feat_flags = flags, \
+ .feat_enabled = false, \
+}; \
+DATA_SET(cpu_feat_set, name); /* add the descriptor to cpu_feat_set */ \
+SYSCTL_BOOL(_hw_feat, OID_AUTO, name, CTLFLAG_RD, &name.feat_enabled, \
+ 0, descr) /* read-only bool under _hw_feat; descr is its description; caller adds ';' */
+
/*
* Allow drivers to mark an erratum as worked around, e.g. the Errata
* Management ABI may know the workaround isn't needed on a given system.
diff --git a/sys/arm64/include/pmap.h b/sys/arm64/include/pmap.h
index 0f23f200f0f6..406b6e2c5e0a 100644
--- a/sys/arm64/include/pmap.h
+++ b/sys/arm64/include/pmap.h
@@ -69,6 +69,7 @@ struct md_page {
TAILQ_HEAD(,pv_entry) pv_list;
int pv_gen;
vm_memattr_t pv_memattr;
+ uint8_t pv_reserve[3];
};
enum pmap_stage {
@@ -174,6 +175,8 @@ int pmap_fault(pmap_t, uint64_t, uint64_t);
struct pcb *pmap_switch(struct thread *);
+void pmap_s1_invalidate_all_kernel(void);
+
extern void (*pmap_clean_stage2_tlbi)(void);
extern void (*pmap_stage2_invalidate_range)(uint64_t, vm_offset_t, vm_offset_t,
bool);
diff --git a/sys/arm64/include/proc.h b/sys/arm64/include/proc.h
index 184743d4cc80..b40990e89385 100644
--- a/sys/arm64/include/proc.h
+++ b/sys/arm64/include/proc.h
@@ -75,6 +75,7 @@ struct mdthread {
struct mdproc {
uint64_t md_tcr; /* TCR_EL1 fields to update */
+ uint64_t md_reserved[2];
};
#endif /* !LOCORE */
diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h
index 73b5b4a09591..e839b5dd92c9 100644
--- a/sys/arm64/include/vmm.h
+++ b/sys/arm64/include/vmm.h
@@ -42,6 +42,7 @@ enum vm_suspend_how {
VM_SUSPEND_RESET,
VM_SUSPEND_POWEROFF,
VM_SUSPEND_HALT,
+ VM_SUSPEND_DESTROY,
VM_SUSPEND_LAST
};
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
index 3082d2941221..1dcefa1489e9 100644
--- a/sys/arm64/vmm/vmm.c
+++ b/sys/arm64/vmm/vmm.c
@@ -1342,8 +1342,14 @@ vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
+ struct vm *vm;
+
+ vm = vcpu->vm;
vcpu_lock(vcpu);
while (1) {
+ if (vm->suspend)
+ break;
+
if (vgic_has_pending_irq(vcpu->cookie))
break;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
index 7192df200ae2..8078f3f6d4b1 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
@@ -7761,7 +7761,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
uintptr_t *memref = (uintptr_t *)(uintptr_t) val;
if (!DTRACE_INSCRATCHPTR(&mstate,
- (uintptr_t)memref, 2 * sizeof(uintptr_t))) {
+ (uintptr_t) memref,
+ sizeof (uintptr_t) + sizeof (size_t))) {
*flags |= CPU_DTRACE_BADADDR;
continue;
}
@@ -7773,21 +7774,21 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
* Check if the size exceeds the allocated
* buffer size.
*/
- if (size + sizeof(uintptr_t) > dp->dtdo_rtype.dtdt_size) {
+ if (size + sizeof (size_t) >
+ dp->dtdo_rtype.dtdt_size) {
/* Flag a drop! */
*flags |= CPU_DTRACE_DROP;
continue;
}
/* Store the size in the buffer first. */
- DTRACE_STORE(uintptr_t, tomax,
- valoffs, size);
+ DTRACE_STORE(size_t, tomax, valoffs, size);
/*
* Offset the buffer address to the start
* of the data.
*/
- valoffs += sizeof(uintptr_t);
+ valoffs += sizeof(size_t);
/*
* Reset to the memory address rather than
diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h
index 90cd21a80923..54063150eef9 100644
--- a/sys/compat/freebsd32/freebsd32_syscall.h
+++ b/sys/compat/freebsd32/freebsd32_syscall.h
@@ -515,4 +515,6 @@
#define FREEBSD32_SYS_inotify_rm_watch 594
#define FREEBSD32_SYS_getgroups 595
#define FREEBSD32_SYS_setgroups 596
-#define FREEBSD32_SYS_MAXSYSCALL 597
+#define FREEBSD32_SYS_jail_attach_jd 597
+#define FREEBSD32_SYS_jail_remove_jd 598
+#define FREEBSD32_SYS_MAXSYSCALL 599
diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c
index f0f8d26554b5..f7cc4c284e4d 100644
--- a/sys/compat/freebsd32/freebsd32_syscalls.c
+++ b/sys/compat/freebsd32/freebsd32_syscalls.c
@@ -602,4 +602,6 @@ const char *freebsd32_syscallnames[] = {
"inotify_rm_watch", /* 594 = inotify_rm_watch */
"getgroups", /* 595 = getgroups */
"setgroups", /* 596 = setgroups */
+ "jail_attach_jd", /* 597 = jail_attach_jd */
+ "jail_remove_jd", /* 598 = jail_remove_jd */
};
diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c
index 12f1a346c3e9..18f809ef04e3 100644
--- a/sys/compat/freebsd32/freebsd32_sysent.c
+++ b/sys/compat/freebsd32/freebsd32_sysent.c
@@ -664,4 +664,6 @@ struct sysent freebsd32_sysent[] = {
{ .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
{ .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */
{ .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */
+ { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */
+ { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */
};
diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c
index e471c5148021..29a5497e9efa 100644
--- a/sys/compat/freebsd32/freebsd32_systrace_args.c
+++ b/sys/compat/freebsd32/freebsd32_systrace_args.c
@@ -3413,6 +3413,20 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 2;
break;
}
+ /* jail_attach_jd */
+ case 597: {
+ struct jail_attach_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* jail_remove_jd */
+ case 598: {
+ struct jail_remove_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -9222,6 +9236,26 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
+ /* jail_attach_jd */
+ case 597:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* jail_remove_jd */
+ case 598:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -11130,6 +11164,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* jail_attach_jd */
+ case 597:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* jail_remove_jd */
+ case 598:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/compat/lindebugfs/lindebugfs.c b/sys/compat/lindebugfs/lindebugfs.c
index 50f9377ffec3..8cddc6f390bc 100644
--- a/sys/compat/lindebugfs/lindebugfs.c
+++ b/sys/compat/lindebugfs/lindebugfs.c
@@ -206,7 +206,7 @@ debugfs_create_file(const char *name, umode_t mode,
pnode = debugfs_root;
flags = fops->write ? PFS_RDWR : PFS_RD;
- dnode->d_pfs_node = pfs_create_file(pnode, name, debugfs_fill,
+ pfs_create_file(pnode, &dnode->d_pfs_node, name, debugfs_fill,
debugfs_attr, NULL, debugfs_destroy, flags | PFS_NOWAIT);
if (dnode->d_pfs_node == NULL) {
free(dm, M_DFSINT);
@@ -283,7 +283,8 @@ debugfs_create_dir(const char *name, struct dentry *parent)
else
pnode = debugfs_root;
- dnode->d_pfs_node = pfs_create_dir(pnode, name, debugfs_attr, NULL, debugfs_destroy, PFS_RD | PFS_NOWAIT);
+ pfs_create_dir(pnode, &dnode->d_pfs_node, name, debugfs_attr, NULL,
+ debugfs_destroy, PFS_RD | PFS_NOWAIT);
if (dnode->d_pfs_node == NULL) {
free(dm, M_DFSINT);
return (NULL);
@@ -316,7 +317,8 @@ debugfs_create_symlink(const char *name, struct dentry *parent,
else
pnode = debugfs_root;
- dnode->d_pfs_node = pfs_create_link(pnode, name, &debugfs_fill_data, NULL, NULL, NULL, PFS_NOWAIT);
+ pfs_create_link(pnode, &dnode->d_pfs_node, name, &debugfs_fill_data,
+ NULL, NULL, NULL, PFS_NOWAIT);
if (dnode->d_pfs_node == NULL)
goto fail;
dnode->d_pfs_node->pn_data = dm;
diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c
index 1c6d64d6b8bc..95b212be1306 100644
--- a/sys/compat/linprocfs/linprocfs.c
+++ b/sys/compat/linprocfs/linprocfs.c
@@ -2320,165 +2320,165 @@ linprocfs_init(PFS_INIT_ARGS)
root = pi->pi_root;
/* /proc/... */
- pfs_create_file(root, "cmdline", &linprocfs_docmdline,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "cpuinfo", &linprocfs_docpuinfo,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "devices", &linprocfs_dodevices,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "filesystems", &linprocfs_dofilesystems,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "loadavg", &linprocfs_doloadavg,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "meminfo", &linprocfs_domeminfo,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "modules", &linprocfs_domodules,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "mounts", &linprocfs_domtab,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "mtab", &linprocfs_domtab,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "partitions", &linprocfs_dopartitions,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_link(root, "self", &procfs_docurproc,
- NULL, NULL, NULL, 0);
- pfs_create_file(root, "stat", &linprocfs_dostat,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "swaps", &linprocfs_doswaps,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "uptime", &linprocfs_douptime,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "version", &linprocfs_doversion,
+ pfs_create_file(root, NULL, "cmdline", &linprocfs_docmdline, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "cpuinfo", &linprocfs_docpuinfo, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "devices", &linprocfs_dodevices, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "filesystems", &linprocfs_dofilesystems,
NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(root, NULL, "loadavg", &linprocfs_doloadavg, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "meminfo", &linprocfs_domeminfo, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "modules", &linprocfs_domodules, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "mounts", &linprocfs_domtab, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "mtab", &linprocfs_domtab, NULL, NULL, NULL,
+ PFS_RD);
+ pfs_create_file(root, NULL, "partitions", &linprocfs_dopartitions, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_link(root, NULL, "self", &procfs_docurproc, NULL, NULL, NULL,
+ 0);
+ pfs_create_file(root, NULL, "stat", &linprocfs_dostat, NULL, NULL, NULL,
+ PFS_RD);
+ pfs_create_file(root, NULL, "swaps", &linprocfs_doswaps, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "uptime", &linprocfs_douptime, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "version", &linprocfs_doversion, NULL, NULL,
+ NULL, PFS_RD);
/* /proc/bus/... */
- dir = pfs_create_dir(root, "bus", NULL, NULL, NULL, 0);
- dir = pfs_create_dir(dir, "pci", NULL, NULL, NULL, 0);
- dir = pfs_create_dir(dir, "devices", NULL, NULL, NULL, 0);
+ pfs_create_dir(root, &dir, "bus", NULL, NULL, NULL, 0);
+ pfs_create_dir(dir, &dir, "pci", NULL, NULL, NULL, 0);
+ pfs_create_dir(dir, &dir, "devices", NULL, NULL, NULL, 0);
/* /proc/net/... */
- dir = pfs_create_dir(root, "net", NULL, NULL, NULL, 0);
- pfs_create_file(dir, "dev", &linprocfs_donetdev,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "route", &linprocfs_donetroute,
- NULL, NULL, NULL, PFS_RD);
+ pfs_create_dir(root, &dir, "net", NULL, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, "dev", &linprocfs_donetdev, NULL, NULL, NULL,
+ PFS_RD);
+ pfs_create_file(dir, NULL, "route", &linprocfs_donetroute, NULL, NULL,
+ NULL, PFS_RD);
/* /proc/<pid>/... */
- dir = pfs_create_dir(root, "pid", NULL, NULL, NULL, PFS_PROCDEP);
- pfs_create_file(dir, "cmdline", &linprocfs_doproccmdline,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_link(dir, "cwd", &linprocfs_doproccwd,
- NULL, NULL, NULL, 0);
- pfs_create_file(dir, "environ", &linprocfs_doprocenviron,
- NULL, &procfs_candebug, NULL, PFS_RD);
- pfs_create_link(dir, "exe", &procfs_doprocfile,
- NULL, &procfs_notsystem, NULL, 0);
- pfs_create_file(dir, "maps", &linprocfs_doprocmaps,
- NULL, NULL, NULL, PFS_RD | PFS_AUTODRAIN);
- pfs_create_file(dir, "mem", &linprocfs_doprocmem,
- procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
- pfs_create_file(dir, "mountinfo", &linprocfs_doprocmountinfo,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "mounts", &linprocfs_domtab,
+ pfs_create_dir(root, &dir, "pid", NULL, NULL, NULL, PFS_PROCDEP);
+ pfs_create_file(dir, NULL, "cmdline", &linprocfs_doproccmdline, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_link(dir, NULL, "cwd", &linprocfs_doproccwd, NULL, NULL,
+ NULL, 0);
+ pfs_create_file(dir, NULL, "environ", &linprocfs_doprocenviron, NULL,
+ &procfs_candebug, NULL, PFS_RD);
+ pfs_create_link(dir, NULL, "exe", &procfs_doprocfile, NULL,
+ &procfs_notsystem, NULL, 0);
+ pfs_create_file(dir, NULL, "maps", &linprocfs_doprocmaps, NULL, NULL,
+ NULL, PFS_RD | PFS_AUTODRAIN);
+ pfs_create_file(dir, NULL, "mem", &linprocfs_doprocmem, procfs_attr_rw,
+ &procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
+ pfs_create_file(dir, NULL, "mountinfo", &linprocfs_doprocmountinfo,
NULL, NULL, NULL, PFS_RD);
- pfs_create_link(dir, "root", &linprocfs_doprocroot,
- NULL, NULL, NULL, 0);
- pfs_create_file(dir, "stat", &linprocfs_doprocstat,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "statm", &linprocfs_doprocstatm,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "status", &linprocfs_doprocstatus,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_link(dir, "fd", &linprocfs_dofdescfs,
- NULL, NULL, NULL, 0);
- pfs_create_file(dir, "auxv", &linprocfs_doauxv,
- NULL, &procfs_candebug, NULL, PFS_RD|PFS_RAWRD);
- pfs_create_file(dir, "limits", &linprocfs_doproclimits,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "oom_score_adj", &linprocfs_do_oom_score_adj,
+ pfs_create_file(dir, NULL, "mounts", &linprocfs_domtab, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_link(dir, NULL, "root", &linprocfs_doprocroot, NULL, NULL,
+ NULL, 0);
+ pfs_create_file(dir, NULL, "stat", &linprocfs_doprocstat, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "statm", &linprocfs_doprocstatm, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "status", &linprocfs_doprocstatus, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_link(dir, NULL, "fd", &linprocfs_dofdescfs, NULL, NULL, NULL,
+ 0);
+ pfs_create_file(dir, NULL, "auxv", &linprocfs_doauxv, NULL,
+ &procfs_candebug, NULL, PFS_RD | PFS_RAWRD);
+ pfs_create_file(dir, NULL, "limits", &linprocfs_doproclimits, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "oom_score_adj", &linprocfs_do_oom_score_adj,
procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR);
/* /proc/<pid>/task/... */
- dir = pfs_create_dir(dir, "task", linprocfs_dotaskattr, NULL, NULL, 0);
- pfs_create_file(dir, ".dummy", &linprocfs_dotaskdummy,
- NULL, NULL, NULL, PFS_RD);
+ pfs_create_dir(dir, &dir, "task", linprocfs_dotaskattr, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, ".dummy", &linprocfs_dotaskdummy, NULL, NULL,
+ NULL, PFS_RD);
/* /proc/scsi/... */
- dir = pfs_create_dir(root, "scsi", NULL, NULL, NULL, 0);
- pfs_create_file(dir, "device_info", &linprocfs_doscsidevinfo,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "scsi", &linprocfs_doscsiscsi,
+ pfs_create_dir(root, &dir, "scsi", NULL, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, "device_info", &linprocfs_doscsidevinfo,
NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "scsi", &linprocfs_doscsiscsi, NULL, NULL,
+ NULL, PFS_RD);
/* /proc/sys/... */
- sys = pfs_create_dir(root, "sys", NULL, NULL, NULL, 0);
+ pfs_create_dir(root, &sys, "sys", NULL, NULL, NULL, 0);
/* /proc/sys/kernel/... */
- dir = pfs_create_dir(sys, "kernel", NULL, NULL, NULL, 0);
- pfs_create_file(dir, "osrelease", &linprocfs_doosrelease,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "ostype", &linprocfs_doostype,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "version", &linprocfs_doosbuild,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "msgmax", &linprocfs_domsgmax,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "msgmni", &linprocfs_domsgmni,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "msgmnb", &linprocfs_domsgmnb,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "ngroups_max", &linprocfs_dongroups_max,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "pid_max", &linprocfs_dopid_max,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "sem", &linprocfs_dosem,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "shmall", &linprocfs_doshmall,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "shmmax", &linprocfs_doshmmax,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "shmmni", &linprocfs_doshmmni,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "tainted", &linprocfs_dotainted,
+ pfs_create_dir(sys, &dir, "kernel", NULL, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, "osrelease", &linprocfs_doosrelease, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "ostype", &linprocfs_doostype, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "version", &linprocfs_doosbuild, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "msgmax", &linprocfs_domsgmax, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "msgmni", &linprocfs_domsgmni, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "msgmnb", &linprocfs_domsgmnb, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "ngroups_max", &linprocfs_dongroups_max,
NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "pid_max", &linprocfs_dopid_max, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "sem", &linprocfs_dosem, NULL, NULL, NULL,
+ PFS_RD);
+ pfs_create_file(dir, NULL, "shmall", &linprocfs_doshmall, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "shmmax", &linprocfs_doshmmax, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "shmmni", &linprocfs_doshmmni, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "tainted", &linprocfs_dotainted, NULL, NULL,
+ NULL, PFS_RD);
/* /proc/sys/kernel/random/... */
- dir = pfs_create_dir(dir, "random", NULL, NULL, NULL, 0);
- pfs_create_file(dir, "uuid", &linprocfs_douuid,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "boot_id", &linprocfs_doboot_id,
- NULL, NULL, NULL, PFS_RD);
+ pfs_create_dir(dir, &dir, "random", NULL, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, "uuid", &linprocfs_douuid, NULL, NULL, NULL,
+ PFS_RD);
+ pfs_create_file(dir, NULL, "boot_id", &linprocfs_doboot_id, NULL, NULL,
+ NULL, PFS_RD);
/* /proc/sys/vm/.... */
- dir = pfs_create_dir(sys, "vm", NULL, NULL, NULL, 0);
- pfs_create_file(dir, "min_free_kbytes", &linprocfs_dominfree,
+ pfs_create_dir(sys, &dir, "vm", NULL, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, "min_free_kbytes", &linprocfs_dominfree,
NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "max_map_count", &linprocfs_domax_map_cnt,
+ pfs_create_file(dir, NULL, "max_map_count", &linprocfs_domax_map_cnt,
NULL, NULL, NULL, PFS_RD);
/* /proc/sysvipc/... */
- dir = pfs_create_dir(root, "sysvipc", NULL, NULL, NULL, 0);
- pfs_create_file(dir, "msg", &linprocfs_dosysvipc_msg,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "sem", &linprocfs_dosysvipc_sem,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "shm", &linprocfs_dosysvipc_shm,
- NULL, NULL, NULL, PFS_RD);
+ pfs_create_dir(root, &dir, "sysvipc", NULL, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, "msg", &linprocfs_dosysvipc_msg, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "sem", &linprocfs_dosysvipc_sem, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "shm", &linprocfs_dosysvipc_shm, NULL, NULL,
+ NULL, PFS_RD);
/* /proc/sys/fs/... */
- dir = pfs_create_dir(sys, "fs", NULL, NULL, NULL, 0);
+ pfs_create_dir(sys, &dir, "fs", NULL, NULL, NULL, 0);
/* /proc/sys/fs/mqueue/... */
- dir = pfs_create_dir(dir, "mqueue", NULL, NULL, NULL, 0);
- pfs_create_file(dir, "msg_default", &linprocfs_domqueue_msg_default,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "msgsize_default", &linprocfs_domqueue_msgsize_default,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "msg_max", &linprocfs_domqueue_msg_max,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "msgsize_max", &linprocfs_domqueue_msgsize_max,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "queues_max", &linprocfs_domqueue_queues_max,
+ pfs_create_dir(dir, &dir, "mqueue", NULL, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, "msg_default",
+ &linprocfs_domqueue_msg_default, NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "msgsize_default",
+ &linprocfs_domqueue_msgsize_default, NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "msg_max", &linprocfs_domqueue_msg_max, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "msgsize_max",
+ &linprocfs_domqueue_msgsize_max, NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "queues_max", &linprocfs_domqueue_queues_max,
NULL, NULL, NULL, PFS_RD);
return (0);
diff --git a/sys/compat/linsysfs/linsysfs.c b/sys/compat/linsysfs/linsysfs.c
index 7f70221b420d..5a41c5193415 100644
--- a/sys/compat/linsysfs/linsysfs.c
+++ b/sys/compat/linsysfs/linsysfs.c
@@ -267,6 +267,8 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi,
struct pci_devinfo *dinfo;
char *device, *host, *new_path, *devname;
+ children = NULL;
+ device = host = NULL;
new_path = path;
devname = malloc(16, M_TEMP, M_WAITOK);
@@ -292,39 +294,43 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi,
dinfo->cfg.func);
strcat(new_path, "/");
strcat(new_path, device);
- dir = pfs_create_dir(dir, device,
+ error = pfs_create_dir(dir, &dir, device,
NULL, NULL, NULL, 0);
- cur_file = pfs_create_file(dir, "vendor",
+ if (error != 0)
+ goto out;
+ pfs_create_dir(dir, &dir, device, NULL, NULL,
+ NULL, 0);
+ pfs_create_file(dir, &cur_file, "vendor",
&linsysfs_fill_vendor, NULL, NULL, NULL,
PFS_RD);
cur_file->pn_data = (void*)dev;
- cur_file = pfs_create_file(dir, "device",
+ pfs_create_file(dir, &cur_file, "device",
&linsysfs_fill_device, NULL, NULL, NULL,
PFS_RD);
cur_file->pn_data = (void*)dev;
- cur_file = pfs_create_file(dir,
+ pfs_create_file(dir, &cur_file,
"subsystem_vendor",
&linsysfs_fill_subvendor, NULL, NULL, NULL,
PFS_RD);
cur_file->pn_data = (void*)dev;
- cur_file = pfs_create_file(dir,
+ pfs_create_file(dir, &cur_file,
"subsystem_device",
&linsysfs_fill_subdevice, NULL, NULL, NULL,
PFS_RD);
cur_file->pn_data = (void*)dev;
- cur_file = pfs_create_file(dir, "revision",
+ pfs_create_file(dir, &cur_file, "revision",
&linsysfs_fill_revid, NULL, NULL, NULL,
PFS_RD);
cur_file->pn_data = (void*)dev;
- cur_file = pfs_create_file(dir, "config",
+ pfs_create_file(dir, &cur_file, "config",
&linsysfs_fill_config, NULL, NULL, NULL,
PFS_RD);
cur_file->pn_data = (void*)dev;
- cur_file = pfs_create_file(dir, "uevent",
- &linsysfs_fill_uevent_pci, NULL, NULL,
- NULL, PFS_RD);
+ pfs_create_file(dir, &cur_file, "uevent",
+ &linsysfs_fill_uevent_pci, NULL, NULL, NULL,
+ PFS_RD);
cur_file->pn_data = (void*)dev;
- cur_file = pfs_create_link(dir, "subsystem",
+ pfs_create_link(dir, &cur_file, "subsystem",
&linsysfs_fill_data, NULL, NULL, NULL, 0);
/* libdrm just checks that the link ends in "/pci" */
cur_file->pn_data = "/sys/bus/pci";
@@ -334,34 +340,32 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi,
sprintf(host, "host%d", host_number++);
strcat(new_path, "/");
strcat(new_path, host);
- pfs_create_dir(dir, host,
- NULL, NULL, NULL, 0);
+ pfs_create_dir(dir, NULL, host, NULL,
+ NULL, NULL, 0);
scsi_host = malloc(sizeof(
struct scsi_host_queue),
- M_DEVBUF, M_NOWAIT);
+ M_DEVBUF, M_WAITOK);
scsi_host->path = malloc(
strlen(new_path) + 1,
- M_DEVBUF, M_NOWAIT);
+ M_DEVBUF, M_WAITOK);
scsi_host->path[0] = '\000';
bcopy(new_path, scsi_host->path,
strlen(new_path) + 1);
scsi_host->name = "unknown";
- sub_dir = pfs_create_dir(scsi, host,
+ pfs_create_dir(scsi, &sub_dir, host,
NULL, NULL, NULL, 0);
- pfs_create_link(sub_dir, "device",
- &linsysfs_link_scsi_host,
- NULL, NULL, NULL, 0);
- pfs_create_file(sub_dir, "proc_name",
- &linsysfs_scsiname,
+ pfs_create_link(sub_dir, NULL, "device",
+ &linsysfs_link_scsi_host, NULL,
+ NULL, NULL, 0);
+ pfs_create_file(sub_dir, NULL,
+ "proc_name", &linsysfs_scsiname,
NULL, NULL, NULL, PFS_RD);
scsi_host->name
= linux_driver_get_name_dev(dev);
TAILQ_INSERT_TAIL(&scsi_host_q,
scsi_host, scsi_host_next);
}
- free(device, M_TEMP);
- free(host, M_TEMP);
}
}
@@ -374,26 +378,27 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi,
device_get_unit(dev) >= 0) {
dinfo = device_get_ivars(parent);
if (dinfo != NULL && dinfo->cfg.baseclass == PCIC_DISPLAY) {
- pfs_create_dir(dir, "drm", NULL, NULL, NULL, 0);
+ pfs_create_dir(dir, NULL, "drm", NULL, NULL,
+ NULL, 0);
sprintf(devname, "226:%d",
device_get_unit(dev));
- sub_dir = pfs_create_dir(chardev,
- devname, NULL, NULL, NULL, 0);
- cur_file = pfs_create_link(sub_dir,
- "device", &linsysfs_fill_vgapci, NULL,
- NULL, NULL, PFS_RD);
+ pfs_create_dir(chardev, &sub_dir, devname, NULL,
+ NULL, NULL, 0);
+ pfs_create_link(sub_dir, &cur_file, "device",
+ &linsysfs_fill_vgapci, NULL, NULL, NULL,
+ PFS_RD);
cur_file->pn_data = (void*)dir;
- cur_file = pfs_create_file(sub_dir,
- "uevent", &linsysfs_fill_uevent_drm, NULL,
- NULL, NULL, PFS_RD);
+ pfs_create_file(sub_dir, &cur_file, "uevent",
+ &linsysfs_fill_uevent_drm, NULL, NULL, NULL,
+ PFS_RD);
cur_file->pn_data = (void*)dev;
sprintf(devname, "card%d",
device_get_unit(dev));
- sub_dir = pfs_create_dir(drm,
- devname, NULL, NULL, NULL, 0);
- cur_file = pfs_create_link(sub_dir,
- "device", &linsysfs_fill_vgapci, NULL,
- NULL, NULL, PFS_RD);
+ pfs_create_dir(drm, &sub_dir, devname, NULL,
+ NULL, NULL, 0);
+ pfs_create_link(sub_dir, &cur_file, "device",
+ &linsysfs_fill_vgapci, NULL, NULL, NULL,
+ PFS_RD);
cur_file->pn_data = (void*)dir;
}
}
@@ -401,17 +406,37 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi,
error = device_get_children(dev, &children, &nchildren);
if (error == 0) {
- for (i = 0; i < nchildren; i++)
- if (children[i])
- linsysfs_run_bus(children[i], dir, scsi,
+ for (i = 0; i < nchildren; i++) {
+ if (children[i]) {
+ error = linsysfs_run_bus(children[i], dir, scsi,
chardev, drm, new_path, prefix);
- free(children, M_TEMP);
+ if (error != 0) {
+ printf(
+ "linsysfs_run_bus: %s omitted from sysfs tree, error %d\n",
+ device_get_nameunit(children[i]),
+ error);
+ }
+ }
+ }
+
+ /*
+ * We override the error to avoid cascading failures; the
+ * innermost device that failed in a tree is probably the most
+ * significant one for diagnostics, its parents would be noise.
+ */
+ error = 0;
}
+
+out:
+ free(host, M_TEMP);
+ free(device, M_TEMP);
+ if (children != NULL)
+ free(children, M_TEMP);
if (new_path != path)
free(new_path, M_TEMP);
free(devname, M_TEMP);
- return (1);
+ return (error);
}
/*
@@ -455,10 +480,10 @@ linsysfs_listcpus(struct pfs_node *dir)
for (i = 0; i < mp_ncpus; ++i) {
/* /sys/devices/system/cpu/cpuX */
sprintf(name, "cpu%d", i);
- cpu = pfs_create_dir(dir, name, NULL, NULL, NULL, 0);
+ pfs_create_dir(dir, &cpu, name, NULL, NULL, NULL, 0);
- pfs_create_file(cpu, "online", &linsysfs_cpuxonline,
- NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(cpu, NULL, "online", &linsysfs_cpuxonline, NULL,
+ NULL, NULL, PFS_RD);
}
free(name, M_TEMP);
}
@@ -485,52 +510,56 @@ linsysfs_init(PFS_INIT_ARGS)
root = pi->pi_root;
/* /sys/bus/... */
- dir = pfs_create_dir(root, "bus", NULL, NULL, NULL, 0);
+ pfs_create_dir(root, &dir, "bus", NULL, NULL, NULL, 0);
/* /sys/class/... */
- class = pfs_create_dir(root, "class", NULL, NULL, NULL, 0);
- scsi = pfs_create_dir(class, "scsi_host", NULL, NULL, NULL, 0);
- drm = pfs_create_dir(class, "drm", NULL, NULL, NULL, 0);
- pfs_create_dir(class, "power_supply", NULL, NULL, NULL, 0);
+ pfs_create_dir(root, &class, "class", NULL, NULL, NULL, 0);
+ pfs_create_dir(class, &scsi, "scsi_host", NULL, NULL, NULL, 0);
+ pfs_create_dir(class, &drm, "drm", NULL, NULL, NULL, 0);
+ pfs_create_dir(class, NULL, "power_supply", NULL, NULL, NULL, 0);
/* /sys/class/net/.. */
- net = pfs_create_dir(class, "net", NULL, NULL, NULL, 0);
+ pfs_create_dir(class, &net, "net", NULL, NULL, NULL, 0);
/* /sys/dev/... */
- devdir = pfs_create_dir(root, "dev", NULL, NULL, NULL, 0);
- chardev = pfs_create_dir(devdir, "char", NULL, NULL, NULL, 0);
+ pfs_create_dir(root, &devdir, "dev", NULL, NULL, NULL, 0);
+ pfs_create_dir(devdir, &chardev, "char", NULL, NULL, NULL, 0);
/* /sys/devices/... */
- dir = pfs_create_dir(root, "devices", NULL, NULL, NULL, 0);
- pci = pfs_create_dir(dir, "pci0000:00", NULL, NULL, NULL, 0);
+ pfs_create_dir(root, &dir, "devices", NULL, NULL, NULL, 0);
+ pfs_create_dir(dir, &pci, "pci0000:00", NULL, NULL, NULL, 0);
devclass = devclass_find("root");
if (devclass == NULL) {
return (0);
}
+ /*
+ * This assumes that the root node is unlikely to error out in
+ * linsysfs_run_bus, which may or may not be true.
+ */
dev = devclass_get_device(devclass, 0);
linsysfs_run_bus(dev, pci, scsi, chardev, drm, "/pci0000:00", "0000");
/* /sys/devices/system */
- sys = pfs_create_dir(dir, "system", NULL, NULL, NULL, 0);
+ pfs_create_dir(dir, &sys, "system", NULL, NULL, NULL, 0);
/* /sys/devices/system/cpu */
- cpu = pfs_create_dir(sys, "cpu", NULL, NULL, NULL, 0);
+ pfs_create_dir(sys, &cpu, "cpu", NULL, NULL, NULL, 0);
- pfs_create_file(cpu, "online", &linsysfs_cpuonline,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(cpu, "possible", &linsysfs_cpuonline,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(cpu, "present", &linsysfs_cpuonline,
- NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(cpu, NULL, "online", &linsysfs_cpuonline, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(cpu, NULL, "possible", &linsysfs_cpuonline, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(cpu, NULL, "present", &linsysfs_cpuonline, NULL, NULL,
+ NULL, PFS_RD);
linsysfs_listcpus(cpu);
/* /sys/kernel */
- kernel = pfs_create_dir(root, "kernel", NULL, NULL, NULL, 0);
+ pfs_create_dir(root, &kernel, "kernel", NULL, NULL, NULL, 0);
/* /sys/kernel/debug, mountpoint for lindebugfs. */
- pfs_create_dir(kernel, "debug", NULL, NULL, NULL, 0);
+ pfs_create_dir(kernel, NULL, "debug", NULL, NULL, NULL, 0);
linsysfs_net_init();
diff --git a/sys/compat/linsysfs/linsysfs_net.c b/sys/compat/linsysfs/linsysfs_net.c
index 73602b0132a4..751dbb5b3713 100644
--- a/sys/compat/linsysfs/linsysfs_net.c
+++ b/sys/compat/linsysfs/linsysfs_net.c
@@ -237,22 +237,22 @@ linsysfs_net_addif(if_t ifp, void *arg)
nic = pfs_find_node(dir, ifname);
if (nic == NULL) {
- nic = pfs_create_dir(dir, ifname, NULL, linsysfs_if_visible,
+ pfs_create_dir(dir, &nic, ifname, NULL, linsysfs_if_visible,
NULL, 0);
- pfs_create_file(nic, "address", &linsysfs_if_addr,
+ pfs_create_file(nic, NULL, "address", &linsysfs_if_addr, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_file(nic, NULL, "addr_len", &linsysfs_if_addrlen,
NULL, NULL, NULL, PFS_RD);
- pfs_create_file(nic, "addr_len", &linsysfs_if_addrlen,
+ pfs_create_file(nic, NULL, "flags", &linsysfs_if_flags, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_file(nic, NULL, "ifindex", &linsysfs_if_ifindex,
NULL, NULL, NULL, PFS_RD);
- pfs_create_file(nic, "flags", &linsysfs_if_flags,
+ pfs_create_file(nic, NULL, "mtu", &linsysfs_if_mtu, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(nic, NULL, "tx_queue_len", &linsysfs_if_txq_len,
NULL, NULL, NULL, PFS_RD);
- pfs_create_file(nic, "ifindex", &linsysfs_if_ifindex,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(nic, "mtu", &linsysfs_if_mtu,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(nic, "tx_queue_len", &linsysfs_if_txq_len,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(nic, "type", &linsysfs_if_type,
- NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(nic, NULL, "type", &linsysfs_if_type, NULL,
+ NULL, NULL, PFS_RD);
}
/*
* There is a small window between registering the if_arrival
diff --git a/sys/compat/linuxkpi/common/include/acpi/acpi.h b/sys/compat/linuxkpi/common/include/acpi/acpi.h
index 016c7ede0f6e..9bb435591daa 100644
--- a/sys/compat/linuxkpi/common/include/acpi/acpi.h
+++ b/sys/compat/linuxkpi/common/include/acpi/acpi.h
@@ -131,7 +131,7 @@ acpi_format_exception(ACPI_STATUS Exception)
}
static inline ACPI_STATUS
-acpi_get_handle(ACPI_HANDLE Parent, ACPI_STRING Pathname,
+acpi_get_handle(ACPI_HANDLE Parent, const char *Pathname,
ACPI_HANDLE *RetHandle)
{
return (AcpiGetHandle(Parent, Pathname, RetHandle));
diff --git a/sys/compat/linuxkpi/common/include/kunit/static_stub.h b/sys/compat/linuxkpi/common/include/kunit/static_stub.h
new file mode 100644
index 000000000000..9d425d46dbb0
--- /dev/null
+++ b/sys/compat/linuxkpi/common/include/kunit/static_stub.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Björn Zeeb under sponsorship from
+ * the FreeBSD Foundation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#ifndef _LINUXKPI_KUNIT_STATIC_STUB_H
+#define _LINUXKPI_KUNIT_STATIC_STUB_H
+
+#define KUNIT_STATIC_STUB_REDIRECT(_fn, ...) do { } while(0)
+
+#endif /* _LINUXKPI_KUNIT_STATIC_STUB_H */
diff --git a/sys/compat/linuxkpi/common/include/linux/cleanup.h b/sys/compat/linuxkpi/common/include/linux/cleanup.h
index 01f234f0cbe7..5bb146f082ed 100644
--- a/sys/compat/linuxkpi/common/include/linux/cleanup.h
+++ b/sys/compat/linuxkpi/common/include/linux/cleanup.h
@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
- * Copyright (c) 2024 The FreeBSD Foundation
+ * Copyright (c) 2024-2025 The FreeBSD Foundation
*
* This software was developed by Björn Zeeb under sponsorship from
* the FreeBSD Foundation.
@@ -43,4 +43,51 @@
guard_ ## _n ## _t guard_ ## _n ## _ ## __COUNTER__ \
__cleanup(guard_ ## _n ## _destroy) = guard_ ## _n ## _create
+#define DEFINE_FREE(_n, _t, _f) \
+ static inline void \
+ __free_ ## _n(void *p) \
+ { \
+ _t _T; \
+ \
+ _T = *(_t *)p; \
+ _f; \
+ }
+
+#define __free(_n) __cleanup(__free_##_n)
+
+/*
+ * Given this is a _0 version it should likely be broken up into parts.
+ * But we have no idea what a _1, _2, ... version would do differently
+ * until we see a call.
+ * This is used for a not-real-type (rcu). We use a bool to "simulate"
+ * the lock being held. Also _T is still special and may not always be
+ * used, so tag it with __unused (or better the LinuxKPI __maybe_unused).
+ */
+#define DEFINE_LOCK_GUARD_0(_n, _lock, _unlock, ...) \
+ \
+ typedef struct { \
+ bool lock; \
+ __VA_ARGS__; \
+ } guard_ ## _n ## _t; \
+ \
+ static inline void \
+ guard_ ## _n ## _destroy(guard_ ## _n ## _t *_T) \
+ { \
+ if (_T->lock) { \
+ _unlock; \
+ } \
+ } \
+ \
+ static inline guard_ ## _n ## _t \
+ guard_ ## _n ## _create(void) \
+ { \
+ guard_ ## _n ## _t _tmp; \
+ guard_ ## _n ## _t *_T __maybe_unused; \
+ \
+ _tmp.lock = true; \
+ _T = &_tmp; \
+ _lock; \
+ return (_tmp); \
+ }
+
#endif /* _LINUXKPI_LINUX_CLEANUP_H */
diff --git a/sys/compat/linuxkpi/common/include/linux/compiler.h b/sys/compat/linuxkpi/common/include/linux/compiler.h
index fb5ad3bf4fe4..948396144ad6 100644
--- a/sys/compat/linuxkpi/common/include/linux/compiler.h
+++ b/sys/compat/linuxkpi/common/include/linux/compiler.h
@@ -130,4 +130,10 @@
#define is_signed_type(t) ((t)-1 < (t)1)
#define is_unsigned_type(t) ((t)-1 > (t)1)
+#if __has_builtin(__builtin_dynamic_object_size)
+#define __struct_size(_s) __builtin_dynamic_object_size(_s, 0)
+#else
+#define __struct_size(_s) __builtin_object_size(_s, 0)
+#endif
+
#endif /* _LINUXKPI_LINUX_COMPILER_H_ */
diff --git a/sys/compat/linuxkpi/common/include/linux/device.h b/sys/compat/linuxkpi/common/include/linux/device.h
index 2556b0c45e49..7dd6340746d2 100644
--- a/sys/compat/linuxkpi/common/include/linux/device.h
+++ b/sys/compat/linuxkpi/common/include/linux/device.h
@@ -4,7 +4,7 @@
* Copyright (c) 2010 Panasas, Inc.
* Copyright (c) 2013-2016 Mellanox Technologies, Ltd.
* All rights reserved.
- * Copyright (c) 2021-2022 The FreeBSD Foundation
+ * Copyright (c) 2021-2025 The FreeBSD Foundation
*
* Portions of this software were developed by Björn Zeeb
* under sponsorship from the FreeBSD Foundation.
@@ -284,7 +284,8 @@ int lkpi_devres_destroy(struct device *, void(*release)(struct device *, void *)
void lkpi_devres_release_free_list(struct device *);
void lkpi_devres_unlink(struct device *, void *);
void lkpi_devm_kmalloc_release(struct device *, void *);
-#define devm_kfree(_d, _p) lkpi_devm_kmalloc_release(_d, _p)
+void lkpi_devm_kfree(struct device *, const void *);
+#define devm_kfree(_d, _p) lkpi_devm_kfree(_d, _p)
static inline const char *
dev_driver_string(const struct device *dev)
diff --git a/sys/compat/linuxkpi/common/include/linux/ieee80211.h b/sys/compat/linuxkpi/common/include/linux/ieee80211.h
index b9161c586d07..17041bb03ce8 100644
--- a/sys/compat/linuxkpi/common/include/linux/ieee80211.h
+++ b/sys/compat/linuxkpi/common/include/linux/ieee80211.h
@@ -408,6 +408,14 @@ enum ieee80211_sta_state {
IEEE80211_STA_AUTHORIZED = 4, /* 802.1x */
};
+enum ieee80211_sta_rx_bandwidth {
+ IEEE80211_STA_RX_BW_20 = 0,
+ IEEE80211_STA_RX_BW_40,
+ IEEE80211_STA_RX_BW_80,
+ IEEE80211_STA_RX_BW_160,
+ IEEE80211_STA_RX_BW_320,
+};
+
enum ieee80211_tx_info_flags {
/* XXX TODO .. right shift numbers - not sure where that came from? */
IEEE80211_TX_CTL_AMPDU = BIT(0),
@@ -524,24 +532,24 @@ struct ieee80211_mgmt {
uint16_t beacon_int;
uint16_t capab_info;
uint8_t variable[0];
- } beacon;
+ } __packed beacon;
/* 9.3.3.5 Association Request frame format */
struct {
uint16_t capab_info;
uint16_t listen_interval;
uint8_t variable[0];
- } assoc_req;
+ } __packed assoc_req;
/* 9.3.3.10 Probe Request frame format */
struct {
uint8_t variable[0];
- } probe_req;
+ } __packed probe_req;
/* 9.3.3.11 Probe Response frame format */
struct {
uint64_t timestamp;
uint16_t beacon_int;
uint16_t capab_info;
uint8_t variable[0];
- } probe_resp;
+ } __packed probe_resp;
/* 9.3.3.14 Action frame format */
struct {
/* 9.4.1.11 Action field */
@@ -557,7 +565,7 @@ struct ieee80211_mgmt {
uint8_t tpc_elem_length;
uint8_t tpc_elem_tx_power;
uint8_t tpc_elem_link_margin;
- } tpc_report;
+ } __packed tpc_report;
/* 9.6.8.33 Fine Timing Measurement frame format */
struct {
uint8_t dialog_token;
@@ -567,7 +575,7 @@ struct ieee80211_mgmt {
uint16_t tod_error;
uint16_t toa_error;
uint8_t variable[0];
- } ftm;
+ } __packed ftm;
/* 802.11-2016, 9.6.5.2 ADDBA Request frame format */
struct {
uint8_t action_code;
@@ -577,16 +585,16 @@ struct ieee80211_mgmt {
uint16_t start_seq_num;
/* Optional follows... */
uint8_t variable[0];
- } addba_req;
+ } __packed addba_req;
/* XXX */
struct {
uint8_t dialog_token;
- } wnm_timing_msr;
+ } __packed wnm_timing_msr;
} u;
- } action;
+ } __packed action;
DECLARE_FLEX_ARRAY(uint8_t, body);
} u;
-};
+} __packed __aligned(2);
struct ieee80211_cts { /* net80211::ieee80211_frame_cts */
__le16 frame_control;
diff --git a/sys/compat/linuxkpi/common/include/linux/math.h b/sys/compat/linuxkpi/common/include/linux/math.h
index 5a348a57747b..1d50e011f66d 100644
--- a/sys/compat/linuxkpi/common/include/linux/math.h
+++ b/sys/compat/linuxkpi/common/include/linux/math.h
@@ -56,7 +56,7 @@
__ret; \
})
-#if defined(LINUXKPI_VERSION) && LINUXKPI_VERSION >= 60600
+#if !defined(LINUXKPI_VERSION) || (LINUXKPI_VERSION >= 60600)
#define abs_diff(x, y) ({ \
__typeof(x) _x = (x); \
__typeof(y) _y = (y); \
diff --git a/sys/compat/linuxkpi/common/include/linux/math64.h b/sys/compat/linuxkpi/common/include/linux/math64.h
index a216d350570f..25ca9da1b622 100644
--- a/sys/compat/linuxkpi/common/include/linux/math64.h
+++ b/sys/compat/linuxkpi/common/include/linux/math64.h
@@ -98,6 +98,12 @@ div64_u64_round_up(uint64_t dividend, uint64_t divisor)
return ((dividend + divisor - 1) / divisor);
}
+static inline uint64_t
+roundup_u64(uint64_t x1, uint32_t x2)
+{
+ return (div_u64(x1 + x2 - 1, x2) * x2);
+}
+
#define DIV64_U64_ROUND_UP(...) \
div64_u64_round_up(__VA_ARGS__)
diff --git a/sys/compat/linuxkpi/common/include/linux/overflow.h b/sys/compat/linuxkpi/common/include/linux/overflow.h
index 9ba9b9500f11..e811037b8ecc 100644
--- a/sys/compat/linuxkpi/common/include/linux/overflow.h
+++ b/sys/compat/linuxkpi/common/include/linux/overflow.h
@@ -33,8 +33,10 @@
* credit to Christian Biere.
*/
#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
-#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
-#define type_min(T) ((T)((T)-type_max(T)-(T)1))
+#define __type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
+#define type_max(t) __type_max(typeof(t))
+#define __type_min(T) ((T)((T)-type_max(T)-(T)1))
+#define type_min(t) __type_min(typeof(t))
/*
* Avoids triggering -Wtype-limits compilation warning,
@@ -59,46 +61,123 @@ static inline bool __must_check __must_check_overflow(bool overflow)
* @b: second addend
* @d: pointer to store sum
*
- * Returns 0 on success.
+ * Returns true on wrap-around, false otherwise.
*
- * *@d holds the results of the attempted addition, but is not considered
- * "safe for use" on a non-zero return value, which indicates that the
- * sum has overflowed or been truncated.
+ * *@d holds the results of the attempted addition, regardless of whether
+ * wrap-around occurred.
*/
#define check_add_overflow(a, b, d) \
__must_check_overflow(__builtin_add_overflow(a, b, d))
/**
+ * wrapping_add() - Intentionally perform a wrapping addition
+ * @type: type for result of calculation
+ * @a: first addend
+ * @b: second addend
+ *
+ * Return the potentially wrapped-around addition without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_add(type, a, b) \
+ ({ \
+ type __val; \
+ __builtin_add_overflow(a, b, &__val); \
+ __val; \
+ })
+
+/**
+ * wrapping_assign_add() - Intentionally perform a wrapping increment assignment
+ * @var: variable to be incremented
+ * @offset: amount to add
+ *
+ * Increments @var by @offset with wrap-around. Returns the resulting
+ * value of @var. Will not trip any wrap-around sanitizers.
+ *
+ * Returns the new value of @var.
+ */
+#define wrapping_assign_add(var, offset) \
+ ({ \
+ typeof(var) *__ptr = &(var); \
+ *__ptr = wrapping_add(typeof(var), *__ptr, offset); \
+ })
+
+/**
* check_sub_overflow() - Calculate subtraction with overflow checking
* @a: minuend; value to subtract from
* @b: subtrahend; value to subtract from @a
* @d: pointer to store difference
*
- * Returns 0 on success.
+ * Returns true on wrap-around, false otherwise.
*
- * *@d holds the results of the attempted subtraction, but is not considered
- * "safe for use" on a non-zero return value, which indicates that the
- * difference has underflowed or been truncated.
+ * *@d holds the results of the attempted subtraction, regardless of whether
+ * wrap-around occurred.
*/
#define check_sub_overflow(a, b, d) \
__must_check_overflow(__builtin_sub_overflow(a, b, d))
/**
+ * wrapping_sub() - Intentionally perform a wrapping subtraction
+ * @type: type for result of calculation
+ * @a: minuend; value to subtract from
+ * @b: subtrahend; value to subtract from @a
+ *
+ * Return the potentially wrapped-around subtraction without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_sub(type, a, b) \
+ ({ \
+ type __val; \
+ __builtin_sub_overflow(a, b, &__val); \
+ __val; \
+ })
+
+/**
+ * wrapping_assign_sub() - Intentionally perform a wrapping decrement assign
+ * @var: variable to be decremented
+ * @offset: amount to subtract
+ *
+ * Decrements @var by @offset with wrap-around. Returns the resulting
+ * value of @var. Will not trip any wrap-around sanitizers.
+ *
+ * Returns the new value of @var.
+ */
+#define wrapping_assign_sub(var, offset) \
+ ({ \
+ typeof(var) *__ptr = &(var); \
+ *__ptr = wrapping_sub(typeof(var), *__ptr, offset); \
+ })
+
+/**
* check_mul_overflow() - Calculate multiplication with overflow checking
* @a: first factor
* @b: second factor
* @d: pointer to store product
*
- * Returns 0 on success.
+ * Returns true on wrap-around, false otherwise.
*
- * *@d holds the results of the attempted multiplication, but is not
- * considered "safe for use" on a non-zero return value, which indicates
- * that the product has overflowed or been truncated.
+ * *@d holds the results of the attempted multiplication, regardless of whether
+ * wrap-around occurred.
*/
#define check_mul_overflow(a, b, d) \
__must_check_overflow(__builtin_mul_overflow(a, b, d))
/**
+ * wrapping_mul() - Intentionally perform a wrapping multiplication
+ * @type: type for result of calculation
+ * @a: first factor
+ * @b: second factor
+ *
+ * Return the potentially wrapped-around multiplication without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_mul(type, a, b) \
+ ({ \
+ type __val; \
+ __builtin_mul_overflow(a, b, &__val); \
+ __val; \
+ })
+
+/**
* check_shl_overflow() - Calculate a left-shifted value and check overflow
* @a: Value to be shifted
* @s: How many bits left to shift
@@ -122,7 +201,7 @@ static inline bool __must_check __must_check_overflow(bool overflow)
typeof(a) _a = a; \
typeof(s) _s = s; \
typeof(d) _d = d; \
- u64 _a_full = _a; \
+ unsigned long long _a_full = _a; \
unsigned int _to_shift = \
is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \
*_d = (_a_full << _to_shift); \
@@ -132,10 +211,10 @@ static inline bool __must_check __must_check_overflow(bool overflow)
#define __overflows_type_constexpr(x, T) ( \
is_unsigned_type(typeof(x)) ? \
- (x) > type_max(typeof(T)) : \
+ (x) > type_max(T) : \
is_unsigned_type(typeof(T)) ? \
- (x) < 0 || (x) > type_max(typeof(T)) : \
- (x) < type_min(typeof(T)) || (x) > type_max(typeof(T)))
+ (x) < 0 || (x) > type_max(T) : \
+ (x) < type_min(T) || (x) > type_max(T))
#define __overflows_type(x, T) ({ \
typeof(T) v = 0; \
@@ -312,27 +391,40 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
struct_size((type *)NULL, member, count)
/**
- * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
- * Enables caller macro to pass (different) initializer.
+ * __DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
+ * Enables caller macro to pass arbitrary trailing expressions
*
* @type: structure type name, including "struct" keyword.
* @name: Name for a variable to define.
* @member: Name of the array member.
* @count: Number of elements in the array; must be compile-time const.
- * @initializer: initializer expression (could be empty for no init).
+ * @trailer: Trailing expressions for attributes and/or initializers.
*/
-#define _DEFINE_FLEX(type, name, member, count, initializer) \
+#define __DEFINE_FLEX(type, name, member, count, trailer...) \
_Static_assert(__builtin_constant_p(count), \
"onstack flex array members require compile-time const count"); \
union { \
u8 bytes[struct_size_t(type, member, count)]; \
type obj; \
- } name##_u initializer; \
+ } name##_u trailer; \
type *name = (type *)&name##_u
/**
- * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing
- * flexible array member.
+ * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
+ * Enables caller macro to pass (different) initializer.
+ *
+ * @type: structure type name, including "struct" keyword.
+ * @name: Name for a variable to define.
+ * @member: Name of the array member.
+ * @count: Number of elements in the array; must be compile-time const.
+ * @initializer: Initializer expression (e.g., pass `= { }` at minimum).
+ */
+#define _DEFINE_FLEX(type, name, member, count, initializer...) \
+ __DEFINE_FLEX(type, name, member, count, = { .obj initializer })
+
+/**
+ * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing
+ * flexible array member, when it does not have a __counted_by annotation.
*
* @type: structure type name, including "struct" keyword.
* @name: Name for a variable to define.
@@ -342,8 +434,42 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
* Define a zeroed, on-stack, instance of @type structure with a trailing
* flexible array member.
* Use __struct_size(@name) to get compile-time size of it afterwards.
+ * Use __member_size(@name->member) to get compile-time size of @name members.
+ * Use STACK_FLEX_ARRAY_SIZE(@name, @member) to get compile-time number of
+ * elements in array @member.
+ */
+#define DEFINE_RAW_FLEX(type, name, member, count) \
+ __DEFINE_FLEX(type, name, member, count, = { })
+
+/**
+ * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing
+ * flexible array member.
+ *
+ * @TYPE: structure type name, including "struct" keyword.
+ * @NAME: Name for a variable to define.
+ * @MEMBER: Name of the array member.
+ * @COUNTER: Name of the __counted_by member.
+ * @COUNT: Number of elements in the array; must be compile-time const.
+ *
+ * Define a zeroed, on-stack, instance of @TYPE structure with a trailing
+ * flexible array member.
+ * Use __struct_size(@NAME) to get compile-time size of it afterwards.
+ * Use __member_size(@NAME->member) to get compile-time size of @NAME members.
+ * Use STACK_FLEX_ARRAY_SIZE(@NAME, @MEMBER) to get compile-time number of
+ * elements in array @MEMBER.
+ */
+#define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \
+ _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .COUNTER = COUNT, })
+
+/**
+ * STACK_FLEX_ARRAY_SIZE() - helper macro for DEFINE_FLEX() family.
+ * Returns the number of elements in @array.
+ *
+ * @name: Name for a variable defined in DEFINE_RAW_FLEX()/DEFINE_FLEX().
+ * @array: Name of the array member.
*/
-#define DEFINE_FLEX(type, name, member, count) \
- _DEFINE_FLEX(type, name, member, count, = {})
+#define STACK_FLEX_ARRAY_SIZE(name, array) \
+ (__member_size((name)->array) / sizeof(*(name)->array) + \
+ __must_be_array((name)->array))
#endif /* _LINUXKPI_LINUX_OVERFLOW_H */
diff --git a/sys/compat/linuxkpi/common/include/linux/pci.h b/sys/compat/linuxkpi/common/include/linux/pci.h
index 3fd4191b9917..d891d0df3546 100644
--- a/sys/compat/linuxkpi/common/include/linux/pci.h
+++ b/sys/compat/linuxkpi/common/include/linux/pci.h
@@ -355,7 +355,6 @@ struct pci_dev {
TAILQ_HEAD(, pci_mmio_region) mmio;
};
-int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name);
int pci_alloc_irq_vectors(struct pci_dev *pdev, int minv, int maxv,
unsigned int flags);
bool pci_device_is_present(struct pci_dev *pdev);
@@ -365,10 +364,13 @@ void __iomem **linuxkpi_pcim_iomap_table(struct pci_dev *pdev);
void *linuxkpi_pci_iomap_range(struct pci_dev *, int,
unsigned long, unsigned long);
void *linuxkpi_pci_iomap(struct pci_dev *, int, unsigned long);
+void *linuxkpi_pcim_iomap(struct pci_dev *, int, unsigned long);
void linuxkpi_pci_iounmap(struct pci_dev *pdev, void *res);
int linuxkpi_pcim_iomap_regions(struct pci_dev *pdev, uint32_t mask,
const char *name);
+int linuxkpi_pci_request_region(struct pci_dev *, int, const char *);
int linuxkpi_pci_request_regions(struct pci_dev *pdev, const char *res_name);
+int linuxkpi_pcim_request_all_regions(struct pci_dev *, const char *);
void linuxkpi_pci_release_region(struct pci_dev *pdev, int bar);
void linuxkpi_pci_release_regions(struct pci_dev *pdev);
int linuxkpi_pci_enable_msix(struct pci_dev *pdev, struct msix_entry *entries,
@@ -561,12 +563,16 @@ done:
return (pdev->bus->self);
}
+#define pci_request_region(pdev, bar, res_name) \
+ linuxkpi_pci_request_region(pdev, bar, res_name)
#define pci_release_region(pdev, bar) \
linuxkpi_pci_release_region(pdev, bar)
-#define pci_release_regions(pdev) \
- linuxkpi_pci_release_regions(pdev)
#define pci_request_regions(pdev, res_name) \
linuxkpi_pci_request_regions(pdev, res_name)
+#define pci_release_regions(pdev) \
+ linuxkpi_pci_release_regions(pdev)
+#define pcim_request_all_regions(pdev, name) \
+ linuxkpi_pcim_request_all_regions(pdev, name)
static inline void
lkpi_pci_disable_msix(struct pci_dev *pdev)
@@ -803,6 +809,8 @@ static inline void pci_disable_sriov(struct pci_dev *dev)
linuxkpi_pci_iomap_range(pdev, mmio_bar, mmio_off, mmio_size)
#define pci_iomap(pdev, mmio_bar, mmio_size) \
linuxkpi_pci_iomap(pdev, mmio_bar, mmio_size)
+#define pcim_iomap(pdev, bar, maxlen) \
+ linuxkpi_pcim_iomap(pdev, bar, maxlen)
#define pci_iounmap(pdev, res) \
linuxkpi_pci_iounmap(pdev, res)
@@ -1445,6 +1453,9 @@ linuxkpi_pci_get_device(uint32_t vendor, uint32_t device, struct pci_dev *odev)
return (lkpi_pci_get_device(vendor, device, odev));
}
+#define for_each_pci_dev(_pdev) \
+ while ((_pdev = linuxkpi_pci_get_device(PCI_ANY_ID, PCI_ANY_ID, _pdev)) != NULL)
+
/* This is a FreeBSD extension so we can use bus_*(). */
static inline void
linuxkpi_pcim_want_to_use_bus_functions(struct pci_dev *pdev)
diff --git a/sys/compat/linuxkpi/common/include/linux/rcupdate.h b/sys/compat/linuxkpi/common/include/linux/rcupdate.h
index 85d766c8dbc9..4aceb7296cd6 100644
--- a/sys/compat/linuxkpi/common/include/linux/rcupdate.h
+++ b/sys/compat/linuxkpi/common/include/linux/rcupdate.h
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2016-2017 Mellanox Technologies, Ltd.
* All rights reserved.
- * Copyright (c) 2024 The FreeBSD Foundation
+ * Copyright (c) 2024-2025 The FreeBSD Foundation
*
* Portions of this software were developed by Björn Zeeb
* under sponsorship from the FreeBSD Foundation.
@@ -35,6 +35,7 @@
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/cleanup.h>
#include <machine/atomic.h>
@@ -162,4 +163,6 @@ void linux_synchronize_rcu(unsigned type);
#define init_rcu_head_on_stack(...)
#define destroy_rcu_head_on_stack(...)
+DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock())
+
#endif /* _LINUXKPI_LINUX_RCUPDATE_H_ */
diff --git a/sys/compat/linuxkpi/common/include/linux/skbuff.h b/sys/compat/linuxkpi/common/include/linux/skbuff.h
index c8ad90281e34..6e41c368a8b8 100644
--- a/sys/compat/linuxkpi/common/include/linux/skbuff.h
+++ b/sys/compat/linuxkpi/common/include/linux/skbuff.h
@@ -1,6 +1,6 @@
/*-
* Copyright (c) 2020-2025 The FreeBSD Foundation
- * Copyright (c) 2021-2023 Bjoern A. Zeeb
+ * Copyright (c) 2021-2025 Bjoern A. Zeeb
*
* This software was developed by Björn Zeeb under sponsorship from
* the FreeBSD Foundation.
@@ -47,13 +47,11 @@
#include <linux/ktime.h>
#include <linux/compiler.h>
-#include "opt_wlan.h"
-
-/* Currently this is only used for wlan so we can depend on that. */
-#if defined(IEEE80211_DEBUG) && !defined(SKB_DEBUG)
-#define SKB_DEBUG
-#endif
-
+/*
+ * At least the net/intel-irdma-kmod port pulls this header in; likely through
+ * if_ether.h (see PR289268). This means we no longer can rely on
+ * IEEE80211_DEBUG (opt_wlan.h) to automatically set SKB_DEBUG.
+ */
/* #define SKB_DEBUG */
#ifdef SKB_DEBUG
@@ -120,7 +118,7 @@ enum sk_checksum_flags {
CHECKSUM_NONE = 0x00,
CHECKSUM_UNNECESSARY = 0x01,
CHECKSUM_PARTIAL = 0x02,
- CHECKSUM_COMPLETE = 0x04,
+ CHECKSUM_COMPLETE = 0x03,
};
struct skb_frag {
@@ -170,7 +168,7 @@ struct sk_buff {
};
};
uint16_t protocol;
- uint8_t ip_summed;
+ uint8_t ip_summed; /* 2 bit only. */
/* uint8_t */
/* "Scratch" area for layers to store metadata. */
diff --git a/sys/compat/linuxkpi/common/include/linux/slab.h b/sys/compat/linuxkpi/common/include/linux/slab.h
index 47e3d133eb6c..0e649e1e3c4a 100644
--- a/sys/compat/linuxkpi/common/include/linux/slab.h
+++ b/sys/compat/linuxkpi/common/include/linux/slab.h
@@ -40,8 +40,10 @@
#include <linux/compat.h>
#include <linux/types.h>
#include <linux/gfp.h>
+#include <linux/err.h>
#include <linux/llist.h>
#include <linux/overflow.h>
+#include <linux/cleanup.h>
MALLOC_DECLARE(M_KMALLOC);
@@ -153,6 +155,8 @@ kfree(const void *ptr)
lkpi_kfree(ptr);
}
+DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T))
+
/*
* Other k*alloc() funtions using the above as underlying allocator.
*/
diff --git a/sys/compat/linuxkpi/common/include/linux/string_helpers.h b/sys/compat/linuxkpi/common/include/linux/string_helpers.h
index 1bdff2730361..2c6fe0b1708d 100644
--- a/sys/compat/linuxkpi/common/include/linux/string_helpers.h
+++ b/sys/compat/linuxkpi/common/include/linux/string_helpers.h
@@ -66,4 +66,6 @@ str_enable_disable(bool value)
return "disable";
}
+#define str_disable_enable(_v) str_enable_disable(!(_v))
+
#endif
diff --git a/sys/compat/linuxkpi/common/include/linux/timer.h b/sys/compat/linuxkpi/common/include/linux/timer.h
index a635f0faea59..d48939e28a02 100644
--- a/sys/compat/linuxkpi/common/include/linux/timer.h
+++ b/sys/compat/linuxkpi/common/include/linux/timer.h
@@ -49,8 +49,13 @@ extern unsigned long linux_timer_hz_mask;
#define TIMER_IRQSAFE 0x0001
+#if defined(LINUXKPI_VERSION) && (LINUXKPI_VERSION < 61600)
#define from_timer(var, arg, field) \
container_of(arg, typeof(*(var)), field)
+#else
+#define timer_container_of(var, arg, field) \
+ container_of(arg, typeof(*(var)), field)
+#endif
#define timer_setup(timer, func, flags) do { \
CTASSERT(((flags) & ~TIMER_IRQSAFE) == 0); \
@@ -79,11 +84,23 @@ extern unsigned long linux_timer_hz_mask;
extern int mod_timer(struct timer_list *, unsigned long);
extern void add_timer(struct timer_list *);
extern void add_timer_on(struct timer_list *, int cpu);
-extern int del_timer(struct timer_list *);
-extern int del_timer_sync(struct timer_list *);
+
+extern int timer_delete(struct timer_list *);
extern int timer_delete_sync(struct timer_list *);
extern int timer_shutdown_sync(struct timer_list *);
+/* Keeps the del_timer() name available; forwards to timer_delete(). */
+static inline int
+del_timer(struct timer_list *t)
+{
+	return (timer_delete(t));
+}
+
+/* Keeps the del_timer_sync() name available; forwards to timer_delete_sync(). */
+static inline int
+del_timer_sync(struct timer_list *t)
+{
+	return (timer_delete_sync(t));
+}
+
#define timer_pending(timer) callout_pending(&(timer)->callout)
#define round_jiffies(j) \
((unsigned long)(((j) + linux_timer_hz_mask) & ~linux_timer_hz_mask))
diff --git a/sys/compat/linuxkpi/common/include/net/mac80211.h b/sys/compat/linuxkpi/common/include/net/mac80211.h
index 0106e6648bd4..8de03410c6b6 100644
--- a/sys/compat/linuxkpi/common/include/net/mac80211.h
+++ b/sys/compat/linuxkpi/common/include/net/mac80211.h
@@ -737,7 +737,7 @@ struct ieee80211_link_sta {
struct ieee80211_he_6ghz_capa he_6ghz_capa;
struct ieee80211_sta_eht_cap eht_cap;
uint8_t rx_nss;
- enum ieee80211_sta_rx_bw bandwidth;
+ enum ieee80211_sta_rx_bandwidth bandwidth;
enum ieee80211_smps_mode smps_mode;
struct ieee80211_sta_agg agg;
struct ieee80211_sta_txpwr txpwr;
diff --git a/sys/compat/linuxkpi/common/src/linux_80211.c b/sys/compat/linuxkpi/common/src/linux_80211.c
index e248588dd275..d00734001a59 100644
--- a/sys/compat/linuxkpi/common/src/linux_80211.c
+++ b/sys/compat/linuxkpi/common/src/linux_80211.c
@@ -77,6 +77,8 @@
#include <linux/rculist.h>
#include "linux_80211.h"
+/* #define LKPI_80211_USE_SCANLIST */
+/* #define LKPI_80211_BGSCAN */
#define LKPI_80211_WME
#define LKPI_80211_HW_CRYPTO
#define LKPI_80211_HT
@@ -103,6 +105,10 @@ SYSCTL_DECL(_compat_linuxkpi);
SYSCTL_NODE(_compat_linuxkpi, OID_AUTO, 80211, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"LinuxKPI 802.11 compatibility layer");
+static bool lkpi_order_scanlist = false;
+SYSCTL_BOOL(_compat_linuxkpi_80211, OID_AUTO, order_scanlist, CTLFLAG_RW,
+ &lkpi_order_scanlist, 0, "Enable LinuxKPI 802.11 scan list shuffeling");
+
#if defined(LKPI_80211_HW_CRYPTO)
static bool lkpi_hwcrypto = false;
SYSCTL_BOOL(_compat_linuxkpi_80211, OID_AUTO, hw_crypto, CTLFLAG_RDTUN,
@@ -167,6 +173,7 @@ const struct cfg80211_ops linuxkpi_mac80211cfgops = {
static struct lkpi_sta *lkpi_find_lsta_by_ni(struct lkpi_vif *,
struct ieee80211_node *);
#endif
+static void lkpi_sw_scan_task(void *, int);
static void lkpi_80211_txq_tx_one(struct lkpi_sta *, struct mbuf *);
static void lkpi_80211_txq_task(void *, int);
static void lkpi_80211_lhw_rxq_task(void *, int);
@@ -394,7 +401,7 @@ lkpi_80211_dump_stas(SYSCTL_HANDLER_ARGS)
return (0);
}
-static enum ieee80211_sta_rx_bw
+static enum ieee80211_sta_rx_bandwidth
lkpi_cw_to_rx_bw(enum nl80211_chan_width cw)
{
switch (cw) {
@@ -418,7 +425,7 @@ lkpi_cw_to_rx_bw(enum nl80211_chan_width cw)
}
static enum nl80211_chan_width
-lkpi_rx_bw_to_cw(enum ieee80211_sta_rx_bw rx_bw)
+lkpi_rx_bw_to_cw(enum ieee80211_sta_rx_bandwidth rx_bw)
{
switch (rx_bw) {
case IEEE80211_STA_RX_BW_20:
@@ -439,7 +446,7 @@ lkpi_sync_chanctx_cw_from_rx_bw(struct ieee80211_hw *hw,
struct ieee80211_vif *vif, struct ieee80211_sta *sta)
{
struct ieee80211_chanctx_conf *chanctx_conf;
- enum ieee80211_sta_rx_bw old_bw;
+ enum ieee80211_sta_rx_bandwidth old_bw;
uint32_t changed;
chanctx_conf = rcu_dereference_protected(vif->bss_conf.chanctx_conf,
@@ -544,7 +551,7 @@ static void
lkpi_sta_sync_vht_from_ni(struct ieee80211_vif *vif, struct ieee80211_sta *sta,
struct ieee80211_node *ni)
{
- enum ieee80211_sta_rx_bw bw;
+ enum ieee80211_sta_rx_bandwidth bw;
uint32_t width;
int rx_nss;
uint16_t rx_mcs_map;
@@ -955,6 +962,30 @@ lkpi_nl80211_band_to_net80211_band(enum nl80211_band band)
return (0x00);
}
+#ifdef LINUXKPI_DEBUG_80211
+/*
+ * Map an nl80211 band to a short name for debug output.
+ * Panics on a band that has no name here.
+ */
+static const char *
+lkpi_nl80211_band_name(enum nl80211_band band)
+{
+	switch (band) {
+	case NL80211_BAND_2GHZ:
+		return ("2Ghz");
+	case NL80211_BAND_5GHZ:
+		return ("5Ghz");
+	case NL80211_BAND_60GHZ:
+		return ("60Ghz");
+	case NL80211_BAND_6GHZ:
+		return ("6Ghz");
+	default:
+		panic("%s: unsupported band %u\n", __func__, band);
+	}
+}
+#endif
+
#if 0
static enum ieee80211_ac_numbers
lkpi_ac_net_to_l80211(int ac)
@@ -1319,6 +1350,7 @@ lkpi_iv_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k)
lhw = ic->ic_softc;
hw = LHW_TO_HW(lhw);
lvif = VAP_TO_LVIF(vap);
+ vif = LVIF_TO_VIF(lvif);
/*
* Make sure we do not make it here without going through
@@ -1326,6 +1358,23 @@ lkpi_iv_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k)
*/
lockdep_assert_wiphy(hw->wiphy);
+ /*
+ * While we are assoc we may still send packets. We cannot delete the
+ * keys as otherwise packets could go out unencrypted. Some firmware
+ * does not like this and will fire an assert.
+ * net80211 needs to drive this better but given we want the disassoc
+ * frame out and have to unlock we are open to a race currently.
+ * This check should prevent problems.
+ * How to test: run 800Mbit/s UDP traffic and during that restart your
+ * supplicant. You want to survive that.
+ */
+ if (vif->cfg.assoc) {
+ if (linuxkpi_debug_80211 & D80211_TRACE_HW_CRYPTO)
+ ic_printf(ic, "%d %lu %s: vif still assoc; not deleting keys\n",
+ curthread->td_tid, jiffies, __func__);
+ return (0);
+ }
+
if (IEEE80211_KEY_UNDEFINED(k)) {
ic_printf(ic, "%s: vap %p key %p is undefined: %p %u\n",
__func__, vap, k, k->wk_cipher, k->wk_keyix);
@@ -1370,7 +1419,6 @@ lkpi_iv_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k)
kc->keyidx, kc->hw_key_idx, kc->flags, IEEE80211_KEY_FLAG_BITS);
#endif
- vif = LVIF_TO_VIF(lvif);
error = lkpi_80211_mo_set_key(hw, DISABLE_KEY, vif, sta, kc);
if (error != 0) {
ic_printf(ic, "%d %lu %s: set_key cmd %d(%s) for sta %6D failed: %d\n",
@@ -1842,13 +1890,31 @@ lkpi_update_dtim_tsf(struct ieee80211_vif *vif, struct ieee80211_node *ni,
vif->bss_conf.beacon_int = 16;
bss_changed |= BSS_CHANGED_BEACON_INT;
}
- if (vif->bss_conf.dtim_period != vap->iv_dtim_period &&
- vap->iv_dtim_period > 0) {
- vif->bss_conf.dtim_period = vap->iv_dtim_period;
+
+ /*
+ * lkpi_iv_sta_recv_mgmt() will directly call into this function.
+ * iwlwifi(4) in iwl_mvm_bss_info_changed_station_common() will
+ * stop session protection the moment it sees
+ * BSS_CHANGED_BEACON_INFO (with the expectation that it was
+ * "a beacon from the associated AP"). It will also update
+ * the beacon filter in that case. This is the only place
+ * we set the BSS_CHANGED_BEACON_INFO on the non-teardown
+ * path so make sure we only run this check once we are
+ * assoc. (*iv_recv_mgmt)() will be called before we enter
+ * here so the ni will be updated with information from the
+ * beacon via net80211::sta_recv_mgmt(). We also need to
+ * make sure we do not do it on every beacon we still may
+ * get so only do if something changed. vif->bss_conf.dtim_period
+ * should be 0 as we start up (we also reset it on teardown).
+ */
+ if (vif->cfg.assoc &&
+ vif->bss_conf.dtim_period != ni->ni_dtim_period &&
+ ni->ni_dtim_period > 0) {
+ vif->bss_conf.dtim_period = ni->ni_dtim_period;
bss_changed |= BSS_CHANGED_BEACON_INFO;
}
- vif->bss_conf.sync_dtim_count = vap->iv_dtim_count;
+ vif->bss_conf.sync_dtim_count = ni->ni_dtim_count;
vif->bss_conf.sync_tsf = le64toh(ni->ni_tstamp.tsf);
/* vif->bss_conf.sync_device_ts = set in linuxkpi_ieee80211_rx. */
@@ -1876,6 +1942,8 @@ lkpi_stop_hw_scan(struct lkpi_hw *lhw, struct ieee80211_vif *vif)
int error;
bool cancel;
+ TRACE_SCAN(lhw->ic, "scan_flags %b", lhw->scan_flags, LKPI_LHW_SCAN_BITS);
+
LKPI_80211_LHW_SCAN_LOCK(lhw);
cancel = (lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0;
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
@@ -2798,6 +2866,14 @@ _lkpi_sta_assoc_to_down(struct ieee80211vap *vap, enum ieee80211_state nstate, i
bss_changed = 0;
bss_changed |= lkpi_disassoc(sta, vif, lhw);
+#ifdef LKPI_80211_HW_CRYPTO
+ /*
+ * In theory we remove keys here but there must not exist any for this
+ * state change until we clean them up again into small steps and no
+ * code duplication.
+ */
+#endif
+
lkpi_lsta_dump(lsta, ni, __func__, __LINE__);
/* Adjust sta and change state (from NONE) to NOTEXIST. */
@@ -3333,6 +3409,16 @@ lkpi_sta_run_to_init(struct ieee80211vap *vap, enum ieee80211_state nstate, int
#ifdef LKPI_80211_HW_CRYPTO
if (lkpi_hwcrypto) {
+ /*
+ * In theory we only need to do this if we changed assoc.
+ * If we were not assoc, there should be no keys and we
+ * should not be here.
+ */
+#ifdef notyet
+ KASSERT((bss_changed & BSS_CHANGED_ASSOC) != 0, ("%s: "
+ "trying to remove keys but were not assoc: %#010jx, lvif %p\n",
+ __func__, (uintmax_t)bss_changed, lvif));
+#endif
error = lkpi_sta_del_keys(hw, vif, lsta);
if (error != 0) {
ic_printf(vap->iv_ic, "%s:%d: lkpi_sta_del_keys "
@@ -3394,6 +3480,9 @@ lkpi_sta_run_to_init(struct ieee80211vap *vap, enum ieee80211_state nstate, int
* 4) call unassign_vif_chanctx
* 5) call lkpi_hw_conf_idle
* 6) call remove_chanctx
+ *
+ * Note: vif->driver_flags & IEEE80211_VIF_REMOVE_AP_AFTER_DISASSOC
+ * might change this.
*/
bss_changed |= lkpi_disassoc(sta, vif, lhw);
@@ -3545,7 +3634,7 @@ lkpi_iv_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg)
vif = LVIF_TO_VIF(lvif);
/* No need to replicate this in most state handlers. */
- if (ostate == IEEE80211_S_SCAN && nstate != IEEE80211_S_SCAN)
+ if (nstate > IEEE80211_S_SCAN)
lkpi_stop_hw_scan(lhw, vif);
s = sta_state_fsm;
@@ -3739,6 +3828,7 @@ lkpi_iv_sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0,
enum ieee80211_bss_changed bss_changed;
lvif = VAP_TO_LVIF(ni->ni_vap);
+ vif = LVIF_TO_VIF(lvif);
lvif->iv_recv_mgmt(ni, m0, subtype, rxs, rssi, nf);
@@ -3746,13 +3836,18 @@ lkpi_iv_sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0,
case IEEE80211_FC0_SUBTYPE_PROBE_RESP:
break;
case IEEE80211_FC0_SUBTYPE_BEACON:
- lvif->beacons++;
+ /*
+ * Only count beacons when assoc. SCAN has its own logging.
+ * This is for connection/beacon loss/session protection almost
+ * over debugging when trying to get into a stable RUN state.
+ */
+ if (vif->cfg.assoc)
+ lvif->beacons++;
break;
default:
return;
}
- vif = LVIF_TO_VIF(lvif);
lhw = ni->ni_ic->ic_softc;
hw = LHW_TO_HW(lhw);
@@ -3824,6 +3919,7 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ],
lvif = malloc(len, M_80211_VAP, M_WAITOK | M_ZERO);
mtx_init(&lvif->mtx, "lvif", NULL, MTX_DEF);
+ TASK_INIT(&lvif->sw_scan_task, 0, lkpi_sw_scan_task, lvif);
INIT_LIST_HEAD(&lvif->lsta_list);
lvif->lvif_bss = NULL;
refcount_init(&lvif->nt_unlocked, 0);
@@ -3987,13 +4083,9 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ],
* Modern chipset/fw/drv will do A-MPDU in drv/fw and fail
* to do so if they cannot do the crypto too.
*/
- if (!lkpi_hwcrypto && ieee80211_hw_check(hw, AMPDU_AGGREGATION))
+ if (!lkpi_hwcrypto && IEEE80211_CONF_AMPDU_OFFLOAD(ic))
vap->iv_flags_ht &= ~IEEE80211_FHT_AMPDU_RX;
#endif
-#if defined(LKPI_80211_HT)
- /* 20250125-BZ Keep A-MPDU TX cleared until we sorted out AddBA for all drivers. */
- vap->iv_flags_ht &= ~IEEE80211_FHT_AMPDU_TX;
-#endif
if (hw->max_listen_interval == 0)
hw->max_listen_interval = 7 * (ic->ic_lintval / ic->ic_bintval);
@@ -4062,6 +4154,8 @@ lkpi_ic_vap_delete(struct ieee80211vap *vap)
/* Clear up per-VIF/VAP sysctls. */
sysctl_ctx_free(&lvif->sysctl_ctx);
+ ieee80211_draintask(ic, &lvif->sw_scan_task);
+
LKPI_80211_LHW_LVIF_LOCK(lhw);
TAILQ_REMOVE(&lhw->lvif_head, lvif, lvif_entry);
LKPI_80211_LHW_LVIF_UNLOCK(lhw);
@@ -4303,6 +4397,113 @@ lkpi_scan_ies_add(uint8_t *p, struct ieee80211_scan_ies *scan_ies,
}
+/*
+ * Mark hardware scan offload as available if the driver provides a
+ * hw_scan method: sets IEEE80211_FEXT_SCAN_OFFLOAD on the ic and
+ * LKPI_LHW_SCAN_HW in lhw->scan_flags.  Does nothing otherwise.
+ */
static void
+lkpi_enable_hw_scan(struct lkpi_hw *lhw)
+{
+
+	/* No hw_scan method, nothing to advertise. */
+	if (lhw->ops->hw_scan == NULL)
+		return;
+
+	/*
+	 * Advertise full-offload scanning.
+	 *
+	 * Not limiting to SINGLE_SCAN_ON_ALL_BANDS here as otherwise
+	 * we essentially disable hw_scan for all drivers not setting
+	 * the flag.
+	 */
+	lhw->ic->ic_flags_ext |= IEEE80211_FEXT_SCAN_OFFLOAD;
+	lhw->scan_flags |= LKPI_LHW_SCAN_HW;
+}
+
+#ifndef LKPI_80211_USE_SCANLIST
+static const uint32_t chan_pri[] = {
+ 5180, 5500, 5745,
+ 5260, 5580, 5660, 5825,
+ 5220, 5300, 5540, 5620, 5700, 5785, 5865,
+ 2437, 2412, 2422, 2462, 2472, 2432, 2452
+};
+
+/*
+ * Return the position of the channel's center frequency in chan_pri[],
+ * or -1 if that frequency is not on the list.
+ */
+static int
+lkpi_scan_chan_list_idx(const struct linuxkpi_ieee80211_channel *lc)
+{
+	int pos;
+
+	pos = 0;
+	while (pos < nitems(chan_pri)) {
+		if (chan_pri[pos] == lc->center_freq)
+			return (pos);
+		pos++;
+	}
+
+	return (-1);
+}
+
+/*
+ * Compare two channels for scan ordering.
+ * Channels found in chan_pri[] sort before channels that are not; two listed
+ * channels are ordered by their list position, two unlisted ones by
+ * center_freq.  Returns <0, 0 or >0 in the usual comparator fashion.
+ */
+static int
+lkpi_scan_chan_list_comp(const struct linuxkpi_ieee80211_channel *lc1,
+    const struct linuxkpi_ieee80211_channel *lc2)
+{
+	int pos1, pos2;
+
+	pos1 = lkpi_scan_chan_list_idx(lc1);
+	pos2 = lkpi_scan_chan_list_idx(lc2);
+
+	/* Both on the list: whichever is first in the list. */
+	if (pos1 != -1 && pos2 != -1)
+		return (pos1 - pos2);
+
+	/* Neither on the list, use center_freq. */
+	if (pos1 == -1 && pos2 == -1)
+		return (lc1->center_freq - lc2->center_freq);
+
+	/* Exactly one is on the list; the listed one goes first. */
+	return ((pos1 == -1) ? 1 : -1);
+}
+
+/*
+ * Sort the nchan channel pointers in cpp[] in place into the order given
+ * by lkpi_scan_chan_list_comp().
+ *
+ * This is an insertion sort, which is stable and fine for the few dozen
+ * channels a scan request carries.  Each new element is moved towards
+ * the front until its predecessor no longer compares greater, so the
+ * prefix [0, i] is sorted after every outer iteration.
+ */
+static void
+lkpi_scan_chan_list_resort(struct linuxkpi_ieee80211_channel **cpp, size_t nchan)
+{
+	struct linuxkpi_ieee80211_channel *lc, *nc;
+	size_t i, j;
+
+	/* Starting at 1 also covers nchan == 0 without size_t underflow. */
+	for (i = 1; i < nchan; i++) {
+		for (j = i; j > 0; j--) {
+			lc = *(cpp + j);
+			nc = *(cpp + j - 1);
+			/* Predecessor sorts first (or equal): element is placed. */
+			if (lkpi_scan_chan_list_comp(lc, nc) >= 0)
+				break;
+			*(cpp + j) = nc;
+			*(cpp + j - 1) = lc;
+		}
+	}
+}
+
+/*
+ * Decide whether channel c is to be included in a scan request.
+ * Returns false for a channel flagged IEEE80211_CHAN_DISABLED or one whose
+ * IEEE channel number is clear in ic->ic_chan_active; true otherwise.
+ * With log set the reason for skipping is reported via TRACE_SCAN().
+ */
+static bool
+lkpi_scan_chan(struct linuxkpi_ieee80211_channel *c,
+    struct ieee80211com *ic, bool log)
+{
+
+	if ((c->flags & IEEE80211_CHAN_DISABLED) != 0) {
+		if (log) {
+			TRACE_SCAN(ic, "Skipping disabled chan "
+			    "on band %s [%#x/%u/%#x]",
+			    lkpi_nl80211_band_name(c->band), c->hw_value,
+			    c->center_freq, c->flags);
+		}
+		return (false);
+	}
+
+	/* Enabled and active: scan it. */
+	if (!isclr(ic->ic_chan_active, ieee80211_mhz2ieee(c->center_freq,
+	    lkpi_nl80211_band_to_net80211_band(c->band))))
+		return (true);
+
+	if (log) {
+		TRACE_SCAN(ic, "Skipping !active chan "
+		    "on band %s [%#x/%u/%#x]",
+		    lkpi_nl80211_band_name(c->band), c->hw_value,
+		    c->center_freq, c->flags);
+	}
+	return (false);
+}
+#endif
+
+static void
lkpi_ic_scan_start(struct ieee80211com *ic)
{
struct lkpi_hw *lhw;
@@ -4315,35 +4516,52 @@ lkpi_ic_scan_start(struct ieee80211com *ic)
bool is_hw_scan;
lhw = ic->ic_softc;
+ ss = ic->ic_scan;
+ vap = ss->ss_vap;
+ TRACE_SCAN(ic, "scan_flags %b", lhw->scan_flags, LKPI_LHW_SCAN_BITS);
+
LKPI_80211_LHW_SCAN_LOCK(lhw);
if ((lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0) {
/* A scan is still running. */
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
+ TRACE_SCAN(ic, "Trying to start new scan while still running; "
+ "cancelling new net80211 scan; scan_flags %b",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS);
+ ieee80211_cancel_scan(vap);
return;
}
is_hw_scan = (lhw->scan_flags & LKPI_LHW_SCAN_HW) != 0;
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
- ss = ic->ic_scan;
- vap = ss->ss_vap;
+#if 0
if (vap->iv_state != IEEE80211_S_SCAN) {
- IMPROVE("We need to be able to scan if not in S_SCAN");
+ TODO("We need to be able to scan if not in S_SCAN");
+ TRACE_SCAN(ic, "scan_flags %b iv_state %d",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS, vap->iv_state);
+ ieee80211_cancel_scan(vap);
return;
}
+#endif
hw = LHW_TO_HW(lhw);
if (!is_hw_scan) {
/* If hw_scan is cleared clear FEXT_SCAN_OFFLOAD too. */
vap->iv_flags_ext &= ~IEEE80211_FEXT_SCAN_OFFLOAD;
-sw_scan:
+
lvif = VAP_TO_LVIF(vap);
vif = LVIF_TO_VIF(lvif);
if (vap->iv_state == IEEE80211_S_SCAN)
lkpi_hw_conf_idle(hw, false);
+ LKPI_80211_LHW_SCAN_LOCK(lhw);
+ lhw->scan_flags |= LKPI_LHW_SCAN_RUNNING;
+ LKPI_80211_LHW_SCAN_UNLOCK(lhw);
+
lkpi_update_mcast_filter(ic);
+ TRACE_SCAN(vap->iv_ic, "Starting SW_SCAN: scan_flags %b",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS);
lkpi_80211_mo_sw_scan_start(hw, vif, vif->addr);
/* net80211::scan_start() handled PS for us. */
IMPROVE();
@@ -4358,6 +4576,9 @@ sw_scan:
struct cfg80211_scan_6ghz_params *s6gp;
size_t chan_len, nchan, ssids_len, s6ghzlen;
int band, i, ssid_count, common_ie_len;
+#ifndef LKPI_80211_USE_SCANLIST
+ int n;
+#endif
uint32_t band_mask;
uint8_t *ie, *ieend;
bool running;
@@ -4369,7 +4590,8 @@ sw_scan:
band_mask = 0;
nchan = 0;
if (ieee80211_hw_check(hw, SINGLE_SCAN_ON_ALL_BANDS)) {
-#if 0 /* Avoid net80211 scan lists until it has proper scan offload support. */
+#ifdef LKPI_80211_USE_SCANLIST
+ /* Avoid net80211 scan lists until it has proper scan offload support. */
for (i = ss->ss_next; i < ss->ss_last; i++) {
nchan++;
band = lkpi_net80211_chan_to_nl80211_band(
@@ -4387,8 +4609,17 @@ sw_scan:
continue;
}
if (hw->wiphy->bands[band] != NULL) {
- nchan += hw->wiphy->bands[band]->n_channels;
+ struct linuxkpi_ieee80211_channel *channels;
+ int n;
+
band_mask |= (1 << band);
+
+ channels = hw->wiphy->bands[band]->channels;
+ n = hw->wiphy->bands[band]->n_channels;
+ for (i = 0; i < n; i++) {
+ if (lkpi_scan_chan(&channels[i], ic, true))
+ nchan++;
+ }
}
}
#endif
@@ -4427,11 +4658,32 @@ sw_scan:
/* hw_req->req.wdev */
hw_req->req.wiphy = hw->wiphy;
hw_req->req.no_cck = false; /* XXX */
-#if 0
- /* This seems to pessimise default scanning behaviour. */
- hw_req->req.duration_mandatory = TICKS_2_USEC(ss->ss_mindwell);
- hw_req->req.duration = TICKS_2_USEC(ss->ss_maxdwell);
-#endif
+
+ /*
+ * In general setting duration[_mandatory] seems to pessimise
+ * default scanning behaviour. We only use it for BGSCANning
+ * to keep the dwell times small.
+ * Setting duration_mandatory makes this the maximum dwell
+ * time (otherwise may be shorter). Duration is in TU.
+ */
+ if ((ic->ic_flags_ext & IEEE80211_FEXT_BGSCAN) != 0) {
+ unsigned long dwell;
+
+ if ((ic->ic_caps & IEEE80211_C_BGSCAN) == 0 ||
+ (vap->iv_flags & IEEE80211_F_BGSCAN) == 0)
+ ic_printf(ic, "BGSCAN despite off: %b, %b, %b\n",
+ ic->ic_flags_ext, IEEE80211_FEXT_BITS,
+ vap->iv_flags, IEEE80211_F_BITS,
+ ic->ic_caps, IEEE80211_C_BITS);
+
+ dwell = ss->ss_mindwell;
+ if (dwell == 0)
+ dwell = msecs_to_ticks(20);
+
+ hw_req->req.duration_mandatory = true;
+ hw_req->req.duration = TICKS_2_USEC(dwell) / 1024;
+ }
+
#ifdef __notyet__
hw_req->req.flags |= NL80211_SCAN_FLAG_RANDOM_ADDR;
memcpy(hw_req->req.mac_addr, xxx, IEEE80211_ADDR_LEN);
@@ -4442,11 +4694,12 @@ sw_scan:
hw_req->req.n_channels = nchan;
cpp = (struct linuxkpi_ieee80211_channel **)(hw_req + 1);
lc = (struct linuxkpi_ieee80211_channel *)(cpp + nchan);
+#ifdef LKPI_80211_USE_SCANLIST
for (i = 0; i < nchan; i++) {
*(cpp + i) =
(struct linuxkpi_ieee80211_channel *)(lc + i);
}
-#if 0 /* Avoid net80211 scan lists until it has proper scan offload support. */
+ /* Avoid net80211 scan lists until it has proper scan offload support. */
for (i = 0; i < nchan; i++) {
struct ieee80211_channel *c;
@@ -4459,7 +4712,9 @@ sw_scan:
lc++;
}
#else
- for (band = 0; band < NUM_NL80211_BANDS; band++) {
+ /* Add bands in reverse order for scanning. */
+ n = 0;
+ for (band = NUM_NL80211_BANDS - 1; band >= 0; band--) {
struct ieee80211_supported_band *supband;
struct linuxkpi_ieee80211_channel *channels;
@@ -4474,11 +4729,30 @@ sw_scan:
channels = supband->channels;
for (i = 0; i < supband->n_channels; i++) {
- *lc = channels[i];
- lc++;
+ if (lkpi_scan_chan(&channels[i], ic, false))
+ *(cpp + n++) = &channels[i];
}
}
+ if (lkpi_order_scanlist)
+ lkpi_scan_chan_list_resort(cpp, nchan);
+
+ if ((linuxkpi_debug_80211 & D80211_SCAN) != 0) {
+ printf("%s:%d: %s SCAN Channel List (nchan=%zu): ",
+ __func__, __LINE__, ic->ic_name, nchan);
+ for (i = 0; i < nchan; i++) {
+ struct linuxkpi_ieee80211_channel *xc;
+
+ xc = *(cpp + i);
+ printf(" %d(%d)",
+ ieee80211_mhz2ieee(xc->center_freq,
+ lkpi_nl80211_band_to_net80211_band(
+ xc->band)),
+ xc->center_freq);
+ }
+ printf("\n");
+ }
#endif
+
hw_req->req.n_ssids = ssid_count;
if (hw_req->req.n_ssids > 0) {
ssids = (struct cfg80211_ssid *)lc;
@@ -4505,6 +4779,7 @@ sw_scan:
ieend = lkpi_scan_ies_add(ie, &hw_req->ies, band_mask, vap, hw);
hw_req->req.ie = ie;
hw_req->req.ie_len = ieend - ie;
+ hw_req->req.scan_start = jiffies;
lvif = VAP_TO_LVIF(vap);
vif = LVIF_TO_VIF(lvif);
@@ -4522,13 +4797,30 @@ sw_scan:
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
if (running) {
free(hw_req, M_LKPI80211);
+ TRACE_SCAN(ic, "Trying to start new scan while still "
+ "running (2); cancelling new net80211 scan; "
+ "scan_flags %b",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS);
+ ieee80211_cancel_scan(vap);
return;
}
lkpi_update_mcast_filter(ic);
+ TRACE_SCAN(ic, "Starting HW_SCAN: scan_flags %b, "
+ "ie_len %d, n_ssids %d, n_chan %d, common_ie_len %d [%d, %d]",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS, hw_req->req.ie_len,
+ hw_req->req.n_ssids, hw_req->req.n_channels,
+ hw_req->ies.common_ie_len,
+ hw_req->ies.len[NL80211_BAND_2GHZ],
+ hw_req->ies.len[NL80211_BAND_5GHZ]);
error = lkpi_80211_mo_hw_scan(hw, vif, hw_req);
if (error != 0) {
+ bool scan_done;
+ int e;
+
+ TRACE_SCAN(ic, "hw_scan failed; scan_flags %b, error %d",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS, error);
ieee80211_cancel_scan(vap);
/*
@@ -4545,14 +4837,35 @@ sw_scan:
* So we cannot rely on that behaviour and have to check
* and balance between both code paths.
*/
+ e = 0;
+ scan_done = true;
LKPI_80211_LHW_SCAN_LOCK(lhw);
if ((lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0) {
+
free(lhw->hw_req, M_LKPI80211);
lhw->hw_req = NULL;
+ /*
+ * The ieee80211_cancel_scan() above runs in a
+ * taskq and it may take ages for the previous
+ * scan to clear; starting a new one right away
+ * we run into the problem that the old one is
+ * still active.
+ */
+ e = msleep(lhw, &lhw->scan_mtx, 0, "lhwscanstop", hz);
+ scan_done = (lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0;
+
+ /*
+ * Now we can clear running if no one else did.
+ */
lhw->scan_flags &= ~LKPI_LHW_SCAN_RUNNING;
}
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
lkpi_update_mcast_filter(ic);
+ if (!scan_done) {
+ ic_printf(ic, "ERROR: %s: timeout/error to wait "
+ "for ieee80211_cancel_scan: %d\n", __func__, e);
+ return;
+ }
/*
* XXX-SIGH magic number.
@@ -4560,24 +4873,15 @@ sw_scan:
* not possible. Fall back to sw scan in that case.
*/
if (error == 1) {
- LKPI_80211_LHW_SCAN_LOCK(lhw);
- lhw->scan_flags &= ~LKPI_LHW_SCAN_HW;
- LKPI_80211_LHW_SCAN_UNLOCK(lhw);
/*
- * XXX If we clear this now and later a driver
- * thinks it * can do a hw_scan again, we will
- * currently not re-enable it?
+ * We need to put this into some deferred context;
+ * the net80211 scan may not be done yet
+ * (ic_flags & IEEE80211_F_SCAN) and we cannot
+ * wait here; if we do scan_curchan_task always
+ * runs after our timeout to finalize the scan.
*/
- vap->iv_flags_ext &= ~IEEE80211_FEXT_SCAN_OFFLOAD;
- ieee80211_start_scan(vap,
- IEEE80211_SCAN_ACTIVE |
- IEEE80211_SCAN_NOPICK |
- IEEE80211_SCAN_ONCE,
- IEEE80211_SCAN_FOREVER,
- ss->ss_mindwell ? ss->ss_mindwell : msecs_to_ticks(20),
- ss->ss_maxdwell ? ss->ss_maxdwell : msecs_to_ticks(200),
- vap->iv_des_nssid, vap->iv_des_ssid);
- goto sw_scan;
+ ieee80211_runtask(ic, &lvif->sw_scan_task);
+ return;
}
ic_printf(ic, "ERROR: %s: hw_scan returned %d\n",
@@ -4587,12 +4891,50 @@ sw_scan:
}
+/*
+ * Task handler (arg is the lkpi_vif) run via lvif->sw_scan_task after
+ * lkpi_ic_scan_start() got the "fall back to software scan" result from
+ * hw_scan.  Clears LKPI_LHW_SCAN_HW and starts a new net80211 scan.
+ */
static void
+lkpi_sw_scan_task(void *arg, int pending __unused)
+{
+	struct lkpi_vif *lvif;
+	struct ieee80211vap *vap;
+	struct ieee80211com *ic;
+	struct ieee80211_scan_state *ss;
+	struct lkpi_hw *lhw;
+
+	lvif = arg;
+	vap = LVIF_TO_VAP(lvif);
+	ic = vap->iv_ic;
+	lhw = ic->ic_softc;
+	ss = ic->ic_scan;
+
+	/*
+	 * We will re-enable this at scan_end calling lkpi_enable_hw_scan().
+	 * IEEE80211_FEXT_SCAN_OFFLOAD will be cleared by lkpi_ic_scan_start.
+	 */
+	LKPI_80211_LHW_SCAN_LOCK(lhw);
+	lhw->scan_flags &= ~LKPI_LHW_SCAN_HW;
+	LKPI_80211_LHW_SCAN_UNLOCK(lhw);
+
+	TRACE_SCAN(ic, "Triggering SW_SCAN: pending %d, scan_flags %b",
+	    pending, lhw->scan_flags, LKPI_LHW_SCAN_BITS);
+
+	/*
+	 * This will call ic_scan_start() and we will get into the right path
+	 * unless other scans started in between.
+	 */
+	ieee80211_start_scan(vap,
+	    IEEE80211_SCAN_ONCE,
+	    msecs_to_ticks(10000), /* 10000 ms (=~ 50 chan * 200 ms) */
+	    ss->ss_mindwell ? ss->ss_mindwell : msecs_to_ticks(20),
+	    ss->ss_maxdwell ? ss->ss_maxdwell : msecs_to_ticks(200),
+	    vap->iv_des_nssid, vap->iv_des_ssid);
+}
+
+static void
lkpi_ic_scan_end(struct ieee80211com *ic)
{
struct lkpi_hw *lhw;
bool is_hw_scan;
lhw = ic->ic_softc;
+ TRACE_SCAN(ic, "scan_flags %b", lhw->scan_flags, LKPI_LHW_SCAN_BITS);
+
LKPI_80211_LHW_SCAN_LOCK(lhw);
if ((lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) == 0) {
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
@@ -4621,6 +4963,16 @@ lkpi_ic_scan_end(struct ieee80211com *ic)
if (vap->iv_state == IEEE80211_S_SCAN)
lkpi_hw_conf_idle(hw, true);
}
+
+ /*
+ * In case we disabled the hw_scan in lkpi_ic_scan_start() and
+ * switched to swscan, re-enable hw_scan if available.
+ */
+ lkpi_enable_hw_scan(lhw);
+
+ LKPI_80211_LHW_SCAN_LOCK(lhw);
+ wakeup(lhw);
+ LKPI_80211_LHW_SCAN_UNLOCK(lhw);
}
static void
@@ -4631,6 +4983,10 @@ lkpi_ic_scan_curchan(struct ieee80211_scan_state *ss,
bool is_hw_scan;
lhw = ss->ss_ic->ic_softc;
+ TRACE_SCAN(ss->ss_ic, "scan_flags %b chan %d maxdwell %lu",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS,
+ ss->ss_ic->ic_curchan->ic_ieee, maxdwell);
+
LKPI_80211_LHW_SCAN_LOCK(lhw);
is_hw_scan = (lhw->scan_flags & LKPI_LHW_SCAN_HW) != 0;
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
@@ -4645,6 +5001,10 @@ lkpi_ic_scan_mindwell(struct ieee80211_scan_state *ss)
bool is_hw_scan;
lhw = ss->ss_ic->ic_softc;
+ TRACE_SCAN(ss->ss_ic, "scan_flags %b chan %d mindwell %lu",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS,
+ ss->ss_ic->ic_curchan->ic_ieee, ss->ss_mindwell);
+
LKPI_80211_LHW_SCAN_LOCK(lhw);
is_hw_scan = (lhw->scan_flags & LKPI_LHW_SCAN_HW) != 0;
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
@@ -6042,6 +6402,7 @@ linuxkpi_ieee80211_alloc_hw(size_t priv_len, const struct ieee80211_ops *ops)
LKPI_80211_LHW_SCAN_LOCK_INIT(lhw);
LKPI_80211_LHW_TXQ_LOCK_INIT(lhw);
+ spin_lock_init(&lhw->txq_lock);
sx_init_flags(&lhw->lvif_sx, "lhw-lvif", SX_RECURSE | SX_DUPOK);
LKPI_80211_LHW_MC_LOCK_INIT(lhw);
TAILQ_INIT(&lhw->lvif_head);
@@ -6147,6 +6508,7 @@ linuxkpi_ieee80211_iffree(struct ieee80211_hw *hw)
LKPI_80211_LHW_MC_UNLOCK(lhw);
/* Cleanup more of lhw here or in wiphy_free()? */
+ spin_lock_destroy(&lhw->txq_lock);
LKPI_80211_LHW_TXQ_LOCK_DESTROY(lhw);
LKPI_80211_LHW_SCAN_LOCK_DESTROY(lhw);
sx_destroy(&lhw->lvif_sx);
@@ -6255,26 +6617,26 @@ linuxkpi_ieee80211_ifattach(struct ieee80211_hw *hw)
IEEE80211_C_SHSLOT | /* short slot time supported */
IEEE80211_C_SHPREAMBLE /* short preamble supported */
;
-#if 0
- /* Scanning is a different kind of beast to re-work. */
- ic->ic_caps |= IEEE80211_C_BGSCAN;
+
+#ifdef LKPI_80211_BGSCAN
+ if (lhw->ops->hw_scan)
+ ic->ic_caps |= IEEE80211_C_BGSCAN;
#endif
- if (lhw->ops->hw_scan) {
- /*
- * Advertise full-offload scanning.
- *
- * Not limiting to SINGLE_SCAN_ON_ALL_BANDS here as otherwise
- * we essentially disable hw_scan for all drivers not setting
- * the flag.
- */
- ic->ic_flags_ext |= IEEE80211_FEXT_SCAN_OFFLOAD;
- lhw->scan_flags |= LKPI_LHW_SCAN_HW;
- }
+
+ lkpi_enable_hw_scan(lhw);
/* Does HW support Fragmentation offload? */
if (ieee80211_hw_check(hw, SUPPORTS_TX_FRAG))
ic->ic_flags_ext |= IEEE80211_FEXT_FRAG_OFFLOAD;
+ /* Does HW support full AMPDU[-TX] offload? */
+ if (ieee80211_hw_check(hw, AMPDU_AGGREGATION))
+ ic->ic_flags_ext |= IEEE80211_FEXT_AMPDU_OFFLOAD;
+#ifdef __notyet__
+ if (ieee80211_hw_check(hw, TX_AMSDU))
+ if (ieee80211_hw_check(hw, SUPPORTS_AMSDU_IN_AMPDU))
+#endif
+
/*
* The wiphy variables report bitmasks of avail antennas.
* (*get_antenna) get the current bitmask sets which can be
@@ -6726,13 +7088,19 @@ linuxkpi_ieee80211_scan_completed(struct ieee80211_hw *hw,
ic = lhw->ic;
ss = ic->ic_scan;
+ TRACE_SCAN(ic, "scan_flags %b info { %ju, %6D, aborted %d }",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS,
+ (uintmax_t)info->scan_start_tsf, info->tsf_bssid, ":",
+ info->aborted);
+
ieee80211_scan_done(ss->ss_vap);
LKPI_80211_LHW_SCAN_LOCK(lhw);
free(lhw->hw_req, M_LKPI80211);
lhw->hw_req = NULL;
lhw->scan_flags &= ~LKPI_LHW_SCAN_RUNNING;
- wakeup(lhw);
+ /* The wakeup(lhw) will be called from lkpi_ic_scan_end(). */
+ /* wakeup(lhw); */
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
return;
@@ -7002,6 +7370,63 @@ lkpi_convert_rx_status(struct ieee80211_hw *hw, struct lkpi_sta *lsta,
}
}
+#ifdef LINUXKPI_DEBUG_80211
+/*
+ * Debug helper: log a received beacon through TRACE_SCAN_BEACON(),
+ * including a printable rendering of its SSID.
+ *
+ * m is the mbuf holding the frame (only its first m_len bytes are looked
+ * at), lhw supplies the ic and scan_flags for the log line, and rx_status
+ * the band and frequency.
+ *
+ * The frame comes off the air and is not trusted: the element walk never
+ * reads beyond m_len and the SSID is capped at IEEE80211_NWID_LEN bytes so
+ * that even a fully escaped SSID (4 characters per byte) fits into ssid[].
+ */
+static void
+lkpi_rx_log_beacon(struct mbuf *m, struct lkpi_hw *lhw,
+    struct ieee80211_rx_status *rx_status)
+{
+	struct ieee80211_mgmt *f;
+	const uint8_t *base, *e;
+	int off;
+	char ssid[IEEE80211_NWID_LEN * 4 + 1];
+
+	memset(ssid, '\0', sizeof(ssid));
+
+	f = mtod(m, struct ieee80211_mgmt *);
+	base = (const uint8_t *)f;
+	off = (int)((const uint8_t *)f->u.beacon.variable - base);
+	e = NULL;
+	/*
+	 * Usually SSID is right after the fixed part and for debugging we will
+	 * be fine should we miss it if it is not.
+	 * Only look at elements whose id and length bytes are inside the mbuf.
+	 */
+	while (off + 2 <= m->m_len) {
+		if (base[off] == IEEE80211_ELEMID_SSID) {
+			e = base + off;
+			break;
+		}
+		off += 2 + base[off + 1];
+	}
+	if (e != NULL) {
+		int i, len;
+		char *p;
+
+		p = ssid;
+		/*
+		 * Use no more than what is left in the mbuf, what the
+		 * element claims, and what an SSID may be at most.
+		 */
+		len = m->m_len - (off + 2);
+		if (len > e[1])
+			len = e[1];
+		if (len > IEEE80211_NWID_LEN)
+			len = IEEE80211_NWID_LEN;
+		e += 2;
+		for (i = 0; i < len; i++) {
+			/* Printable character? */
+			if (*e >= 0x20 && *e < 0x7f) {
+				*p++ = *e++;
+			} else {
+				snprintf(p, 5, "%#04x", *e++);
+				p += 4;
+			}
+		}
+		*p = '\0';
+	}
+
+	/* We print skb, skb->data, m as we are seeing 'ghost beacons'. */
+	TRACE_SCAN_BEACON(lhw->ic, "Beacon: scan_flags %b, band %s freq %u chan %-4d "
+	    "len %d { %#06x %#06x %6D %6D %6D %#06x %ju %u %#06x SSID '%s' }",
+	    lhw->scan_flags, LKPI_LHW_SCAN_BITS,
+	    lkpi_nl80211_band_name(rx_status->band), rx_status->freq,
+	    linuxkpi_ieee80211_frequency_to_channel(rx_status->freq, 0),
+	    m->m_pkthdr.len, f->frame_control, f->duration_id,
+	    f->da, ":", f->sa, ":", f->bssid, ":", f->seq_ctrl,
+	    (uintmax_t)le64_to_cpu(f->u.beacon.timestamp),
+	    le16_to_cpu(f->u.beacon.beacon_int),
+	    le16_to_cpu(f->u.beacon.capab_info), ssid);
+}
+#endif
+
/* For %list see comment towards the end of the function. */
void
linuxkpi_ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
@@ -7058,7 +7483,15 @@ linuxkpi_ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
is_beacon = ieee80211_is_beacon(hdr->frame_control);
#ifdef LINUXKPI_DEBUG_80211
- if (is_beacon && (linuxkpi_debug_80211 & D80211_TRACE_RX_BEACONS) == 0)
+ /*
+ * We use the mbuf here as otherwise the variable part might
+ * be in skb frags.
+ */
+ if (is_beacon && ((linuxkpi_debug_80211 & D80211_SCAN_BEACON) != 0))
+ lkpi_rx_log_beacon(m, lhw, rx_status);
+
+ if (is_beacon && (linuxkpi_debug_80211 & D80211_TRACE_RX_BEACONS) == 0 &&
+ (linuxkpi_debug_80211 & D80211_SCAN_BEACON) == 0)
goto no_trace_beacons;
if (linuxkpi_debug_80211 & D80211_TRACE_RX)
@@ -7073,7 +7506,8 @@ linuxkpi_ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
hexdump(mtod(m, const void *), m->m_len, "RX (raw) ", 0);
/* Implement a dump_rxcb() !!! */
- if (linuxkpi_debug_80211 & D80211_TRACE_RX)
+ if ((linuxkpi_debug_80211 & D80211_TRACE_RX) != 0 ||
+ (linuxkpi_debug_80211 & D80211_SCAN_BEACON) != 0)
printf("TRACE-RX: %s: RXCB: %ju %ju %u, %b, %u, %#0x, %#0x, "
"%u band %u, %u { %d %d %d %d }, %d, %#x %#x %#x %#x %u %u %u\n",
__func__,
@@ -7380,7 +7814,7 @@ lkpi_wiphy_delayed_work_timer(struct timer_list *tl)
{
struct wiphy_delayed_work *wdwk;
- wdwk = from_timer(wdwk, tl, timer);
+ wdwk = timer_container_of(wdwk, tl, timer);
wiphy_work_queue(wdwk->wiphy, &wdwk->work);
}
@@ -8124,21 +8558,30 @@ lkpi_ieee80211_wake_queues_locked(struct ieee80211_hw *hw)
void
linuxkpi_ieee80211_wake_queues(struct ieee80211_hw *hw)
{
- wiphy_lock(hw->wiphy);
+ struct lkpi_hw *lhw;
+ unsigned long flags;
+
+ lhw = HW_TO_LHW(hw);
+
+ spin_lock_irqsave(&lhw->txq_lock, flags);
lkpi_ieee80211_wake_queues_locked(hw);
- wiphy_unlock(hw->wiphy);
+ spin_unlock_irqrestore(&lhw->txq_lock, flags);
}
void
linuxkpi_ieee80211_wake_queue(struct ieee80211_hw *hw, int qnum)
{
+ struct lkpi_hw *lhw;
+ unsigned long flags;
KASSERT(qnum < hw->queues, ("%s: qnum %d >= hw->queues %d, hw %p\n",
__func__, qnum, hw->queues, hw));
- wiphy_lock(hw->wiphy);
+ lhw = HW_TO_LHW(hw);
+
+ spin_lock_irqsave(&lhw->txq_lock, flags);
lkpi_ieee80211_wake_queues(hw, qnum);
- wiphy_unlock(hw->wiphy);
+ spin_unlock_irqrestore(&lhw->txq_lock, flags);
}
/* This is just hardware queues. */
diff --git a/sys/compat/linuxkpi/common/src/linux_80211.h b/sys/compat/linuxkpi/common/src/linux_80211.h
index eaf6d804af4c..0dfcd7646c34 100644
--- a/sys/compat/linuxkpi/common/src/linux_80211.h
+++ b/sys/compat/linuxkpi/common/src/linux_80211.h
@@ -59,6 +59,8 @@
#define D80211_IMPROVE_TXQ 0x00000004
#define D80211_TRACE 0x00000010
#define D80211_TRACEOK 0x00000020
+#define D80211_SCAN 0x00000040
+#define D80211_SCAN_BEACON 0x00000080
#define D80211_TRACE_TX 0x00000100
#define D80211_TRACE_TX_DUMP 0x00000200
#define D80211_TRACE_RX 0x00001000
@@ -75,6 +77,20 @@
#define D80211_TRACE_MODE_HE 0x04000000
#define D80211_TRACE_MODE_EHT 0x08000000
+#ifdef LINUXKPI_DEBUG_80211	/* do/while (0): one statement, else-safe */
+#define TRACE_SCAN(ic, fmt, ...) do { \
+	if (linuxkpi_debug_80211 & D80211_SCAN) \
+		printf("%s:%d: %s SCAN " fmt "\n", __func__, \
+		    __LINE__, (ic)->ic_name, ##__VA_ARGS__); } while (0)
+#define TRACE_SCAN_BEACON(ic, fmt, ...) do { \
+	if (linuxkpi_debug_80211 & D80211_SCAN_BEACON) \
+		printf("%s:%d: %s SCAN " fmt "\n", __func__, \
+		    __LINE__, (ic)->ic_name, ##__VA_ARGS__); } while (0)
+#else
+#define TRACE_SCAN(...) do {} while (0)
+#define TRACE_SCAN_BEACON(...) do {} while (0)
+#endif
+
#define IMPROVE_TXQ(...) \
if (linuxkpi_debug_80211 & D80211_IMPROVE_TXQ) \
printf("%s:%d: XXX LKPI80211 IMPROVE_TXQ\n", __func__, __LINE__)
@@ -191,6 +207,7 @@ struct lkpi_vif {
struct mbuf *, int,
const struct ieee80211_rx_stats *,
int, int);
+ struct task sw_scan_task;
struct list_head lsta_list;
@@ -236,6 +253,7 @@ struct lkpi_hw { /* name it mac80211_sc? */
struct mtx txq_mtx;
uint32_t txq_generation[IEEE80211_NUM_ACS];
TAILQ_HEAD(, lkpi_txq) scheduled_txqs[IEEE80211_NUM_ACS];
+ spinlock_t txq_lock;
/* Deferred RX path. */
struct task rxq_task;
@@ -298,6 +316,9 @@ struct lkpi_hw { /* name it mac80211_sc? */
#define LHW_TO_HW(_lhw) (&(_lhw)->hw)
#define HW_TO_LHW(_hw) container_of(_hw, struct lkpi_hw, hw)
+#define LKPI_LHW_SCAN_BITS	/* %b bit names for lhw->scan_flags */ \
+	"\010\1RUNNING\2HW"
+
struct lkpi_chanctx {
struct list_head entry;
diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c
index dcdec0dfcc78..458744a9fec6 100644
--- a/sys/compat/linuxkpi/common/src/linux_compat.c
+++ b/sys/compat/linuxkpi/common/src/linux_compat.c
@@ -2120,7 +2120,7 @@ add_timer_on(struct timer_list *timer, int cpu)
}
int
-del_timer(struct timer_list *timer)
+timer_delete(struct timer_list *timer)
{
if (callout_stop(&(timer)->callout) == -1)
@@ -2129,7 +2129,7 @@ del_timer(struct timer_list *timer)
}
int
-del_timer_sync(struct timer_list *timer)
+timer_delete_sync(struct timer_list *timer)
{
if (callout_drain(&(timer)->callout) == -1)
@@ -2138,13 +2138,6 @@ del_timer_sync(struct timer_list *timer)
}
int
-timer_delete_sync(struct timer_list *timer)
-{
-
- return (del_timer_sync(timer));
-}
-
-int
timer_shutdown_sync(struct timer_list *timer)
{
diff --git a/sys/compat/linuxkpi/common/src/linux_devres.c b/sys/compat/linuxkpi/common/src/linux_devres.c
index 84f03ba0dd7d..23c91cb5ab2f 100644
--- a/sys/compat/linuxkpi/common/src/linux_devres.c
+++ b/sys/compat/linuxkpi/common/src/linux_devres.c
@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
- * Copyright (c) 2020-2021 The FreeBSD Foundation
+ * Copyright (c) 2020-2025 The FreeBSD Foundation
*
* This software was developed by Bj\xc3\xb6rn Zeeb under sponsorship from
* the FreeBSD Foundation.
@@ -223,6 +223,30 @@ lkpi_devm_kmalloc_release(struct device *dev __unused, void *p __unused)
/* Nothing to do. Freed with the devres. */
}
+static int
+lkpi_devm_kmalloc_match(struct device *dev __unused, void *p, void *mp)
+{
+ return (p == mp);
+}
+
+/* Have lkpi_devres_destroy() drop the entry matching p; NULL p is a no-op. */
+void
+lkpi_devm_kfree(struct device *dev, const void *p)
+{
+	int rc;
+
+	if (p == NULL)
+		return;
+
+	/* The match callback takes a non-const pointer; drop the qualifier. */
+	rc = lkpi_devres_destroy(dev, lkpi_devm_kmalloc_release,
+	    lkpi_devm_kmalloc_match, __DECONST(void *, p));
+	if (rc == 0)
+		return;
+	dev_warn(dev, "%s: lkpi_devres_destroy failed with %d\n",
+	    __func__, rc);
+}
+
struct devres_action {
void *data;
void (*action)(void *);
diff --git a/sys/compat/linuxkpi/common/src/linux_pci.c b/sys/compat/linuxkpi/common/src/linux_pci.c
index d5bbbea1eb2c..43fd6ad28ac4 100644
--- a/sys/compat/linuxkpi/common/src/linux_pci.c
+++ b/sys/compat/linuxkpi/common/src/linux_pci.c
@@ -111,6 +111,9 @@ static device_method_t pci_methods[] = {
DEVMETHOD(pci_iov_uninit, linux_pci_iov_uninit),
DEVMETHOD(pci_iov_add_vf, linux_pci_iov_add_vf),
+ /* Bus interface. */
+ DEVMETHOD(bus_add_child, bus_generic_add_child),
+
/* backlight interface */
DEVMETHOD(backlight_update_status, linux_backlight_update_status),
DEVMETHOD(backlight_get_status, linux_backlight_get_status),
@@ -145,6 +148,23 @@ struct linux_dma_priv {
#define DMA_PRIV_LOCK(priv) mtx_lock(&(priv)->lock)
#define DMA_PRIV_UNLOCK(priv) mtx_unlock(&(priv)->lock)
+/* Record res and its bus handle for bar in the pcim iomap devres tables. */
+static void
+lkpi_set_pcim_iomap_devres(struct pcim_iomap_devres *dr, int bar, void *res)
+{
+	dr->res_table[bar] = res;
+	dr->mmio_table[bar] = (void *)rman_get_bushandle(res);
+}
+
+/* Tell whether bar lies within [0, PCIR_MAX_BAR_0]. */
+static bool
+lkpi_pci_bar_id_valid(int bar)
+{
+	const bool in_range = (bar >= 0 && bar <= PCIR_MAX_BAR_0);
+
+	return (in_range);
+}
+
static int
linux_pdev_dma_uninit(struct pci_dev *pdev)
{
@@ -289,12 +309,18 @@ lkpi_pci_get_device(uint32_t vendor, uint32_t device, struct pci_dev *odev)
{
struct pci_dev *pdev, *found;
- KASSERT(odev == NULL, ("%s: odev argument not yet supported\n", __func__));
-
found = NULL;
spin_lock(&pci_lock);
list_for_each_entry(pdev, &pci_devices, links) {
- if (pdev->vendor == vendor && pdev->device == device) {
+ /* Walk until we find odev. */
+ if (odev != NULL) {
+ if (pdev == odev)
+ odev = NULL;
+ continue;
+ }
+
+ if ((pdev->vendor == vendor || vendor == PCI_ANY_ID) &&
+ (pdev->device == device || device == PCI_ANY_ID)) {
found = pdev;
break;
}
@@ -757,6 +783,9 @@ _lkpi_pci_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen __unused)
struct pci_mmio_region *mmio, *p;
int type;
+ if (!lkpi_pci_bar_id_valid(bar))
+ return (NULL);
+
type = pci_resource_type(pdev, bar);
if (type < 0) {
device_printf(pdev->dev.bsddev, "%s: bar %d type %d\n",
@@ -797,6 +826,9 @@ linuxkpi_pci_iomap_range(struct pci_dev *pdev, int bar,
{
struct resource *res;
+ if (!lkpi_pci_bar_id_valid(bar))
+ return (NULL);
+
res = _lkpi_pci_iomap(pdev, bar, maxlen);
if (res == NULL)
return (NULL);
@@ -810,9 +842,41 @@ linuxkpi_pci_iomap_range(struct pci_dev *pdev, int bar,
void *
linuxkpi_pci_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen)
{
+ if (!lkpi_pci_bar_id_valid(bar))
+ return (NULL);
+
return (linuxkpi_pci_iomap_range(pdev, bar, 0, maxlen));
}
+/*
+ * Managed iomap of a BAR: hand back the entry already recorded in the
+ * devres tables if there is one, otherwise map the BAR and record it.
+ */
+void *
+linuxkpi_pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen)
+{
+	struct pcim_iomap_devres *tbl;
+	void *map;
+
+	if (!lkpi_pci_bar_id_valid(bar))
+		return (NULL);
+	tbl = lkpi_pcim_iomap_devres_find(pdev);
+	if (tbl == NULL)
+		return (NULL);
+
+	map = tbl->res_table[bar];
+	if (map != NULL)
+		return (map);
+
+	/*
+	 * On failure keep the devres: earlier mappings may still use it.
+	 */
+	map = linuxkpi_pci_iomap(pdev, bar, maxlen);
+	if (map != NULL)
+		lkpi_set_pcim_iomap_devres(tbl, bar, map);
+	return (map);
+}
+
void
linuxkpi_pci_iounmap(struct pci_dev *pdev, void *res)
{
@@ -864,8 +928,7 @@ linuxkpi_pcim_iomap_regions(struct pci_dev *pdev, uint32_t mask, const char *nam
res = _lkpi_pci_iomap(pdev, bar, 0);
if (res == NULL)
goto err;
- dr->mmio_table[bar] = (void *)rman_get_bushandle(res);
- dr->res_table[bar] = res;
+ lkpi_set_pcim_iomap_devres(dr, bar, res);
mappings |= (1 << bar);
}
@@ -1099,8 +1162,9 @@ pci_resource_len(struct pci_dev *pdev, int bar)
return (rle->count);
}
-int
-pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
+static int
+lkpi_pci_request_region(struct pci_dev *pdev, int bar, const char *res_name,
+ bool managed)
{
struct resource *res;
struct pci_devres *dr;
@@ -1108,9 +1172,20 @@ pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
int rid;
int type;
+ if (!lkpi_pci_bar_id_valid(bar))
+ return (-EINVAL);
+
+ /*
+ * If the BAR has no resource (zero length), return success without
+ * adding it; otherwise linuxkpi_pcim_request_all_regions() will error.
+ */
+ if (pci_resource_len(pdev, bar) == 0)
+ return (0);
+ /* Likewise if it is neither IO nor MEM, nothing to do for us. */
type = pci_resource_type(pdev, bar);
if (type < 0)
- return (-ENODEV);
+ return (0);
+
rid = PCIR_BAR(bar);
res = bus_alloc_resource_any(pdev->dev.bsddev, type, &rid,
RF_ACTIVE|RF_SHAREABLE);
@@ -1123,11 +1198,16 @@ pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
/*
* It seems there is an implicit devres tracking on these if the device
- * is managed; otherwise the resources are not automatiaclly freed on
- * FreeBSD/LinuxKPI tough they should be/are expected to be by Linux
- * drivers.
+ * is managed (lkpi_pci_devres_find() case); otherwise the resources are
+ * not automatically freed on FreeBSD/LinuxKPI though they should be/are
+ * expected to be by Linux drivers.
+ * Otherwise if we are called from a pcim-function with the managed
+ * argument set, we need to track devres independent of pdev->managed.
*/
- dr = lkpi_pci_devres_find(pdev);
+ if (managed)
+ dr = lkpi_pci_devres_get_alloc(pdev);
+ else
+ dr = lkpi_pci_devres_find(pdev);
if (dr != NULL) {
dr->region_mask |= (1 << bar);
dr->region_table[bar] = res;
@@ -1144,6 +1224,12 @@ pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
}
int
+linuxkpi_pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
+{
+ return (lkpi_pci_request_region(pdev, bar, res_name, false));
+}
+
+int
linuxkpi_pci_request_regions(struct pci_dev *pdev, const char *res_name)
{
int error;
@@ -1159,6 +1245,24 @@ linuxkpi_pci_request_regions(struct pci_dev *pdev, const char *res_name)
return (0);
}
+int
+linuxkpi_pcim_request_all_regions(struct pci_dev *pdev, const char *res_name)
+{
+ int bar, error;
+
+ for (bar = 0; bar <= PCIR_MAX_BAR_0; bar++) {
+ error = lkpi_pci_request_region(pdev, bar, res_name, true);
+ if (error != 0) {
+ device_printf(pdev->dev.bsddev, "%s: bar %d res_name '%s': "
+ "lkpi_pci_request_region returned %d\n", __func__,
+ bar, res_name, error);
+ pci_release_regions(pdev);
+ return (error);
+ }
+ }
+ return (0);
+}
+
void
linuxkpi_pci_release_region(struct pci_dev *pdev, int bar)
{
diff --git a/sys/compat/linuxkpi/dummy/include/kunit/skbuff.h b/sys/compat/linuxkpi/dummy/include/kunit/skbuff.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/sys/compat/linuxkpi/dummy/include/kunit/skbuff.h
diff --git a/sys/compat/linuxkpi/dummy/include/kunit/test-bug.h b/sys/compat/linuxkpi/dummy/include/kunit/test-bug.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/sys/compat/linuxkpi/dummy/include/kunit/test-bug.h
diff --git a/sys/compat/linuxkpi/dummy/include/kunit/test.h b/sys/compat/linuxkpi/dummy/include/kunit/test.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/sys/compat/linuxkpi/dummy/include/kunit/test.h
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 92e98aa57ebf..c7a8862fb906 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -888,13 +888,13 @@ options IEEE80211_DEBUG_REFCNT
options IEEE80211_SUPPORT_MESH #enable 802.11s D3.0 support
options IEEE80211_SUPPORT_TDMA #enable TDMA support
-# The `wlan_wep', `wlan_tkip', and `wlan_ccmp' devices provide
-# support for WEP, TKIP, AES-CCMP and AES-GCMP crypto protocols optionally
-# used with 802.11 devices that depend on the `wlan' module.
+# The `wlan_wep', `wlan_tkip', `wlan_ccmp', and `wlan_gcmp' devices provide
+# support for WEP, TKIP, AES-CCMP and AES-GCMP crypto protocols optionally used
+# with 802.11 devices that depend on the `wlan' module.
device wlan_wep
+device wlan_tkip
device wlan_ccmp
device wlan_gcmp
-device wlan_tkip
# The `wlan_xauth' device provides support for external (i.e. user-mode)
# authenticators for use with 802.11 drivers that use the `wlan'
@@ -1249,7 +1249,7 @@ options MAC
options MAC_BIBA
options MAC_BSDEXTENDED
options MAC_DDB
-options MAC_DO
+options MAC_DO
options MAC_IFOFF
options MAC_IPACL
options MAC_LOMAC
diff --git a/sys/conf/files b/sys/conf/files
index d89813c70355..9661bafea8f9 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3808,6 +3808,7 @@ kern/kern_hhook.c standard
kern/kern_idle.c standard
kern/kern_intr.c standard
kern/kern_jail.c standard
+kern/kern_jaildesc.c standard
kern/kern_jailmeta.c standard
kern/kern_kcov.c optional kcov \
compile-with "${NOSAN_C} ${MSAN_CFLAGS}"
diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk
index 1fcfd6467e7f..0251486247da 100644
--- a/sys/conf/kern.pre.mk
+++ b/sys/conf/kern.pre.mk
@@ -8,7 +8,11 @@
# the rest of /usr/src, but they still always process SRCCONF even though
# the normal mechanisms to prevent that (compiling out of tree) won't
# work. To ensure they do work, we have to duplicate thee few lines here.
+.if exists(${SRCTOP}/src.conf)
+SRCCONF?= ${SRCTOP}/src.conf
+.else
SRCCONF?= /etc/src.conf
+.endif
.if (exists(${SRCCONF}) || ${SRCCONF} != "/etc/src.conf") && !target(_srcconf_included_)
.include "${SRCCONF}"
_srcconf_included_:
diff --git a/sys/conf/newvers.sh b/sys/conf/newvers.sh
index 8b60da95741e..145377c1e75e 100644
--- a/sys/conf/newvers.sh
+++ b/sys/conf/newvers.sh
@@ -50,8 +50,8 @@
#
TYPE="FreeBSD"
-REVISION="15.0"
-BRANCH="PRERELEASE"
+REVISION="16.0"
+BRANCH="CURRENT"
if [ -n "${BRANCH_OVERRIDE}" ]; then
BRANCH=${BRANCH_OVERRIDE}
fi
diff --git a/sys/conf/options b/sys/conf/options
index 4009ba2b4843..66f7f2ee2d7e 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -53,7 +53,7 @@ DDB_CAPTURE_MAXBUFSIZE opt_ddb.h
DDB_CTF opt_ddb.h
DDB_NUMSYM opt_ddb.h
EARLY_PRINTF opt_global.h
-BLOAT_KERNEL_WITH_EXTERR opt_global.h
+EXTERR_STRINGS opt_global.h
FULL_BUF_TRACKING opt_global.h
GDB
KDB opt_global.h
diff --git a/sys/contrib/dev/acpica/components/executer/extrace.c b/sys/contrib/dev/acpica/components/executer/extrace.c
index d54d4908ca65..b48a5fcb289b 100644
--- a/sys/contrib/dev/acpica/components/executer/extrace.c
+++ b/sys/contrib/dev/acpica/components/executer/extrace.c
@@ -301,7 +301,7 @@ AcpiExTraceArgs(ACPI_OPERAND_OBJECT **Params, UINT32 Count)
switch (obj_desc->Common.Type)
{
case ACPI_TYPE_INTEGER:
- ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, "%lx", obj_desc->Integer.Value));
+ ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, "%jx", (uintmax_t)obj_desc->Integer.Value));
break;
case ACPI_TYPE_STRING:
diff --git a/sys/contrib/dev/rtw88/main.c b/sys/contrib/dev/rtw88/main.c
index 021d076808e0..963b73f35350 100644
--- a/sys/contrib/dev/rtw88/main.c
+++ b/sys/contrib/dev/rtw88/main.c
@@ -57,6 +57,62 @@ module_param_named(support_vht, rtw_vht_support, bool, 0644);
MODULE_PARM_DESC(support_vht, "Set to Y to enable VHT support");
#endif
+#if defined(__FreeBSD__)
+/* Macros based on rtw89::core.c. */
+#define RTW88_DEF_CHAN(_freq, _hw_val, _flags, _band) \
+ { .center_freq = _freq, .hw_value = _hw_val, .flags = _flags, .band = _band, }
+#define RTW88_DEF_CHAN_2G(_freq, _hw_val) \
+ RTW88_DEF_CHAN(_freq, _hw_val, 0, NL80211_BAND_2GHZ)
+#define RTW88_DEF_CHAN_5G(_freq, _hw_val) \
+ RTW88_DEF_CHAN(_freq, _hw_val, 0, NL80211_BAND_5GHZ)
+#define RTW88_DEF_CHAN_5G_NO_HT40MINUS(_freq, _hw_val) \
+ RTW88_DEF_CHAN(_freq, _hw_val, IEEE80211_CHAN_NO_HT40MINUS, NL80211_BAND_5GHZ)
+
+static struct ieee80211_channel rtw_channeltable_2g[] = {
+ RTW88_DEF_CHAN_2G(2412, 1),
+ RTW88_DEF_CHAN_2G(2417, 2),
+ RTW88_DEF_CHAN_2G(2422, 3),
+ RTW88_DEF_CHAN_2G(2427, 4),
+ RTW88_DEF_CHAN_2G(2432, 5),
+ RTW88_DEF_CHAN_2G(2437, 6),
+ RTW88_DEF_CHAN_2G(2442, 7),
+ RTW88_DEF_CHAN_2G(2447, 8),
+ RTW88_DEF_CHAN_2G(2452, 9),
+ RTW88_DEF_CHAN_2G(2457, 10),
+ RTW88_DEF_CHAN_2G(2462, 11),
+ RTW88_DEF_CHAN_2G(2467, 12),
+ RTW88_DEF_CHAN_2G(2472, 13),
+ RTW88_DEF_CHAN_2G(2484, 14),
+};
+
+static struct ieee80211_channel rtw_channeltable_5g[] = {
+ RTW88_DEF_CHAN_5G(5180, 36),
+ RTW88_DEF_CHAN_5G(5200, 40),
+ RTW88_DEF_CHAN_5G(5220, 44),
+ RTW88_DEF_CHAN_5G(5240, 48),
+ RTW88_DEF_CHAN_5G(5260, 52),
+ RTW88_DEF_CHAN_5G(5280, 56),
+ RTW88_DEF_CHAN_5G(5300, 60),
+ RTW88_DEF_CHAN_5G(5320, 64),
+ RTW88_DEF_CHAN_5G(5500, 100),
+ RTW88_DEF_CHAN_5G(5520, 104),
+ RTW88_DEF_CHAN_5G(5540, 108),
+ RTW88_DEF_CHAN_5G(5560, 112),
+ RTW88_DEF_CHAN_5G(5580, 116),
+ RTW88_DEF_CHAN_5G(5600, 120),
+ RTW88_DEF_CHAN_5G(5620, 124),
+ RTW88_DEF_CHAN_5G(5640, 128),
+ RTW88_DEF_CHAN_5G(5660, 132),
+ RTW88_DEF_CHAN_5G(5680, 136),
+ RTW88_DEF_CHAN_5G(5700, 140),
+ RTW88_DEF_CHAN_5G(5720, 144),
+ RTW88_DEF_CHAN_5G(5745, 149),
+ RTW88_DEF_CHAN_5G(5765, 153),
+ RTW88_DEF_CHAN_5G(5785, 157),
+ RTW88_DEF_CHAN_5G(5805, 161),
+ RTW88_DEF_CHAN_5G_NO_HT40MINUS(5825, 165),
+};
+#elif defined(__linux__)
static struct ieee80211_channel rtw_channeltable_2g[] = {
{.center_freq = 2412, .hw_value = 1,},
{.center_freq = 2417, .hw_value = 2,},
@@ -102,6 +158,7 @@ static struct ieee80211_channel rtw_channeltable_5g[] = {
{.center_freq = 5825, .hw_value = 165,
.flags = IEEE80211_CHAN_NO_HT40MINUS},
};
+#endif
static struct ieee80211_rate rtw_ratetable[] = {
{.bitrate = 10, .hw_value = 0x00,},
diff --git a/sys/contrib/dev/rtw89/fw.c b/sys/contrib/dev/rtw89/fw.c
index e360f27c2ade..b4c0f864bc75 100644
--- a/sys/contrib/dev/rtw89/fw.c
+++ b/sys/contrib/dev/rtw89/fw.c
@@ -908,11 +908,7 @@ int rtw89_build_phy_tbl_from_elm(struct rtw89_dev *rtwdev,
case RTW89_FW_ELEMENT_ID_RADIO_B:
case RTW89_FW_ELEMENT_ID_RADIO_C:
case RTW89_FW_ELEMENT_ID_RADIO_D:
-#if defined(__linux__)
rf_path = arg.rf_path;
-#elif defined(__FreeBSD__)
- rf_path = __DECONST(enum rtw89_rf_path, arg.rf_path);
-#endif
idx = elm->u.reg2.idx;
elm_info->rf_radio[idx] = tbl;
diff --git a/sys/contrib/libnv/nvlist.c b/sys/contrib/libnv/nvlist.c
index 41edc72322c3..73226ee51a78 100644
--- a/sys/contrib/libnv/nvlist.c
+++ b/sys/contrib/libnv/nvlist.c
@@ -478,7 +478,7 @@ nvlist_dump_error_check(const nvlist_t *nvl, int fd, int level)
void
nvlist_dump(const nvlist_t *nvl, int fd)
{
- const nvlist_t *tmpnvl;
+ const nvlist_t *tmpnvl, *top;
nvpair_t *nvp, *tmpnvp;
void *cookie;
int level;
@@ -487,6 +487,7 @@ nvlist_dump(const nvlist_t *nvl, int fd)
if (nvlist_dump_error_check(nvl, fd, level))
return;
+ top = nvl;
nvp = nvlist_first_nvpair(nvl);
while (nvp != NULL) {
dprintf(fd, "%*s%s (%s):", level * 4, "", nvpair_name(nvp),
@@ -645,6 +646,8 @@ nvlist_dump(const nvlist_t *nvl, int fd)
while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) {
do {
+ if (nvl == top)
+ return;
cookie = NULL;
if (nvlist_in_array(nvl))
dprintf(fd, "%*s,\n", level * 4, "");
@@ -847,7 +850,7 @@ nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep)
{
unsigned char *buf, *ptr;
size_t left, size;
- const nvlist_t *tmpnvl;
+ const nvlist_t *tmpnvl, *top;
nvpair_t *nvp, *tmpnvp;
void *cookie;
@@ -868,6 +871,7 @@ nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep)
ptr = nvlist_pack_header(nvl, ptr, &left);
+ top = nvl;
nvp = nvlist_first_nvpair(nvl);
while (nvp != NULL) {
NVPAIR_ASSERT(nvp);
@@ -958,6 +962,8 @@ nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep)
goto fail;
while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) {
do {
+ if (nvl == top)
+ goto out;
cookie = NULL;
if (nvlist_in_array(nvl)) {
ptr = nvpair_pack_nvlist_array_next(ptr,
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py b/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py
index b49255e8381d..08021aabcb61 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py
+++ b/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py
@@ -65,7 +65,7 @@ if __name__ == '__main__':
# check last (HEAD) commit message
last_commit_message_raw = subprocess.run([
- 'git', 'show', '-s', '--format=%B', 'HEAD'
+ 'git', 'show', '-s', '--format=%B', head
], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
for line in last_commit_message_raw.stdout.decode().splitlines():
diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
index 4ebb80af1f03..a5dbfc099c90 100644
--- a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
+++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
@@ -44,7 +44,7 @@ jobs:
os_selection="$FULL_OS"
fi
- if [ ${{ github.event.inputs.fedora_kernel_ver }} != "" ] ; then
+ if ${{ github.event.inputs.fedora_kernel_ver != '' }}; then
# They specified a custom kernel version for Fedora. Use only
# Fedora runners.
os_json=$(echo ${os_selection} | jq -c '[.[] | select(startswith("fedora"))]')
@@ -53,9 +53,8 @@ jobs:
os_json=$(echo ${os_selection} | jq -c)
fi
- echo $os_json
- echo "os=$os_json" >> $GITHUB_OUTPUT
- echo "ci_type=$ci_type" >> $GITHUB_OUTPUT
+ echo "os=$os_json" | tee -a $GITHUB_OUTPUT
+ echo "ci_type=$ci_type" | tee -a $GITHUB_OUTPUT
qemu-vm:
name: qemu-x86
@@ -78,7 +77,7 @@ jobs:
ref: ${{ github.event.pull_request.head.sha }}
- name: Setup QEMU
- timeout-minutes: 10
+ timeout-minutes: 15
run: .github/workflows/scripts/qemu-1-setup.sh
- name: Start build machine
diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META
index 42f65290e4e3..5704b5c6de8a 100644
--- a/sys/contrib/openzfs/META
+++ b/sys/contrib/openzfs/META
@@ -1,8 +1,8 @@
Meta: 1
Name: zfs
Branch: 1.0
-Version: 2.4.0
-Release: rc1
+Version: 2.4.99
+Release: 1
Release-Tags: relext
License: CDDL
Author: OpenZFS
diff --git a/sys/contrib/openzfs/Makefile.am b/sys/contrib/openzfs/Makefile.am
index 5f09d170e730..30f78e490b78 100644
--- a/sys/contrib/openzfs/Makefile.am
+++ b/sys/contrib/openzfs/Makefile.am
@@ -1,6 +1,7 @@
CLEANFILES =
dist_noinst_DATA =
INSTALL_DATA_HOOKS =
+INSTALL_EXEC_HOOKS =
ALL_LOCAL =
CLEAN_LOCAL =
CHECKS = shellcheck checkbashisms
@@ -71,6 +72,9 @@ all: gitrev
PHONY += install-data-hook $(INSTALL_DATA_HOOKS)
install-data-hook: $(INSTALL_DATA_HOOKS)
+PHONY += install-exec-hook $(INSTALL_EXEC_HOOKS)
+install-exec-hook: $(INSTALL_EXEC_HOOKS)
+
PHONY += maintainer-clean-local
maintainer-clean-local:
-$(RM) $(GITREV)
diff --git a/sys/contrib/openzfs/cmd/Makefile.am b/sys/contrib/openzfs/cmd/Makefile.am
index 96040976e53e..e79bfae2b10f 100644
--- a/sys/contrib/openzfs/cmd/Makefile.am
+++ b/sys/contrib/openzfs/cmd/Makefile.am
@@ -107,8 +107,12 @@ $(call SUBST,dbufstat,%D%/)
$(call SUBST,zilstat,%D%/)
arc_summary: %D%/arc_summary
$(AM_V_at)cp $< $@
-endif
+cmd-rename-install-exec-hook:
+ $(LN_S) -f arcstat $(DESTDIR)$(bindir)/zarcstat
+ $(LN_S) -f arc_summary $(DESTDIR)$(bindir)/zarcsummary
+INSTALL_EXEC_HOOKS += cmd-rename-install-exec-hook
+endif
PHONY += cmd
cmd: $(bin_SCRIPTS) $(bin_PROGRAMS) $(sbin_SCRIPTS) $(sbin_PROGRAMS) $(dist_bin_SCRIPTS) $(zfsexec_PROGRAMS) $(mounthelper_PROGRAMS)
diff --git a/sys/contrib/openzfs/cmd/arc_summary b/sys/contrib/openzfs/cmd/arc_summary
index e60c6b64e8a1..9538dd599cb7 100755
--- a/sys/contrib/openzfs/cmd/arc_summary
+++ b/sys/contrib/openzfs/cmd/arc_summary
@@ -1021,6 +1021,13 @@ def main():
treated separately because they come with their own call.
"""
+ # notify user for upcoming renaming in 2.4.0
+ abs_path = os.path.abspath(sys.argv[0].strip())
+ script_name = os.path.basename(abs_path)
+ if script_name != "zarcsummary":
+ sys.stderr.write("Note: this script will be renamed to zarcsummary in ")
+ sys.stderr.write("zfs 2.4.0. Please migrate ASAP.\n")
+
kstats = get_kstats()
if ARGS.graph:
diff --git a/sys/contrib/openzfs/cmd/arcstat.in b/sys/contrib/openzfs/cmd/arcstat.in
index 6f9abb39c3fb..e153eddb36cf 100755
--- a/sys/contrib/openzfs/cmd/arcstat.in
+++ b/sys/contrib/openzfs/cmd/arcstat.in
@@ -56,6 +56,7 @@ import time
import getopt
import re
import copy
+import os
from signal import signal, SIGINT, SIGWINCH, SIG_DFL
@@ -766,6 +767,14 @@ def calculate():
def main():
+
+ # notify user for upcoming renaming in 2.4.0
+ abs_path = os.path.abspath(sys.argv[0].strip())
+ script_name = os.path.basename(abs_path)
+ if script_name != "zarcstat":
+ sys.stderr.write("Note: this script will be renamed to zarcstat in ")
+ sys.stderr.write("zfs 2.4.0. Please migrate ASAP.\n")
+
global sint
global count
global hdr_intr
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c
index adaa5cd10961..134c258a1e32 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.c
@@ -2635,7 +2635,7 @@ print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb,
if (BP_GET_LEVEL(bp) != zb->zb_level) {
(void) printf(" (ERROR: Block pointer level "
"(%llu) does not match bookmark level (%lld))",
- BP_GET_LEVEL(bp), (u_longlong_t)zb->zb_level);
+ BP_GET_LEVEL(bp), (longlong_t)zb->zb_level);
corruption_found = B_TRUE;
}
}
diff --git a/sys/contrib/openzfs/cmd/zhack.c b/sys/contrib/openzfs/cmd/zhack.c
index 2bd3051dce7b..536532a6762d 100644
--- a/sys/contrib/openzfs/cmd/zhack.c
+++ b/sys/contrib/openzfs/cmd/zhack.c
@@ -363,10 +363,12 @@ feature_incr_sync(void *arg, dmu_tx_t *tx)
zfeature_info_t *feature = arg;
uint64_t refcount;
+ mutex_enter(&spa->spa_feat_stats_lock);
VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
feature_sync(spa, feature, refcount + 1, tx);
spa_history_log_internal(spa, "zhack feature incr", tx,
"name=%s", feature->fi_guid);
+ mutex_exit(&spa->spa_feat_stats_lock);
}
static void
@@ -376,10 +378,12 @@ feature_decr_sync(void *arg, dmu_tx_t *tx)
zfeature_info_t *feature = arg;
uint64_t refcount;
+ mutex_enter(&spa->spa_feat_stats_lock);
VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
feature_sync(spa, feature, refcount - 1, tx);
spa_history_log_internal(spa, "zhack feature decr", tx,
"name=%s", feature->fi_guid);
+ mutex_exit(&spa->spa_feat_stats_lock);
}
static void
diff --git a/sys/contrib/openzfs/cmd/zpool/Makefile.am b/sys/contrib/openzfs/cmd/zpool/Makefile.am
index 2f962408e5a3..5bb6d8160b18 100644
--- a/sys/contrib/openzfs/cmd/zpool/Makefile.am
+++ b/sys/contrib/openzfs/cmd/zpool/Makefile.am
@@ -148,6 +148,7 @@ dist_zpoolcompat_DATA = \
%D%/compatibility.d/openzfs-2.1-linux \
%D%/compatibility.d/openzfs-2.2 \
%D%/compatibility.d/openzfs-2.3 \
+ %D%/compatibility.d/openzfs-2.4 \
%D%/compatibility.d/openzfsonosx-1.7.0 \
%D%/compatibility.d/openzfsonosx-1.8.1 \
%D%/compatibility.d/openzfsonosx-1.9.3 \
@@ -187,7 +188,9 @@ zpoolcompatlinks = \
"openzfs-2.2 openzfs-2.2-linux" \
"openzfs-2.2 openzfs-2.2-freebsd" \
"openzfs-2.3 openzfs-2.3-linux" \
- "openzfs-2.3 openzfs-2.3-freebsd"
+ "openzfs-2.3 openzfs-2.3-freebsd" \
+ "openzfs-2.4 openzfs-2.4-linux" \
+ "openzfs-2.4 openzfs-2.4-freebsd"
zpoolconfdir = $(sysconfdir)/zfs/zpool.d
INSTALL_DATA_HOOKS += zpool-install-data-hook
diff --git a/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4 b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4
new file mode 100644
index 000000000000..3fbd91014c95
--- /dev/null
+++ b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4
@@ -0,0 +1,48 @@
+# Features supported by OpenZFS 2.4 on Linux and FreeBSD
+allocation_classes
+async_destroy
+blake3
+block_cloning
+block_cloning_endian
+bookmark_v2
+bookmark_written
+bookmarks
+device_rebuild
+device_removal
+draid
+dynamic_gang_header
+edonr
+embedded_data
+empty_bpobj
+enabled_txg
+encryption
+extensible_dataset
+fast_dedup
+filesystem_limits
+head_errlog
+hole_birth
+large_blocks
+large_dnode
+large_microzap
+livelist
+log_spacemap
+longname
+lz4_compress
+multi_vdev_crash_dump
+obsolete_counts
+physical_rewrite
+project_quota
+raidz_expansion
+redacted_datasets
+redaction_bookmarks
+redaction_list_spill
+resilver_defer
+sha512
+skein
+spacemap_histogram
+spacemap_v2
+userobj_accounting
+vdev_zaps_v2
+zilsaxattr
+zpool_checkpoint
+zstd_compress
diff --git a/sys/contrib/openzfs/cmd/zstream/Makefile.am b/sys/contrib/openzfs/cmd/zstream/Makefile.am
index be3539fe905d..80ef1ea7ca11 100644
--- a/sys/contrib/openzfs/cmd/zstream/Makefile.am
+++ b/sys/contrib/openzfs/cmd/zstream/Makefile.am
@@ -18,6 +18,7 @@ zstream_LDADD = \
libzpool.la \
libnvpair.la
-PHONY += install-exec-hook
-install-exec-hook:
+cmd-zstream-install-exec-hook:
cd $(DESTDIR)$(sbindir) && $(LN_S) -f zstream zstreamdump
+
+INSTALL_EXEC_HOOKS += cmd-zstream-install-exec-hook
diff --git a/sys/contrib/openzfs/config/always-arch.m4 b/sys/contrib/openzfs/config/always-arch.m4
index 9f413eeddf95..1ee6099ca8b2 100644
--- a/sys/contrib/openzfs/config/always-arch.m4
+++ b/sys/contrib/openzfs/config/always-arch.m4
@@ -39,3 +39,20 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_ARCH], [
AM_CONDITIONAL([TARGET_CPU_SPARC64], test $TARGET_CPU = sparc64)
AM_CONDITIONAL([TARGET_CPU_ARM], test $TARGET_CPU = arm)
])
+dnl #
+dnl # Check for conflicting environment variables
+dnl #
+dnl # If ARCH env variable is set up, then kernel Makefile in the /usr/src/kernel
+dnl # can misbehave during the zfs ./configure test of the module compilation.
+AC_DEFUN([ZFS_AC_CONFIG_CHECK_ARCH_VAR], [
+ AC_MSG_CHECKING([for conflicting environment variables])
+ if test -n "$ARCH"; then
+ AC_MSG_RESULT([warning])
+ AC_MSG_WARN(m4_normalize([ARCH environment variable is set to "$ARCH".
+ This can cause build kernel modules support check failure.
+ Please unset it.]))
+ else
+ AC_MSG_RESULT([done])
+ fi
+])
+
diff --git a/sys/contrib/openzfs/config/always-compiler-options.m4 b/sys/contrib/openzfs/config/always-compiler-options.m4
index 6383b12506ee..37fa079e0f4c 100644
--- a/sys/contrib/openzfs/config/always-compiler-options.m4
+++ b/sys/contrib/openzfs/config/always-compiler-options.m4
@@ -156,6 +156,34 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH], [
])
dnl #
+dnl # Check if kernel cc supports -Wno-format-zero-length option.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_FORMAT_ZERO_LENGTH], [
+ saved_cc="$CC"
+ AS_IF(
+ [ test -n "$KERNEL_CC" ], [ CC="$KERNEL_CC" ],
+ [ test -n "$KERNEL_LLVM" ], [ CC="clang" ],
+ [ CC="gcc" ]
+ )
+ AC_MSG_CHECKING([whether $CC supports -Wno-format-zero-length])
+
+ saved_flags="$CFLAGS"
+ CFLAGS="$CFLAGS -Werror -Wno-format-zero-length"
+
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
+ KERNEL_NO_FORMAT_ZERO_LENGTH=-Wno-format-zero-length
+ AC_MSG_RESULT([yes])
+ ], [
+ KERNEL_NO_FORMAT_ZERO_LENGTH=
+ AC_MSG_RESULT([no])
+ ])
+
+ CC="$saved_cc"
+ CFLAGS="$saved_flags"
+ AC_SUBST([KERNEL_NO_FORMAT_ZERO_LENGTH])
+])
+
+dnl #
dnl # Check if cc supports -Wno-clobbered option.
dnl #
dnl # We actually invoke it with the -Wclobbered option
@@ -231,20 +259,17 @@ dnl #
dnl # Check if kernel cc supports -Winfinite-recursion option.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_INFINITE_RECURSION], [
- AC_MSG_CHECKING([whether $KERNEL_CC supports -Winfinite-recursion])
-
saved_cc="$CC"
+ AS_IF(
+ [ test -n "$KERNEL_CC" ], [ CC="$KERNEL_CC" ],
+ [ test -n "$KERNEL_LLVM" ], [ CC="clang" ],
+ [ CC="gcc" ]
+ )
+ AC_MSG_CHECKING([whether $CC supports -Winfinite-recursion])
+
saved_flags="$CFLAGS"
- CC="gcc"
CFLAGS="$CFLAGS -Werror -Winfinite-recursion"
- AS_IF([ test -n "$KERNEL_CC" ], [
- CC="$KERNEL_CC"
- ])
- AS_IF([ test -n "$KERNEL_LLVM" ], [
- CC="clang"
- ])
-
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
KERNEL_INFINITE_RECURSION=-Winfinite-recursion
AC_DEFINE([HAVE_KERNEL_INFINITE_RECURSION], 1,
@@ -329,20 +354,17 @@ dnl #
dnl # Check if kernel cc supports -fno-ipa-sra option.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA], [
- AC_MSG_CHECKING([whether $KERNEL_CC supports -fno-ipa-sra])
-
saved_cc="$CC"
+ AS_IF(
+ [ test -n "$KERNEL_CC" ], [ CC="$KERNEL_CC" ],
+ [ test -n "$KERNEL_LLVM" ], [ CC="clang" ],
+ [ CC="gcc" ]
+ )
+ AC_MSG_CHECKING([whether $CC supports -fno-ipa-sra])
+
saved_flags="$CFLAGS"
- CC="gcc"
CFLAGS="$CFLAGS -Werror -fno-ipa-sra"
- AS_IF([ test -n "$KERNEL_CC" ], [
- CC="$KERNEL_CC"
- ])
- AS_IF([ test -n "$KERNEL_LLVM" ], [
- CC="clang"
- ])
-
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
KERNEL_NO_IPA_SRA=-fno-ipa-sra
AC_MSG_RESULT([yes])
diff --git a/sys/contrib/openzfs/config/kernel-blkdev.m4 b/sys/contrib/openzfs/config/kernel-blkdev.m4
index 83190c6fbe3f..02011bf39fb2 100644
--- a/sys/contrib/openzfs/config/kernel-blkdev.m4
+++ b/sys/contrib/openzfs/config/kernel-blkdev.m4
@@ -29,9 +29,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG], [
const char *path = "path";
fmode_t mode = 0;
void *holder = NULL;
- struct blk_holder_ops h;
- bdev = blkdev_get_by_path(path, mode, holder, &h);
+ bdev = blkdev_get_by_path(path, mode, holder, NULL);
])
])
@@ -48,9 +47,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_OPEN_BY_PATH], [
const char *path = "path";
fmode_t mode = 0;
void *holder = NULL;
- struct blk_holder_ops h;
- bdh = bdev_open_by_path(path, mode, holder, &h);
+ bdh = bdev_open_by_path(path, mode, holder, NULL);
])
])
@@ -68,9 +66,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_FILE_OPEN_BY_PATH], [
const char *path = "path";
fmode_t mode = 0;
void *holder = NULL;
- struct blk_holder_ops h;
- file = bdev_file_open_by_path(path, mode, holder, &h);
+ file = bdev_file_open_by_path(path, mode, holder, NULL);
])
])
diff --git a/sys/contrib/openzfs/config/kernel-dentry-operations.m4 b/sys/contrib/openzfs/config/kernel-dentry-operations.m4
index aa5a9f2aff39..6d87ad0e0710 100644
--- a/sys/contrib/openzfs/config/kernel-dentry-operations.m4
+++ b/sys/contrib/openzfs/config/kernel-dentry-operations.m4
@@ -24,6 +24,9 @@ dnl #
dnl # 2.6.38 API change
dnl # Added d_set_d_op() helper function.
dnl #
+dnl # 6.17 API change
+dnl # d_set_d_op() removed. No direct replacement.
+dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_D_SET_D_OP], [
ZFS_LINUX_TEST_SRC([d_set_d_op], [
#include <linux/dcache.h>
@@ -34,22 +37,21 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_D_SET_D_OP], [
AC_DEFUN([ZFS_AC_KERNEL_D_SET_D_OP], [
AC_MSG_CHECKING([whether d_set_d_op() is available])
- ZFS_LINUX_TEST_RESULT_SYMBOL([d_set_d_op],
- [d_set_d_op], [fs/dcache.c], [
+ ZFS_LINUX_TEST_RESULT([d_set_d_op], [
AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_D_SET_D_OP, 1,
+ [Define if d_set_d_op() is available])
], [
- ZFS_LINUX_TEST_ERROR([d_set_d_op])
+ AC_MSG_RESULT(no)
])
])
AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY], [
ZFS_AC_KERNEL_SRC_D_OBTAIN_ALIAS
ZFS_AC_KERNEL_SRC_D_SET_D_OP
- ZFS_AC_KERNEL_SRC_S_D_OP
])
AC_DEFUN([ZFS_AC_KERNEL_DENTRY], [
ZFS_AC_KERNEL_D_OBTAIN_ALIAS
ZFS_AC_KERNEL_D_SET_D_OP
- ZFS_AC_KERNEL_S_D_OP
])
diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4
index e3e7625db7d8..35819e4d68c5 100644
--- a/sys/contrib/openzfs/config/kernel.m4
+++ b/sys/contrib/openzfs/config/kernel.m4
@@ -70,6 +70,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_COMMIT_METADATA
ZFS_AC_KERNEL_SRC_SETATTR_PREPARE
ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED
+ ZFS_AC_KERNEL_SRC_DENTRY
ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE
ZFS_AC_KERNEL_SRC_SECURITY_INODE
ZFS_AC_KERNEL_SRC_FST_MOUNT
@@ -188,6 +189,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_COMMIT_METADATA
ZFS_AC_KERNEL_SETATTR_PREPARE
ZFS_AC_KERNEL_INSERT_INODE_LOCKED
+ ZFS_AC_KERNEL_DENTRY
ZFS_AC_KERNEL_TRUNCATE_SETSIZE
ZFS_AC_KERNEL_SECURITY_INODE
ZFS_AC_KERNEL_FST_MOUNT
diff --git a/sys/contrib/openzfs/config/user-statx.m4 b/sys/contrib/openzfs/config/user-statx.m4
index 0315f93e0c20..1ba74a40e9b8 100644
--- a/sys/contrib/openzfs/config/user-statx.m4
+++ b/sys/contrib/openzfs/config/user-statx.m4
@@ -2,7 +2,7 @@ dnl #
dnl # Check for statx() function and STATX_MNT_ID availability
dnl #
AC_DEFUN([ZFS_AC_CONFIG_USER_STATX], [
- AC_CHECK_HEADERS([linux/stat.h],
+ AC_CHECK_HEADERS([sys/stat.h],
[have_stat_headers=yes],
[have_stat_headers=no])
@@ -14,7 +14,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_STATX], [
AC_MSG_CHECKING([for STATX_MNT_ID])
AC_COMPILE_IFELSE([
AC_LANG_PROGRAM([[
- #include <linux/stat.h>
+ #include <sys/stat.h>
]], [[
struct statx stx;
int mask = STATX_MNT_ID;
@@ -29,6 +29,6 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_STATX], [
])
])
], [
- AC_MSG_WARN([linux/stat.h not found; skipping statx support])
+ AC_MSG_WARN([sys/stat.h not found; skipping statx support])
])
]) dnl end AC_DEFUN
diff --git a/sys/contrib/openzfs/config/zfs-build.m4 b/sys/contrib/openzfs/config/zfs-build.m4
index 7cf1b02d8757..adf6576f3193 100644
--- a/sys/contrib/openzfs/config/zfs-build.m4
+++ b/sys/contrib/openzfs/config/zfs-build.m4
@@ -256,6 +256,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [
ZFS_AC_CONFIG_ALWAYS_CC_FRAME_LARGER_THAN
ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_TRUNCATION
ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH
+ ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_FORMAT_ZERO_LENGTH
ZFS_AC_CONFIG_ALWAYS_CC_FORMAT_OVERFLOW
ZFS_AC_CONFIG_ALWAYS_CC_NO_OMIT_FRAME_POINTER
ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA
@@ -265,6 +266,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [
ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD
ZFS_AC_CONFIG_ALWAYS_SYSTEM
ZFS_AC_CONFIG_ALWAYS_ARCH
+ ZFS_AC_CONFIG_CHECK_ARCH_VAR
ZFS_AC_CONFIG_ALWAYS_PYTHON
ZFS_AC_CONFIG_ALWAYS_PYZFS
ZFS_AC_CONFIG_ALWAYS_SED
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
index 37284a78ad18..2362c83dfa3f 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
@@ -37,7 +37,9 @@ usr/lib/zfs-linux/zpool.d/
usr/lib/zfs-linux/zpool_influxdb
usr/lib/zfs-linux/zfs_prepare_disk
usr/sbin/arc_summary
+usr/sbin/zarcsummary
usr/sbin/arcstat
+usr/sbin/zarcstat
usr/sbin/dbufstat
usr/sbin/zilstat
usr/share/zfs/compatibility.d/
diff --git a/sys/contrib/openzfs/contrib/debian/rules.in b/sys/contrib/openzfs/contrib/debian/rules.in
index 2b0568938b25..966e34bf9dc6 100755
--- a/sys/contrib/openzfs/contrib/debian/rules.in
+++ b/sys/contrib/openzfs/contrib/debian/rules.in
@@ -82,7 +82,9 @@ override_dh_auto_install:
# https://www.debian.org/doc/debian-policy/ch-files.html#s-scripts
mkdir -p '$(CURDIR)/debian/tmp/usr/sbin/'
mv '$(CURDIR)/debian/tmp/usr/bin/arc_summary' '$(CURDIR)/debian/tmp/usr/sbin/arc_summary'
+ mv '$(CURDIR)/debian/tmp/usr/bin/zarcsummary' '$(CURDIR)/debian/tmp/usr/sbin/zarcsummary'
mv '$(CURDIR)/debian/tmp/usr/bin/arcstat' '$(CURDIR)/debian/tmp/usr/sbin/arcstat'
+ mv '$(CURDIR)/debian/tmp/usr/bin/zarcstat' '$(CURDIR)/debian/tmp/usr/sbin/zarcstat'
mv '$(CURDIR)/debian/tmp/usr/bin/dbufstat' '$(CURDIR)/debian/tmp/usr/sbin/dbufstat'
mv '$(CURDIR)/debian/tmp/usr/bin/zilstat' '$(CURDIR)/debian/tmp/usr/sbin/zilstat'
diff --git a/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in b/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in
index 4776087d9a76..db9bf0e20274 100644
--- a/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in
+++ b/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in
@@ -8,3 +8,12 @@ fi
. /usr/share/initramfs-tools/hook-functions
copy_exec /usr/share/initramfs-tools/zfsunlock /usr/bin/zfsunlock
+
+if [ -f /etc/initramfs-tools/etc/motd ]; then
+ copy_file text /etc/initramfs-tools/etc/motd /etc/motd
+else
+ tmpf=$(mktemp)
+ echo "If you use zfs encrypted root filesystems, you can use \`zfsunlock\` to manually unlock it" > "$tmpf"
+ copy_file text "$tmpf" /etc/motd
+ rm -f "$tmpf"
+fi
diff --git a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
index a0bc172c6f44..88698dedabbc 100644
--- a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
+++ b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
@@ -391,7 +391,11 @@ static int
zfs_key_config_load(pam_handle_t *pamh, zfs_key_config_t *config,
int argc, const char **argv)
{
+#if defined(__FreeBSD__)
+ config->homes_prefix = strdup("zroot/home");
+#else
config->homes_prefix = strdup("rpool/home");
+#endif
if (config->homes_prefix == NULL) {
pam_syslog(pamh, LOG_ERR, "strdup failure");
return (PAM_SERVICE_ERR);
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h
index 16e8a319a5f8..152e5a606f0e 100644
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h
@@ -61,32 +61,6 @@
#endif
/*
- * 2.6.30 API change,
- * The const keyword was added to the 'struct dentry_operations' in
- * the dentry structure. To handle this we define an appropriate
- * dentry_operations_t typedef which can be used.
- */
-typedef const struct dentry_operations dentry_operations_t;
-
-/*
- * 2.6.38 API addition,
- * Added d_clear_d_op() helper function which clears some flags and the
- * registered dentry->d_op table. This is required because d_set_d_op()
- * issues a warning when the dentry operations table is already set.
- * For the .zfs control directory to work properly we must be able to
- * override the default operations table and register custom .d_automount
- * and .d_revalidate callbacks.
- */
-static inline void
-d_clear_d_op(struct dentry *dentry)
-{
- dentry->d_op = NULL;
- dentry->d_flags &= ~(
- DCACHE_OP_HASH | DCACHE_OP_COMPARE |
- DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE);
-}
-
-/*
* Walk and invalidate all dentry aliases of an inode
* unless it's a mountpoint
*/
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h b/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h
index 087389b57b34..ad2815e46394 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h
@@ -25,6 +25,6 @@
#ifndef _SPL_STAT_H
#define _SPL_STAT_H
-#include <linux/stat.h>
+#include <sys/stat.h>
#endif /* SPL_STAT_H */
diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h
index 353805fcb969..a8acb83b4c2f 100644
--- a/sys/contrib/openzfs/include/sys/zio.h
+++ b/sys/contrib/openzfs/include/sys/zio.h
@@ -82,7 +82,8 @@ gbh_nblkptrs(uint64_t size) {
static inline zio_eck_t *
gbh_eck(zio_gbh_phys_t *gbh, uint64_t size) {
ASSERT(IS_P2ALIGNED(size, sizeof (blkptr_t)));
- return ((zio_eck_t *)((uintptr_t)gbh + (size_t)size - sizeof (zio_eck_t)));
+ return ((zio_eck_t *)((uintptr_t)gbh + (size_t)size -
+ sizeof (zio_eck_t)));
}
static inline blkptr_t *
diff --git a/sys/contrib/openzfs/include/sys/zvol.h b/sys/contrib/openzfs/include/sys/zvol.h
index cdc9dba2a28d..5791246e99e4 100644
--- a/sys/contrib/openzfs/include/sys/zvol.h
+++ b/sys/contrib/openzfs/include/sys/zvol.h
@@ -53,7 +53,7 @@ extern int zvol_set_volsize(const char *, uint64_t);
extern int zvol_set_volthreading(const char *, boolean_t);
extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t);
extern int zvol_set_ro(const char *, boolean_t);
-extern zvol_state_handle_t *zvol_suspend(const char *);
+extern int zvol_suspend(const char *, zvol_state_handle_t **);
extern int zvol_resume(zvol_state_handle_t *);
extern void *zvol_tag(zvol_state_handle_t *);
diff --git a/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h b/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h
index a605af962a6d..13cc0b46ac93 100644
--- a/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h
+++ b/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h
@@ -33,7 +33,7 @@
#ifdef HAVE_STATX
#include <fcntl.h>
-#include <linux/stat.h>
+#include <sys/stat.h>
#endif
/*
diff --git a/sys/contrib/openzfs/man/man1/arcstat.1 b/sys/contrib/openzfs/man/man1/arcstat.1
index f2474fbb701f..288b98d57a11 100644
--- a/sys/contrib/openzfs/man/man1/arcstat.1
+++ b/sys/contrib/openzfs/man/man1/arcstat.1
@@ -13,13 +13,15 @@
.\" Copyright (c) 2015 by Delphix. All rights reserved.
.\" Copyright (c) 2020 by AJ Jordan. All rights reserved.
.\"
-.Dd December 23, 2022
+.Dd September 19, 2024
.Dt ARCSTAT 1
.Os
.
.Sh NAME
.Nm arcstat
.Nd report ZFS ARC and L2ARC statistics
+.Sh NOTICE
+This utility will be renamed to zarcstat in zfs 2.4.0; please migrate to the new name.
.Sh SYNOPSIS
.Nm
.Op Fl havxp
diff --git a/sys/contrib/openzfs/man/man1/cstyle.1 b/sys/contrib/openzfs/man/man1/cstyle.1
index 241c82edd5a8..8f29129ce175 100644
--- a/sys/contrib/openzfs/man/man1/cstyle.1
+++ b/sys/contrib/openzfs/man/man1/cstyle.1
@@ -21,7 +21,7 @@
.\"
.\" CDDL HEADER END
.\"
-.Dd May 26, 2021
+.Dd April 4, 2022
.Dt CSTYLE 1
.Os
.
diff --git a/sys/contrib/openzfs/man/man1/zhack.1 b/sys/contrib/openzfs/man/man1/zhack.1
index f58c0527649b..743bd53b731c 100644
--- a/sys/contrib/openzfs/man/man1/zhack.1
+++ b/sys/contrib/openzfs/man/man1/zhack.1
@@ -23,7 +23,7 @@
.\"
.\" lint-ok: WARNING: sections out of conventional order: Sh SYNOPSIS
.\"
-.Dd May 26, 2021
+.Dd May 3, 2023
.Dt ZHACK 1
.Os
.
diff --git a/sys/contrib/openzfs/man/man1/ztest.1 b/sys/contrib/openzfs/man/man1/ztest.1
index febbb62b1664..ae857bfea29c 100644
--- a/sys/contrib/openzfs/man/man1/ztest.1
+++ b/sys/contrib/openzfs/man/man1/ztest.1
@@ -24,7 +24,7 @@
.\" reserved.
.\" Copyright (c) 2017, Intel Corporation.
.\"
-.Dd May 26, 2021
+.Dd July 12, 2025
.Dt ZTEST 1
.Os
.
diff --git a/sys/contrib/openzfs/man/man4/spl.4 b/sys/contrib/openzfs/man/man4/spl.4
index 683f8e2b631f..61dfe42e463d 100644
--- a/sys/contrib/openzfs/man/man4/spl.4
+++ b/sys/contrib/openzfs/man/man4/spl.4
@@ -15,7 +15,7 @@
.\"
.\" Copyright 2013 Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
.\"
-.Dd August 24, 2020
+.Dd May 7, 2025
.Dt SPL 4
.Os
.
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
index 5c7958667f92..e865d6a79c5a 100644
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -17,7 +17,7 @@
.\" own identifying information:
.\" Portions Copyright [yyyy] [name of copyright owner]
.\"
-.Dd May 29, 2025
+.Dd August 14, 2025
.Dt ZFS 4
.Os
.
diff --git a/sys/contrib/openzfs/man/man5/vdev_id.conf.5 b/sys/contrib/openzfs/man/man5/vdev_id.conf.5
index d2f817631c15..299a23720201 100644
--- a/sys/contrib/openzfs/man/man5/vdev_id.conf.5
+++ b/sys/contrib/openzfs/man/man5/vdev_id.conf.5
@@ -9,7 +9,7 @@
.\" source. A copy of the CDDL is also available via the Internet at
.\" http://www.illumos.org/license/CDDL.
.\"
-.Dd May 26, 2021
+.Dd October 8, 2024
.Dt VDEV_ID.CONF 5
.Os
.
diff --git a/sys/contrib/openzfs/man/man7/dracut.zfs.7 b/sys/contrib/openzfs/man/man7/dracut.zfs.7
index fb5da553af6e..3d051d4d3343 100644
--- a/sys/contrib/openzfs/man/man7/dracut.zfs.7
+++ b/sys/contrib/openzfs/man/man7/dracut.zfs.7
@@ -1,7 +1,7 @@
.\" SPDX-License-Identifier: CDDL-1.0
.\" SPDX-License-Identifier: 0BSD
.\"
-.Dd March 28, 2023
+.Dd July 13, 2024
.Dt DRACUT.ZFS 7
.Os
.
diff --git a/sys/contrib/openzfs/man/man7/vdevprops.7 b/sys/contrib/openzfs/man/man7/vdevprops.7
index acabe6b6613a..61e60d950416 100644
--- a/sys/contrib/openzfs/man/man7/vdevprops.7
+++ b/sys/contrib/openzfs/man/man7/vdevprops.7
@@ -21,7 +21,7 @@
.\"
.\" Copyright (c) 2021 Klara, Inc.
.\"
-.Dd October 30, 2022
+.Dd July 23, 2024
.Dt VDEVPROPS 7
.Os
.
diff --git a/sys/contrib/openzfs/man/man7/zfsconcepts.7 b/sys/contrib/openzfs/man/man7/zfsconcepts.7
index 5c736e53670d..bb2178d85bcd 100644
--- a/sys/contrib/openzfs/man/man7/zfsconcepts.7
+++ b/sys/contrib/openzfs/man/man7/zfsconcepts.7
@@ -31,7 +31,7 @@
.\" Copyright 2019 Joyent, Inc.
.\" Copyright 2023 Klara, Inc.
.\"
-.Dd October 6, 2023
+.Dd October 2, 2024
.Dt ZFSCONCEPTS 7
.Os
.
diff --git a/sys/contrib/openzfs/man/man7/zfsprops.7 b/sys/contrib/openzfs/man/man7/zfsprops.7
index ac3152cb5d51..0930771c9fce 100644
--- a/sys/contrib/openzfs/man/man7/zfsprops.7
+++ b/sys/contrib/openzfs/man/man7/zfsprops.7
@@ -39,7 +39,7 @@
.\" Copyright (c) 2019, Kjeld Schouten-Lebbing
.\" Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
.\"
-.Dd June 29, 2024
+.Dd August 6, 2025
.Dt ZFSPROPS 7
.Os
.
diff --git a/sys/contrib/openzfs/man/man7/zpool-features.7 b/sys/contrib/openzfs/man/man7/zpool-features.7
index 10dfd1f92936..b4404a6eb58d 100644
--- a/sys/contrib/openzfs/man/man7/zpool-features.7
+++ b/sys/contrib/openzfs/man/man7/zpool-features.7
@@ -19,7 +19,7 @@
.\" Copyright (c) 2019, Allan Jude
.\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
.\"
-.Dd October 2, 2024
+.Dd July 23, 2025
.Dt ZPOOL-FEATURES 7
.Os
.
diff --git a/sys/contrib/openzfs/man/man7/zpoolconcepts.7 b/sys/contrib/openzfs/man/man7/zpoolconcepts.7
index dafe3bffc453..b9c8926d835d 100644
--- a/sys/contrib/openzfs/man/man7/zpoolconcepts.7
+++ b/sys/contrib/openzfs/man/man7/zpoolconcepts.7
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd April 7, 2023
+.Dd August 6, 2025
.Dt ZPOOLCONCEPTS 7
.Os
.
diff --git a/sys/contrib/openzfs/man/man7/zpoolprops.7 b/sys/contrib/openzfs/man/man7/zpoolprops.7
index 5d84753193ee..d3b4c2376943 100644
--- a/sys/contrib/openzfs/man/man7/zpoolprops.7
+++ b/sys/contrib/openzfs/man/man7/zpoolprops.7
@@ -29,7 +29,7 @@
.\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
.\" Copyright (c) 2023, Klara Inc.
.\"
-.Dd November 18, 2024
+.Dd December 4, 2024
.Dt ZPOOLPROPS 7
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zdb.8 b/sys/contrib/openzfs/man/man8/zdb.8
index 0a5b6af73fdb..e00544e4a5a4 100644
--- a/sys/contrib/openzfs/man/man8/zdb.8
+++ b/sys/contrib/openzfs/man/man8/zdb.8
@@ -15,7 +15,7 @@
.\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
.\" Copyright (c) 2017 Intel Corporation.
.\"
-.Dd April 23, 2025
+.Dd August 12, 2025
.Dt ZDB 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zed.8.in b/sys/contrib/openzfs/man/man8/zed.8.in
index c90a1834403b..eda377aafc1e 100644
--- a/sys/contrib/openzfs/man/man8/zed.8.in
+++ b/sys/contrib/openzfs/man/man8/zed.8.in
@@ -13,7 +13,7 @@
.\"
.\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049)
.\"
-.Dd May 26, 2021
+.Dd August 22, 2022
.Dt ZED 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-allow.8 b/sys/contrib/openzfs/man/man8/zfs-allow.8
index 5a8e80bf6a43..b154aebd92aa 100644
--- a/sys/contrib/openzfs/man/man8/zfs-allow.8
+++ b/sys/contrib/openzfs/man/man8/zfs-allow.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd March 13, 2025
.Dt ZFS-ALLOW 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-bookmark.8 b/sys/contrib/openzfs/man/man8/zfs-bookmark.8
index 083ff46d241b..5a0933820020 100644
--- a/sys/contrib/openzfs/man/man8/zfs-bookmark.8
+++ b/sys/contrib/openzfs/man/man8/zfs-bookmark.8
@@ -31,7 +31,7 @@
.\" Copyright 2019 Joyent, Inc.
.\" Copyright (c) 2019, 2020 by Christian Schwarz. All Rights Reserved.
.\"
-.Dd May 12, 2022
+.Dd July 11, 2022
.Dt ZFS-BOOKMARK 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-clone.8 b/sys/contrib/openzfs/man/man8/zfs-clone.8
index cd412815f5fe..9609cf2ce36a 100644
--- a/sys/contrib/openzfs/man/man8/zfs-clone.8
+++ b/sys/contrib/openzfs/man/man8/zfs-clone.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZFS-CLONE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-create.8 b/sys/contrib/openzfs/man/man8/zfs-create.8
index 91878056cc7d..58bde5799240 100644
--- a/sys/contrib/openzfs/man/man8/zfs-create.8
+++ b/sys/contrib/openzfs/man/man8/zfs-create.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd June 2, 2023
.Dt ZFS-CREATE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-destroy.8 b/sys/contrib/openzfs/man/man8/zfs-destroy.8
index 38359be02430..6a6791f7a44e 100644
--- a/sys/contrib/openzfs/man/man8/zfs-destroy.8
+++ b/sys/contrib/openzfs/man/man8/zfs-destroy.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd February 5, 2025
.Dt ZFS-DESTROY 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-diff.8 b/sys/contrib/openzfs/man/man8/zfs-diff.8
index d4c48f4109be..5b94ea524666 100644
--- a/sys/contrib/openzfs/man/man8/zfs-diff.8
+++ b/sys/contrib/openzfs/man/man8/zfs-diff.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZFS-DIFF 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-hold.8 b/sys/contrib/openzfs/man/man8/zfs-hold.8
index 0c88937f0dc8..a877e428f88b 100644
--- a/sys/contrib/openzfs/man/man8/zfs-hold.8
+++ b/sys/contrib/openzfs/man/man8/zfs-hold.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd June 30, 2019
+.Dd November 8, 2022
.Dt ZFS-HOLD 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-jail.8 b/sys/contrib/openzfs/man/man8/zfs-jail.8
index 53499a279d05..569f5f57eab4 100644
--- a/sys/contrib/openzfs/man/man8/zfs-jail.8
+++ b/sys/contrib/openzfs/man/man8/zfs-jail.8
@@ -37,7 +37,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd May 27, 2021
+.Dd July 11, 2022
.Dt ZFS-JAIL 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-list.8 b/sys/contrib/openzfs/man/man8/zfs-list.8
index 677d8292e207..42eff94f9762 100644
--- a/sys/contrib/openzfs/man/man8/zfs-list.8
+++ b/sys/contrib/openzfs/man/man8/zfs-list.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd February 8, 2024
+.Dd August 25, 2025
.Dt ZFS-LIST 8
.Os
.
@@ -50,27 +50,25 @@
.Oo Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Oc Ns …
.
.Sh DESCRIPTION
-If specified, you can list property information by the absolute pathname or the
-relative pathname.
-By default, all file systems and volumes are displayed.
+By default, all file systems and volumes are displayed, with the following
+fields:
+.Sy name , Sy used , Sy available , Sy referenced , Sy mountpoint .
Snapshots are displayed if the
.Sy listsnapshots
pool property is
.Sy on
.Po the default is
.Sy off
-.Pc ,
+.Pc
or if the
.Fl t Sy snapshot
or
.Fl t Sy all
options are specified.
-The following fields are displayed:
-.Sy name , Sy used , Sy available , Sy referenced , Sy mountpoint .
.Bl -tag -width "-H"
.It Fl H
Used for scripting mode.
-Do not print headers and separate fields by a single tab instead of arbitrary
+Do not print headers, and separate fields by a single tab instead of arbitrary
white space.
.It Fl j , -json Op Ar --json-int
Print the output in JSON format.
@@ -87,7 +85,7 @@ of
will display only the dataset and its direct children.
.It Fl o Ar property
A comma-separated list of properties to display.
-The property must be:
+Each property must be:
.Bl -bullet -compact
.It
One of the properties described in the
@@ -125,30 +123,41 @@ section of
or the value
.Sy name
to sort by the dataset name.
-Multiple properties can be specified at one time using multiple
+Multiple properties can be specified to operate together using multiple
.Fl s
-property options.
+or
+.Fl S
+options.
Multiple
.Fl s
-options are evaluated from left to right in decreasing order of importance.
-The following is a list of sorting criteria:
+and
+.Fl S
+options are evaluated from left to right to supply sort keys in
+decreasing order of priority.
+Property types operate as follows:
.Bl -bullet -compact
.It
Numeric types sort in numeric order.
.It
String types sort in alphabetical order.
.It
-Types inappropriate for a row sort that row to the literal bottom, regardless of
-the specified ordering.
+Types inappropriate for a row sort that row to the literal bottom,
+regardless of the specified ordering.
.El
.Pp
-If no sorting options are specified the existing behavior of
-.Nm zfs Cm list
-is preserved.
+If no sort columns are specified, or if two lines of output would sort
+equally across all specified columns, then datasets and bookmarks are
+sorted by name, whereas snapshots are sorted first by the name of their
+dataset and then by the time of their creation.
+When no sort columns are specified but snapshots are listed, this
+default behavior causes snapshots to be grouped under their datasets in
+chronological order by creation time.
.It Fl S Ar property
Same as
.Fl s ,
-but sorts by property in descending order.
+but sorts by
+.Ar property
+in descending order.
.It Fl t Ar type
A comma-separated list of types to display, where
.Ar type
diff --git a/sys/contrib/openzfs/man/man8/zfs-load-key.8 b/sys/contrib/openzfs/man/man8/zfs-load-key.8
index 7838c46d9e77..3a11cea99fd6 100644
--- a/sys/contrib/openzfs/man/man8/zfs-load-key.8
+++ b/sys/contrib/openzfs/man/man8/zfs-load-key.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd January 13, 2020
+.Dd July 11, 2022
.Dt ZFS-LOAD-KEY 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in b/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in
index ea470247daac..9e44ea30c636 100644
--- a/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in
+++ b/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in
@@ -23,7 +23,7 @@
.\" OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
.\" WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
.\"
-.Dd May 31, 2021
+.Dd November 30, 2021
.Dt ZFS-MOUNT-GENERATOR 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-mount.8 b/sys/contrib/openzfs/man/man8/zfs-mount.8
index 9fca6fffd5bb..2689b6dc345b 100644
--- a/sys/contrib/openzfs/man/man8/zfs-mount.8
+++ b/sys/contrib/openzfs/man/man8/zfs-mount.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd February 16, 2019
+.Dd October 12, 2024
.Dt ZFS-MOUNT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-project.8 b/sys/contrib/openzfs/man/man8/zfs-project.8
index 36547680f53e..4ebfdf6ffe4f 100644
--- a/sys/contrib/openzfs/man/man8/zfs-project.8
+++ b/sys/contrib/openzfs/man/man8/zfs-project.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd May 27, 2021
+.Dd July 11, 2022
.Dt ZFS-PROJECT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-promote.8 b/sys/contrib/openzfs/man/man8/zfs-promote.8
index 767045812607..435a7a5d0144 100644
--- a/sys/contrib/openzfs/man/man8/zfs-promote.8
+++ b/sys/contrib/openzfs/man/man8/zfs-promote.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZFS-PROMOTE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-rename.8 b/sys/contrib/openzfs/man/man8/zfs-rename.8
index 4cf192c0682b..8fedc67469e6 100644
--- a/sys/contrib/openzfs/man/man8/zfs-rename.8
+++ b/sys/contrib/openzfs/man/man8/zfs-rename.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZFS-RENAME 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-rewrite.8 b/sys/contrib/openzfs/man/man8/zfs-rewrite.8
index a3a037f3794a..ca5340c7e5eb 100644
--- a/sys/contrib/openzfs/man/man8/zfs-rewrite.8
+++ b/sys/contrib/openzfs/man/man8/zfs-rewrite.8
@@ -21,7 +21,7 @@
.\"
.\" Copyright (c) 2025 iXsystems, Inc.
.\"
-.Dd May 6, 2025
+.Dd July 23, 2025
.Dt ZFS-REWRITE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-send.8 b/sys/contrib/openzfs/man/man8/zfs-send.8
index f7c6b840303c..6c5f6b94afd5 100644
--- a/sys/contrib/openzfs/man/man8/zfs-send.8
+++ b/sys/contrib/openzfs/man/man8/zfs-send.8
@@ -31,7 +31,7 @@
.\" Copyright 2019 Joyent, Inc.
.\" Copyright (c) 2024, Klara, Inc.
.\"
-.Dd October 2, 2024
+.Dd August 29, 2025
.Dt ZFS-SEND 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-set.8 b/sys/contrib/openzfs/man/man8/zfs-set.8
index 67f4d6eba171..08daf09d05f8 100644
--- a/sys/contrib/openzfs/man/man8/zfs-set.8
+++ b/sys/contrib/openzfs/man/man8/zfs-set.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd April 20, 2024
+.Dd October 12, 2024
.Dt ZFS-SET 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-share.8 b/sys/contrib/openzfs/man/man8/zfs-share.8
index f7a09a189182..e9c32a44b0c7 100644
--- a/sys/contrib/openzfs/man/man8/zfs-share.8
+++ b/sys/contrib/openzfs/man/man8/zfs-share.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd May 17, 2021
+.Dd July 11, 2022
.Dt ZFS-SHARE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-snapshot.8 b/sys/contrib/openzfs/man/man8/zfs-snapshot.8
index 3ddd1273c8e8..8f4b2c335f09 100644
--- a/sys/contrib/openzfs/man/man8/zfs-snapshot.8
+++ b/sys/contrib/openzfs/man/man8/zfs-snapshot.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZFS-SNAPSHOT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-upgrade.8 b/sys/contrib/openzfs/man/man8/zfs-upgrade.8
index bac74e37aef9..a5ce2b760da4 100644
--- a/sys/contrib/openzfs/man/man8/zfs-upgrade.8
+++ b/sys/contrib/openzfs/man/man8/zfs-upgrade.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd June 30, 2019
+.Dd July 11, 2022
.Dt ZFS-UPGRADE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-userspace.8 b/sys/contrib/openzfs/man/man8/zfs-userspace.8
index d7a4d18e83b1..c255d911740d 100644
--- a/sys/contrib/openzfs/man/man8/zfs-userspace.8
+++ b/sys/contrib/openzfs/man/man8/zfs-userspace.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd June 30, 2019
+.Dd July 11, 2022
.Dt ZFS-USERSPACE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-wait.8 b/sys/contrib/openzfs/man/man8/zfs-wait.8
index 554a67455c60..e5c60010d2f9 100644
--- a/sys/contrib/openzfs/man/man8/zfs-wait.8
+++ b/sys/contrib/openzfs/man/man8/zfs-wait.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 31, 2021
+.Dd July 11, 2022
.Dt ZFS-WAIT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-zone.8 b/sys/contrib/openzfs/man/man8/zfs-zone.8
index 7ad0ac89463c..a56a304e82b2 100644
--- a/sys/contrib/openzfs/man/man8/zfs-zone.8
+++ b/sys/contrib/openzfs/man/man8/zfs-zone.8
@@ -38,7 +38,7 @@
.\" Copyright 2019 Joyent, Inc.
.\" Copyright 2021 Klara, Inc.
.\"
-.Dd June 3, 2022
+.Dd July 11, 2022
.Dt ZFS-ZONE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs.8 b/sys/contrib/openzfs/man/man8/zfs.8
index e16a3a82b672..b7566a727469 100644
--- a/sys/contrib/openzfs/man/man8/zfs.8
+++ b/sys/contrib/openzfs/man/man8/zfs.8
@@ -37,7 +37,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd April 18, 2025
+.Dd May 12, 2025
.Dt ZFS 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8 b/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8
index eef0ce68f17b..465e336d170c 100644
--- a/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8
+++ b/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8
@@ -21,7 +21,7 @@
.\"
.\" Copyright (c) 2020 by Delphix. All rights reserved.
.\"
-.Dd April 17, 2020
+.Dd July 11, 2022
.Dt ZFS_IDS_TO_PATH 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zgenhostid.8 b/sys/contrib/openzfs/man/man8/zgenhostid.8
index 2b5b4fc18216..ff564880f97d 100644
--- a/sys/contrib/openzfs/man/man8/zgenhostid.8
+++ b/sys/contrib/openzfs/man/man8/zgenhostid.8
@@ -21,7 +21,7 @@
.\"
.\" Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
.\"
-.Dd May 26, 2021
+.Dd July 11, 2022
.Dt ZGENHOSTID 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-attach.8 b/sys/contrib/openzfs/man/man8/zpool-attach.8
index 51d876767666..f120350a5190 100644
--- a/sys/contrib/openzfs/man/man8/zpool-attach.8
+++ b/sys/contrib/openzfs/man/man8/zpool-attach.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd June 28, 2023
+.Dd November 8, 2023
.Dt ZPOOL-ATTACH 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-checkpoint.8 b/sys/contrib/openzfs/man/man8/zpool-checkpoint.8
index d97d10d5df6e..b654f669cfa2 100644
--- a/sys/contrib/openzfs/man/man8/zpool-checkpoint.8
+++ b/sys/contrib/openzfs/man/man8/zpool-checkpoint.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 27, 2021
+.Dd July 11, 2022
.Dt ZPOOL-CHECKPOINT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-clear.8 b/sys/contrib/openzfs/man/man8/zpool-clear.8
index 19cd4be36408..70cd8325bd0e 100644
--- a/sys/contrib/openzfs/man/man8/zpool-clear.8
+++ b/sys/contrib/openzfs/man/man8/zpool-clear.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 27, 2021
+.Dd April 29, 2024
.Dt ZPOOL-CLEAR 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-create.8 b/sys/contrib/openzfs/man/man8/zpool-create.8
index 490c67629a20..a36ae260a158 100644
--- a/sys/contrib/openzfs/man/man8/zpool-create.8
+++ b/sys/contrib/openzfs/man/man8/zpool-create.8
@@ -28,7 +28,7 @@
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZPOOL-CREATE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-destroy.8 b/sys/contrib/openzfs/man/man8/zpool-destroy.8
index f49f29804ad7..82f3f3e203d6 100644
--- a/sys/contrib/openzfs/man/man8/zpool-destroy.8
+++ b/sys/contrib/openzfs/man/man8/zpool-destroy.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZPOOL-DESTROY 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-detach.8 b/sys/contrib/openzfs/man/man8/zpool-detach.8
index ae02dbc2d5b8..79a44310110d 100644
--- a/sys/contrib/openzfs/man/man8/zpool-detach.8
+++ b/sys/contrib/openzfs/man/man8/zpool-detach.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd August 9, 2019
+.Dd July 11, 2022
.Dt ZPOOL-DETACH 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-export.8 b/sys/contrib/openzfs/man/man8/zpool-export.8
index 171a7541c6d2..02495c088f94 100644
--- a/sys/contrib/openzfs/man/man8/zpool-export.8
+++ b/sys/contrib/openzfs/man/man8/zpool-export.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZPOOL-EXPORT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-get.8 b/sys/contrib/openzfs/man/man8/zpool-get.8
index 1d6d1f08afa6..bfe1bae7619f 100644
--- a/sys/contrib/openzfs/man/man8/zpool-get.8
+++ b/sys/contrib/openzfs/man/man8/zpool-get.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd August 9, 2019
+.Dd October 12, 2024
.Dt ZPOOL-GET 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-history.8 b/sys/contrib/openzfs/man/man8/zpool-history.8
index f15086eabc47..f02168951ff2 100644
--- a/sys/contrib/openzfs/man/man8/zpool-history.8
+++ b/sys/contrib/openzfs/man/man8/zpool-history.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd August 9, 2019
+.Dd July 11, 2022
.Dt ZPOOL-HISTORY 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-import.8 b/sys/contrib/openzfs/man/man8/zpool-import.8
index 9076f5c34929..c6d5f222b6b2 100644
--- a/sys/contrib/openzfs/man/man8/zpool-import.8
+++ b/sys/contrib/openzfs/man/man8/zpool-import.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZPOOL-IMPORT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-initialize.8 b/sys/contrib/openzfs/man/man8/zpool-initialize.8
index 39579a58010e..5299a897cb97 100644
--- a/sys/contrib/openzfs/man/man8/zpool-initialize.8
+++ b/sys/contrib/openzfs/man/man8/zpool-initialize.8
@@ -28,7 +28,7 @@
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2025 Hewlett Packard Enterprise Development LP.
.\"
-.Dd May 27, 2021
+.Dd July 30, 2025
.Dt ZPOOL-INITIALIZE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-iostat.8 b/sys/contrib/openzfs/man/man8/zpool-iostat.8
index d8c21d0cfc6c..5dd9c9d55e20 100644
--- a/sys/contrib/openzfs/man/man8/zpool-iostat.8
+++ b/sys/contrib/openzfs/man/man8/zpool-iostat.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd March 16, 2022
+.Dd January 29, 2024
.Dt ZPOOL-IOSTAT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-labelclear.8 b/sys/contrib/openzfs/man/man8/zpool-labelclear.8
index ba3d1509aa75..b807acaaede3 100644
--- a/sys/contrib/openzfs/man/man8/zpool-labelclear.8
+++ b/sys/contrib/openzfs/man/man8/zpool-labelclear.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 31, 2021
+.Dd July 11, 2022
.Dt ZPOOL-LABELCLEAR 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-list.8 b/sys/contrib/openzfs/man/man8/zpool-list.8
index b720e203c1c9..106399941f98 100644
--- a/sys/contrib/openzfs/man/man8/zpool-list.8
+++ b/sys/contrib/openzfs/man/man8/zpool-list.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd March 16, 2022
+.Dd October 12, 2024
.Dt ZPOOL-LIST 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-offline.8 b/sys/contrib/openzfs/man/man8/zpool-offline.8
index 49b1f34ad5d5..388c7634acce 100644
--- a/sys/contrib/openzfs/man/man8/zpool-offline.8
+++ b/sys/contrib/openzfs/man/man8/zpool-offline.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd August 9, 2019
+.Dd December 21, 2023
.Dt ZPOOL-OFFLINE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-reguid.8 b/sys/contrib/openzfs/man/man8/zpool-reguid.8
index 77101fc07326..b98c88e320de 100644
--- a/sys/contrib/openzfs/man/man8/zpool-reguid.8
+++ b/sys/contrib/openzfs/man/man8/zpool-reguid.8
@@ -29,7 +29,7 @@
.\" Copyright (c) 2024, Klara Inc.
.\" Copyright (c) 2024, Mateusz Piotrowski
.\"
-.Dd June 21, 2023
+.Dd August 26, 2024
.Dt ZPOOL-REGUID 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-remove.8 b/sys/contrib/openzfs/man/man8/zpool-remove.8
index d10a92e49bbe..4d5fc431d332 100644
--- a/sys/contrib/openzfs/man/man8/zpool-remove.8
+++ b/sys/contrib/openzfs/man/man8/zpool-remove.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd March 16, 2022
+.Dd November 19, 2024
.Dt ZPOOL-REMOVE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-reopen.8 b/sys/contrib/openzfs/man/man8/zpool-reopen.8
index 594cff3d16d8..c4e10f0a546e 100644
--- a/sys/contrib/openzfs/man/man8/zpool-reopen.8
+++ b/sys/contrib/openzfs/man/man8/zpool-reopen.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd June 2, 2021
+.Dd July 11, 2022
.Dt ZPOOL-REOPEN 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-replace.8 b/sys/contrib/openzfs/man/man8/zpool-replace.8
index 9f3156eeb3ef..651af13b19b8 100644
--- a/sys/contrib/openzfs/man/man8/zpool-replace.8
+++ b/sys/contrib/openzfs/man/man8/zpool-replace.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 29, 2021
+.Dd July 11, 2022
.Dt ZPOOL-REPLACE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-resilver.8 b/sys/contrib/openzfs/man/man8/zpool-resilver.8
index 2161d77f62ed..59c4be5db209 100644
--- a/sys/contrib/openzfs/man/man8/zpool-resilver.8
+++ b/sys/contrib/openzfs/man/man8/zpool-resilver.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 27, 2021
+.Dd July 11, 2022
.Dt ZPOOL-RESILVER 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-scrub.8 b/sys/contrib/openzfs/man/man8/zpool-scrub.8
index 0ecf8bd3851f..cf7ead5788bf 100644
--- a/sys/contrib/openzfs/man/man8/zpool-scrub.8
+++ b/sys/contrib/openzfs/man/man8/zpool-scrub.8
@@ -28,7 +28,7 @@
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2025 Hewlett Packard Enterprise Development LP.
.\"
-.Dd December 11, 2024
+.Dd August 6, 2025
.Dt ZPOOL-SCRUB 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-split.8 b/sys/contrib/openzfs/man/man8/zpool-split.8
index a67c865cf30c..ee4c6384cf23 100644
--- a/sys/contrib/openzfs/man/man8/zpool-split.8
+++ b/sys/contrib/openzfs/man/man8/zpool-split.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd June 2, 2021
+.Dd July 11, 2022
.Dt ZPOOL-SPLIT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-status.8 b/sys/contrib/openzfs/man/man8/zpool-status.8
index a7f3e088043b..108a1067b384 100644
--- a/sys/contrib/openzfs/man/man8/zpool-status.8
+++ b/sys/contrib/openzfs/man/man8/zpool-status.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd February 14, 2024
+.Dd May 20, 2025
.Dt ZPOOL-STATUS 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-sync.8 b/sys/contrib/openzfs/man/man8/zpool-sync.8
index 8f438f363e83..d1dc05d0c202 100644
--- a/sys/contrib/openzfs/man/man8/zpool-sync.8
+++ b/sys/contrib/openzfs/man/man8/zpool-sync.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd August 9, 2019
+.Dd July 11, 2022
.Dt ZPOOL-SYNC 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-trim.8 b/sys/contrib/openzfs/man/man8/zpool-trim.8
index 18723e1be0d2..c4e849019789 100644
--- a/sys/contrib/openzfs/man/man8/zpool-trim.8
+++ b/sys/contrib/openzfs/man/man8/zpool-trim.8
@@ -28,7 +28,7 @@
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2025 Hewlett Packard Enterprise Development LP.
.\"
-.Dd May 27, 2021
+.Dd July 30, 2025
.Dt ZPOOL-TRIM 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-upgrade.8 b/sys/contrib/openzfs/man/man8/zpool-upgrade.8
index 20632ae4bba0..cf69060da5ce 100644
--- a/sys/contrib/openzfs/man/man8/zpool-upgrade.8
+++ b/sys/contrib/openzfs/man/man8/zpool-upgrade.8
@@ -28,7 +28,7 @@
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZPOOL-UPGRADE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-wait.8 b/sys/contrib/openzfs/man/man8/zpool-wait.8
index 0ffb4badfb7b..28a51d29a913 100644
--- a/sys/contrib/openzfs/man/man8/zpool-wait.8
+++ b/sys/contrib/openzfs/man/man8/zpool-wait.8
@@ -28,7 +28,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 27, 2021
+.Dd January 29, 2024
.Dt ZPOOL-WAIT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool.8 b/sys/contrib/openzfs/man/man8/zpool.8
index b96944050594..3bfef780b298 100644
--- a/sys/contrib/openzfs/man/man8/zpool.8
+++ b/sys/contrib/openzfs/man/man8/zpool.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd February 14, 2024
+.Dd November 19, 2024
.Dt ZPOOL 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zstream.8 b/sys/contrib/openzfs/man/man8/zstream.8
index 03a8479c9e6a..5b3d063bc4a5 100644
--- a/sys/contrib/openzfs/man/man8/zstream.8
+++ b/sys/contrib/openzfs/man/man8/zstream.8
@@ -21,7 +21,7 @@
.\"
.\" Copyright (c) 2020 by Delphix. All rights reserved.
.\"
-.Dd October 4, 2022
+.Dd November 10, 2022
.Dt ZSTREAM 8
.Os
.
diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in
index 362d2295e091..58a80dc4402c 100644
--- a/sys/contrib/openzfs/module/Kbuild.in
+++ b/sys/contrib/openzfs/module/Kbuild.in
@@ -4,7 +4,7 @@
ZFS_MODULE_CFLAGS += -std=gnu99 -Wno-declaration-after-statement
ZFS_MODULE_CFLAGS += -Wmissing-prototypes
-ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @NO_FORMAT_ZERO_LENGTH@
+ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @KERNEL_NO_FORMAT_ZERO_LENGTH@
ifneq ($(KBUILD_EXTMOD),)
zfs_include = @abs_top_srcdir@/include
diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c
index 6d3bcca9f995..dcb0a391dda4 100644
--- a/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c
+++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c
@@ -38,11 +38,14 @@
kfpu_begin(); E(s, d, b); kfpu_end(); \
}
+#if defined(__x86_64) || defined(__aarch64__) || defined(__arm__) || \
+ defined(__PPC64__)
/* some implementation is always okay */
static inline boolean_t sha2_is_supported(void)
{
return (B_TRUE);
}
+#endif
#if defined(__x86_64)
diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c
index 2efd9fcf4c99..a85a71a83df4 100644
--- a/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c
+++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c
@@ -38,11 +38,14 @@
kfpu_begin(); E(s, d, b); kfpu_end(); \
}
+#if defined(__x86_64) || defined(__aarch64__) || defined(__arm__) || \
+ defined(__aarch64__) || defined(__arm__) || defined(__PPC64__)
/* some implementation is always okay */
static inline boolean_t sha2_is_supported(void)
{
return (B_TRUE);
}
+#endif
#if defined(__x86_64)
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
index ace2360c032d..393bfaa65ff5 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
@@ -188,11 +188,6 @@ param_set_arc_max(SYSCTL_HANDLER_ARGS)
return (0);
}
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max,
- CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_max, "LU",
- "Maximum ARC size in bytes (LEGACY)");
-
int
param_set_arc_min(SYSCTL_HANDLER_ARGS)
{
@@ -217,11 +212,6 @@ param_set_arc_min(SYSCTL_HANDLER_ARGS)
return (0);
}
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min,
- CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_min, "LU",
- "Minimum ARC size in bytes (LEGACY)");
-
extern uint_t zfs_arc_free_target;
int
@@ -245,16 +235,6 @@ param_set_arc_free_target(SYSCTL_HANDLER_ARGS)
return (0);
}
-/*
- * NOTE: This sysctl is CTLFLAG_RW not CTLFLAG_RWTUN due to its dependency on
- * pagedaemon initialization.
- */
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
- CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_free_target, "IU",
- "Desired number of free pages below which ARC triggers reclaim"
- " (LEGACY)");
-
int
param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
{
@@ -273,187 +253,6 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
return (0);
}
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift,
- CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_no_grow_shift, "I",
- "log2(fraction of ARC which must be free to allow growing) (LEGACY)");
-
-extern uint64_t l2arc_write_max;
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max,
- CTLFLAG_RWTUN, &l2arc_write_max, 0,
- "Max write bytes per interval (LEGACY)");
-
-extern uint64_t l2arc_write_boost;
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost,
- CTLFLAG_RWTUN, &l2arc_write_boost, 0,
- "Extra write bytes during device warmup (LEGACY)");
-
-extern uint64_t l2arc_headroom;
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom,
- CTLFLAG_RWTUN, &l2arc_headroom, 0,
- "Number of max device writes to precache (LEGACY)");
-
-extern uint64_t l2arc_headroom_boost;
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom_boost,
- CTLFLAG_RWTUN, &l2arc_headroom_boost, 0,
- "Compressed l2arc_headroom multiplier (LEGACY)");
-
-extern uint64_t l2arc_feed_secs;
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs,
- CTLFLAG_RWTUN, &l2arc_feed_secs, 0,
- "Seconds between L2ARC writing (LEGACY)");
-
-extern uint64_t l2arc_feed_min_ms;
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms,
- CTLFLAG_RWTUN, &l2arc_feed_min_ms, 0,
- "Min feed interval in milliseconds (LEGACY)");
-
-extern int l2arc_noprefetch;
-
-SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch,
- CTLFLAG_RWTUN, &l2arc_noprefetch, 0,
- "Skip caching prefetched buffers (LEGACY)");
-
-extern int l2arc_feed_again;
-
-SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again,
- CTLFLAG_RWTUN, &l2arc_feed_again, 0,
- "Turbo L2ARC warmup (LEGACY)");
-
-extern int l2arc_norw;
-
-SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw,
- CTLFLAG_RWTUN, &l2arc_norw, 0,
- "No reads during writes (LEGACY)");
-
-static int
-param_get_arc_state_size(SYSCTL_HANDLER_ARGS)
-{
- arc_state_t *state = (arc_state_t *)arg1;
- int64_t val;
-
- val = zfs_refcount_count(&state->arcs_size[ARC_BUFC_DATA]) +
- zfs_refcount_count(&state->arcs_size[ARC_BUFC_METADATA]);
- return (sysctl_handle_64(oidp, &val, 0, req));
-}
-
-extern arc_state_t ARC_anon;
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, anon_size,
- CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &ARC_anon, 0, param_get_arc_state_size, "Q",
- "size of anonymous state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD,
- &ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
- "size of evictable metadata in anonymous state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD,
- &ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
- "size of evictable data in anonymous state");
-
-extern arc_state_t ARC_mru;
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_size,
- CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &ARC_mru, 0, param_get_arc_state_size, "Q",
- "size of mru state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD,
- &ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
- "size of evictable metadata in mru state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD,
- &ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
- "size of evictable data in mru state");
-
-extern arc_state_t ARC_mru_ghost;
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_ghost_size,
- CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &ARC_mru_ghost, 0, param_get_arc_state_size, "Q",
- "size of mru ghost state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD,
- &ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
- "size of evictable metadata in mru ghost state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD,
- &ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
- "size of evictable data in mru ghost state");
-
-extern arc_state_t ARC_mfu;
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_size,
- CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &ARC_mfu, 0, param_get_arc_state_size, "Q",
- "size of mfu state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD,
- &ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
- "size of evictable metadata in mfu state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD,
- &ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
- "size of evictable data in mfu state");
-
-extern arc_state_t ARC_mfu_ghost;
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_ghost_size,
- CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &ARC_mfu_ghost, 0, param_get_arc_state_size, "Q",
- "size of mfu ghost state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD,
- &ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
- "size of evictable metadata in mfu ghost state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD,
- &ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
- "size of evictable data in mfu ghost state");
-
-extern arc_state_t ARC_uncached;
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, uncached_size,
- CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &ARC_uncached, 0, param_get_arc_state_size, "Q",
- "size of uncached state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_metadata_esize, CTLFLAG_RD,
- &ARC_uncached.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
- "size of evictable metadata in uncached state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_data_esize, CTLFLAG_RD,
- &ARC_uncached.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
- "size of evictable data in uncached state");
-
-extern arc_state_t ARC_l2c_only;
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, l2c_only_size,
- CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &ARC_l2c_only, 0, param_get_arc_state_size, "Q",
- "size of l2c_only state");
-
-/* dbuf.c */
-
-/* dmu.c */
-
-/* dmu_zfetch.c */
-
-SYSCTL_NODE(_vfs_zfs, OID_AUTO, zfetch, CTLFLAG_RW, 0, "ZFS ZFETCH (LEGACY)");
-
-extern uint32_t zfetch_max_distance;
-
-SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance,
- CTLFLAG_RWTUN, &zfetch_max_distance, 0,
- "Max bytes to prefetch per stream (LEGACY)");
-
-extern uint32_t zfetch_max_idistance;
-
-SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance,
- CTLFLAG_RWTUN, &zfetch_max_idistance, 0,
- "Max bytes to prefetch indirects for per stream (LEGACY)");
-
-/* dsl_pool.c */
-
-/* dnode.c */
-
-/* dsl_scan.c */
-
/* metaslab.c */
int
@@ -514,19 +313,6 @@ SYSCTL_UINT(_vfs_zfs, OID_AUTO, condense_pct,
"Condense on-disk spacemap when it is more than this many percents"
" of in-memory counterpart");
-extern uint_t zfs_remove_max_segment;
-
-SYSCTL_UINT(_vfs_zfs, OID_AUTO, remove_max_segment,
- CTLFLAG_RWTUN, &zfs_remove_max_segment, 0,
- "Largest contiguous segment ZFS will attempt to allocate when removing"
- " a device");
-
-extern int zfs_removal_suspend_progress;
-
-SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress,
- CTLFLAG_RWTUN, &zfs_removal_suspend_progress, 0,
- "Ensures certain actions can happen while in the middle of a removal");
-
/*
* Minimum size which forces the dynamic allocator to change
* its allocation strategy. Once the space map cannot satisfy
@@ -749,12 +535,6 @@ param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS)
return (0);
}
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
- CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- &zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift),
- param_set_min_auto_ashift, "IU",
- "Min ashift used when creating new top-level vdev. (LEGACY)");
-
int
param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
{
@@ -774,13 +554,6 @@ param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
return (0);
}
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
- CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- &zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift),
- param_set_max_auto_ashift, "IU",
- "Max ashift used when optimizing for logical -> physical sector size on"
- " new top-level vdevs. (LEGACY)");
-
/*
* Since the DTL space map of a vdev is not expected to have a lot of
* entries, we default its block size to 4K.
@@ -802,23 +575,6 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz,
CTLFLAG_RDTUN, &zfs_vdev_standard_sm_blksz, 0,
"Block size for standard space map. Power of 2 greater than 4096.");
-extern int vdev_validate_skip;
-
-SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip,
- CTLFLAG_RDTUN, &vdev_validate_skip, 0,
- "Enable to bypass vdev_validate().");
-
-/* vdev_mirror.c */
-
-/* vdev_queue.c */
-
-extern uint_t zfs_vdev_max_active;
-
-SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight,
- CTLFLAG_RWTUN, &zfs_vdev_max_active, 0,
- "The maximum number of I/Os of all types active for each device."
- " (LEGACY)");
-
/* zio.c */
SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata,
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
index 174141a5deab..120d97510c9e 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -61,6 +61,7 @@
#include <sys/fs/zfs.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
+#include <sys/dsl_dataset.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/dbuf.h>
@@ -5729,6 +5730,9 @@ zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
{
ulong_t val;
int error;
+#ifdef _PC_CLONE_BLKSIZE
+ zfsvfs_t *zfsvfs;
+#endif
error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
curthread->td_ucred, NULL);
@@ -5775,6 +5779,21 @@ zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
*ap->a_retval = 1;
return (0);
#endif
+#ifdef _PC_CLONE_BLKSIZE
+ case _PC_CLONE_BLKSIZE:
+ zfsvfs = (zfsvfs_t *)ap->a_vp->v_mount->mnt_data;
+ if (zfs_bclone_enabled &&
+ spa_feature_is_enabled(dmu_objset_spa(zfsvfs->z_os),
+ SPA_FEATURE_BLOCK_CLONING))
+ *ap->a_retval = dsl_dataset_feature_is_active(
+ zfsvfs->z_os->os_dsl_dataset,
+ SPA_FEATURE_LARGE_BLOCKS) ?
+ SPA_MAXBLOCKSIZE :
+ SPA_OLD_MAXBLOCKSIZE;
+ else
+ *ap->a_retval = 0;
+ return (0);
+#endif
default:
return (vop_stdpathconf(ap));
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
index 48dae79a2373..81ac26cb0c93 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
@@ -202,7 +202,7 @@ zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags)
return (!!dentry->d_inode);
}
-static dentry_operations_t zpl_dops_snapdirs = {
+static const struct dentry_operations zpl_dops_snapdirs = {
/*
* Auto mounting of snapshots is only supported for 2.6.37 and
* newer kernels. Prior to this kernel the ops->follow_link()
@@ -215,6 +215,51 @@ static dentry_operations_t zpl_dops_snapdirs = {
.d_revalidate = zpl_snapdir_revalidate,
};
+/*
+ * For the .zfs control directory to work properly we must be able to override
+ * the default operations table and register custom .d_automount and
+ * .d_revalidate callbacks.
+ */
+static void
+set_snapdir_dentry_ops(struct dentry *dentry, unsigned int extraflags) {
+ static const unsigned int op_flags =
+ DCACHE_OP_HASH | DCACHE_OP_COMPARE |
+ DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE |
+ DCACHE_OP_PRUNE | DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_REAL;
+
+#ifdef HAVE_D_SET_D_OP
+ /*
+ * d_set_d_op() will set the DCACHE_OP_ flags according to what it
+ * finds in the passed dentry_operations, so we don't have to.
+ *
+ * We clear the flags and the old op table before calling d_set_d_op()
+ * because it issues a warning when the dentry operations table is
+ * already set.
+ */
+ dentry->d_op = NULL;
+ dentry->d_flags &= ~op_flags;
+ d_set_d_op(dentry, &zpl_dops_snapdirs);
+ dentry->d_flags |= extraflags;
+#else
+ /*
+ * Since 6.17 there's no exported way to modify dentry ops, so we have
+ * to reach in and do it ourselves. This should be safe for our very
+ * narrow use case, which is to create or splice in an entry to give
+ * access to a snapshot.
+ *
+ * We need to set the op flags directly. We hardcode
+ * DCACHE_OP_REVALIDATE because that's the only operation we have; if
+ * we ever extend zpl_dops_snapdirs we will need to update the op flags
+ * to match.
+ */
+ spin_lock(&dentry->d_lock);
+ dentry->d_op = &zpl_dops_snapdirs;
+ dentry->d_flags &= ~op_flags;
+ dentry->d_flags |= DCACHE_OP_REVALIDATE | extraflags;
+ spin_unlock(&dentry->d_lock);
+#endif
+}
+
static struct dentry *
zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
unsigned int flags)
@@ -236,10 +281,7 @@ zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
return (ERR_PTR(error));
ASSERT(error == 0 || ip == NULL);
- d_clear_d_op(dentry);
- d_set_d_op(dentry, &zpl_dops_snapdirs);
- dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
-
+ set_snapdir_dentry_ops(dentry, DCACHE_NEED_AUTOMOUNT);
return (d_splice_alias(ip, dentry));
}
@@ -373,8 +415,7 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0);
if (error == 0) {
- d_clear_d_op(dentry);
- d_set_d_op(dentry, &zpl_dops_snapdirs);
+ set_snapdir_dentry_ops(dentry, 0);
d_instantiate(dentry, ip);
}
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index df41e3b49204..bd6dc8edd8ca 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -486,13 +486,13 @@ static taskq_t *arc_flush_taskq;
static uint_t zfs_arc_evict_threads = 0;
/* The 7 states: */
-arc_state_t ARC_anon;
-arc_state_t ARC_mru;
-arc_state_t ARC_mru_ghost;
-arc_state_t ARC_mfu;
-arc_state_t ARC_mfu_ghost;
-arc_state_t ARC_l2c_only;
-arc_state_t ARC_uncached;
+static arc_state_t ARC_anon;
+/* */ arc_state_t ARC_mru;
+static arc_state_t ARC_mru_ghost;
+/* */ arc_state_t ARC_mfu;
+static arc_state_t ARC_mfu_ghost;
+static arc_state_t ARC_l2c_only;
+static arc_state_t ARC_uncached;
arc_stats_t arc_stats = {
{ "hits", KSTAT_DATA_UINT64 },
@@ -832,15 +832,15 @@ typedef struct arc_async_flush {
#define L2ARC_FEED_TYPES 4
/* L2ARC Performance Tunables */
-uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */
-uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */
-uint64_t l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */
-uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
-uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */
-uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */
-int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */
-int l2arc_feed_again = B_TRUE; /* turbo warmup */
-int l2arc_norw = B_FALSE; /* no reads during writes */
+static uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */
+static uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */
+static uint64_t l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */
+static uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
+static uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */
+static uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */
+static int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */
+static int l2arc_feed_again = B_TRUE; /* turbo warmup */
+static int l2arc_norw = B_FALSE; /* no reads during writes */
static uint_t l2arc_meta_percent = 33; /* limit on headers size */
/*
diff --git a/sys/contrib/openzfs/module/zfs/ddt.c b/sys/contrib/openzfs/module/zfs/ddt.c
index d6658375f810..0dc9adc7fd4f 100644
--- a/sys/contrib/openzfs/module/zfs/ddt.c
+++ b/sys/contrib/openzfs/module/zfs/ddt.c
@@ -1701,9 +1701,11 @@ ddt_load(spa_t *spa)
}
}
- error = ddt_log_load(ddt);
- if (error != 0 && error != ENOENT)
- return (error);
+ if (ddt->ddt_flags & DDT_FLAG_LOG) {
+ error = ddt_log_load(ddt);
+ if (error != 0 && error != ENOENT)
+ return (error);
+ }
DDT_KSTAT_SET(ddt, dds_log_active_entries,
avl_numnodes(&ddt->ddt_log_active->ddl_tree));
diff --git a/sys/contrib/openzfs/module/zfs/ddt_log.c b/sys/contrib/openzfs/module/zfs/ddt_log.c
index 3d30e244c1f7..c9217cef4f7d 100644
--- a/sys/contrib/openzfs/module/zfs/ddt_log.c
+++ b/sys/contrib/openzfs/module/zfs/ddt_log.c
@@ -176,11 +176,13 @@ ddt_log_update_stats(ddt_t *ddt)
* that's reasonable to expect anyway.
*/
dmu_object_info_t doi;
- uint64_t nblocks;
- dmu_object_info(ddt->ddt_os, ddt->ddt_log_active->ddl_object, &doi);
- nblocks = doi.doi_physical_blocks_512;
- dmu_object_info(ddt->ddt_os, ddt->ddt_log_flushing->ddl_object, &doi);
- nblocks += doi.doi_physical_blocks_512;
+ uint64_t nblocks = 0;
+ if (dmu_object_info(ddt->ddt_os, ddt->ddt_log_active->ddl_object,
+ &doi) == 0)
+ nblocks += doi.doi_physical_blocks_512;
+ if (dmu_object_info(ddt->ddt_os, ddt->ddt_log_flushing->ddl_object,
+ &doi) == 0)
+ nblocks += doi.doi_physical_blocks_512;
ddt_object_t *ddo = &ddt->ddt_log_stats;
ddo->ddo_count =
diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
index 51165d0bf723..3d3a9c713568 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
@@ -57,19 +57,19 @@ static unsigned int zfetch_max_sec_reap = 2;
/* min bytes to prefetch per stream (default 2MB) */
static unsigned int zfetch_min_distance = 2 * 1024 * 1024;
/* max bytes to prefetch per stream (default 8MB) */
-unsigned int zfetch_max_distance = 8 * 1024 * 1024;
+static unsigned int zfetch_max_distance = 8 * 1024 * 1024;
#else
/* min bytes to prefetch per stream (default 4MB) */
static unsigned int zfetch_min_distance = 4 * 1024 * 1024;
/* max bytes to prefetch per stream (default 64MB) */
-unsigned int zfetch_max_distance = 64 * 1024 * 1024;
+static unsigned int zfetch_max_distance = 64 * 1024 * 1024;
#endif
/* max bytes to prefetch indirects for per stream (default 128MB) */
-unsigned int zfetch_max_idistance = 128 * 1024 * 1024;
+static unsigned int zfetch_max_idistance = 128 * 1024 * 1024;
/* max request reorder distance within a stream (default 16MB) */
-unsigned int zfetch_max_reorder = 16 * 1024 * 1024;
+static unsigned int zfetch_max_reorder = 16 * 1024 * 1024;
/* Max log2 fraction of holes in a stream */
-unsigned int zfetch_hole_shift = 2;
+static unsigned int zfetch_hole_shift = 2;
typedef struct zfetch_stats {
kstat_named_t zfetchstat_hits;
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index 9cf35e379000..ed04ce0c86eb 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -100,7 +100,7 @@ static uint_t zfs_vdev_default_ms_shift = 29;
/* upper limit for metaslab size (16G) */
static uint_t zfs_vdev_max_ms_shift = 34;
-int vdev_validate_skip = B_FALSE;
+static int vdev_validate_skip = B_FALSE;
/*
* Since the DTL space map of a vdev is not expected to have a lot of
diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c
index c12713b107bf..e69e5598939e 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_queue.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c
@@ -122,7 +122,7 @@
* The maximum number of i/os active to each device. Ideally, this will be >=
* the sum of each queue's max_active.
*/
-uint_t zfs_vdev_max_active = 1000;
+static uint_t zfs_vdev_max_active = 1000;
/*
* Per-queue limits on the number of i/os active to each device. If the
diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c
index 2f7a739da241..2ce0121324ad 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_removal.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c
@@ -105,7 +105,7 @@ static const uint_t zfs_remove_max_copy_bytes = 64 * 1024 * 1024;
*
* See also the accessor function spa_remove_max_segment().
*/
-uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE;
+static uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE;
/*
* Ignore hard IO errors during device removal. When set if a device
@@ -137,7 +137,7 @@ uint_t vdev_removal_max_span = 32 * 1024;
* This is used by the test suite so that it can ensure that certain
* actions happen while in the middle of a removal.
*/
-int zfs_removal_suspend_progress = 0;
+static int zfs_removal_suspend_progress = 0;
#define VDEV_REMOVAL_ZAP_OBJS "lzap"
diff --git a/sys/contrib/openzfs/module/zfs/zfeature.c b/sys/contrib/openzfs/module/zfs/zfeature.c
index 0816ea134bf3..4cf9e0dbb405 100644
--- a/sys/contrib/openzfs/module/zfs/zfeature.c
+++ b/sys/contrib/openzfs/module/zfs/zfeature.c
@@ -308,6 +308,7 @@ feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount,
ASSERT(VALID_FEATURE_OR_NONE(feature->fi_feature));
uint64_t zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ?
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
+ ASSERT(MUTEX_HELD(&spa->spa_feat_stats_lock));
VERIFY0(zap_update(spa->spa_meta_objset, zapobj, feature->fi_guid,
sizeof (uint64_t), 1, &refcount, tx));
@@ -360,7 +361,9 @@ feature_enable_sync(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
feature->fi_guid, 1, strlen(feature->fi_desc) + 1,
feature->fi_desc, tx));
+ mutex_enter(&spa->spa_feat_stats_lock);
feature_sync(spa, feature, initial_refcount, tx);
+ mutex_exit(&spa->spa_feat_stats_lock);
if (spa_feature_is_enabled(spa, SPA_FEATURE_ENABLED_TXG)) {
uint64_t enabling_txg = dmu_tx_get_txg(tx);
@@ -416,6 +419,7 @@ feature_do_action(spa_t *spa, spa_feature_t fid, feature_action_t action,
ASSERT(dmu_tx_is_syncing(tx));
ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
+ mutex_enter(&spa->spa_feat_stats_lock);
VERIFY3U(feature_get_refcount(spa, feature, &refcount), !=, ENOTSUP);
switch (action) {
@@ -433,6 +437,7 @@ feature_do_action(spa_t *spa, spa_feature_t fid, feature_action_t action,
}
feature_sync(spa, feature, refcount, tx);
+ mutex_exit(&spa->spa_feat_stats_lock);
}
void
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index 121b966b9864..76c9d4ccd51f 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -4726,7 +4726,7 @@ zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
error = error ? error : resume_err;
}
zfs_vfs_rele(zfsvfs);
- } else if ((zv = zvol_suspend(fsname)) != NULL) {
+ } else if (zvol_suspend(fsname, &zv) == 0) {
error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
outnvl);
zvol_resume(zv);
@@ -5448,7 +5448,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, const char *origin,
}
error = error ? error : end_err;
zfs_vfs_rele(zfsvfs);
- } else if ((zv = zvol_suspend(tofs)) != NULL) {
+ } else if (zvol_suspend(tofs, &zv) == 0) {
error = dmu_recv_end(&drc, zvol_tag(zv));
zvol_resume(zv);
} else {
diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c
index 2fd3e1c37045..faced0db7e9e 100644
--- a/sys/contrib/openzfs/module/zfs/zvol.c
+++ b/sys/contrib/openzfs/module/zfs/zvol.c
@@ -1145,20 +1145,34 @@ zvol_tag(zvol_state_t *zv)
/*
* Suspend the zvol for recv and rollback.
*/
-zvol_state_t *
-zvol_suspend(const char *name)
+int
+zvol_suspend(const char *name, zvol_state_t **zvp)
{
zvol_state_t *zv;
zv = zvol_find_by_name(name, RW_WRITER);
if (zv == NULL)
- return (NULL);
+ return (SET_ERROR(ENOENT));
/* block all I/O, release in zvol_resume. */
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
ASSERT(RW_WRITE_HELD(&zv->zv_suspend_lock));
+ /*
+ * If it's being removed, unlock and return error. It doesn't make any
+ * sense to try to suspend a zvol being removed, but being here also
+ * means that zvol_remove_minors_impl() is about to call zvol_remove()
+ * and then destroy the zvol_state_t, so returning a pointer to it for
+ * the caller to mess with would be a disaster anyway.
+ */
+ if (zv->zv_flags & ZVOL_REMOVING) {
+ mutex_exit(&zv->zv_state_lock);
+ rw_exit(&zv->zv_suspend_lock);
+ /* NB: Returning EIO here to match zfsvfs_teardown() */
+ return (SET_ERROR(EIO));
+ }
+
atomic_inc(&zv->zv_suspend_ref);
if (zv->zv_open_count > 0)
@@ -1171,7 +1185,8 @@ zvol_suspend(const char *name)
mutex_exit(&zv->zv_state_lock);
/* zv_suspend_lock is released in zvol_resume() */
- return (zv);
+ *zvp = zv;
+ return (0);
}
int
diff --git a/sys/contrib/openzfs/rpm/generic/zfs.spec.in b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
index 1ce668e7b86d..edcfdd2d7136 100644
--- a/sys/contrib/openzfs/rpm/generic/zfs.spec.in
+++ b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
@@ -509,7 +509,9 @@ systemctl --system daemon-reload >/dev/null || true
%{_bindir}/zvol_wait
# Optional Python 3 scripts
%{_bindir}/arc_summary
+%{_bindir}/zarcsummary
%{_bindir}/arcstat
+%{_bindir}/zarcstat
%{_bindir}/dbufstat
%{_bindir}/zilstat
# Man pages
diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run
index f3d56acffde0..ba367fad402b 100644
--- a/sys/contrib/openzfs/tests/runfiles/linux.run
+++ b/sys/contrib/openzfs/tests/runfiles/linux.run
@@ -161,7 +161,7 @@ tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval',
tags = ['functional', 'mmp']
[tests/functional/mount:Linux]
-tests = ['umount_unlinked_drain']
+tests = ['umount_unlinked_drain', 'mount_loopback']
tags = ['functional', 'mount']
[tests/functional/pam:Linux]
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
index 884a99d785bc..580281b30d7e 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
@@ -100,6 +100,7 @@ export SYSTEM_FILES_COMMON='awk
uniq
vmstat
wc
+ which
xargs
xxh128sum'
@@ -146,6 +147,7 @@ export SYSTEM_FILES_LINUX='attr
lscpu
lsmod
lsscsi
+ mkfs.xfs
mkswap
modprobe
mountpoint
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
index e273c9f85c28..f2d7ceac0cbb 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
@@ -73,8 +73,8 @@ OVERRIDE_ESTIMATE_RECORDSIZE send.override_estimate_recordsize zfs_override_esti
PREFETCH_DISABLE prefetch.disable zfs_prefetch_disable
RAIDZ_EXPAND_MAX_REFLOW_BYTES vdev.expand_max_reflow_bytes raidz_expand_max_reflow_bytes
REBUILD_SCRUB_ENABLED rebuild_scrub_enabled zfs_rebuild_scrub_enabled
-REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress
-REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment
+REMOVAL_SUSPEND_PROGRESS vdev.removal_suspend_progress zfs_removal_suspend_progress
+REMOVE_MAX_SEGMENT vdev.remove_max_segment zfs_remove_max_segment
RESILVER_MIN_TIME_MS resilver_min_time_ms zfs_resilver_min_time_ms
RESILVER_DEFER_PERCENT resilver_defer_percent zfs_resilver_defer_percent
SCAN_LEGACY scan_legacy zfs_scan_legacy
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
index 41e7b45ef4ec..94db292c9518 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -1706,6 +1706,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/mmp/setup.ksh \
functional/mount/cleanup.ksh \
functional/mount/setup.ksh \
+ functional/mount/mount_loopback.ksh \
functional/mount/umount_001.ksh \
functional/mount/umountall_001.ksh \
functional/mount/umount_unlinked_drain.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh
index 1b3310edb98b..45b041503e22 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh
@@ -67,7 +67,7 @@ log_must zpool create -f ${TESTPOOL} raidz${PARITY} ${disks[1..$((VDEV_CNT - 1))
# Add some data to the pool
log_must zfs create $TESTPOOL/fs
MNTPOINT="$(get_prop mountpoint $TESTPOOL/fs)"
-log_must fill_fs $MNTPOINT $PARITY 200 32768 1000 Z
+log_must fill_fs $MNTPOINT $PARITY 200 32768 100 R
sync_pool $TESTPOOL
# Replace the last child vdev to form a replacing vdev
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mount/mount_loopback.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mount/mount_loopback.ksh
new file mode 100755
index 000000000000..86adef7ea032
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mount/mount_loopback.ksh
@@ -0,0 +1,111 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+# Copyright (c) 2025 by Lawrence Livermore National Security, LLC.
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify that we can make an xfs filesystem on a ZFS-backed loopback device.
+#
+# See:
+# https://github.com/openzfs/zfs/pull/17298
+# https://github.com/openzfs/zfs/issues/17277
+#
+# STRATEGY:
+# 1. Make a pool
+# 2. Make a file on the pool or create zvol
+# 3. Mount the file/zvol behind a loopback device
+# 4. Create & mount an xfs filesystem on the loopback device
+
+# Exit handler for this test (installed with log_onexit below): tears down
+# the mount point, loop device, pool and backing file used by the test.
+function cleanup
+{
+ # do_test creates this directory and removes it again when it finishes,
+ # so it is only present here if do_test did not run to completion.
+ # umount is not wrapped in log_must, presumably because the directory
+ # may exist without anything mounted on it.
+ if [ -d $TEST_BASE_DIR/mnt ] ; then
+ umount $TEST_BASE_DIR/mnt
+ log_must rmdir $TEST_BASE_DIR/mnt
+ fi
+ # DEV is set by do_test and reset to "" once the loop device has been
+ # detached, so a non-empty value means a loop device is still attached.
+ if [ -n "$DEV" ] ; then
+ log_must losetup -d $DEV
+ fi
+ destroy_pool $TESTPOOL2
+ log_must rm -f $TEST_BASE_DIR/file1
+}
+
+if [ ! -x "$(which mkfs.xfs)" ] ; then
+ log_unsupported "No mkfs.xfs binary"
+fi
+
+if [ ! -d /lib/modules/$(uname -r)/kernel/fs/xfs ] && \
+ ! grep -qE '\sxfs$' /proc/filesystems ; then
+ log_unsupported "No XFS kernel support"
+fi
+
+log_assert "Make an xfs filesystem on a ZFS-backed loopback device"
+log_onexit cleanup
+
+# fio options
+export NUMJOBS=2
+export RUNTIME=3
+export PERF_RANDSEED=1234
+export PERF_COMPPERCENT=66
+export PERF_COMPCHUNK=0
+export BLOCKSIZE=128K
+export SYNC_TYPE=0
+export FILE_SIZE=$(( 1024 * 1024 ))
+
+# Run the loopback/XFS exercise against one backing object.
+#
+# $1 - path of the file or block device to attach to a loop device.
+#
+# Attaches $1 to a free loop device, creates an XFS filesystem on it, mounts
+# it at $TEST_BASE_DIR/mnt, runs the fio mkfiles and random_reads jobs once
+# with DIRECT=0 and once with DIRECT=1, then unmounts and detaches again.
+# Sets the global DEV while the loop device is attached (read by cleanup).
+function do_test
+{
+ imgfile=$1
+ log_note "Running test on $imgfile"
+ log_must losetup -f $imgfile
+ # Look up which /dev/loopN device losetup picked for the image.
+ DEV=$(losetup --associated $imgfile | grep -Eo '^/dev/loop[0-9]+')
+ log_must mkfs.xfs $DEV
+ mkdir $TEST_BASE_DIR/mnt
+ log_must mount $DEV $TEST_BASE_DIR/mnt
+ # DIRECTORY and DIRECT are exported for the fio job files.
+ export DIRECTORY=$TEST_BASE_DIR/mnt
+
+ for d in 0 1 ; do
+ # fio options
+ export DIRECT=$d
+ log_must fio $FIO_SCRIPTS/mkfiles.fio
+ log_must fio $FIO_SCRIPTS/random_reads.fio
+ done
+ log_must umount $TEST_BASE_DIR/mnt
+ log_must rmdir $TEST_BASE_DIR/mnt
+ log_must losetup -d $DEV
+ # Tell cleanup that there is no loop device left to detach.
+ DEV=""
+}
+
+log_must truncate -s 1G $TEST_BASE_DIR/file1
+log_must zpool create $TESTPOOL2 $TEST_BASE_DIR/file1
+log_must truncate -s 512M /$TESTPOOL2/img
+do_test /$TESTPOOL2/img
+log_must rm /$TESTPOOL2/img
+log_must zfs create -V 512M $TESTPOOL2/vol
+
+blkdev="$ZVOL_DEVDIR/$TESTPOOL2/vol"
+block_device_wait $blkdev
+do_test $blkdev
+
+log_pass "Verified xfs filesystem on a ZFS-backed loopback device"
diff --git a/sys/ddb/db_ps.c b/sys/ddb/db_ps.c
index 733c440f5ee3..a26cf8161294 100644
--- a/sys/ddb/db_ps.c
+++ b/sys/ddb/db_ps.c
@@ -459,12 +459,11 @@ DB_SHOW_COMMAND(proc, db_show_proc)
db_printf("??? (%#x)\n", p->p_state);
}
if (p->p_ucred != NULL) {
- db_printf(" uid: %d gids: ", p->p_ucred->cr_uid);
- for (i = 0; i < p->p_ucred->cr_ngroups; i++) {
- db_printf("%d", p->p_ucred->cr_groups[i]);
- if (i < (p->p_ucred->cr_ngroups - 1))
- db_printf(", ");
- }
+ db_printf(" uid: %d gid: %d supp gids: ",
+ p->p_ucred->cr_uid, p->p_ucred->cr_gid);
+ for (i = 0; i < p->p_ucred->cr_ngroups; i++)
+ db_printf(i == 0 ? "%d" : ", %d",
+ p->p_ucred->cr_groups[i]);
db_printf("\n");
}
if (p->p_pptr != NULL)
diff --git a/sys/dev/ahci/ahci_pci.c b/sys/dev/ahci/ahci_pci.c
index f29d803e99a8..82f56fc0d19e 100644
--- a/sys/dev/ahci/ahci_pci.c
+++ b/sys/dev/ahci/ahci_pci.c
@@ -195,6 +195,7 @@ static const struct {
{0x1f3f8086, 0x00, "Intel Avoton (RAID)", 0},
{0x23a38086, 0x00, "Intel Coleto Creek", 0},
{0x31e38086, 0x00, "Intel Gemini Lake", 0},
+ {0x4b638086, 0x00, "Intel Elkhart Lake", 0},
{0x5ae38086, 0x00, "Intel Apollo Lake", 0},
{0x7ae28086, 0x00, "Intel Alder Lake", 0},
{0x8c028086, 0x00, "Intel Lynx Point", 0},
diff --git a/sys/dev/ath/ath_rate/sample/sample.c b/sys/dev/ath/ath_rate/sample/sample.c
index 291d1ec64ed7..79bf08678249 100644
--- a/sys/dev/ath/ath_rate/sample/sample.c
+++ b/sys/dev/ath/ath_rate/sample/sample.c
@@ -179,7 +179,7 @@ ath_rate_sample_find_min_pktlength(struct ath_softc *sc,
const struct txschedule *sched = &sn->sched[rix0];
int max_pkt_length = 65530; // ATH_AGGR_MAXSIZE
// Note: this may not be true in all cases; need to check?
- int is_ht40 = (an->an_node.ni_chw == IEEE80211_STA_RX_BW_40);
+ int is_ht40 = (an->an_node.ni_chw == NET80211_STA_RX_BW_40);
// Note: not great, but good enough..
int idx = is_ht40 ? MCS_HT40 : MCS_HT20;
@@ -979,7 +979,7 @@ update_stats(struct ath_softc *sc, struct ath_node *an,
const int size_bin = size_to_bin(frame_size);
const int size = bin_to_size(size_bin);
int tt;
- int is_ht40 = (an->an_node.ni_chw == IEEE80211_STA_RX_BW_40);
+ int is_ht40 = (an->an_node.ni_chw == NET80211_STA_RX_BW_40);
int pct;
if (!IS_RATE_DEFINED(sn, rix0))
@@ -1365,7 +1365,7 @@ ath_rate_ctl_reset(struct ath_softc *sc, struct ieee80211_node *ni)
continue;
printf(" %d %s/%d", dot11rate(rt, rix), dot11rate_label(rt, rix),
calc_usecs_unicast_packet(sc, 1600, rix, 0,0,
- (ni->ni_chw == IEEE80211_STA_RX_BW_40)));
+ (ni->ni_chw == NET80211_STA_RX_BW_40)));
}
printf("\n");
}
@@ -1396,7 +1396,7 @@ ath_rate_ctl_reset(struct ath_softc *sc, struct ieee80211_node *ni)
sn->stats[y][rix].perfect_tx_time =
calc_usecs_unicast_packet(sc, size, rix, 0, 0,
- (ni->ni_chw == IEEE80211_STA_RX_BW_40));
+ (ni->ni_chw == NET80211_STA_RX_BW_40));
sn->stats[y][rix].average_tx_time =
sn->stats[y][rix].perfect_tx_time;
}
diff --git a/sys/dev/ath/if_ath_tx_ht.c b/sys/dev/ath/if_ath_tx_ht.c
index e7ee029fecf0..f42058bacb0d 100644
--- a/sys/dev/ath/if_ath_tx_ht.c
+++ b/sys/dev/ath/if_ath_tx_ht.c
@@ -283,7 +283,7 @@ ath_tx_rate_fill_rcflags(struct ath_softc *sc, struct ath_buf *bf)
if (IS_HT_RATE(rate)) {
rc[i].flags |= ATH_RC_HT_FLAG;
- if (ni->ni_chw == IEEE80211_STA_RX_BW_40)
+ if (ni->ni_chw == NET80211_STA_RX_BW_40)
rc[i].flags |= ATH_RC_CW40_FLAG;
/*
@@ -295,13 +295,13 @@ ath_tx_rate_fill_rcflags(struct ath_softc *sc, struct ath_buf *bf)
* and doesn't return the fractional part, so
* we are always "out" by some amount.
*/
- if (ni->ni_chw == IEEE80211_STA_RX_BW_40 &&
+ if (ni->ni_chw == NET80211_STA_RX_BW_40 &&
ieee80211_ht_check_tx_shortgi_40(ni) &&
(bf->bf_flags & ATH_BUF_TOA_PROBE) == 0) {
rc[i].flags |= ATH_RC_SGI_FLAG;
}
- if (ni->ni_chw == IEEE80211_STA_RX_BW_20 &&
+ if (ni->ni_chw == NET80211_STA_RX_BW_20 &&
ieee80211_ht_check_tx_shortgi_20(ni) &&
(bf->bf_flags & ATH_BUF_TOA_PROBE) == 0) {
rc[i].flags |= ATH_RC_SGI_FLAG;
diff --git a/sys/dev/axgbe/if_axgbe_pci.c b/sys/dev/axgbe/if_axgbe_pci.c
index 290156ff11ca..6bc4bd33e162 100644
--- a/sys/dev/axgbe/if_axgbe_pci.c
+++ b/sys/dev/axgbe/if_axgbe_pci.c
@@ -2415,7 +2415,8 @@ axgbe_if_get_counter(if_ctx_t ctx, ift_counter cnt)
case IFCOUNTER_OPACKETS:
return (pstats->txframecount_gb);
case IFCOUNTER_OERRORS:
- return (pstats->txframecount_gb - pstats->txframecount_g);
+ return (if_get_counter_default(ifp, cnt) +
+ pstats->txframecount_gb - pstats->txframecount_g);
case IFCOUNTER_IBYTES:
return (pstats->rxoctetcount_gb);
case IFCOUNTER_OBYTES:
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 9e91250cb61c..9756a6945384 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -9016,7 +9016,7 @@ sysctl_loadavg(SYSCTL_HANDLER_ARGS)
rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4lavg");
if (rc)
return (rc);
- if (hw_all_ok(sc))
+ if (!hw_all_ok(sc))
rc = ENXIO;
else {
param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
diff --git a/sys/dev/cyapa/cyapa.c b/sys/dev/cyapa/cyapa.c
index 50fa4faa560a..ed755f992949 100644
--- a/sys/dev/cyapa/cyapa.c
+++ b/sys/dev/cyapa/cyapa.c
@@ -761,42 +761,60 @@ again:
/*
* Generate report
*/
- c0 = 0;
- if (delta_x < 0)
- c0 |= 0x10;
- if (delta_y < 0)
- c0 |= 0x20;
- c0 |= 0x08;
- if (but & CYAPA_FNGR_LEFT)
- c0 |= 0x01;
- if (but & CYAPA_FNGR_MIDDLE)
- c0 |= 0x04;
- if (but & CYAPA_FNGR_RIGHT)
- c0 |= 0x02;
-
- fifo_write_char(sc, &sc->rfifo, c0);
- fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_x);
- fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_y);
- switch(sc->zenabled) {
- case 1:
- /* Z axis all 8 bits */
- fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_z);
- break;
- case 2:
- /*
- * Z axis low 4 bits + 4th button and 5th button
- * (high 2 bits must be left 0). Auto-scale
- * delta_z to fit to avoid a wrong-direction
- * overflow (don't try to retain the remainder).
- */
- while (delta_z > 7 || delta_z < -8)
- delta_z >>= 1;
- c0 = (uint8_t)delta_z & 0x0F;
+ if (sc->mode.level == 1) {
+ c0 = MOUSE_SYS_SYNC;
+ if (but & CYAPA_FNGR_LEFT)
+ c0 |= MOUSE_SYS_BUTTON1UP;
+ if (but & CYAPA_FNGR_MIDDLE)
+ c0 |= MOUSE_SYS_BUTTON2UP;
+ if (but & CYAPA_FNGR_RIGHT)
+ c0 |= MOUSE_SYS_BUTTON3UP;
fifo_write_char(sc, &sc->rfifo, c0);
- break;
- default:
- /* basic PS/2 */
- break;
+ fifo_write_char(sc, &sc->rfifo, delta_x >> 1);
+ fifo_write_char(sc, &sc->rfifo, delta_y >> 1);
+ fifo_write_char(sc, &sc->rfifo, delta_x - (delta_x >> 1));
+ fifo_write_char(sc, &sc->rfifo, delta_y - (delta_y >> 1));
+ fifo_write_char(sc, &sc->rfifo, delta_z >> 1);
+ fifo_write_char(sc, &sc->rfifo, delta_z - (delta_z >> 1));
+ fifo_write_char(sc, &sc->rfifo, MOUSE_SYS_EXTBUTTONS);
+ } else {
+ c0 = 0;
+ if (delta_x < 0)
+ c0 |= 0x10;
+ if (delta_y < 0)
+ c0 |= 0x20;
+ c0 |= 0x08;
+ if (but & CYAPA_FNGR_LEFT)
+ c0 |= 0x01;
+ if (but & CYAPA_FNGR_MIDDLE)
+ c0 |= 0x04;
+ if (but & CYAPA_FNGR_RIGHT)
+ c0 |= 0x02;
+
+ fifo_write_char(sc, &sc->rfifo, c0);
+ fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_x);
+ fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_y);
+ switch(sc->zenabled) {
+ case 1:
+ /* Z axis all 8 bits */
+ fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_z);
+ break;
+ case 2:
+ /*
+ * Z axis low 4 bits + 4th button and 5th button
+ * (high 2 bits must be left 0). Auto-scale
+ * delta_z to fit to avoid a wrong-direction
+ * overflow (don't try to retain the remainder).
+ */
+ while (delta_z > 7 || delta_z < -8)
+ delta_z >>= 1;
+ c0 = (uint8_t)delta_z & 0x0F;
+ fifo_write_char(sc, &sc->rfifo, c0);
+ break;
+ default:
+ /* basic PS/2 */
+ break;
+ }
}
cyapa_notify(sc);
}
@@ -1205,6 +1223,11 @@ cyapaioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread
((mousemode_t *)data)->packetsize =
MOUSE_PS2_PACKETSIZE;
break;
+ case 1:
+ ((mousemode_t *)data)->protocol = MOUSE_PROTO_SYSMOUSE;
+ ((mousemode_t *)data)->packetsize =
+ MOUSE_SYS_PACKETSIZE;
+ break;
case 2:
((mousemode_t *)data)->protocol = MOUSE_PROTO_PS2;
((mousemode_t *)data)->packetsize =
@@ -1223,7 +1246,7 @@ cyapaioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread
error = EINVAL;
break;
}
- sc->mode.level = *(int *)data ? 2 : 0;
+ sc->mode.level = *(int *)data;
sc->zenabled = sc->mode.level ? 1 : 0;
break;
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index 9c5ae2806f75..60959fe679b8 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -4782,8 +4782,8 @@ em_if_get_counter(if_ctx_t ctx, ift_counter cnt)
sc->stats.ruc + sc->stats.roc +
sc->stats.mpc + sc->stats.cexterr);
case IFCOUNTER_OERRORS:
- return (sc->stats.ecol + sc->stats.latecol +
- sc->watchdog_events);
+ return (if_get_counter_default(ifp, cnt) +
+ sc->stats.ecol + sc->stats.latecol + sc->watchdog_events);
default:
return (if_get_counter_default(ifp, cnt));
}
diff --git a/sys/dev/enetc/if_enetc.c b/sys/dev/enetc/if_enetc.c
index 3a5d6ec23282..808397b229a7 100644
--- a/sys/dev/enetc/if_enetc.c
+++ b/sys/dev/enetc/if_enetc.c
@@ -1343,7 +1343,8 @@ enetc_get_counter(if_ctx_t ctx, ift_counter cnt)
case IFCOUNTER_IERRORS:
return (ENETC_PORT_RD8(sc, ENETC_PM0_RERR));
case IFCOUNTER_OERRORS:
- return (ENETC_PORT_RD8(sc, ENETC_PM0_TERR));
+ return (if_get_counter_default(ifp, cnt) +
+ ENETC_PORT_RD8(sc, ENETC_PM0_TERR));
default:
return (if_get_counter_default(ifp, cnt));
}
diff --git a/sys/dev/gpio/gpio_if.m b/sys/dev/gpio/gpio_if.m
index 5501b2b5c0e7..0b6988ceba79 100644
--- a/sys/dev/gpio/gpio_if.m
+++ b/sys/dev/gpio/gpio_if.m
@@ -62,6 +62,22 @@ CODE {
return (0);
}
+
+	/*
+	 * Default get_pin_list method: fill pin_list with the identity
+	 * mapping 0..GPIO_PIN_MAX(dev), inclusive.  The caller must therefore
+	 * provide room for GPIO_PIN_MAX(dev) + 1 entries.
+	 *
+	 * Returns 0 on success, or ENXIO if GPIO_PIN_MAX() fails.
+	 */
+	static int
+	gpio_default_get_pin_list(device_t dev, uint32_t *pin_list)
+	{
+		uint32_t maxpin, pin;
+		int err;
+
+		err = GPIO_PIN_MAX(dev, &maxpin);
+		if (err != 0)
+			return (ENXIO);
+
+		/*
+		 * Count with the same unsigned type as maxpin and stop on
+		 * equality, so the comparison does not mix signedness and
+		 * the loop terminates for every possible maxpin value.
+		 */
+		pin = 0;
+		do {
+			pin_list[pin] = pin;
+		} while (pin++ != maxpin);
+
+		return (0);
+	}
};
HEADER {
@@ -185,3 +201,13 @@ METHOD int pin_config_32 {
uint32_t num_pins;
uint32_t *pin_flags;
} DEFAULT gpio_default_nosupport;
+
+#
+# Get the controller's pin numbers. pin_list is expected to be an array with at
+# least GPIO_PIN_MAX() + 1 elements. By default, populates pin_list with the
+# values 0 through GPIO_PIN_MAX(), inclusive.
+#
+METHOD int get_pin_list {
+ device_t dev;
+ uint32_t *pin_list;
+} DEFAULT gpio_default_get_pin_list;
diff --git a/sys/dev/gpio/gpiobus.c b/sys/dev/gpio/gpiobus.c
index 5f1f6532a79b..698b5e5fdd01 100644
--- a/sys/dev/gpio/gpiobus.c
+++ b/sys/dev/gpio/gpiobus.c
@@ -319,10 +319,6 @@ gpiobus_add_bus(device_t dev)
busdev = device_add_child(dev, "gpiobus", DEVICE_UNIT_ANY);
if (busdev == NULL)
return (NULL);
- if (device_add_child(dev, "gpioc", DEVICE_UNIT_ANY) == NULL) {
- device_delete_child(dev, busdev);
- return (NULL);
- }
#ifdef FDT
ofw_gpiobus_register_provider(dev);
#endif
@@ -372,6 +368,37 @@ gpiobus_init_softc(device_t dev)
}
+/*
+ * Add a "gpioc" child to the gpiobus device `dev` and give it access to the
+ * bus's pins: the child's ivars are sized to sc_npins and its pin table is
+ * filled in by GPIO_GET_PIN_LIST() on the bus's sc_dev.
+ *
+ * Returns 0 on success, ENXIO if the child cannot be added, or the error
+ * from gpiobus_alloc_ivars() / GPIO_GET_PIN_LIST().  The child is deleted
+ * again on every failure path.
+ */
int
+gpiobus_add_gpioc(device_t dev)
+{
+ struct gpiobus_ivar *devi;
+ struct gpiobus_softc *sc;
+ device_t gpioc;
+ int err;
+
+ gpioc = BUS_ADD_CHILD(dev, 0, "gpioc", DEVICE_UNIT_ANY);
+ if (gpioc == NULL)
+ return (ENXIO);
+
+ sc = device_get_softc(dev);
+ devi = device_get_ivars(gpioc);
+
+ devi->npins = sc->sc_npins;
+ err = gpiobus_alloc_ivars(devi);
+ if (err != 0) {
+ device_delete_child(dev, gpioc);
+ return (err);
+ }
+
+ err = GPIO_GET_PIN_LIST(sc->sc_dev, devi->pins);
+ if (err != 0) {
+ /*
+ * NOTE(review): devi is the ivars of the child that is deleted
+ * on the next line.  If the bus's child-deletion path releases
+ * the ivars, the gpiobus_free_ivars() call after it touches
+ * freed memory -- confirm, and consider freeing before deleting.
+ */
+ device_delete_child(dev, gpioc);
+ gpiobus_free_ivars(devi);
+ }
+
+ return (err);
+}
+
+int
gpiobus_alloc_ivars(struct gpiobus_ivar *devi)
{
@@ -562,6 +589,10 @@ gpiobus_attach(device_t dev)
if (err != 0)
return (err);
+ err = gpiobus_add_gpioc(dev);
+ if (err != 0)
+ return (err);
+
/*
* Get parent's pins and mark them as unmapped
*/
@@ -961,7 +992,7 @@ gpiobus_pin_getflags(device_t dev, device_t child, uint32_t pin,
if (pin >= devi->npins)
return (EINVAL);
- return GPIO_PIN_GETFLAGS(sc->sc_dev, devi->pins[pin], flags);
+ return (GPIO_PIN_GETFLAGS(sc->sc_dev, devi->pins[pin], flags));
}
static int
@@ -974,7 +1005,7 @@ gpiobus_pin_getcaps(device_t dev, device_t child, uint32_t pin,
if (pin >= devi->npins)
return (EINVAL);
- return GPIO_PIN_GETCAPS(sc->sc_dev, devi->pins[pin], caps);
+ return (GPIO_PIN_GETCAPS(sc->sc_dev, devi->pins[pin], caps));
}
static int
@@ -987,7 +1018,7 @@ gpiobus_pin_set(device_t dev, device_t child, uint32_t pin,
if (pin >= devi->npins)
return (EINVAL);
- return GPIO_PIN_SET(sc->sc_dev, devi->pins[pin], value);
+ return (GPIO_PIN_SET(sc->sc_dev, devi->pins[pin], value));
}
static int
@@ -1000,7 +1031,7 @@ gpiobus_pin_get(device_t dev, device_t child, uint32_t pin,
if (pin >= devi->npins)
return (EINVAL);
- return GPIO_PIN_GET(sc->sc_dev, devi->pins[pin], value);
+ return (GPIO_PIN_GET(sc->sc_dev, devi->pins[pin], value));
}
static int
@@ -1012,7 +1043,57 @@ gpiobus_pin_toggle(device_t dev, device_t child, uint32_t pin)
if (pin >= devi->npins)
return (EINVAL);
- return GPIO_PIN_TOGGLE(sc->sc_dev, devi->pins[pin]);
+ return (GPIO_PIN_TOGGLE(sc->sc_dev, devi->pins[pin]));
+}
+
+/*
+ * Verify that a child has all the pins they are requesting
+ * to access in their ivars.
+ *
+ * devi is the child's ivars, first_pin the child-relative index of the first
+ * pin and num_pins the number of pins requested.  Returns true only if
+ * first_pin is a valid index into devi->pins, the range of num_pins entries
+ * starting there fits inside the table, and the controller pin numbers stored
+ * in that range are consecutive.
+ *
+ * The range check is done by subtraction rather than by computing
+ * first_pin + num_pins, so a large first_pin cannot wrap the 32-bit sum and
+ * slip through.  Requiring first_pin < npins also keeps callers from indexing
+ * devi->pins[first_pin] one past the end when num_pins is 0.
+ */
+static bool
+gpiobus_pin_verify_32(struct gpiobus_ivar *devi, uint32_t first_pin,
+    uint32_t num_pins)
+{
+	if (first_pin >= devi->npins || num_pins > devi->npins - first_pin)
+		return (false);
+
+	/*
+	 * Make sure the pins are consecutive.  Counting the offset from 1
+	 * avoids evaluating "num_pins - 1", which underflows for num_pins 0.
+	 */
+	for (uint32_t i = 1; i < num_pins; i++) {
+		if (devi->pins[first_pin + i - 1] + 1 !=
+		    devi->pins[first_pin + i])
+			return (false);
+	}
+
+	return (true);
+}
+
+/*
+ * gpiobus pin_access_32 method: forward a 32-pin read/modify request from
+ * `child` to the GPIO controller.
+ *
+ * first_pin is an index into the child's pin table.  The request is rejected
+ * with EINVAL unless gpiobus_pin_verify_32() accepts a run of 32 pins
+ * starting there; otherwise the result of GPIO_PIN_ACCESS_32() on the
+ * controller (sc_dev), called with the mapped pin number, is returned.
+ */
+static int
+gpiobus_pin_access_32(device_t dev, device_t child, uint32_t first_pin,
+ uint32_t clear_pins, uint32_t change_pins, uint32_t *orig_pins)
+{
+ struct gpiobus_softc *sc = GPIOBUS_SOFTC(dev);
+ struct gpiobus_ivar *devi = GPIOBUS_IVAR(child);
+
+ if (!gpiobus_pin_verify_32(devi, first_pin, 32))
+ return (EINVAL);
+
+ return (GPIO_PIN_ACCESS_32(sc->sc_dev, devi->pins[first_pin],
+ clear_pins, change_pins, orig_pins));
+}
+
+/*
+ * gpiobus pin_config_32 method: forward a request to configure up to 32
+ * pins from `child` to the GPIO controller.
+ *
+ * first_pin is an index into the child's pin table and num_pins the number
+ * of pins to configure.  Returns EINVAL if num_pins exceeds 32 or if
+ * gpiobus_pin_verify_32() rejects the range; otherwise returns the result of
+ * GPIO_PIN_CONFIG_32() on the controller (sc_dev), called with the mapped
+ * pin number of first_pin.
+ */
+static int
+gpiobus_pin_config_32(device_t dev, device_t child, uint32_t first_pin,
+ uint32_t num_pins, uint32_t *pin_flags)
+{
+ struct gpiobus_softc *sc = GPIOBUS_SOFTC(dev);
+ struct gpiobus_ivar *devi = GPIOBUS_IVAR(child);
+
+ if (num_pins > 32)
+ return (EINVAL);
+ if (!gpiobus_pin_verify_32(devi, first_pin, num_pins))
+ return (EINVAL);
+
+ return (GPIO_PIN_CONFIG_32(sc->sc_dev,
+ devi->pins[first_pin], num_pins, pin_flags));
+}
static int
@@ -1093,6 +1174,8 @@ static device_method_t gpiobus_methods[] = {
DEVMETHOD(gpiobus_pin_get, gpiobus_pin_get),
DEVMETHOD(gpiobus_pin_set, gpiobus_pin_set),
DEVMETHOD(gpiobus_pin_toggle, gpiobus_pin_toggle),
+ DEVMETHOD(gpiobus_pin_access_32,gpiobus_pin_access_32),
+ DEVMETHOD(gpiobus_pin_config_32,gpiobus_pin_config_32),
DEVMETHOD(gpiobus_pin_getname, gpiobus_pin_getname),
DEVMETHOD(gpiobus_pin_setname, gpiobus_pin_setname),
diff --git a/sys/dev/gpio/gpiobus_if.m b/sys/dev/gpio/gpiobus_if.m
index 8bf29839ef4e..890738c4e809 100644
--- a/sys/dev/gpio/gpiobus_if.m
+++ b/sys/dev/gpio/gpiobus_if.m
@@ -107,6 +107,36 @@ METHOD int pin_setflags {
};
#
+# Simultaneously read and/or change up to 32 adjacent pins.
+# If the device cannot change the pins simultaneously, returns EOPNOTSUPP.
+#
+# More details about using this interface can be found in sys/gpio.h
+#
+METHOD int pin_access_32 {
+ device_t dev;
+ device_t child;
+ uint32_t first_pin;
+ uint32_t clear_pins;
+ uint32_t change_pins;
+ uint32_t *orig_pins;
+};
+
+#
+# Simultaneously configure up to 32 adjacent pins.
+# This is intended to change the configuration of all the pins simultaneously,
+# but unlike pin_access_32, this will not fail if the hardware can't do so.
+#
+# More details about using this interface can be found in sys/gpio.h
+#
+METHOD int pin_config_32 {
+ device_t dev;
+ device_t child;
+ uint32_t first_pin;
+ uint32_t num_pins;
+ uint32_t *pin_flags;
+};
+
+#
# Get the pin name
#
METHOD int pin_getname {
diff --git a/sys/dev/gpio/gpiobus_internal.h b/sys/dev/gpio/gpiobus_internal.h
index c198e5f79989..58f862343403 100644
--- a/sys/dev/gpio/gpiobus_internal.h
+++ b/sys/dev/gpio/gpiobus_internal.h
@@ -44,6 +44,7 @@ int gpiobus_acquire_pin(device_t, uint32_t);
void gpiobus_release_pin(device_t, uint32_t);
int gpiobus_child_location(device_t, device_t, struct sbuf *);
device_t gpiobus_add_child_common(device_t, u_int, const char *, int, size_t);
+int gpiobus_add_gpioc(device_t);
extern driver_t gpiobus_driver;
#endif
diff --git a/sys/dev/gpio/gpioc.c b/sys/dev/gpio/gpioc.c
index 87fed38ebe3e..5a60f939dc78 100644
--- a/sys/dev/gpio/gpioc.c
+++ b/sys/dev/gpio/gpioc.c
@@ -45,7 +45,6 @@
#include <dev/gpio/gpiobusvar.h>
-#include "gpio_if.h"
#include "gpiobus_if.h"
#undef GPIOC_DEBUG
@@ -59,7 +58,7 @@
struct gpioc_softc {
device_t sc_dev; /* gpiocX dev */
- device_t sc_pdev; /* gpioX dev */
+ device_t sc_pdev; /* gpiobusX dev */
struct cdev *sc_ctl_dev; /* controller device */
int sc_unit;
int sc_npins;
@@ -69,6 +68,7 @@ struct gpioc_softc {
struct gpioc_pin_intr {
struct gpioc_softc *sc;
gpio_pin_t pin;
+ uint32_t intr_mode;
bool config_locked;
int intr_rid;
struct resource *intr_res;
@@ -112,8 +112,10 @@ struct gpioc_pin_event {
static MALLOC_DEFINE(M_GPIOC, "gpioc", "gpioc device data");
-static int gpioc_allocate_pin_intr(struct gpioc_pin_intr*, uint32_t);
-static int gpioc_release_pin_intr(struct gpioc_pin_intr*);
+static int gpioc_allocate_pin_intr(struct gpioc_softc*,
+ struct gpioc_pin_intr*, uint32_t, uint32_t);
+static int gpioc_release_pin_intr(struct gpioc_softc*,
+ struct gpioc_pin_intr*);
static int gpioc_attach_priv_pin(struct gpioc_cdevpriv*,
struct gpioc_pin_intr*);
static int gpioc_detach_priv_pin(struct gpioc_cdevpriv*,
@@ -191,27 +193,36 @@ number_of_events(struct gpioc_cdevpriv *priv)
}
static int
-gpioc_allocate_pin_intr(struct gpioc_pin_intr *intr_conf, uint32_t flags)
+gpioc_allocate_pin_intr(struct gpioc_softc *sc,
+ struct gpioc_pin_intr *intr_conf, uint32_t pin, uint32_t flags)
{
int err;
intr_conf->config_locked = true;
mtx_unlock(&intr_conf->mtx);
- intr_conf->intr_res = gpio_alloc_intr_resource(intr_conf->pin->dev,
+ MPASS(intr_conf->pin == NULL);
+ err = gpio_pin_get_by_bus_pinnum(sc->sc_pdev, pin, &intr_conf->pin);
+ if (err != 0)
+ goto error_exit;
+
+ intr_conf->intr_res = gpio_alloc_intr_resource(sc->sc_dev,
&intr_conf->intr_rid, RF_ACTIVE, intr_conf->pin, flags);
if (intr_conf->intr_res == NULL) {
err = ENXIO;
- goto error_exit;
+ goto error_pin;
}
- err = bus_setup_intr(intr_conf->pin->dev, intr_conf->intr_res,
+ err = bus_setup_intr(sc->sc_dev, intr_conf->intr_res,
INTR_TYPE_MISC | INTR_MPSAFE, NULL, gpioc_interrupt_handler,
intr_conf, &intr_conf->intr_cookie);
- if (err != 0)
- goto error_exit;
+ if (err != 0) {
+ bus_release_resource(sc->sc_dev, intr_conf->intr_res);
+ intr_conf->intr_res = NULL;
+ goto error_pin;
+ }
- intr_conf->pin->flags = flags;
+ intr_conf->intr_mode = flags;
error_exit:
mtx_lock(&intr_conf->mtx);
@@ -219,10 +230,15 @@ error_exit:
wakeup(&intr_conf->config_locked);
return (err);
+
+error_pin:
+ gpio_pin_release(intr_conf->pin);
+ intr_conf->pin = NULL;
+ goto error_exit;
}
static int
-gpioc_release_pin_intr(struct gpioc_pin_intr *intr_conf)
+gpioc_release_pin_intr(struct gpioc_softc *sc, struct gpioc_pin_intr *intr_conf)
{
int err;
@@ -230,8 +246,8 @@ gpioc_release_pin_intr(struct gpioc_pin_intr *intr_conf)
mtx_unlock(&intr_conf->mtx);
if (intr_conf->intr_cookie != NULL) {
- err = bus_teardown_intr(intr_conf->pin->dev,
- intr_conf->intr_res, intr_conf->intr_cookie);
+ err = bus_teardown_intr(sc->sc_dev, intr_conf->intr_res,
+ intr_conf->intr_cookie);
if (err != 0)
goto error_exit;
else
@@ -239,7 +255,7 @@ gpioc_release_pin_intr(struct gpioc_pin_intr *intr_conf)
}
if (intr_conf->intr_res != NULL) {
- err = bus_release_resource(intr_conf->pin->dev, SYS_RES_IRQ,
+ err = bus_release_resource(sc->sc_dev, SYS_RES_IRQ,
intr_conf->intr_rid, intr_conf->intr_res);
if (err != 0)
goto error_exit;
@@ -249,7 +265,10 @@ gpioc_release_pin_intr(struct gpioc_pin_intr *intr_conf)
}
}
- intr_conf->pin->flags = 0;
+ gpio_pin_release(intr_conf->pin);
+ intr_conf->pin = NULL;
+
+ intr_conf->intr_mode = 0;
err = 0;
error_exit:
@@ -386,7 +405,7 @@ gpioc_get_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv,
struct gpioc_privs *priv_link;
uint32_t flags;
- flags = intr_conf->pin->flags;
+ flags = intr_conf->intr_mode;
if (flags == 0)
return (0);
@@ -411,7 +430,7 @@ gpioc_set_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv,
int res;
res = 0;
- if (intr_conf->pin->flags == 0 && flags == 0) {
+ if (intr_conf->intr_mode == 0 && flags == 0) {
/* No interrupt configured and none requested: Do nothing. */
return (0);
}
@@ -419,17 +438,17 @@ gpioc_set_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv,
while (intr_conf->config_locked == true)
mtx_sleep(&intr_conf->config_locked, &intr_conf->mtx, 0,
"gpicfg", 0);
- if (intr_conf->pin->flags == 0 && flags != 0) {
+ if (intr_conf->intr_mode == 0 && flags != 0) {
/*
* No interrupt is configured, but one is requested: Allocate
* and setup interrupt on the according pin.
*/
- res = gpioc_allocate_pin_intr(intr_conf, flags);
+ res = gpioc_allocate_pin_intr(sc, intr_conf, pin, flags);
if (res == 0)
res = gpioc_attach_priv_pin(priv, intr_conf);
if (res == EEXIST)
res = 0;
- } else if (intr_conf->pin->flags == flags) {
+ } else if (intr_conf->intr_mode == flags) {
/*
* Same interrupt requested as already configured: Attach the
* cdevpriv to the corresponding pin.
@@ -437,14 +456,14 @@ gpioc_set_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv,
res = gpioc_attach_priv_pin(priv, intr_conf);
if (res == EEXIST)
res = 0;
- } else if (intr_conf->pin->flags != 0 && flags == 0) {
+ } else if (intr_conf->intr_mode != 0 && flags == 0) {
/*
* Interrupt configured, but none requested: Teardown and
* release the pin when no other cdevpriv is attached. Otherwise
* just detach pin and cdevpriv from each other.
*/
if (gpioc_intr_reconfig_allowed(priv, intr_conf)) {
- res = gpioc_release_pin_intr(intr_conf);
+ res = gpioc_release_pin_intr(sc, intr_conf);
}
if (res == 0)
res = gpioc_detach_priv_pin(priv, intr_conf);
@@ -456,9 +475,10 @@ gpioc_set_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv,
if (!gpioc_intr_reconfig_allowed(priv, intr_conf))
res = EBUSY;
else {
- res = gpioc_release_pin_intr(intr_conf);
+ res = gpioc_release_pin_intr(sc, intr_conf);
if (res == 0)
- res = gpioc_allocate_pin_intr(intr_conf, flags);
+ res = gpioc_allocate_pin_intr(sc, intr_conf,
+ pin, flags);
if (res == 0)
res = gpioc_attach_priv_pin(priv, intr_conf);
if (res == EEXIST)
@@ -475,18 +495,16 @@ gpioc_interrupt_handler(void *arg)
{
struct gpioc_pin_intr *intr_conf;
struct gpioc_privs *privs;
- struct gpioc_softc *sc;
sbintime_t evtime;
- uint32_t pin_state;
+ bool pin_state;
intr_conf = arg;
- sc = intr_conf->sc;
/* Capture time and pin state first. */
evtime = sbinuptime();
- if (intr_conf->pin->flags & GPIO_INTR_EDGE_BOTH)
- GPIO_PIN_GET(sc->sc_pdev, intr_conf->pin->pin, &pin_state);
- else if (intr_conf->pin->flags & GPIO_INTR_EDGE_RISING)
+ if (intr_conf->intr_mode & GPIO_INTR_EDGE_BOTH)
+ gpio_pin_is_active(intr_conf->pin, &pin_state);
+ else if (intr_conf->intr_mode & GPIO_INTR_EDGE_RISING)
pin_state = true;
else
pin_state = false;
@@ -575,18 +593,11 @@ gpioc_attach(device_t dev)
sc->sc_pdev = device_get_parent(dev);
sc->sc_unit = device_get_unit(dev);
- err = GPIO_PIN_MAX(sc->sc_pdev, &sc->sc_npins);
- sc->sc_npins++; /* Number of pins is one more than max pin number. */
- if (err != 0)
- return (err);
+ sc->sc_npins = gpiobus_get_npins(dev);
sc->sc_pin_intr = malloc(sizeof(struct gpioc_pin_intr) * sc->sc_npins,
M_GPIOC, M_WAITOK | M_ZERO);
for (int i = 0; i < sc->sc_npins; i++) {
- sc->sc_pin_intr[i].pin = malloc(sizeof(struct gpiobus_pin),
- M_GPIOC, M_WAITOK | M_ZERO);
sc->sc_pin_intr[i].sc = sc;
- sc->sc_pin_intr[i].pin->pin = i;
- sc->sc_pin_intr[i].pin->dev = sc->sc_pdev;
mtx_init(&sc->sc_pin_intr[i].mtx, "gpioc pin", NULL, MTX_DEF);
SLIST_INIT(&sc->sc_pin_intr[i].privs);
}
@@ -610,20 +621,16 @@ static int
gpioc_detach(device_t dev)
{
struct gpioc_softc *sc = device_get_softc(dev);
- int err;
if (sc->sc_ctl_dev)
destroy_dev(sc->sc_ctl_dev);
for (int i = 0; i < sc->sc_npins; i++) {
mtx_destroy(&sc->sc_pin_intr[i].mtx);
- free(sc->sc_pin_intr[i].pin, M_GPIOC);
+ MPASS(sc->sc_pin_intr[i].pin == NULL);
}
free(sc->sc_pin_intr, M_GPIOC);
- if ((err = bus_generic_detach(dev)) != 0)
- return (err);
-
return (0);
}
@@ -655,7 +662,7 @@ gpioc_cdevpriv_dtor(void *data)
KASSERT(consistency == 1,
("inconsistent links between pin config and cdevpriv"));
if (gpioc_intr_reconfig_allowed(priv, pin_link->pin)) {
- gpioc_release_pin_intr(pin_link->pin);
+ gpioc_release_pin_intr(priv->sc, pin_link->pin);
}
mtx_unlock(&pin_link->pin->mtx);
SLIST_REMOVE(&priv->pins, pin_link, gpioc_pins, next);
@@ -778,7 +785,6 @@ static int
gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag,
struct thread *td)
{
- device_t bus;
int max_pin, res;
struct gpioc_softc *sc = cdev->si_drv1;
struct gpioc_cdevpriv *priv;
@@ -789,30 +795,32 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag,
struct gpio_event_config *evcfg;
uint32_t caps, intrflags;
- bus = GPIO_GET_BUS(sc->sc_pdev);
- if (bus == NULL)
- return (EINVAL);
switch (cmd) {
case GPIOMAXPIN:
- max_pin = -1;
- res = GPIO_PIN_MAX(sc->sc_pdev, &max_pin);
+ res = 0;
+ max_pin = sc->sc_npins - 1;
bcopy(&max_pin, arg, sizeof(max_pin));
break;
case GPIOGETCONFIG:
bcopy(arg, &pin, sizeof(pin));
dprintf("get config pin %d\n", pin.gp_pin);
- res = GPIO_PIN_GETFLAGS(sc->sc_pdev, pin.gp_pin,
+ res = GPIOBUS_PIN_GETFLAGS(sc->sc_pdev, sc->sc_dev, pin.gp_pin,
&pin.gp_flags);
/* Fail early */
- if (res)
+ if (res != 0)
break;
res = devfs_get_cdevpriv((void **)&priv);
- if (res)
+ if (res != 0)
break;
pin.gp_flags |= gpioc_get_intr_config(sc, priv,
pin.gp_pin);
- GPIO_PIN_GETCAPS(sc->sc_pdev, pin.gp_pin, &pin.gp_caps);
- GPIOBUS_PIN_GETNAME(bus, pin.gp_pin, pin.gp_name);
+ res = GPIOBUS_PIN_GETCAPS(sc->sc_pdev, sc->sc_dev, pin.gp_pin,
+ &pin.gp_caps);
+ if (res != 0)
+ break;
+ res = GPIOBUS_PIN_GETNAME(sc->sc_pdev, pin.gp_pin, pin.gp_name);
+ if (res != 0)
+ break;
bcopy(&pin, arg, sizeof(pin));
break;
case GPIOSETCONFIG:
@@ -821,7 +829,8 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag,
res = devfs_get_cdevpriv((void **)&priv);
if (res != 0)
break;
- res = GPIO_PIN_GETCAPS(sc->sc_pdev, pin.gp_pin, &caps);
+ res = GPIOBUS_PIN_GETCAPS(sc->sc_pdev, sc->sc_dev,
+ pin.gp_pin, &caps);
if (res != 0)
break;
res = gpio_check_flags(caps, pin.gp_flags);
@@ -847,8 +856,8 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag,
}
if (res != 0)
break;
- res = GPIO_PIN_SETFLAGS(sc->sc_pdev, pin.gp_pin,
- (pin.gp_flags & ~GPIO_INTR_MASK));
+ res = GPIOBUS_PIN_SETFLAGS(sc->sc_pdev, sc->sc_dev, pin.gp_pin,
+ pin.gp_flags & ~GPIO_INTR_MASK);
if (res != 0)
break;
res = gpioc_set_intr_config(sc, priv, pin.gp_pin,
@@ -856,40 +865,43 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag,
break;
case GPIOGET:
bcopy(arg, &req, sizeof(req));
- res = GPIO_PIN_GET(sc->sc_pdev, req.gp_pin,
+ res = GPIOBUS_PIN_GET(sc->sc_pdev, sc->sc_dev, req.gp_pin,
&req.gp_value);
- dprintf("read pin %d -> %d\n",
+ if (res != 0)
+ break;
+ dprintf("read pin %d -> %d\n",
req.gp_pin, req.gp_value);
bcopy(&req, arg, sizeof(req));
break;
case GPIOSET:
bcopy(arg, &req, sizeof(req));
- res = GPIO_PIN_SET(sc->sc_pdev, req.gp_pin,
+ res = GPIOBUS_PIN_SET(sc->sc_pdev, sc->sc_dev, req.gp_pin,
req.gp_value);
- dprintf("write pin %d -> %d\n",
+ dprintf("write pin %d -> %d\n",
req.gp_pin, req.gp_value);
break;
case GPIOTOGGLE:
bcopy(arg, &req, sizeof(req));
- dprintf("toggle pin %d\n",
+ dprintf("toggle pin %d\n",
req.gp_pin);
- res = GPIO_PIN_TOGGLE(sc->sc_pdev, req.gp_pin);
+ res = GPIOBUS_PIN_TOGGLE(sc->sc_pdev, sc->sc_dev, req.gp_pin);
break;
case GPIOSETNAME:
bcopy(arg, &pin, sizeof(pin));
dprintf("set name on pin %d\n", pin.gp_pin);
- res = GPIOBUS_PIN_SETNAME(bus, pin.gp_pin,
+ res = GPIOBUS_PIN_SETNAME(sc->sc_pdev, pin.gp_pin,
pin.gp_name);
break;
case GPIOACCESS32:
a32 = (struct gpio_access_32 *)arg;
- res = GPIO_PIN_ACCESS_32(sc->sc_pdev, a32->first_pin,
- a32->clear_pins, a32->change_pins, &a32->orig_pins);
+ res = GPIOBUS_PIN_ACCESS_32(sc->sc_pdev, sc->sc_dev,
+ a32->first_pin, a32->clear_pins, a32->change_pins,
+ &a32->orig_pins);
break;
case GPIOCONFIG32:
c32 = (struct gpio_config_32 *)arg;
- res = GPIO_PIN_CONFIG_32(sc->sc_pdev, c32->first_pin,
- c32->num_pins, c32->pin_flags);
+ res = GPIOBUS_PIN_CONFIG_32(sc->sc_pdev, sc->sc_dev,
+ c32->first_pin, c32->num_pins, c32->pin_flags);
break;
case GPIOCONFIGEVENTS:
evcfg = (struct gpio_event_config *)arg;
@@ -1050,9 +1062,6 @@ static device_method_t gpioc_methods[] = {
DEVMETHOD(device_probe, gpioc_probe),
DEVMETHOD(device_attach, gpioc_attach),
DEVMETHOD(device_detach, gpioc_detach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
DEVMETHOD_END
};
@@ -1063,5 +1072,5 @@ driver_t gpioc_driver = {
sizeof(struct gpioc_softc)
};
-DRIVER_MODULE(gpioc, gpio, gpioc_driver, 0, 0);
+DRIVER_MODULE(gpioc, gpiobus, gpioc_driver, 0, 0);
MODULE_VERSION(gpioc, 1);
diff --git a/sys/dev/gpio/gpioled.c b/sys/dev/gpio/gpioled.c
index ba53cb733971..71af5741b2fe 100644
--- a/sys/dev/gpio/gpioled.c
+++ b/sys/dev/gpio/gpioled.c
@@ -55,13 +55,13 @@
device_get_nameunit((_sc)->sc_dev), "gpioled", MTX_DEF)
#define GPIOLED_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_mtx)
-struct gpioled_softc
+struct gpioled_softc
{
device_t sc_dev;
device_t sc_busdev;
struct mtx sc_mtx;
struct cdev *sc_leddev;
- int sc_invert;
+ int sc_softinvert;
};
static void gpioled_control(void *, int);
@@ -69,20 +69,19 @@ static int gpioled_probe(device_t);
static int gpioled_attach(device_t);
static int gpioled_detach(device_t);
-static void
+static void
gpioled_control(void *priv, int onoff)
{
struct gpioled_softc *sc;
sc = (struct gpioled_softc *)priv;
+ if (onoff == -1) /* Keep the current state. */
+ return;
+ if (sc->sc_softinvert)
+ onoff = !onoff;
GPIOLED_LOCK(sc);
- if (GPIOBUS_PIN_SETFLAGS(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN,
- GPIO_PIN_OUTPUT) == 0) {
- if (sc->sc_invert)
- onoff = !onoff;
- GPIOBUS_PIN_SET(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN,
- onoff ? GPIO_PIN_HIGH : GPIO_PIN_LOW);
- }
+ GPIOBUS_PIN_SET(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN,
+ onoff ? GPIO_PIN_HIGH : GPIO_PIN_LOW);
GPIOLED_UNLOCK(sc);
}
@@ -95,26 +94,101 @@ gpioled_probe(device_t dev)
}
+/*
+ * Work out how output inversion is implemented for the LED pin.
+ *
+ * Reads the "invert" and "invmode" hints of this device together with the
+ * capabilities of the pin.  GPIO_PIN_INVOUT is set in *pin_flags when the
+ * hardware performs the inversion; sc_softinvert is set when
+ * gpioled_control() has to flip the requested value itself.
+ *
+ * Returns 1 if the LED is inverted, 0 if it is not, and -1 if the pin
+ * capabilities cannot be read or "hw" inversion was requested for a pin
+ * that lacks GPIO_PIN_INVOUT.
+ */
static int
+gpioled_inv(device_t dev, uint32_t *pin_flags)
+{
+	struct gpioled_softc *sc;
+	const char *invmode;
+	uint32_t pin_caps;
+	int invert;
+	bool hwinv;
+
+	sc = device_get_softc(dev);
+
+	if (resource_int_value(device_get_name(dev),
+	    device_get_unit(dev), "invert", &invert) != 0)
+		invert = 0;
+	/*
+	 * Any non-zero hint enables inversion.  Normalize it so that a
+	 * negative hint is not mistaken for the -1 error return.
+	 */
+	invert = (invert != 0);
+
+	if (GPIOBUS_PIN_GETCAPS(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN,
+	    &pin_caps) != 0) {
+		if (bootverbose)
+			device_printf(sc->sc_dev, "unable to get pin caps\n");
+		return (-1);
+	}
+	hwinv = (pin_caps & GPIO_PIN_INVOUT) != 0;
+
+	/* Default to no inversion; the flag is only touched if supported. */
+	if (hwinv)
+		*pin_flags &= ~GPIO_PIN_INVOUT;
+	sc->sc_softinvert = 0;
+	if (invert) {
+		if (resource_string_value(device_get_name(dev),
+		    device_get_unit(dev), "invmode", &invmode) != 0)
+			invmode = NULL;
+
+		if (invmode != NULL) {
+			if (strcmp(invmode, "sw") == 0)
+				sc->sc_softinvert = 1;
+			else if (strcmp(invmode, "hw") == 0) {
+				if (!hwinv) {
+					device_printf(sc->sc_dev,
+					    "hardware pin inversion not supported\n");
+					return (-1);
+				}
+				*pin_flags |= GPIO_PIN_INVOUT;
+			} else {
+				/* Unknown modes are reported, then handled as "auto". */
+				if (strcmp(invmode, "auto") != 0)
+					device_printf(sc->sc_dev,
+					    "invalid pin inversion mode\n");
+				invmode = NULL;
+			}
+		}
+		/*
+		 * auto inversion mode: use hardware support if available,
+		 * else fall back to software emulation.
+		 */
+		if (invmode == NULL) {
+			if (hwinv)
+				*pin_flags |= GPIO_PIN_INVOUT;
+			else
+				sc->sc_softinvert = 1;
+		}
+	}
+	/* Exactly one inversion mechanism must be active when inverting. */
+	MPASS(!invert ||
+	    (((*pin_flags & GPIO_PIN_INVOUT) != 0) && !sc->sc_softinvert) ||
+	    (((*pin_flags & GPIO_PIN_INVOUT) == 0) && sc->sc_softinvert));
+	return (invert);
+}
+
+static int
gpioled_attach(device_t dev)
{
struct gpioled_softc *sc;
int state;
const char *name;
+ uint32_t pin_flags;
+ int invert;
sc = device_get_softc(dev);
sc->sc_dev = dev;
sc->sc_busdev = device_get_parent(dev);
GPIOLED_LOCK_INIT(sc);
- state = 0;
-
- if (resource_string_value(device_get_name(dev),
+ if (resource_string_value(device_get_name(dev),
device_get_unit(dev), "name", &name))
name = NULL;
- resource_int_value(device_get_name(dev),
- device_get_unit(dev), "invert", &sc->sc_invert);
- resource_int_value(device_get_name(dev),
- device_get_unit(dev), "state", &state);
+
+ if (resource_int_value(device_get_name(dev),
+ device_get_unit(dev), "state", &state))
+ state = 0;
+
+	pin_flags = GPIO_PIN_OUTPUT;
+	invert = gpioled_inv(dev, &pin_flags);
+	if (invert < 0) {
+		/* Attach is failing: undo the earlier GPIOLED_LOCK_INIT(). */
+		GPIOLED_LOCK_DESTROY(sc);
+		return (ENXIO);
+	}
+	device_printf(sc->sc_dev, "state %d invert %s\n",
+	    state, (invert ? (sc->sc_softinvert ? "sw" : "hw") : "no"));
+	if (GPIOBUS_PIN_SETFLAGS(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN,
+	    pin_flags) != 0) {
+		if (bootverbose)
+			device_printf(sc->sc_dev,
+			    "unable to set pin flags, %#x\n", pin_flags);
+		/* Same cleanup as above: do not leak the softc mutex. */
+		GPIOLED_LOCK_DESTROY(sc);
+		return (ENXIO);
+	}
sc->sc_leddev = led_create_state(gpioled_control, sc, name ? name :
device_get_nameunit(dev), state);
diff --git a/sys/dev/gpio/ofw_gpiobus.c b/sys/dev/gpio/ofw_gpiobus.c
index b12b78fac18c..da1bfbc268b8 100644
--- a/sys/dev/gpio/ofw_gpiobus.c
+++ b/sys/dev/gpio/ofw_gpiobus.c
@@ -426,6 +426,9 @@ ofw_gpiobus_attach(device_t dev)
err = gpiobus_init_softc(dev);
if (err != 0)
return (err);
+ err = gpiobus_add_gpioc(dev);
+ if (err != 0)
+ return (err);
bus_identify_children(dev);
bus_enumerate_hinted_children(dev);
/*
diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c
index 9b85c989dc96..a6a6ae68996c 100644
--- a/sys/dev/hwpmc/hwpmc_mod.c
+++ b/sys/dev/hwpmc/hwpmc_mod.c
@@ -210,7 +210,7 @@ static int pmc_attach_one_process(struct proc *p, struct pmc *pm);
static bool pmc_can_allocate_row(int ri, enum pmc_mode mode);
static bool pmc_can_allocate_rowindex(struct proc *p, unsigned int ri,
int cpu);
-static int pmc_can_attach(struct pmc *pm, struct proc *p);
+static bool pmc_can_attach(struct pmc *pm, struct proc *p);
static void pmc_capture_user_callchain(int cpu, int soft,
struct trapframe *tf);
static void pmc_cleanup(void);
@@ -1029,19 +1029,19 @@ pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
* Check if PMC 'pm' may be attached to target process 't'.
*/
-static int
+static bool
pmc_can_attach(struct pmc *pm, struct proc *t)
{
struct proc *o; /* pmc owner */
struct ucred *oc, *tc; /* owner, target credentials */
- int decline_attach, i;
+ bool decline_attach;
/*
* A PMC's owner can always attach that PMC to itself.
*/
if ((o = pm->pm_owner->po_owner) == t)
- return 0;
+ return (true);
PROC_LOCK(o);
oc = o->p_ucred;
@@ -1066,18 +1066,17 @@ pmc_can_attach(struct pmc *pm, struct proc *t)
* Every one of the target's group ids, must be in the owner's
* group list.
*/
- for (i = 0; !decline_attach && i < tc->cr_ngroups; i++)
+ for (int i = 0; !decline_attach && i < tc->cr_ngroups; i++)
decline_attach = !groupmember(tc->cr_groups[i], oc);
-
- /* check the read and saved gids too */
- if (decline_attach == 0)
- decline_attach = !groupmember(tc->cr_rgid, oc) ||
+ if (!decline_attach)
+ decline_attach = !groupmember(tc->cr_gid, oc) ||
+ !groupmember(tc->cr_rgid, oc) ||
!groupmember(tc->cr_svgid, oc);
crfree(tc);
crfree(oc);
- return !decline_attach;
+ return (!decline_attach);
}
/*
@@ -1412,7 +1411,7 @@ pmc_process_exec(struct thread *td, struct pmckern_procexec *pk)
*/
for (ri = 0; ri < md->pmd_npmc; ri++) {
if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
- if (pmc_can_attach(pm, td->td_proc) != 0) {
+ /* pmc_can_attach() is true when attaching is allowed: detach otherwise. */
+ if (!pmc_can_attach(pm, td->td_proc)) {
pmc_detach_one_process(td->td_proc, pm,
PMC_FLAG_NONE);
}
diff --git a/sys/dev/hwt/hwt_ioctl.c b/sys/dev/hwt/hwt_ioctl.c
index 592db4931bb4..184c7e72f986 100644
--- a/sys/dev/hwt/hwt_ioctl.c
+++ b/sys/dev/hwt/hwt_ioctl.c
@@ -112,12 +112,11 @@ hwt_priv_check(struct proc *o, struct proc *t)
error = EPERM;
goto done;
}
-
- /* Check the read and saved GIDs too. */
- if (!groupmember(tc->cr_rgid, oc) ||
+ if (!groupmember(tc->cr_gid, oc) ||
+ !groupmember(tc->cr_rgid, oc) ||
!groupmember(tc->cr_svgid, oc)) {
- error = EPERM;
- goto done;
+ error = EPERM;
+ goto done;
}
done:
diff --git a/sys/dev/ice/ice_lib.c b/sys/dev/ice/ice_lib.c
index 442111e5ffaf..8b6349f686eb 100644
--- a/sys/dev/ice/ice_lib.c
+++ b/sys/dev/ice/ice_lib.c
@@ -7818,7 +7818,8 @@ ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter)
case IFCOUNTER_OPACKETS:
return (es->tx_unicast + es->tx_multicast + es->tx_broadcast);
case IFCOUNTER_OERRORS:
- return (es->tx_errors);
+ return (if_get_counter_default(vsi->sc->ifp, counter) +
+ es->tx_errors);
case IFCOUNTER_COLLISIONS:
return (0);
case IFCOUNTER_IBYTES:
@@ -7832,7 +7833,8 @@ ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter)
case IFCOUNTER_IQDROPS:
return (es->rx_discards);
case IFCOUNTER_OQDROPS:
- return (hs->tx_dropped_link_down);
+ return (if_get_counter_default(vsi->sc->ifp, counter) +
+ hs->tx_dropped_link_down);
case IFCOUNTER_NOPROTO:
return (es->rx_unknown_protocol);
default:
diff --git a/sys/dev/ichsmb/ichsmb_pci.c b/sys/dev/ichsmb/ichsmb_pci.c
index 728bb942d503..e4d87fe1fed2 100644
--- a/sys/dev/ichsmb/ichsmb_pci.c
+++ b/sys/dev/ichsmb/ichsmb_pci.c
@@ -107,6 +107,7 @@
#define ID_COMETLAKE2 0x06a3
#define ID_TIGERLAKE 0xa0a3
#define ID_TIGERLAKE2 0x43a3
+#define ID_ELKHARTLAKE 0x4b23
#define ID_GEMINILAKE 0x31d4
#define ID_CEDARFORK 0x18df
#define ID_ICELAKE 0x34a3
@@ -206,6 +207,8 @@ static const struct pci_device_table ichsmb_devices[] = {
PCI_DESCR("Intel Tiger Lake SMBus controller") },
{ PCI_DEV(PCI_VENDOR_INTEL, ID_TIGERLAKE2),
PCI_DESCR("Intel Tiger Lake SMBus controller") },
+ { PCI_DEV(PCI_VENDOR_INTEL, ID_ELKHARTLAKE),
+ PCI_DESCR("Intel Elkhart Lake SMBus controller") },
{ PCI_DEV(PCI_VENDOR_INTEL, ID_GEMINILAKE),
PCI_DESCR("Intel Gemini Lake SMBus controller") },
{ PCI_DEV(PCI_VENDOR_INTEL, ID_CEDARFORK),
diff --git a/sys/dev/igc/if_igc.c b/sys/dev/igc/if_igc.c
index a1ae35c7aa43..f199a128c783 100644
--- a/sys/dev/igc/if_igc.c
+++ b/sys/dev/igc/if_igc.c
@@ -2599,8 +2599,8 @@ igc_if_get_counter(if_ctx_t ctx, ift_counter cnt)
sc->stats.ruc + sc->stats.roc +
sc->stats.mpc + sc->stats.htdpmc);
case IFCOUNTER_OERRORS:
- return (sc->stats.ecol + sc->stats.latecol +
- sc->watchdog_events);
+ return (if_get_counter_default(ifp, cnt) +
+ sc->stats.ecol + sc->stats.latecol + sc->watchdog_events);
default:
return (if_get_counter_default(ifp, cnt));
}
diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c
index 6856b0551dde..668ccf056463 100644
--- a/sys/dev/iommu/busdma_iommu.c
+++ b/sys/dev/iommu/busdma_iommu.c
@@ -114,8 +114,8 @@ iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
* domain, and must collectively be assigned to use either IOMMU or
* bounce mapping.
*/
-device_t
-iommu_get_requester(device_t dev, uint16_t *rid)
+int
+iommu_get_requester(device_t dev, device_t *requesterp, uint16_t *rid)
{
devclass_t pci_class;
device_t l, pci, pcib, pcip, pcibp, requester;
@@ -129,7 +129,8 @@ iommu_get_requester(device_t dev, uint16_t *rid)
pci = device_get_parent(dev);
if (pci == NULL || device_get_devclass(pci) != pci_class) {
*rid = 0; /* XXXKIB: Could be ACPI HID */
- return (requester);
+ *requesterp = NULL;
+ return (ENOTTY);
}
*rid = pci_get_rid(dev);
@@ -141,16 +142,39 @@ iommu_get_requester(device_t dev, uint16_t *rid)
*/
for (;;) {
pci = device_get_parent(l);
- KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent "
- "for %s", device_get_name(dev), device_get_name(l)));
- KASSERT(device_get_devclass(pci) == pci_class,
- ("iommu_get_requester(%s): non-pci parent %s for %s",
- device_get_name(dev), device_get_name(pci),
- device_get_name(l)));
+ if (pci == NULL) {
+ if (bootverbose) {
+ printf(
+ "iommu_get_requester(%s): NULL parent for %s\n",
+ device_get_name(dev), device_get_name(l));
+ }
+ *rid = 0;
+ *requesterp = NULL;
+ return (ENXIO);
+ }
+ if (device_get_devclass(pci) != pci_class) {
+ if (bootverbose) {
+ printf(
+ "iommu_get_requester(%s): non-pci parent %s for %s\n",
+ device_get_name(dev), device_get_name(pci),
+ device_get_name(l));
+ }
+ *rid = 0;
+ *requesterp = NULL;
+ return (ENXIO);
+ }
pcib = device_get_parent(pci);
- KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge "
- "for %s", device_get_name(dev), device_get_name(pci)));
+ if (pcib == NULL) {
+ if (bootverbose) {
+ printf(
+ "iommu_get_requester(%s): NULL bridge for %s\n",
+ device_get_name(dev), device_get_name(pci));
+ }
+ *rid = 0;
+ *requesterp = NULL;
+ return (ENXIO);
+ }
/*
* The parent of our "bridge" isn't another PCI bus,
@@ -229,7 +253,8 @@ iommu_get_requester(device_t dev, uint16_t *rid)
}
}
}
- return (requester);
+ *requesterp = requester;
+ return (0);
}
struct iommu_ctx *
@@ -237,10 +262,13 @@ iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
{
device_t requester;
struct iommu_ctx *ctx;
+ int error;
bool disabled;
uint16_t rid;
- requester = iommu_get_requester(dev, &rid);
+ error = iommu_get_requester(dev, &requester, &rid);
+ if (error != 0)
+ return (NULL);
/*
* If the user requested the IOMMU disabled for the device, we
diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h
index b1858f0df9f7..55044042c5d2 100644
--- a/sys/dev/iommu/iommu.h
+++ b/sys/dev/iommu/iommu.h
@@ -170,7 +170,7 @@ void iommu_domain_unload(struct iommu_domain *domain,
void iommu_unit_pre_instantiate_ctx(struct iommu_unit *iommu);
struct iommu_ctx *iommu_instantiate_ctx(struct iommu_unit *iommu,
device_t dev, bool rmrr);
-device_t iommu_get_requester(device_t dev, uint16_t *rid);
+int iommu_get_requester(device_t dev, device_t *requester, uint16_t *rid);
int iommu_init_busdma(struct iommu_unit *unit);
void iommu_fini_busdma(struct iommu_unit *unit);
diff --git a/sys/dev/irdma/irdma_cm.c b/sys/dev/irdma/irdma_cm.c
index 450fae662dd8..d4d4f328fb43 100644
--- a/sys/dev/irdma/irdma_cm.c
+++ b/sys/dev/irdma/irdma_cm.c
@@ -1316,7 +1316,7 @@ irdma_cm_timer_tick(struct timer_list *t)
struct irdma_timer_entry *send_entry, *close_entry;
struct list_head *list_core_temp;
struct list_head *list_node;
- struct irdma_cm_core *cm_core = from_timer(cm_core, t, tcp_timer);
+ struct irdma_cm_core *cm_core = timer_container_of(cm_core, t, tcp_timer);
struct irdma_sc_vsi *vsi;
u32 settimer = 0;
unsigned long timetosend;
diff --git a/sys/dev/irdma/irdma_utils.c b/sys/dev/irdma/irdma_utils.c
index 5fc37022981f..038f1980082b 100644
--- a/sys/dev/irdma/irdma_utils.c
+++ b/sys/dev/irdma/irdma_utils.c
@@ -876,7 +876,7 @@ irdma_terminate_done(struct irdma_sc_qp *qp, int timeout_occurred)
static void
irdma_terminate_timeout(struct timer_list *t)
{
- struct irdma_qp *iwqp = from_timer(iwqp, t, terminate_timer);
+ struct irdma_qp *iwqp = timer_container_of(iwqp, t, terminate_timer);
struct irdma_sc_qp *qp = &iwqp->sc_qp;
irdma_terminate_done(qp, 1);
@@ -1528,7 +1528,7 @@ static void
irdma_hw_stats_timeout(struct timer_list *t)
{
struct irdma_vsi_pestat *pf_devstat =
- from_timer(pf_devstat, t, stats_timer);
+ timer_container_of(pf_devstat, t, stats_timer);
struct irdma_sc_vsi *sc_vsi = pf_devstat->vsi;
if (sc_vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c
index 73c0fd1ab16f..6d08bd49bc04 100644
--- a/sys/dev/ixgbe/if_ix.c
+++ b/sys/dev/ixgbe/if_ix.c
@@ -184,6 +184,7 @@ static int ixgbe_if_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int,
int);
static void ixgbe_if_queues_free(if_ctx_t);
static void ixgbe_if_timer(if_ctx_t, uint16_t);
+static const char *ixgbe_link_speed_to_str(u32 link_speed);
static void ixgbe_if_update_admin_status(if_ctx_t);
static void ixgbe_if_vlan_register(if_ctx_t, u16);
static void ixgbe_if_vlan_unregister(if_ctx_t, u16);
@@ -1349,8 +1350,6 @@ ixgbe_if_get_counter(if_ctx_t ctx, ift_counter cnt)
return (0);
case IFCOUNTER_IQDROPS:
return (sc->iqdrops);
- case IFCOUNTER_OQDROPS:
- return (0);
case IFCOUNTER_IERRORS:
return (sc->ierrors);
default:
@@ -4027,6 +4026,33 @@ ixgbe_if_stop(if_ctx_t ctx)
} /* ixgbe_if_stop */
/************************************************************************
+ * ixgbe_link_speed_to_str - Convert link speed to string
+ *
+ * Helper function to convert link speed constants to human-readable
+ * string representations in conventional Gbps or Mbps.
+ ************************************************************************/
+static const char *
+ixgbe_link_speed_to_str(u32 link_speed)
+{
+	const char *desc;
+
+	/* Pick the printable name of the given IXGBE_LINK_SPEED_* value. */
+	switch (link_speed) {
+	case IXGBE_LINK_SPEED_10GB_FULL:
+		desc = "10 Gbps";
+		break;
+	case IXGBE_LINK_SPEED_5GB_FULL:
+		desc = "5 Gbps";
+		break;
+	case IXGBE_LINK_SPEED_2_5GB_FULL:
+		desc = "2.5 Gbps";
+		break;
+	case IXGBE_LINK_SPEED_1GB_FULL:
+		desc = "1 Gbps";
+		break;
+	case IXGBE_LINK_SPEED_100_FULL:
+		desc = "100 Mbps";
+		break;
+	case IXGBE_LINK_SPEED_10_FULL:
+		desc = "10 Mbps";
+		break;
+	default:
+		/* Any other value has no dedicated name. */
+		desc = "Unknown";
+		break;
+	}
+
+	return (desc);
+} /* ixgbe_link_speed_to_str */
+
+/************************************************************************
* ixgbe_update_link_status - Update OS on link state
*
* Note: Only updates the OS on the cached link state.
@@ -4042,9 +4068,9 @@ ixgbe_if_update_admin_status(if_ctx_t ctx)
if (sc->link_up) {
if (sc->link_active == false) {
if (bootverbose)
- device_printf(dev, "Link is up %d Gbps %s \n",
- ((sc->link_speed == 128) ? 10 : 1),
- "Full Duplex");
+ device_printf(dev,
+ "Link is up %s Full Duplex\n",
+ ixgbe_link_speed_to_str(sc->link_speed));
sc->link_active = true;
/* Update any Flow Control changes */
ixgbe_fc_enable(&sc->hw);
diff --git a/sys/dev/ixgbe/ixgbe_e610.c b/sys/dev/ixgbe/ixgbe_e610.c
index 95c6dca416c6..18c4612446e0 100644
--- a/sys/dev/ixgbe/ixgbe_e610.c
+++ b/sys/dev/ixgbe/ixgbe_e610.c
@@ -1400,40 +1400,6 @@ s32 ixgbe_aci_set_link_restart_an(struct ixgbe_hw *hw, bool ena_link)
}
/**
- * ixgbe_is_media_cage_present - check if media cage is present
- * @hw: pointer to the HW struct
- *
- * Identify presence of media cage using the ACI command (0x06E0).
- *
- * Return: true if media cage is present, else false. If no cage, then
- * media type is backplane or BASE-T.
- */
-static bool ixgbe_is_media_cage_present(struct ixgbe_hw *hw)
-{
- struct ixgbe_aci_cmd_get_link_topo *cmd;
- struct ixgbe_aci_desc desc;
-
- cmd = &desc.params.get_link_topo;
-
- ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_link_topo);
-
- cmd->addr.topo_params.node_type_ctx =
- (IXGBE_ACI_LINK_TOPO_NODE_CTX_PORT <<
- IXGBE_ACI_LINK_TOPO_NODE_CTX_S);
-
- /* set node type */
- cmd->addr.topo_params.node_type_ctx |=
- (IXGBE_ACI_LINK_TOPO_NODE_TYPE_M &
- IXGBE_ACI_LINK_TOPO_NODE_TYPE_CAGE);
-
- /* Node type cage can be used to determine if cage is present. If AQC
- * returns error (ENOENT), then no cage present. If no cage present then
- * connection type is backplane or BASE-T.
- */
- return ixgbe_aci_get_netlist_node(hw, cmd, NULL, NULL);
-}
-
-/**
* ixgbe_get_media_type_from_phy_type - Gets media type based on phy type
* @hw: pointer to the HW struct
*
diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c
index 43c3af056b67..261f76055901 100644
--- a/sys/dev/ixl/if_ixl.c
+++ b/sys/dev/ixl/if_ixl.c
@@ -1785,7 +1785,7 @@ ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt)
case IFCOUNTER_OPACKETS:
return (vsi->opackets);
case IFCOUNTER_OERRORS:
- return (vsi->oerrors);
+ return (if_get_counter_default(ifp, cnt) + vsi->oerrors);
case IFCOUNTER_COLLISIONS:
/* Collisions are by standard impossible in 40G/10G Ethernet */
return (0);
@@ -1800,7 +1800,7 @@ ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt)
case IFCOUNTER_IQDROPS:
return (vsi->iqdrops);
case IFCOUNTER_OQDROPS:
- return (vsi->oqdrops);
+ return (if_get_counter_default(ifp, cnt) + vsi->oqdrops);
case IFCOUNTER_NOPROTO:
return (vsi->noproto);
default:
diff --git a/sys/dev/mpr/mpr.c b/sys/dev/mpr/mpr.c
index d1c572e40669..262d6b58b705 100644
--- a/sys/dev/mpr/mpr.c
+++ b/sys/dev/mpr/mpr.c
@@ -1729,6 +1729,7 @@ mpr_get_tunables(struct mpr_softc *sc)
sc->enable_ssu = MPR_SSU_ENABLE_SSD_DISABLE_HDD;
sc->spinup_wait_time = DEFAULT_SPINUP_WAIT;
sc->use_phynum = 1;
+ sc->encl_min_slots = 0;
sc->max_reqframes = MPR_REQ_FRAMES;
sc->max_prireqframes = MPR_PRI_REQ_FRAMES;
sc->max_replyframes = MPR_REPLY_FRAMES;
@@ -1748,6 +1749,7 @@ mpr_get_tunables(struct mpr_softc *sc)
TUNABLE_INT_FETCH("hw.mpr.enable_ssu", &sc->enable_ssu);
TUNABLE_INT_FETCH("hw.mpr.spinup_wait_time", &sc->spinup_wait_time);
TUNABLE_INT_FETCH("hw.mpr.use_phy_num", &sc->use_phynum);
+ TUNABLE_INT_FETCH("hw.mpr.encl_min_slots", &sc->encl_min_slots);
TUNABLE_INT_FETCH("hw.mpr.max_reqframes", &sc->max_reqframes);
TUNABLE_INT_FETCH("hw.mpr.max_prireqframes", &sc->max_prireqframes);
TUNABLE_INT_FETCH("hw.mpr.max_replyframes", &sc->max_replyframes);
@@ -1797,6 +1799,10 @@ mpr_get_tunables(struct mpr_softc *sc)
device_get_unit(sc->mpr_dev));
TUNABLE_INT_FETCH(tmpstr, &sc->use_phynum);
+ snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.encl_min_slots",
+ device_get_unit(sc->mpr_dev));
+ TUNABLE_INT_FETCH(tmpstr, &sc->encl_min_slots);
+
snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_reqframes",
device_get_unit(sc->mpr_dev));
TUNABLE_INT_FETCH(tmpstr, &sc->max_reqframes);
@@ -1951,6 +1957,10 @@ mpr_setup_sysctl(struct mpr_softc *sc)
SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
OID_AUTO, "prp_page_alloc_fail", CTLFLAG_RD,
&sc->prp_page_alloc_fail, "PRP page allocation failures");
+
+ SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
+ OID_AUTO, "encl_min_slots", CTLFLAG_RW, &sc->encl_min_slots, 0,
+ "force enclosure minimum slots");
}
static struct mpr_debug_string {
diff --git a/sys/dev/mpr/mpr_mapping.c b/sys/dev/mpr/mpr_mapping.c
index f9a9ac1c53d0..38aa4dfc7ef2 100644
--- a/sys/dev/mpr/mpr_mapping.c
+++ b/sys/dev/mpr/mpr_mapping.c
@@ -2785,6 +2785,8 @@ mpr_mapping_enclosure_dev_status_change_event(struct mpr_softc *sc,
* DPM, if it's being used.
*/
if (enc_idx != MPR_ENCTABLE_BAD_IDX) {
+ u16 new_num_slots;
+
et_entry = &sc->enclosure_table[enc_idx];
if (et_entry->init_complete &&
!et_entry->missing_count) {
@@ -2796,6 +2798,17 @@ mpr_mapping_enclosure_dev_status_change_event(struct mpr_softc *sc,
et_entry->enc_handle = le16toh(event_data->
EnclosureHandle);
et_entry->start_slot = le16toh(event_data->StartSlot);
+ new_num_slots = le16toh(event_data->NumSlots);
+ if (new_num_slots < sc->encl_min_slots) {
+ mpr_dprint(sc, MPR_MAPPING, "%s: Enclosure %d num_slots %d, overriding with %d.\n",
+ __func__, enc_idx, new_num_slots, sc->encl_min_slots);
+ new_num_slots = sc->encl_min_slots;
+ }
+ if (et_entry->num_slots != new_num_slots) {
+ mpr_dprint(sc, MPR_MAPPING, "%s: Enclosure %d old num_slots %d, new %d.\n",
+ __func__, enc_idx, et_entry->num_slots, sc->encl_min_slots);
+ et_entry->num_slots = new_num_slots;
+ }
saved_phy_bits = et_entry->phy_bits;
et_entry->phy_bits |= le32toh(event_data->PhyBits);
if (saved_phy_bits != et_entry->phy_bits)
@@ -2858,6 +2871,11 @@ mpr_mapping_enclosure_dev_status_change_event(struct mpr_softc *sc,
et_entry->start_index = MPR_MAPTABLE_BAD_IDX;
et_entry->dpm_entry_num = MPR_DPM_BAD_IDX;
et_entry->num_slots = le16toh(event_data->NumSlots);
+ if (et_entry->num_slots < sc->encl_min_slots) {
+ mpr_dprint(sc, MPR_ERROR | MPR_MAPPING, "%s: Enclosure %d num_slots is %d, overriding with %d.\n",
+ __func__, enc_idx, et_entry->num_slots, sc->encl_min_slots);
+ et_entry->num_slots = sc->encl_min_slots;
+ }
et_entry->start_slot = le16toh(event_data->StartSlot);
et_entry->phy_bits = le32toh(event_data->PhyBits);
}
diff --git a/sys/dev/mpr/mprvar.h b/sys/dev/mpr/mprvar.h
index 0f1743f4266e..93f3fbffe079 100644
--- a/sys/dev/mpr/mprvar.h
+++ b/sys/dev/mpr/mprvar.h
@@ -366,6 +366,7 @@ struct mpr_softc {
int spinup_wait_time;
int use_phynum;
int dump_reqs_alltypes;
+ int encl_min_slots;
uint64_t chain_alloc_fail;
uint64_t prp_page_alloc_fail;
struct sysctl_ctx_list sysctl_ctx;
diff --git a/sys/dev/mwl/if_mwl.c b/sys/dev/mwl/if_mwl.c
index 0e2eb0b2d8fe..c885968dfe15 100644
--- a/sys/dev/mwl/if_mwl.c
+++ b/sys/dev/mwl/if_mwl.c
@@ -4017,7 +4017,7 @@ mkpeerinfo(MWL_HAL_PEERINFO *pi, const struct ieee80211_node *ni)
pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_SHORTGI40;
if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) == 0)
pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_SHORTGI20;
- if (ni->ni_chw != IEEE80211_STA_RX_BW_40)
+ if (ni->ni_chw != NET80211_STA_RX_BW_40)
pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_CHWIDTH40;
}
return pi;
diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c
index 84f365024f13..ead91f0d01fe 100644
--- a/sys/dev/nvme/nvme.c
+++ b/sys/dev/nvme/nvme.c
@@ -295,7 +295,6 @@ nvme_register_consumer(nvme_cons_ns_fn_t ns_fn, nvme_cons_ctrlr_fn_t ctrlr_fn,
void
nvme_unregister_consumer(struct nvme_consumer *consumer)
{
-
consumer->id = INVALID_CONSUMER_ID;
}
diff --git a/sys/dev/nvme/nvme_ahci.c b/sys/dev/nvme/nvme_ahci.c
index 888207a454f7..b06661226d34 100644
--- a/sys/dev/nvme/nvme_ahci.c
+++ b/sys/dev/nvme/nvme_ahci.c
@@ -124,6 +124,5 @@ bad:
static int
nvme_ahci_detach(device_t dev)
{
-
return (nvme_detach(dev));
}
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index fd7f00ced14b..3a1894bf754d 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -41,6 +41,9 @@
#include <sys/endian.h>
#include <sys/stdarg.h>
#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
#include "nvme_private.h"
#include "nvme_linux.h"
@@ -597,7 +600,6 @@ nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr)
static bool
is_log_page_id_valid(uint8_t page_id)
{
-
switch (page_id) {
case NVME_LOG_ERROR:
case NVME_LOG_HEALTH_INFORMATION:
@@ -653,7 +655,6 @@ static void
nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr,
uint8_t state)
{
-
if (state & NVME_CRIT_WARN_ST_AVAILABLE_SPARE)
nvme_printf(ctrlr, "SMART WARNING: available spare space below threshold\n");
@@ -781,7 +782,6 @@ nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
static void
nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr)
{
-
ctrlr->int_coal_time = 0;
TUNABLE_INT_FETCH("hw.nvme.int_coal_time",
&ctrlr->int_coal_time);
@@ -1268,6 +1268,34 @@ nvme_ctrlr_shared_handler(void *arg)
nvme_mmio_write_4(ctrlr, intmc, 1);
}
+#define NVME_MAX_PAGES (int)(1024 / sizeof(vm_page_t))
+
+static int
+nvme_user_ioctl_req(vm_offset_t addr, size_t len, bool is_read,
+ vm_page_t *upages, int max_pages, int *npagesp, struct nvme_request **req,
+ nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+ vm_prot_t prot = VM_PROT_READ;
+ int err;
+
+ if (is_read)
+ prot |= VM_PROT_WRITE; /* Device will write to host memory */
+ err = vm_fault_hold_pages(&curproc->p_vmspace->vm_map,
+ addr, len, prot, upages, max_pages, npagesp);
+ if (err != 0)
+ return (err);
+ *req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg);
+ (*req)->payload = memdesc_vmpages(upages, len, addr & PAGE_MASK);
+ (*req)->payload_valid = true;
+ return (0);
+}
+
+static void
+nvme_user_ioctl_free(vm_page_t *pages, int npage)
+{
+ vm_page_unhold_pages(pages, npage);
+}
+
static void
nvme_pt_done(void *arg, const struct nvme_completion *cpl)
{
@@ -1290,30 +1318,28 @@ nvme_pt_done(void *arg, const struct nvme_completion *cpl)
int
nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
- struct nvme_pt_command *pt, uint32_t nsid, int is_user_buffer,
+ struct nvme_pt_command *pt, uint32_t nsid, int is_user,
int is_admin_cmd)
{
- struct nvme_request *req;
- struct mtx *mtx;
- struct buf *buf = NULL;
- int ret = 0;
+ struct nvme_request *req;
+ struct mtx *mtx;
+ int ret = 0;
+ int npages = 0;
+ vm_page_t upages[NVME_MAX_PAGES];
if (pt->len > 0) {
if (pt->len > ctrlr->max_xfer_size) {
- nvme_printf(ctrlr, "pt->len (%d) "
- "exceeds max_xfer_size (%d)\n", pt->len,
- ctrlr->max_xfer_size);
- return EIO;
+ nvme_printf(ctrlr,
+ "len (%d) exceeds max_xfer_size (%d)\n",
+ pt->len, ctrlr->max_xfer_size);
+ return (EIO);
}
- if (is_user_buffer) {
- buf = uma_zalloc(pbuf_zone, M_WAITOK);
- buf->b_iocmd = pt->is_read ? BIO_READ : BIO_WRITE;
- if (vmapbuf(buf, pt->buf, pt->len, 1) < 0) {
- ret = EFAULT;
- goto err;
- }
- req = nvme_allocate_request_vaddr(buf->b_data, pt->len,
- M_WAITOK, nvme_pt_done, pt);
+ if (is_user) {
+ ret = nvme_user_ioctl_req((vm_offset_t)pt->buf, pt->len,
+ pt->is_read, upages, nitems(upages), &npages, &req,
+ nvme_pt_done, pt);
+ if (ret != 0)
+ return (ret);
} else
req = nvme_allocate_request_vaddr(pt->buf, pt->len,
M_WAITOK, nvme_pt_done, pt);
@@ -1347,11 +1373,8 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
mtx_sleep(pt, mtx, PRIBIO, "nvme_pt", 0);
mtx_unlock(mtx);
- if (buf != NULL) {
- vunmapbuf(buf);
-err:
- uma_zfree(pbuf_zone, buf);
- }
+ if (npages > 0)
+ nvme_user_ioctl_free(upages, npages);
return (ret);
}
@@ -1377,8 +1400,9 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
{
struct nvme_request *req;
struct mtx *mtx;
- struct buf *buf = NULL;
int ret = 0;
+ int npages = 0;
+ vm_page_t upages[NVME_MAX_PAGES];
/*
* We don't support metadata.
@@ -1389,28 +1413,16 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
if (npc->data_len > 0 && npc->addr != 0) {
if (npc->data_len > ctrlr->max_xfer_size) {
nvme_printf(ctrlr,
- "npc->data_len (%d) exceeds max_xfer_size (%d)\n",
+ "data_len (%d) exceeds max_xfer_size (%d)\n",
npc->data_len, ctrlr->max_xfer_size);
return (EIO);
}
- /*
- * We only support data out or data in commands, but not both at
- * once. However, there's some comands with lower bit cleared
- * that are really read commands, so we should filter & 3 == 0,
- * but don't.
- */
- if ((npc->opcode & 0x3) == 3)
- return (EINVAL);
if (is_user) {
- buf = uma_zalloc(pbuf_zone, M_WAITOK);
- buf->b_iocmd = npc->opcode & 1 ? BIO_WRITE : BIO_READ;
- if (vmapbuf(buf, (void *)(uintptr_t)npc->addr,
- npc->data_len, 1) < 0) {
- ret = EFAULT;
- goto err;
- }
- req = nvme_allocate_request_vaddr(buf->b_data,
- npc->data_len, M_WAITOK, nvme_npc_done, npc);
+ ret = nvme_user_ioctl_req(npc->addr, npc->data_len,
+ npc->opcode & 0x1, upages, nitems(upages), &npages,
+ &req, nvme_npc_done, npc);
+ if (ret != 0)
+ return (ret);
} else
req = nvme_allocate_request_vaddr(
(void *)(uintptr_t)npc->addr, npc->data_len,
@@ -1420,8 +1432,8 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
req->cmd.opc = npc->opcode;
req->cmd.fuse = npc->flags;
- req->cmd.rsvd2 = htole16(npc->cdw2);
- req->cmd.rsvd3 = htole16(npc->cdw3);
+ req->cmd.rsvd2 = htole32(npc->cdw2);
+ req->cmd.rsvd3 = htole32(npc->cdw3);
req->cmd.cdw10 = htole32(npc->cdw10);
req->cmd.cdw11 = htole32(npc->cdw11);
req->cmd.cdw12 = htole32(npc->cdw12);
@@ -1445,11 +1457,8 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
mtx_sleep(npc, mtx, PRIBIO, "nvme_npc", 0);
mtx_unlock(mtx);
- if (buf != NULL) {
- vunmapbuf(buf);
-err:
- uma_zfree(pbuf_zone, buf);
- }
+ if (npages > 0)
+ nvme_user_ioctl_free(upages, npages);
return (ret);
}
@@ -1776,7 +1785,6 @@ void
nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
struct nvme_request *req)
{
-
nvme_qpair_submit_request(&ctrlr->adminq, req);
}
@@ -1793,14 +1801,12 @@ nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
device_t
nvme_ctrlr_get_device(struct nvme_controller *ctrlr)
{
-
return (ctrlr->dev);
}
const struct nvme_controller_data *
nvme_ctrlr_get_data(struct nvme_controller *ctrlr)
{
-
return (&ctrlr->cdata);
}
@@ -1853,7 +1859,6 @@ nvme_ctrlr_suspend(struct nvme_controller *ctrlr)
int
nvme_ctrlr_resume(struct nvme_controller *ctrlr)
{
-
/*
* Can't touch failed controllers, so nothing to do to resume.
*/
diff --git a/sys/dev/nvme/nvme_ctrlr_cmd.c b/sys/dev/nvme/nvme_ctrlr_cmd.c
index 993a7718356d..5a44ed425acb 100644
--- a/sys/dev/nvme/nvme_ctrlr_cmd.c
+++ b/sys/dev/nvme/nvme_ctrlr_cmd.c
@@ -281,7 +281,6 @@ nvme_ctrlr_cmd_get_error_page(struct nvme_controller *ctrlr,
struct nvme_error_information_entry *payload, uint32_t num_entries,
nvme_cb_fn_t cb_fn, void *cb_arg)
{
-
KASSERT(num_entries > 0, ("%s called with num_entries==0\n", __func__));
/* Controller's error log page entries is 0-based. */
@@ -302,7 +301,6 @@ nvme_ctrlr_cmd_get_health_information_page(struct nvme_controller *ctrlr,
uint32_t nsid, struct nvme_health_information_page *payload,
nvme_cb_fn_t cb_fn, void *cb_arg)
{
-
nvme_ctrlr_cmd_get_log_page(ctrlr, NVME_LOG_HEALTH_INFORMATION,
nsid, payload, sizeof(*payload), cb_fn, cb_arg);
}
@@ -311,7 +309,6 @@ void
nvme_ctrlr_cmd_get_firmware_page(struct nvme_controller *ctrlr,
struct nvme_firmware_page *payload, nvme_cb_fn_t cb_fn, void *cb_arg)
{
-
nvme_ctrlr_cmd_get_log_page(ctrlr, NVME_LOG_FIRMWARE_SLOT,
NVME_GLOBAL_NAMESPACE_TAG, payload, sizeof(*payload), cb_fn,
cb_arg);
diff --git a/sys/dev/nvme/nvme_ns.c b/sys/dev/nvme/nvme_ns.c
index 3f29382fe42f..e84d2066930e 100644
--- a/sys/dev/nvme/nvme_ns.c
+++ b/sys/dev/nvme/nvme_ns.c
@@ -129,7 +129,6 @@ static int
nvme_ns_close(struct cdev *dev __unused, int flags, int fmt __unused,
struct thread *td)
{
-
return (0);
}
@@ -231,7 +230,6 @@ nvme_ns_get_model_number(struct nvme_namespace *ns)
const struct nvme_namespace_data *
nvme_ns_get_data(struct nvme_namespace *ns)
{
-
return (&ns->data);
}
@@ -631,7 +629,6 @@ nvme_ns_construct(struct nvme_namespace *ns, uint32_t id,
void
nvme_ns_destruct(struct nvme_namespace *ns)
{
-
if (ns->cdev != NULL) {
if (ns->cdev->si_drv2 != NULL)
destroy_dev(ns->cdev->si_drv2);
diff --git a/sys/dev/nvme/nvme_pci.c b/sys/dev/nvme/nvme_pci.c
index 29b49b7df403..c07a68d2f0dc 100644
--- a/sys/dev/nvme/nvme_pci.c
+++ b/sys/dev/nvme/nvme_pci.c
@@ -151,7 +151,6 @@ nvme_pci_probe (device_t device)
static int
nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
{
-
ctrlr->resource_id = PCIR_BAR(0);
ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 36f00fedc48e..52f9e12f8f9a 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -459,8 +459,7 @@ int nvme_detach(device_t dev);
* vast majority of these without waiting for a tick plus scheduling delays. Since
* these are on startup, this drastically reduces startup time.
*/
-static __inline
-void
+static __inline void
nvme_completion_poll(struct nvme_completion_poll_status *status)
{
int timeout = ticks + 10 * hz;
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
index bd8626e32209..4f2c44da3b4f 100644
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -793,7 +793,6 @@ nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
void
nvme_io_qpair_destroy(struct nvme_qpair *qpair)
{
-
nvme_qpair_destroy(qpair);
}
@@ -1202,7 +1201,6 @@ _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
void
nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
{
-
mtx_lock(&qpair->lock);
_nvme_qpair_submit_request(qpair, req);
mtx_unlock(&qpair->lock);
@@ -1226,7 +1224,6 @@ nvme_qpair_enable(struct nvme_qpair *qpair)
void
nvme_qpair_reset(struct nvme_qpair *qpair)
{
-
qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;
/*
diff --git a/sys/dev/nvme/nvme_sim.c b/sys/dev/nvme/nvme_sim.c
index 4974bb718222..a06774a64761 100644
--- a/sys/dev/nvme/nvme_sim.c
+++ b/sys/dev/nvme/nvme_sim.c
@@ -301,7 +301,6 @@ nvme_sim_action(struct cam_sim *sim, union ccb *ccb)
static void
nvme_sim_poll(struct cam_sim *sim)
{
-
nvme_ctrlr_poll(sim2ctrlr(sim));
}
diff --git a/sys/dev/nvme/nvme_sysctl.c b/sys/dev/nvme/nvme_sysctl.c
index a5a44721f9f9..50d19e730a16 100644
--- a/sys/dev/nvme/nvme_sysctl.c
+++ b/sys/dev/nvme/nvme_sysctl.c
@@ -153,7 +153,6 @@ nvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS)
static void
nvme_qpair_reset_stats(struct nvme_qpair *qpair)
{
-
/*
* Reset the values. Due to sanity checks in
* nvme_qpair_process_completions, we reset the number of interrupt
diff --git a/sys/dev/nvme/nvme_util.c b/sys/dev/nvme/nvme_util.c
index 0a07653a7378..cb0ba729ac96 100644
--- a/sys/dev/nvme/nvme_util.c
+++ b/sys/dev/nvme/nvme_util.c
@@ -208,31 +208,33 @@ nvme_opcode_sbuf(bool admin, uint8_t opc, struct sbuf *sb)
if (s == NULL)
sbuf_printf(sb, "%s (%02x)", type, opc);
else
- sbuf_printf(sb, "%s", s);
+ sbuf_printf(sb, "%s (%02x)", s, opc);
}
void
nvme_sc_sbuf(const struct nvme_completion *cpl, struct sbuf *sb)
{
const char *s, *type;
- uint16_t status;
+ uint16_t status, sc, sct;
status = le16toh(cpl->status);
- switch (NVME_STATUS_GET_SCT(status)) {
+ sc = NVME_STATUS_GET_SC(status);
+ sct = NVME_STATUS_GET_SCT(status);
+ switch (sct) {
case NVME_SCT_GENERIC:
- s = generic_status[NVME_STATUS_GET_SC(status)];
+ s = generic_status[sc];
type = "GENERIC";
break;
case NVME_SCT_COMMAND_SPECIFIC:
- s = command_specific_status[NVME_STATUS_GET_SC(status)];
+ s = command_specific_status[sc];
type = "COMMAND SPECIFIC";
break;
case NVME_SCT_MEDIA_ERROR:
- s = media_error_status[NVME_STATUS_GET_SC(status)];
+ s = media_error_status[sc];
type = "MEDIA ERROR";
break;
case NVME_SCT_PATH_RELATED:
- s = path_related_status[NVME_STATUS_GET_SC(status)];
+ s = path_related_status[sc];
type = "PATH RELATED";
break;
case NVME_SCT_VENDOR_SPECIFIC:
@@ -246,12 +248,11 @@ nvme_sc_sbuf(const struct nvme_completion *cpl, struct sbuf *sb)
}
if (type == NULL)
- sbuf_printf(sb, "RESERVED (%02x/%02x)",
- NVME_STATUS_GET_SCT(status), NVME_STATUS_GET_SC(status));
+ sbuf_printf(sb, "RESERVED (%02x/%02x)", sct, sc);
else if (s == NULL)
- sbuf_printf(sb, "%s (%02x)", type, NVME_STATUS_GET_SC(status));
+ sbuf_printf(sb, "%s (%02x/%02x)", type, sct, sc);
else
- sbuf_printf(sb, "%s", s);
+ sbuf_printf(sb, "%s (%02x/%02x)", s, sct, sc);
}
void
diff --git a/sys/dev/pci/pci_user.c b/sys/dev/pci/pci_user.c
index f68b5b7e71ff..9768030995e7 100644
--- a/sys/dev/pci/pci_user.c
+++ b/sys/dev/pci/pci_user.c
@@ -79,6 +79,9 @@ struct pci_conf32 {
u_int8_t pc_revid; /* chip revision ID */
char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
u_int32_t pd_unit; /* device unit number */
+ int pd_numa_domain; /* device NUMA domain */
+ u_int32_t pc_reported_len;/* length of PCI data reported */
+ char pc_spare[64]; /* space for future fields */
};
struct pci_match_conf32 {
@@ -502,11 +505,58 @@ pci_conf_match_freebsd6_32(struct pci_match_conf_freebsd6_32 *matches, int num_m
#endif /* COMPAT_FREEBSD32 */
#endif /* !PRE7_COMPAT */
+#ifdef COMPAT_FREEBSD14
+struct pci_conf_freebsd14 {
+ struct pcisel pc_sel; /* domain+bus+slot+function */
+ u_int8_t pc_hdr; /* PCI header type */
+ u_int16_t pc_subvendor; /* card vendor ID */
+ u_int16_t pc_subdevice; /* card device ID, assigned by
+ card vendor */
+ u_int16_t pc_vendor; /* chip vendor ID */
+ u_int16_t pc_device; /* chip device ID, assigned by
+ chip vendor */
+ u_int8_t pc_class; /* chip PCI class */
+ u_int8_t pc_subclass; /* chip PCI subclass */
+ u_int8_t pc_progif; /* chip PCI programming interface */
+ u_int8_t pc_revid; /* chip revision ID */
+ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
+ u_long pd_unit; /* device unit number */
+};
+#define PCIOCGETCONF_FREEBSD14 _IOWR('p', 5, struct pci_conf_io)
+
+#ifdef COMPAT_FREEBSD32
+struct pci_conf_freebsd14_32 {
+ struct pcisel pc_sel; /* domain+bus+slot+function */
+ u_int8_t pc_hdr; /* PCI header type */
+ u_int16_t pc_subvendor; /* card vendor ID */
+ u_int16_t pc_subdevice; /* card device ID, assigned by
+ card vendor */
+ u_int16_t pc_vendor; /* chip vendor ID */
+ u_int16_t pc_device; /* chip device ID, assigned by
+ chip vendor */
+ u_int8_t pc_class; /* chip PCI class */
+ u_int8_t pc_subclass; /* chip PCI subclass */
+ u_int8_t pc_progif; /* chip PCI programming interface */
+ u_int8_t pc_revid; /* chip revision ID */
+ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
+ u_int32_t pd_unit; /* device unit number */
+};
+#define PCIOCGETCONF_FREEBSD14_32 \
+ _IOC_NEWTYPE(PCIOCGETCONF_FREEBSD14, struct pci_conf_io32)
+#endif /* COMPAT_FREEBSD32 */
+#endif /* COMPAT_FREEBSD14 */
+
union pci_conf_union {
struct pci_conf pc;
#ifdef COMPAT_FREEBSD32
struct pci_conf32 pc32;
#endif
+#ifdef COMPAT_FREEBSD14
+ struct pci_conf_freebsd14 pc14;
+#ifdef COMPAT_FREEBSD32
+ struct pci_conf_freebsd14_32 pc14_32;
+#endif
+#endif
#ifdef PRE7_COMPAT
struct pci_conf_freebsd6 pco;
#ifdef COMPAT_FREEBSD32
@@ -522,10 +572,16 @@ pci_conf_match(u_long cmd, struct pci_match_conf *matches, int num_matches,
switch (cmd) {
case PCIOCGETCONF:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+#endif
return (pci_conf_match_native(
(struct pci_match_conf *)matches, num_matches, match_buf));
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF32:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14_32:
+#endif
return (pci_conf_match32((struct pci_match_conf32 *)matches,
num_matches, match_buf));
#endif
@@ -645,9 +701,15 @@ pci_match_conf_size(u_long cmd)
switch (cmd) {
case PCIOCGETCONF:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+#endif
return (sizeof(struct pci_match_conf));
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF32:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14_32:
+#endif
return (sizeof(struct pci_match_conf32));
#endif
#ifdef PRE7_COMPAT
@@ -675,6 +737,14 @@ pci_conf_size(u_long cmd)
case PCIOCGETCONF32:
return (sizeof(struct pci_conf32));
#endif
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+ return (sizeof(struct pci_conf_freebsd14));
+#ifdef COMPAT_FREEBSD32
+ case PCIOCGETCONF_FREEBSD14_32:
+ return (sizeof(struct pci_conf_freebsd14_32));
+#endif
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_FREEBSD6:
return (sizeof(struct pci_conf_freebsd6));
@@ -698,6 +768,9 @@ pci_conf_io_init(struct pci_conf_io *cio, caddr_t data, u_long cmd)
switch (cmd) {
case PCIOCGETCONF:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_FREEBSD6:
#endif
@@ -706,6 +779,9 @@ pci_conf_io_init(struct pci_conf_io *cio, caddr_t data, u_long cmd)
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF32:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14_32:
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_FREEBSD6_32:
#endif
@@ -739,6 +815,9 @@ pci_conf_io_update_data(const struct pci_conf_io *cio, caddr_t data,
switch (cmd) {
case PCIOCGETCONF:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_FREEBSD6:
#endif
@@ -751,6 +830,9 @@ pci_conf_io_update_data(const struct pci_conf_io *cio, caddr_t data,
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF32:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14_32:
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_FREEBSD6_32:
#endif
@@ -781,8 +863,17 @@ pci_conf_for_copyout(const struct pci_conf *pcp, union pci_conf_union *pcup,
pcup->pc = *pcp;
return;
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+ memcpy(&pcup->pc14, pcp, sizeof(pcup->pc14));
+ return;
+#endif
+
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF32:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14_32:
+#endif
pcup->pc32.pc_sel = pcp->pc_sel;
pcup->pc32.pc_hdr = pcp->pc_hdr;
pcup->pc32.pc_subvendor = pcp->pc_subvendor;
@@ -796,8 +887,13 @@ pci_conf_for_copyout(const struct pci_conf *pcp, union pci_conf_union *pcup,
strlcpy(pcup->pc32.pd_name, pcp->pd_name,
sizeof(pcup->pc32.pd_name));
pcup->pc32.pd_unit = (uint32_t)pcp->pd_unit;
+ if (cmd == PCIOCGETCONF32) {
+ pcup->pc32.pd_numa_domain = pcp->pd_numa_domain;
+ pcup->pc32.pc_reported_len =
+ (uint32_t)offsetof(struct pci_conf32, pc_spare);
+ }
return;
-#endif
+#endif /* COMPAT_FREEBSD32 */
#ifdef PRE7_COMPAT
#ifdef COMPAT_FREEBSD32
@@ -1024,7 +1120,7 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
struct pci_map *pm;
struct pci_bar_mmap *pbm;
size_t confsz, iolen;
- int error, ionum, i, num_patterns;
+ int domain, error, ionum, i, num_patterns;
union pci_conf_union pcu;
#ifdef PRE7_COMPAT
struct pci_io iodata;
@@ -1044,6 +1140,12 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF32:
#endif
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+#ifdef COMPAT_FREEBSD32
+ case PCIOCGETCONF_FREEBSD14_32:
+#endif
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_FREEBSD6:
#ifdef COMPAT_FREEBSD32
@@ -1069,6 +1171,12 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF32:
#endif
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+#ifdef COMPAT_FREEBSD32
+ case PCIOCGETCONF_FREEBSD14_32:
+#endif
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_FREEBSD6:
#ifdef COMPAT_FREEBSD32
@@ -1201,6 +1309,12 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
dinfo->conf.pd_unit = 0;
}
+ if (dinfo->cfg.dev != NULL &&
+ bus_get_domain(dinfo->cfg.dev, &domain) == 0)
+ dinfo->conf.pd_numa_domain = domain;
+ else
+ dinfo->conf.pd_numa_domain = 0;
+
if (pattern_buf == NULL ||
pci_conf_match(cmd, pattern_buf, num_patterns,
&dinfo->conf) == 0) {
@@ -1217,6 +1331,9 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
break;
}
+ dinfo->conf.pc_reported_len =
+ offsetof(struct pci_conf, pc_spare);
+
pci_conf_for_copyout(&dinfo->conf, &pcu, cmd);
error = copyout(&pcu,
(caddr_t)cio->matches +
diff --git a/sys/dev/qat/qat_common/adf_gen4_timer.c b/sys/dev/qat/qat_common/adf_gen4_timer.c
index 96b65cdff181..2c74d09418e5 100644
--- a/sys/dev/qat/qat_common/adf_gen4_timer.c
+++ b/sys/dev/qat/qat_common/adf_gen4_timer.c
@@ -57,7 +57,7 @@ end:
static void
timer_handler(struct timer_list *tl)
{
- struct adf_int_timer *int_timer = from_timer(int_timer, tl, timer);
+ struct adf_int_timer *int_timer = timer_container_of(int_timer, tl, timer);
struct adf_accel_dev *accel_dev = int_timer->accel_dev;
struct adf_hb_timer_data *hb_timer_data = NULL;
u64 timeout_val = adf_get_next_timeout(int_timer->timeout_val);
diff --git a/sys/dev/qlnx/qlnxe/ecore_dev.c b/sys/dev/qlnx/qlnxe/ecore_dev.c
index 6187ecdbc446..389a95a4164c 100644
--- a/sys/dev/qlnx/qlnxe/ecore_dev.c
+++ b/sys/dev/qlnx/qlnxe/ecore_dev.c
@@ -5268,7 +5268,7 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
}
DP_VERBOSE(p_hwfn, ECORE_MSG_LINK,
- "Read default link: Speed 0x%08x, Adv. Speed 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x EEE: %02x [%08x usec]\n",
+ "Read default link: Speed %u Mb/sec, Adv. Speeds 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x EEE: %02x [%u usec]\n",
link->speed.forced_speed, link->speed.advertised_speeds,
link->speed.autoneg, link->pause.autoneg,
p_caps->default_eee, p_caps->eee_lpi_timer);
@@ -6860,7 +6860,7 @@ int __ecore_configure_pf_max_bandwidth(struct ecore_hwfn *p_hwfn,
p_hwfn->qm_info.pf_rl);
DP_VERBOSE(p_hwfn, ECORE_MSG_LINK,
- "Configured MAX bandwidth to be %08x Mb/sec\n",
+ "Configured MAX bandwidth to be %u Mb/sec\n",
p_link->speed);
return rc;
@@ -6918,7 +6918,7 @@ int __ecore_configure_pf_min_bandwidth(struct ecore_hwfn *p_hwfn,
rc = ecore_init_pf_wfq(p_hwfn, p_ptt, p_hwfn->rel_pf_id, min_bw);
DP_VERBOSE(p_hwfn, ECORE_MSG_LINK,
- "Configured MIN bandwidth to be %d Mb/sec\n",
+ "Configured MIN bandwidth to be %u Mb/sec\n",
p_link->min_pf_rate);
return rc;
diff --git a/sys/dev/qlnx/qlnxe/ecore_mcp.c b/sys/dev/qlnx/qlnxe/ecore_mcp.c
index ab14b1eb5186..6d1e5fe24d06 100644
--- a/sys/dev/qlnx/qlnxe/ecore_mcp.c
+++ b/sys/dev/qlnx/qlnxe/ecore_mcp.c
@@ -1638,7 +1638,7 @@ enum _ecore_status_t ecore_mcp_set_link(struct ecore_hwfn *p_hwfn,
if (b_up)
DP_VERBOSE(p_hwfn, ECORE_MSG_LINK,
- "Configuring Link: Speed 0x%08x, Pause 0x%08x, adv_speed 0x%08x, loopback 0x%08x\n",
+ "Configuring Link: Speed %u Mb/sec, Pause 0x%08x, adv_speed 0x%08x, loopback 0x%08x\n",
phy_cfg.speed, phy_cfg.pause, phy_cfg.adv_speed,
phy_cfg.loopback_mode);
else
diff --git a/sys/dev/qlnx/qlnxe/qlnx_def.h b/sys/dev/qlnx/qlnxe/qlnx_def.h
index 4342bba89587..796845f3f8c6 100644
--- a/sys/dev/qlnx/qlnxe/qlnx_def.h
+++ b/sys/dev/qlnx/qlnxe/qlnx_def.h
@@ -696,22 +696,6 @@ extern int qlnx_alloc_mem_sb(qlnx_host_t *ha, struct ecore_sb_info *sb_info,
* Some OS specific stuff
*/
-#if (defined IFM_100G_SR4)
-#define QLNX_IFM_100G_SR4 IFM_100G_SR4
-#define QLNX_IFM_100G_LR4 IFM_100G_LR4
-#define QLNX_IFM_100G_CR4 IFM_100G_CR4
-#else
-#define QLNX_IFM_100G_SR4 IFM_UNKNOWN
-#define QLNX_IFM_100G_LR4 IFM_UNKNOWN
-#endif /* #if (defined IFM_100G_SR4) */
-
-#if (defined IFM_25G_SR)
-#define QLNX_IFM_25G_SR IFM_25G_SR
-#define QLNX_IFM_25G_CR IFM_25G_CR
-#else
-#define QLNX_IFM_25G_SR IFM_UNKNOWN
-#define QLNX_IFM_25G_CR IFM_UNKNOWN
-#endif /* #if (defined IFM_25G_SR) */
#define QLNX_INC_IERRORS(ifp) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1)
#define QLNX_INC_IQDROPS(ifp) if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1)
diff --git a/sys/dev/qlnx/qlnxe/qlnx_os.c b/sys/dev/qlnx/qlnxe/qlnx_os.c
index 4ad190374f87..9963f472c615 100644
--- a/sys/dev/qlnx/qlnxe/qlnx_os.c
+++ b/sys/dev/qlnx/qlnxe/qlnx_os.c
@@ -2375,18 +2375,15 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
ifmedia_add(&ha->media, (IFM_ETHER | IFM_40G_CR4), 0, NULL);
} else if ((device_id == QLOGIC_PCI_DEVICE_ID_1656) ||
(device_id == QLOGIC_PCI_DEVICE_ID_8070)) {
- ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_25G_SR), 0, NULL);
- ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_25G_CR), 0, NULL);
+ ifmedia_add(&ha->media, (IFM_ETHER | IFM_25G_SR), 0, NULL);
+ ifmedia_add(&ha->media, (IFM_ETHER | IFM_25G_CR), 0, NULL);
} else if (device_id == QLOGIC_PCI_DEVICE_ID_1654) {
ifmedia_add(&ha->media, (IFM_ETHER | IFM_50G_KR2), 0, NULL);
ifmedia_add(&ha->media, (IFM_ETHER | IFM_50G_CR2), 0, NULL);
} else if (device_id == QLOGIC_PCI_DEVICE_ID_1644) {
- ifmedia_add(&ha->media,
- (IFM_ETHER | QLNX_IFM_100G_LR4), 0, NULL);
- ifmedia_add(&ha->media,
- (IFM_ETHER | QLNX_IFM_100G_SR4), 0, NULL);
- ifmedia_add(&ha->media,
- (IFM_ETHER | QLNX_IFM_100G_CR4), 0, NULL);
+ ifmedia_add(&ha->media, (IFM_ETHER | IFM_100G_LR4), 0, NULL);
+ ifmedia_add(&ha->media, (IFM_ETHER | IFM_100G_SR4), 0, NULL);
+ ifmedia_add(&ha->media, (IFM_ETHER | IFM_100G_CR4), 0, NULL);
}
ifmedia_add(&ha->media, (IFM_ETHER | IFM_FDX), 0, NULL);
@@ -2724,7 +2721,9 @@ qlnx_ioctl(if_t ifp, u_long cmd, caddr_t data)
case SIOCSIFMEDIA:
case SIOCGIFMEDIA:
- QL_DPRINT4(ha, "SIOCSIFMEDIA/SIOCGIFMEDIA (0x%lx)\n", cmd);
+ case SIOCGIFXMEDIA:
+ QL_DPRINT4(ha,
+ "SIOCSIFMEDIA/SIOCGIFMEDIA/SIOCGIFXMEDIA (0x%lx)\n", cmd);
ret = ifmedia_ioctl(ifp, ifr, &ha->media, cmd);
break;
@@ -3808,11 +3807,11 @@ qlnx_get_optics(qlnx_host_t *ha, struct qlnx_link_output *if_link)
case MEDIA_MODULE_FIBER:
case MEDIA_UNSPECIFIED:
if (if_link->speed == (100 * 1000))
- ifm_type = QLNX_IFM_100G_SR4;
+ ifm_type = IFM_100G_SR4;
else if (if_link->speed == (40 * 1000))
ifm_type = IFM_40G_SR4;
else if (if_link->speed == (25 * 1000))
- ifm_type = QLNX_IFM_25G_SR;
+ ifm_type = IFM_25G_SR;
else if (if_link->speed == (10 * 1000))
ifm_type = (IFM_10G_LR | IFM_10G_SR);
else if (if_link->speed == (1 * 1000))
@@ -3822,11 +3821,11 @@ qlnx_get_optics(qlnx_host_t *ha, struct qlnx_link_output *if_link)
case MEDIA_DA_TWINAX:
if (if_link->speed == (100 * 1000))
- ifm_type = QLNX_IFM_100G_CR4;
+ ifm_type = IFM_100G_CR4;
else if (if_link->speed == (40 * 1000))
ifm_type = IFM_40G_CR4;
else if (if_link->speed == (25 * 1000))
- ifm_type = QLNX_IFM_25G_CR;
+ ifm_type = IFM_25G_CR;
else if (if_link->speed == (10 * 1000))
ifm_type = IFM_10G_TWINAX;
diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c
index 84ec174bd08e..2d7af254c52c 100644
--- a/sys/dev/random/random_harvestq.c
+++ b/sys/dev/random/random_harvestq.c
@@ -103,8 +103,10 @@ static const char *random_source_descr[ENTROPYSOURCE];
volatile int random_kthread_control;
-/* Allow the sysadmin to select the broad category of
- * entropy types to harvest.
+/*
+ * Allow the sysadmin to select the broad category of entropy types to harvest.
+ *
+ * Updates are synchronized by the harvest mutex.
*/
__read_frequently u_int hc_source_mask;
@@ -278,8 +280,15 @@ random_sources_feed(void)
epoch_enter_preempt(rs_epoch, &et);
CK_LIST_FOREACH(rrs, &source_list, rrs_entries) {
for (i = 0; i < npools; i++) {
+ if (rrs->rrs_source->rs_read == NULL) {
+ /* Source pushes entropy asynchronously. */
+ continue;
+ }
n = rrs->rrs_source->rs_read(entropy, sizeof(entropy));
- KASSERT((n <= sizeof(entropy)), ("%s: rs_read returned too much data (%u > %zu)", __func__, n, sizeof(entropy)));
+ KASSERT((n <= sizeof(entropy)),
+ ("%s: rs_read returned too much data (%u > %zu)",
+ __func__, n, sizeof(entropy)));
+
/*
* Sometimes the HW entropy source doesn't have anything
* ready for us. This isn't necessarily untrustworthy.
@@ -334,7 +343,17 @@ copy_event(uint32_t dst[static HARVESTSIZE + 1],
{
memset(dst, 0, sizeof(uint32_t) * (HARVESTSIZE + 1));
memcpy(dst, event->he_entropy, event->he_size);
- dst[HARVESTSIZE] = event->he_somecounter;
+ if (event->he_source <= RANDOM_ENVIRONMENTAL_END) {
+ /*
+ * For pure entropy sources the timestamp counter is generally
+ * quite deterministic since samples are taken at regular
+ * intervals, so does not contribute much to the entropy. To
+ * make health tests more effective, exclude it from the sample,
+ * since it might otherwise defeat the health tests in a
+ * scenario where the source is stuck.
+ */
+ dst[HARVESTSIZE] = event->he_somecounter;
+ }
}
static void
@@ -464,11 +483,12 @@ SYSCTL_BOOL(_kern_random, OID_AUTO, nist_healthtest_enabled,
"Enable NIST SP 800-90B health tests for noise sources");
static void
-random_healthtest_init(enum random_entropy_source source)
+random_healthtest_init(enum random_entropy_source source, int min_entropy)
{
struct health_test_softc *ht;
ht = &healthtest[source];
+ memset(ht, 0, sizeof(*ht));
KASSERT(ht->ht_state == INIT,
("%s: health test state is %d for source %d",
__func__, ht->ht_state, source));
@@ -485,20 +505,62 @@ random_healthtest_init(enum random_entropy_source source)
}
/*
- * Set cutoff values for the two tests, assuming that each sample has
- * min-entropy of 1 bit and allowing for an error rate of 1 in 2^{34}.
- * With a sample rate of RANDOM_KTHREAD_HZ, we expect to see an false
- * positive once in ~54.5 years.
+ * Set cutoff values for the two tests, given a min-entropy estimate for
+ * the source and allowing for an error rate of 1 in 2^{34}. With a
+ * min-entropy estimate of 1 bit and a sample rate of RANDOM_KTHREAD_HZ,
+ * we expect to see a false positive once in ~54.5 years.
*
* The RCT limit comes from the formula in section 4.4.1.
*
- * The APT cutoff is calculated using the formula in section 4.4.2
+ * The APT cutoffs are calculated using the formula in section 4.4.2
* footnote 10 with the number of Bernoulli trials changed from W to
* W-1, since the test as written counts the number of samples equal to
- * the first sample in the window, and thus tests W-1 samples.
+ * the first sample in the window, and thus tests W-1 samples. We
+ * provide cutoffs for estimates up to sizeof(uint32_t)*HARVESTSIZE*8
+ * bits.
*/
- ht->ht_rct_limit = 35;
- ht->ht_apt_cutoff = 330;
+ const int apt_cutoffs[] = {
+ [1] = 329,
+ [2] = 195,
+ [3] = 118,
+ [4] = 73,
+ [5] = 48,
+ [6] = 33,
+ [7] = 23,
+ [8] = 17,
+ [9] = 13,
+ [10] = 11,
+ [11] = 9,
+ [12] = 8,
+ [13] = 7,
+ [14] = 6,
+ [15] = 5,
+ [16] = 5,
+ [17 ... 19] = 4,
+ [20 ... 25] = 3,
+ [26 ... 42] = 2,
+ [43 ... 64] = 1,
+ };
+ const int error_rate = 34;
+
+ if (min_entropy == 0) {
+ /*
+ * For environmental sources, the main source of entropy is the
+ * associated timecounter value. Since these sources can be
+ * influenced by unprivileged users, we conservatively use a
+ * min-entropy estimate of 1 bit per sample. For "pure"
+ * sources, we assume 8 bits per sample, as such sources provide
+ * a variable amount of data per read and in particular might
+ * only provide a single byte at a time.
+ */
+ min_entropy = source >= RANDOM_PURE_START ? 8 : 1;
+ } else if (min_entropy < 0 || min_entropy >= nitems(apt_cutoffs)) {
+ panic("invalid min_entropy %d for %s", min_entropy,
+ random_source_descr[source]);
+ }
+
+ ht->ht_rct_limit = 1 + howmany(error_rate, min_entropy);
+ ht->ht_apt_cutoff = apt_cutoffs[min_entropy];
}
static int
@@ -533,9 +595,9 @@ random_check_uint_harvestmask(SYSCTL_HANDLER_ARGS)
_RANDOM_HARVEST_ETHER_OFF | _RANDOM_HARVEST_UMA_OFF;
int error;
- u_int value, orig_value;
+ u_int value;
- orig_value = value = hc_source_mask;
+ value = atomic_load_int(&hc_source_mask);
error = sysctl_handle_int(oidp, &value, 0, req);
if (error != 0 || req->newptr == NULL)
return (error);
@@ -546,12 +608,14 @@ random_check_uint_harvestmask(SYSCTL_HANDLER_ARGS)
/*
* Disallow userspace modification of pure entropy sources.
*/
+ RANDOM_HARVEST_LOCK();
hc_source_mask = (value & ~user_immutable_mask) |
- (orig_value & user_immutable_mask);
+ (hc_source_mask & user_immutable_mask);
+ RANDOM_HARVEST_UNLOCK();
return (0);
}
SYSCTL_PROC(_kern_random_harvest, OID_AUTO, mask,
- CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, NULL, 0,
+ CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
random_check_uint_harvestmask, "IU",
"Entropy harvesting mask");
@@ -563,9 +627,16 @@ random_print_harvestmask(SYSCTL_HANDLER_ARGS)
error = sysctl_wire_old_buffer(req, 0);
if (error == 0) {
+ u_int mask;
+
sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
- for (i = ENTROPYSOURCE - 1; i >= 0; i--)
- sbuf_cat(&sbuf, (hc_source_mask & (1 << i)) ? "1" : "0");
+ mask = atomic_load_int(&hc_source_mask);
+ for (i = ENTROPYSOURCE - 1; i >= 0; i--) {
+ bool present;
+
+ present = (mask & (1u << i)) != 0;
+ sbuf_cat(&sbuf, present ? "1" : "0");
+ }
error = sbuf_finish(&sbuf);
sbuf_delete(&sbuf);
}
@@ -619,16 +690,21 @@ random_print_harvestmask_symbolic(SYSCTL_HANDLER_ARGS)
first = true;
error = sysctl_wire_old_buffer(req, 0);
if (error == 0) {
+ u_int mask;
+
sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
+ mask = atomic_load_int(&hc_source_mask);
for (i = ENTROPYSOURCE - 1; i >= 0; i--) {
- if (i >= RANDOM_PURE_START &&
- (hc_source_mask & (1 << i)) == 0)
+ bool present;
+
+ present = (mask & (1u << i)) != 0;
+ if (i >= RANDOM_PURE_START && !present)
continue;
if (!first)
sbuf_cat(&sbuf, ",");
- sbuf_cat(&sbuf, !(hc_source_mask & (1 << i)) ? "[" : "");
+ sbuf_cat(&sbuf, !present ? "[" : "");
sbuf_cat(&sbuf, random_source_descr[i]);
- sbuf_cat(&sbuf, !(hc_source_mask & (1 << i)) ? "]" : "");
+ sbuf_cat(&sbuf, !present ? "]" : "");
first = false;
}
error = sbuf_finish(&sbuf);
@@ -652,8 +728,8 @@ random_harvestq_init(void *unused __unused)
RANDOM_HARVEST_INIT_LOCK();
harvest_context.hc_active_buf = 0;
- for (int i = 0; i < ENTROPYSOURCE; i++)
- random_healthtest_init(i);
+ for (int i = RANDOM_START; i <= RANDOM_ENVIRONMENTAL_END; i++)
+ random_healthtest_init(i, 0);
}
SYSINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, random_harvestq_init, NULL);
@@ -835,20 +911,6 @@ random_harvest_direct_(const void *entropy, u_int size, enum random_entropy_sour
}
void
-random_harvest_register_source(enum random_entropy_source source)
-{
-
- hc_source_mask |= (1 << source);
-}
-
-void
-random_harvest_deregister_source(enum random_entropy_source source)
-{
-
- hc_source_mask &= ~(1 << source);
-}
-
-void
random_source_register(const struct random_source *rsource)
{
struct random_sources *rrs;
@@ -858,11 +920,12 @@ random_source_register(const struct random_source *rsource)
rrs = malloc(sizeof(*rrs), M_ENTROPY, M_WAITOK);
rrs->rrs_source = rsource;
- random_harvest_register_source(rsource->rs_source);
-
printf("random: registering fast source %s\n", rsource->rs_ident);
+ random_healthtest_init(rsource->rs_source, rsource->rs_min_entropy);
+
RANDOM_HARVEST_LOCK();
+ hc_source_mask |= (1 << rsource->rs_source);
CK_LIST_INSERT_HEAD(&source_list, rrs, rrs_entries);
RANDOM_HARVEST_UNLOCK();
}
@@ -874,9 +937,8 @@ random_source_deregister(const struct random_source *rsource)
KASSERT(rsource != NULL, ("invalid input to %s", __func__));
- random_harvest_deregister_source(rsource->rs_source);
-
RANDOM_HARVEST_LOCK();
+ hc_source_mask &= ~(1 << rsource->rs_source);
CK_LIST_FOREACH(rrs, &source_list, rrs_entries)
if (rrs->rrs_source == rsource) {
CK_LIST_REMOVE(rrs, rrs_entries);
diff --git a/sys/dev/random/randomdev.h b/sys/dev/random/randomdev.h
index 6d742447ea8b..a6ca66c7d92e 100644
--- a/sys/dev/random/randomdev.h
+++ b/sys/dev/random/randomdev.h
@@ -52,7 +52,9 @@ random_check_uint_##name(SYSCTL_HANDLER_ARGS) \
}
#endif /* SYSCTL_DECL */
+#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_ENTROPY);
+#endif
extern bool random_bypass_before_seeding;
extern bool read_random_bypassed_before_seeding;
@@ -101,6 +103,7 @@ struct random_source {
const char *rs_ident;
enum random_entropy_source rs_source;
random_source_read_t *rs_read;
+ int rs_min_entropy;
};
void random_source_register(const struct random_source *);
diff --git a/sys/dev/re/if_re.c b/sys/dev/re/if_re.c
index 091ab2db72ec..67864c2de388 100644
--- a/sys/dev/re/if_re.c
+++ b/sys/dev/re/if_re.c
@@ -3558,6 +3558,7 @@ re_ioctl(if_t ifp, u_long command, caddr_t data)
static void
re_watchdog(struct rl_softc *sc)
{
+ struct epoch_tracker et;
if_t ifp;
RL_LOCK_ASSERT(sc);
@@ -3578,7 +3579,9 @@ re_watchdog(struct rl_softc *sc)
if_printf(ifp, "watchdog timeout\n");
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ NET_EPOCH_ENTER(et);
re_rxeof(sc, NULL);
+ NET_EPOCH_EXIT(et);
if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
re_init_locked(sc);
if (!if_sendq_empty(ifp))
diff --git a/sys/dev/rtwn/if_rtwn.c b/sys/dev/rtwn/if_rtwn.c
index 7a547e13cafa..25287f222270 100644
--- a/sys/dev/rtwn/if_rtwn.c
+++ b/sys/dev/rtwn/if_rtwn.c
@@ -268,6 +268,9 @@ rtwn_attach(struct rtwn_softc *sc)
ic->ic_flags_ext |= IEEE80211_FEXT_WATCHDOG;
#endif
+ /* Enable seqno offload */
+ ic->ic_flags_ext |= IEEE80211_FEXT_SEQNO_OFFLOAD;
+
/* Adjust capabilities. */
rtwn_adj_devcaps(sc);
diff --git a/sys/dev/rtwn/if_rtwn_tx.c b/sys/dev/rtwn/if_rtwn_tx.c
index 2c9c246dfbb4..fa7f35f2de83 100644
--- a/sys/dev/rtwn/if_rtwn_tx.c
+++ b/sys/dev/rtwn/if_rtwn_tx.c
@@ -183,6 +183,10 @@ rtwn_tx_data(struct rtwn_softc *sc, struct ieee80211_node *ni,
}
}
+ /* seqno allocate, only if AMPDU isn't running */
+ if ((m->m_flags & M_AMPDU_MPDU) == 0)
+ ieee80211_output_seqno_assign(ni, -1, m);
+
cipher = IEEE80211_CIPHER_NONE;
if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) {
k = ieee80211_crypto_encap(ni, m);
@@ -229,6 +233,10 @@ rtwn_tx_raw(struct rtwn_softc *sc, struct ieee80211_node *ni,
uint8_t type;
u_int cipher;
+ /* seqno allocate, only if AMPDU isn't running */
+ if ((m->m_flags & M_AMPDU_MPDU) == 0)
+ ieee80211_output_seqno_assign(ni, -1, m);
+
/* Encrypt the frame if need be. */
cipher = IEEE80211_CIPHER_NONE;
if (params->ibp_flags & IEEE80211_BPF_CRYPTO) {
diff --git a/sys/dev/rtwn/rtl8192c/r92c_tx.c b/sys/dev/rtwn/rtl8192c/r92c_tx.c
index 6b013de0c536..ba2f60bd9295 100644
--- a/sys/dev/rtwn/rtl8192c/r92c_tx.c
+++ b/sys/dev/rtwn/rtl8192c/r92c_tx.c
@@ -452,11 +452,10 @@ r92c_fill_tx_desc(struct rtwn_softc *sc, struct ieee80211_node *ni,
} else {
uint16_t seqno;
- if (m->m_flags & M_AMPDU_MPDU) {
- seqno = ni->ni_txseqs[tid] % IEEE80211_SEQ_RANGE;
- ni->ni_txseqs[tid]++;
- } else
- seqno = M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE;
+ if (m->m_flags & M_AMPDU_MPDU)
+ ieee80211_output_seqno_assign(ni, -1, m);
+
+ seqno = M_SEQNO_GET(m);
/* Set sequence number. */
txd->txdseq = htole16(seqno);
@@ -511,7 +510,7 @@ r92c_fill_tx_desc_raw(struct rtwn_softc *sc, struct ieee80211_node *ni,
rtwn_r92c_tx_setup_hwseq(sc, txd);
} else {
/* Set sequence number. */
- txd->txdseq |= htole16(M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE);
+ txd->txdseq |= htole16(M_SEQNO_GET(m));
}
}
diff --git a/sys/dev/rtwn/rtl8812a/r12a_tx.c b/sys/dev/rtwn/rtl8812a/r12a_tx.c
index acb238316559..6a7af0a9b674 100644
--- a/sys/dev/rtwn/rtl8812a/r12a_tx.c
+++ b/sys/dev/rtwn/rtl8812a/r12a_tx.c
@@ -101,12 +101,12 @@ r12a_tx_set_vht_bw(struct rtwn_softc *sc, void *buf, struct ieee80211_node *ni)
prim_chan = r12a_get_primary_channel(sc, ni->ni_chan);
- if (ieee80211_vht_check_tx_bw(ni, IEEE80211_STA_RX_BW_80)) {
+ if (ieee80211_vht_check_tx_bw(ni, NET80211_STA_RX_BW_80)) {
txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_BW,
R12A_TXDW5_DATA_BW80));
txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_PRIM_CHAN,
prim_chan));
- } else if (ieee80211_vht_check_tx_bw(ni, IEEE80211_STA_RX_BW_40)) {
+ } else if (ieee80211_vht_check_tx_bw(ni, NET80211_STA_RX_BW_40)) {
txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_BW,
R12A_TXDW5_DATA_BW40));
txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_PRIM_CHAN,
@@ -433,12 +433,9 @@ r12a_fill_tx_desc(struct rtwn_softc *sc, struct ieee80211_node *ni,
} else {
uint16_t seqno;
- if (m->m_flags & M_AMPDU_MPDU) {
- seqno = ni->ni_txseqs[tid];
- ni->ni_txseqs[tid]++;
- } else
- seqno = M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE;
-
+ if (m->m_flags & M_AMPDU_MPDU)
+ ieee80211_output_seqno_assign(ni, -1, m);
+ seqno = M_SEQNO_GET(m);
/* Set sequence number. */
txd->txdw9 |= htole32(SM(R12A_TXDW9_SEQ, seqno));
}
@@ -493,8 +490,7 @@ r12a_fill_tx_desc_raw(struct rtwn_softc *sc, struct ieee80211_node *ni,
txd->txdw3 |= htole32(SM(R12A_TXDW3_SEQ_SEL, uvp->id));
} else {
/* Set sequence number. */
- txd->txdw9 |= htole32(SM(R12A_TXDW9_SEQ,
- M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE));
+ txd->txdw9 |= htole32(SM(R12A_TXDW9_SEQ, M_SEQNO_GET(m)));
}
}
diff --git a/sys/dev/sound/pci/hda/hdac.c b/sys/dev/sound/pci/hda/hdac.c
index 900578b73de4..80028063bb0d 100644
--- a/sys/dev/sound/pci/hda/hdac.c
+++ b/sys/dev/sound/pci/hda/hdac.c
@@ -133,6 +133,7 @@ static const struct {
{ HDA_INTEL_PCH, "Intel Ibex Peak", 0, 0 },
{ HDA_INTEL_PCH2, "Intel Ibex Peak", 0, 0 },
{ HDA_INTEL_ELLK, "Intel Elkhart Lake", 0, 0 },
+ { HDA_INTEL_ELLK2, "Intel Elkhart Lake", 0, 0 },
{ HDA_INTEL_JLK2, "Intel Jasper Lake", 0, 0 },
{ HDA_INTEL_BXTNP, "Intel Broxton-P", 0, 0 },
{ HDA_INTEL_SCH, "Intel SCH", 0, 0 },
@@ -1773,17 +1774,17 @@ hdac_detach(device_t dev)
struct hdac_softc *sc = device_get_softc(dev);
int i, error;
+ callout_drain(&sc->poll_callout);
+ hdac_irq_free(sc);
+ taskqueue_drain(taskqueue_thread, &sc->unsolq_task);
+
error = bus_generic_detach(dev);
if (error != 0)
return (error);
hdac_lock(sc);
- callout_stop(&sc->poll_callout);
hdac_reset(sc, false);
hdac_unlock(sc);
- callout_drain(&sc->poll_callout);
- taskqueue_drain(taskqueue_thread, &sc->unsolq_task);
- hdac_irq_free(sc);
for (i = 0; i < sc->num_ss; i++)
hdac_dma_free(sc, &sc->streams[i].bdl);
@@ -2206,4 +2207,4 @@ static driver_t hdac_driver = {
sizeof(struct hdac_softc),
};
-DRIVER_MODULE(snd_hda, pci, hdac_driver, NULL, NULL);
+DRIVER_MODULE_ORDERED(snd_hda, pci, hdac_driver, NULL, NULL, SI_ORDER_ANY);
diff --git a/sys/dev/sound/pci/hda/hdac.h b/sys/dev/sound/pci/hda/hdac.h
index 223434a214b1..09a17f702019 100644
--- a/sys/dev/sound/pci/hda/hdac.h
+++ b/sys/dev/sound/pci/hda/hdac.h
@@ -66,6 +66,7 @@
#define HDA_INTEL_PCH HDA_MODEL_CONSTRUCT(INTEL, 0x3b56)
#define HDA_INTEL_PCH2 HDA_MODEL_CONSTRUCT(INTEL, 0x3b57)
#define HDA_INTEL_ELLK HDA_MODEL_CONSTRUCT(INTEL, 0x4b55)
+#define HDA_INTEL_ELLK2 HDA_MODEL_CONSTRUCT(INTEL, 0x4b58)
#define HDA_INTEL_JLK2 HDA_MODEL_CONSTRUCT(INTEL, 0x4dc8)
#define HDA_INTEL_BXTNP HDA_MODEL_CONSTRUCT(INTEL, 0x5a98)
#define HDA_INTEL_MACBOOKPRO92 HDA_MODEL_CONSTRUCT(INTEL, 0x7270)
diff --git a/sys/dev/tpm/tpm20.c b/sys/dev/tpm/tpm20.c
index 876dd0bcc40d..067e7ccae8f9 100644
--- a/sys/dev/tpm/tpm20.c
+++ b/sys/dev/tpm/tpm20.c
@@ -25,8 +25,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <sys/cdefs.h>
#include <sys/random.h>
+#include <dev/random/randomdev.h>
#include "tpm20.h"
@@ -184,6 +184,13 @@ tpm20_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
return (ENOTTY);
}
+#ifdef TPM_HARVEST
+static const struct random_source random_tpm = {
+ .rs_ident = "TPM",
+ .rs_source = RANDOM_PURE_TPM,
+};
+#endif
+
int
tpm20_init(struct tpm_sc *sc)
{
@@ -206,7 +213,7 @@ tpm20_init(struct tpm_sc *sc)
tpm20_release(sc);
#ifdef TPM_HARVEST
- random_harvest_register_source(RANDOM_PURE_TPM);
+ random_source_register(&random_tpm);
TIMEOUT_TASK_INIT(taskqueue_thread, &sc->harvest_task, 0,
tpm20_harvest, sc);
taskqueue_enqueue_timeout(taskqueue_thread, &sc->harvest_task, 0);
@@ -223,7 +230,7 @@ tpm20_release(struct tpm_sc *sc)
#ifdef TPM_HARVEST
if (device_is_attached(sc->dev))
taskqueue_drain_timeout(taskqueue_thread, &sc->harvest_task);
- random_harvest_deregister_source(RANDOM_PURE_TPM);
+ random_source_deregister(&random_tpm);
#endif
if (sc->buf != NULL)
diff --git a/sys/dev/tpm/tpm_tis_core.c b/sys/dev/tpm/tpm_tis_core.c
index d8421f8156c9..4159de4daf3b 100644
--- a/sys/dev/tpm/tpm_tis_core.c
+++ b/sys/dev/tpm/tpm_tis_core.c
@@ -97,6 +97,7 @@ tpmtis_attach(device_t dev)
{
struct tpm_sc *sc;
int result;
+ int poll = 0;
sc = device_get_softc(dev);
sc->dev = dev;
@@ -105,6 +106,12 @@ tpmtis_attach(device_t dev)
sx_init(&sc->dev_lock, "TPM driver lock");
sc->buf = malloc(TPM_BUFSIZE, M_TPM20, M_WAITOK);
+ resource_int_value("tpm", device_get_unit(dev), "use_polling", &poll);
+ if (poll != 0) {
+ device_printf(dev, "Using poll method to get TPM operation status \n");
+ goto skip_irq;
+ }
+
sc->irq_rid = 0;
sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irq_rid,
RF_ACTIVE | RF_SHAREABLE);
diff --git a/sys/dev/ufshci/ufshci.h b/sys/dev/ufshci/ufshci.h
index b96d82ff836e..b055d2d2d769 100644
--- a/sys/dev/ufshci/ufshci.h
+++ b/sys/dev/ufshci/ufshci.h
@@ -716,6 +716,42 @@ struct ufshci_device_descriptor {
_Static_assert(sizeof(struct ufshci_device_descriptor) == 89,
"bad size for ufshci_device_descriptor");
+/* Defines the bit field of dExtendedUfsFeaturesSupport. */
+enum ufshci_desc_wb_ext_ufs_feature {
+ UFSHCI_DESC_EXT_UFS_FEATURE_FFU = (1 << 0),
+ UFSHCI_DESC_EXT_UFS_FEATURE_PSA = (1 << 1),
+ UFSHCI_DESC_EXT_UFS_FEATURE_DEV_LIFE_SPAN = (1 << 2),
+ UFSHCI_DESC_EXT_UFS_FEATURE_REFRESH_OP = (1 << 3),
+ UFSHCI_DESC_EXT_UFS_FEATURE_TOO_HIGH_TEMP = (1 << 4),
+ UFSHCI_DESC_EXT_UFS_FEATURE_TOO_LOW_TEMP = (1 << 5),
+ UFSHCI_DESC_EXT_UFS_FEATURE_EXT_TEMP = (1 << 6),
+ UFSHCI_DESC_EXT_UFS_FEATURE_HPB_SUPPORT = (1 << 7),
+ UFSHCI_DESC_EXT_UFS_FEATURE_WRITE_BOOSTER = (1 << 8),
+ UFSHCI_DESC_EXT_UFS_FEATURE_PERF_THROTTLING = (1 << 9),
+ UFSHCI_DESC_EXT_UFS_FEATURE_ADVANCED_RPMB = (1 << 10),
+ UFSHCI_DESC_EXT_UFS_FEATURE_ZONED_UFS_EXTENSION = (1 << 11),
+ UFSHCI_DESC_EXT_UFS_FEATURE_DEV_LEVEL_EXCEPTION = (1 << 12),
+ UFSHCI_DESC_EXT_UFS_FEATURE_HID = (1 << 13),
+ UFSHCI_DESC_EXT_UFS_FEATURE_BARRIER = (1 << 14),
+ UFSHCI_DESC_EXT_UFS_FEATURE_CLEAR_ERROR_HISTORY = (1 << 15),
+ UFSHCI_DESC_EXT_UFS_FEATURE_EXT_IID = (1 << 16),
+ UFSHCI_DESC_EXT_UFS_FEATURE_FBO = (1 << 17),
+ UFSHCI_DESC_EXT_UFS_FEATURE_FAST_RECOVERY_MODE = (1 << 18),
+ UFSHCI_DESC_EXT_UFS_FEATURE_RPMB_VENDOR_CMD = (1 << 19),
+};
+
+/* Defines the bit field of bWriteBoosterBufferType. */
+enum ufshci_desc_wb_buffer_type {
+ UFSHCI_DESC_WB_BUF_TYPE_LU_DEDICATED = 0x00,
+ UFSHCI_DESC_WB_BUF_TYPE_SINGLE_SHARED = 0x01,
+};
+
+/* Defines the bit field of bWriteBoosterBufferPreserveUserSpaceEn. */
+enum ufshci_desc_user_space_config {
+ UFSHCI_DESC_WB_BUF_USER_SPACE_REDUCTION = 0x00,
+ UFSHCI_DESC_WB_BUF_PRESERVE_USER_SPACE = 0x01,
+};
+
/*
* UFS Spec 4.1, section 14.1.5.3 "Configuration Descriptor"
* ConfigurationDescriptor use big-endian byte ordering.
@@ -1014,4 +1050,37 @@ enum ufshci_attributes {
UFSHCI_ATTR_B_REFRESH_METHOD = 0x2f,
};
+/* bAvailableWriteBoosterBufferSize codes (UFS WriteBooster available buffer
+ * left %) */
+enum ufshci_wb_available_buffer_Size {
+ UFSHCI_ATTR_WB_AVAILABLE_0 = 0x00, /* 0% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_10 = 0x01, /* 10% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_20 = 0x02, /* 20% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_30 = 0x03, /* 30% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_40 = 0x04, /* 40% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_50 = 0x05, /* 50% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_60 = 0x06, /* 60% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_70 = 0x07, /* 70% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_80 = 0x08, /* 80% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_90 = 0x09, /* 90% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_100 = 0x0A, /* 100% buffer remains */
+};
+
+/* bWriteBoosterBufferLifeTimeEst codes (UFS WriteBooster buffer life %) */
+enum ufshci_wb_lifetime {
+ UFSHCI_ATTR_WB_LIFE_DISABLED = 0x00, /* Info not available */
+ UFSHCI_ATTR_WB_LIFE_0_10 = 0x01, /* 0%–10% used */
+ UFSHCI_ATTR_WB_LIFE_10_20 = 0x02, /* 10%–20% used */
+ UFSHCI_ATTR_WB_LIFE_20_30 = 0x03, /* 20%–30% used */
+ UFSHCI_ATTR_WB_LIFE_30_40 = 0x04, /* 30%–40% used */
+ UFSHCI_ATTR_WB_LIFE_40_50 = 0x05, /* 40%–50% used */
+ UFSHCI_ATTR_WB_LIFE_50_60 = 0x06, /* 50%–60% used */
+ UFSHCI_ATTR_WB_LIFE_60_70 = 0x07, /* 60%–70% used */
+ UFSHCI_ATTR_WB_LIFE_70_80 = 0x08, /* 70%–80% used */
+ UFSHCI_ATTR_WB_LIFE_80_90 = 0x09, /* 80%–90% used */
+ UFSHCI_ATTR_WB_LIFE_90_100 = 0x0A, /* 90%–100% used */
+ UFSHCI_ATTR_WB_LIFE_EXCEEDED =
+ 0x0B, /* Exceeded estimated life (treat as WB disabled) */
+};
+
#endif /* __UFSHCI_H__ */
diff --git a/sys/dev/ufshci/ufshci_ctrlr.c b/sys/dev/ufshci/ufshci_ctrlr.c
index 37bd32665b2b..36be94b8b8b7 100644
--- a/sys/dev/ufshci/ufshci_ctrlr.c
+++ b/sys/dev/ufshci/ufshci_ctrlr.c
@@ -61,7 +61,7 @@ ufshci_ctrlr_enable_host_ctrlr(struct ufshci_controller *ctrlr)
int
ufshci_ctrlr_construct(struct ufshci_controller *ctrlr, device_t dev)
{
- uint32_t ver, cap, hcs, ie;
+ uint32_t ver, cap, hcs, ie, ahit;
uint32_t timeout_period, retry_count;
int error;
@@ -127,6 +127,13 @@ ufshci_ctrlr_construct(struct ufshci_controller *ctrlr, device_t dev)
if (error)
return (error);
+ /* Read the UECPA register to clear */
+ ufshci_mmio_read_4(ctrlr, uecpa);
+
+ /* Disable Auto-hibernate */
+ ahit = 0;
+ ufshci_mmio_write_4(ctrlr, ahit, ahit);
+
/*
* The device_present(UFSHCI_HCS_REG_DP) bit becomes true if the host
* controller has successfully received a Link Startup UIC command
@@ -139,6 +146,16 @@ ufshci_ctrlr_construct(struct ufshci_controller *ctrlr, device_t dev)
return (ENXIO);
}
+ /* Allocate and initialize UTP Task Management Request List. */
+ error = ufshci_utmr_req_queue_construct(ctrlr);
+ if (error)
+ return (error);
+
+ /* Allocate and initialize UTP Transfer Request List or SQ/CQ. */
+ error = ufshci_utr_req_queue_construct(ctrlr);
+ if (error)
+ return (error);
+
/* Enable additional interrupts by programming the IE register. */
ie = ufshci_mmio_read_4(ctrlr, ie);
ie |= UFSHCIM(UFSHCI_IE_REG_UTRCE); /* UTR Completion */
@@ -153,19 +170,12 @@ ufshci_ctrlr_construct(struct ufshci_controller *ctrlr, device_t dev)
/* TODO: Initialize interrupt Aggregation Control Register (UTRIACR) */
- /* Allocate and initialize UTP Task Management Request List. */
- error = ufshci_utmr_req_queue_construct(ctrlr);
- if (error)
- return (error);
-
- /* Allocate and initialize UTP Transfer Request List or SQ/CQ. */
- error = ufshci_utr_req_queue_construct(ctrlr);
- if (error)
- return (error);
-
/* TODO: Separate IO and Admin slot */
- /* max_hw_pend_io is the number of slots in the transfer_req_queue */
- ctrlr->max_hw_pend_io = ctrlr->transfer_req_queue.num_entries;
+ /*
+ * max_hw_pend_io is the number of slots in the transfer_req_queue.
+ * Reduce num_entries by one to reserve an admin slot.
+ */
+ ctrlr->max_hw_pend_io = ctrlr->transfer_req_queue.num_entries - 1;
return (0);
}
@@ -342,18 +352,19 @@ ufshci_ctrlr_start(struct ufshci_controller *ctrlr)
return;
}
- /* Read Controller Descriptor (Device, Geometry)*/
+ /* Read Controller Descriptor (Device, Geometry) */
if (ufshci_dev_get_descriptor(ctrlr) != 0) {
ufshci_ctrlr_fail(ctrlr, false);
return;
}
- /* TODO: Configure Write Protect */
+ if (ufshci_dev_config_write_booster(ctrlr)) {
+ ufshci_ctrlr_fail(ctrlr, false);
+ return;
+ }
/* TODO: Configure Background Operations */
- /* TODO: Configure Write Booster */
-
if (ufshci_sim_attach(ctrlr) != 0) {
ufshci_ctrlr_fail(ctrlr, false);
return;
diff --git a/sys/dev/ufshci/ufshci_dev.c b/sys/dev/ufshci/ufshci_dev.c
index a0e32914e2aa..dd196b1d638b 100644
--- a/sys/dev/ufshci/ufshci_dev.c
+++ b/sys/dev/ufshci/ufshci_dev.c
@@ -60,6 +60,14 @@ ufshci_dev_read_geometry_descriptor(struct ufshci_controller *ctrlr,
}
static int
+ufshci_dev_read_unit_descriptor(struct ufshci_controller *ctrlr, uint8_t lun,
+ struct ufshci_unit_descriptor *desc)
+{
+ return (ufshci_dev_read_descriptor(ctrlr, UFSHCI_DESC_TYPE_UNIT, lun, 0,
+ desc, sizeof(struct ufshci_unit_descriptor)));
+}
+
+static int
ufshci_dev_read_flag(struct ufshci_controller *ctrlr,
enum ufshci_flags flag_type, uint8_t *flag)
{
@@ -114,6 +122,61 @@ ufshci_dev_set_flag(struct ufshci_controller *ctrlr,
}
static int
+ufshci_dev_clear_flag(struct ufshci_controller *ctrlr,
+ enum ufshci_flags flag_type)
+{
+ struct ufshci_completion_poll_status status;
+ struct ufshci_query_param param;
+
+ param.function = UFSHCI_QUERY_FUNC_STANDARD_WRITE_REQUEST;
+ param.opcode = UFSHCI_QUERY_OPCODE_CLEAR_FLAG;
+ param.type = flag_type;
+ param.index = 0;
+ param.selector = 0;
+ param.value = 0;
+
+ status.done = 0;
+ ufshci_ctrlr_cmd_send_query_request(ctrlr, ufshci_completion_poll_cb,
+ &status, param);
+ ufshci_completion_poll(&status);
+ if (status.error) {
+ ufshci_printf(ctrlr, "ufshci_dev_clear_flag failed!\n");
+ return (ENXIO);
+ }
+
+ return (0);
+}
+
+static int
+ufshci_dev_read_attribute(struct ufshci_controller *ctrlr,
+ enum ufshci_attributes attr_type, uint8_t index, uint8_t selector,
+ uint64_t *value)
+{
+ struct ufshci_completion_poll_status status;
+ struct ufshci_query_param param;
+
+ param.function = UFSHCI_QUERY_FUNC_STANDARD_READ_REQUEST;
+ param.opcode = UFSHCI_QUERY_OPCODE_READ_ATTRIBUTE;
+ param.type = attr_type;
+ param.index = index;
+ param.selector = selector;
+ param.value = 0;
+
+ status.done = 0;
+ ufshci_ctrlr_cmd_send_query_request(ctrlr, ufshci_completion_poll_cb,
+ &status, param);
+ ufshci_completion_poll(&status);
+ if (status.error) {
+ ufshci_printf(ctrlr, "ufshci_dev_read_attribute failed!\n");
+ return (ENXIO);
+ }
+
+ *value = status.cpl.response_upiu.query_response_upiu.value_64;
+
+ return (0);
+}
+
+static int
ufshci_dev_write_attribute(struct ufshci_controller *ctrlr,
enum ufshci_attributes attr_type, uint8_t index, uint8_t selector,
uint64_t value)
@@ -270,7 +333,7 @@ ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr)
*/
const uint32_t fast_mode = 1;
const uint32_t rx_bit_shift = 4;
- const uint32_t power_mode = (fast_mode << rx_bit_shift) | fast_mode;
+ uint32_t power_mode, peer_granularity;
/* Update lanes with available TX/RX lanes */
if (ufshci_uic_send_dme_get(ctrlr, PA_AvailTxDataLanes,
@@ -295,6 +358,20 @@ ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr)
ctrlr->rx_lanes))
return (ENXIO);
+ if (ctrlr->quirks & UFSHCI_QUIRK_CHANGE_LANE_AND_GEAR_SEPARATELY) {
+ /* Before changing gears, first change the number of lanes. */
+ if (ufshci_uic_send_dme_get(ctrlr, PA_PWRMode, &power_mode))
+ return (ENXIO);
+ if (ufshci_uic_send_dme_set(ctrlr, PA_PWRMode, power_mode))
+ return (ENXIO);
+
+ /* Wait for power mode changed. */
+ if (ufshci_uic_power_mode_ready(ctrlr)) {
+ ufshci_reg_dump(ctrlr);
+ return (ENXIO);
+ }
+ }
+
/* Set HS-GEAR to max gear */
ctrlr->hs_gear = ctrlr->max_rx_hs_gear;
if (ufshci_uic_send_dme_set(ctrlr, PA_TxGear, ctrlr->hs_gear))
@@ -346,6 +423,7 @@ ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr)
return (ENXIO);
/* Set TX/RX PWRMode */
+ power_mode = (fast_mode << rx_bit_shift) | fast_mode;
if (ufshci_uic_send_dme_set(ctrlr, PA_PWRMode, power_mode))
return (ENXIO);
@@ -366,7 +444,8 @@ ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr)
pause_sbt("ufshci", ustosbt(1250), 0, C_PREL(1));
/* Test with dme_peer_get to make sure there are no errors. */
- if (ufshci_uic_send_dme_peer_get(ctrlr, PA_Granularity, NULL))
+ if (ufshci_uic_send_dme_peer_get(ctrlr, PA_Granularity,
+ &peer_granularity))
return (ENXIO);
}
@@ -398,7 +477,7 @@ ufshci_dev_get_descriptor(struct ufshci_controller *ctrlr)
return (error);
ver = be16toh(device->dev_desc.wSpecVersion);
- ufshci_printf(ctrlr, "UFS device spec version %u.%u%u\n",
+ ufshci_printf(ctrlr, "UFS device spec version %u.%u.%u\n",
UFSHCIV(UFSHCI_VER_REG_MJR, ver), UFSHCIV(UFSHCI_VER_REG_MNR, ver),
UFSHCIV(UFSHCI_VER_REG_VS, ver));
ufshci_printf(ctrlr, "%u enabled LUNs found\n",
@@ -426,3 +505,273 @@ ufshci_dev_get_descriptor(struct ufshci_controller *ctrlr)
return (0);
}
+
+static int
+ufshci_dev_enable_write_booster(struct ufshci_controller *ctrlr)
+{
+ struct ufshci_device *dev = &ctrlr->ufs_dev;
+ int error;
+
+ /* Enable WriteBooster */
+ error = ufshci_dev_set_flag(ctrlr, UFSHCI_FLAG_F_WRITE_BOOSTER_EN);
+ if (error) {
+ ufshci_printf(ctrlr, "Failed to enable WriteBooster\n");
+ return (error);
+ }
+ dev->is_wb_enabled = true;
+
+ /* Enable WriteBooster buffer flush during hibernate */
+ error = ufshci_dev_set_flag(ctrlr,
+ UFSHCI_FLAG_F_WB_BUFFER_FLUSH_DURING_HIBERNATE);
+ if (error) {
+ ufshci_printf(ctrlr,
+ "Failed to enable WriteBooster buffer flush during hibernate\n");
+ return (error);
+ }
+
+ /* Enable WriteBooster buffer flush */
+ error = ufshci_dev_set_flag(ctrlr, UFSHCI_FLAG_F_WB_BUFFER_FLUSH_EN);
+ if (error) {
+ ufshci_printf(ctrlr,
+ "Failed to enable WriteBooster buffer flush\n");
+ return (error);
+ }
+ dev->is_wb_flush_enabled = true;
+
+ return (0);
+}
+
+static int
+ufshci_dev_disable_write_booster(struct ufshci_controller *ctrlr)
+{
+ struct ufshci_device *dev = &ctrlr->ufs_dev;
+ int error;
+
+ /* Disable WriteBooster buffer flush */
+ error = ufshci_dev_clear_flag(ctrlr, UFSHCI_FLAG_F_WB_BUFFER_FLUSH_EN);
+ if (error) {
+ ufshci_printf(ctrlr,
+ "Failed to disable WriteBooster buffer flush\n");
+ return (error);
+ }
+ dev->is_wb_flush_enabled = false;
+
+ /* Disable WriteBooster buffer flush during hibernate */
+ error = ufshci_dev_clear_flag(ctrlr,
+ UFSHCI_FLAG_F_WB_BUFFER_FLUSH_DURING_HIBERNATE);
+ if (error) {
+ ufshci_printf(ctrlr,
+ "Failed to disable WriteBooster buffer flush during hibernate\n");
+ return (error);
+ }
+
+ /* Disable WriteBooster */
+ error = ufshci_dev_clear_flag(ctrlr, UFSHCI_FLAG_F_WRITE_BOOSTER_EN);
+ if (error) {
+ ufshci_printf(ctrlr, "Failed to disable WriteBooster\n");
+ return (error);
+ }
+ dev->is_wb_enabled = false;
+
+ return (0);
+}
+
+/*
+ * Report whether the WriteBooster buffer still has lifetime left.
+ *
+ * Reads the UFSHCI_ATTR_B_WB_BUFFER_LIFE_TIME_EST attribute, indexed by the
+ * dedicated LU when the buffer type is LU-dedicated and by 0 otherwise.
+ * *is_life_time_left is set to false only when the attribute reads back as
+ * UFSHCI_ATTR_WB_LIFE_EXCEEDED; every other value counts as "life left".
+ *
+ * Returns 0 on success, or the error from ufshci_dev_read_attribute(), in
+ * which case *is_life_time_left is not written.
+ */
+static int
+ufshci_dev_is_write_booster_buffer_life_time_left(
+    struct ufshci_controller *ctrlr, bool *is_life_time_left)
+{
+	struct ufshci_device *dev = &ctrlr->ufs_dev;
+	uint8_t buffer_lun;
+	uint64_t life_time;
+	int error;	/* errno value; matches the function's int return type */
+
+	if (dev->wb_buffer_type == UFSHCI_DESC_WB_BUF_TYPE_LU_DEDICATED)
+		buffer_lun = dev->wb_dedicated_lu;
+	else
+		buffer_lun = 0;
+
+	error = ufshci_dev_read_attribute(ctrlr,
+	    UFSHCI_ATTR_B_WB_BUFFER_LIFE_TIME_EST, buffer_lun, 0, &life_time);
+	if (error)
+		return (error);
+
+	*is_life_time_left = (life_time != UFSHCI_ATTR_WB_LIFE_EXCEEDED);
+
+	return (0);
+}
+
+/*
+ * Decide whether the WriteBooster buffer should be flushed.
+ *
+ * This function is not yet in use. It will be used when suspend/resume is
+ * implemented.
+ *
+ * *need_flush is always initialized to false. It can become true only when
+ * WriteBooster is enabled and the buffer still has lifetime left:
+ *  - user-space-reduction mode: the available buffer is at or below
+ *    UFSHCI_ATTR_WB_AVAILABLE_10;
+ *  - preserve-user-space mode: the current buffer size is non-zero and the
+ *    available buffer is below dev->write_booster_flush_threshold.
+ *
+ * If the buffer lifetime is exhausted, WriteBooster is disabled and the
+ * result of that operation is returned.
+ *
+ * Returns 0 on success, EINVAL for an unknown user-space configuration
+ * option, or the error from the failing query.
+ */
+static __unused int
+ufshci_dev_need_write_booster_buffer_flush(struct ufshci_controller *ctrlr,
+    bool *need_flush)
+{
+	struct ufshci_device *dev = &ctrlr->ufs_dev;
+	bool is_life_time_left = false;
+	uint64_t available_buffer_size, current_buffer_size;
+	uint8_t buffer_lun;
+	int error;	/* errno value; matches the function's int return type */
+
+	*need_flush = false;
+
+	if (!dev->is_wb_enabled)
+		return (0);
+
+	error = ufshci_dev_is_write_booster_buffer_life_time_left(ctrlr,
+	    &is_life_time_left);
+	if (error)
+		return (error);
+
+	if (!is_life_time_left)
+		return (ufshci_dev_disable_write_booster(ctrlr));
+
+	if (dev->wb_buffer_type == UFSHCI_DESC_WB_BUF_TYPE_LU_DEDICATED)
+		buffer_lun = dev->wb_dedicated_lu;
+	else
+		buffer_lun = 0;
+
+	error = ufshci_dev_read_attribute(ctrlr,
+	    UFSHCI_ATTR_B_AVAILABLE_WB_BUFFER_SIZE, buffer_lun, 0,
+	    &available_buffer_size);
+	if (error)
+		return (error);
+
+	switch (dev->wb_user_space_config_option) {
+	case UFSHCI_DESC_WB_BUF_USER_SPACE_REDUCTION:
+		*need_flush = (available_buffer_size <=
+		    UFSHCI_ATTR_WB_AVAILABLE_10);
+		break;
+	case UFSHCI_DESC_WB_BUF_PRESERVE_USER_SPACE:
+		/*
+		 * In PRESERVE USER SPACE mode, flush should be performed when
+		 * the current buffer is greater than 0 and the available buffer
+		 * below write_booster_flush_threshold is left.
+		 */
+		error = ufshci_dev_read_attribute(ctrlr,
+		    UFSHCI_ATTR_D_CURRENT_WB_BUFFER_SIZE, buffer_lun, 0,
+		    &current_buffer_size);
+		if (error)
+			return (error);
+
+		if (current_buffer_size == 0)
+			return (0);
+
+		*need_flush = (available_buffer_size <
+		    dev->write_booster_flush_threshold);
+		break;
+	default:
+		/* Terminate the message like every other ufshci_printf(). */
+		ufshci_printf(ctrlr,
+		    "Invalid bWriteBoosterBufferPreserveUserSpaceEn value\n");
+		return (EINVAL);
+	}
+
+	/*
+	 * TODO: Need to handle WRITEBOOSTER_FLUSH_NEEDED exception case from
+	 * wExceptionEventStatus attribute.
+	 */
+
+	return (0);
+}
+
+/*
+ * Enable and configure WriteBooster if the device advertises it.
+ *
+ * Flow:
+ *  1. Return 0 immediately when dExtendedUfsFeaturesSupport lacks the
+ *     WriteBooster bit.
+ *  2. Try to enable WriteBooster; a failure here is not reported to the
+ *     caller (0 is returned).
+ *  3. Record the buffer type and user-space option from the device
+ *     descriptor and determine the buffer size in allocation units, either
+ *     from the shared-buffer field or from the first LU whose unit
+ *     descriptor reports a non-zero allocation.
+ *  4. Verify that buffer lifetime is left.
+ *
+ * When the buffer type is unsupported, the buffer size is zero, the lifetime
+ * query fails, or no lifetime is left, WriteBooster is disabled again. Only
+ * the lifetime query failure yields a non-zero return value; the result of
+ * the disable operation itself is ignored.
+ */
+int
+ufshci_dev_config_write_booster(struct ufshci_controller *ctrlr)
+{
+	struct ufshci_device *dev = &ctrlr->ufs_dev;
+	struct ufshci_unit_descriptor unit_desc;
+	const uint32_t mega_byte = 1024 * 1024;
+	uint32_t extended_ufs_feature_support;
+	/*
+	 * Must start at zero: in the LU-dedicated case no unit descriptor may
+	 * be read successfully (or max_lun_count may be 0), and the size check
+	 * below would otherwise read an indeterminate value.
+	 */
+	uint32_t alloc_units = 0;
+	uint8_t lun;
+	bool is_life_time_left;
+	int error = 0;
+
+	extended_ufs_feature_support = be32toh(
+	    dev->dev_desc.dExtendedUfsFeaturesSupport);
+	if (!(extended_ufs_feature_support &
+	    UFSHCI_DESC_EXT_UFS_FEATURE_WRITE_BOOSTER)) {
+		/* This device does not support Write Booster */
+		return (0);
+	}
+
+	/*
+	 * NOTE(review): a partial failure inside the enable helper can leave
+	 * dev->is_wb_enabled set while this function still returns 0 without
+	 * rolling back -- confirm that this is intended.
+	 */
+	if (ufshci_dev_enable_write_booster(ctrlr))
+		return (0);
+
+	/* Get WriteBooster buffer parameters */
+	dev->wb_buffer_type = dev->dev_desc.bWriteBoosterBufferType;
+	dev->wb_user_space_config_option =
+	    dev->dev_desc.bWriteBoosterBufferPreserveUserSpaceEn;
+
+	/*
+	 * Find the size of the write buffer.
+	 * With LU-dedicated (00h), the WriteBooster buffer is assigned
+	 * exclusively to one chosen LU (not one-per-LU), whereas Shared (01h)
+	 * uses a single device-wide buffer shared by multiple LUs.
+	 */
+	if (dev->wb_buffer_type == UFSHCI_DESC_WB_BUF_TYPE_SINGLE_SHARED) {
+		alloc_units = be32toh(
+		    dev->dev_desc.dNumSharedWriteBoosterBufferAllocUnits);
+		ufshci_printf(ctrlr,
+		    "WriteBooster buffer type = Shared, alloc_units=%u\n",
+		    alloc_units);
+	} else if (dev->wb_buffer_type ==
+	    UFSHCI_DESC_WB_BUF_TYPE_LU_DEDICATED) {
+		ufshci_printf(ctrlr, "WriteBooster buffer type = Dedicated\n");
+		for (lun = 0; lun < ctrlr->max_lun_count; lun++) {
+			/* Find a dedicated buffer using a unit descriptor */
+			if (ufshci_dev_read_unit_descriptor(ctrlr, lun,
+			    &unit_desc))
+				continue;
+
+			alloc_units = be32toh(
+			    unit_desc.dLUNumWriteBoosterBufferAllocUnits);
+			if (alloc_units) {
+				dev->wb_dedicated_lu = lun;
+				break;
+			}
+		}
+	} else {
+		ufshci_printf(ctrlr,
+		    "Not supported WriteBooster buffer type: 0x%x\n",
+		    dev->wb_buffer_type);
+		goto out;
+	}
+
+	if (alloc_units == 0) {
+		ufshci_printf(ctrlr, "The WriteBooster buffer size is zero\n");
+		goto out;
+	}
+
+	dev->wb_buffer_size_mb = alloc_units *
+	    dev->geo_desc.bAllocationUnitSize *
+	    (be32toh(dev->geo_desc.dSegmentSize)) /
+	    (mega_byte / UFSHCI_SECTOR_SIZE);
+
+	/* Set to flush when 40% of the available buffer size remains */
+	dev->write_booster_flush_threshold = UFSHCI_ATTR_WB_AVAILABLE_40;
+
+	/*
+	 * Check if WriteBooster Buffer lifetime is left.
+	 * WriteBooster Buffer lifetime — percent of life used based on P/E
+	 * cycles. If "preserve user space" is enabled, writes to normal user
+	 * space also consume WB life since the area is shared.
+	 */
+	error = ufshci_dev_is_write_booster_buffer_life_time_left(ctrlr,
+	    &is_life_time_left);
+	if (error)
+		goto out;
+
+	if (!is_life_time_left) {
+		ufshci_printf(ctrlr,
+		    "There is no WriteBooster buffer life time left.\n");
+		goto out;
+	}
+
+	ufshci_printf(ctrlr, "WriteBooster Enabled\n");
+	return (0);
+out:
+	/* Undo the enable done above; its own result is ignored. */
+	ufshci_dev_disable_write_booster(ctrlr);
+	return (error);
+}
+
diff --git a/sys/dev/ufshci/ufshci_pci.c b/sys/dev/ufshci/ufshci_pci.c
index 65a69ee0b518..d64b7526f713 100644
--- a/sys/dev/ufshci/ufshci_pci.c
+++ b/sys/dev/ufshci/ufshci_pci.c
@@ -53,7 +53,8 @@ static struct _pcsid {
{ 0x98fa8086, "Intel Lakefield UFS Host Controller",
UFSHCI_REF_CLK_19_2MHz,
UFSHCI_QUIRK_LONG_PEER_PA_TACTIVATE |
- UFSHCI_QUIRK_WAIT_AFTER_POWER_MODE_CHANGE },
+ UFSHCI_QUIRK_WAIT_AFTER_POWER_MODE_CHANGE |
+ UFSHCI_QUIRK_CHANGE_LANE_AND_GEAR_SEPARATELY },
{ 0x54ff8086, "Intel UFS Host Controller", UFSHCI_REF_CLK_19_2MHz },
{ 0x00000000, NULL } };
diff --git a/sys/dev/ufshci/ufshci_private.h b/sys/dev/ufshci/ufshci_private.h
index 1a2742ae2e80..2e033f84c373 100644
--- a/sys/dev/ufshci/ufshci_private.h
+++ b/sys/dev/ufshci/ufshci_private.h
@@ -46,6 +46,8 @@ MALLOC_DECLARE(M_UFSHCI);
#define UFSHCI_UTR_ENTRIES (32)
#define UFSHCI_UTRM_ENTRIES (8)
+#define UFSHCI_SECTOR_SIZE (512)
+
struct ufshci_controller;
struct ufshci_completion_poll_status {
@@ -214,6 +216,15 @@ struct ufshci_device {
struct ufshci_geometry_descriptor geo_desc;
uint32_t unipro_version;
+
+ /* WriteBooster */
+ bool is_wb_enabled;
+ bool is_wb_flush_enabled;
+ uint32_t wb_buffer_type;
+ uint32_t wb_buffer_size_mb;
+ uint32_t wb_user_space_config_option;
+ uint8_t wb_dedicated_lu;
+ uint32_t write_booster_flush_threshold;
};
/*
@@ -229,7 +240,8 @@ struct ufshci_controller {
2 /* Need an additional 200 ms of PA_TActivate */
#define UFSHCI_QUIRK_WAIT_AFTER_POWER_MODE_CHANGE \
4 /* Need to wait 1250us after power mode change */
-
+#define UFSHCI_QUIRK_CHANGE_LANE_AND_GEAR_SEPARATELY \
+ 8 /* Need to change the number of lanes before changing HS-GEAR. */
uint32_t ref_clk;
struct cam_sim *ufshci_sim;
@@ -356,6 +368,7 @@ int ufshci_dev_init_unipro(struct ufshci_controller *ctrlr);
int ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr);
int ufshci_dev_init_ufs_power_mode(struct ufshci_controller *ctrlr);
int ufshci_dev_get_descriptor(struct ufshci_controller *ctrlr);
+int ufshci_dev_config_write_booster(struct ufshci_controller *ctrlr);
/* Controller Command */
void ufshci_ctrlr_cmd_send_task_mgmt_request(struct ufshci_controller *ctrlr,
diff --git a/sys/dev/ufshci/ufshci_reg.h b/sys/dev/ufshci/ufshci_reg.h
index 6c9b3e2c8c04..6d5768505102 100644
--- a/sys/dev/ufshci/ufshci_reg.h
+++ b/sys/dev/ufshci/ufshci_reg.h
@@ -274,7 +274,7 @@ struct ufshci_registers {
#define UFSHCI_HCS_REG_UTMRLRDY_MASK (0x1)
#define UFSHCI_HCS_REG_UCRDY_SHIFT (3)
#define UFSHCI_HCS_REG_UCRDY_MASK (0x1)
-#define UFSHCI_HCS_REG_UPMCRS_SHIFT (7)
+#define UFSHCI_HCS_REG_UPMCRS_SHIFT (8)
#define UFSHCI_HCS_REG_UPMCRS_MASK (0x7)
#define UFSHCI_HCS_REG_UTPEC_SHIFT (12)
#define UFSHCI_HCS_REG_UTPEC_MASK (0xF)
diff --git a/sys/dev/ufshci/ufshci_sysctl.c b/sys/dev/ufshci/ufshci_sysctl.c
index 5e5069f12e5f..56bc06b13f3c 100644
--- a/sys/dev/ufshci/ufshci_sysctl.c
+++ b/sys/dev/ufshci/ufshci_sysctl.c
@@ -152,6 +152,7 @@ ufshci_sysctl_initialize_ctrlr(struct ufshci_controller *ctrlr)
struct sysctl_ctx_list *ctrlr_ctx;
struct sysctl_oid *ctrlr_tree, *que_tree, *ioq_tree;
struct sysctl_oid_list *ctrlr_list, *ioq_list;
+ struct ufshci_device *dev = &ctrlr->ufs_dev;
#define QUEUE_NAME_LENGTH 16
char queue_name[QUEUE_NAME_LENGTH];
int i;
@@ -177,6 +178,25 @@ ufshci_sysctl_initialize_ctrlr(struct ufshci_controller *ctrlr)
SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "cap", CTLFLAG_RD,
&ctrlr->cap, 0, "Number of I/O queue pairs");
+ SYSCTL_ADD_BOOL(ctrlr_ctx, ctrlr_list, OID_AUTO, "wb_enabled",
+ CTLFLAG_RD, &dev->is_wb_enabled, 0, "WriteBooster enable/disable");
+
+ SYSCTL_ADD_BOOL(ctrlr_ctx, ctrlr_list, OID_AUTO, "wb_flush_enabled",
+ CTLFLAG_RD, &dev->is_wb_flush_enabled, 0,
+ "WriteBooster flush enable/disable");
+
+ SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "wb_buffer_type",
+ CTLFLAG_RD, &dev->wb_buffer_type, 0, "WriteBooster type");
+
+ SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "wb_buffer_size_mb",
+ CTLFLAG_RD, &dev->wb_buffer_size_mb, 0,
+ "WriteBooster buffer size in MB");
+
+ SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO,
+ "wb_user_space_config_option", CTLFLAG_RD,
+ &dev->wb_user_space_config_option, 0,
+ "WriteBooster preserve user space mode");
+
SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, "timeout_period",
CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, &ctrlr->timeout_period,
0, ufshci_sysctl_timeout_period, "IU",
diff --git a/sys/dev/ufshci/ufshci_uic_cmd.c b/sys/dev/ufshci/ufshci_uic_cmd.c
index 2c5f635dc11e..b9c867ff7065 100644
--- a/sys/dev/ufshci/ufshci_uic_cmd.c
+++ b/sys/dev/ufshci/ufshci_uic_cmd.c
@@ -14,7 +14,7 @@
int
ufshci_uic_power_mode_ready(struct ufshci_controller *ctrlr)
{
- uint32_t is;
+ uint32_t is, hcs;
int timeout;
/* Wait for the IS flag to change */
@@ -40,6 +40,15 @@ ufshci_uic_power_mode_ready(struct ufshci_controller *ctrlr)
DELAY(10);
}
+ /* Check HCS power mode change request status */
+ hcs = ufshci_mmio_read_4(ctrlr, hcs);
+ if (UFSHCIV(UFSHCI_HCS_REG_UPMCRS, hcs) != 0x01) {
+ ufshci_printf(ctrlr,
+ "Power mode change request status error: 0x%x\n",
+ UFSHCIV(UFSHCI_HCS_REG_UPMCRS, hcs));
+ return (ENXIO);
+ }
+
return (0);
}
@@ -112,6 +121,7 @@ ufshci_uic_send_cmd(struct ufshci_controller *ctrlr,
struct ufshci_uic_cmd *uic_cmd, uint32_t *return_value)
{
int error;
+ uint32_t config_result_code;
mtx_lock(&ctrlr->uic_cmd_lock);
@@ -134,6 +144,13 @@ ufshci_uic_send_cmd(struct ufshci_controller *ctrlr,
if (error)
return (ENXIO);
+ config_result_code = ufshci_mmio_read_4(ctrlr, ucmdarg2);
+ if (config_result_code) {
+ ufshci_printf(ctrlr,
+ "Failed to send UIC command. (config result code = 0x%x)\n",
+ config_result_code);
+ }
+
if (return_value != NULL)
*return_value = ufshci_mmio_read_4(ctrlr, ucmdarg3);
diff --git a/sys/dev/usb/controller/xhci.c b/sys/dev/usb/controller/xhci.c
index 5be592512196..788b2b718062 100644
--- a/sys/dev/usb/controller/xhci.c
+++ b/sys/dev/usb/controller/xhci.c
@@ -156,6 +156,7 @@ struct xhci_std_temp {
static void xhci_do_poll(struct usb_bus *);
static void xhci_device_done(struct usb_xfer *, usb_error_t);
+static void xhci_get_xecp(struct xhci_softc *);
static void xhci_root_intr(struct xhci_softc *);
static void xhci_free_device_ext(struct usb_device *);
static struct xhci_endpoint_ext *xhci_get_endpoint_ext(struct usb_device *,
@@ -566,6 +567,8 @@ xhci_init(struct xhci_softc *sc, device_t self, uint8_t dma32)
device_printf(self, "%d bytes context size, %d-bit DMA\n",
sc->sc_ctx_is_64_byte ? 64 : 32, (int)sc->sc_bus.dma_bits);
+ xhci_get_xecp(sc);
+
/* enable 64Kbyte control endpoint quirk */
sc->sc_bus.control_ep_quirk = (xhcictlquirk ? 1 : 0);
@@ -654,6 +657,88 @@ xhci_uninit(struct xhci_softc *sc)
}
static void
+xhci_get_xecp(struct xhci_softc *sc)
+{
+	/*
+	 * Walk the xHCI Extended Capabilities (xECP) list and print a
+	 * one-line, comma-separated summary of the capability IDs found,
+	 * e.g. "xECP capabilities <LEGACY,PROTO,DEBUG>".
+	 *
+	 * Known IDs are printed by name, IDs above 191 as VEND(id) and
+	 * every other ID (0, 7-9, 11-16 and 19-191) as RES(id).  Nothing
+	 * is stored in the softc; this is diagnostic output only.
+	 */
+	uint32_t hccp1;
+	uint32_t eec;
+	uint32_t eecp;
+	uint8_t xecpid;
+	bool first = true;
+
+	hccp1 = XREAD4(sc, capa, XHCI_HCSPARAMS0);
+
+	if (XHCI_HCS0_XECP(hccp1) == 0) {
+		device_printf(sc->sc_bus.parent,
+		    "xECP: no capabilities found\n");
+		return;
+	}
+
+	/*
+	 * Parse the xECP Capabilities table and print known caps.
+	 * Implemented, vendor and reserved xECP Capabilities values are
+	 * documented in Table 7.2 of eXtensible Host Controller Interface for
+	 * Universal Serial Bus (xHCI) Rev 1.2b 2023.
+	 */
+	device_printf(sc->sc_bus.parent, "xECP capabilities <");
+
+	/*
+	 * Start with all bits set so that the "next pointer" test in the
+	 * loop condition passes before the first register has been read.
+	 * The loop ends once an entry with a zero next pointer has been
+	 * printed.
+	 */
+	eec = -1;
+	for (eecp = XHCI_HCS0_XECP(hccp1) << 2;
+	    eecp != 0 && XHCI_XECP_NEXT(eec) != 0;
+	    eecp += XHCI_XECP_NEXT(eec) << 2) {
+		eec = XREAD4(sc, capa, eecp);
+		xecpid = XHCI_XECP_ID(eec);
+
+		/* Separate every entry after the first one with a comma. */
+		if (!first)
+			printf(",");
+		first = false;
+
+		if (xecpid > 191) {
+			printf("VEND(%x)", xecpid);
+			continue;
+		}
+
+		switch (xecpid) {
+		case XHCI_ID_USB_LEGACY:
+			printf("LEGACY");
+			break;
+		case XHCI_ID_PROTOCOLS:
+			printf("PROTO");
+			break;
+		case XHCI_ID_POWER_MGMT:
+			printf("POWER");
+			break;
+		case XHCI_ID_VIRTUALIZATION:
+			printf("VIRT");
+			break;
+		case XHCI_ID_MSG_IRQ:
+			printf("MSG IRQ");
+			break;
+		case XHCI_ID_USB_LOCAL_MEM:
+			printf("LOCAL MEM");
+			break;
+		case XHCI_ID_USB_DEBUG:
+			printf("DEBUG");
+			break;
+		case XHCI_ID_EXT_MSI:
+			printf("EXT MSI");
+			break;
+		case XHCI_ID_USB3_TUN:
+			printf("TUN");
+			break;
+		default:
+			/*
+			 * Every remaining ID is reserved.  Printing it here
+			 * (rather than only for 11-16 and 19-191) keeps IDs
+			 * 0 and 7-9 from leaving an empty list entry.
+			 */
+			printf("RES(%x)", xecpid);
+			break;
+		}
+	}
+	printf(">\n");
+}
+
+static void
xhci_set_hw_power_sleep(struct usb_bus *bus, uint32_t state)
{
struct xhci_softc *sc = XHCI_BUS2SC(bus);
diff --git a/sys/dev/usb/controller/xhci_pci.c b/sys/dev/usb/controller/xhci_pci.c
index d5cfd228a429..820fb2f738a1 100644
--- a/sys/dev/usb/controller/xhci_pci.c
+++ b/sys/dev/usb/controller/xhci_pci.c
@@ -178,6 +178,8 @@ xhci_pci_match(device_t self)
return ("Intel Tiger Lake-H USB 3.2 controller");
case 0x461e8086:
return ("Intel Alder Lake-P Thunderbolt 4 USB controller");
+ case 0x4b7d8086:
+ return ("Intel Elkhart Lake USB 3.1 controller");
case 0x51ed8086:
return ("Intel Alder Lake USB 3.2 controller");
case 0x5aa88086:
diff --git a/sys/dev/usb/controller/xhcireg.h b/sys/dev/usb/controller/xhcireg.h
index 9d0b6e2f4b4b..821897155544 100644
--- a/sys/dev/usb/controller/xhcireg.h
+++ b/sys/dev/usb/controller/xhcireg.h
@@ -205,6 +205,11 @@
#define XHCI_ID_VIRTUALIZATION 0x0004
#define XHCI_ID_MSG_IRQ 0x0005
#define XHCI_ID_USB_LOCAL_MEM 0x0006
+/* values 7-9 are reserved */
+#define XHCI_ID_USB_DEBUG 0x000a
+/* values 11-16 are reserved */
+#define XHCI_ID_EXT_MSI 0x0011
+#define XHCI_ID_USB3_TUN 0x0012
/* XHCI register R/W wrappers */
#define XREAD1(sc, what, a) \
diff --git a/sys/dev/usb/net/if_umb.c b/sys/dev/usb/net/if_umb.c
index f640b4224aad..b1082b117259 100644
--- a/sys/dev/usb/net/if_umb.c
+++ b/sys/dev/usb/net/if_umb.c
@@ -177,9 +177,7 @@ static void umb_ncm_setup(struct umb_softc *, struct usb_config *);
static void umb_close_bulkpipes(struct umb_softc *);
static int umb_ioctl(if_t , u_long, caddr_t);
static void umb_init(void *);
-#ifdef DEV_NETMAP
static void umb_input(if_t , struct mbuf *);
-#endif
static int umb_output(if_t , struct mbuf *,
const struct sockaddr *, struct route *);
static void umb_start(if_t );
@@ -585,9 +583,7 @@ umb_attach_task(struct usb_proc_msg *msg)
if_setsoftc(ifp, sc);
if_setflags(ifp, IFF_SIMPLEX | IFF_MULTICAST | IFF_POINTOPOINT);
if_setioctlfn(ifp, umb_ioctl);
-#ifdef DEV_NETMAP
if_setinputfn(ifp, umb_input);
-#endif
if_setoutputfn(ifp, umb_output);
if_setstartfn(ifp, umb_start);
if_setinitfn(ifp, umb_init);
diff --git a/sys/dev/usb/usb_hub.c b/sys/dev/usb/usb_hub.c
index e3509862ef54..ee9d8ab0c9bb 100644
--- a/sys/dev/usb/usb_hub.c
+++ b/sys/dev/usb/usb_hub.c
@@ -954,7 +954,8 @@ done:
* packet. This function is called having the "bus_mtx" locked.
*------------------------------------------------------------------------*/
void
-uhub_root_intr(struct usb_bus *bus, const uint8_t *ptr, uint8_t len)
+uhub_root_intr(struct usb_bus *bus,
+ const uint8_t *ptr __unused, uint8_t len __unused)
{
USB_BUS_LOCK_ASSERT(bus, MA_OWNED);
diff --git a/sys/dev/usb/wlan/if_rsu.c b/sys/dev/usb/wlan/if_rsu.c
index 07f7b6f3a708..e976948f6849 100644
--- a/sys/dev/usb/wlan/if_rsu.c
+++ b/sys/dev/usb/wlan/if_rsu.c
@@ -371,18 +371,16 @@ rsu_update_chw(struct ieee80211com *ic)
/*
* notification from net80211 that it'd like to do A-MPDU on the given TID.
- *
- * Note: this actually hangs traffic at the present moment, so don't use it.
- * The firmware debug does indiciate it's sending and establishing a TX AMPDU
- * session, but then no traffic flows.
*/
static int
rsu_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap)
{
-#if 0
struct rsu_softc *sc = ni->ni_ic->ic_softc;
struct r92s_add_ba_req req;
+ RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: called, tid=%d\n",
+ __func__, tap->txa_tid);
+
/* Don't enable if it's requested or running */
if (IEEE80211_AMPDU_REQUESTED(tap))
return (0);
@@ -397,23 +395,30 @@ rsu_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap)
return (0);
/* Send the firmware command */
- RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: establishing AMPDU TX for TID %d\n",
+ RSU_DPRINTF(sc, RSU_DEBUG_AMPDU,
+ "%s: establishing AMPDU TX for TID %d\n",
__func__,
tap->txa_tid);
RSU_LOCK(sc);
- if (rsu_fw_cmd(sc, R92S_CMD_ADDBA_REQ, &req, sizeof(req)) != 1) {
+ if (rsu_fw_cmd(sc, R92S_CMD_ADDBA_REQ, &req, sizeof(req)) != 0) {
RSU_UNLOCK(sc);
+ RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: AMPDU TX cmd failure\n",
+ __func__);
/* Mark failure */
- (void) ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 0);
+ ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 0);
+ /* Return 0, we've been driving this ourselves */
return (0);
}
RSU_UNLOCK(sc);
+ RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: AMPDU TX cmd success\n",
+ __func__);
+
/* Mark success; we don't get any further notifications */
- (void) ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 1);
-#endif
- /* Return 0, we're driving this ourselves */
+ ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 1);
+
+ /* Return 0, we've been driving this ourselves */
return (0);
}
@@ -563,9 +568,7 @@ rsu_attach(device_t self)
/* Enable basic HT */
ic->ic_htcaps = IEEE80211_HTC_HT |
-#if 0
IEEE80211_HTC_AMPDU |
-#endif
IEEE80211_HTC_AMSDU |
IEEE80211_HTCAP_MAXAMSDU_3839 |
IEEE80211_HTCAP_SMPS_OFF;
@@ -576,6 +579,7 @@ rsu_attach(device_t self)
ic->ic_rxstream = sc->sc_nrxstream;
}
ic->ic_flags_ext |= IEEE80211_FEXT_SCAN_OFFLOAD;
+ ic->ic_flags_ext |= IEEE80211_FEXT_SEQNO_OFFLOAD;
rsu_getradiocaps(ic, IEEE80211_CHAN_MAX, &ic->ic_nchans,
ic->ic_channels);
@@ -1537,6 +1541,10 @@ rsu_key_alloc(struct ieee80211vap *vap, struct ieee80211_key *k,
is_checked = 1;
k->wk_flags |= IEEE80211_KEY_SWCRYPT;
} else
+ /*
+ * TODO: should allocate these from the CAM space;
+ * skipping over the fixed slots and _BC / _BSS.
+ */
*keyix = R92S_MACID_BSS;
}
@@ -2166,7 +2174,7 @@ rsu_event_addba_req_report(struct rsu_softc *sc, uint8_t *buf, int len)
__func__,
ether_sprintf(ba->mac_addr),
(int) ba->tid,
- (int) le16toh(ba->ssn));
+ (int) le16toh(ba->ssn) >> 4);
/* XXX do node lookup; this is STA specific */
@@ -2212,6 +2220,11 @@ rsu_rx_event(struct rsu_softc *sc, uint8_t code, uint8_t *buf, int len)
if (vap->iv_state == IEEE80211_S_AUTH)
rsu_event_join_bss(sc, buf, len);
break;
+
+ /* TODO: what about R92S_EVT_ADD_STA? and decoding macid? */
+ /* It likely is required for IBSS/AP mode */
+
+ /* TODO: should I be doing this transition in AP mode? */
case R92S_EVT_DEL_STA:
RSU_DPRINTF(sc, RSU_DEBUG_FWCMD | RSU_DEBUG_STATE,
"%s: disassociated from %s\n", __func__,
@@ -2229,6 +2242,7 @@ rsu_rx_event(struct rsu_softc *sc, uint8_t code, uint8_t *buf, int len)
break;
case R92S_EVT_FWDBG:
buf[60] = '\0';
+ /* TODO: some are \n terminated, some aren't, sigh */
RSU_DPRINTF(sc, RSU_DEBUG_FWDBG, "FWDBG: %s\n", (char *)buf);
break;
case R92S_EVT_ADDBA_REQ_REPORT:
@@ -2782,6 +2796,9 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni,
if (rate != 0)
ridx = rate2ridx(rate);
+ /* Assign sequence number, A-MPDU or otherwise */
+ ieee80211_output_seqno_assign(ni, -1, m0);
+
if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) {
k = ieee80211_crypto_encap(ni, m0);
if (k == NULL) {
@@ -2838,8 +2855,10 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni,
SM(R92S_TXDW0_OFFSET, sizeof(*txd)) |
R92S_TXDW0_OWN | R92S_TXDW0_FSG | R92S_TXDW0_LSG);
+ /* TODO: correct macid here? It should be in the node */
txd->txdw1 |= htole32(
SM(R92S_TXDW1_MACID, R92S_MACID_BSS) | SM(R92S_TXDW1_QSEL, qid));
+
if (!hasqos)
txd->txdw1 |= htole32(R92S_TXDW1_NONQOS);
if (k != NULL && !(k->wk_flags & IEEE80211_KEY_SWENCRYPT)) {
@@ -2860,8 +2879,13 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni,
SM(R92S_TXDW1_CIPHER, cipher) |
SM(R92S_TXDW1_KEYIDX, k->wk_keyix));
}
- /* XXX todo: set AGGEN bit if appropriate? */
- txd->txdw2 |= htole32(R92S_TXDW2_BK);
+
+ /*
+ * Note: no need to set TXDW2_AGGEN/TXDW2_BK to mark
+ * A-MPDU and non-AMPDU candidates; the firmware will
+ * handle this for us.
+ */
+
if (ismcast)
txd->txdw2 |= htole32(R92S_TXDW2_BMCAST);
@@ -2880,8 +2904,11 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni,
}
/*
- * Firmware will use and increment the sequence number for the
- * specified priority.
+ * Pass in prio here, NOT the sequence number.
+ *
+ * The hardware is in theory incrementing sequence numbers
+ * for us, but I haven't yet figured out exactly when/how
+ * it's supposed to work.
*/
txd->txdw3 |= htole32(SM(R92S_TXDW3_SEQ, prio));
@@ -3481,7 +3508,8 @@ rsu_load_firmware(struct rsu_softc *sc)
dmem.vcs_mode = R92S_VCS_MODE_RTS_CTS;
dmem.turbo_mode = 0;
dmem.bw40_en = !! (ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40);
- dmem.amsdu2ampdu_en = !! (sc->sc_ht);
+ /* net80211 handles AMSDUs just fine */
+ dmem.amsdu2ampdu_en = 0;
dmem.ampdu_en = !! (sc->sc_ht);
dmem.agg_offload = !! (sc->sc_ht);
dmem.qos_en = 1;
diff --git a/sys/dev/usb/wlan/if_rsureg.h b/sys/dev/usb/wlan/if_rsureg.h
index fb706a4d9b1a..e2074e1dd2ad 100644
--- a/sys/dev/usb/wlan/if_rsureg.h
+++ b/sys/dev/usb/wlan/if_rsureg.h
@@ -593,7 +593,14 @@ struct r92s_event_join_bss {
struct ndis_wlan_bssid_ex bss;
} __packed;
-#define R92S_MACID_BSS 5 /* XXX hardcoded somewhere */
+/*
+ * This is hard-coded in the firmware for a STA mode
+ * BSS join. If you turn on FWDEBUG, you'll see this
+ * in the logs:
+ *
+ * rsu0: FWDBG: mac id #5: 0000005b, 000fffff, 00000000
+ */
+#define R92S_MACID_BSS 5
/* Rx MAC descriptor. */
struct r92s_rx_stat {
diff --git a/sys/dev/usb/wlan/if_run.c b/sys/dev/usb/wlan/if_run.c
index 97c790dd5b81..147aa4044057 100644
--- a/sys/dev/usb/wlan/if_run.c
+++ b/sys/dev/usb/wlan/if_run.c
@@ -882,6 +882,7 @@ run_attach(device_t self)
ic->ic_flags |= IEEE80211_F_DATAPAD;
ic->ic_flags_ext |= IEEE80211_FEXT_SWBMISS;
+ ic->ic_flags_ext |= IEEE80211_FEXT_SEQNO_OFFLOAD;
run_getradiocaps(ic, IEEE80211_CHAN_MAX, &ic->ic_nchans,
ic->ic_channels);
@@ -3522,6 +3523,9 @@ run_tx(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni)
data->ni = ni;
data->ridx = ridx;
+ /* Assign sequence number now, regardless of A-MPDU TX or otherwise (for now) */
+ ieee80211_output_seqno_assign(ni, -1, m);
+
run_set_tx_desc(sc, data);
/*
@@ -3627,6 +3631,9 @@ run_tx_mgt(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni)
data->ni = ni;
data->ridx = ridx;
+ /* Assign sequence number now, regardless of A-MPDU TX or otherwise (for now) */
+ ieee80211_output_seqno_assign(ni, -1, m);
+
run_set_tx_desc(sc, data);
RUN_DPRINTF(sc, RUN_DEBUG_XMIT, "sending mgt frame len=%d rate=%d\n",
@@ -3771,6 +3778,9 @@ run_tx_param(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni,
break;
data->ridx = ridx;
+ /* Assign sequence number now, regardless of A-MPDU TX or otherwise (for now) */
+ ieee80211_output_seqno_assign(ni, -1, m);
+
run_set_tx_desc(sc, data);
RUN_DPRINTF(sc, RUN_DEBUG_XMIT, "sending raw frame len=%u rate=%u\n",
@@ -6416,6 +6426,10 @@ run_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap)
{
/* For now, no A-MPDU TX support in the driver */
+ /*
+ * TODO: maybe we needed to enable seqno generation too?
+ * What other TX desc bits are missing/needed?
+ */
return (0);
}
diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index ecb3dbb370e5..528ff3372097 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -133,12 +133,14 @@ static int vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
static int vtnet_rxq_new_buf(struct vtnet_rxq *);
+#if defined(INET) || defined(INET6)
static int vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
- uint16_t, int, struct virtio_net_hdr *);
-static int vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
- uint16_t, int, struct virtio_net_hdr *);
+ bool, int, struct virtio_net_hdr *);
+static void vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
+ int);
static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
struct virtio_net_hdr *);
+#endif
static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
@@ -1178,6 +1180,7 @@ vtnet_setup_interface(struct vtnet_softc *sc)
if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO)
if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
+ if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0);
/*
* Capabilities after here are not enabled by default.
@@ -1760,164 +1763,165 @@ vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
return (error);
}
+#if defined(INET) || defined(INET6)
static int
-vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype,
- int hoff, struct virtio_net_hdr *hdr)
+vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, bool isipv6,
+ int protocol, struct virtio_net_hdr *hdr)
{
struct vtnet_softc *sc;
- int error;
- sc = rxq->vtnrx_sc;
+ /*
+ * The packet is likely from another VM on the same host or from the
+ * host that itself performed checksum offloading so Tx/Rx is basically
+ * a memcpy and the checksum has little value so far.
+ */
+
+ KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP,
+ ("%s: unsupported IP protocol %d", __func__, protocol));
/*
- * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does
- * not have an analogous CSUM flag. The checksum has been validated,
- * but is incomplete (TCP/UDP pseudo header).
- *
- * The packet is likely from another VM on the same host that itself
- * performed checksum offloading so Tx/Rx is basically a memcpy and
- * the checksum has little value.
- *
- * Default to receiving the packet as-is for performance reasons, but
- * this can cause issues if the packet is to be forwarded because it
- * does not contain a valid checksum. This patch may be helpful:
- * https://reviews.freebsd.org/D6611. In the meantime, have the driver
- * compute the checksum if requested.
- *
- * BMV: Need to add an CSUM_PARTIAL flag?
+ * If the user doesn't want us to fix it up here by computing the
+ * checksum, just forward the order to compute the checksum by setting
+ * the corresponding mbuf flag (e.g., CSUM_TCP).
*/
+ sc = rxq->vtnrx_sc;
if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
- error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
- return (error);
+ switch (protocol) {
+ case IPPROTO_TCP:
+ m->m_pkthdr.csum_flags |=
+ (isipv6 ? CSUM_TCP_IPV6 : CSUM_TCP);
+ break;
+ case IPPROTO_UDP:
+ m->m_pkthdr.csum_flags |=
+ (isipv6 ? CSUM_UDP_IPV6 : CSUM_UDP);
+ break;
+ }
+ m->m_pkthdr.csum_data = hdr->csum_offset;
+ return (0);
}
/*
* Compute the checksum in the driver so the packet will contain a
* valid checksum. The checksum is at csum_offset from csum_start.
*/
- switch (etype) {
-#if defined(INET) || defined(INET6)
- case ETHERTYPE_IP:
- case ETHERTYPE_IPV6: {
- int csum_off, csum_end;
- uint16_t csum;
-
- csum_off = hdr->csum_start + hdr->csum_offset;
- csum_end = csum_off + sizeof(uint16_t);
+ int csum_off, csum_end;
+ uint16_t csum;
- /* Assume checksum will be in the first mbuf. */
- if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
- return (1);
+ csum_off = hdr->csum_start + hdr->csum_offset;
+ csum_end = csum_off + sizeof(uint16_t);
- /*
- * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
- * checksum and write it at the specified offset. We could
- * try to verify the packet: csum_start should probably
- * correspond to the start of the TCP/UDP header.
- *
- * BMV: Need to properly handle UDP with zero checksum. Is
- * the IPv4 header checksum implicitly validated?
- */
- csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
- *(uint16_t *)(mtodo(m, csum_off)) = csum;
- m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
- m->m_pkthdr.csum_data = 0xFFFF;
- break;
- }
-#endif
- default:
- sc->vtnet_stats.rx_csum_bad_ethtype++;
+ /* Assume checksum will be in the first mbuf. */
+ if (m->m_len < csum_end || m->m_pkthdr.len < csum_end) {
+ sc->vtnet_stats.rx_csum_bad_offset++;
return (1);
}
+ /*
+ * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
+ * checksum and write it at the specified offset. We could
+ * try to verify the packet: csum_start should probably
+ * correspond to the start of the TCP/UDP header.
+ *
+ * BMV: Need to properly handle UDP with zero checksum. Is
+ * the IPv4 header checksum implicitly validated?
+ */
+ csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
+ *(uint16_t *)(mtodo(m, csum_off)) = csum;
+ m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xFFFF;
+
return (0);
}
+/*
+ * Mark the checksum of mbuf 'm' as already validated by setting
+ * CSUM_DATA_VALID | CSUM_PSEUDO_HDR in csum_flags and 0xFFFF in csum_data.
+ *
+ * 'protocol' must be IPPROTO_TCP or IPPROTO_UDP (asserted); 'rxq' is not
+ * referenced by this function.
+ */
+static void
+vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m, int protocol)
+{
+ KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP,
+ ("%s: unsupported IP protocol %d", __func__, protocol));
+
+ m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xFFFF;
+}
+
static int
-vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m,
- uint16_t etype, int hoff, struct virtio_net_hdr *hdr __unused)
+vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
+ struct virtio_net_hdr *hdr)
{
-#if 0
+ const struct ether_header *eh;
struct vtnet_softc *sc;
-#endif
- int protocol;
+ int hoff, protocol;
+ uint16_t etype;
+ bool isipv6;
+
+ KASSERT(hdr->flags &
+ (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID),
+ ("%s: missing checksum offloading flag %x", __func__, hdr->flags));
+
+ eh = mtod(m, const struct ether_header *);
+ etype = ntohs(eh->ether_type);
+ if (etype == ETHERTYPE_VLAN) {
+ /* TODO BMV: Handle QinQ. */
+ const struct ether_vlan_header *evh =
+ mtod(m, const struct ether_vlan_header *);
+ etype = ntohs(evh->evl_proto);
+ hoff = sizeof(struct ether_vlan_header);
+ } else
+ hoff = sizeof(struct ether_header);
-#if 0
sc = rxq->vtnrx_sc;
-#endif
+ /* Check whether ethernet type is IP or IPv6, and get protocol. */
switch (etype) {
#if defined(INET)
case ETHERTYPE_IP:
- if (__predict_false(m->m_len < hoff + sizeof(struct ip)))
- protocol = IPPROTO_DONE;
- else {
+ if (__predict_false(m->m_len < hoff + sizeof(struct ip))) {
+ sc->vtnet_stats.rx_csum_inaccessible_ipproto++;
+ return (1);
+ } else {
struct ip *ip = (struct ip *)(m->m_data + hoff);
protocol = ip->ip_p;
}
+ isipv6 = false;
break;
#endif
#if defined(INET6)
case ETHERTYPE_IPV6:
if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
- || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0)
- protocol = IPPROTO_DONE;
+ || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0) {
+ sc->vtnet_stats.rx_csum_inaccessible_ipproto++;
+ return (1);
+ }
+ isipv6 = true;
break;
#endif
default:
- protocol = IPPROTO_DONE;
- break;
+ sc->vtnet_stats.rx_csum_bad_ethtype++;
+ return (1);
}
+ /* Check whether protocol is TCP or UDP. */
switch (protocol) {
case IPPROTO_TCP:
case IPPROTO_UDP:
- m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
- m->m_pkthdr.csum_data = 0xFFFF;
break;
default:
/*
* FreeBSD does not support checksum offloading of this
- * protocol. Let the stack re-verify the checksum later
- * if the protocol is supported.
+ * protocol here.
*/
-#if 0
- if_printf(sc->vtnet_ifp,
- "%s: checksum offload of unsupported protocol "
- "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n",
- __func__, etype, protocol, hdr->csum_start,
- hdr->csum_offset);
-#endif
- break;
+ sc->vtnet_stats.rx_csum_bad_ipproto++;
+ return (1);
}
- return (0);
-}
-
-static int
-vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
- struct virtio_net_hdr *hdr)
-{
- const struct ether_header *eh;
- int hoff;
- uint16_t etype;
-
- eh = mtod(m, const struct ether_header *);
- etype = ntohs(eh->ether_type);
- if (etype == ETHERTYPE_VLAN) {
- /* TODO BMV: Handle QinQ. */
- const struct ether_vlan_header *evh =
- mtod(m, const struct ether_vlan_header *);
- etype = ntohs(evh->evl_proto);
- hoff = sizeof(struct ether_vlan_header);
- } else
- hoff = sizeof(struct ether_header);
-
if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
- return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr));
+ return (vtnet_rxq_csum_needs_csum(rxq, m, isipv6, protocol,
+ hdr));
else /* VIRTIO_NET_HDR_F_DATA_VALID */
- return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr));
+ vtnet_rxq_csum_data_valid(rxq, m, protocol);
+
+ return (0);
}
+#endif
static void
vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
@@ -2040,10 +2044,15 @@ vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
if (hdr->flags &
(VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) {
+#if defined(INET) || defined(INET6)
if (vtnet_rxq_csum(rxq, m, hdr) == 0)
rxq->vtnrx_stats.vrxs_csum++;
else
rxq->vtnrx_stats.vrxs_csum_failed++;
+#else
+ sc->vtnet_stats.rx_csum_bad_ethtype++;
+ rxq->vtnrx_stats.vrxs_csum_failed++;
+#endif
}
if (hdr->gso_size != 0) {
@@ -2497,6 +2506,10 @@ vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
hdr->csum_start = vtnet_gtoh16(sc, csum_start);
hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
txq->vtntx_stats.vtxs_csum++;
+ } else if ((flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) &&
+ (proto == IPPROTO_TCP || proto == IPPROTO_UDP) &&
+ (m->m_pkthdr.csum_data == 0xFFFF)) {
+ hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
}
if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) {
@@ -2551,8 +2564,10 @@ vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
error = sglist_append_mbuf(sg, m);
if (error) {
m = m_defrag(m, M_NOWAIT);
- if (m == NULL)
+ if (m == NULL) {
+ sc->vtnet_stats.tx_defrag_failed++;
goto fail;
+ }
*m_head = m;
sc->vtnet_stats.tx_defragged++;
@@ -2568,7 +2583,6 @@ vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
return (error);
fail:
- sc->vtnet_stats.tx_defrag_failed++;
m_freem(*m_head);
*m_head = NULL;
@@ -2609,7 +2623,8 @@ vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
m->m_flags &= ~M_VLANTAG;
}
- if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
+ if (m->m_pkthdr.csum_flags &
+ (VTNET_CSUM_ALL_OFFLOAD | CSUM_DATA_VALID)) {
m = vtnet_txq_offload(txq, m, hdr);
if ((*m_head = m) == NULL) {
error = ENOBUFS;
@@ -3031,16 +3046,14 @@ vtnet_get_counter(if_t ifp, ift_counter cnt)
return (rxaccum.vrxs_iqdrops);
case IFCOUNTER_IERRORS:
return (rxaccum.vrxs_ierrors);
+ case IFCOUNTER_IBYTES:
+ return (rxaccum.vrxs_ibytes);
case IFCOUNTER_OPACKETS:
return (txaccum.vtxs_opackets);
case IFCOUNTER_OBYTES:
- if (!VTNET_ALTQ_ENABLED)
- return (txaccum.vtxs_obytes);
- /* FALLTHROUGH */
+ return (txaccum.vtxs_obytes);
case IFCOUNTER_OMCASTS:
- if (!VTNET_ALTQ_ENABLED)
- return (txaccum.vtxs_omcasts);
- /* FALLTHROUGH */
+ return (txaccum.vtxs_omcasts);
default:
return (if_get_counter_default(ifp, cnt));
}
@@ -3813,9 +3826,9 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
if_printf(ifp, "error setting host MAC filter table\n");
out:
- if (promisc != 0 && vtnet_set_promisc(sc, true) != 0)
+ if (promisc && vtnet_set_promisc(sc, true) != 0)
if_printf(ifp, "cannot enable promiscuous mode\n");
- if (allmulti != 0 && vtnet_set_allmulti(sc, true) != 0)
+ if (allmulti && vtnet_set_allmulti(sc, true) != 0)
if_printf(ifp, "cannot enable all-multicast mode\n");
}
@@ -4100,21 +4113,29 @@ vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
stats = &rxq->vtnrx_stats;
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_ipackets, "Receive packets");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_ibytes, "Receive bytes");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_iqdrops, "Receive drops");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_ierrors, "Receive errors");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_csum, "Receive checksum offloaded");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_csum_failed, "Receive checksum offload failed");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_host_lro, "Receive host segmentation offloaded");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_rescheduled,
"Receive interrupt handler rescheduled");
}
@@ -4135,17 +4156,23 @@ vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
stats = &txq->vtntx_stats;
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vtxs_opackets, "Transmit packets");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vtxs_obytes, "Transmit bytes");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vtxs_omcasts, "Transmit multicasts");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vtxs_csum, "Transmit checksum offloaded");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vtxs_tso, "Transmit TCP segmentation offloaded");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vtxs_rescheduled,
"Transmit interrupt handler rescheduled");
}
@@ -4170,6 +4197,102 @@ vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
}
}
+static int
+vtnet_sysctl_rx_csum_failed(SYSCTL_HANDLER_ARGS)
+{
+ struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
+ struct vtnet_statistics *stats = &sc->vtnet_stats;
+ struct vtnet_rxq_stats *rxst;
+ int i;
+
+ stats->rx_csum_failed = 0;
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
+ stats->rx_csum_failed += rxst->vrxs_csum_failed;
+ }
+ return (sysctl_handle_64(oidp, NULL, stats->rx_csum_failed, req));
+}
+
+static int
+vtnet_sysctl_rx_csum_offloaded(SYSCTL_HANDLER_ARGS)
+{
+ struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
+ struct vtnet_statistics *stats = &sc->vtnet_stats;
+ struct vtnet_rxq_stats *rxst;
+ int i;
+
+ stats->rx_csum_offloaded = 0;
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
+ stats->rx_csum_offloaded += rxst->vrxs_csum;
+ }
+ return (sysctl_handle_64(oidp, NULL, stats->rx_csum_offloaded, req));
+}
+
+static int
+vtnet_sysctl_rx_task_rescheduled(SYSCTL_HANDLER_ARGS)
+{
+ struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
+ struct vtnet_statistics *stats = &sc->vtnet_stats;
+ struct vtnet_rxq_stats *rxst;
+ int i;
+
+ stats->rx_task_rescheduled = 0;
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
+ stats->rx_task_rescheduled += rxst->vrxs_rescheduled;
+ }
+ return (sysctl_handle_64(oidp, NULL, stats->rx_task_rescheduled, req));
+}
+
+static int
+vtnet_sysctl_tx_csum_offloaded(SYSCTL_HANDLER_ARGS)
+{
+ struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
+ struct vtnet_statistics *stats = &sc->vtnet_stats;
+ struct vtnet_txq_stats *txst;
+ int i;
+
+ stats->tx_csum_offloaded = 0;
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ txst = &sc->vtnet_txqs[i].vtntx_stats;
+ stats->tx_csum_offloaded += txst->vtxs_csum;
+ }
+ return (sysctl_handle_64(oidp, NULL, stats->tx_csum_offloaded, req));
+}
+
+static int
+vtnet_sysctl_tx_tso_offloaded(SYSCTL_HANDLER_ARGS)
+{
+ struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
+ struct vtnet_statistics *stats = &sc->vtnet_stats;
+ struct vtnet_txq_stats *txst;
+ int i;
+
+ stats->tx_tso_offloaded = 0;
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ txst = &sc->vtnet_txqs[i].vtntx_stats;
+ stats->tx_tso_offloaded += txst->vtxs_tso;
+ }
+ return (sysctl_handle_64(oidp, NULL, stats->tx_tso_offloaded, req));
+}
+
+static int
+vtnet_sysctl_tx_task_rescheduled(SYSCTL_HANDLER_ARGS)
+{
+ struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
+ struct vtnet_statistics *stats = &sc->vtnet_stats;
+ struct vtnet_txq_stats *txst;
+ int i;
+
+ stats->tx_task_rescheduled = 0;
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ txst = &sc->vtnet_txqs[i].vtntx_stats;
+ stats->tx_task_rescheduled += txst->vtxs_rescheduled;
+ }
+ return (sysctl_handle_64(oidp, NULL, stats->tx_task_rescheduled, req));
+}
+
static void
vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
struct sysctl_oid_list *child, struct vtnet_softc *sc)
@@ -4189,69 +4312,75 @@ vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
- CTLFLAG_RD, &stats->mbuf_alloc_failed,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->mbuf_alloc_failed,
"Mbuf cluster allocation failures");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
- CTLFLAG_RD, &stats->rx_frame_too_large,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_frame_too_large,
"Received frame larger than the mbuf chain");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
- CTLFLAG_RD, &stats->rx_enq_replacement_failed,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_enq_replacement_failed,
"Enqueuing the replacement receive mbuf failed");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
- CTLFLAG_RD, &stats->rx_mergeable_failed,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_mergeable_failed,
"Mergeable buffers receive failures");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
- CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_ethtype,
"Received checksum offloaded buffer with unsupported "
"Ethernet type");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
- CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_ipproto,
"Received checksum offloaded buffer with incorrect IP protocol");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
- CTLFLAG_RD, &stats->rx_csum_bad_offset,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_offset,
"Received checksum offloaded buffer with incorrect offset");
- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
- CTLFLAG_RD, &stats->rx_csum_bad_proto,
- "Received checksum offloaded buffer with incorrect protocol");
- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
- CTLFLAG_RD, &stats->rx_csum_failed,
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_inaccessible_ipproto",
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_inaccessible_ipproto,
+ "Received checksum offloaded buffer with inaccessible IP protocol");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_failed",
+ CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
+ sc, 0, vtnet_sysctl_rx_csum_failed, "QU",
"Received buffer checksum offload failed");
- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
- CTLFLAG_RD, &stats->rx_csum_offloaded,
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_offloaded",
+ CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
+ sc, 0, vtnet_sysctl_rx_csum_offloaded, "QU",
"Received buffer checksum offload succeeded");
- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
- CTLFLAG_RD, &stats->rx_task_rescheduled,
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_task_rescheduled",
+ CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
+ sc, 0, vtnet_sysctl_rx_task_rescheduled, "QU",
"Times the receive interrupt task rescheduled itself");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype",
- CTLFLAG_RD, &stats->tx_csum_unknown_ethtype,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_csum_unknown_ethtype,
"Aborted transmit of checksum offloaded buffer with unknown "
"Ethernet type");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch",
- CTLFLAG_RD, &stats->tx_csum_proto_mismatch,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_csum_proto_mismatch,
"Aborted transmit of checksum offloaded buffer because mismatched "
"protocols");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
- CTLFLAG_RD, &stats->tx_tso_not_tcp,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_tso_not_tcp,
"Aborted transmit of TSO buffer with non TCP protocol");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum",
- CTLFLAG_RD, &stats->tx_tso_without_csum,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_tso_without_csum,
"Aborted transmit of TSO buffer without TCP checksum offload");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
- CTLFLAG_RD, &stats->tx_defragged,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_defragged,
"Transmit mbufs defragged");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
- CTLFLAG_RD, &stats->tx_defrag_failed,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_defrag_failed,
"Aborted transmit of buffer because defrag failed");
- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
- CTLFLAG_RD, &stats->tx_csum_offloaded,
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_csum_offloaded",
+ CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
+ sc, 0, vtnet_sysctl_tx_csum_offloaded, "QU",
"Offloaded checksum of transmitted buffer");
- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
- CTLFLAG_RD, &stats->tx_tso_offloaded,
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_tso_offloaded",
+ CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
+ sc, 0, vtnet_sysctl_tx_tso_offloaded, "QU",
"Segmentation offload of transmitted buffer");
- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
- CTLFLAG_RD, &stats->tx_task_rescheduled,
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_task_rescheduled",
+ CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
+ sc, 0, vtnet_sysctl_tx_task_rescheduled, "QU",
"Times the transmit interrupt task rescheduled itself");
}
diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h
index 0144b0f3232d..cab7ced639a7 100644
--- a/sys/dev/virtio/network/if_vtnetvar.h
+++ b/sys/dev/virtio/network/if_vtnetvar.h
@@ -46,7 +46,7 @@ struct vtnet_statistics {
uint64_t rx_csum_bad_ethtype;
uint64_t rx_csum_bad_ipproto;
uint64_t rx_csum_bad_offset;
- uint64_t rx_csum_bad_proto;
+ uint64_t rx_csum_inaccessible_ipproto;
uint64_t tx_csum_unknown_ethtype;
uint64_t tx_csum_proto_mismatch;
uint64_t tx_tso_not_tcp;
diff --git a/sys/dev/vmgenc/vmgenc_acpi.c b/sys/dev/vmgenc/vmgenc_acpi.c
index 2ad8929dfd34..18519a8e4f22 100644
--- a/sys/dev/vmgenc/vmgenc_acpi.c
+++ b/sys/dev/vmgenc/vmgenc_acpi.c
@@ -56,6 +56,7 @@
#include <contrib/dev/acpica/include/acpi.h>
#include <dev/acpica/acpivar.h>
+#include <dev/random/randomdev.h>
#include <dev/random/random_harvestq.h>
#include <dev/vmgenc/vmgenc_acpi.h>
@@ -210,6 +211,11 @@ acpi_GetPackedUINT64(device_t dev, ACPI_HANDLE handle, char *path,
}
+static const struct random_source random_vmgenid = {
+ .rs_ident = "VM Generation ID",
+ .rs_source = RANDOM_PURE_VMGENID,
+};
+
static int
vmgenc_attach(device_t dev)
{
@@ -234,7 +240,7 @@ vmgenc_attach(device_t dev)
memcpy(sc->vmg_cache_guid, __DEVOLATILE(void *, sc->vmg_pguid),
sizeof(sc->vmg_cache_guid));
- random_harvest_register_source(RANDOM_PURE_VMGENID);
+ random_source_register(&random_vmgenid);
vmgenc_harvest_all(sc->vmg_cache_guid, sizeof(sc->vmg_cache_guid));
AcpiInstallNotifyHandler(h, ACPI_DEVICE_NOTIFY, vmgenc_notify, dev);
diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
index 9f2b009d02ec..460a508a60dc 100644
--- a/sys/dev/vmm/vmm_dev.c
+++ b/sys/dev/vmm/vmm_dev.c
@@ -901,6 +901,7 @@ vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
sc->cdev = NULL;
sx_xunlock(&vmmdev_mtx);
+ vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
destroy_dev(cdev);
vmmdev_destroy(sc);
diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c
index e5fdb395c9f7..862780741ee7 100644
--- a/sys/fs/nfs/nfs_commonport.c
+++ b/sys/fs/nfs/nfs_commonport.c
@@ -371,8 +371,6 @@ nfsrv_atroot(struct vnode *vp, uint64_t *retp)
/*
* Set the credentials to refer to root.
- * If only the various BSDen could agree on whether cr_gid is a separate
- * field or cr_groups[0]...
*/
void
newnfs_setroot(struct ucred *cred)
diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c
index e9f1dc23ddbe..77e71d4153c9 100644
--- a/sys/fs/nfsclient/nfs_clport.c
+++ b/sys/fs/nfsclient/nfs_clport.c
@@ -1098,9 +1098,10 @@ newnfs_copyincred(struct ucred *cr, struct nfscred *nfscr)
KASSERT(cr->cr_ngroups >= 0,
("newnfs_copyincred: negative cr_ngroups"));
nfscr->nfsc_uid = cr->cr_uid;
- nfscr->nfsc_ngroups = MIN(cr->cr_ngroups, NFS_MAXGRPS + 1);
- for (i = 0; i < nfscr->nfsc_ngroups; i++)
- nfscr->nfsc_groups[i] = cr->cr_groups[i];
+ nfscr->nfsc_ngroups = MIN(cr->cr_ngroups + 1, NFS_MAXGRPS + 1);
+ nfscr->nfsc_groups[0] = cr->cr_gid;
+ for (i = 1; i < nfscr->nfsc_ngroups; i++)
+ nfscr->nfsc_groups[i] = cr->cr_groups[i - 1];
}
/*
diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c
index a8b06fdb261b..eee571a04821 100644
--- a/sys/fs/nfsclient/nfs_clvnops.c
+++ b/sys/fs/nfsclient/nfs_clvnops.c
@@ -3474,7 +3474,7 @@ nfs_advlock(struct vop_advlock_args *ap)
u_quad_t size;
struct nfsmount *nmp;
- error = NFSVOPLOCK(vp, LK_SHARED);
+ error = NFSVOPLOCK(vp, LK_EXCLUSIVE);
if (error != 0)
return (EBADF);
nmp = VFSTONFS(vp->v_mount);
@@ -3511,11 +3511,6 @@ nfs_advlock(struct vop_advlock_args *ap)
cred = p->p_ucred;
else
cred = td->td_ucred;
- NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
- if (VN_IS_DOOMED(vp)) {
- error = EBADF;
- goto out;
- }
/*
* If this is unlocking a write locked region, flush and
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index b2966934f9b7..7040c4afb797 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -2607,6 +2607,7 @@ again:
* rpc reply
*/
if (siz == 0) {
+ateof:
vput(vp);
if (nd->nd_flag & ND_NFSV3)
nfsrv_postopattr(nd, getret, &at);
@@ -2648,6 +2649,8 @@ again:
ncookies--;
}
if (cpos >= cend || ncookies == 0) {
+ if (eofflag != 0)
+ goto ateof;
siz = fullsiz;
toff = off;
goto again;
diff --git a/sys/fs/procfs/procfs.c b/sys/fs/procfs/procfs.c
index ab60ba47f322..cd66dd6f8b3b 100644
--- a/sys/fs/procfs/procfs.c
+++ b/sys/fs/procfs/procfs.c
@@ -156,42 +156,42 @@ procfs_init(PFS_INIT_ARGS)
root = pi->pi_root;
- pfs_create_link(root, "curproc", procfs_docurproc,
- NULL, NULL, NULL, 0);
- pfs_create_link(root, "self", procfs_docurproc,
- NULL, NULL, NULL, 0);
-
- dir = pfs_create_dir(root, "pid",
- procfs_attr_all_rx, NULL, NULL, PFS_PROCDEP);
- pfs_create_file(dir, "cmdline", procfs_doproccmdline,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "dbregs", procfs_doprocdbregs,
+ pfs_create_link(root, NULL, "curproc", procfs_docurproc, NULL, NULL,
+ NULL, 0);
+ pfs_create_link(root, NULL, "self", procfs_docurproc, NULL, NULL, NULL,
+ 0);
+
+ pfs_create_dir(root, &dir, "pid", procfs_attr_all_rx, NULL, NULL,
+ PFS_PROCDEP);
+ pfs_create_file(dir, NULL, "cmdline", procfs_doproccmdline, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "dbregs", procfs_doprocdbregs,
procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
- pfs_create_file(dir, "etype", procfs_doproctype,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "fpregs", procfs_doprocfpregs,
+ pfs_create_file(dir, NULL, "etype", procfs_doproctype, NULL, NULL, NULL,
+ PFS_RD);
+ pfs_create_file(dir, NULL, "fpregs", procfs_doprocfpregs,
procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
- pfs_create_file(dir, "map", procfs_doprocmap,
- NULL, procfs_notsystem, NULL, PFS_RD);
- pfs_create_file(dir, "mem", procfs_doprocmem,
- procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
- pfs_create_file(dir, "note", procfs_doprocnote,
- procfs_attr_w, procfs_candebug, NULL, PFS_WR);
- pfs_create_file(dir, "notepg", procfs_doprocnote,
- procfs_attr_w, procfs_candebug, NULL, PFS_WR);
- pfs_create_file(dir, "regs", procfs_doprocregs,
- procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
- pfs_create_file(dir, "rlimit", procfs_doprocrlimit,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "status", procfs_doprocstatus,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "osrel", procfs_doosrel,
- procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR);
-
- pfs_create_link(dir, "file", procfs_doprocfile,
- NULL, procfs_notsystem, NULL, 0);
- pfs_create_link(dir, "exe", procfs_doprocfile,
- NULL, procfs_notsystem, NULL, 0);
+ pfs_create_file(dir, NULL, "map", procfs_doprocmap, NULL,
+ procfs_notsystem, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "mem", procfs_doprocmem, procfs_attr_rw,
+ procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
+ pfs_create_file(dir, NULL, "note", procfs_doprocnote, procfs_attr_w,
+ procfs_candebug, NULL, PFS_WR);
+ pfs_create_file(dir, NULL, "notepg", procfs_doprocnote, procfs_attr_w,
+ procfs_candebug, NULL, PFS_WR);
+ pfs_create_file(dir, NULL, "regs", procfs_doprocregs, procfs_attr_rw,
+ procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
+ pfs_create_file(dir, NULL, "rlimit", procfs_doprocrlimit, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "status", procfs_doprocstatus, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "osrel", procfs_doosrel, procfs_attr_rw,
+ procfs_candebug, NULL, PFS_RDWR);
+
+ pfs_create_link(dir, NULL, "file", procfs_doprocfile, NULL,
+ procfs_notsystem, NULL, 0);
+ pfs_create_link(dir, NULL, "exe", procfs_doprocfile, NULL,
+ procfs_notsystem, NULL, 0);
return (0);
}
diff --git a/sys/fs/procfs/procfs_status.c b/sys/fs/procfs/procfs_status.c
index 38070e0946bb..49c084d02ff8 100644
--- a/sys/fs/procfs/procfs_status.c
+++ b/sys/fs/procfs/procfs_status.c
@@ -141,13 +141,9 @@ procfs_doprocstatus(PFS_FILL_ARGS)
(u_long)cr->cr_uid,
(u_long)cr->cr_ruid,
(u_long)cr->cr_rgid);
-
- /* egid (cr->cr_svgid) is equal to cr_ngroups[0]
- see also getegid(2) in /sys/kern/kern_prot.c */
-
- for (i = 0; i < cr->cr_ngroups; i++) {
+ sbuf_printf(sb, ",%lu", (u_long)cr->cr_gid);
+ for (i = 0; i < cr->cr_ngroups; i++)
sbuf_printf(sb, ",%lu", (u_long)cr->cr_groups[i]);
- }
if (jailed(cr)) {
mtx_lock(&cr->cr_prison->pr_mtx);
diff --git a/sys/fs/pseudofs/pseudofs.c b/sys/fs/pseudofs/pseudofs.c
index ef45f96a6192..7a4e67455214 100644
--- a/sys/fs/pseudofs/pseudofs.c
+++ b/sys/fs/pseudofs/pseudofs.c
@@ -133,7 +133,7 @@ pfs_add_node(struct pfs_node *parent, struct pfs_node *pn)
for (iter = parent->pn_nodes; iter != NULL; iter = iter->pn_next) {
if (strcmp(pn->pn_name, iter->pn_name) != 0)
continue;
- printf("pfs_add_node: homonymous siblings: '%s/%s' type %d",
+ printf("pfs_add_node: homonymous siblings: '%s/%s' type %d\n",
parent->pn_name, pn->pn_name, pn->pn_type);
/* Do not detach, because we are not yet attached. */
pn->pn_parent = NULL;
@@ -234,81 +234,101 @@ pfs_fixup_dir(struct pfs_node *parent)
/*
* Create a directory
*/
-struct pfs_node *
-pfs_create_dir(struct pfs_node *parent, const char *name,
- pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
- int flags)
+int
+pfs_create_dir(struct pfs_node *parent, struct pfs_node **opn,
+ const char *name, pfs_attr_t attr, pfs_vis_t vis,
+ pfs_destroy_t destroy, int flags)
{
- struct pfs_node *pn;
+ struct pfs_node *pdir, *pn;
int rc;
- pn = pfs_alloc_node_flags(parent->pn_info, name,
+ /* Preserve in case the caller is reusing the one pointer for both. */
+ pdir = parent;
+ if (opn != NULL)
+ *opn = NULL;
+ pn = pfs_alloc_node_flags(pdir->pn_info, name,
(flags & PFS_PROCDEP) ? pfstype_procdir : pfstype_dir, flags);
if (pn == NULL)
- return (NULL);
+ return (ENOMEM);
pn->pn_attr = attr;
pn->pn_vis = vis;
pn->pn_destroy = destroy;
pn->pn_flags = flags;
- rc = pfs_add_node(parent, pn);
+ rc = pfs_add_node(pdir, pn);
if (rc == 0)
rc = pfs_fixup_dir_flags(pn, flags);
if (rc != 0) {
pfs_destroy(pn);
pn = NULL;
+ } else if (opn != NULL) {
+ *opn = pn;
}
- return (pn);
+
+ return (rc);
}
/*
* Create a file
*/
-struct pfs_node *
-pfs_create_file(struct pfs_node *parent, const char *name, pfs_fill_t fill,
- pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
- int flags)
+int
+pfs_create_file(struct pfs_node *parent, struct pfs_node **opn,
+ const char *name, pfs_fill_t fill, pfs_attr_t attr,
+ pfs_vis_t vis, pfs_destroy_t destroy, int flags)
{
struct pfs_node *pn;
+ int rc;
+ if (opn != NULL)
+ *opn = NULL;
pn = pfs_alloc_node_flags(parent->pn_info, name, pfstype_file, flags);
if (pn == NULL)
- return (NULL);
+ return (ENOMEM);
+
pn->pn_fill = fill;
pn->pn_attr = attr;
pn->pn_vis = vis;
pn->pn_destroy = destroy;
pn->pn_flags = flags;
- if (pfs_add_node(parent, pn) != 0) {
+ if ((rc = pfs_add_node(parent, pn)) != 0) {
pfs_destroy(pn);
pn = NULL;
+ } else if (opn != NULL) {
+ *opn = pn;
}
- return (pn);
+
+ return (rc);
}
/*
* Create a symlink
*/
-struct pfs_node *
-pfs_create_link(struct pfs_node *parent, const char *name, pfs_fill_t fill,
- pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
- int flags)
+int
+pfs_create_link(struct pfs_node *parent, struct pfs_node **opn,
+ const char *name, pfs_fill_t fill, pfs_attr_t attr,
+ pfs_vis_t vis, pfs_destroy_t destroy, int flags)
{
struct pfs_node *pn;
+ int rc;
+ if (opn != NULL)
+ *opn = NULL;
pn = pfs_alloc_node_flags(parent->pn_info, name, pfstype_symlink, flags);
if (pn == NULL)
- return (NULL);
+ return (ENOMEM);
+
pn->pn_fill = fill;
pn->pn_attr = attr;
pn->pn_vis = vis;
pn->pn_destroy = destroy;
pn->pn_flags = flags;
- if (pfs_add_node(parent, pn) != 0) {
+ if ((rc = pfs_add_node(parent, pn)) != 0) {
pfs_destroy(pn);
pn = NULL;
+ } else if (opn != NULL) {
+ *opn = pn;
}
- return (pn);
+ return (rc);
}
/*
@@ -475,6 +495,7 @@ pfs_init(struct pfs_info *pi, struct vfsconf *vfc)
if (error) {
pfs_destroy(root);
pi->pi_root = NULL;
+ pfs_fileno_uninit(pi);
return (error);
}
diff --git a/sys/fs/pseudofs/pseudofs.h b/sys/fs/pseudofs/pseudofs.h
index c60dd7b339d1..2b08dcad978d 100644
--- a/sys/fs/pseudofs/pseudofs.h
+++ b/sys/fs/pseudofs/pseudofs.h
@@ -255,17 +255,18 @@ int pfs_uninit (struct pfs_info *pi, struct vfsconf *vfc);
/*
* Directory structure construction and manipulation
*/
-struct pfs_node *pfs_create_dir (struct pfs_node *parent, const char *name,
- pfs_attr_t attr, pfs_vis_t vis,
- pfs_destroy_t destroy, int flags);
-struct pfs_node *pfs_create_file(struct pfs_node *parent, const char *name,
- pfs_fill_t fill, pfs_attr_t attr,
- pfs_vis_t vis, pfs_destroy_t destroy,
- int flags);
-struct pfs_node *pfs_create_link(struct pfs_node *parent, const char *name,
- pfs_fill_t fill, pfs_attr_t attr,
+int pfs_create_dir (struct pfs_node *parent, struct pfs_node **opn,
+ const char *name, pfs_attr_t attr,
pfs_vis_t vis, pfs_destroy_t destroy,
int flags);
+int pfs_create_file (struct pfs_node *parent, struct pfs_node **opn,
+ const char *name, pfs_fill_t fill,
+ pfs_attr_t attr, pfs_vis_t vis,
+ pfs_destroy_t destroy, int flags);
+int pfs_create_link (struct pfs_node *parent, struct pfs_node **opn,
+ const char *name, pfs_fill_t fill,
+ pfs_attr_t attr, pfs_vis_t vis,
+ pfs_destroy_t destroy, int flags);
struct pfs_node *pfs_find_node (struct pfs_node *parent, const char *name);
void pfs_purge (struct pfs_node *pn);
int pfs_destroy (struct pfs_node *pn);
diff --git a/sys/fs/tarfs/tarfs_vnops.c b/sys/fs/tarfs/tarfs_vnops.c
index acf18de5ab51..c110107bb210 100644
--- a/sys/fs/tarfs/tarfs_vnops.c
+++ b/sys/fs/tarfs/tarfs_vnops.c
@@ -334,6 +334,10 @@ tarfs_readdir(struct vop_readdir_args *ap)
tnp, tnp->name, uio->uio_offset, uio->uio_resid);
if (uio->uio_offset == TARFS_COOKIE_EOF) {
+ if (eofflag != NULL) {
+ TARFS_DPF(VNODE, "%s: Setting EOF flag\n", __func__);
+ *eofflag = 1;
+ }
TARFS_DPF(VNODE, "%s: EOF\n", __func__);
return (0);
}
diff --git a/sys/geom/cache/g_cache.c b/sys/geom/cache/g_cache.c
index 9d0b10f4192e..c6b80786ade5 100644
--- a/sys/geom/cache/g_cache.c
+++ b/sys/geom/cache/g_cache.c
@@ -504,7 +504,7 @@ g_cache_create(struct g_class *mp, struct g_provider *pp,
return (NULL);
}
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
sc->sc_type = type;
sc->sc_bshift = bshift;
@@ -665,7 +665,7 @@ g_cache_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
G_CACHE_DEBUG(3, "Tasting %s.", pp->name);
- gp = g_new_geomf(mp, "cache:taste");
+ gp = g_new_geom(mp, "cache:taste");
gp->start = g_cache_start;
gp->orphan = g_cache_orphan;
gp->access = g_cache_access;
diff --git a/sys/geom/concat/g_concat.c b/sys/geom/concat/g_concat.c
index 2173a84c7acf..fe83b54953cc 100644
--- a/sys/geom/concat/g_concat.c
+++ b/sys/geom/concat/g_concat.c
@@ -646,7 +646,7 @@ g_concat_create(struct g_class *mp, const struct g_concat_metadata *md,
return (NULL);
}
}
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
sc = malloc(sizeof(*sc), M_CONCAT, M_WAITOK | M_ZERO);
gp->start = g_concat_start;
gp->spoiled = g_concat_orphan;
@@ -753,7 +753,7 @@ g_concat_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
G_CONCAT_DEBUG(3, "Tasting %s.", pp->name);
- gp = g_new_geomf(mp, "concat:taste");
+ gp = g_new_geom(mp, "concat:taste");
gp->start = g_concat_start;
gp->access = g_concat_access;
gp->orphan = g_concat_orphan;
diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c
index 5bd2d465183e..7fca50e7635c 100644
--- a/sys/geom/eli/g_eli.c
+++ b/sys/geom/eli/g_eli.c
@@ -769,7 +769,7 @@ g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp,
g_topology_assert();
- gp = g_new_geomf(mp, "eli:taste");
+ gp = g_new_geom(mp, "eli:taste");
gp->start = g_eli_start;
gp->access = g_std_access;
/*
diff --git a/sys/geom/gate/g_gate.c b/sys/geom/gate/g_gate.c
index ecdcacff6707..76a4328227dd 100644
--- a/sys/geom/gate/g_gate.c
+++ b/sys/geom/gate/g_gate.c
@@ -571,7 +571,7 @@ g_gate_create(struct g_gate_ctl_create *ggio)
}
}
- gp = g_new_geomf(&g_gate_class, "%s", name);
+ gp = g_new_geom(&g_gate_class, name);
gp->start = g_gate_start;
gp->access = g_gate_access;
gp->orphan = g_gate_orphan;
diff --git a/sys/geom/geom.h b/sys/geom/geom.h
index 908ce86f03a6..50e6627b0157 100644
--- a/sys/geom/geom.h
+++ b/sys/geom/geom.h
@@ -289,8 +289,9 @@ int g_handleattr_int(struct bio *bp, const char *attribute, int val);
int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val);
int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val);
int g_handleattr_str(struct bio *bp, const char *attribute, const char *str);
-struct g_consumer * g_new_consumer(struct g_geom *gp);
-struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...)
+struct g_consumer *g_new_consumer(struct g_geom *gp);
+struct g_geom *g_new_geom(struct g_class *mp, const char *name);
+struct g_geom *g_new_geomf(struct g_class *mp, const char *fmt, ...)
__printflike(2, 3);
struct g_provider * g_new_providerf(struct g_geom *gp, const char *fmt, ...)
__printflike(2, 3);
diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c
index 4a2a850c2eab..27c65f15d5e3 100644
--- a/sys/geom/geom_dev.c
+++ b/sys/geom/geom_dev.c
@@ -355,7 +355,7 @@ g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused)
g_trace(G_T_TOPOLOGY, "dev_taste(%s,%s)", mp->name, pp->name);
g_topology_assert();
- gp = g_new_geomf(mp, "%s", pp->name);
+ gp = g_new_geom(mp, pp->name);
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
mtx_init(&sc->sc_mtx, "g_dev", NULL, MTX_DEF);
cp = g_new_consumer(gp);
diff --git a/sys/geom/geom_event.c b/sys/geom/geom_event.c
index 341233a6ef47..ffd46db55416 100644
--- a/sys/geom/geom_event.c
+++ b/sys/geom/geom_event.c
@@ -347,6 +347,7 @@ static void
g_post_event_ep_va(g_event_t *func, void *arg, int wuflag,
struct g_event *ep, va_list ap)
{
+ struct thread *td;
void *p;
u_int n;
@@ -366,8 +367,12 @@ g_post_event_ep_va(g_event_t *func, void *arg, int wuflag,
TAILQ_INSERT_TAIL(&g_events, ep, events);
mtx_unlock(&g_eventlock);
wakeup(&g_wait_event);
- curthread->td_pflags |= TDP_GEOM;
- ast_sched(curthread, TDA_GEOM);
+
+ td = curthread;
+ if ((td->td_pflags & TDP_KTHREAD) == 0) {
+ td->td_pflags |= TDP_GEOM;
+ ast_sched(td, TDA_GEOM);
+ }
}
void
diff --git a/sys/geom/geom_slice.c b/sys/geom/geom_slice.c
index 0491b0069be4..935293950c37 100644
--- a/sys/geom/geom_slice.c
+++ b/sys/geom/geom_slice.c
@@ -529,7 +529,7 @@ g_slice_new(struct g_class *mp, u_int slices, struct g_provider *pp, struct g_co
g_topology_assert();
vp = (void **)extrap;
- gp = g_new_geomf(mp, "%s", pp->name);
+ gp = g_new_geom(mp, pp->name);
gsp = g_slice_alloc(slices, extra);
gsp->start = start;
gp->softc = gsp;
diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c
index 1429c84942ed..2a6ce1ab6486 100644
--- a/sys/geom/geom_subr.c
+++ b/sys/geom/geom_subr.c
@@ -368,20 +368,15 @@ g_retaste(struct g_class *mp)
}
struct g_geom *
-g_new_geomf(struct g_class *mp, const char *fmt, ...)
+g_new_geom(struct g_class *mp, const char *name)
{
+ int len;
struct g_geom *gp;
- va_list ap;
- struct sbuf *sb;
g_topology_assert();
G_VALID_CLASS(mp);
- sb = sbuf_new_auto();
- va_start(ap, fmt);
- sbuf_vprintf(sb, fmt, ap);
- va_end(ap);
- sbuf_finish(sb);
- gp = g_malloc(sizeof(*gp) + sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
+ len = strlen(name);
+ gp = g_malloc(sizeof(*gp) + len + 1, M_WAITOK | M_ZERO);
gp->name = (char *)(gp + 1);
gp->class = mp;
gp->rank = 1;
@@ -389,8 +384,7 @@ g_new_geomf(struct g_class *mp, const char *fmt, ...)
LIST_INIT(&gp->provider);
LIST_INSERT_HEAD(&mp->geom, gp, geom);
TAILQ_INSERT_HEAD(&geoms, gp, geoms);
- strcpy(gp->name, sbuf_data(sb));
- sbuf_delete(sb);
+ memcpy(gp->name, name, len);
/* Fill in defaults from class */
gp->start = mp->start;
gp->spoiled = mp->spoiled;
@@ -404,6 +398,23 @@ g_new_geomf(struct g_class *mp, const char *fmt, ...)
return (gp);
}
+struct g_geom *
+g_new_geomf(struct g_class *mp, const char *fmt, ...)
+{
+ struct g_geom *gp;
+ va_list ap;
+ struct sbuf *sb;
+
+ sb = sbuf_new_auto();
+ va_start(ap, fmt);
+ sbuf_vprintf(sb, fmt, ap);
+ va_end(ap);
+ sbuf_finish(sb);
+ gp = g_new_geom(mp, sbuf_data(sb));
+ sbuf_delete(sb);
+ return (gp);
+}
+
void
g_destroy_geom(struct g_geom *gp)
{
diff --git a/sys/geom/journal/g_journal.c b/sys/geom/journal/g_journal.c
index 6d9f6239e632..b520194b7d7c 100644
--- a/sys/geom/journal/g_journal.c
+++ b/sys/geom/journal/g_journal.c
@@ -2477,7 +2477,7 @@ g_journal_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
if (pp->geom->class == mp)
return (NULL);
- gp = g_new_geomf(mp, "journal:taste");
+ gp = g_new_geom(mp, "journal:taste");
/* This orphan function should be never called. */
gp->orphan = g_journal_taste_orphan;
cp = g_new_consumer(gp);
diff --git a/sys/geom/label/g_label.c b/sys/geom/label/g_label.c
index acb17d40914e..faefbd7c2ef6 100644
--- a/sys/geom/label/g_label.c
+++ b/sys/geom/label/g_label.c
@@ -399,7 +399,7 @@ g_label_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
if (strcmp(pp->geom->class->name, mp->name) == 0)
return (NULL);
- gp = g_new_geomf(mp, "label:taste");
+ gp = g_new_geom(mp, "label:taste");
gp->start = g_label_start_taste;
gp->access = g_label_access_taste;
gp->orphan = g_label_orphan_taste;
diff --git a/sys/geom/linux_lvm/g_linux_lvm.c b/sys/geom/linux_lvm/g_linux_lvm.c
index c63318fed729..f333c08f45d9 100644
--- a/sys/geom/linux_lvm/g_linux_lvm.c
+++ b/sys/geom/linux_lvm/g_linux_lvm.c
@@ -537,7 +537,7 @@ g_llvm_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
g_topology_assert();
g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
- gp = g_new_geomf(mp, "linux_lvm:taste");
+ gp = g_new_geom(mp, "linux_lvm:taste");
/* This orphan function should be never called. */
gp->orphan = g_llvm_taste_orphan;
cp = g_new_consumer(gp);
@@ -557,7 +557,7 @@ g_llvm_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
vg = md.md_vg;
if (vg->vg_geom == NULL) {
/* new volume group */
- gp = g_new_geomf(mp, "%s", vg->vg_name);
+ gp = g_new_geom(mp, vg->vg_name);
gp->start = g_llvm_start;
gp->spoiled = g_llvm_orphan;
gp->orphan = g_llvm_orphan;
diff --git a/sys/geom/mirror/g_mirror.c b/sys/geom/mirror/g_mirror.c
index 25c0490938ef..03902a2f2491 100644
--- a/sys/geom/mirror/g_mirror.c
+++ b/sys/geom/mirror/g_mirror.c
@@ -3149,7 +3149,7 @@ g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
/*
* Action geom.
*/
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
gp->start = g_mirror_start;
gp->orphan = g_mirror_orphan;
@@ -3290,7 +3290,7 @@ g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
- gp = g_new_geomf(mp, "mirror:taste");
+ gp = g_new_geom(mp, "mirror:taste");
/*
* This orphan function should be never called.
*/
diff --git a/sys/geom/mirror/g_mirror_ctl.c b/sys/geom/mirror/g_mirror_ctl.c
index 82bc05a142c0..b31bf098ac4b 100644
--- a/sys/geom/mirror/g_mirror_ctl.c
+++ b/sys/geom/mirror/g_mirror_ctl.c
@@ -433,7 +433,7 @@ g_mirror_ctl_create(struct gctl_req *req, struct g_class *mp)
g_topology_lock();
mediasize = OFF_MAX;
sectorsize = 0;
- gp = g_new_geomf(mp, "%s", md.md_name);
+ gp = g_new_geom(mp, md.md_name);
gp->orphan = g_mirror_create_orphan;
cp = g_new_consumer(gp);
for (no = 1; no < *nargs; no++) {
diff --git a/sys/geom/mountver/g_mountver.c b/sys/geom/mountver/g_mountver.c
index de3a298735d4..c7d55c4734a2 100644
--- a/sys/geom/mountver/g_mountver.c
+++ b/sys/geom/mountver/g_mountver.c
@@ -291,7 +291,7 @@ g_mountver_create(struct gctl_req *req, struct g_class *mp, struct g_provider *p
return (EEXIST);
}
}
- gp = g_new_geomf(mp, "%s", name);
+ gp = g_new_geom(mp, name);
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
mtx_init(&sc->sc_mtx, "gmountver", NULL, MTX_DEF | MTX_RECURSE);
TAILQ_INIT(&sc->sc_queue);
diff --git a/sys/geom/multipath/g_multipath.c b/sys/geom/multipath/g_multipath.c
index a4935df7eaa1..250a2c60ffee 100644
--- a/sys/geom/multipath/g_multipath.c
+++ b/sys/geom/multipath/g_multipath.c
@@ -549,7 +549,7 @@ g_multipath_create(struct g_class *mp, struct g_multipath_metadata *md)
}
}
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
mtx_init(&sc->sc_mtx, "multipath", NULL, MTX_DEF);
memcpy(sc->sc_uuid, md->md_uuid, sizeof(sc->sc_uuid));
@@ -821,7 +821,7 @@ g_multipath_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
g_topology_assert();
- gp = g_new_geomf(mp, "multipath:taste");
+ gp = g_new_geom(mp, "multipath:taste");
gp->start = g_multipath_start;
gp->access = g_multipath_access;
gp->orphan = g_multipath_orphan;
diff --git a/sys/geom/nop/g_nop.c b/sys/geom/nop/g_nop.c
index a32111e3a29a..1fb99f4a0a5b 100644
--- a/sys/geom/nop/g_nop.c
+++ b/sys/geom/nop/g_nop.c
@@ -416,7 +416,7 @@ g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp,
return (EEXIST);
}
}
- gp = g_new_geomf(mp, "%s", name);
+ gp = g_new_geom(mp, name);
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
sc->sc_offset = offset;
sc->sc_explicitsize = explicitsize;
diff --git a/sys/geom/part/g_part.c b/sys/geom/part/g_part.c
index 88e44b335b29..8a7f67d8a313 100644
--- a/sys/geom/part/g_part.c
+++ b/sys/geom/part/g_part.c
@@ -998,7 +998,7 @@ g_part_ctl_create(struct gctl_req *req, struct g_part_parms *gpp)
}
if (null == NULL)
- gp = g_new_geomf(&g_part_class, "%s", pp->name);
+ gp = g_new_geom(&g_part_class, pp->name);
gp->softc = kobj_create((kobj_class_t)gpp->gpp_scheme, M_GEOM,
M_WAITOK);
table = gp->softc;
@@ -1979,7 +1979,7 @@ g_part_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
* With that we become part of the topology. Obtain read access
* to the provider.
*/
- gp = g_new_geomf(mp, "%s", pp->name);
+ gp = g_new_geom(mp, pp->name);
cp = g_new_consumer(gp);
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
error = g_attach(cp, pp);
diff --git a/sys/geom/raid/g_raid.c b/sys/geom/raid/g_raid.c
index a483622d14a5..590f28aaa46c 100644
--- a/sys/geom/raid/g_raid.c
+++ b/sys/geom/raid/g_raid.c
@@ -1876,7 +1876,7 @@ g_raid_create_node(struct g_class *mp,
g_topology_assert();
G_RAID_DEBUG(1, "Creating array %s.", name);
- gp = g_new_geomf(mp, "%s", name);
+ gp = g_new_geom(mp, name);
sc = malloc(sizeof(*sc), M_RAID, M_WAITOK | M_ZERO);
gp->start = g_raid_start;
gp->orphan = g_raid_orphan;
@@ -2217,7 +2217,7 @@ g_raid_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
geom = NULL;
status = G_RAID_MD_TASTE_FAIL;
- gp = g_new_geomf(mp, "raid:taste");
+ gp = g_new_geom(mp, "raid:taste");
/*
* This orphan function should be never called.
*/
diff --git a/sys/geom/raid3/g_raid3.c b/sys/geom/raid3/g_raid3.c
index c2d05b48d80d..64951bd01deb 100644
--- a/sys/geom/raid3/g_raid3.c
+++ b/sys/geom/raid3/g_raid3.c
@@ -3164,7 +3164,7 @@ g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md)
/*
* Action geom.
*/
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
sc = malloc(sizeof(*sc), M_RAID3, M_WAITOK | M_ZERO);
sc->sc_disks = malloc(sizeof(struct g_raid3_disk) * md->md_all, M_RAID3,
M_WAITOK | M_ZERO);
@@ -3338,7 +3338,7 @@ g_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
G_RAID3_DEBUG(2, "Tasting %s.", pp->name);
- gp = g_new_geomf(mp, "raid3:taste");
+ gp = g_new_geom(mp, "raid3:taste");
/* This orphan function should be never called. */
gp->orphan = g_raid3_taste_orphan;
cp = g_new_consumer(gp);
diff --git a/sys/geom/raid3/g_raid3_ctl.c b/sys/geom/raid3/g_raid3_ctl.c
index 824de07e4836..5eafcce917cf 100644
--- a/sys/geom/raid3/g_raid3_ctl.c
+++ b/sys/geom/raid3/g_raid3_ctl.c
@@ -425,7 +425,7 @@ g_raid3_ctl_insert(struct gctl_req *req, struct g_class *mp)
no = gctl_get_paraml(req, "number", sizeof(*no));
else
no = NULL;
- gp = g_new_geomf(mp, "raid3:insert");
+ gp = g_new_geom(mp, "raid3:insert");
gp->orphan = g_raid3_ctl_insert_orphan;
cp = g_new_consumer(gp);
error = g_attach(cp, pp);
diff --git a/sys/geom/shsec/g_shsec.c b/sys/geom/shsec/g_shsec.c
index 3ccc23e7eb8b..9da814e5eb34 100644
--- a/sys/geom/shsec/g_shsec.c
+++ b/sys/geom/shsec/g_shsec.c
@@ -545,7 +545,7 @@ g_shsec_create(struct g_class *mp, const struct g_shsec_metadata *md)
return (NULL);
}
}
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
sc = malloc(sizeof(*sc), M_SHSEC, M_WAITOK | M_ZERO);
gp->start = g_shsec_start;
gp->spoiled = g_shsec_orphan;
@@ -643,7 +643,7 @@ g_shsec_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
G_SHSEC_DEBUG(3, "Tasting %s.", pp->name);
- gp = g_new_geomf(mp, "shsec:taste");
+ gp = g_new_geom(mp, "shsec:taste");
gp->start = g_shsec_start;
gp->access = g_shsec_access;
gp->orphan = g_shsec_orphan;
diff --git a/sys/geom/stripe/g_stripe.c b/sys/geom/stripe/g_stripe.c
index 6f336c18c8e6..ba1953f036d3 100644
--- a/sys/geom/stripe/g_stripe.c
+++ b/sys/geom/stripe/g_stripe.c
@@ -454,11 +454,9 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
cbp->bio_done = g_stripe_done;
cbp->bio_offset = offset;
cbp->bio_length = length;
- if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
- bp->bio_ma_n = round_page(bp->bio_ma_offset +
- bp->bio_length) / PAGE_SIZE;
+ if ((bp->bio_flags & BIO_UNMAPPED) != 0)
addr = NULL;
- } else
+ else
addr = bp->bio_data;
cbp->bio_caller2 = sc->sc_disks[no];
@@ -864,7 +862,7 @@ g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md,
return (NULL);
}
}
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO);
gp->start = g_stripe_start;
gp->spoiled = g_stripe_orphan;
@@ -965,7 +963,7 @@ g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
G_STRIPE_DEBUG(3, "Tasting %s.", pp->name);
- gp = g_new_geomf(mp, "stripe:taste");
+ gp = g_new_geom(mp, "stripe:taste");
gp->start = g_stripe_start;
gp->access = g_stripe_access;
gp->orphan = g_stripe_orphan;
diff --git a/sys/geom/union/g_union.c b/sys/geom/union/g_union.c
index 9734fc1bcfe3..302761597f6f 100644
--- a/sys/geom/union/g_union.c
+++ b/sys/geom/union/g_union.c
@@ -246,7 +246,7 @@ g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool verbose)
return;
}
}
- gp = g_new_geomf(mp, "%s", name);
+ gp = g_new_geom(mp, name);
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
rw_init(&sc->sc_rwlock, "gunion");
TAILQ_INIT(&sc->sc_wiplist);
@@ -358,6 +358,7 @@ fail2:
fail1:
g_destroy_consumer(lowercp);
g_destroy_provider(newpp);
+ g_free(sc);
g_destroy_geom(gp);
}
diff --git a/sys/geom/virstor/g_virstor.c b/sys/geom/virstor/g_virstor.c
index c7d737493f11..1490ed103329 100644
--- a/sys/geom/virstor/g_virstor.c
+++ b/sys/geom/virstor/g_virstor.c
@@ -771,7 +771,7 @@ g_virstor_taste(struct g_class *mp, struct g_provider *pp, int flags)
LOG_MSG(LVL_DEBUG, "Tasting %s", pp->name);
/* We need a dummy geom to attach a consumer to the given provider */
- gp = g_new_geomf(mp, "virstor:taste.helper");
+ gp = g_new_geom(mp, "virstor:taste.helper");
gp->start = (void *)invalid_call; /* XXX: hacked up so the */
gp->access = (void *)invalid_call; /* compiler doesn't complain. */
gp->orphan = (void *)invalid_call; /* I really want these to fail. */
@@ -1085,7 +1085,7 @@ create_virstor_geom(struct g_class *mp, struct g_virstor_metadata *md)
return (NULL);
}
}
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
gp->softc = NULL; /* to circumevent races that test softc */
gp->start = g_virstor_start;
diff --git a/sys/geom/zero/g_zero.c b/sys/geom/zero/g_zero.c
index 91ef0fb1ef95..e9934ba6c784 100644
--- a/sys/geom/zero/g_zero.c
+++ b/sys/geom/zero/g_zero.c
@@ -102,7 +102,7 @@ g_zero_init(struct g_class *mp)
struct g_provider *pp;
g_topology_assert();
- gp = g_new_geomf(mp, "gzero");
+ gp = g_new_geom(mp, "gzero");
gp->start = g_zero_start;
gp->access = g_std_access;
gpp = pp = g_new_providerf(gp, "%s", gp->name);
diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC
index 88b8967cd693..ac0cc4ba74e7 100644
--- a/sys/i386/conf/GENERIC
+++ b/sys/i386/conf/GENERIC
@@ -249,9 +249,9 @@ device wlan # 802.11 support
options IEEE80211_DEBUG # enable debug msgs
options IEEE80211_SUPPORT_MESH # enable 802.11s draft support
device wlan_wep # 802.11 WEP support
+device wlan_tkip # 802.11 TKIP support
device wlan_ccmp # 802.11 CCMP support
device wlan_gcmp # 802.11 GCMP support
-device wlan_tkip # 802.11 TKIP support
device wlan_amrr # AMRR transmit rate control algorithm
device ath # Atheros CardBus/PCI NICs
device ath_hal # Atheros CardBus/PCI chip support
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 5a53fac50f2c..1bc2491a1a12 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -2610,11 +2610,13 @@ note_procstat_groups(void *arg, struct sbuf *sb, size_t *sizep)
int structsize;
p = arg;
- size = sizeof(structsize) + p->p_ucred->cr_ngroups * sizeof(gid_t);
+ size = sizeof(structsize) +
+ (1 + p->p_ucred->cr_ngroups) * sizeof(gid_t);
if (sb != NULL) {
KASSERT(*sizep == size, ("invalid size"));
structsize = sizeof(gid_t);
sbuf_bcat(sb, &structsize, sizeof(structsize));
+ sbuf_bcat(sb, &p->p_ucred->cr_gid, sizeof(gid_t));
sbuf_bcat(sb, p->p_ucred->cr_groups, p->p_ucred->cr_ngroups *
sizeof(gid_t));
}
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 36ce44b988be..87ffdb8dbf07 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -145,13 +145,6 @@ FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance");
#endif
/*
- * This ensures that there is at least one entry so that the sysinit_set
- * symbol is not undefined. A sybsystem ID of SI_SUB_DUMMY is never
- * executed.
- */
-SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL);
-
-/*
* The sysinit linker set compiled into the kernel. These are placed onto the
* sysinit list by mi_startup; sysinit_add can add (e.g., from klds) additional
* sysinits to the linked list but the linker set here does not change.
@@ -296,7 +289,7 @@ mi_startup(void)
BOOTTRACE_INIT("sysinit 0x%7x", sip->subsystem);
#if defined(VERBOSE_SYSINIT)
- if (sip->subsystem > last && verbose_sysinit != 0) {
+ if (sip->subsystem != last && verbose_sysinit != 0) {
verbose = 1;
printf("subsystem %x\n", sip->subsystem);
}
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index fcd232cde21e..e42e7dcf8b44 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -663,4 +663,6 @@ struct sysent sysent[] = {
{ .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
{ .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */
{ .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */
+ { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */
+ { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */
};
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index a27ab33b34da..2a833d2eafbe 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -665,20 +665,26 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
} while (atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
got_set = tmp & ~flg;
got_cleared = flg & ~tmp;
- tmp = fp->f_flag & FNONBLOCK;
- error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
- if (error != 0)
- goto revert_f_setfl;
- tmp = fp->f_flag & FASYNC;
- error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
- if (error == 0) {
- fdrop(fp, td);
- break;
+ if (((got_set | got_cleared) & FNONBLOCK) != 0) {
+ tmp = fp->f_flag & FNONBLOCK;
+ error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
+ if (error != 0)
+ goto revert_flags;
+ }
+ if (((got_set | got_cleared) & FASYNC) != 0) {
+ tmp = fp->f_flag & FASYNC;
+ error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
+ if (error != 0)
+ goto revert_nonblock;
+ }
+ fdrop(fp, td);
+ break;
+revert_nonblock:
+ if (((got_set | got_cleared) & FNONBLOCK) != 0) {
+ tmp = ~fp->f_flag & FNONBLOCK;
+ (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
}
- atomic_clear_int(&fp->f_flag, FNONBLOCK);
- tmp = 0;
- (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
-revert_f_setfl:
+revert_flags:
do {
tmp = flg = fp->f_flag;
tmp &= ~FCNTLFLAGS;
@@ -5250,6 +5256,8 @@ file_type_to_name(short type)
return ("eventfd");
case DTYPE_TIMERFD:
return ("timerfd");
+ case DTYPE_JAILDESC:
+ return ("jail");
default:
return ("unkn");
}
diff --git a/sys/kern/kern_environment.c b/sys/kern/kern_environment.c
index 0cb0f566a839..7c0654769581 100644
--- a/sys/kern/kern_environment.c
+++ b/sys/kern/kern_environment.c
@@ -1098,65 +1098,65 @@ kernenv_next(char *cp)
}
void
-tunable_int_init(void *data)
+tunable_int_init(const void *data)
{
- struct tunable_int *d = (struct tunable_int *)data;
+ const struct tunable_int *d = data;
TUNABLE_INT_FETCH(d->path, d->var);
}
void
-tunable_long_init(void *data)
+tunable_long_init(const void *data)
{
- struct tunable_long *d = (struct tunable_long *)data;
+ const struct tunable_long *d = data;
TUNABLE_LONG_FETCH(d->path, d->var);
}
void
-tunable_ulong_init(void *data)
+tunable_ulong_init(const void *data)
{
- struct tunable_ulong *d = (struct tunable_ulong *)data;
+ const struct tunable_ulong *d = data;
TUNABLE_ULONG_FETCH(d->path, d->var);
}
void
-tunable_int64_init(void *data)
+tunable_int64_init(const void *data)
{
- struct tunable_int64 *d = (struct tunable_int64 *)data;
+ const struct tunable_int64 *d = data;
TUNABLE_INT64_FETCH(d->path, d->var);
}
void
-tunable_uint64_init(void *data)
+tunable_uint64_init(const void *data)
{
- struct tunable_uint64 *d = (struct tunable_uint64 *)data;
+ const struct tunable_uint64 *d = data;
TUNABLE_UINT64_FETCH(d->path, d->var);
}
void
-tunable_quad_init(void *data)
+tunable_quad_init(const void *data)
{
- struct tunable_quad *d = (struct tunable_quad *)data;
+ const struct tunable_quad *d = data;
TUNABLE_QUAD_FETCH(d->path, d->var);
}
void
-tunable_bool_init(void *data)
+tunable_bool_init(const void *data)
{
- struct tunable_bool *d = (struct tunable_bool *)data;
+ const struct tunable_bool *d = data;
TUNABLE_BOOL_FETCH(d->path, d->var);
}
void
-tunable_str_init(void *data)
+tunable_str_init(const void *data)
{
- struct tunable_str *d = (struct tunable_str *)data;
+ const struct tunable_str *d = data;
TUNABLE_STR_FETCH(d->path, d->var, d->size);
}
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index eb77a5064113..b5360f3a1055 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -50,6 +50,7 @@
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
+#include <sys/jail.h>
#include <sys/kthread.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
@@ -163,6 +164,9 @@ static int filt_kqueue(struct knote *kn, long hint);
static int filt_procattach(struct knote *kn);
static void filt_procdetach(struct knote *kn);
static int filt_proc(struct knote *kn, long hint);
+static int filt_jailattach(struct knote *kn);
+static void filt_jaildetach(struct knote *kn);
+static int filt_jail(struct knote *kn, long hint);
static int filt_fileattach(struct knote *kn);
static void filt_timerexpire(void *knx);
static void filt_timerexpire_l(struct knote *kn, bool proc_locked);
@@ -195,6 +199,12 @@ static const struct filterops proc_filtops = {
.f_detach = filt_procdetach,
.f_event = filt_proc,
};
+static const struct filterops jail_filtops = {
+ .f_isfd = 0,
+ .f_attach = filt_jailattach,
+ .f_detach = filt_jaildetach,
+ .f_event = filt_jail,
+};
static const struct filterops timer_filtops = {
.f_isfd = 0,
.f_attach = filt_timerattach,
@@ -365,6 +375,7 @@ static struct {
[~EVFILT_USER] = { &user_filtops, 1 },
[~EVFILT_SENDFILE] = { &null_filtops },
[~EVFILT_EMPTY] = { &file_filtops, 1 },
+ [~EVFILT_JAIL] = { &jail_filtops, 1 },
};
/*
@@ -528,7 +539,8 @@ filt_proc(struct knote *kn, long hint)
* process forked. Additionally, for each knote attached to the
* parent, check whether user wants to track the new process. If so
* attach a new knote to it, and immediately report an event with the
- * child's pid.
+ * child's pid. This is also called on jail creation, which is treated
+ * the same way by jail events.
*/
void
knote_fork(struct knlist *list, int pid)
@@ -555,6 +567,8 @@ knote_fork(struct knlist *list, int pid)
/*
* The same as knote(), activate the event.
*/
+ _Static_assert(NOTE_JAIL_CHILD == NOTE_FORK,
+ "NOTE_JAIL_CHILD should be the same as NOTE_FORK");
if ((kn->kn_sfflags & NOTE_TRACK) == 0) {
if (kn->kn_fop->f_event(kn, NOTE_FORK))
KNOTE_ACTIVATE(kn, 1);
@@ -614,6 +628,124 @@ knote_fork(struct knlist *list, int pid)
}
}
+int
+filt_jailattach(struct knote *kn)
+{
+ struct prison *pr;
+ bool immediate;
+
+ immediate = false;
+ if (kn->kn_id == 0) {
+ /* Let jid=0 watch the current prison (including prison0). */
+ pr = curthread->td_ucred->cr_prison;
+ mtx_lock(&pr->pr_mtx);
+ } else if (kn->kn_flags & (EV_FLAG1 | EV_FLAG2)) {
+ /*
+ * The kernel registers prisons before they are valid,
+ * so prison_find_child will fail.
+ */
+ TAILQ_FOREACH(pr, &allprison, pr_list) {
+ if (pr->pr_id < kn->kn_id)
+ continue;
+ if (pr->pr_id > kn->kn_id) {
+ pr = NULL;
+ break;
+ }
+ mtx_lock(&pr->pr_mtx);
+ break;
+ }
+ if (pr == NULL)
+ return (ENOENT);
+ } else {
+ sx_slock(&allprison_lock);
+ pr = prison_find_child(curthread->td_ucred->cr_prison,
+ kn->kn_id);
+ sx_sunlock(&allprison_lock);
+ if (pr == NULL)
+ return (ENOENT);
+ if (!prison_isalive(pr)) {
+ mtx_unlock(&pr->pr_mtx);
+ return (ENOENT);
+ }
+ }
+ kn->kn_ptr.p_prison = pr;
+ kn->kn_flags |= EV_CLEAR;
+
+ /*
+ * Internal flag indicating registration done by kernel for the
+ * purposes of getting a NOTE_CHILD notification.
+ */
+ if (kn->kn_flags & EV_FLAG2) {
+ kn->kn_flags &= ~EV_FLAG2;
+ kn->kn_data = kn->kn_sdata; /* parent id */
+ kn->kn_fflags = NOTE_CHILD;
+ kn->kn_sfflags &= ~NOTE_JAIL_CTRLMASK;
+ immediate = true; /* Force immediate activation of child note. */
+ }
+ /*
+ * Internal flag indicating registration done by kernel (for other than
+ * NOTE_CHILD).
+ */
+ if (kn->kn_flags & EV_FLAG1) {
+ kn->kn_flags &= ~EV_FLAG1;
+ }
+
+ knlist_add(pr->pr_klist, kn, 1);
+
+ /* Immediately activate any child notes. */
+ if (immediate)
+ KNOTE_ACTIVATE(kn, 0);
+
+ mtx_unlock(&pr->pr_mtx);
+ return (0);
+}
+
+void
+filt_jaildetach(struct knote *kn)
+{
+ if (kn->kn_ptr.p_prison != NULL) {
+ knlist_remove(kn->kn_knlist, kn, 0);
+ kn->kn_ptr.p_prison = NULL;
+ } else
+ kn->kn_status |= KN_DETACHED;
+}
+
+int
+filt_jail(struct knote *kn, long hint)
+{
+ struct prison *pr;
+ u_int event;
+
+ pr = kn->kn_ptr.p_prison;
+ if (pr == NULL) /* already activated, from attach filter */
+ return (0);
+
+ /* Mask off extra data. */
+ event = (u_int)hint & NOTE_JAIL_CTRLMASK;
+
+ /* If the user is interested in this event, record it. */
+ if (kn->kn_sfflags & event)
+ kn->kn_fflags |= event;
+
+ /* Report the attached process id. */
+ if (event == NOTE_JAIL_ATTACH) {
+ if (kn->kn_data != 0)
+ kn->kn_fflags |= NOTE_JAIL_ATTACH_MULTI;
+ kn->kn_data = hint & NOTE_JAIL_DATAMASK;
+ }
+
+ /* Prison is gone, so flag the event as finished. */
+ if (event == NOTE_JAIL_REMOVE) {
+ kn->kn_flags |= EV_EOF | EV_ONESHOT;
+ kn->kn_ptr.p_prison = NULL;
+ if (kn->kn_fflags == 0)
+ kn->kn_flags |= EV_DROP;
+ return (1);
+ }
+
+ return (kn->kn_fflags != 0);
+}
+
/*
* XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the
* interval timer support code.
@@ -1597,8 +1729,8 @@ findkn:
/*
* If possible, find an existing knote to use for this kevent.
*/
- if (kev->filter == EVFILT_PROC &&
- (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) {
+ if ((kev->filter == EVFILT_PROC || kev->filter == EVFILT_JAIL)
+ && (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) {
/* This is an internal creation of a process tracking
* note. Don't attempt to coalesce this with an
* existing note.
@@ -1771,7 +1903,7 @@ kqueue_acquire(struct file *fp, struct kqueue **kqp)
kq = fp->f_data;
if (fp->f_type != DTYPE_KQUEUE || kq == NULL)
- return (EBADF);
+ return (EINVAL);
*kqp = kq;
KQ_LOCK(kq);
if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {
@@ -2800,6 +2932,7 @@ knote_init(void)
knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, 0);
ast_register(TDA_KQUEUE, ASTR_ASTF_REQUIRED, 0, ast_kqueue);
+ prison0.pr_klist = knlist_alloc(&prison0.pr_mtx);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL);
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 7c9a15ae18f3..3d18b03119ff 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -39,15 +39,18 @@
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
+#include <sys/file.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/osd.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/epoch.h>
+#include <sys/event.h>
#include <sys/taskqueue.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
+#include <sys/jaildesc.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/mman.h>
@@ -154,7 +157,8 @@ static void prison_complete(void *context, int pending);
static void prison_deref(struct prison *pr, int flags);
static void prison_deref_kill(struct prison *pr, struct prisonlist *freeprison);
static int prison_lock_xlock(struct prison *pr, int flags);
-static void prison_cleanup(struct prison *pr);
+static void prison_cleanup_locked(struct prison *pr);
+static void prison_cleanup_unlocked(struct prison *pr);
static void prison_free_not_last(struct prison *pr);
static void prison_proc_free_not_last(struct prison *pr);
static void prison_proc_relink(struct prison *opr, struct prison *npr,
@@ -167,6 +171,7 @@ static void prison_racct_attach(struct prison *pr);
static void prison_racct_modify(struct prison *pr);
static void prison_racct_detach(struct prison *pr);
#endif
+static void prison_knote(struct prison *pr, long hint);
/* Flags for prison_deref */
#define PD_DEREF 0x01 /* Decrement pr_ref */
@@ -985,6 +990,7 @@ prison_ip_cnt(const struct prison *pr, const pr_family_t af)
int
kern_jail_set(struct thread *td, struct uio *optuio, int flags)
{
+ struct file *jfp_out;
struct nameidata nd;
#ifdef INET
struct prison_ip *ip4;
@@ -995,6 +1001,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
struct vfsopt *opt;
struct vfsoptlist *opts;
struct prison *pr, *deadpr, *dinspr, *inspr, *mypr, *ppr, *tpr;
+ struct ucred *jdcred;
struct vnode *root;
char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid;
char *g_path, *osrelstr;
@@ -1008,7 +1015,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
int created, cuflags, descend, drflags, enforce;
int error, errmsg_len, errmsg_pos;
int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
- int deadid, jid, jsys, len, level;
+ int deadid, jfd_in, jfd_out, jfd_pos, jid, jsys, len, level;
int childmax, osreldt, rsnum, slevel;
#ifdef INET
int ip4s;
@@ -1018,22 +1025,32 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
int ip6s;
bool redo_ip6;
#endif
+ bool maybe_changed;
uint64_t pr_allow, ch_allow, pr_flags, ch_flags;
uint64_t pr_allow_diff;
unsigned tallow;
char numbuf[12];
- error = priv_check(td, PRIV_JAIL_SET);
- if (!error && (flags & JAIL_ATTACH))
- error = priv_check(td, PRIV_JAIL_ATTACH);
- if (error)
- return (error);
mypr = td->td_ucred->cr_prison;
- if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
+ if (((flags & (JAIL_CREATE | JAIL_AT_DESC)) == JAIL_CREATE) &&
+ mypr->pr_childmax == 0)
return (EPERM);
if (flags & ~JAIL_SET_MASK)
return (EINVAL);
+ if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC)) ==
+ (JAIL_USE_DESC | JAIL_AT_DESC))
+ return (EINVAL);
+ prison_hold(mypr);
+#ifdef INET
+ ip4 = NULL;
+#endif
+#ifdef INET6
+ ip6 = NULL;
+#endif
+ g_path = NULL;
+ jfp_out = NULL;
+ jfd_out = -1;
/*
* Check all the parameters before committing to anything. Not all
* errors can be caught early, but we may as well try. Also, this
@@ -1046,14 +1063,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
*/
error = vfs_buildopts(optuio, &opts);
if (error)
- return (error);
-#ifdef INET
- ip4 = NULL;
-#endif
-#ifdef INET6
- ip6 = NULL;
-#endif
- g_path = NULL;
+ goto done_free;
cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
if (!cuflags) {
@@ -1062,6 +1072,61 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_errmsg;
}
+ error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in));
+ if (error == ENOENT) {
+ if (flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+ JAIL_OWN_DESC)) {
+ vfs_opterror(opts, "missing desc");
+ goto done_errmsg;
+ }
+ jfd_in = -1;
+ } else if (error != 0)
+ goto done_free;
+ else {
+ if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+ JAIL_OWN_DESC))) {
+ vfs_opterror(opts, "unexpected desc");
+ goto done_errmsg;
+ }
+ if (flags & JAIL_AT_DESC) {
+ /*
+ * Look up and create jails based on the
+ * descriptor's prison.
+ */
+ prison_free(mypr);
+ error = jaildesc_find(td, jfd_in, &mypr, NULL);
+ if (error != 0) {
+ vfs_opterror(opts, error == ENOENT ?
+ "descriptor to dead jail" :
+ "not a jail descriptor");
+ goto done_errmsg;
+ }
+ if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) {
+ error = EPERM;
+ goto done_free;
+ }
+ }
+ if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) {
+ /* Allocate a jail descriptor to return later. */
+ error = jaildesc_alloc(td, &jfp_out, &jfd_out,
+ flags & JAIL_OWN_DESC);
+ if (error)
+ goto done_free;
+ }
+ }
+
+ /*
+ * Delay the permission check if using a jail descriptor,
+ * until we get the descriptor's credentials.
+ */
+ if (!(flags & JAIL_USE_DESC)) {
+ error = priv_check(td, PRIV_JAIL_SET);
+ if (error == 0 && (flags & JAIL_ATTACH))
+ error = priv_check(td, PRIV_JAIL_ATTACH);
+ if (error)
+ goto done_free;
+ }
+
error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
if (error == ENOENT)
jid = 0;
@@ -1422,6 +1487,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
pr = NULL;
inspr = NULL;
deadpr = NULL;
+ maybe_changed = false;
if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
namelc = strrchr(name, '.');
jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
@@ -1436,7 +1502,45 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
error = EAGAIN;
goto done_deref;
}
- if (jid != 0) {
+ if (flags & JAIL_USE_DESC) {
+ /* Get the jail from its descriptor. */
+ error = jaildesc_find(td, jfd_in, &pr, &jdcred);
+ if (error) {
+ vfs_opterror(opts, error == ENOENT ?
+ "descriptor to dead jail" :
+ "not a jail descriptor");
+ goto done_deref;
+ }
+ drflags |= PD_DEREF;
+ error = priv_check_cred(jdcred, PRIV_JAIL_SET);
+ if (error == 0 && (flags & JAIL_ATTACH))
+ error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH);
+ crfree(jdcred);
+ if (error)
+ goto done_deref;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ if (cuflags == JAIL_CREATE) {
+ error = EEXIST;
+ vfs_opterror(opts, "jail %d already exists",
+ pr->pr_id);
+ goto done_deref;
+ }
+ if (!prison_isalive(pr)) {
+ /* While a jid can be resurrected, the prison
+ * itself cannot.
+ */
+ error = ENOENT;
+ vfs_opterror(opts, "jail %d is dying", pr->pr_id);
+ goto done_deref;
+ }
+ if (jid != 0 && jid != pr->pr_id) {
+ error = EINVAL;
+ vfs_opterror(opts, "cannot change jid");
+ goto done_deref;
+ }
+ jid = pr->pr_id;
+ } else if (jid != 0) {
if (jid < 0) {
error = EINVAL;
vfs_opterror(opts, "negative jid");
@@ -1570,7 +1674,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
}
}
- /* Update: must provide a jid or name. */
+ /* Update: must provide a desc, jid, or name. */
else if (cuflags == JAIL_UPDATE && pr == NULL) {
error = ENOENT;
vfs_opterror(opts, "update specified no jail");
@@ -1643,6 +1747,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling);
for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
tpr->pr_childcount++;
+ pr->pr_klist = knlist_alloc(&pr->pr_mtx);
/* Set some default values, and inherit some from the parent. */
if (namelc == NULL)
@@ -1722,8 +1827,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* Grab a reference for existing prisons, to ensure they
* continue to exist for the duration of the call.
*/
- prison_hold(pr);
- drflags |= PD_DEREF;
+ if (!(drflags & PD_DEREF)) {
+ prison_hold(pr);
+ drflags |= PD_DEREF;
+ }
#if defined(VIMAGE) && (defined(INET) || defined(INET6))
if ((pr->pr_flags & PR_VNET) &&
(ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
@@ -1880,6 +1987,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_deref;
}
}
+ maybe_changed = true;
/* Set the parameters of the prison. */
#ifdef INET
@@ -2112,7 +2220,12 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* reference via persistence, or is about to gain one via attachment.
*/
if (created) {
- drflags = prison_lock_xlock(pr, drflags);
+ sx_assert(&allprison_lock, SX_XLOCKED);
+ mtx_lock(&ppr->pr_mtx);
+ knote_fork(ppr->pr_klist, pr->pr_id);
+ mtx_unlock(&ppr->pr_mtx);
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
pr->pr_state = PRISON_STATE_ALIVE;
}
@@ -2146,10 +2259,37 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
printf("Warning jail jid=%d: mountd/nfsd requires a separate"
" file system\n", pr->pr_id);
+ /*
+ * Now that the prison is fully created without error, set the
+ * jail descriptor if one was requested. This is the only
+ * parameter that is returned to the caller (except the error
+ * message).
+ */
+ if (jfd_out >= 0) {
+ if (!(drflags & PD_LOCKED)) {
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ }
+ jfd_pos = 2 * vfs_getopt_pos(opts, "desc") + 1;
+ if (optuio->uio_segflg == UIO_SYSSPACE)
+ *(int*)optuio->uio_iov[jfd_pos].iov_base = jfd_out;
+ else
+ (void)copyout(&jfd_out,
+ optuio->uio_iov[jfd_pos].iov_base, sizeof(jfd_out));
+ jaildesc_set_prison(jfp_out, pr);
+ }
+
drflags &= ~PD_KILL;
td->td_retval[0] = pr->pr_id;
done_deref:
+ /*
+ * Report changes to kevent. This can happen even if the
+ * system call fails, as changes might have been made before
+ * the failure.
+ */
+ if (maybe_changed && !created)
+ prison_knote(pr, NOTE_JAIL_SET);
/* Release any temporary prison holds and/or locks. */
if (pr != NULL)
prison_deref(pr, drflags);
@@ -2176,15 +2316,21 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
}
done_free:
+ /* Clean up other resources. */
#ifdef INET
prison_ip_free(ip4);
#endif
#ifdef INET6
prison_ip_free(ip6);
#endif
+ if (jfp_out != NULL)
+ fdrop(jfp_out, td);
+ if (error && jfd_out >= 0)
+ (void)kern_close(td, jfd_out);
if (g_path != NULL)
free(g_path, M_TEMP);
vfs_freeopts(opts);
+ prison_free(mypr);
return (error);
}
@@ -2329,16 +2475,21 @@ int
kern_jail_get(struct thread *td, struct uio *optuio, int flags)
{
struct bool_flags *bf;
+ struct file *jfp_out;
struct jailsys_flags *jsf;
struct prison *pr, *mypr;
struct vfsopt *opt;
struct vfsoptlist *opts;
char *errmsg, *name;
int drflags, error, errmsg_len, errmsg_pos, i, jid, len, pos;
+ int jfd_in, jfd_out;
unsigned f;
if (flags & ~JAIL_GET_MASK)
return (EINVAL);
+ if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC)) ==
+ (JAIL_USE_DESC | JAIL_AT_DESC))
+ return (EINVAL);
/* Get the parameter list. */
error = vfs_buildopts(optuio, &opts);
@@ -2346,13 +2497,70 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
return (error);
errmsg_pos = vfs_getopt_pos(opts, "errmsg");
mypr = td->td_ucred->cr_prison;
+ prison_hold(mypr);
pr = NULL;
+ jfp_out = NULL;
+ jfd_out = -1;
/*
- * Find the prison specified by one of: lastjid, jid, name.
+ * Find the prison specified by one of: desc, lastjid, jid, name.
*/
sx_slock(&allprison_lock);
drflags = PD_LIST_SLOCKED;
+
+ error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in));
+ if (error == ENOENT) {
+ if (flags & (JAIL_AT_DESC | JAIL_GET_DESC | JAIL_OWN_DESC)) {
+ vfs_opterror(opts, "missing desc");
+ goto done;
+ }
+ } else if (error == 0) {
+ if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+ JAIL_OWN_DESC))) {
+ vfs_opterror(opts, "unexpected desc");
+ goto done;
+ }
+ if (flags & JAIL_USE_DESC) {
+ /* Get the jail from its descriptor. */
+ error = jaildesc_find(td, jfd_in, &pr, NULL);
+ if (error) {
+ vfs_opterror(opts, error == ENOENT ?
+ "descriptor to dead jail" :
+ "not a jail descriptor");
+ goto done;
+ }
+ drflags |= PD_DEREF;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ if (!(prison_isalive(pr) || (flags & JAIL_DYING))) {
+ error = ENOENT;
+ vfs_opterror(opts, "jail %d is dying",
+ pr->pr_id);
+ goto done;
+ }
+ goto found_prison;
+ }
+ if (flags & JAIL_AT_DESC) {
+ /* Look up jails based on the descriptor's prison. */
+ prison_free(mypr);
+ error = jaildesc_find(td, jfd_in, &mypr, NULL);
+ if (error != 0) {
+ vfs_opterror(opts, error == ENOENT ?
+ "descriptor to dead jail" :
+ "not a jail descriptor");
+ goto done;
+ }
+ }
+ if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) {
+ /* Allocate a jail descriptor to return later. */
+ error = jaildesc_alloc(td, &jfp_out, &jfd_out,
+ flags & JAIL_OWN_DESC);
+ if (error)
+ goto done;
+ }
+ } else
+ goto done;
+
error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
if (error == 0) {
TAILQ_FOREACH(pr, &allprison, pr_list) {
@@ -2421,9 +2629,17 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
found_prison:
/* Get the parameters of the prison. */
- prison_hold(pr);
- drflags |= PD_DEREF;
+ if (!(drflags & PD_DEREF)) {
+ prison_hold(pr);
+ drflags |= PD_DEREF;
+ }
td->td_retval[0] = pr->pr_id;
+ if (jfd_out >= 0) {
+ error = vfs_setopt(opts, "desc", &jfd_out, sizeof(jfd_out));
+ if (error != 0 && error != ENOENT)
+ goto done;
+ jaildesc_set_prison(jfp_out, pr);
+ }
error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
if (error != 0 && error != ENOENT)
goto done;
@@ -2603,6 +2819,13 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
prison_deref(pr, drflags);
else if (drflags & PD_LIST_SLOCKED)
sx_sunlock(&allprison_lock);
+ else if (drflags & PD_LIST_XLOCKED)
+ sx_xunlock(&allprison_lock);
+ /* Clean up other resources. */
+ if (jfp_out != NULL)
+ (void)fdrop(jfp_out, td);
+ if (error && jfd_out >= 0)
+ (void)kern_close(td, jfd_out);
if (error && errmsg_pos >= 0) {
/* Write the error message back to userspace. */
vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
@@ -2619,6 +2842,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
}
}
vfs_freeopts(opts);
+ prison_free(mypr);
return (error);
}
@@ -2643,14 +2867,54 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
sx_xunlock(&allprison_lock);
return (EINVAL);
}
+ prison_hold(pr);
+ prison_remove(pr);
+ return (0);
+}
+
+/*
+ * struct jail_remove_jd_args {
+ * int fd;
+ * };
+ *
+ * Remove the jail referred to by a jail descriptor. The descriptor's
+ * credential must hold PRIV_JAIL_REMOVE. A jail that is already dying
+ * is silently left alone and 0 is returned.
+ */
+int
+sys_jail_remove_jd(struct thread *td, struct jail_remove_jd_args *uap)
+{
+ struct prison *pr;
+ struct ucred *jdcred;
+ int error;
+
+ /* On success both pr and jdcred come back held. */
+ error = jaildesc_find(td, uap->fd, &pr, &jdcred);
+ if (error)
+ return (error);
+ error = priv_check_cred(jdcred, PRIV_JAIL_REMOVE);
+ crfree(jdcred);
+ if (error) {
+ prison_free(pr);
+ return (error);
+ }
+ sx_xlock(&allprison_lock);
+ mtx_lock(&pr->pr_mtx);
+ if (!prison_isalive(pr)) {
+ /*
+ * prison_remove() only unlocks in this case and would leave
+ * the reference from jaildesc_find() behind, so release the
+ * hold and both locks here instead.
+ */
+ prison_deref(pr, PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
+ return (0);
+ }
+ /* Consumes the hold and drops both locks. */
+ prison_remove(pr);
+ return (0);
+}
+
+/*
+ * Begin the removal process for a prison. The allprison lock should
+ * be held exclusively, and the prison should be both locked and held.
+ */
+void
+prison_remove(struct prison *pr)
+{
+ sx_assert(&allprison_lock, SA_XLOCKED);
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
if (!prison_isalive(pr)) {
/* Silently ignore already-dying prisons. */
mtx_unlock(&pr->pr_mtx);
sx_xunlock(&allprison_lock);
- return (0);
+ return;
}
- prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED);
- return (0);
+ prison_deref(pr, PD_KILL | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
}
/*
@@ -2685,6 +2949,44 @@ sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED));
}
+/*
+ * struct jail_attach_jd_args {
+ * int fd;
+ * };
+ *
+ * Attach the calling process to the jail referred to by a jail
+ * descriptor. The descriptor's credential must hold PRIV_JAIL_ATTACH,
+ * and the jail must still be alive (EINVAL otherwise).
+ */
+int
+sys_jail_attach_jd(struct thread *td, struct jail_attach_jd_args *uap)
+{
+ struct prison *pr;
+ struct ucred *jdcred;
+ int drflags, error;
+
+ sx_slock(&allprison_lock);
+ drflags = PD_LIST_SLOCKED;
+ error = jaildesc_find(td, uap->fd, &pr, &jdcred);
+ if (error) {
+ /*
+ * No prison was returned, so pr is not valid and must not
+ * be handed to prison_deref(); just drop the list lock.
+ */
+ sx_sunlock(&allprison_lock);
+ return (error);
+ }
+ /* jaildesc_find() returned the prison held. */
+ drflags |= PD_DEREF;
+ error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH);
+ crfree(jdcred);
+ if (error)
+ goto fail;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+
+ /* Do not allow a process to attach to a prison that is not alive. */
+ if (!prison_isalive(pr)) {
+ error = EINVAL;
+ goto fail;
+ }
+
+ /* do_jail_attach() takes over the locks and reference in drflags. */
+ return (do_jail_attach(td, pr, drflags));
+
+ fail:
+ /* Release whatever drflags says is currently held. */
+ prison_deref(pr, drflags);
+ return (error);
+}
+
static int
do_jail_attach(struct thread *td, struct prison *pr, int drflags)
{
@@ -2703,9 +3005,12 @@ do_jail_attach(struct thread *td, struct prison *pr, int drflags)
* a process root from one prison, but attached to the jail
* of another.
*/
- prison_hold(pr);
+ if (!(drflags & PD_DEREF)) {
+ prison_hold(pr);
+ drflags |= PD_DEREF;
+ }
refcount_acquire(&pr->pr_uref);
- drflags |= PD_DEREF | PD_DEUREF;
+ drflags |= PD_DEUREF;
mtx_unlock(&pr->pr_mtx);
drflags &= ~PD_LOCKED;
@@ -2755,6 +3060,7 @@ do_jail_attach(struct thread *td, struct prison *pr, int drflags)
prison_proc_relink(oldcred->cr_prison, pr, p);
prison_deref(oldcred->cr_prison, drflags);
crfree(oldcred);
+ prison_knote(pr, NOTE_JAIL_ATTACH | td->td_proc->p_pid);
/*
* If the prison was killed while changing credentials, die along
@@ -3182,9 +3488,10 @@ prison_deref(struct prison *pr, int flags)
refcount_load(&prison0.pr_uref) > 0,
("prison0 pr_uref=0"));
pr->pr_state = PRISON_STATE_DYING;
+ prison_cleanup_locked(pr);
mtx_unlock(&pr->pr_mtx);
flags &= ~PD_LOCKED;
- prison_cleanup(pr);
+ prison_cleanup_unlocked(pr);
}
}
}
@@ -3327,8 +3634,9 @@ prison_deref_kill(struct prison *pr, struct prisonlist *freeprison)
}
if (!(cpr->pr_flags & PR_REMOVE))
continue;
- prison_cleanup(cpr);
+ prison_cleanup_unlocked(cpr);
mtx_lock(&cpr->pr_mtx);
+ prison_cleanup_locked(cpr);
cpr->pr_flags &= ~PR_REMOVE;
if (cpr->pr_flags & PR_PERSIST) {
cpr->pr_flags &= ~PR_PERSIST;
@@ -3363,8 +3671,9 @@ prison_deref_kill(struct prison *pr, struct prisonlist *freeprison)
if (rpr != NULL)
LIST_REMOVE(rpr, pr_sibling);
- prison_cleanup(pr);
+ prison_cleanup_unlocked(pr);
mtx_lock(&pr->pr_mtx);
+ prison_cleanup_locked(pr);
if (pr->pr_flags & PR_PERSIST) {
pr->pr_flags &= ~PR_PERSIST;
prison_proc_free_not_last(pr);
@@ -3411,10 +3720,22 @@ prison_lock_xlock(struct prison *pr, int flags)
/*
* Release a prison's resources when it starts dying (when the last user
- * reference is dropped, or when it is killed).
+ * reference is dropped, or when it is killed). Two functions are called,
+ * for work that requires a locked prison or an unlocked one.
*/
static void
-prison_cleanup(struct prison *pr)
+prison_cleanup_locked(struct prison *pr)
+{
+ sx_assert(&allprison_lock, SA_XLOCKED);
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
+ prison_knote(pr, NOTE_JAIL_REMOVE);
+ knlist_detach(pr->pr_klist);
+ jaildesc_prison_cleanup(pr);
+ pr->pr_klist = NULL;
+}
+
+static void
+prison_cleanup_unlocked(struct prison *pr)
{
sx_assert(&allprison_lock, SA_XLOCKED);
mtx_assert(&pr->pr_mtx, MA_NOTOWNED);
@@ -4616,6 +4937,7 @@ sysctl_jail_param(SYSCTL_HANDLER_ARGS)
* jail creation time but cannot be changed in an existing jail.
*/
SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID");
+SYSCTL_JAIL_PARAM(, desc, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail descriptor");
SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID");
SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name");
SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path");
@@ -5039,6 +5361,22 @@ prison_racct_detach(struct prison *pr)
}
#endif /* RACCT */
+/*
+ * Deliver a kevent hint to the knotes on a prison's pr_klist. The
+ * prison mutex is taken here only when the calling thread does not
+ * already own it, and is released again before returning in that case,
+ * so the caller's lock state is left as it was.
+ */
+static void
+prison_knote(struct prison *pr, long hint)
+{
+ if (mtx_owned(&pr->pr_mtx)) {
+ /* The caller already holds the prison lock. */
+ KNOTE_LOCKED(pr->pr_klist, hint);
+ return;
+ }
+ mtx_lock(&pr->pr_mtx);
+ KNOTE_LOCKED(pr->pr_klist, hint);
+ mtx_unlock(&pr->pr_mtx);
+}
+
#ifdef DDB
static void
diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c
new file mode 100644
index 000000000000..c9e80f5d8941
--- /dev/null
+++ b/sys/kern/kern_jaildesc.c
@@ -0,0 +1,278 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 James Gritton.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/jaildesc.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/stat.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/ucred.h>
+#include <sys/user.h>
+#include <sys/vnode.h>
+
+MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors");
+
+static fo_stat_t jaildesc_stat;
+static fo_close_t jaildesc_close;
+static fo_fill_kinfo_t jaildesc_fill_kinfo;
+static fo_cmp_t jaildesc_cmp;
+
+static struct fileops jaildesc_ops = {
+ .fo_read = invfo_rdwr,
+ .fo_write = invfo_rdwr,
+ .fo_truncate = invfo_truncate,
+ .fo_ioctl = invfo_ioctl,
+ .fo_poll = invfo_poll,
+ .fo_kqfilter = invfo_kqfilter,
+ .fo_stat = jaildesc_stat,
+ .fo_close = jaildesc_close,
+ .fo_chmod = invfo_chmod,
+ .fo_chown = invfo_chown,
+ .fo_sendfile = invfo_sendfile,
+ .fo_fill_kinfo = jaildesc_fill_kinfo,
+ .fo_cmp = jaildesc_cmp,
+ .fo_flags = DFLAG_PASSABLE,
+};
+
+/*
+ * Given a jail descriptor number, return its prison and/or its
+ * credential. They are returned held, and will need to be released
+ * by the caller.
+ *
+ * Either prp or ucredp may be NULL when that item is not wanted. The
+ * credential returned is the file's own credential (f_cred).
+ * Returns the fget() error if the descriptor cannot be looked up,
+ * EINVAL if the file is not a jail descriptor, and ENOENT if it no
+ * longer points to a prison or that prison is not valid. Nothing is
+ * stored through prp or ucredp on failure. The file reference taken
+ * here is always dropped before returning.
+ */
+int
+jaildesc_find(struct thread *td, int fd, struct prison **prp,
+ struct ucred **ucredp)
+{
+ struct file *fp;
+ struct jaildesc *jd;
+ struct prison *pr;
+ int error;
+
+ error = fget(td, fd, &cap_no_rights, &fp);
+ if (error != 0)
+ return (error);
+ if (fp->f_type != DTYPE_JAILDESC) {
+ error = EINVAL;
+ goto out;
+ }
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ pr = jd->jd_prison;
+ if (pr == NULL || !prison_isvalid(pr)) {
+ error = ENOENT;
+ JAILDESC_UNLOCK(jd);
+ goto out;
+ }
+ if (prp != NULL) {
+ prison_hold(pr);
+ *prp = pr;
+ }
+ JAILDESC_UNLOCK(jd);
+ if (ucredp != NULL)
+ *ucredp = crhold(fp->f_cred);
+ out:
+ fdrop(fp, td);
+ return (error);
+}
+
+/*
+ * Allocate a new jail descriptor, not yet associated with a prison.
+ * Return the file pointer (with a reference held) and the descriptor
+ * number.
+ *
+ * An owning descriptor (JDF_OWNING) requires PRIV_JAIL_REMOVE from the
+ * calling thread. The file is opened FREAD | FWRITE if its credential
+ * has PRIV_JAIL_SET, and FREAD only otherwise. Returns 0 on success or
+ * the error from the privilege check or from falloc_caps().
+ */
+int
+jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning)
+{
+ struct file *fp;
+ struct jaildesc *jd;
+ int error;
+
+ if (owning) {
+ error = priv_check(td, PRIV_JAIL_REMOVE);
+ if (error != 0)
+ return (error);
+ }
+ jd = malloc(sizeof(*jd), M_JAILDESC, M_WAITOK | M_ZERO);
+ error = falloc_caps(td, &fp, fdp, 0, NULL);
+ if (error != 0) {
+ free(jd, M_JAILDESC);
+ return (error);
+ }
+ /*
+ * Finish setting up the jaildesc before finit() attaches it to the
+ * file, so that no file operation can reach a descriptor whose lock
+ * has not been initialized or whose flags are not yet final.
+ */
+ JAILDESC_LOCK_INIT(jd);
+ if (owning)
+ jd->jd_flags |= JDF_OWNING;
+ finit(fp, priv_check_cred(fp->f_cred, PRIV_JAIL_SET) == 0 ?
+ FREAD | FWRITE : FREAD, DTYPE_JAILDESC, jd, &jaildesc_ops);
+ *fpp = fp;
+ return (0);
+}
+
+/*
+ * Associate a jail descriptor with its prison.
+ *
+ * The caller must hold the prison's mutex. The descriptor is linked
+ * onto the prison's pr_descs list and takes its own reference on the
+ * prison, all under the descriptor lock.
+ */
+void
+jaildesc_set_prison(struct file *fp, struct prison *pr)
+{
+ struct jaildesc *jd;
+
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ jd->jd_prison = pr;
+ LIST_INSERT_HEAD(&pr->pr_descs, jd, jd_list);
+ prison_hold(pr);
+ JAILDESC_UNLOCK(jd);
+}
+
+/*
+ * Detach all the jail descriptors from a prison.
+ *
+ * The caller must hold the prison's mutex. Each descriptor on pr_descs
+ * is unlinked and has its jd_prison pointer cleared under the descriptor
+ * lock, and then one prison reference is released for it. The
+ * descriptors themselves are not closed or freed here; they simply no
+ * longer refer to a prison.
+ */
+void
+jaildesc_prison_cleanup(struct prison *pr)
+{
+ struct jaildesc *jd;
+
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
+ while ((jd = LIST_FIRST(&pr->pr_descs))) {
+ JAILDESC_LOCK(jd);
+ LIST_REMOVE(jd, jd_list);
+ jd->jd_prison = NULL;
+ JAILDESC_UNLOCK(jd);
+ prison_free(pr);
+ }
+}
+
+static int
+jaildesc_close(struct file *fp, struct thread *td)
+{
+ struct jaildesc *jd;
+ struct prison *pr;
+
+ jd = fp->f_data;
+ fp->f_data = NULL;
+ if (jd != NULL) {
+ JAILDESC_LOCK(jd);
+ pr = jd->jd_prison;
+ if (pr != NULL) {
+ /*
+ * Free or remove the associated prison.
+ * This requires a second check after re-
+ * ordering locks. This jaildesc can remain
+ * unlocked once we have a prison reference,
+ * because that prison is the only place that
+ * still points back to it.
+ *
+ * The temporary hold taken here keeps the
+ * prison around while the descriptor lock is
+ * dropped and the prison lock is taken; it is
+ * released by the final prison_free() below.
+ * An owning descriptor (JDF_OWNING) removes
+ * the prison on close, while any other
+ * descriptor only gives up its own reference.
+ * If jd_prison was cleared in the meantime,
+ * the descriptor has already been unlinked and
+ * only the temporary hold is dropped.
+ */
+ prison_hold(pr);
+ JAILDESC_UNLOCK(jd);
+ if (jd->jd_flags & JDF_OWNING) {
+ sx_xlock(&allprison_lock);
+ prison_lock(pr);
+ if (jd->jd_prison != NULL) {
+ /*
+ * Unlink the prison, but don't free
+ * it; that will be done as part of
+ * prison_remove.
+ */
+ LIST_REMOVE(jd, jd_list);
+ prison_remove(pr);
+ } else {
+ prison_unlock(pr);
+ sx_xunlock(&allprison_lock);
+ }
+ } else {
+ prison_lock(pr);
+ if (jd->jd_prison != NULL) {
+ LIST_REMOVE(jd, jd_list);
+ prison_free(pr);
+ }
+ prison_unlock(pr);
+ }
+ prison_free(pr);
+ }
+ JAILDESC_LOCK_DESTROY(jd); /* NOTE(review): still locked here when pr was NULL; confirm the lock type permits destroying it held */
+ free(jd, M_JAILDESC);
+ }
+ return (0);
+}
+
+/*
+ * Fill in a stat structure for a jail descriptor. Everything is zeroed
+ * except st_mode, which is always S_IFREG, plus S_IRWXU and the jail ID
+ * in st_ino while the descriptor still refers to a prison.
+ */
+static int
+jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
+{
+ struct jaildesc *desc;
+ struct prison *pr;
+
+ desc = fp->f_data;
+ bzero(sb, sizeof(*sb));
+ sb->st_mode = S_IFREG;
+ /* jd_prison is cleared under the descriptor lock, so sample it there. */
+ JAILDESC_LOCK(desc);
+ pr = desc->jd_prison;
+ if (pr != NULL) {
+ sb->st_ino = pr->pr_id;
+ sb->st_mode |= S_IRWXU;
+ }
+ JAILDESC_UNLOCK(desc);
+ return (0);
+}
+
+/*
+ * Report a jail descriptor in a kinfo_file: the type is
+ * KF_TYPE_JAILDESC and kf_jid is the jail ID, or 0 once the descriptor
+ * no longer refers to a prison.
+ */
+static int
+jaildesc_fill_kinfo(struct file *fp, struct kinfo_file *kif,
+ struct filedesc *fdp)
+{
+ struct jaildesc *jd;
+
+ jd = fp->f_data;
+ kif->kf_type = KF_TYPE_JAILDESC;
+ /*
+ * jd_prison is cleared under the descriptor lock when the prison
+ * detaches its descriptors, so test and dereference it under that
+ * lock, as jaildesc_stat() and jaildesc_cmp() do.
+ */
+ JAILDESC_LOCK(jd);
+ kif->kf_un.kf_jail.kf_jid = jd->jd_prison ? jd->jd_prison->pr_id : 0;
+ JAILDESC_UNLOCK(jd);
+ return (0);
+}
+
+/*
+ * Compare two files by the jail ID each descriptor refers to, using 0
+ * for a descriptor with no prison. Returns 3 when the second file is
+ * not a jail descriptor, otherwise the result of kcmp_cmp() on the two
+ * IDs.
+ */
+static int
+jaildesc_cmp(struct file *fp1, struct file *fp2, struct thread *td)
+{
+ struct jaildesc *jd;
+ int jid_a, jid_b;
+
+ if (fp2->f_type != DTYPE_JAILDESC)
+ return (3);
+
+ /* Sample each ID under its own descriptor lock, one at a time. */
+ jd = fp1->f_data;
+ JAILDESC_LOCK(jd);
+ if (jd->jd_prison != NULL)
+ jid_a = (uintptr_t)jd->jd_prison->pr_id;
+ else
+ jid_a = 0;
+ JAILDESC_UNLOCK(jd);
+
+ jd = fp2->f_data;
+ JAILDESC_LOCK(jd);
+ if (jd->jd_prison != NULL)
+ jid_b = (uintptr_t)jd->jd_prison->pr_id;
+ else
+ jid_b = 0;
+ JAILDESC_UNLOCK(jd);
+
+ return (kcmp_cmp(jid_a, jid_b));
+}
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index 879220be050b..653ce1ee556b 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -751,11 +751,14 @@ malloc_domainset(size_t size, struct malloc_type *mtp, struct domainset *ds,
return (malloc_large(size, mtp, DOMAINSET_RR(), flags
DEBUG_REDZONE_ARG));
- vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
- do {
- va = malloc_domain(&size, &indx, mtp, domain, flags);
- } while (va == NULL && vm_domainset_iter_policy(&di, &domain) == 0);
+ indx = -1;
+ va = NULL;
+ if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags) == 0)
+ do {
+ va = malloc_domain(&size, &indx, mtp, domain, flags);
+ } while (va == NULL && vm_domainset_iter_policy(&di, &domain) == 0);
malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx);
+
if (__predict_false(va == NULL)) {
KASSERT((flags & M_WAITOK) == 0,
("malloc(M_WAITOK) returned NULL"));
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index f952b3fc8805..8b5908f5219a 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -1136,9 +1136,9 @@ __mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line)
* General init routine used by the MTX_SYSINIT() macro.
*/
void
-mtx_sysinit(void *arg)
+mtx_sysinit(const void *arg)
{
- struct mtx_args *margs = arg;
+ const struct mtx_args *margs = arg;
mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL,
margs->ma_opts);
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 379fbda619c0..6e56664d12ce 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -1112,13 +1112,14 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
if (cred->cr_flags & CRED_FLAG_CAPMODE)
kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE;
/* XXX bde doesn't like KI_NGROUPS */
- if (cred->cr_ngroups > KI_NGROUPS) {
+ if (1 + cred->cr_ngroups > KI_NGROUPS) {
kp->ki_ngroups = KI_NGROUPS;
kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW;
} else
- kp->ki_ngroups = cred->cr_ngroups;
- bcopy(cred->cr_groups, kp->ki_groups,
- kp->ki_ngroups * sizeof(gid_t));
+ kp->ki_ngroups = 1 + cred->cr_ngroups;
+ kp->ki_groups[0] = cred->cr_gid;
+ bcopy(cred->cr_groups, kp->ki_groups + 1,
+ (kp->ki_ngroups - 1) * sizeof(gid_t));
kp->ki_rgid = cred->cr_rgid;
kp->ki_svgid = cred->cr_svgid;
/* If jailed(cred), emulate the old P_JAILED flag. */
@@ -2943,8 +2944,11 @@ sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS)
cred = crhold(p->p_ucred);
PROC_UNLOCK(p);
- error = SYSCTL_OUT(req, cred->cr_groups,
- cred->cr_ngroups * sizeof(gid_t));
+ error = SYSCTL_OUT(req, &cred->cr_gid, sizeof(gid_t));
+ if (error == 0)
+ error = SYSCTL_OUT(req, cred->cr_groups,
+ cred->cr_ngroups * sizeof(gid_t));
+
crfree(cred);
return (error);
}
diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c
index c1633dd19de2..7206572ffc02 100644
--- a/sys/kern/kern_rmlock.c
+++ b/sys/kern/kern_rmlock.c
@@ -337,9 +337,9 @@ rm_wowned(const struct rmlock *rm)
}
void
-rm_sysinit(void *arg)
+rm_sysinit(const void *arg)
{
- struct rm_args *args;
+ const struct rm_args *args;
args = arg;
rm_init_flags(args->ra_rm, args->ra_desc, args->ra_flags);
diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c
index e182d1fe9baf..84a3a890be63 100644
--- a/sys/kern/kern_rwlock.c
+++ b/sys/kern/kern_rwlock.c
@@ -266,9 +266,9 @@ _rw_destroy(volatile uintptr_t *c)
}
void
-rw_sysinit(void *arg)
+rw_sysinit(const void *arg)
{
- struct rw_args *args;
+ const struct rw_args *args;
args = arg;
rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index accea5d288eb..c005e112d3b9 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -222,9 +222,9 @@ owner_sx(const struct lock_object *lock, struct thread **owner)
#endif
void
-sx_sysinit(void *arg)
+sx_sysinit(const void *arg)
{
- struct sx_args *sargs = arg;
+ const struct sx_args *sargs = arg;
sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags);
}
diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c
index 0e8c2b9f362e..4329959a2ef4 100644
--- a/sys/kern/kern_thr.c
+++ b/sys/kern/kern_thr.c
@@ -347,6 +347,17 @@ kern_thr_exit(struct thread *td)
p = td->td_proc;
/*
+ * Clear kernel ASTs in advance of selecting the last exiting
+ * thread and acquiring schedulers locks. It is fine to
+ * clear the ASTs here even if we are not going to exit after
+ * all. On the other hand, leaving them pending could trigger
+ * execution in subsystems in a context where they are not
+ * prepared to handle top kernel actions, even in execution of
+ * an unrelated thread.
+ */
+ ast_kclear(td);
+
+ /*
* If all of the threads in a process call this routine to
* exit (e.g. all threads call pthread_exit()), exactly one
* thread should return to the caller to terminate the process
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 50b040132396..3180c66cb42b 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -1694,8 +1694,10 @@ thread_single_end(struct proc *p, int mode)
thread_unlock(td);
}
}
- KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0,
- ("inconsistent boundary count %d", p->p_boundary_count));
+ KASSERT(mode != SINGLE_BOUNDARY || P_SHOULDSTOP(p) ||
+ p->p_boundary_count == 0,
+ ("pid %d proc %p flags %#x inconsistent boundary count %d",
+ p->p_pid, p, p->p_flag, p->p_boundary_count));
PROC_SUNLOCK(p);
wakeup(&p->p_flag);
}
diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c
index fbf81d423b95..09070eea284f 100644
--- a/sys/kern/kern_tslog.c
+++ b/sys/kern/kern_tslog.c
@@ -220,3 +220,13 @@ SYSCTL_PROC(_debug, OID_AUTO, tslog_user,
CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_SKIP,
0, 0, sysctl_debug_tslog_user,
"", "Dump recorded userland event timestamps");
+
+/*
+ * Call a wrapped function between a pair of "SYSINIT" tslog records.
+ * The argument points to a struct sysinit_tslog giving the function to
+ * call, the argument to pass to it, and the name to log.
+ */
+void
+sysinit_tslog_shim(const void *data)
+{
+ const struct sysinit_tslog *st;
+
+ st = data;
+ tslog(curthread, TS_ENTER, "SYSINIT", st->name);
+ st->func(st->data);
+ tslog(curthread, TS_EXIT, "SYSINIT", st->name);
+}
diff --git a/sys/kern/subr_bus.c b/sys/kern/subr_bus.c
index 62a3da964c37..bf5bda7e058d 100644
--- a/sys/kern/subr_bus.c
+++ b/sys/kern/subr_bus.c
@@ -280,6 +280,9 @@ device_sysctl_handler(SYSCTL_HANDLER_ARGS)
struct sbuf sb;
device_t dev = (device_t)arg1;
device_t iommu;
+#ifdef IOMMU
+ device_t requester;
+#endif
int error;
uint16_t rid;
const char *c;
@@ -314,9 +317,15 @@ device_sysctl_handler(SYSCTL_HANDLER_ARGS)
}
rid = 0;
#ifdef IOMMU
- iommu_get_requester(dev, &rid);
+ error = iommu_get_requester(dev, &requester, &rid);
+ /*
+ * Do not return requester error from sysctl, iommu
+ * unit might be assigned by other means.
+ */
+#else
+ error = ENXIO;
#endif
- if (rid != 0)
+ if (error == 0)
sbuf_printf(&sb, "%srid=%#x", c, rid);
break;
default:
diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index ab47b6ad29a3..a65c3ca128d9 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -57,7 +57,7 @@
* b : public affirmation by word or example of usually
* religious faith or conviction <the heroic witness to divine
* life -- Pilot>
- * 6 capitalized : a member of the Jehovah's Witnesses
+ * 6 capitalized : a member of the Jehovah's Witnesses
*/
/*
@@ -131,7 +131,7 @@
#define LI_SLEEPABLE 0x00040000 /* Lock may be held while sleeping. */
#ifndef WITNESS_COUNT
-#define WITNESS_COUNT 1536
+#define WITNESS_COUNT 1536
#endif
#define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */
#define WITNESS_PENDLIST (512 + (MAXCPU * 4))
@@ -158,20 +158,18 @@
* These flags go in the witness relationship matrix and describe the
* relationship between any two struct witness objects.
*/
-#define WITNESS_UNRELATED 0x00 /* No lock order relation. */
-#define WITNESS_PARENT 0x01 /* Parent, aka direct ancestor. */
-#define WITNESS_ANCESTOR 0x02 /* Direct or indirect ancestor. */
-#define WITNESS_CHILD 0x04 /* Child, aka direct descendant. */
-#define WITNESS_DESCENDANT 0x08 /* Direct or indirect descendant. */
-#define WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR)
-#define WITNESS_DESCENDANT_MASK (WITNESS_CHILD | WITNESS_DESCENDANT)
-#define WITNESS_RELATED_MASK \
- (WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK)
-#define WITNESS_REVERSAL 0x10 /* A lock order reversal has been
- * observed. */
-#define WITNESS_RESERVED1 0x20 /* Unused flag, reserved. */
-#define WITNESS_RESERVED2 0x40 /* Unused flag, reserved. */
-#define WITNESS_LOCK_ORDER_KNOWN 0x80 /* This lock order is known. */
+#define WITNESS_UNRELATED 0x00 /* No lock order relation. */
+#define WITNESS_PARENT 0x01 /* Parent, aka direct ancestor. */
+#define WITNESS_ANCESTOR 0x02 /* Direct or indirect ancestor. */
+#define WITNESS_CHILD 0x04 /* Child, aka direct descendant. */
+#define WITNESS_DESCENDANT 0x08 /* Direct or indirect descendant. */
+#define WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR)
+#define WITNESS_DESCENDANT_MASK (WITNESS_CHILD | WITNESS_DESCENDANT)
+#define WITNESS_RELATED_MASK (WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK)
+#define WITNESS_REVERSAL 0x10 /* A lock order reversal has been observed. */
+#define WITNESS_RESERVED1 0x20 /* Unused flag, reserved. */
+#define WITNESS_RESERVED2 0x40 /* Unused flag, reserved. */
+#define WITNESS_LOCK_ORDER_KNOWN 0x80 /* This lock order is known. */
/* Descendant to ancestor flags */
#define WITNESS_DTOA(x) (((x) & WITNESS_RELATED_MASK) >> 2)
@@ -218,20 +216,18 @@ struct lock_list_entry {
* (for example, "vnode interlock").
*/
struct witness {
- char w_name[MAX_W_NAME];
- uint32_t w_index; /* Index in the relationship matrix */
+ char w_name[MAX_W_NAME];
+ uint32_t w_index; /* Index in the relationship matrix */
struct lock_class *w_class;
- STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */
- STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */
- struct witness *w_hash_next; /* Linked list in hash buckets. */
- const char *w_file; /* File where last acquired */
- uint32_t w_line; /* Line where last acquired */
- uint32_t w_refcount;
- uint16_t w_num_ancestors; /* direct/indirect
- * ancestor count */
- uint16_t w_num_descendants; /* direct/indirect
- * descendant count */
- int16_t w_ddb_level;
+ STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */
+ STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */
+ struct witness *w_hash_next; /* Linked list in hash buckets. */
+ const char *w_file; /* File where last acquired */
+ uint32_t w_line; /* Line where last acquired */
+ uint32_t w_refcount;
+ uint16_t w_num_ancestors; /* direct/indirect ancestor count */
+ uint16_t w_num_descendants; /* direct/indirect descendant count */
+ int16_t w_ddb_level;
unsigned w_displayed:1;
unsigned w_reversed:1;
};
@@ -265,7 +261,7 @@ struct witness_lock_order_data {
/*
* The witness lock order data hash table. Keys are witness index tuples
* (struct witness_lock_order_key), elements are lock order data objects
- * (struct witness_lock_order_data).
+ * (struct witness_lock_order_data).
*/
struct witness_lock_order_hash {
struct witness_lock_order_data *wloh_array[WITNESS_LO_HASH_SIZE];
@@ -295,7 +291,6 @@ struct witness_order_list_entry {
static __inline int
witness_lock_type_equal(struct witness *w1, struct witness *w2)
{
-
return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) ==
(w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)));
}
@@ -304,7 +299,6 @@ static __inline int
witness_lock_order_key_equal(const struct witness_lock_order_key *a,
const struct witness_lock_order_key *b)
{
-
return (a->from == b->from && a->to == b->to);
}
@@ -415,7 +409,7 @@ SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN, &witness_skipspin,
int badstack_sbuf_size;
int witness_count = WITNESS_COUNT;
-SYSCTL_INT(_debug_witness, OID_AUTO, witness_count, CTLFLAG_RDTUN,
+SYSCTL_INT(_debug_witness, OID_AUTO, witness_count, CTLFLAG_RDTUN,
&witness_count, 0, "");
/*
@@ -760,7 +754,6 @@ static int witness_spin_warn = 0;
static const char *
fixup_filename(const char *file)
{
-
if (file == NULL)
return (NULL);
while (strncmp(file, "../", 3) == 0)
@@ -835,7 +828,7 @@ witness_startup(void *mem)
w_free_cnt--;
for (i = 0; i < witness_count; i++) {
- memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) *
+ memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) *
(witness_count + 1));
}
@@ -989,16 +982,16 @@ witness_ddb_display_descendants(int(*prnt)(const char *fmt, ...),
{
int i;
- for (i = 0; i < indent; i++)
- prnt(" ");
+ for (i = 0; i < indent; i++)
+ prnt(" ");
prnt("%s (type: %s, depth: %d, active refs: %d)",
w->w_name, w->w_class->lc_name,
w->w_ddb_level, w->w_refcount);
- if (w->w_displayed) {
- prnt(" -- (already displayed)\n");
- return;
- }
- w->w_displayed = 1;
+ if (w->w_displayed) {
+ prnt(" -- (already displayed)\n");
+ return;
+ }
+ w->w_displayed = 1;
if (w->w_file != NULL && w->w_line != 0)
prnt(" -- last acquired @ %s:%d\n", fixup_filename(w->w_file),
w->w_line);
@@ -1079,7 +1072,6 @@ witness_ddb_display(int(*prnt)(const char *fmt, ...))
int
witness_defineorder(struct lock_object *lock1, struct lock_object *lock2)
{
-
if (witness_watch == -1 || KERNEL_PANICKED())
return (0);
@@ -1257,7 +1249,7 @@ witness_checkorder(struct lock_object *lock, int flags, const char *file,
w->w_reversed = 1;
mtx_unlock_spin(&w_mtx);
witness_output(
- "acquiring duplicate lock of same type: \"%s\"\n",
+ "acquiring duplicate lock of same type: \"%s\"\n",
w->w_name);
witness_output(" 1st %s @ %s:%d\n", plock->li_lock->lo_name,
fixup_filename(plock->li_file), plock->li_line);
@@ -1743,7 +1735,7 @@ found:
/*
* In order to reduce contention on w_mtx, we want to keep always an
- * head object into lists so that frequent allocation from the
+ * head object into lists so that frequent allocation from the
* free witness pool (and subsequent locking) is avoided.
* In order to maintain the current code simple, when the head
* object is totally unloaded it means also that we do not have
@@ -1781,7 +1773,7 @@ witness_thread_exit(struct thread *td)
n++;
witness_list_lock(&lle->ll_children[i],
witness_output);
-
+
}
kassert_panic(
"Thread %p cannot exit while holding sleeplocks\n", td);
@@ -1948,7 +1940,6 @@ found:
static void
depart(struct witness *w)
{
-
MPASS(w->w_refcount == 0);
if (w->w_class->lc_flags & LC_SLEEPLOCK) {
w_sleep_cnt--;
@@ -1999,18 +1990,18 @@ adopt(struct witness *parent, struct witness *child)
child->w_num_ancestors++;
}
- /*
- * Find each ancestor of 'pi'. Note that 'pi' itself is counted as
+ /*
+ * Find each ancestor of 'pi'. Note that 'pi' itself is counted as
* an ancestor of 'pi' during this loop.
*/
for (i = 1; i <= w_max_used_index; i++) {
- if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 &&
+ if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 &&
(i != pi))
continue;
/* Find each descendant of 'i' and mark it as a descendant. */
for (j = 1; j <= w_max_used_index; j++) {
- /*
+ /*
* Skip children that are already marked as
* descendants of 'i'.
*/
@@ -2021,7 +2012,7 @@ adopt(struct witness *parent, struct witness *child)
* We are only interested in descendants of 'ci'. Note
* that 'ci' itself is counted as a descendant of 'ci'.
*/
- if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 &&
+ if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 &&
(j != ci))
continue;
w_rmatrix[i][j] |= WITNESS_ANCESTOR;
@@ -2029,16 +2020,16 @@ adopt(struct witness *parent, struct witness *child)
w_data[i].w_num_descendants++;
w_data[j].w_num_ancestors++;
- /*
+ /*
* Make sure we aren't marking a node as both an
- * ancestor and descendant. We should have caught
+ * ancestor and descendant. We should have caught
* this as a lock order reversal earlier.
*/
if ((w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) &&
(w_rmatrix[i][j] & WITNESS_DESCENDANT_MASK)) {
printf("witness rmatrix paradox! [%d][%d]=%d "
"both ancestor and descendant\n",
- i, j, w_rmatrix[i][j]);
+ i, j, w_rmatrix[i][j]);
kdb_backtrace();
printf("Witness disabled.\n");
witness_watch = -1;
@@ -2047,7 +2038,7 @@ adopt(struct witness *parent, struct witness *child)
(w_rmatrix[j][i] & WITNESS_DESCENDANT_MASK)) {
printf("witness rmatrix paradox! [%d][%d]=%d "
"both ancestor and descendant\n",
- j, i, w_rmatrix[j][i]);
+ j, i, w_rmatrix[j][i]);
kdb_backtrace();
printf("Witness disabled.\n");
witness_watch = -1;
@@ -2124,7 +2115,6 @@ _isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname)
static int
isitmychild(struct witness *parent, struct witness *child)
{
-
return (_isitmyx(parent, child, WITNESS_PARENT, __func__));
}
@@ -2134,7 +2124,6 @@ isitmychild(struct witness *parent, struct witness *child)
static int
isitmydescendant(struct witness *ancestor, struct witness *descendant)
{
-
return (_isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK,
__func__));
}
@@ -2182,7 +2171,7 @@ witness_get(void)
STAILQ_REMOVE_HEAD(&w_free, w_list);
w_free_cnt--;
index = w->w_index;
- MPASS(index > 0 && index == w_max_used_index+1 &&
+ MPASS(index > 0 && index == w_max_used_index + 1 &&
index < witness_count);
bzero(w, sizeof(*w));
w->w_index = index;
@@ -2194,7 +2183,6 @@ witness_get(void)
static void
witness_free(struct witness *w)
{
-
STAILQ_INSERT_HEAD(&w_free, w, w_list);
w_free_cnt++;
}
@@ -2219,11 +2207,10 @@ witness_lock_list_get(void)
bzero(lle, sizeof(*lle));
return (lle);
}
-
+
static void
witness_lock_list_free(struct lock_list_entry *lle)
{
-
mtx_lock_spin(&w_mtx);
lle->ll_next = w_lock_list_free;
w_lock_list_free = lle;
@@ -2297,7 +2284,6 @@ witness_voutput(const char *fmt, va_list ap)
static int
witness_thread_has_locks(struct thread *td)
{
-
if (td->td_sleeplocks == NULL)
return (0);
return (td->td_sleeplocks->ll_count != 0);
@@ -2573,14 +2559,12 @@ witness_setflag(struct lock_object *lock, int flag, int set)
void
witness_norelease(struct lock_object *lock)
{
-
witness_setflag(lock, LI_NORELEASE, 1);
}
void
witness_releaseok(struct lock_object *lock)
{
-
witness_setflag(lock, LI_NORELEASE, 0);
}
@@ -2588,7 +2572,6 @@ witness_releaseok(struct lock_object *lock)
static void
witness_ddb_list(struct thread *td)
{
-
KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
KASSERT(kdb_active, ("%s: not in the debugger", __func__));
@@ -2653,7 +2636,6 @@ DB_SHOW_ALIAS_FLAGS(alllocks, db_witness_list_all, DB_CMD_MEMSAFE);
DB_SHOW_COMMAND_FLAGS(witness, db_witness_display, DB_CMD_MEMSAFE)
{
-
witness_ddb_display(db_printf);
}
#endif
@@ -2673,9 +2655,9 @@ sbuf_print_witness_badstacks(struct sbuf *sb, size_t *oldidx)
/* Allocate and init temporary storage space. */
tmp_w1 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO);
tmp_w2 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO);
- tmp_data1 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
+ tmp_data1 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
M_WAITOK | M_ZERO);
- tmp_data2 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
+ tmp_data2 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
M_WAITOK | M_ZERO);
stack_zero(&tmp_data1->wlod_stack);
stack_zero(&tmp_data2->wlod_stack);
@@ -2750,12 +2732,12 @@ restart:
sbuf_printf(sb,
"\nLock order reversal between \"%s\"(%s) and \"%s\"(%s)!\n",
- tmp_w1->w_name, tmp_w1->w_class->lc_name,
+ tmp_w1->w_name, tmp_w1->w_class->lc_name,
tmp_w2->w_name, tmp_w2->w_class->lc_name);
if (data1) {
sbuf_printf(sb,
"Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n",
- tmp_w1->w_name, tmp_w1->w_class->lc_name,
+ tmp_w1->w_name, tmp_w1->w_class->lc_name,
tmp_w2->w_name, tmp_w2->w_class->lc_name);
stack_sbuf_print(sb, &tmp_data1->wlod_stack);
sbuf_putc(sb, '\n');
@@ -2763,7 +2745,7 @@ restart:
if (data2 && data2 != data1) {
sbuf_printf(sb,
"Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n",
- tmp_w2->w_name, tmp_w2->w_class->lc_name,
+ tmp_w2->w_name, tmp_w2->w_class->lc_name,
tmp_w1->w_name, tmp_w1->w_class->lc_name);
stack_sbuf_print(sb, &tmp_data2->wlod_stack);
sbuf_putc(sb, '\n');
@@ -2823,7 +2805,6 @@ sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS)
static int
sbuf_db_printf_drain(void *arg __unused, const char *data, int len)
{
-
return (db_printf("%.*s", len, data));
}
@@ -3068,7 +3049,7 @@ witness_lock_order_get(struct witness *parent, struct witness *child)
& WITNESS_LOCK_ORDER_KNOWN) == 0)
goto out;
- hash = witness_hash_djb2((const char*)&key,
+ hash = witness_hash_djb2((const char *)&key,
sizeof(key)) % w_lohash.wloh_size;
data = w_lohash.wloh_array[hash];
while (data != NULL) {
@@ -3089,7 +3070,6 @@ out:
static int
witness_lock_order_check(struct witness *parent, struct witness *child)
{
-
if (parent != child &&
w_rmatrix[parent->w_index][child->w_index]
& WITNESS_LOCK_ORDER_KNOWN &&
@@ -3115,7 +3095,7 @@ witness_lock_order_add(struct witness *parent, struct witness *child)
& WITNESS_LOCK_ORDER_KNOWN)
return (1);
- hash = witness_hash_djb2((const char*)&key,
+ hash = witness_hash_djb2((const char *)&key,
sizeof(key)) % w_lohash.wloh_size;
w_rmatrix[parent->w_index][child->w_index] |= WITNESS_LOCK_ORDER_KNOWN;
data = w_lofree;
@@ -3134,7 +3114,6 @@ witness_lock_order_add(struct witness *parent, struct witness *child)
static void
witness_increment_graph_generation(void)
{
-
if (witness_cold == 0)
mtx_assert(&w_mtx, MA_OWNED);
w_generation++;
@@ -3143,7 +3122,6 @@ witness_increment_graph_generation(void)
static int
witness_output_drain(void *arg __unused, const char *data, int len)
{
-
witness_output("%.*s", len, data);
return (len);
}
diff --git a/sys/kern/sys_procdesc.c b/sys/kern/sys_procdesc.c
index 11bd1b6f30e1..54b03fc82c90 100644
--- a/sys/kern/sys_procdesc.c
+++ b/sys/kern/sys_procdesc.c
@@ -129,7 +129,7 @@ procdesc_find(struct thread *td, int fd, const cap_rights_t *rightsp,
if (error)
return (error);
if (fp->f_type != DTYPE_PROCDESC) {
- error = EBADF;
+ error = EINVAL;
goto out;
}
pd = fp->f_data;
@@ -175,7 +175,7 @@ kern_pdgetpid(struct thread *td, int fd, const cap_rights_t *rightsp,
if (error)
return (error);
if (fp->f_type != DTYPE_PROCDESC) {
- error = EBADF;
+ error = EINVAL;
goto out;
}
*pidp = procdesc_pid(fp);
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
index 4122f9261871..4cef89cd5219 100644
--- a/sys/kern/syscalls.c
+++ b/sys/kern/syscalls.c
@@ -602,4 +602,6 @@ const char *syscallnames[] = {
"inotify_rm_watch", /* 594 = inotify_rm_watch */
"getgroups", /* 595 = getgroups */
"setgroups", /* 596 = setgroups */
+ "jail_attach_jd", /* 597 = jail_attach_jd */
+ "jail_remove_jd", /* 598 = jail_remove_jd */
};
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index fa64597d14a5..911f9093824b 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -3383,5 +3383,15 @@
_In_reads_(gidsetsize) const gid_t *gidset
);
}
+597 AUE_JAIL_ATTACH STD {
+ int jail_attach_jd(
+ int fd
+ );
+ }
+598 AUE_JAIL_REMOVE STD {
+ int jail_remove_jd(
+ int fd
+ );
+ }
; vim: syntax=off
diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c
index 2b1ea9eed8d4..e28fef931ea8 100644
--- a/sys/kern/systrace_args.c
+++ b/sys/kern/systrace_args.c
@@ -3500,6 +3500,20 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 2;
break;
}
+ /* jail_attach_jd */
+ case 597: {
+ struct jail_attach_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* jail_remove_jd */
+ case 598: {
+ struct jail_remove_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -9367,6 +9381,26 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
+ /* jail_attach_jd */
+ case 597:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* jail_remove_jd */
+ case 598:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -11365,6 +11399,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* jail_attach_jd */
+ case 597:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* jail_remove_jd */
+ case 598:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 19870e989437..6138e543fae7 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1807,9 +1807,7 @@ uipc_filt_sowrite(struct knote *kn, long hint)
kn->kn_data = uipc_stream_sbspace(&so2->so_rcv);
if (so2->so_rcv.sb_state & SBS_CANTRCVMORE) {
- /*
- * XXXGL: maybe kn->kn_flags |= EV_EOF ?
- */
+ kn->kn_flags |= EV_EOF;
return (1);
} else if (kn->kn_sfflags & NOTE_LOWAT)
return (kn->kn_data >= kn->kn_sdata);
diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c
index cd30d5cfae47..ceda770cb714 100644
--- a/sys/kern/vfs_init.c
+++ b/sys/kern/vfs_init.c
@@ -103,6 +103,16 @@ struct vattr va_null;
* Routines having to do with the management of the vnode table.
*/
+void
+vfs_unref_vfsconf(struct vfsconf *vfsp)
+{
+ vfsconf_lock();
+ KASSERT(vfsp->vfc_refcount > 0,
+ ("vfs %p refcount underflow %d", vfsp, vfsp->vfc_refcount));
+ vfsp->vfc_refcount--;
+ vfsconf_unlock();
+}
+
static struct vfsconf *
vfs_byname_locked(const char *name)
{
@@ -123,9 +133,11 @@ vfs_byname(const char *name)
{
struct vfsconf *vfsp;
- vfsconf_slock();
+ vfsconf_lock();
vfsp = vfs_byname_locked(name);
- vfsconf_sunlock();
+ if (vfsp != NULL)
+ vfsp->vfc_refcount++;
+ vfsconf_unlock();
return (vfsp);
}
@@ -387,7 +399,7 @@ vfs_register(struct vfsconf *vfc)
static int once;
struct vfsconf *tvfc;
uint32_t hashval;
- int secondpass;
+ int error, prevmaxconf, secondpass;
if (!once) {
vattr_null(&va_null);
@@ -405,6 +417,7 @@ vfs_register(struct vfsconf *vfc)
return (EEXIST);
}
+ prevmaxconf = maxvfsconf;
if (vfs_typenumhash != 0) {
/*
* Calculate a hash on vfc_name to use for vfc_typenum. Unless
@@ -497,16 +510,24 @@ vfs_register(struct vfsconf *vfc)
vfc->vfc_vfsops = &vfsops_sigdefer;
}
- if (vfc->vfc_flags & VFCF_JAIL)
- prison_add_vfs(vfc);
-
/*
* Call init function for this VFS...
*/
if ((vfc->vfc_flags & VFCF_SBDRY) != 0)
- vfc->vfc_vfsops_sd->vfs_init(vfc);
+ error = vfc->vfc_vfsops_sd->vfs_init(vfc);
else
- vfc->vfc_vfsops->vfs_init(vfc);
+ error = vfc->vfc_vfsops->vfs_init(vfc);
+
+ if (error != 0) {
+ maxvfsconf = prevmaxconf;
+ TAILQ_REMOVE(&vfsconf, vfc, vfc_list);
+ vfsconf_unlock();
+ return (error);
+ }
+
+ if ((vfc->vfc_flags & VFCF_JAIL) != 0)
+ prison_add_vfs(vfc);
+
vfsconf_unlock();
/*
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
index 8e64a7fe966b..13403acacc08 100644
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -683,7 +683,6 @@ vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
MPASSERT(mp->mnt_vfs_ops == 1, mp,
("vfs_ops should be 1 but %d found", mp->mnt_vfs_ops));
(void) vfs_busy(mp, MBF_NOWAIT);
- atomic_add_acq_int(&vfsp->vfc_refcount, 1);
mp->mnt_op = vfsp->vfc_vfsops;
mp->mnt_vfc = vfsp;
mp->mnt_stat.f_type = vfsp->vfc_typenum;
@@ -731,7 +730,6 @@ vfs_mount_destroy(struct mount *mp)
__FILE__, __LINE__));
MPPASS(mp->mnt_writeopcount == 0, mp);
MPPASS(mp->mnt_secondary_writes == 0, mp);
- atomic_subtract_rel_int(&mp->mnt_vfc->vfc_refcount, 1);
if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
struct vnode *vp;
@@ -769,6 +767,9 @@ vfs_mount_destroy(struct mount *mp)
vfs_free_addrlist(mp->mnt_export);
free(mp->mnt_export, M_MOUNT);
}
+ vfsconf_lock();
+ mp->mnt_vfc->vfc_refcount--;
+ vfsconf_unlock();
crfree(mp->mnt_cred);
uma_zfree(mount_zone, mp);
}
@@ -1133,6 +1134,7 @@ vfs_domount_first(
if (jailed(td->td_ucred) && (!prison_allow(td->td_ucred,
vfsp->vfc_prison_flag) || vp == td->td_ucred->cr_prison->pr_root)) {
vput(vp);
+ vfs_unref_vfsconf(vfsp);
return (EPERM);
}
@@ -1169,6 +1171,7 @@ vfs_domount_first(
}
if (error != 0) {
vput(vp);
+ vfs_unref_vfsconf(vfsp);
return (error);
}
vn_seqc_write_begin(vp);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index a6e38be89291..57732ddab7d9 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -2186,6 +2186,8 @@ freevnode(struct vnode *vp)
{
struct bufobj *bo;
+ ASSERT_VOP_UNLOCKED(vp, __func__);
+
/*
* The vnode has been marked for destruction, so free it.
*
@@ -2222,12 +2224,16 @@ freevnode(struct vnode *vp)
mac_vnode_destroy(vp);
#endif
if (vp->v_pollinfo != NULL) {
+ int error __diagused;
+
/*
* Use LK_NOWAIT to shut up witness about the lock. We may get
* here while having another vnode locked when trying to
* satisfy a lookup and needing to recycle.
*/
- VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT);
+ error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT);
+ VNASSERT(error == 0, vp,
+ ("freevnode: cannot lock vp %p for pollinfo destroy", vp));
destroy_vpollinfo(vp->v_pollinfo);
VOP_UNLOCK(vp);
vp->v_pollinfo = NULL;
diff --git a/sys/libkern/arm64/crc32c_armv8.S b/sys/libkern/arm64/crc32c_armv8.S
index 649afff4b711..430b24f7615a 100644
--- a/sys/libkern/arm64/crc32c_armv8.S
+++ b/sys/libkern/arm64/crc32c_armv8.S
@@ -39,14 +39,14 @@ ENTRY(armv8_crc32c)
cbz w2, end
tbz x1, #0x0, half_word_aligned
sub w2, w2, 0x1
- ldr w10, [x1], #0x1
+ ldrb w10, [x1], #0x1
crc32cb w0, w0, w10
half_word_aligned:
cmp w2, #0x2
b.lo last_byte
tbz x1, #0x1, word_aligned
sub w2, w2, 0x2
- ldr w10, [x1], #0x2
+ ldrh w10, [x1], #0x2
crc32ch w0, w0, w10
word_aligned:
cmp w2, #0x4
@@ -69,11 +69,11 @@ last_word:
crc32cw w0, w0, w10
last_half_word:
tbz w2, #0x1, last_byte
- ldr w10, [x1], #0x2
+ ldrh w10, [x1], #0x2
crc32ch w0, w0, w10
last_byte:
tbz w2, #0x0, end
- ldr w10, [x1], #0x1
+ ldrb w10, [x1], #0x1
crc32cb w0, w0, w10
end:
ret
diff --git a/sys/modules/dtb/rockchip/Makefile b/sys/modules/dtb/rockchip/Makefile
index 33c2048cbb15..9c8ca1acc837 100644
--- a/sys/modules/dtb/rockchip/Makefile
+++ b/sys/modules/dtb/rockchip/Makefile
@@ -21,7 +21,8 @@ DTS= \
rockchip/rk3566-quartz64-a.dts \
rockchip/rk3568-nanopi-r5s.dts \
rockchip/rk3566-radxa-zero-3e.dts \
- rockchip/rk3566-radxa-zero-3w.dts
+ rockchip/rk3566-radxa-zero-3w.dts \
+ rockchip/rk3568-bpi-r2-pro.dts
DTSO= rk3328-analog-sound.dtso \
rk3328-i2c0.dtso \
diff --git a/sys/modules/irdma/Makefile b/sys/modules/irdma/Makefile
index b2ffb67ca66f..a9ef6e63d3f2 100644
--- a/sys/modules/irdma/Makefile
+++ b/sys/modules/irdma/Makefile
@@ -1,8 +1,8 @@
.include <bsd.own.mk>
-OFED_INC_DIR = ${.CURDIR}/../../ofed/include
-ICE_DIR = ${.CURDIR}/../../dev/ice
-.PATH: ${.CURDIR}/../../dev/irdma
+OFED_INC_DIR = ${SRCTOP}/sys/ofed/include
+ICE_DIR = ${SRCTOP}/sys/dev/ice
+.PATH: ${SRCTOP}/sys/dev/irdma
KMOD= irdma
SRCS= icrdma.c
diff --git a/sys/modules/sound/driver/hda/Makefile b/sys/modules/sound/driver/hda/Makefile
index 0eec98fc53e1..1e137dc5671c 100644
--- a/sys/modules/sound/driver/hda/Makefile
+++ b/sys/modules/sound/driver/hda/Makefile
@@ -2,7 +2,7 @@
KMOD= snd_hda
SRCS= device_if.h bus_if.h pci_if.h channel_if.h mixer_if.h hdac_if.h
-SRCS+= hdaa.c hdaa.h hdaa_patches.c hdac.c hdac_if.h hdac_if.c
-SRCS+= hdacc.c hdac_private.h hdac_reg.h hda_reg.h hdac.h
+SRCS+= hdaa.c hdaa.h hdaa_patches.c hdacc.c hdac.c hdac_if.c
+SRCS+= hdac_private.h hdac_reg.h hda_reg.h hdac.h
.include <bsd.kmod.mk>
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index 72167b752e53..8b3e64eba2f3 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -258,6 +258,9 @@
/* dops->d_revalidate() takes 4 args */
/* #undef HAVE_D_REVALIDATE_4ARGS */
+/* Define if d_set_d_op() is available */
+/* #undef HAVE_D_SET_D_OP */
+
/* Define to 1 if you have the 'execvpe' function. */
#define HAVE_EXECVPE 1
@@ -483,9 +486,6 @@
/* building against unsupported kernel version */
/* #undef HAVE_LINUX_EXPERIMENTAL */
-/* Define to 1 if you have the <linux/stat.h> header file. */
-/* #undef HAVE_LINUX_STAT_H */
-
/* makedev() is declared in sys/mkdev.h */
/* #undef HAVE_MAKEDEV_IN_MKDEV */
@@ -840,7 +840,7 @@
/* #undef ZFS_DEVICE_MINOR */
/* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.4.0-rc1-FreeBSD_g00dfa094a"
+#define ZFS_META_ALIAS "zfs-2.4.99-29-FreeBSD_g7939bad5e"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -870,10 +870,10 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
-#define ZFS_META_RELEASE "zfs-2.4.0-rc1-FreeBSD_g00dfa094a"
+#define ZFS_META_RELEASE "29-FreeBSD_g7939bad5e"
/* Define the project version. */
-#define ZFS_META_VERSION "2.4.0"
+#define ZFS_META_VERSION "2.4.99"
/* count is located in percpu_ref.data */
/* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index 2b5d717da216..fff89435a0ff 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.4.0-rc1-0-g00dfa094a"
+#define ZFS_META_GITREV "zfs-2.4.99-29-g7939bad5e"
diff --git a/sys/net/if.c b/sys/net/if.c
index 202be4794f6e..b6a798aa0fab 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -74,7 +74,6 @@
#include <vm/uma.h>
#include <net/bpf.h>
-#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_clone.h>
@@ -1102,6 +1101,7 @@ if_detach_internal(struct ifnet *ifp, bool vmove)
struct ifaddr *ifa;
int i;
struct domain *dp;
+ void *if_afdata[AF_MAX];
#ifdef VIMAGE
bool shutdown;
@@ -1225,15 +1225,30 @@ finish_vnet_shutdown:
IF_AFDATA_LOCK(ifp);
i = ifp->if_afdata_initialized;
ifp->if_afdata_initialized = 0;
+ if (i != 0) {
+ /*
+ * Defer the dom_ifdetach call.
+ */
+ _Static_assert(sizeof(if_afdata) == sizeof(ifp->if_afdata),
+ "array size mismatch");
+ memcpy(if_afdata, ifp->if_afdata, sizeof(if_afdata));
+ memset(ifp->if_afdata, 0, sizeof(ifp->if_afdata));
+ }
IF_AFDATA_UNLOCK(ifp);
if (i == 0)
return;
+ /*
+ * XXXZL: This net epoch wait is not necessary if we have done right.
+ * But if we have not, at least we can guarantee that threads that
+ * enter the net epoch will see NULL address family dependent data,
+ * e.g. if_afdata[AF_INET6]. A clear NULL pointer dereference is much
+ * better than writing to freed memory.
+ */
+ NET_EPOCH_WAIT();
SLIST_FOREACH(dp, &domains, dom_next) {
- if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
- (*dp->dom_ifdetach)(ifp,
- ifp->if_afdata[dp->dom_family]);
- ifp->if_afdata[dp->dom_family] = NULL;
- }
+ if (dp->dom_ifdetach != NULL &&
+ if_afdata[dp->dom_family] != NULL)
+ (*dp->dom_ifdetach)(ifp, if_afdata[dp->dom_family]);
}
}
diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c
index 66555fd1feb5..cea7f1cb5e23 100644
--- a/sys/net/if_bridge.c
+++ b/sys/net/if_bridge.c
@@ -522,11 +522,11 @@ SYSCTL_BOOL(_net_link_bridge, OID_AUTO, log_mac_flap,
"Log MAC address port flapping");
/* allow IP addresses on bridge members */
-VNET_DEFINE_STATIC(bool, member_ifaddrs) = false;
+VNET_DEFINE_STATIC(bool, member_ifaddrs) = true;
#define V_member_ifaddrs VNET(member_ifaddrs)
SYSCTL_BOOL(_net_link_bridge, OID_AUTO, member_ifaddrs,
CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(member_ifaddrs), false,
- "Allow layer 3 addresses on bridge members");
+ "Allow layer 3 addresses on bridge members (deprecated)");
static bool
bridge_member_ifaddrs(void)
@@ -1448,24 +1448,30 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
/*
* If member_ifaddrs is disabled, do not allow an interface with
- * assigned IP addresses to be added to a bridge.
+ * assigned IP addresses to be added to a bridge. Skip this check
+ * for gif interfaces, because the IP address assigned to a gif
+ * interface is separate from the bridge's Ethernet segment.
*/
- if (!V_member_ifaddrs) {
+ if (ifs->if_type != IFT_GIF) {
struct ifaddr *ifa;
CK_STAILQ_FOREACH(ifa, &ifs->if_addrhead, ifa_link) {
-#ifdef INET
- if (ifa->ifa_addr->sa_family == AF_INET)
- return (EXTERROR(EINVAL,
- "Member interface may not have "
- "an IPv4 address configured"));
-#endif
-#ifdef INET6
- if (ifa->ifa_addr->sa_family == AF_INET6)
+ if (ifa->ifa_addr->sa_family != AF_INET &&
+ ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+
+ if (V_member_ifaddrs) {
+ if_printf(sc->sc_ifp,
+ "WARNING: Adding member interface %s which "
+ "has an IP address assigned is deprecated "
+ "and will be unsupported in a future "
+ "release.\n", ifs->if_xname);
+ break;
+ } else {
return (EXTERROR(EINVAL,
"Member interface may not have "
- "an IPv6 address configured"));
-#endif
+ "an IP address assigned"));
+ }
}
}
diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c
index 581c2434b8fb..fbffa8f359a0 100644
--- a/sys/net/if_epair.c
+++ b/sys/net/if_epair.c
@@ -69,6 +69,7 @@
#include <net/if_media.h>
#include <net/if_private.h>
#include <net/if_types.h>
+#include <net/if_vlan_var.h>
#include <net/netisr.h>
#ifdef RSS
#include <net/rss_config.h>
@@ -434,6 +435,21 @@ epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr)
imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX;
}
+/*
+ * Update ifp->if_hwassist according to the current value of ifp->if_capenable.
+ */
+static void
+epair_caps_changed(struct ifnet *ifp)
+{
+ uint64_t hwassist = 0;
+
+ if (ifp->if_capenable & IFCAP_TXCSUM)
+ hwassist |= CSUM_IP_TCP | CSUM_IP_UDP;
+ if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
+ hwassist |= CSUM_IP6_TCP | CSUM_IP6_UDP;
+ ifp->if_hwassist = hwassist;
+}
+
static int
epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
@@ -461,6 +477,44 @@ epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = 0;
break;
+ case SIOCGIFCAP:
+ ifr->ifr_reqcap = ifp->if_capabilities;
+ ifr->ifr_curcap = ifp->if_capenable;
+ error = 0;
+ break;
+ case SIOCSIFCAP:
+ /*
+ * Enable/disable capabilities as requested, besides
+ * IFCAP_RXCSUM(_IPV6), which always remain enabled.
+ * Incoming packets may have the mbuf flag CSUM_DATA_VALID set.
+ * Without IFCAP_RXCSUM(_IPV6), this flag would have to be
+ * removed, which does not seem helpful.
+ */
+ ifp->if_capenable = ifr->ifr_reqcap | IFCAP_RXCSUM |
+ IFCAP_RXCSUM_IPV6;
+ epair_caps_changed(ifp);
+ /*
+ * If IFCAP_TXCSUM(_IPV6) has been changed, change it on the
+ * other epair interface as well.
+ * A bridge disables IFCAP_TXCSUM(_IPV6) when adding one epair
+ * interface if another interface in the bridge has it disabled.
+ * In that case this capability needs to be disabled on the
+ * other epair interface to avoid sending packets in the bridge
+ * that rely on this capability.
+ */
+ sc = ifp->if_softc;
+ if ((ifp->if_capenable ^ sc->oifp->if_capenable) &
+ (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6)) {
+ sc->oifp->if_capenable &=
+ ~(IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6);
+ sc->oifp->if_capenable |= ifp->if_capenable &
+ (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6);
+ epair_caps_changed(sc->oifp);
+ }
+ VLAN_CAPABILITIES(ifp);
+ error = 0;
+ break;
+
default:
/* Let the common ethernet handler process this. */
error = ether_ioctl(ifp, cmd, data);
@@ -572,8 +626,11 @@ epair_setup_ifp(struct epair_softc *sc, char *name, int unit)
ifp->if_dname = epairname;
ifp->if_dunit = unit;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_capabilities = IFCAP_VLAN_MTU;
- ifp->if_capenable = IFCAP_VLAN_MTU;
+ ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_TXCSUM |
+ IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
+ ifp->if_capenable = IFCAP_VLAN_MTU | IFCAP_TXCSUM |
+ IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
+ epair_caps_changed(ifp);
ifp->if_transmit = epair_transmit;
ifp->if_qflush = epair_qflush;
ifp->if_start = epair_start;
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index 3ae0c01c0efc..9c157bf3d3c2 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -695,7 +695,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
* seen by upper protocol layers.
*/
if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
- bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
+ memcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
m->m_flags |= M_PROMISC;
}
diff --git a/sys/net/iflib.c b/sys/net/iflib.c
index 98c59e5de988..1e6d98291c04 100644
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -712,7 +712,7 @@ static uint32_t iflib_txq_can_drain(struct ifmp_ring *);
static void iflib_altq_if_start(if_t ifp);
static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m);
#endif
-static int iflib_register(if_ctx_t);
+static void iflib_register(if_ctx_t);
static void iflib_deregister(if_ctx_t);
static void iflib_unregister_vlan_handlers(if_ctx_t ctx);
static uint16_t iflib_get_mbuf_size_for(unsigned int size);
@@ -3646,6 +3646,12 @@ defrag:
bus_dmamap_unload(buf_tag, map);
DBG_COUNTER_INC(encap_txq_avail_fail);
DBG_COUNTER_INC(encap_txd_encap_fail);
+ if (ctx->ifc_sysctl_simple_tx) {
+ *m_headp = m_head = iflib_remove_mbuf(txq);
+ m_freem(*m_headp);
+ DBG_COUNTER_INC(tx_frees);
+ *m_headp = NULL;
+ }
if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0)
GROUPTASK_ENQUEUE(&txq->ift_task);
return (ENOBUFS);
@@ -4298,6 +4304,10 @@ iflib_if_transmit(if_t ifp, struct mbuf *m)
ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
m_freem(m);
DBG_COUNTER_INC(tx_frees);
+ if (err == ENOBUFS)
+ if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
+ else
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
return (err);
@@ -5136,10 +5146,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
ctx->ifc_dev = dev;
ctx->ifc_softc = sc;
- if ((err = iflib_register(ctx)) != 0) {
- device_printf(dev, "iflib_register failed %d\n", err);
- goto fail_ctx_free;
- }
+ iflib_register(ctx);
iflib_add_device_sysctl_pre(ctx);
scctx = &ctx->ifc_softc_ctx;
@@ -5363,7 +5370,6 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
DEBUGNET_SET(ctx->ifc_ifp, iflib);
- if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter);
iflib_add_device_sysctl_post(ctx);
iflib_add_pfil(ctx);
ctx->ifc_flags |= IFC_INIT_DONE;
@@ -5387,7 +5393,6 @@ fail_unlock:
CTX_UNLOCK(ctx);
IFNET_WUNLOCK();
iflib_deregister(ctx);
-fail_ctx_free:
device_set_softc(ctx->ifc_dev, NULL);
if (ctx->ifc_flags & IFC_SC_ALLOCATED)
free(ctx->ifc_softc, M_IFLIB);
@@ -5685,7 +5690,7 @@ _iflib_pre_assert(if_softc_ctx_t scctx)
MPASS(scctx->isc_txrx->ift_rxd_flush);
}
-static int
+static void
iflib_register(if_ctx_t ctx)
{
if_shared_ctx_t sctx = ctx->ifc_sctx;
@@ -5718,6 +5723,7 @@ iflib_register(if_ctx_t ctx)
if_settransmitfn(ifp, iflib_if_transmit);
#endif
if_setqflushfn(ifp, iflib_if_qflush);
+ if_setgetcounterfn(ifp, iflib_if_get_counter);
if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
ctx->ifc_vlan_attach_event =
EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx,
@@ -5731,7 +5737,6 @@ iflib_register(if_ctx_t ctx)
ifmedia_init(ctx->ifc_mediap, IFM_IMASK,
iflib_media_change, iflib_media_status);
}
- return (0);
}
static void
@@ -7146,6 +7151,11 @@ iflib_simple_transmit(if_t ifp, struct mbuf *m)
bytes_sent += m->m_pkthdr.len;
mcast_sent += !!(m->m_flags & M_MCAST);
(void)iflib_txd_db_check(txq, true);
+ } else {
+ if (error == ENOBUFS)
+ if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
+ else
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
(void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx));
mtx_unlock(&txq->ift_mtx);
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index d6c13470f2eb..e6fb1c2c3e1b 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -326,6 +326,7 @@ pf_counter_u64_zero(struct pf_counter_u64 *pfcu64)
_Static_assert(sizeof(time_t) == 4 || sizeof(time_t) == 8, "unexpected time_t size");
SYSCTL_DECL(_net_pf);
+MALLOC_DECLARE(M_PF);
MALLOC_DECLARE(M_PFHASH);
MALLOC_DECLARE(M_PF_RULE_ITEM);
@@ -645,6 +646,7 @@ struct pf_kpool {
int tblidx;
u_int16_t proxy_port[2];
u_int8_t opts;
+ sa_family_t ipv6_nexthop_af;
};
struct pf_rule_actions {
@@ -859,8 +861,8 @@ struct pf_krule {
u_int8_t keep_state;
sa_family_t af;
u_int8_t proto;
- u_int8_t type;
- u_int8_t code;
+ uint16_t type;
+ uint16_t code;
u_int8_t flags;
u_int8_t flagset;
u_int8_t min_ttl;
@@ -2421,7 +2423,7 @@ int pf_multihome_scan_init(int, int, struct pf_pdesc *);
int pf_multihome_scan_asconf(int, int, struct pf_pdesc *);
u_int32_t pf_new_isn(struct pf_kstate *);
-void *pf_pull_hdr(const struct mbuf *, int, void *, int, u_short *, u_short *,
+void *pf_pull_hdr(const struct mbuf *, int, void *, int, u_short *,
sa_family_t);
void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t);
void pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t,
@@ -2612,6 +2614,7 @@ struct pf_kruleset *pf_find_kruleset(const char *);
struct pf_kruleset *pf_get_leaf_kruleset(char *, char **);
struct pf_kruleset *pf_find_or_create_kruleset(const char *);
void pf_rs_initialize(void);
+void pf_rule_tree_free(struct pf_krule_global *);
struct pf_krule *pf_krule_alloc(void);
@@ -2680,7 +2683,7 @@ u_short pf_map_addr(sa_family_t, struct pf_krule *,
struct pf_addr *, struct pf_kpool *);
u_short pf_map_addr_sn(u_int8_t, struct pf_krule *,
struct pf_addr *, struct pf_addr *,
- sa_family_t *, struct pfi_kkif **nkif,
+ sa_family_t *, struct pfi_kkif **,
struct pf_addr *, struct pf_kpool *,
pf_sn_types_t);
int pf_get_transaddr_af(struct pf_krule *,
diff --git a/sys/net80211/ieee80211_ddb.c b/sys/net80211/ieee80211_ddb.c
index d96d7988a864..1dd8e38b9896 100644
--- a/sys/net80211/ieee80211_ddb.c
+++ b/sys/net80211/ieee80211_ddb.c
@@ -296,7 +296,7 @@ _db_show_sta(const struct ieee80211_node *ni)
ni->ni_htparam, ni->ni_htctlchan, ni->ni_ht2ndchan);
db_printf("\thtopmode 0x%x htstbc 0x%x chw %d (%s)\n",
ni->ni_htopmode, ni->ni_htstbc,
- ni->ni_chw, ieee80211_ni_chw_to_str(ni->ni_chw));
+ ni->ni_chw, net80211_ni_chw_to_str(ni->ni_chw));
/* XXX ampdu state */
for (i = 0; i < WME_NUM_TID; i++)
diff --git a/sys/net80211/ieee80211_freebsd.h b/sys/net80211/ieee80211_freebsd.h
index 3684fba52c5c..954801d95787 100644
--- a/sys/net80211/ieee80211_freebsd.h
+++ b/sys/net80211/ieee80211_freebsd.h
@@ -341,11 +341,16 @@ struct mbuf *ieee80211_getmgtframe(uint8_t **frm, int headroom, int pktlen);
#define M_AGE_SUB(m,adj) (m->m_pkthdr.csum_data -= adj)
/*
- * Store the sequence number.
+ * Store / retrieve the sequence number in an mbuf.
+ *
+ * The sequence number being stored/retrieved is the 12 bit
+ * base sequence number, not the 16 bit sequence number field.
+ * I.e., it's from 0..4095 inclusive, with no 4 bit padding for
+ * fragment numbers.
*/
#define M_SEQNO_SET(m, seqno) \
- ((m)->m_pkthdr.tso_segsz = (seqno))
-#define M_SEQNO_GET(m) ((m)->m_pkthdr.tso_segsz)
+ ((m)->m_pkthdr.tso_segsz = ((seqno) % IEEE80211_SEQ_RANGE))
+#define M_SEQNO_GET(m) (((m)->m_pkthdr.tso_segsz) % IEEE80211_SEQ_RANGE)
#define MTAG_ABI_NET80211 1132948340 /* net80211 ABI */
diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c
index c28f124648a1..3af56a228295 100644
--- a/sys/net80211/ieee80211_ht.c
+++ b/sys/net80211/ieee80211_ht.c
@@ -1476,7 +1476,7 @@ ieee80211_ht_wds_init(struct ieee80211_node *ni)
ni->ni_htcap |= IEEE80211_HTCAP_SHORTGI20;
if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) {
ni->ni_htcap |= IEEE80211_HTCAP_CHWIDTH40;
- ni->ni_chw = IEEE80211_STA_RX_BW_40;
+ ni->ni_chw = NET80211_STA_RX_BW_40;
if (IEEE80211_IS_CHAN_HT40U(ni->ni_chan))
ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_ABOVE;
else if (IEEE80211_IS_CHAN_HT40D(ni->ni_chan))
@@ -1484,7 +1484,7 @@ ieee80211_ht_wds_init(struct ieee80211_node *ni)
if (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40)
ni->ni_htcap |= IEEE80211_HTCAP_SHORTGI40;
} else {
- ni->ni_chw = IEEE80211_STA_RX_BW_20;
+ ni->ni_chw = NET80211_STA_RX_BW_20;
ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_NONE;
}
ni->ni_htctlchan = ni->ni_chan->ic_ieee;
@@ -1580,7 +1580,7 @@ ieee80211_ht_node_join(struct ieee80211_node *ni)
if (ni->ni_flags & IEEE80211_NODE_HT) {
vap->iv_ht_sta_assoc++;
- if (ni->ni_chw == IEEE80211_STA_RX_BW_40)
+ if (ni->ni_chw == NET80211_STA_RX_BW_40)
vap->iv_ht40_sta_assoc++;
}
htinfo_update(vap);
@@ -1598,7 +1598,7 @@ ieee80211_ht_node_leave(struct ieee80211_node *ni)
if (ni->ni_flags & IEEE80211_NODE_HT) {
vap->iv_ht_sta_assoc--;
- if (ni->ni_chw == IEEE80211_STA_RX_BW_40)
+ if (ni->ni_chw == NET80211_STA_RX_BW_40)
vap->iv_ht40_sta_assoc--;
}
htinfo_update(vap);
@@ -1827,7 +1827,7 @@ htinfo_update_chw(struct ieee80211_node *ni, int htflags, int vhtflags)
done:
/* update node's (11n) tx channel width */
ni->ni_chw = IEEE80211_IS_CHAN_HT40(ni->ni_chan) ?
- IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
+ NET80211_STA_RX_BW_40 : NET80211_STA_RX_BW_20;
return (ret);
}
@@ -1933,7 +1933,7 @@ ieee80211_vht_get_vhtflags(struct ieee80211_node *ni, uint32_t htflags)
{
#define _RETURN_CHAN_BITS(_cb) \
do { \
- IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, \
+ if (0) IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, \
"%s:%d: selected %b", __func__, __LINE__, \
(_cb), IEEE80211_CHAN_BITS); \
return (_cb); \
@@ -2689,11 +2689,11 @@ ht_recv_action_ht_txchwidth(struct ieee80211_node *ni,
* here.
*/
chw = (frm[2] == IEEE80211_A_HT_TXCHWIDTH_2040) ?
- IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
+ NET80211_STA_RX_BW_40 : NET80211_STA_RX_BW_20;
IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni,
"%s: HT txchwidth, width %d%s (%s)", __func__,
- chw, ni->ni_chw != chw ? "*" : "", ieee80211_ni_chw_to_str(chw));
+ chw, ni->ni_chw != chw ? "*" : "", net80211_ni_chw_to_str(chw));
if (chw != ni->ni_chw) {
/* XXX does this need to change the ht40 station count? */
ni->ni_chw = chw;
@@ -3832,5 +3832,5 @@ ieee80211_ht_check_tx_ht40(const struct ieee80211_node *ni)
return (IEEE80211_IS_CHAN_HT40(bss_chan) &&
IEEE80211_IS_CHAN_HT40(ni->ni_chan) &&
- (ni->ni_chw == IEEE80211_STA_RX_BW_40));
+ (ni->ni_chw == NET80211_STA_RX_BW_40));
}
diff --git a/sys/net80211/ieee80211_node.c b/sys/net80211/ieee80211_node.c
index a201d1b278f0..49ba00299fee 100644
--- a/sys/net80211/ieee80211_node.c
+++ b/sys/net80211/ieee80211_node.c
@@ -2673,7 +2673,7 @@ ieee80211_dump_node(struct ieee80211_node_table *nt __unused,
ni->ni_htctlchan, ni->ni_ht2ndchan);
net80211_printf("\thtopmode %x htstbc %x htchw %d (%s)\n",
ni->ni_htopmode, ni->ni_htstbc,
- ni->ni_chw, ieee80211_ni_chw_to_str(ni->ni_chw));
+ ni->ni_chw, net80211_ni_chw_to_str(ni->ni_chw));
net80211_printf("\tvhtcap %x freq1 %d freq2 %d vhtbasicmcs %x\n",
ni->ni_vhtcap, (int) ni->ni_vht_chan1, (int) ni->ni_vht_chan2,
(int) ni->ni_vht_basicmcs);
@@ -2831,7 +2831,7 @@ ieee80211_node_join(struct ieee80211_node *ni, int resp)
ni->ni_flags & IEEE80211_NODE_QOS ? ", QoS" : "",
/* XXX update for VHT string */
ni->ni_flags & IEEE80211_NODE_HT ?
- (ni->ni_chw == IEEE80211_STA_RX_BW_40 ? ", HT40" : ", HT20") : "",
+ (ni->ni_chw == NET80211_STA_RX_BW_40 ? ", HT40" : ", HT20") : "",
ni->ni_flags & IEEE80211_NODE_AMPDU ? " (+AMPDU)" : "",
ni->ni_flags & IEEE80211_NODE_AMSDU ? " (+AMSDU)" : "",
ni->ni_flags & IEEE80211_NODE_MIMO_RTS ? " (+SMPS-DYN)" :
diff --git a/sys/net80211/ieee80211_node.h b/sys/net80211/ieee80211_node.h
index ef25fa0d7fdd..f1246dd12419 100644
--- a/sys/net80211/ieee80211_node.h
+++ b/sys/net80211/ieee80211_node.h
@@ -109,33 +109,33 @@ enum ieee80211_mesh_mlstate {
"\20\1IDLE\2OPENSNT\2OPENRCV\3CONFIRMRCV\4ESTABLISHED\5HOLDING"
/*
- * This structure is shared with LinuxKPI 802.11 code describing up-to
- * which channel width the station can receive.
+ * This enum was shared with the LinuxKPI enum ieee80211_sta_rx_bandwidth
+ * describing up-to which channel width the station can receive.
* Rather than using hardcoded MHz values for the channel width use an enum with
* flags. This allows us to keep the uint8_t slot for ni_chw in
- * struct ieee80211_node and means we do not have to sync to the value for
- * LinuxKPI.
+ * struct ieee80211_node; it means we do not have to sync to the value for
+ * LinuxKPI (just the names).
*
* NB: BW_20 needs to 0 and values need to be sorted! Cannot make it
* bitfield-alike for use with %b.
*/
-enum ieee80211_sta_rx_bw {
- IEEE80211_STA_RX_BW_20 = 0x00,
- IEEE80211_STA_RX_BW_40,
- IEEE80211_STA_RX_BW_80,
- IEEE80211_STA_RX_BW_160,
- IEEE80211_STA_RX_BW_320,
+enum net80211_sta_rx_bw {
+ NET80211_STA_RX_BW_20 = 0x00,
+ NET80211_STA_RX_BW_40,
+ NET80211_STA_RX_BW_80,
+ NET80211_STA_RX_BW_160,
+ NET80211_STA_RX_BW_320,
} __packed;
static inline const char *
-ieee80211_ni_chw_to_str(enum ieee80211_sta_rx_bw bw)
+net80211_ni_chw_to_str(enum net80211_sta_rx_bw bw)
{
switch (bw) {
- case IEEE80211_STA_RX_BW_20: return ("BW_20");
- case IEEE80211_STA_RX_BW_40: return ("BW_40");
- case IEEE80211_STA_RX_BW_80: return ("BW_80");
- case IEEE80211_STA_RX_BW_160: return ("BW_160");
- case IEEE80211_STA_RX_BW_320: return ("BW_320");
+ case NET80211_STA_RX_BW_20: return ("BW_20");
+ case NET80211_STA_RX_BW_40: return ("BW_40");
+ case NET80211_STA_RX_BW_80: return ("BW_80");
+ case NET80211_STA_RX_BW_160: return ("BW_160");
+ case NET80211_STA_RX_BW_320: return ("BW_320");
}
}
@@ -285,7 +285,7 @@ struct ieee80211_node {
uint8_t ni_ht2ndchan; /* HT 2nd channel */
uint8_t ni_htopmode; /* HT operating mode */
uint8_t ni_htstbc; /* HT */
- enum ieee80211_sta_rx_bw ni_chw; /* negotiated channel width */
+ enum net80211_sta_rx_bw ni_chw; /* negotiated channel width */
struct ieee80211_htrateset ni_htrates; /* negotiated ht rate set */
struct ieee80211_tx_ampdu ni_tx_ampdu[WME_NUM_TID];
struct ieee80211_rx_ampdu ni_rx_ampdu[WME_NUM_TID];
diff --git a/sys/net80211/ieee80211_output.c b/sys/net80211/ieee80211_output.c
index 57fe687adffe..116fc76a9ce1 100644
--- a/sys/net80211/ieee80211_output.c
+++ b/sys/net80211/ieee80211_output.c
@@ -1082,6 +1082,12 @@ ieee80211_send_nulldata(struct ieee80211_node *ni)
uint8_t *frm;
int ret;
+ /* Don't send NULL frames if we've been configured not to do so. */
+ if ((ic->ic_flags_ext & IEEE80211_FEXT_NO_NULLDATA) != 0) {
+ ieee80211_node_decref(ni);
+ return (0);
+ }
+
if (vap->iv_state == IEEE80211_S_CAC) {
IEEE80211_NOTE(vap, IEEE80211_MSG_OUTPUT | IEEE80211_MSG_DOTH,
ni, "block %s frame in CAC state", "null data");
diff --git a/sys/net80211/ieee80211_phy.c b/sys/net80211/ieee80211_phy.c
index eb96d74a2bd9..7f53c717152b 100644
--- a/sys/net80211/ieee80211_phy.c
+++ b/sys/net80211/ieee80211_phy.c
@@ -658,26 +658,26 @@ static uint16_t ieee80211_vht_mcs_allowed_list_160[] = {
*
* See 802.11-2020 21.5 (Parameters for VHT-MCSs) for more details.
*
- * @param bw channel bandwidth, via enum ieee80211_sta_rx_bw
+ * @param bw channel bandwidth, via enum net80211_sta_rx_bw
* @param nss number of spatial streams, 1..8
* @returns bitmask of valid MCS rates from 0..9
*/
uint16_t
-ieee80211_phy_vht_get_mcs_mask(enum ieee80211_sta_rx_bw bw, uint8_t nss)
+ieee80211_phy_vht_get_mcs_mask(enum net80211_sta_rx_bw bw, uint8_t nss)
{
if (nss == 0 || nss > 8)
return (0);
switch (bw) {
- case IEEE80211_STA_RX_BW_20:
+ case NET80211_STA_RX_BW_20:
return (ieee80211_vht_mcs_allowed_list_20[nss - 1]);
- case IEEE80211_STA_RX_BW_40:
+ case NET80211_STA_RX_BW_40:
return (ieee80211_vht_mcs_allowed_list_40[nss - 1]);
- case IEEE80211_STA_RX_BW_80:
+ case NET80211_STA_RX_BW_80:
return (ieee80211_vht_mcs_allowed_list_80[nss - 1]);
- case IEEE80211_STA_RX_BW_160:
+ case NET80211_STA_RX_BW_160:
return (ieee80211_vht_mcs_allowed_list_160[nss - 1]);
- case IEEE80211_STA_RX_BW_320:
+ case NET80211_STA_RX_BW_320:
/* invalid for VHT */
return (0);
}
@@ -689,14 +689,14 @@ ieee80211_phy_vht_get_mcs_mask(enum ieee80211_sta_rx_bw bw, uint8_t nss)
*
* See 802.11-2020 21.5 (Parameters for VHT-MCSs) for more details.
*
- * @param bw channel bandwidth, via enum ieee80211_sta_rx_bw
+ * @param bw channel bandwidth, via enum net80211_sta_rx_bw
* @param nss number of spatial streams, 1..8
* @param mcs MCS rate, 0..9
* @retval true if the NSS / MCS / bandwidth combination is valid
* @retval false if the NSS / MCS / bandwidth combination is not valid
*/
bool
-ieee80211_phy_vht_validate_mcs(enum ieee80211_sta_rx_bw bw, uint8_t nss,
+ieee80211_phy_vht_validate_mcs(enum net80211_sta_rx_bw bw, uint8_t nss,
uint8_t mcs)
{
uint16_t mask;
@@ -737,7 +737,7 @@ static struct mcs_entry mcs_entries[] = {
/**
* @brief Calculate the bitrate of the given VHT MCS rate.
*
- * @param bw Channel bandwidth (enum ieee80211_sta_rx_bw)
+ * @param bw Channel bandwidth (enum net80211_sta_rx_bw)
* @param nss Number of spatial streams, 1..8
* @param mcs MCS, 0..9
* @param is_shortgi True if short guard-interval (400nS)
@@ -746,7 +746,7 @@ static struct mcs_entry mcs_entries[] = {
* @returns The bitrate in kbit/sec.
*/
uint32_t
-ieee80211_phy_vht_get_mcs_kbit(enum ieee80211_sta_rx_bw bw,
+ieee80211_phy_vht_get_mcs_kbit(enum net80211_sta_rx_bw bw,
uint8_t nss, uint8_t mcs, bool is_shortgi)
{
uint32_t sym_len, n_carriers;
@@ -773,16 +773,16 @@ ieee80211_phy_vht_get_mcs_kbit(enum ieee80211_sta_rx_bw bw,
* See 802.11-2020 Table 21-5 (Timing-related constraints.)
*/
switch (bw) {
- case IEEE80211_STA_RX_BW_20:
+ case NET80211_STA_RX_BW_20:
n_carriers = 52;
break;
- case IEEE80211_STA_RX_BW_40:
+ case NET80211_STA_RX_BW_40:
n_carriers = 108;
break;
- case IEEE80211_STA_RX_BW_80:
+ case NET80211_STA_RX_BW_80:
n_carriers = 234;
break;
- case IEEE80211_STA_RX_BW_160:
+ case NET80211_STA_RX_BW_160:
n_carriers = 468;
break;
default:
diff --git a/sys/net80211/ieee80211_phy.h b/sys/net80211/ieee80211_phy.h
index 749b082e34e9..391c8bfc5010 100644
--- a/sys/net80211/ieee80211_phy.h
+++ b/sys/net80211/ieee80211_phy.h
@@ -221,13 +221,13 @@ uint32_t ieee80211_compute_duration_ht(uint32_t frameLen,
uint16_t rate, int streams, int isht40,
int isShortGI);
-enum ieee80211_sta_rx_bw;
+enum net80211_sta_rx_bw;
-uint16_t ieee80211_phy_vht_get_mcs_mask(enum ieee80211_sta_rx_bw,
+uint16_t ieee80211_phy_vht_get_mcs_mask(enum net80211_sta_rx_bw,
uint8_t);
-bool ieee80211_phy_vht_validate_mcs(enum ieee80211_sta_rx_bw,
+bool ieee80211_phy_vht_validate_mcs(enum net80211_sta_rx_bw,
uint8_t, uint8_t);
-uint32_t ieee80211_phy_vht_get_mcs_kbit(enum ieee80211_sta_rx_bw,
+uint32_t ieee80211_phy_vht_get_mcs_kbit(enum net80211_sta_rx_bw,
uint8_t, uint8_t, bool);
#endif /* _KERNEL */
diff --git a/sys/net80211/ieee80211_sta.c b/sys/net80211/ieee80211_sta.c
index 463a8b16773b..19e5ffe9a367 100644
--- a/sys/net80211/ieee80211_sta.c
+++ b/sys/net80211/ieee80211_sta.c
@@ -1934,7 +1934,7 @@ sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, int subtype,
vap->iv_flags&IEEE80211_F_USEPROT ? ", protection" : "",
ni->ni_flags & IEEE80211_NODE_QOS ? ", QoS" : "",
ni->ni_flags & IEEE80211_NODE_HT ?
- (ni->ni_chw == IEEE80211_STA_RX_BW_40 ? ", HT40" : ", HT20") : "",
+ (ni->ni_chw == NET80211_STA_RX_BW_40 ? ", HT40" : ", HT20") : "",
ni->ni_flags & IEEE80211_NODE_AMPDU ? " (+AMPDU)" : "",
ni->ni_flags & IEEE80211_NODE_AMSDU ? " (+AMSDU)" : "",
ni->ni_flags & IEEE80211_NODE_MIMO_RTS ? " (+SMPS-DYN)" :
diff --git a/sys/net80211/ieee80211_var.h b/sys/net80211/ieee80211_var.h
index a0293f814899..b9bc2357428d 100644
--- a/sys/net80211/ieee80211_var.h
+++ b/sys/net80211/ieee80211_var.h
@@ -700,13 +700,14 @@ MALLOC_DECLARE(M_80211_VAP);
#define IEEE80211_FEXT_QUIET_IE 0x00800000 /* STATUS: quiet IE in a beacon has been added */
#define IEEE80211_FEXT_UAPSD 0x01000000 /* CONF: enable U-APSD */
#define IEEE80211_FEXT_AMPDU_OFFLOAD 0x02000000 /* CONF: driver/fw handles AMPDU[-TX] itself */
+#define IEEE80211_FEXT_NO_NULLDATA 0x04000000 /* CONF: don't originate NULL data frames from net80211 */
#define IEEE80211_FEXT_BITS \
"\20\2INACT\3SCANWAIT\4BGSCAN\5WPS\6TSN\7SCANREQ\10RESUME" \
"\0114ADDR\12NONEPR_PR\13SWBMISS\14DFS\15DOTD\16STATEWAIT\17REINIT" \
"\20BPF\21WDSLEGACY\22PROBECHAN\23UNIQMAC\24SCAN_OFFLOAD\25SEQNO_OFFLOAD" \
"\26FRAG_OFFLOAD\27VHT" \
- "\30QUIET_IE\31UAPSD\32AMPDU_OFFLOAD"
+ "\30QUIET_IE\31UAPSD\32AMPDU_OFFLOAD\33NO_NULLDATA"
/* ic_flags_ht/iv_flags_ht */
#define IEEE80211_FHT_NONHT_PR 0x00000001 /* STATUS: non-HT sta present */
diff --git a/sys/net80211/ieee80211_vht.c b/sys/net80211/ieee80211_vht.c
index de0b691d4d2a..10a5fc7f08ab 100644
--- a/sys/net80211/ieee80211_vht.c
+++ b/sys/net80211/ieee80211_vht.c
@@ -974,7 +974,7 @@ ieee80211_vht_check_tx_vht40(const struct ieee80211_node *ni)
return (IEEE80211_IS_CHAN_VHT40(bss_chan) &&
IEEE80211_IS_CHAN_VHT40(ni->ni_chan) &&
- (ni->ni_chw == IEEE80211_STA_RX_BW_40));
+ (ni->ni_chw == NET80211_STA_RX_BW_40));
}
/*
@@ -1003,7 +1003,7 @@ ieee80211_vht_check_tx_vht80(const struct ieee80211_node *ni)
*/
return (IEEE80211_IS_CHAN_VHT80(bss_chan) &&
IEEE80211_IS_CHAN_VHT80(ni->ni_chan) &&
- (ni->ni_chw != IEEE80211_STA_RX_BW_20));
+ (ni->ni_chw != NET80211_STA_RX_BW_20));
}
/*
@@ -1030,7 +1030,7 @@ ieee80211_vht_check_tx_vht160(const struct ieee80211_node *ni)
* If a HT TX width action frame sets it to 20MHz
* then reject doing 160MHz.
*/
- if (ni->ni_chw == IEEE80211_STA_RX_BW_20)
+ if (ni->ni_chw == NET80211_STA_RX_BW_20)
return (false);
if (IEEE80211_IS_CHAN_VHT160(bss_chan) &&
@@ -1062,19 +1062,19 @@ ieee80211_vht_check_tx_vht160(const struct ieee80211_node *ni)
*/
bool
ieee80211_vht_check_tx_bw(const struct ieee80211_node *ni,
- enum ieee80211_sta_rx_bw bw)
+ enum net80211_sta_rx_bw bw)
{
switch (bw) {
- case IEEE80211_STA_RX_BW_20:
+ case NET80211_STA_RX_BW_20:
return (ieee80211_vht_check_tx_vht(ni));
- case IEEE80211_STA_RX_BW_40:
+ case NET80211_STA_RX_BW_40:
return (ieee80211_vht_check_tx_vht40(ni));
- case IEEE80211_STA_RX_BW_80:
+ case NET80211_STA_RX_BW_80:
return (ieee80211_vht_check_tx_vht80(ni));
- case IEEE80211_STA_RX_BW_160:
+ case NET80211_STA_RX_BW_160:
return (ieee80211_vht_check_tx_vht160(ni));
- case IEEE80211_STA_RX_BW_320:
+ case NET80211_STA_RX_BW_320:
return (false);
default:
return (false);
@@ -1096,7 +1096,7 @@ ieee80211_vht_check_tx_bw(const struct ieee80211_node *ni,
*/
bool
ieee80211_vht_node_check_tx_valid_mcs(const struct ieee80211_node *ni,
- enum ieee80211_sta_rx_bw bw, uint8_t nss, uint8_t mcs)
+ enum net80211_sta_rx_bw bw, uint8_t nss, uint8_t mcs)
{
uint8_t mc;
diff --git a/sys/net80211/ieee80211_vht.h b/sys/net80211/ieee80211_vht.h
index a1529df4a85b..b9b19fbc6008 100644
--- a/sys/net80211/ieee80211_vht.h
+++ b/sys/net80211/ieee80211_vht.h
@@ -65,8 +65,8 @@ void ieee80211_vht_get_vhtinfo_ie(struct ieee80211_node *ni,
bool ieee80211_vht_check_tx_vht(const struct ieee80211_node *);
bool ieee80211_vht_check_tx_bw(const struct ieee80211_node *,
- enum ieee80211_sta_rx_bw);
+ enum net80211_sta_rx_bw);
bool ieee80211_vht_node_check_tx_valid_mcs(const struct ieee80211_node *,
- enum ieee80211_sta_rx_bw bw, uint8_t, uint8_t);
+ enum net80211_sta_rx_bw bw, uint8_t, uint8_t);
#endif /* _NET80211_IEEE80211_VHT_H_ */
diff --git a/sys/netgraph/ng_parse.c b/sys/netgraph/ng_parse.c
index 448ecc92f075..5e1a1bb47ac0 100644
--- a/sys/netgraph/ng_parse.c
+++ b/sys/netgraph/ng_parse.c
@@ -1199,14 +1199,14 @@ ng_parse_composite(const struct ng_parse_type *type, const char *s,
int *off, const u_char *const start, u_char *const buf, int *buflen,
const enum comptype ctype)
{
- const int num = ng_get_composite_len(type, start, buf, ctype);
int nextIndex = 0; /* next implicit array index */
u_int index; /* field or element index */
int *foff; /* field value offsets in string */
int align, len, blen, error = 0;
/* Initialize */
- if (num < 0)
+ const int num = ng_get_composite_len(type, start, buf, ctype);
+ if (num < 0 || num > INT_MAX / sizeof(*foff))
return (EINVAL);
foff = malloc(num * sizeof(*foff), M_NETGRAPH_PARSE, M_NOWAIT | M_ZERO);
if (foff == NULL) {
diff --git a/sys/netinet/in.c b/sys/netinet/in.c
index 963449d4b4b1..70a61dbf93a3 100644
--- a/sys/netinet/in.c
+++ b/sys/netinet/in.c
@@ -522,9 +522,16 @@ in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct ucred *cred
/*
* Check if bridge wants to allow adding addrs to member interfaces.
*/
- if (ifp->if_bridge && bridge_member_ifaddrs_p &&
- !bridge_member_ifaddrs_p())
- return (EINVAL);
+ if (ifp->if_bridge != NULL && ifp->if_type != IFT_GIF &&
+ bridge_member_ifaddrs_p != NULL) {
+ if (bridge_member_ifaddrs_p())
+ if_printf(ifp, "WARNING: Assigning an IP address to "
+ "an interface which is also a bridge member is "
+ "deprecated and will be unsupported in a future "
+ "release.\n");
+ else
+ return (EINVAL);
+ }
/*
* See whether address already exist.
@@ -1882,6 +1889,8 @@ in_domifdetach(struct ifnet *ifp, void *aux)
{
struct in_ifinfo *ii = (struct in_ifinfo *)aux;
+ MPASS(ifp->if_afdata[AF_INET] == NULL);
+
igmp_domifdetach(ifp);
lltable_free(ii->ii_llt);
free(ii, M_IFADDR);
diff --git a/sys/netinet/tcp_log_buf.c b/sys/netinet/tcp_log_buf.c
index e24790ece43d..473c534ef83d 100644
--- a/sys/netinet/tcp_log_buf.c
+++ b/sys/netinet/tcp_log_buf.c
@@ -61,6 +61,9 @@
#include <net/vnet.h>
#include <netinet/in.h>
+#ifdef DDB
+#include <netinet/in_kdtrace.h>
+#endif
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/tcp_var.h>
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
index b6c55fac50b3..6e08ad2796a8 100644
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -128,8 +128,25 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW,
"Enable/Disable TCP SACK support");
VNET_DEFINE(int, tcp_do_newsack) = 1;
-SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, revised, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(tcp_do_newsack), 0,
+
+static int
+sysctl_net_inet_tcp_sack_revised(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ int new;
+
+ new = V_tcp_do_newsack;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ V_tcp_do_newsack = new;
+ gone_in(16, "net.inet.tcp.sack.revised will be deprecated."
+ " net.inet.tcp.sack.enable will always follow RFC6675 SACK.\n");
+ }
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp_sack, OID_AUTO, revised, CTLFLAG_VNET | CTLFLAG_RW | CTLTYPE_INT,
+ &VNET_NAME(tcp_do_newsack), 0, sysctl_net_inet_tcp_sack_revised, "CU",
"Use revised SACK loss recovery per RFC 6675");
VNET_DEFINE(int, tcp_do_lrd) = 1;
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 2e039ebbfdd2..cc83a21773a8 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -645,14 +645,14 @@ out:
static int
sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
{
- int error = ENOENT;
struct tcp_function_set fs;
struct tcp_function_block *blk;
+ int error;
- memset(&fs, 0, sizeof(fs));
+ memset(&fs, 0, sizeof(struct tcp_function_set));
rw_rlock(&tcp_function_lock);
blk = find_tcp_fb_locked(V_tcp_func_set_ptr, NULL);
- if (blk) {
+ if (blk != NULL) {
/* Found him */
strcpy(fs.function_set_name, blk->tfb_tcp_block_name);
fs.pcbcnt = blk->tfb_refcnt;
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 80e6b53d10df..1ee6c6e31f33 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -102,15 +102,15 @@
#include <security/mac/mac_framework.h>
-VNET_DEFINE_STATIC(int, tcp_syncookies) = 1;
+VNET_DEFINE_STATIC(bool, tcp_syncookies) = true;
#define V_tcp_syncookies VNET(tcp_syncookies)
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW,
+SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_syncookies), 0,
"Use TCP SYN cookies if the syncache overflows");
-VNET_DEFINE_STATIC(int, tcp_syncookiesonly) = 0;
+VNET_DEFINE_STATIC(bool, tcp_syncookiesonly) = false;
#define V_tcp_syncookiesonly VNET(tcp_syncookiesonly)
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW,
+SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_syncookiesonly), 0,
"Use only TCP SYN cookies");
@@ -553,9 +553,8 @@ syncache_timer(void *xsch)
static inline bool
syncache_cookiesonly(void)
{
-
- return (V_tcp_syncookies && (V_tcp_syncache.paused ||
- V_tcp_syncookiesonly));
+ return ((V_tcp_syncookies && V_tcp_syncache.paused) ||
+ V_tcp_syncookiesonly);
}
/*
@@ -1083,40 +1082,48 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
#endif
if (sc == NULL) {
- /*
- * There is no syncache entry, so see if this ACK is
- * a returning syncookie. To do this, first:
- * A. Check if syncookies are used in case of syncache
- * overflows
- * B. See if this socket has had a syncache entry dropped in
- * the recent past. We don't want to accept a bogus
- * syncookie if we've never received a SYN or accept it
- * twice.
- * C. check that the syncookie is valid. If it is, then
- * cobble up a fake syncache entry, and return.
- */
- if (locked && !V_tcp_syncookies) {
- SCH_UNLOCK(sch);
- TCPSTAT_INC(tcps_sc_spurcookie);
- if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
- log(LOG_DEBUG, "%s; %s: Spurious ACK, "
- "segment rejected (syncookies disabled)\n",
- s, __func__);
- goto failed;
- }
- if (locked && !V_tcp_syncookiesonly &&
- sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) {
+ if (locked) {
+ /*
+ * The syncache is currently in use (neither disabled,
+ * nor paused), but no entry was found.
+ */
+ if (!V_tcp_syncookies) {
+ /*
+ * Since no syncookies are used in case of
+ * a bucket overflow, don't even check for
+ * a valid syncookie.
+ */
+ SCH_UNLOCK(sch);
+ TCPSTAT_INC(tcps_sc_spurcookie);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious ACK, "
+ "segment rejected "
+ "(syncookies disabled)\n",
+ s, __func__);
+ goto failed;
+ }
+ if (sch->sch_last_overflow <
+ time_uptime - SYNCOOKIE_LIFETIME) {
+ /*
+ * Since the bucket did not overflow recently,
+ * don't even check for a valid syncookie.
+ */
+ SCH_UNLOCK(sch);
+ TCPSTAT_INC(tcps_sc_spurcookie);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious ACK, "
+ "segment rejected "
+ "(no syncache entry)\n",
+ s, __func__);
+ goto failed;
+ }
SCH_UNLOCK(sch);
- TCPSTAT_INC(tcps_sc_spurcookie);
- if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
- log(LOG_DEBUG, "%s; %s: Spurious ACK, "
- "segment rejected (no syncache entry)\n",
- s, __func__);
- goto failed;
}
- if (locked)
- SCH_UNLOCK(sch);
bzero(&scs, sizeof(scs));
+ /*
+ * Now check whether the syncookie is valid. If it is, create an
+ * on-stack syncache entry.
+ */
if (syncookie_expand(inc, sch, &scs, th, to, *lsop, port)) {
sc = &scs;
TCPSTAT_INC(tcps_sc_recvcookie);
@@ -1291,10 +1298,9 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
if (__predict_false(*lsop == NULL)) {
TCPSTAT_INC(tcps_sc_aborted);
TCPSTATES_DEC(TCPS_SYN_RECEIVED);
- } else
+ } else if (sc != &scs)
TCPSTAT_INC(tcps_sc_completed);
-/* how do we find the inp for the new socket? */
if (sc != &scs)
syncache_free(sc);
return (1);
@@ -1669,7 +1675,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sc->sc_tsoff = tcp_new_ts_offset(inc);
}
if ((to->to_flags & TOF_SCALE) && (V_tcp_do_rfc1323 != 3)) {
- int wscale = 0;
+ u_int wscale = 0;
/*
* Pick the smallest possible scaling factor that
@@ -1719,13 +1725,13 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
if (V_tcp_do_ecn && (tp->t_flags2 & TF2_CANNOT_DO_ECN) == 0)
sc->sc_flags |= tcp_ecn_syncache_add(tcp_get_flags(th), iptos);
- if (V_tcp_syncookies)
+ if (V_tcp_syncookies || V_tcp_syncookiesonly)
sc->sc_iss = syncookie_generate(sch, sc);
else
sc->sc_iss = arc4random();
#ifdef INET6
if (autoflowlabel) {
- if (V_tcp_syncookies)
+ if (V_tcp_syncookies || V_tcp_syncookiesonly)
sc->sc_flowlabel = sc->sc_iss;
else
sc->sc_flowlabel = ip6_randomflowlabel();
@@ -2265,7 +2271,7 @@ syncookie_expand(struct in_conninfo *inc, const struct syncache_head *sch,
uint32_t hash;
uint8_t *secbits;
tcp_seq ack, seq;
- int wnd, wscale = 0;
+ int wnd;
union syncookie cookie;
/*
@@ -2316,12 +2322,14 @@ syncookie_expand(struct in_conninfo *inc, const struct syncache_head *sch,
sc->sc_peer_mss = tcp_sc_msstab[cookie.flags.mss_idx];
- /* We can simply recompute receive window scale we sent earlier. */
- while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < sb_max)
- wscale++;
-
/* Only use wscale if it was enabled in the orignal SYN. */
if (cookie.flags.wscale_idx > 0) {
+ u_int wscale = 0;
+
+ /* Recompute the receive window scale that was sent earlier. */
+ while (wscale < TCP_MAX_WINSHIFT &&
+ (TCP_MAXWIN << wscale) < sb_max)
+ wscale++;
sc->sc_requested_r_scale = wscale;
sc->sc_requested_s_scale = tcp_sc_wstab[cookie.flags.wscale_idx];
sc->sc_flags |= SCF_WINSCALE;
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index 3e6519118a40..cea8a916679b 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -223,16 +223,18 @@ VNET_SYSUNINIT(udp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udp_destroy, NULL);
* udp_append() will convert to a sockaddr_in6 before passing the address
* into the socket code.
*
- * In the normal case udp_append() will return 0, indicating that you
- * must unlock the inp. However if a tunneling protocol is in place we increment
- * the inpcb refcnt and unlock the inp, on return from the tunneling protocol we
- * then decrement the reference count. If the inp_rele returns 1, indicating the
- * inp is gone, we return that to the caller to tell them *not* to unlock
- * the inp. In the case of multi-cast this will cause the distribution
- * to stop (though most tunneling protocols known currently do *not* use
- * multicast).
+ * In the normal case udp_append() will return 'false', indicating that you
+ * must unlock the inpcb. However if a tunneling protocol is in place we
+ * increment the inpcb refcnt and unlock the inpcb, on return from the tunneling
+ * protocol we then decrement the reference count. If in_pcbrele_rlocked()
+ * returns 'true', indicating the inpcb is gone, we return that to the caller
+ * to tell them *not* to unlock the inpcb. In the case of multicast this will
+ * cause the distribution to stop (though most tunneling protocols known
+ * currently do *not* use multicast).
+ *
+ * The mbuf is always consumed.
*/
-static int
+static bool
udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
struct sockaddr_in *udp_in)
{
@@ -255,15 +257,16 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
in_pcbref(inp);
INP_RUNLOCK(inp);
- filtered = (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&udp_in[0],
- up->u_tun_ctx);
+ filtered = (*up->u_tun_func)(n, off, inp,
+ (struct sockaddr *)&udp_in[0], up->u_tun_ctx);
INP_RLOCK(inp);
- if (in_pcbrele_rlocked(inp))
- return (1);
- if (filtered) {
- INP_RUNLOCK(inp);
- return (1);
+ if (in_pcbrele_rlocked(inp)) {
+ if (!filtered)
+ m_freem(n);
+ return (true);
}
+ if (filtered)
+ return (false);
}
off += sizeof(struct udphdr);
@@ -273,18 +276,18 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
if (IPSEC_ENABLED(ipv4) &&
IPSEC_CHECK_POLICY(ipv4, n, inp) != 0) {
m_freem(n);
- return (0);
+ return (false);
}
if (up->u_flags & UF_ESPINUDP) {/* IPSec UDP encaps. */
if (IPSEC_ENABLED(ipv4) &&
UDPENCAP_INPUT(ipv4, n, off, AF_INET) != 0)
- return (0); /* Consumed. */
+ return (false);
}
#endif /* IPSEC */
#ifdef MAC
if (mac_inpcb_check_deliver(inp, n) != 0) {
m_freem(n);
- return (0);
+ return (false);
}
#endif /* MAC */
if (inp->inp_flags & INP_CONTROLOPTS ||
@@ -330,7 +333,7 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
UDPSTAT_INC(udps_fullsock);
} else
sorwakeup_locked(so);
- return (0);
+ return (false);
}
static bool
@@ -699,7 +702,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
else
UDP_PROBE(receive, NULL, inp, ip, inp, uh);
- if (udp_append(inp, ip, m, iphlen, udp_in) == 0)
+ if (!udp_append(inp, ip, m, iphlen, udp_in))
INP_RUNLOCK(inp);
return (IPPROTO_DONE);
diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index ce0655408a28..4f756a75fac7 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -1235,11 +1235,20 @@ in6_addifaddr(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *i
int carp_attached = 0;
int error;
- /* Check if this interface is a bridge member */
- if (ifp->if_bridge && bridge_member_ifaddrs_p &&
- !bridge_member_ifaddrs_p()) {
- error = EINVAL;
- goto out;
+ /*
+ * Check if bridge wants to allow adding addrs to member interfaces.
+ */
+ if (ifp->if_bridge != NULL && ifp->if_type != IFT_GIF &&
+ bridge_member_ifaddrs_p != NULL) {
+ if (bridge_member_ifaddrs_p()) {
+ if_printf(ifp, "WARNING: Assigning an IP address to "
+ "an interface which is also a bridge member is "
+ "deprecated and will be unsupported in a future "
+ "release.\n");
+ } else {
+ error = EINVAL;
+ goto out;
+ }
}
/*
@@ -2618,6 +2627,8 @@ in6_domifdetach(struct ifnet *ifp, void *aux)
{
struct in6_ifextra *ext = (struct in6_ifextra *)aux;
+ MPASS(ifp->if_afdata[AF_INET6] == NULL);
+
mld_domifdetach(ifp);
scope6_ifdetach(ext->scope6_id);
nd6_ifdetach(ifp, ext->nd_ifinfo);
diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c
index f284f7fa5ffc..cc149616006e 100644
--- a/sys/netinet6/in6_ifattach.c
+++ b/sys/netinet6/in6_ifattach.c
@@ -83,7 +83,6 @@ VNET_DECLARE(struct inpcbinfo, ripcbinfo);
#define V_ripcbinfo VNET(ripcbinfo)
static int get_rand_ifid(struct ifnet *, struct in6_addr *);
-static int get_ifid(struct ifnet *, struct ifnet *, struct in6_addr *);
static int in6_ifattach_linklocal(struct ifnet *, struct ifnet *);
static int in6_ifattach_loopback(struct ifnet *);
static void in6_purgemaddrs(struct ifnet *);
@@ -271,8 +270,8 @@ found:
*
* altifp - secondary EUI64 source
*/
-static int
-get_ifid(struct ifnet *ifp0, struct ifnet *altifp,
+int
+in6_get_ifid(struct ifnet *ifp0, struct ifnet *altifp,
struct in6_addr *in6)
{
struct ifnet *ifp;
@@ -356,7 +355,7 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp)
ifra.ifra_addr.sin6_addr.s6_addr32[3] = htonl(1);
} else {
NET_EPOCH_ENTER(et);
- error = get_ifid(ifp, altifp, &ifra.ifra_addr.sin6_addr);
+ error = in6_get_ifid(ifp, altifp, &ifra.ifra_addr.sin6_addr);
NET_EPOCH_EXIT(et);
if (error != 0) {
nd6log((LOG_ERR,
diff --git a/sys/netinet6/in6_ifattach.h b/sys/netinet6/in6_ifattach.h
index 897926e90078..fd52422b10be 100644
--- a/sys/netinet6/in6_ifattach.h
+++ b/sys/netinet6/in6_ifattach.h
@@ -41,6 +41,7 @@ void in6_ifdetach(struct ifnet *);
void in6_ifdetach_destroy(struct ifnet *);
void in6_tmpaddrtimer(void *);
int in6_get_hw_ifid(struct ifnet *, struct in6_addr *);
+int in6_get_ifid(struct ifnet *, struct ifnet *, struct in6_addr *);
int in6_nigroup(struct ifnet *, const char *, int, struct in6_addr *);
int in6_nigroup_oldmcprefix(struct ifnet *, const char *, int, struct in6_addr *);
#endif /* _KERNEL */
diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c
index b9af0a78a584..6fe78083df23 100644
--- a/sys/netinet6/nd6_rtr.c
+++ b/sys/netinet6/nd6_rtr.c
@@ -1182,9 +1182,9 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast)
struct ifnet *ifp = pr->ndpr_ifp;
struct ifaddr *ifa;
struct in6_aliasreq ifra;
- struct in6_ifaddr *ia, *ib;
+ struct in6_ifaddr *ia = NULL, *ib = NULL;
int error, plen0;
- struct in6_addr mask;
+ struct in6_addr *ifid_addr = NULL, mask;
int prefixlen = pr->ndpr_plen;
int updateflags;
char ip6buf[INET6_ADDRSTRLEN];
@@ -1212,18 +1212,42 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast)
* with different interface identifiers.
*/
ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
- if (ifa)
+ if (ifa) {
ib = (struct in6_ifaddr *)ifa;
- else
- return NULL;
+ ifid_addr = &ib->ia_addr.sin6_addr;
+
+ /* prefixlen + ifidlen must be equal to 128 */
+ plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
+ if (prefixlen != plen0) {
+ ifa_free(ifa);
+ ifid_addr = NULL;
+ nd6log((LOG_DEBUG,
+ "%s: wrong prefixlen for %s (prefix=%d ifid=%d)\n",
+ __func__, if_name(ifp), prefixlen, 128 - plen0));
+ }
+ }
- /* prefixlen + ifidlen must be equal to 128 */
- plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
- if (prefixlen != plen0) {
- ifa_free(ifa);
+ /* No suitable LL address, get the ifid directly */
+ if (ifid_addr == NULL) {
+ struct in6_addr taddr;
+ ifa = ifa_alloc(sizeof(taddr), M_WAITOK);
+ if (ifa) {
+ ib = (struct in6_ifaddr *)ifa;
+ ifid_addr = &ib->ia_addr.sin6_addr;
+ if(in6_get_ifid(ifp, NULL, ifid_addr) != 0) {
+ nd6log((LOG_DEBUG,
+ "%s: failed to get ifid for %s\n",
+ __func__, if_name(ifp)));
+ ifa_free(ifa);
+ ifid_addr = NULL;
+ }
+ }
+ }
+
+ if (ifid_addr == NULL) {
nd6log((LOG_INFO,
- "%s: wrong prefixlen for %s (prefix=%d ifid=%d)\n",
- __func__, if_name(ifp), prefixlen, 128 - plen0));
+ "%s: could not determine ifid for %s\n",
+ __func__, if_name(ifp)));
return NULL;
}
@@ -1233,13 +1257,13 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast)
IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr, &mask);
/* interface ID */
ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
- (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
+ (ifid_addr->s6_addr32[0] & ~mask.s6_addr32[0]);
ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
- (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
+ (ifid_addr->s6_addr32[1] & ~mask.s6_addr32[1]);
ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
- (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
+ (ifid_addr->s6_addr32[2] & ~mask.s6_addr32[2]);
ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
- (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
+ (ifid_addr->s6_addr32[3] & ~mask.s6_addr32[3]);
ifa_free(ifa);
/* lifetimes. */
diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c
index 585c196391c0..7b9405ee1f8d 100644
--- a/sys/netpfil/pf/if_pfsync.c
+++ b/sys/netpfil/pf/if_pfsync.c
@@ -605,7 +605,8 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version)
rt_kif = rpool_first->kif;
/*
* Guess the AF of the route address, FreeBSD 13 does
- * not support af-to so it should be safe.
+ * not support af-to nor prefer-ipv6-nexthop
+ * so it should be safe.
*/
rt_af = r->af;
} else if (!PF_AZERO(&sp->pfs_1301.rt_addr, sp->pfs_1301.af)) {
@@ -634,8 +635,9 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version)
}
rt = sp->pfs_1400.rt;
/*
- * Guess the AF of the route address, FreeBSD 13 does
- * not support af-to so it should be safe.
+ * Guess the AF of the route address, FreeBSD 14 does
+ * not support af-to nor prefer-ipv6-nexthop
+ * so it should be safe.
*/
rt_af = sp->pfs_1400.af;
}
@@ -1741,16 +1743,16 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
return (EINVAL);
- data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK);
+ data = malloc(ifr->ifr_cap_nv.length, M_PF, M_WAITOK);
if ((error = copyin(ifr->ifr_cap_nv.buffer, data,
ifr->ifr_cap_nv.length)) != 0) {
- free(data, M_TEMP);
+ free(data, M_PF);
return (error);
}
if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) {
- free(data, M_TEMP);
+ free(data, M_PF);
return (EINVAL);
}
@@ -1758,7 +1760,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
pfsync_nvstatus_to_kstatus(nvl, &status);
nvlist_destroy(nvl);
- free(data, M_TEMP);
+ free(data, M_PF);
error = pfsync_kstatus_to_softc(&status, sc);
return (error);
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 8cd4fff95b15..5889bb9d68e6 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -1667,7 +1667,6 @@ pf_state_key_addr_setup(struct pf_pdesc *pd,
#ifdef INET6
struct nd_neighbor_solicit nd;
struct pf_addr *target;
- u_short action, reason;
if (pd->af == AF_INET || pd->proto != IPPROTO_ICMPV6)
goto copy;
@@ -1676,7 +1675,8 @@ pf_state_key_addr_setup(struct pf_pdesc *pd,
case ND_NEIGHBOR_SOLICIT:
if (multi)
return (-1);
- if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), &action, &reason, pd->af))
+ if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL,
+ pd->af))
return (-1);
target = (struct pf_addr *)&nd.nd_ns_target;
daddr = target;
@@ -1684,7 +1684,8 @@ pf_state_key_addr_setup(struct pf_pdesc *pd,
case ND_NEIGHBOR_ADVERT:
if (multi)
return (-1);
- if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), &action, &reason, pd->af))
+ if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL,
+ pd->af))
return (-1);
target = (struct pf_addr *)&nd.nd_ns_target;
saddr = target;
@@ -3632,6 +3633,18 @@ pf_translate_af(struct pf_pdesc *pd)
pd->src = (struct pf_addr *)&ip4->ip_src;
pd->dst = (struct pf_addr *)&ip4->ip_dst;
pd->off = sizeof(struct ip);
+ if (pd->m->m_pkthdr.csum_flags & CSUM_TCP_IPV6) {
+ pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP_IPV6;
+ pd->m->m_pkthdr.csum_flags |= CSUM_TCP;
+ }
+ if (pd->m->m_pkthdr.csum_flags & CSUM_UDP_IPV6) {
+ pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP_IPV6;
+ pd->m->m_pkthdr.csum_flags |= CSUM_UDP;
+ }
+ if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
+ pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
+ pd->m->m_pkthdr.csum_flags |= CSUM_SCTP;
+ }
break;
case AF_INET6:
ip6 = mtod(pd->m, struct ip6_hdr *);
@@ -3649,6 +3662,18 @@ pf_translate_af(struct pf_pdesc *pd)
pd->src = (struct pf_addr *)&ip6->ip6_src;
pd->dst = (struct pf_addr *)&ip6->ip6_dst;
pd->off = sizeof(struct ip6_hdr);
+ if (pd->m->m_pkthdr.csum_flags & CSUM_TCP) {
+ pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP;
+ pd->m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
+ }
+ if (pd->m->m_pkthdr.csum_flags & CSUM_UDP) {
+ pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP;
+ pd->m->m_pkthdr.csum_flags |= CSUM_UDP_IPV6;
+ }
+ if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP) {
+ pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
+ pd->m->m_pkthdr.csum_flags |= CSUM_SCTP_IPV6;
+ }
/*
* If we're dealing with a reassembled packet we need to adjust
@@ -4019,7 +4044,7 @@ pf_modulate_sack(struct pf_pdesc *pd, struct tcphdr *th,
optsoff = pd->off + sizeof(struct tcphdr);
#define TCPOLEN_MINSACK (TCPOLEN_SACK + 2)
if (olen < TCPOLEN_MINSACK ||
- !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af))
+ !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af))
return (0);
eoh = opts + olen;
@@ -5082,7 +5107,7 @@ pf_get_wscale(struct pf_pdesc *pd)
olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m,
- pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af))
+ pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af))
return (0);
opt = opts;
@@ -5107,7 +5132,7 @@ pf_get_mss(struct pf_pdesc *pd)
olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m,
- pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af))
+ pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af))
return (0);
opt = opts;
@@ -5960,7 +5985,9 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
if (r->rt) {
/*
* Set act.rt here instead of in pf_rule_to_actions() because
- * it is applied only from the last pass rule.
+ * it is applied only from the last pass rule. For rules
+ * with the prefer-ipv6-nexthop option act.rt_af is a hint
+ * about AF of the forwarded packet and might be changed.
*/
pd->act.rt = r->rt;
if (r->rt == PF_REPLYTO)
@@ -7633,7 +7660,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op)
while (off < len) {
struct sctp_paramhdr h;
- if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL, NULL,
+ if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL,
pd->af))
return (PF_DROP);
@@ -7653,7 +7680,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op)
return (PF_DROP);
if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t),
- NULL, NULL, pd->af))
+ NULL, pd->af))
return (PF_DROP);
if (in_nullhost(t))
@@ -7697,7 +7724,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op)
return (PF_DROP);
if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t),
- NULL, NULL, pd->af))
+ NULL, pd->af))
return (PF_DROP);
if (memcmp(&t, &pd->src->v6, sizeof(t)) == 0)
break;
@@ -7727,7 +7754,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op)
struct sctp_asconf_paramhdr ah;
if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah),
- NULL, NULL, pd->af))
+ NULL, pd->af))
return (PF_DROP);
ret = pf_multihome_scan(start + off + sizeof(ah),
@@ -7742,7 +7769,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op)
struct sctp_asconf_paramhdr ah;
if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah),
- NULL, NULL, pd->af))
+ NULL, pd->af))
return (PF_DROP);
ret = pf_multihome_scan(start + off + sizeof(ah),
ntohs(ah.ph.param_length) - sizeof(ah), pd,
@@ -8024,7 +8051,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
ipoff2 = pd->off + ICMP_MINLEN;
if (!pf_pull_hdr(pd->m, ipoff2, &h2, sizeof(h2),
- NULL, reason, pd2.af)) {
+ reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
"(ip)");
@@ -8045,6 +8072,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
return (PF_DROP);
pd2.tot_len = ntohs(h2.ip_len);
+ pd2.ttl = h2.ip_ttl;
pd2.src = (struct pf_addr *)&h2.ip_src;
pd2.dst = (struct pf_addr *)&h2.ip_dst;
pd2.ip_sum = &h2.ip_sum;
@@ -8055,7 +8083,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
ipoff2 = pd->off + sizeof(struct icmp6_hdr);
if (!pf_pull_hdr(pd->m, ipoff2, &h2_6, sizeof(h2_6),
- NULL, reason, pd2.af)) {
+ reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
"(ip6)");
@@ -8067,6 +8095,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
pd2.tot_len = ntohs(h2_6.ip6_plen) +
sizeof(struct ip6_hdr);
+ pd2.ttl = h2_6.ip6_hlim;
pd2.src = (struct pf_addr *)&h2_6.ip6_src;
pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
pd2.ip_sum = NULL;
@@ -8107,7 +8136,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
* expected. Don't access any TCP header fields after
* th_seq, an ackskew test is not possible.
*/
- if (!pf_pull_hdr(pd->m, pd2.off, th, 8, NULL, reason,
+ if (!pf_pull_hdr(pd->m, pd2.off, th, 8, reason,
pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
@@ -8303,7 +8332,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
int action;
if (!pf_pull_hdr(pd->m, pd2.off, uh, sizeof(*uh),
- NULL, reason, pd2.af)) {
+ reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
"(udp)");
@@ -8434,7 +8463,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
int copyback = 0;
int action;
- if (! pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), NULL, reason,
+ if (! pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), reason,
pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
@@ -8590,7 +8619,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
}
if (!pf_pull_hdr(pd->m, pd2.off, iih, ICMP_MINLEN,
- NULL, reason, pd2.af)) {
+ reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short i"
"(icmp)");
@@ -8710,7 +8739,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
}
if (!pf_pull_hdr(pd->m, pd2.off, iih,
- sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
+ sizeof(struct icmp6_hdr), reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
"(icmp6)");
@@ -8825,6 +8854,11 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
default: {
int action;
+ /*
+ * Placeholder value, so future calls to pf_change_ap()
+ * don't try to update a NULL checksum pointer.
+ */
+ pd->pcksum = &pd->sctp_dummy_sum;
key.af = pd2.af;
key.proto = pd2.proto;
pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
@@ -8887,7 +8921,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
*/
void *
pf_pull_hdr(const struct mbuf *m, int off, void *p, int len,
- u_short *actionp, u_short *reasonp, sa_family_t af)
+ u_short *reasonp, sa_family_t af)
{
int iplen = 0;
switch (af) {
@@ -8897,12 +8931,7 @@ pf_pull_hdr(const struct mbuf *m, int off, void *p, int len,
u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
if (fragoff) {
- if (fragoff >= len)
- ACTION_SET(actionp, PF_PASS);
- else {
- ACTION_SET(actionp, PF_DROP);
- REASON_SET(reasonp, PFRES_FRAG);
- }
+ REASON_SET(reasonp, PFRES_FRAG);
return (NULL);
}
iplen = ntohs(h->ip_len);
@@ -8919,7 +8948,6 @@ pf_pull_hdr(const struct mbuf *m, int off, void *p, int len,
#endif /* INET6 */
}
if (m->m_pkthdr.len < off + len || iplen < off + len) {
- ACTION_SET(actionp, PF_DROP);
REASON_SET(reasonp, PFRES_SHORT);
return (NULL);
}
@@ -8974,9 +9002,10 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
{
struct mbuf *m0, *m1, *md;
- struct route ro;
- const struct sockaddr *gw = &ro.ro_dst;
- struct sockaddr_in *dst;
+ struct route_in6 ro;
+ union sockaddr_union rt_gw;
+ const union sockaddr_union *gw = (const union sockaddr_union *)&ro.ro_dst;
+ union sockaddr_union *dst;
struct ip *ip;
struct ifnet *ifp = NULL;
int error = 0;
@@ -9071,10 +9100,35 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
ip = mtod(m0, struct ip *);
bzero(&ro, sizeof(ro));
- dst = (struct sockaddr_in *)&ro.ro_dst;
- dst->sin_family = AF_INET;
- dst->sin_len = sizeof(struct sockaddr_in);
- dst->sin_addr.s_addr = pd->act.rt_addr.v4.s_addr;
+ dst = (union sockaddr_union *)&ro.ro_dst;
+ dst->sin.sin_family = AF_INET;
+ dst->sin.sin_len = sizeof(struct sockaddr_in);
+ dst->sin.sin_addr = ip->ip_dst;
+ if (ifp) { /* Only needed in forward direction and route-to */
+ bzero(&rt_gw, sizeof(rt_gw));
+ ro.ro_flags |= RT_HAS_GW;
+ gw = &rt_gw;
+ switch (pd->act.rt_af) {
+#ifdef INET
+ case AF_INET:
+ rt_gw.sin.sin_family = AF_INET;
+ rt_gw.sin.sin_len = sizeof(struct sockaddr_in);
+ rt_gw.sin.sin_addr.s_addr = pd->act.rt_addr.v4.s_addr;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ rt_gw.sin6.sin6_family = AF_INET6;
+ rt_gw.sin6.sin6_len = sizeof(struct sockaddr_in6);
+ pf_addrcpy((struct pf_addr *)&rt_gw.sin6.sin6_addr,
+ &pd->act.rt_addr, AF_INET6);
+ break;
+#endif /* INET6 */
+ default:
+ /* Normal af-to without route-to */
+ break;
+ }
+ }
if (pd->dir == PF_IN) {
if (ip->ip_ttl <= IPTTLDEC) {
@@ -9098,10 +9152,10 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
/* Use the gateway if needed. */
if (nh->nh_flags & NHF_GATEWAY) {
- gw = &nh->gw_sa;
+ gw = (const union sockaddr_union *)&nh->gw_sa;
ro.ro_flags |= RT_HAS_GW;
} else {
- dst->sin_addr = ip->ip_dst;
+ dst->sin.sin_addr = ip->ip_dst;
}
/*
@@ -9126,6 +9180,9 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
PF_STATE_UNLOCK(s);
}
+ /* It must have been either set from rt_af or from fib4_lookup */
+ KASSERT(gw->sin.sin_family != 0, ("%s: gw address family undetermined", __func__));
+
if (ifp == NULL) {
m0 = pd->m;
pd->m = NULL;
@@ -9210,9 +9267,11 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
m_clrprotoflags(m0); /* Avoid confusing lower layers. */
md = m0;
- error = pf_dummynet_route(pd, s, r, ifp, gw, &md);
+ error = pf_dummynet_route(pd, s, r, ifp,
+ (const struct sockaddr *)gw, &md);
if (md != NULL) {
- error = (*ifp->if_output)(ifp, md, gw, &ro);
+ error = (*ifp->if_output)(ifp, md,
+ (const struct sockaddr *)gw, (struct route *)&ro);
SDT_PROBE2(pf, ip, route_to, output, ifp, error);
}
goto done;
@@ -9253,9 +9312,11 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
md = m0;
pd->pf_mtag = pf_find_mtag(md);
error = pf_dummynet_route(pd, s, r, ifp,
- gw, &md);
+ (const struct sockaddr *)gw, &md);
if (md != NULL) {
- error = (*ifp->if_output)(ifp, md, gw, &ro);
+ error = (*ifp->if_output)(ifp, md,
+ (const struct sockaddr *)gw,
+ (struct route *)&ro);
SDT_PROBE2(pf, ip, route_to, output, ifp, error);
}
} else
@@ -9962,9 +10023,12 @@ pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason)
pd->proto = h->ip_p;
/* IGMP packets have router alert options, allow them */
if (pd->proto == IPPROTO_IGMP) {
- /* According to RFC 1112 ttl must be set to 1. */
- if ((h->ip_ttl != 1) ||
- !IN_MULTICAST(ntohl(h->ip_dst.s_addr))) {
+ /*
+ * According to RFC 1112 ttl must be set to 1 in all IGMP
+ * packets sent to 224.0.0.1
+ */
+ if ((h->ip_ttl != 1) &&
+ (h->ip_dst.s_addr == INADDR_ALLHOSTS_GROUP)) {
DPFPRINTF(PF_DEBUG_MISC, "Invalid IGMP");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
@@ -9982,7 +10046,7 @@ pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason)
end < pd->off + sizeof(ext))
return (PF_PASS);
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
- NULL, reason, AF_INET)) {
+ reason, AF_INET)) {
DPFPRINTF(PF_DEBUG_MISC, "IP short exthdr");
return (PF_DROP);
}
@@ -10008,7 +10072,7 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
while (off < end) {
if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type,
- sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) {
+ sizeof(opt.ip6o_type), reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt type");
return (PF_DROP);
}
@@ -10016,7 +10080,7 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
off++;
continue;
}
- if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), NULL,
+ if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt),
reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt");
return (PF_DROP);
@@ -10041,7 +10105,7 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
- if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), NULL,
+ if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo),
reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbo");
return (PF_DROP);
@@ -10090,7 +10154,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
break;
case IPPROTO_HOPOPTS:
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
- NULL, reason, AF_INET6)) {
+ reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr");
return (PF_DROP);
}
@@ -10117,7 +10181,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
return (PF_DROP);
}
if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag),
- NULL, reason, AF_INET6)) {
+ reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short fragment");
return (PF_DROP);
}
@@ -10145,7 +10209,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
return (PF_PASS);
}
if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr),
- NULL, reason, AF_INET6)) {
+ reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short rthdr");
return (PF_DROP);
}
@@ -10166,7 +10230,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
case IPPROTO_AH:
case IPPROTO_DSTOPTS:
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
- NULL, reason, AF_INET6)) {
+ reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr");
return (PF_DROP);
}
@@ -10199,7 +10263,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
return (PF_PASS);
}
if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6),
- NULL, reason, AF_INET6)) {
+ reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC,
"IPv6 short icmp6hdr");
return (PF_DROP);
@@ -10432,7 +10496,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
case IPPROTO_TCP: {
struct tcphdr *th = &pd->hdr.tcp;
- if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), action,
+ if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th),
reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
@@ -10448,7 +10512,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
case IPPROTO_UDP: {
struct udphdr *uh = &pd->hdr.udp;
- if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), action,
+ if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh),
reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
@@ -10469,7 +10533,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
}
case IPPROTO_SCTP: {
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.sctp, sizeof(pd->hdr.sctp),
- action, reason, af)) {
+ reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (-1);
@@ -10499,7 +10563,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
}
case IPPROTO_ICMP: {
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN,
- action, reason, af)) {
+ reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (-1);
@@ -10513,7 +10577,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
size_t icmp_hlen = sizeof(struct icmp6_hdr);
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
- action, reason, af)) {
+ reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (-1);
@@ -10539,7 +10603,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
}
if (icmp_hlen > sizeof(struct icmp6_hdr) &&
!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
- action, reason, af)) {
+ reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (-1);
@@ -10549,6 +10613,13 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
break;
}
#endif /* INET6 */
+ default:
+ /*
+ * Placeholder value, so future calls to pf_change_ap() don't
+ * try to update a NULL checksum pointer.
+ */
+ pd->pcksum = &pd->sctp_dummy_sum;
+ break;
}
if (pd->sport)
@@ -10556,6 +10627,8 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
if (pd->dport)
pd->odport = pd->ndport = *pd->dport;
+ MPASS(pd->pcksum != NULL);
+
return (0);
}
diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h
index 51b3fd6390e1..8edd5a5110a1 100644
--- a/sys/netpfil/pf/pf.h
+++ b/sys/netpfil/pf/pf.h
@@ -131,6 +131,7 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL,
#define PF_POOL_TYPEMASK 0x0f
#define PF_POOL_STICKYADDR 0x20
#define PF_POOL_ENDPI 0x40
+#define PF_POOL_IPV6NH 0x80
#define PF_WSCALE_FLAG 0x80
#define PF_WSCALE_MASK 0x0f
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index e5da05a958f6..06c40a03f575 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -187,6 +187,7 @@ VNET_DEFINE(uma_zone_t, pf_tag_z);
#define V_pf_tag_z VNET(pf_tag_z)
static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db");
static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules");
+MALLOC_DEFINE(M_PF, "pf", "pf(4)");
#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
@@ -1181,18 +1182,18 @@ pf_rule_tree_alloc(int flags)
{
struct pf_krule_global *tree;
- tree = malloc(sizeof(struct pf_krule_global), M_TEMP, flags);
+ tree = malloc(sizeof(struct pf_krule_global), M_PF, flags);
if (tree == NULL)
return (NULL);
RB_INIT(tree);
return (tree);
}
-static void
+void
pf_rule_tree_free(struct pf_krule_global *tree)
{
- free(tree, M_TEMP);
+ free(tree, M_PF);
}
static int
@@ -1211,7 +1212,7 @@ pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor)
return (ENOMEM);
rs = pf_find_or_create_kruleset(anchor);
if (rs == NULL) {
- free(tree, M_TEMP);
+ pf_rule_tree_free(tree);
return (EINVAL);
}
pf_rule_tree_free(rs->rules[rs_num].inactive.tree);
@@ -1432,7 +1433,7 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
rs->rules[rs_num].inactive.rcount = 0;
rs->rules[rs_num].inactive.open = 0;
pf_remove_if_empty_kruleset(rs);
- free(old_tree, M_TEMP);
+ pf_rule_tree_free(old_tree);
return (0);
}
@@ -2276,6 +2277,7 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket,
rule->nat.cur = TAILQ_FIRST(&rule->nat.list);
rule->rdr.cur = TAILQ_FIRST(&rule->rdr.list);
rule->route.cur = TAILQ_FIRST(&rule->route.list);
+ rule->route.ipv6_nexthop_af = AF_INET6;
TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr,
rule, entries);
ruleset->rules[rs_num].inactive.rcount++;
@@ -4076,7 +4078,7 @@ DIOCCHANGERULE_error:
out = ps->ps_states;
pstore = mallocarray(slice_count,
- sizeof(struct pfsync_state_1301), M_TEMP, M_WAITOK | M_ZERO);
+ sizeof(struct pfsync_state_1301), M_PF, M_WAITOK | M_ZERO);
nr = 0;
for (i = 0; i <= V_pf_hashmask; i++) {
@@ -4098,10 +4100,10 @@ DIOCGETSTATES_retry:
if (count > slice_count) {
PF_HASHROW_UNLOCK(ih);
- free(pstore, M_TEMP);
+ free(pstore, M_PF);
slice_count = count * 2;
pstore = mallocarray(slice_count,
- sizeof(struct pfsync_state_1301), M_TEMP,
+ sizeof(struct pfsync_state_1301), M_PF,
M_WAITOK | M_ZERO);
goto DIOCGETSTATES_retry;
}
@@ -4123,13 +4125,15 @@ DIOCGETSTATES_retry:
PF_HASHROW_UNLOCK(ih);
error = copyout(pstore, out,
sizeof(struct pfsync_state_1301) * count);
- if (error)
+ if (error) {
+ free(pstore, M_PF);
goto fail;
+ }
out = ps->ps_states + nr;
}
DIOCGETSTATES_full:
ps->ps_len = sizeof(struct pfsync_state_1301) * nr;
- free(pstore, M_TEMP);
+ free(pstore, M_PF);
break;
}
@@ -4155,7 +4159,7 @@ DIOCGETSTATES_full:
out = ps->ps_states;
pstore = mallocarray(slice_count,
- sizeof(struct pf_state_export), M_TEMP, M_WAITOK | M_ZERO);
+ sizeof(struct pf_state_export), M_PF, M_WAITOK | M_ZERO);
nr = 0;
for (i = 0; i <= V_pf_hashmask; i++) {
@@ -4177,10 +4181,10 @@ DIOCGETSTATESV2_retry:
if (count > slice_count) {
PF_HASHROW_UNLOCK(ih);
- free(pstore, M_TEMP);
+ free(pstore, M_PF);
slice_count = count * 2;
pstore = mallocarray(slice_count,
- sizeof(struct pf_state_export), M_TEMP,
+ sizeof(struct pf_state_export), M_PF,
M_WAITOK | M_ZERO);
goto DIOCGETSTATESV2_retry;
}
@@ -4201,13 +4205,15 @@ DIOCGETSTATESV2_retry:
PF_HASHROW_UNLOCK(ih);
error = copyout(pstore, out,
sizeof(struct pf_state_export) * count);
- if (error)
+ if (error) {
+ free(pstore, M_PF);
goto fail;
+ }
out = ps->ps_states + nr;
}
DIOCGETSTATESV2_full:
ps->ps_len = nr * sizeof(struct pf_state_export);
- free(pstore, M_TEMP);
+ free(pstore, M_PF);
break;
}
@@ -4737,17 +4743,17 @@ DIOCCHANGEADDR_error:
totlen = io->pfrio_size * sizeof(struct pfr_table);
pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfrts, totlen);
if (error) {
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
goto fail;
}
PF_RULES_WLOCK();
error = pfr_add_tables(pfrts, io->pfrio_size,
&io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
PF_RULES_WUNLOCK();
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
break;
}
@@ -4769,17 +4775,17 @@ DIOCCHANGEADDR_error:
totlen = io->pfrio_size * sizeof(struct pfr_table);
pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfrts, totlen);
if (error) {
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
goto fail;
}
PF_RULES_WLOCK();
error = pfr_del_tables(pfrts, io->pfrio_size,
&io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
PF_RULES_WUNLOCK();
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
break;
}
@@ -4805,7 +4811,7 @@ DIOCCHANGEADDR_error:
totlen = io->pfrio_size * sizeof(struct pfr_table);
pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
- M_TEMP, M_NOWAIT | M_ZERO);
+ M_PF, M_NOWAIT | M_ZERO);
if (pfrts == NULL) {
error = ENOMEM;
PF_RULES_RUNLOCK();
@@ -4816,7 +4822,7 @@ DIOCCHANGEADDR_error:
PF_RULES_RUNLOCK();
if (error == 0)
error = copyout(pfrts, io->pfrio_buffer, totlen);
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
break;
}
@@ -4843,7 +4849,7 @@ DIOCCHANGEADDR_error:
totlen = io->pfrio_size * sizeof(struct pfr_tstats);
pfrtstats = mallocarray(io->pfrio_size,
- sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT | M_ZERO);
+ sizeof(struct pfr_tstats), M_PF, M_NOWAIT | M_ZERO);
if (pfrtstats == NULL) {
error = ENOMEM;
PF_RULES_RUNLOCK();
@@ -4856,7 +4862,7 @@ DIOCCHANGEADDR_error:
PF_TABLE_STATS_UNLOCK();
if (error == 0)
error = copyout(pfrtstats, io->pfrio_buffer, totlen);
- free(pfrtstats, M_TEMP);
+ free(pfrtstats, M_PF);
break;
}
@@ -4881,10 +4887,10 @@ DIOCCHANGEADDR_error:
totlen = io->pfrio_size * sizeof(struct pfr_table);
pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfrts, totlen);
if (error) {
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
goto fail;
}
@@ -4894,7 +4900,7 @@ DIOCCHANGEADDR_error:
&io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL);
PF_RULES_RUNLOCK();
PF_TABLE_STATS_UNLOCK();
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
break;
}
@@ -4922,10 +4928,10 @@ DIOCCHANGEADDR_error:
totlen = io->pfrio_size * sizeof(struct pfr_table);
pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfrts, totlen);
if (error) {
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -4933,7 +4939,7 @@ DIOCCHANGEADDR_error:
io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange,
&io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
PF_RULES_WUNLOCK();
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
break;
}
@@ -4968,10 +4974,10 @@ DIOCCHANGEADDR_error:
}
totlen = io->pfrio_size * sizeof(struct pfr_addr);
pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfras, totlen);
if (error) {
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -4982,7 +4988,7 @@ DIOCCHANGEADDR_error:
PF_RULES_WUNLOCK();
if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
error = copyout(pfras, io->pfrio_buffer, totlen);
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
break;
}
@@ -5003,10 +5009,10 @@ DIOCCHANGEADDR_error:
}
totlen = io->pfrio_size * sizeof(struct pfr_addr);
pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfras, totlen);
if (error) {
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -5016,7 +5022,7 @@ DIOCCHANGEADDR_error:
PF_RULES_WUNLOCK();
if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
error = copyout(pfras, io->pfrio_buffer, totlen);
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
break;
}
@@ -5040,11 +5046,11 @@ DIOCCHANGEADDR_error:
goto fail;
}
totlen = count * sizeof(struct pfr_addr);
- pfras = mallocarray(count, sizeof(struct pfr_addr), M_TEMP,
+ pfras = mallocarray(count, sizeof(struct pfr_addr), M_PF,
M_WAITOK);
error = copyin(io->pfrio_buffer, pfras, totlen);
if (error) {
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -5055,7 +5061,7 @@ DIOCCHANGEADDR_error:
PF_RULES_WUNLOCK();
if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
error = copyout(pfras, io->pfrio_buffer, totlen);
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
break;
}
@@ -5076,14 +5082,14 @@ DIOCCHANGEADDR_error:
}
totlen = io->pfrio_size * sizeof(struct pfr_addr);
pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
- M_TEMP, M_WAITOK | M_ZERO);
+ M_PF, M_WAITOK | M_ZERO);
PF_RULES_RLOCK();
error = pfr_get_addrs(&io->pfrio_table, pfras,
&io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
PF_RULES_RUNLOCK();
if (error == 0)
error = copyout(pfras, io->pfrio_buffer, totlen);
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
break;
}
@@ -5104,14 +5110,14 @@ DIOCCHANGEADDR_error:
}
totlen = io->pfrio_size * sizeof(struct pfr_astats);
pfrastats = mallocarray(io->pfrio_size,
- sizeof(struct pfr_astats), M_TEMP, M_WAITOK | M_ZERO);
+ sizeof(struct pfr_astats), M_PF, M_WAITOK | M_ZERO);
PF_RULES_RLOCK();
error = pfr_get_astats(&io->pfrio_table, pfrastats,
&io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
PF_RULES_RUNLOCK();
if (error == 0)
error = copyout(pfrastats, io->pfrio_buffer, totlen);
- free(pfrastats, M_TEMP);
+ free(pfrastats, M_PF);
break;
}
@@ -5132,10 +5138,10 @@ DIOCCHANGEADDR_error:
}
totlen = io->pfrio_size * sizeof(struct pfr_addr);
pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfras, totlen);
if (error) {
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -5145,7 +5151,7 @@ DIOCCHANGEADDR_error:
PF_RULES_WUNLOCK();
if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
error = copyout(pfras, io->pfrio_buffer, totlen);
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
break;
}
@@ -5166,10 +5172,10 @@ DIOCCHANGEADDR_error:
}
totlen = io->pfrio_size * sizeof(struct pfr_addr);
pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfras, totlen);
if (error) {
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
goto fail;
}
PF_RULES_RLOCK();
@@ -5179,7 +5185,7 @@ DIOCCHANGEADDR_error:
PF_RULES_RUNLOCK();
if (error == 0)
error = copyout(pfras, io->pfrio_buffer, totlen);
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
break;
}
@@ -5200,10 +5206,10 @@ DIOCCHANGEADDR_error:
}
totlen = io->pfrio_size * sizeof(struct pfr_addr);
pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfras, totlen);
if (error) {
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -5211,7 +5217,7 @@ DIOCCHANGEADDR_error:
io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr,
io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL);
PF_RULES_WUNLOCK();
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
break;
}
@@ -5249,10 +5255,10 @@ DIOCCHANGEADDR_error:
}
totlen = sizeof(struct pfioc_trans_e) * io->size;
ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->array, ioes, totlen);
if (error) {
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -5262,7 +5268,7 @@ DIOCCHANGEADDR_error:
case PF_RULESET_ETH:
if ((error = pf_begin_eth(&ioe->ticket, ioe->anchor))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail;
}
break;
@@ -5270,13 +5276,13 @@ DIOCCHANGEADDR_error:
case PF_RULESET_ALTQ:
if (ioe->anchor[0]) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EINVAL;
goto fail;
}
if ((error = pf_begin_altq(&ioe->ticket))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail;
}
break;
@@ -5291,7 +5297,7 @@ DIOCCHANGEADDR_error:
if ((error = pfr_ina_begin(&table,
&ioe->ticket, NULL, 0))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail;
}
break;
@@ -5300,7 +5306,7 @@ DIOCCHANGEADDR_error:
if ((error = pf_begin_rules(&ioe->ticket,
ioe->rs_num, ioe->anchor))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail;
}
break;
@@ -5308,7 +5314,7 @@ DIOCCHANGEADDR_error:
}
PF_RULES_WUNLOCK();
error = copyout(ioes, io->array, totlen);
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
break;
}
@@ -5330,10 +5336,10 @@ DIOCCHANGEADDR_error:
}
totlen = sizeof(struct pfioc_trans_e) * io->size;
ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->array, ioes, totlen);
if (error) {
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -5344,7 +5350,7 @@ DIOCCHANGEADDR_error:
if ((error = pf_rollback_eth(ioe->ticket,
ioe->anchor))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
@@ -5352,13 +5358,13 @@ DIOCCHANGEADDR_error:
case PF_RULESET_ALTQ:
if (ioe->anchor[0]) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EINVAL;
goto fail;
}
if ((error = pf_rollback_altq(ioe->ticket))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
@@ -5373,7 +5379,7 @@ DIOCCHANGEADDR_error:
if ((error = pfr_ina_rollback(&table,
ioe->ticket, NULL, 0))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
@@ -5382,14 +5388,14 @@ DIOCCHANGEADDR_error:
if ((error = pf_rollback_rules(ioe->ticket,
ioe->rs_num, ioe->anchor))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
}
}
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
break;
}
@@ -5415,10 +5421,10 @@ DIOCCHANGEADDR_error:
totlen = sizeof(struct pfioc_trans_e) * io->size;
ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->array, ioes, totlen);
if (error) {
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -5431,7 +5437,7 @@ DIOCCHANGEADDR_error:
if (ers == NULL || ioe->ticket == 0 ||
ioe->ticket != ers->inactive.ticket) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EINVAL;
goto fail;
}
@@ -5440,14 +5446,14 @@ DIOCCHANGEADDR_error:
case PF_RULESET_ALTQ:
if (ioe->anchor[0]) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EINVAL;
goto fail;
}
if (!V_altqs_inactive_open || ioe->ticket !=
V_ticket_altqs_inactive) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EBUSY;
goto fail;
}
@@ -5458,7 +5464,7 @@ DIOCCHANGEADDR_error:
if (rs == NULL || !rs->topen || ioe->ticket !=
rs->tticket) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EBUSY;
goto fail;
}
@@ -5467,7 +5473,7 @@ DIOCCHANGEADDR_error:
if (ioe->rs_num < 0 || ioe->rs_num >=
PF_RULESET_MAX) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EINVAL;
goto fail;
}
@@ -5477,7 +5483,7 @@ DIOCCHANGEADDR_error:
rs->rules[ioe->rs_num].inactive.ticket !=
ioe->ticket) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EBUSY;
goto fail;
}
@@ -5490,7 +5496,7 @@ DIOCCHANGEADDR_error:
case PF_RULESET_ETH:
if ((error = pf_commit_eth(ioe->ticket, ioe->anchor))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
@@ -5498,7 +5504,7 @@ DIOCCHANGEADDR_error:
case PF_RULESET_ALTQ:
if ((error = pf_commit_altq(ioe->ticket))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
@@ -5513,7 +5519,7 @@ DIOCCHANGEADDR_error:
if ((error = pfr_ina_commit(&table,
ioe->ticket, NULL, NULL, 0))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
@@ -5522,7 +5528,7 @@ DIOCCHANGEADDR_error:
if ((error = pf_commit_rules(ioe->ticket,
ioe->rs_num, ioe->anchor))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
@@ -5536,7 +5542,7 @@ DIOCCHANGEADDR_error:
else
dehook_pf_eth();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
break;
}
@@ -5565,7 +5571,7 @@ DIOCCHANGEADDR_error:
nr = 0;
- p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK | M_ZERO);
+ p = pstore = malloc(psn->psn_len, M_PF, M_WAITOK | M_ZERO);
for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask;
i++, sh++) {
PF_HASHROW_LOCK(sh);
@@ -5584,11 +5590,11 @@ DIOCCHANGEADDR_error:
error = copyout(pstore, psn->psn_src_nodes,
sizeof(struct pf_src_node) * nr);
if (error) {
- free(pstore, M_TEMP);
+ free(pstore, M_PF);
goto fail;
}
psn->psn_len = sizeof(struct pf_src_node) * nr;
- free(pstore, M_TEMP);
+ free(pstore, M_PF);
break;
}
@@ -5655,13 +5661,13 @@ DIOCCHANGEADDR_error:
bufsiz = io->pfiio_size * sizeof(struct pfi_kif);
ifstore = mallocarray(io->pfiio_size, sizeof(struct pfi_kif),
- M_TEMP, M_WAITOK | M_ZERO);
+ M_PF, M_WAITOK | M_ZERO);
PF_RULES_RLOCK();
pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size);
PF_RULES_RUNLOCK();
error = copyout(ifstore, io->pfiio_buffer, bufsiz);
- free(ifstore, M_TEMP);
+ free(ifstore, M_PF);
break;
}
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
index bc9e1dc72902..b8b5157c9b15 100644
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -545,11 +545,18 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
uint64_t hashidx;
int cnt;
sa_family_t wanted_af;
+ u_int8_t pool_type;
+ bool prefer_ipv6_nexthop = rpool->opts & PF_POOL_IPV6NH;
KASSERT(saf != 0, ("%s: saf == 0", __func__));
KASSERT(naf != NULL, ("%s: naf = NULL", __func__));
KASSERT((*naf) != 0, ("%s: *naf = 0", __func__));
+ /*
+ * The given (*naf) is a hint about the AF of the forwarded packet.
+ * It might be changed if prefer_ipv6_nexthop is enabled and
+ * the combination of nexthop AF and packet AF allows for it.
+ */
wanted_af = (*naf);
mtx_lock(&rpool->mtx);
@@ -594,19 +601,38 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
} else {
raddr = &rpool->cur->addr.v.a.addr;
rmask = &rpool->cur->addr.v.a.mask;
- /*
- * For single addresses check their address family. Unless they
- * have none, which happens when addresses are added with
- * the old ioctl mechanism. In such case trust that the address
- * has the proper AF.
- */
- if (rpool->cur->af && rpool->cur->af != wanted_af) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
+ }
+
+ /*
+ * For pools with a single host with the prefer-ipv6-nexthop option
+ * we can return pool address of any AF, unless the forwarded packet
+ * is IPv6, then we can return only if pool address is IPv6.
+ * For non-prefer-ipv6-nexthop we can return pool address only
+ * of wanted AF, unless the pool address's AF is unknown, which
+ * happens in case old ioctls have been used to set up the pool.
+ *
+ * Round-robin pools have their own logic for retrying next addresses.
+ */
+ pool_type = rpool->opts & PF_POOL_TYPEMASK;
+ if (pool_type == PF_POOL_NONE || pool_type == PF_POOL_BITMASK ||
+ ((pool_type == PF_POOL_RANDOM || pool_type == PF_POOL_SRCHASH) &&
+ rpool->cur->addr.type != PF_ADDR_TABLE &&
+ rpool->cur->addr.type != PF_ADDR_DYNIFTL)) {
+ if (prefer_ipv6_nexthop) {
+ if (rpool->cur->af == AF_INET && (*naf) == AF_INET6) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
+ wanted_af = rpool->cur->af;
+ } else {
+ if (rpool->cur->af != 0 && rpool->cur->af != (*naf)) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
}
}
- switch (rpool->opts & PF_POOL_TYPEMASK) {
+ switch (pool_type) {
case PF_POOL_NONE:
pf_addrcpy(naddr, raddr, wanted_af);
break;
@@ -631,10 +657,22 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
else
rpool->tblidx = (int)arc4random_uniform(cnt);
memset(&rpool->counter, 0, sizeof(rpool->counter));
+ if (prefer_ipv6_nexthop)
+ wanted_af = AF_INET6;
+ retry_other_af_random:
if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
wanted_af, pf_islinklocal, false)) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx; /* unsupported */
+ /* Retry with IPv4 nexthop for IPv4 traffic */
+ if (prefer_ipv6_nexthop &&
+ wanted_af == AF_INET6 &&
+ (*naf) == AF_INET) {
+ wanted_af = AF_INET;
+ goto retry_other_af_random;
+ } else {
+ /* no hosts in wanted AF */
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
}
pf_addrcpy(naddr, &rpool->counter, wanted_af);
} else if (init_addr != NULL && PF_AZERO(init_addr,
@@ -702,10 +740,22 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
else
rpool->tblidx = (int)(hashidx % cnt);
memset(&rpool->counter, 0, sizeof(rpool->counter));
+ if (prefer_ipv6_nexthop)
+ wanted_af = AF_INET6;
+ retry_other_af_srchash:
if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
wanted_af, pf_islinklocal, false)) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx; /* unsupported */
+ /* Retry with IPv4 nexthop for IPv4 traffic */
+ if (prefer_ipv6_nexthop &&
+ wanted_af == AF_INET6 &&
+ (*naf) == AF_INET) {
+ wanted_af = AF_INET;
+ goto retry_other_af_srchash;
+ } else {
+ /* no hosts in wanted AF */
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
}
pf_addrcpy(naddr, &rpool->counter, wanted_af);
} else {
@@ -718,6 +768,9 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
{
struct pf_kpooladdr *acur = rpool->cur;
+ retry_other_af_rr:
+ if (prefer_ipv6_nexthop)
+ wanted_af = rpool->ipv6_nexthop_af;
if (rpool->cur->addr.type == PF_ADDR_TABLE) {
if (!pfr_pool_get(rpool->cur->addr.p.tbl,
&rpool->tblidx, &rpool->counter, wanted_af,
@@ -728,46 +781,55 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
&rpool->tblidx, &rpool->counter, wanted_af,
pf_islinklocal, true))
goto get_addr;
- } else if (pf_match_addr(0, raddr, rmask, &rpool->counter,
- wanted_af))
+ } else if (rpool->cur->af == wanted_af &&
+ pf_match_addr(0, raddr, rmask, &rpool->counter, wanted_af))
goto get_addr;
-
+ if (prefer_ipv6_nexthop &&
+ (*naf) == AF_INET && wanted_af == AF_INET6) {
+ /* Reset table index when changing wanted AF. */
+ rpool->tblidx = -1;
+ rpool->ipv6_nexthop_af = AF_INET;
+ goto retry_other_af_rr;
+ }
try_next:
+ /* Reset prefer-ipv6-nexthop search to IPv6 when iterating pools. */
+ rpool->ipv6_nexthop_af = AF_INET6;
if (TAILQ_NEXT(rpool->cur, entries) == NULL)
rpool->cur = TAILQ_FIRST(&rpool->list);
else
rpool->cur = TAILQ_NEXT(rpool->cur, entries);
+ try_next_ipv6_nexthop_rr:
+ /* Reset table index when iterating pools or changing wanted AF. */
rpool->tblidx = -1;
+ if (prefer_ipv6_nexthop)
+ wanted_af = rpool->ipv6_nexthop_af;
if (rpool->cur->addr.type == PF_ADDR_TABLE) {
- if (pfr_pool_get(rpool->cur->addr.p.tbl,
+ if (!pfr_pool_get(rpool->cur->addr.p.tbl,
&rpool->tblidx, &rpool->counter, wanted_af, NULL,
- true)) {
- /* table contains no address of type 'wanted_af' */
- if (rpool->cur != acur)
- goto try_next;
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
- }
+ true))
+ goto get_addr;
} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
- if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
- &rpool->tblidx, &rpool->counter, wanted_af,
- pf_islinklocal, true)) {
- /* interface has no address of type 'wanted_af' */
- if (rpool->cur != acur)
- goto try_next;
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
- }
+ if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+ &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal,
+ true))
+ goto get_addr;
} else {
- raddr = &rpool->cur->addr.v.a.addr;
- rmask = &rpool->cur->addr.v.a.mask;
- if (rpool->cur->af && rpool->cur->af != wanted_af) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
+ if (rpool->cur->af == wanted_af) {
+ raddr = &rpool->cur->addr.v.a.addr;
+ rmask = &rpool->cur->addr.v.a.mask;
+ pf_addrcpy(&rpool->counter, raddr, wanted_af);
+ goto get_addr;
}
- pf_addrcpy(&rpool->counter, raddr, wanted_af);
}
-
+ if (prefer_ipv6_nexthop &&
+ (*naf) == AF_INET && wanted_af == AF_INET6) {
+ rpool->ipv6_nexthop_af = AF_INET;
+ goto try_next_ipv6_nexthop_rr;
+ }
+ if (rpool->cur != acur)
+ goto try_next;
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
get_addr:
pf_addrcpy(naddr, &rpool->counter, wanted_af);
if (init_addr != NULL && PF_AZERO(init_addr, wanted_af))
@@ -777,9 +839,16 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
}
}
+ if (wanted_af == 0) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
+
if (nkif)
*nkif = rpool->cur->kif;
+ (*naf) = wanted_af;
+
done_pool_mtx:
mtx_unlock(&rpool->mtx);
diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c
index 45b5b8dd5fef..73f018db0266 100644
--- a/sys/netpfil/pf/pf_nl.c
+++ b/sys/netpfil/pf/pf_nl.c
@@ -763,6 +763,8 @@ static const struct nlattr_parser nla_p_rule[] = {
{ .type = PF_RT_RCV_IFNOT, .off = _OUT(rcvifnot), .cb = nlattr_get_bool },
{ .type = PF_RT_PKTRATE, .off = _OUT(pktrate), .arg = &threshold_parser, .cb = nlattr_get_nested },
{ .type = PF_RT_MAX_PKT_SIZE, .off = _OUT(max_pkt_size), .cb = nlattr_get_uint16 },
+ { .type = PF_RT_TYPE_2, .off = _OUT(type), .cb = nlattr_get_uint16 },
+ { .type = PF_RT_CODE_2, .off = _OUT(code), .cb = nlattr_get_uint16 },
};
NL_DECLARE_ATTR_PARSER(rule_parser, nla_p_rule);
#undef _OUT
@@ -984,8 +986,12 @@ pf_handle_getrule(struct nlmsghdr *hdr, struct nl_pstate *npt)
nlattr_add_u8(nw, PF_RT_AF, rule->af);
nlattr_add_u8(nw, PF_RT_NAF, rule->naf);
nlattr_add_u8(nw, PF_RT_PROTO, rule->proto);
+
nlattr_add_u8(nw, PF_RT_TYPE, rule->type);
nlattr_add_u8(nw, PF_RT_CODE, rule->code);
+ nlattr_add_u16(nw, PF_RT_TYPE_2, rule->type);
+ nlattr_add_u16(nw, PF_RT_CODE_2, rule->code);
+
nlattr_add_u8(nw, PF_RT_FLAGS, rule->flags);
nlattr_add_u8(nw, PF_RT_FLAGSET, rule->flagset);
nlattr_add_u8(nw, PF_RT_MIN_TTL, rule->min_ttl);
@@ -1945,7 +1951,7 @@ pf_handle_get_tstats(struct nlmsghdr *hdr, struct nl_pstate *npt)
n = pfr_table_count(&attrs.pfrio_table, attrs.pfrio_flags);
pfrtstats = mallocarray(n,
- sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT | M_ZERO);
+ sizeof(struct pfr_tstats), M_PF, M_NOWAIT | M_ZERO);
error = pfr_get_tstats(&attrs.pfrio_table, pfrtstats,
&n, attrs.pfrio_flags | PFR_FLAG_USERIOCTL);
@@ -1997,7 +2003,7 @@ pf_handle_get_tstats(struct nlmsghdr *hdr, struct nl_pstate *npt)
}
}
}
- free(pfrtstats, M_TEMP);
+ free(pfrtstats, M_PF);
if (!nlmsg_end_dump(npt->nw, error, hdr)) {
NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
diff --git a/sys/netpfil/pf/pf_nl.h b/sys/netpfil/pf/pf_nl.h
index 87daac393821..b60d3d4797c6 100644
--- a/sys/netpfil/pf/pf_nl.h
+++ b/sys/netpfil/pf/pf_nl.h
@@ -283,6 +283,8 @@ enum pf_rule_type_t {
PF_RT_SRC_NODES_ROUTE = 81, /* u64 */
PF_RT_PKTRATE = 82, /* nested, pf_threshold_type_t */
PF_RT_MAX_PKT_SIZE = 83, /* u16 */
+ PF_RT_TYPE_2 = 84, /* u16 */
+ PF_RT_CODE_2 = 85, /* u16 */
};
enum pf_addrule_type_t {
diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c
index a684d778ab42..56074bedbc40 100644
--- a/sys/netpfil/pf/pf_norm.c
+++ b/sys/netpfil/pf/pf_norm.c
@@ -1354,7 +1354,7 @@ pf_normalize_ip6(int off, u_short *reason,
pf_rule_to_actions(r, &pd->act);
}
- if (!pf_pull_hdr(pd->m, off, &frag, sizeof(frag), NULL, reason, AF_INET6))
+ if (!pf_pull_hdr(pd->m, off, &frag, sizeof(frag), reason, AF_INET6))
return (PF_DROP);
/* Offset now points to data portion. */
@@ -1542,7 +1542,7 @@ pf_normalize_tcp_init(struct pf_pdesc *pd, struct tcphdr *th,
olen = (th->th_off << 2) - sizeof(*th);
if (olen < TCPOLEN_TIMESTAMP || !pf_pull_hdr(pd->m,
- pd->off + sizeof(*th), opts, olen, NULL, NULL, pd->af))
+ pd->off + sizeof(*th), opts, olen, NULL, pd->af))
return (0);
opt = opts;
@@ -1645,7 +1645,7 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd,
if (olen >= TCPOLEN_TIMESTAMP &&
((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
(dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
- pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, olen, NULL, NULL, pd->af)) {
+ pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, olen, NULL, pd->af)) {
/* Modulate the timestamps. Can be used for NAT detection, OS
* uptime determination or reboot detection.
*/
@@ -1975,7 +1975,7 @@ pf_normalize_mss(struct pf_pdesc *pd)
olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
optsoff = pd->off + sizeof(struct tcphdr);
if (olen < TCPOLEN_MAXSEG ||
- !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af))
+ !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af))
return (0);
opt = opts;
@@ -2009,7 +2009,7 @@ pf_scan_sctp(struct pf_pdesc *pd)
int ret;
while (pd->off + chunk_off < pd->tot_len) {
- if (!pf_pull_hdr(pd->m, pd->off + chunk_off, &ch, sizeof(ch), NULL,
+ if (!pf_pull_hdr(pd->m, pd->off + chunk_off, &ch, sizeof(ch),
NULL, pd->af))
return (PF_DROP);
@@ -2026,7 +2026,7 @@ pf_scan_sctp(struct pf_pdesc *pd)
struct sctp_init_chunk init;
if (!pf_pull_hdr(pd->m, pd->off + chunk_start, &init,
- sizeof(init), NULL, NULL, pd->af))
+ sizeof(init), NULL, pd->af))
return (PF_DROP);
/*
diff --git a/sys/netpfil/pf/pf_nv.c b/sys/netpfil/pf/pf_nv.c
index 89486928e6e1..2f484e2dabc6 100644
--- a/sys/netpfil/pf/pf_nv.c
+++ b/sys/netpfil/pf/pf_nv.c
@@ -505,6 +505,7 @@ int
pf_nvrule_to_krule(const nvlist_t *nvl, struct pf_krule *rule)
{
int error = 0;
+ uint8_t tmp;
#define ERROUT(x) ERROUT_FUNCTION(errout, x)
@@ -610,8 +611,10 @@ pf_nvrule_to_krule(const nvlist_t *nvl, struct pf_krule *rule)
PFNV_CHK(pf_nvuint8(nvl, "keep_state", &rule->keep_state));
PFNV_CHK(pf_nvuint8(nvl, "af", &rule->af));
PFNV_CHK(pf_nvuint8(nvl, "proto", &rule->proto));
- PFNV_CHK(pf_nvuint8(nvl, "type", &rule->type));
- PFNV_CHK(pf_nvuint8(nvl, "code", &rule->code));
+ PFNV_CHK(pf_nvuint8(nvl, "type", &tmp));
+ rule->type = tmp;
+ PFNV_CHK(pf_nvuint8(nvl, "code", &tmp));
+ rule->code = tmp;
PFNV_CHK(pf_nvuint8(nvl, "flags", &rule->flags));
PFNV_CHK(pf_nvuint8(nvl, "flagset", &rule->flagset));
PFNV_CHK(pf_nvuint8(nvl, "min_ttl", &rule->min_ttl));
diff --git a/sys/netpfil/pf/pf_osfp.c b/sys/netpfil/pf/pf_osfp.c
index 150626c5f3fb..8c041d45eae8 100644
--- a/sys/netpfil/pf/pf_osfp.c
+++ b/sys/netpfil/pf/pf_osfp.c
@@ -82,7 +82,7 @@ pf_osfp_fingerprint(struct pf_pdesc *pd, const struct tcphdr *tcp)
ip6 = mtod(pd->m, struct ip6_hdr *);
break;
}
- if (!pf_pull_hdr(pd->m, pd->off, hdr, tcp->th_off << 2, NULL, NULL,
+ if (!pf_pull_hdr(pd->m, pd->off, hdr, tcp->th_off << 2, NULL,
pd->af)) return (NULL);
return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr));
diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c
index 039908a53126..1711e690f6bb 100644
--- a/sys/netpfil/pf/pf_ruleset.c
+++ b/sys/netpfil/pf/pf_ruleset.c
@@ -59,8 +59,8 @@
#error "Kernel only file. Please use sbin/pfctl/pf_ruleset.c instead."
#endif
-#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT|M_ZERO)
-#define rs_free(x) free(x, M_TEMP)
+#define rs_malloc(x) malloc(x, M_PF, M_NOWAIT|M_ZERO)
+#define rs_free(x) free(x, M_PF)
VNET_DEFINE(struct pf_kanchor_global, pf_anchors);
VNET_DEFINE(struct pf_kanchor, pf_main_anchor);
@@ -336,6 +336,12 @@ pf_remove_if_empty_kruleset(struct pf_kruleset *ruleset)
int i;
while (ruleset != NULL) {
+ for (int i = 0; i < PF_RULESET_MAX; i++) {
+ pf_rule_tree_free(ruleset->rules[i].active.tree);
+ ruleset->rules[i].active.tree = NULL;
+ pf_rule_tree_free(ruleset->rules[i].inactive.tree);
+ ruleset->rules[i].inactive.tree = NULL;
+ }
if (ruleset == &pf_main_ruleset ||
!RB_EMPTY(&ruleset->anchor->children) ||
ruleset->anchor->refcnt > 0 || ruleset->tables > 0 ||
diff --git a/sys/powerpc/conf/GENERIC64 b/sys/powerpc/conf/GENERIC64
index 630c88b97dd7..48f9df5b7e38 100644
--- a/sys/powerpc/conf/GENERIC64
+++ b/sys/powerpc/conf/GENERIC64
@@ -234,9 +234,9 @@ device wlan # 802.11 support
options IEEE80211_SUPPORT_MESH # enable 802.11s draft support
options IEEE80211_DEBUG # enable debug msgs
device wlan_wep # 802.11 WEP support
+device wlan_tkip # 802.11 TKIP support
device wlan_ccmp # 802.11 CCMP support
device wlan_gcmp # 802.11 GCMP support
-device wlan_tkip # 802.11 TKIP support
device wlan_amrr # AMRR transmit rate control algorithm
device ath # Atheros CardBus/PCI NICs
device ath_hal # Atheros CardBus/PCI chip support
diff --git a/sys/powerpc/conf/GENERIC64LE b/sys/powerpc/conf/GENERIC64LE
index eb9a9441425d..9af71f30626d 100644
--- a/sys/powerpc/conf/GENERIC64LE
+++ b/sys/powerpc/conf/GENERIC64LE
@@ -230,9 +230,9 @@ device wlan # 802.11 support
options IEEE80211_SUPPORT_MESH # enable 802.11s draft support
options IEEE80211_DEBUG # enable debug msgs
device wlan_wep # 802.11 WEP support
+device wlan_tkip # 802.11 TKIP support
device wlan_ccmp # 802.11 CCMP support
device wlan_gcmp # 802.11 GCMP support
-device wlan_tkip # 802.11 TKIP support
device wlan_amrr # AMRR transmit rate control algorithm
device ath # Atheros CardBus/PCI NICs
device ath_hal # Atheros CardBus/PCI chip support
diff --git a/sys/riscv/include/vmm.h b/sys/riscv/include/vmm.h
index 1221521be368..de7119dd534a 100644
--- a/sys/riscv/include/vmm.h
+++ b/sys/riscv/include/vmm.h
@@ -49,6 +49,7 @@ enum vm_suspend_how {
VM_SUSPEND_RESET,
VM_SUSPEND_POWEROFF,
VM_SUSPEND_HALT,
+ VM_SUSPEND_DESTROY,
VM_SUSPEND_LAST
};
diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c
index 7528ef6e4698..ec4514f70fa6 100644
--- a/sys/riscv/vmm/vmm.c
+++ b/sys/riscv/vmm/vmm.c
@@ -1036,10 +1036,14 @@ vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
+ struct vm *vm;
+ vm = vcpu->vm;
vcpu_lock(vcpu);
-
while (1) {
+ if (vm->suspend)
+ break;
+
if (aplic_check_pending(vcpu->cookie))
break;
diff --git a/sys/sys/cpu.h b/sys/sys/cpu.h
index b6a0094f0c51..5bb55679a05b 100644
--- a/sys/sys/cpu.h
+++ b/sys/sys/cpu.h
@@ -40,25 +40,31 @@
#define CPU_IVAR_CPUID_SIZE 3
#define CPU_IVAR_CPUID 4
-static __inline struct pcpu *cpu_get_pcpu(device_t dev)
+static __inline struct pcpu *
+cpu_get_pcpu(device_t dev)
{
uintptr_t v = 0;
+
BUS_READ_IVAR(device_get_parent(dev), dev, CPU_IVAR_PCPU, &v);
return ((struct pcpu *)v);
}
-static __inline int32_t cpu_get_nominal_mhz(device_t dev)
+static __inline int32_t
+cpu_get_nominal_mhz(device_t dev)
{
uintptr_t v = 0;
+
if (BUS_READ_IVAR(device_get_parent(dev), dev,
CPU_IVAR_NOMINAL_MHZ, &v) != 0)
return (-1);
return ((int32_t)v);
}
-static __inline const uint32_t *cpu_get_cpuid(device_t dev, size_t *count)
+static __inline const uint32_t *
+cpu_get_cpuid(device_t dev, size_t *count)
{
uintptr_t v = 0;
+
if (BUS_READ_IVAR(device_get_parent(dev), dev,
CPU_IVAR_CPUID_SIZE, &v) != 0)
return (NULL);
@@ -124,10 +130,10 @@ TAILQ_HEAD(cf_level_lst, cf_level);
* state. It is probably a bug to not combine this with "info only"
*/
#define CPUFREQ_TYPE_MASK 0xffff
-#define CPUFREQ_TYPE_RELATIVE (1<<0)
-#define CPUFREQ_TYPE_ABSOLUTE (1<<1)
-#define CPUFREQ_FLAG_INFO_ONLY (1<<16)
-#define CPUFREQ_FLAG_UNCACHED (1<<17)
+#define CPUFREQ_TYPE_RELATIVE (1 << 0)
+#define CPUFREQ_TYPE_ABSOLUTE (1 << 1)
+#define CPUFREQ_FLAG_INFO_ONLY (1 << 16)
+#define CPUFREQ_FLAG_UNCACHED (1 << 17)
/*
* When setting a level, the caller indicates the priority of this request.
@@ -162,7 +168,7 @@ int cpufreq_settings_changed(device_t dev);
* The new level and the result of the change (0 is success) is passed in.
* If the driver wishes to revoke the change from cpufreq_pre_change, it
* stores a non-zero error code in the result parameter and the change will
- * not be made. If the post-change eventhandler gets a non-zero result,
+ * not be made. If the post-change eventhandler gets a non-zero result,
* no change was made and the previous level remains in effect. If a change
* is revoked, the post-change eventhandler is still called with the error
* value supplied by the revoking driver. This gives listeners who cached
diff --git a/sys/sys/event.h b/sys/sys/event.h
index 1b30e4292de8..f161d2c938c1 100644
--- a/sys/sys/event.h
+++ b/sys/sys/event.h
@@ -45,7 +45,8 @@
#define EVFILT_USER (-11) /* User events */
#define EVFILT_SENDFILE (-12) /* attached to sendfile requests */
#define EVFILT_EMPTY (-13) /* empty send socket buf */
-#define EVFILT_SYSCOUNT 13
+#define EVFILT_JAIL (-14) /* attached to struct prison */
+#define EVFILT_SYSCOUNT 14
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define EV_SET(kevp_, a, b, c, d, e, f) do { \
@@ -204,10 +205,19 @@ struct freebsd11_kevent32 {
#define NOTE_PCTRLMASK 0xf0000000 /* mask for hint bits */
#define NOTE_PDATAMASK 0x000fffff /* mask for pid */
-/* additional flags for EVFILT_PROC */
-#define NOTE_TRACK 0x00000001 /* follow across forks */
+/* data/hint flags for EVFILT_JAIL */
+#define NOTE_JAIL_SET 0x80000000 /* jail was modified */
+#define NOTE_JAIL_CHILD 0x40000000 /* child jail was created */
+#define NOTE_JAIL_ATTACH 0x20000000 /* jail was attached to */
+#define NOTE_JAIL_REMOVE 0x10000000 /* jail was removed */
+#define NOTE_JAIL_ATTACH_MULTI 0x08000000 /* multiple procs attached */
+#define NOTE_JAIL_CTRLMASK 0xf0000000 /* mask for hint bits */
+#define NOTE_JAIL_DATAMASK 0x000fffff /* mask for pid */
+
+/* additional flags for EVFILT_PROC and EVFILT_JAIL */
+#define NOTE_TRACK 0x00000001 /* follow across fork/create */
#define NOTE_TRACKERR 0x00000002 /* could not track child */
-#define NOTE_CHILD 0x00000004 /* am a child process */
+#define NOTE_CHILD 0x00000004 /* am a child process/jail */
/* additional flags for EVFILT_TIMER */
#define NOTE_SECONDS 0x00000001 /* data is seconds */
@@ -309,6 +319,7 @@ struct knote {
struct proc *p_proc; /* proc pointer */
struct kaiocb *p_aio; /* AIO job pointer */
struct aioliojob *p_lio; /* LIO job pointer */
+ struct prison *p_prison; /* prison pointer */
void *p_v; /* generic other pointer */
} kn_ptr;
const struct filterops *kn_fop;
diff --git a/sys/sys/exterrvar.h b/sys/sys/exterrvar.h
index 7bf1d264ff5e..6783a0d2d84f 100644
--- a/sys/sys/exterrvar.h
+++ b/sys/sys/exterrvar.h
@@ -31,7 +31,7 @@
#error "Specify error category before including sys/exterrvar.h"
#endif
-#ifdef BLOAT_KERNEL_WITH_EXTERR
+#ifdef EXTERR_STRINGS
#define SET_ERROR_MSG(mmsg) (mmsg)
#else
#define SET_ERROR_MSG(mmsg) NULL
diff --git a/sys/sys/file.h b/sys/sys/file.h
index 63313926c4f0..cc3c733580fd 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -72,6 +72,7 @@ struct nameidata;
#define DTYPE_EVENTFD 13 /* eventfd */
#define DTYPE_TIMERFD 14 /* timerfd */
#define DTYPE_INOTIFY 15 /* inotify descriptor */
+#define DTYPE_JAILDESC 16 /* jail descriptor */
#ifdef _KERNEL
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index d2655c52e832..e12e8c3178c9 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -99,8 +99,12 @@ enum prison_state {
#define JAIL_UPDATE 0x02 /* Update parameters of existing jail */
#define JAIL_ATTACH 0x04 /* Attach to jail upon creation */
#define JAIL_DYING 0x08 /* Allow getting a dying jail */
-#define JAIL_SET_MASK 0x0f /* JAIL_DYING is deprecated/ignored here */
-#define JAIL_GET_MASK 0x08
+#define JAIL_USE_DESC 0x10 /* Get/set jail in descriptor */
+#define JAIL_AT_DESC 0x20 /* Find/add jail under descriptor */
+#define JAIL_GET_DESC 0x40 /* Return a new jail descriptor */
+#define JAIL_OWN_DESC 0x80 /* Return a new owning jail descriptor */
+#define JAIL_SET_MASK 0xff /* JAIL_DYING is deprecated/ignored here */
+#define JAIL_GET_MASK 0xf8
#define JAIL_SYS_DISABLE 0
#define JAIL_SYS_NEW 1
@@ -115,7 +119,9 @@ int jail(struct jail *);
int jail_set(struct iovec *, unsigned int, int);
int jail_get(struct iovec *, unsigned int, int);
int jail_attach(int);
+int jail_attach_jd(int);
int jail_remove(int);
+int jail_remove_jd(int);
__END_DECLS
#else /* _KERNEL */
@@ -144,6 +150,8 @@ MALLOC_DECLARE(M_PRISON);
#define JAIL_META_PRIVATE "meta"
#define JAIL_META_SHARED "env"
+struct jaildesc;
+struct knlist;
struct racct;
struct prison_racct;
@@ -189,7 +197,9 @@ struct prison {
struct vnode *pr_root; /* (c) vnode to rdir */
struct prison_ip *pr_addrs[PR_FAMILY_MAX]; /* (p,n) IPs of jail */
struct prison_racct *pr_prison_racct; /* (c) racct jail proxy */
- void *pr_sparep[3];
+ struct knlist *pr_klist; /* (m) attached knotes */
+ LIST_HEAD(, jaildesc) pr_descs; /* (a) attached descriptors */
+ void *pr_sparep;
int pr_childcount; /* (a) number of child jails */
int pr_childmax; /* (p) maximum child jails */
unsigned pr_allow; /* (p) PR_ALLOW_* flags */
@@ -425,10 +435,11 @@ SYSCTL_DECL(_security_jail_param);
/*
* Kernel support functions for jail().
*/
-struct ucred;
+struct knote;
struct mount;
struct sockaddr;
struct statfs;
+struct ucred;
struct vfsconf;
/*
@@ -463,6 +474,7 @@ void prison_proc_free(struct prison *);
void prison_proc_link(struct prison *, struct proc *);
void prison_proc_unlink(struct prison *, struct proc *);
void prison_proc_iterate(struct prison *, void (*)(struct proc *, void *), void *);
+void prison_remove(struct prison *);
void prison_set_allow(struct ucred *cred, unsigned flag, int enable);
bool prison_ischild(struct prison *, struct prison *);
bool prison_isalive(const struct prison *);
diff --git a/sys/sys/jaildesc.h b/sys/sys/jaildesc.h
new file mode 100644
index 000000000000..2451b04f7302
--- /dev/null
+++ b/sys/sys/jaildesc.h
@@ -0,0 +1,83 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 James Gritton.
+ * All rights reserved.
+ *
+ * This software was developed at the University of Cambridge Computer
+ * Laboratory with support from a grant from Google, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _SYS_JAILDESC_H_
+#define _SYS_JAILDESC_H_
+
+#ifdef _KERNEL
+
+#include <sys/queue.h>
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
+#include <sys/_types.h>
+
+struct prison;
+
+/*-
+ * struct jaildesc describes a jail descriptor, which points to a struct
+ * prison. struct prison in turn has a linked list of struct jaildesc.
+ *
+ * Locking key:
+ * (c) set on creation, remains unchanged
+ * (d) jd_lock
+ * (p) jd_prison->pr_mtx
+ */
+struct jaildesc {
+ LIST_ENTRY(jaildesc) jd_list; /* (d,p) this prison's descs */
+ struct prison *jd_prison; /* (d) the prison */
+ struct mtx jd_lock; /* protects fields marked (d) */
+ unsigned jd_flags; /* (d) JDF_* flags */
+};
+
+/*
+ * Locking macros for the jaildesc.
+ */
+#define JAILDESC_LOCK_DESTROY(jd) mtx_destroy(&(jd)->jd_lock)
+#define JAILDESC_LOCK_INIT(jd) mtx_init(&(jd)->jd_lock, "jaildesc", \
+ NULL, MTX_DEF)
+#define JAILDESC_LOCK(jd) mtx_lock(&(jd)->jd_lock)
+#define JAILDESC_UNLOCK(jd) mtx_unlock(&(jd)->jd_lock)
+
+/*
+ * Flags for the jd_flags field
+ */
+#define JDF_REMOVED 0x00000002 /* jail was removed */
+#define JDF_OWNING 0x00000004 /* closing descriptor removes jail */
+
+int jaildesc_find(struct thread *td, int fd, struct prison **prp,
+ struct ucred **ucredp);
+int jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning);
+void jaildesc_set_prison(struct file *jd, struct prison *pr);
+void jaildesc_prison_cleanup(struct prison *pr);
+
+#endif /* _KERNEL */
+
+#endif /* !_SYS_JAILDESC_H_ */
diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h
index 380099092107..417afd4dbbe4 100644
--- a/sys/sys/kernel.h
+++ b/sys/sys/kernel.h
@@ -249,15 +249,8 @@ struct sysinit_tslog {
const void *data;
const char *name;
};
-static inline void
-sysinit_tslog_shim(const void *data)
-{
- const struct sysinit_tslog *x = data;
-
- TSRAW(curthread, TS_ENTER, "SYSINIT", x->name);
- (x->func)(x->data);
- TSRAW(curthread, TS_EXIT, "SYSINIT", x->name);
-}
+void sysinit_tslog_shim(const void *);
+
#define C_SYSINIT(uniquifier, subsystem, order, func, ident) \
static struct sysinit_tslog uniquifier ## _sys_init_tslog = { \
func, \
@@ -322,7 +315,7 @@ void sysinit_add(struct sysinit **set, struct sysinit **set_end);
* int
* please avoid using for new tunables!
*/
-extern void tunable_int_init(void *);
+extern void tunable_int_init(const void *);
struct tunable_int {
const char *path;
int *var;
@@ -341,7 +334,7 @@ struct tunable_int {
/*
* long
*/
-extern void tunable_long_init(void *);
+extern void tunable_long_init(const void *);
struct tunable_long {
const char *path;
long *var;
@@ -360,7 +353,7 @@ struct tunable_long {
/*
* unsigned long
*/
-extern void tunable_ulong_init(void *);
+extern void tunable_ulong_init(const void *);
struct tunable_ulong {
const char *path;
unsigned long *var;
@@ -379,7 +372,7 @@ struct tunable_ulong {
/*
* int64_t
*/
-extern void tunable_int64_init(void *);
+extern void tunable_int64_init(const void *);
struct tunable_int64 {
const char *path;
int64_t *var;
@@ -398,7 +391,7 @@ struct tunable_int64 {
/*
* uint64_t
*/
-extern void tunable_uint64_init(void *);
+extern void tunable_uint64_init(const void *);
struct tunable_uint64 {
const char *path;
uint64_t *var;
@@ -417,7 +410,7 @@ struct tunable_uint64 {
/*
* quad
*/
-extern void tunable_quad_init(void *);
+extern void tunable_quad_init(const void *);
struct tunable_quad {
const char *path;
quad_t *var;
@@ -436,7 +429,7 @@ struct tunable_quad {
/*
* bool
*/
-extern void tunable_bool_init(void *);
+extern void tunable_bool_init(const void *);
struct tunable_bool {
const char *path;
bool *var;
@@ -452,7 +445,7 @@ struct tunable_bool {
#define TUNABLE_BOOL_FETCH(path, var) getenv_bool((path), (var))
-extern void tunable_str_init(void *);
+extern void tunable_str_init(const void *);
struct tunable_str {
const char *path;
char *var;
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index f6480b173a5c..18f85192f6c3 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -1007,6 +1007,7 @@ struct mntarg *mount_argsu(struct mntarg *ma, const char *name, const void *val,
void statfs_scale_blocks(struct statfs *sf, long max_size);
struct vfsconf *vfs_byname(const char *);
struct vfsconf *vfs_byname_kld(const char *, struct thread *td, int *);
+void vfs_unref_vfsconf(struct vfsconf *vfsp);
void vfs_mount_destroy(struct mount *);
void vfs_event_signal(fsid_t *, u_int32_t, intptr_t);
void vfs_freeopts(struct vfsoptlist *opts);
diff --git a/sys/sys/mutex.h b/sys/sys/mutex.h
index 56c03a1b0be9..08d4e2d28b33 100644
--- a/sys/sys/mutex.h
+++ b/sys/sys/mutex.h
@@ -91,7 +91,7 @@
void _mtx_init(volatile uintptr_t *c, const char *name, const char *type,
int opts);
void _mtx_destroy(volatile uintptr_t *c);
-void mtx_sysinit(void *arg);
+void mtx_sysinit(const void *arg);
int _mtx_trylock_flags_int(struct mtx *m, int opts LOCK_FILE_LINE_ARG_DEF);
int _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file,
int line);
diff --git a/sys/sys/param.h b/sys/sys/param.h
index fc2a78883f1e..ce91430909ce 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -74,7 +74,7 @@
* cannot include sys/param.h and should only be updated here.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1500063
+#define __FreeBSD_version 1600000
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/sys/pciio.h b/sys/sys/pciio.h
index 6467e82b1b3d..64c0b32cb8e2 100644
--- a/sys/sys/pciio.h
+++ b/sys/sys/pciio.h
@@ -77,6 +77,9 @@ struct pci_conf {
u_int8_t pc_revid; /* chip revision ID */
char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
u_long pd_unit; /* device unit number */
+ int pd_numa_domain; /* device NUMA domain */
+ size_t pc_reported_len;/* length of PCI data reported */
+ char pc_spare[64]; /* space for future fields */
};
struct pci_match_conf {
@@ -165,7 +168,6 @@ struct pci_bar_ioreq {
#define PCIIO_BAR_MMAP_RW 0x04
#define PCIIO_BAR_MMAP_ACTIVATE 0x08
-#define PCIOCGETCONF _IOWR('p', 5, struct pci_conf_io)
#define PCIOCREAD _IOWR('p', 2, struct pci_io)
#define PCIOCWRITE _IOWR('p', 3, struct pci_io)
#define PCIOCATTACHED _IOWR('p', 4, struct pci_io)
@@ -173,5 +175,6 @@ struct pci_bar_ioreq {
#define PCIOCLISTVPD _IOWR('p', 7, struct pci_list_vpd_io)
#define PCIOCBARMMAP _IOWR('p', 8, struct pci_bar_mmap)
#define PCIOCBARIO _IOWR('p', 9, struct pci_bar_ioreq)
+#define PCIOCGETCONF _IOWR('p', 10, struct pci_conf_io)
#endif /* !_SYS_PCIIO_H_ */
diff --git a/sys/sys/power.h b/sys/sys/power.h
index 9afa55dd403a..3ee021b0e587 100644
--- a/sys/sys/power.h
+++ b/sys/sys/power.h
@@ -28,6 +28,7 @@
#ifndef _SYS_POWER_H_
#define _SYS_POWER_H_
+#ifdef _KERNEL
#include <sys/_eventhandler.h>
@@ -60,4 +61,5 @@ extern void power_profile_set_state(int);
typedef void (*power_profile_change_hook)(void *, int);
EVENTHANDLER_DECLARE(power_profile_change, power_profile_change_hook);
+#endif /* _KERNEL */
#endif /* !_SYS_POWER_H_ */
diff --git a/sys/sys/random.h b/sys/sys/random.h
index 5abf762cd200..2a68f0c99b6d 100644
--- a/sys/sys/random.h
+++ b/sys/sys/random.h
@@ -142,9 +142,6 @@ random_harvest_direct(const void *entropy, u_int size, enum random_entropy_sourc
random_harvest_direct_(entropy, size, origin);
}
-void random_harvest_register_source(enum random_entropy_source);
-void random_harvest_deregister_source(enum random_entropy_source);
-
#if defined(RANDOM_ENABLE_UMA)
#define random_harvest_fast_uma(a, b, c) random_harvest_fast(a, b, c)
#else /* !defined(RANDOM_ENABLE_UMA) */
diff --git a/sys/sys/rmlock.h b/sys/sys/rmlock.h
index 664356998438..eae7342527e3 100644
--- a/sys/sys/rmlock.h
+++ b/sys/sys/rmlock.h
@@ -52,7 +52,7 @@ void rm_init(struct rmlock *rm, const char *name);
void rm_init_flags(struct rmlock *rm, const char *name, int opts);
void rm_destroy(struct rmlock *rm);
int rm_wowned(const struct rmlock *rm);
-void rm_sysinit(void *arg);
+void rm_sysinit(const void *arg);
void _rm_wlock_debug(struct rmlock *rm, const char *file, int line);
void _rm_wunlock_debug(struct rmlock *rm, const char *file, int line);
diff --git a/sys/sys/rwlock.h b/sys/sys/rwlock.h
index 0ebe90e09bed..929f78c1d204 100644
--- a/sys/sys/rwlock.h
+++ b/sys/sys/rwlock.h
@@ -128,7 +128,7 @@
*/
void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts);
void _rw_destroy(volatile uintptr_t *c);
-void rw_sysinit(void *arg);
+void rw_sysinit(const void *arg);
int _rw_wowned(const volatile uintptr_t *c);
void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line);
int __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF);
diff --git a/sys/sys/sx.h b/sys/sys/sx.h
index deb277decc75..d28cae9d01e5 100644
--- a/sys/sys/sx.h
+++ b/sys/sys/sx.h
@@ -99,7 +99,7 @@
* Function prototipes. Routines that start with an underscore are not part
* of the public interface and are wrappered with a macro.
*/
-void sx_sysinit(void *arg);
+void sx_sysinit(const void *arg);
#define sx_init(sx, desc) sx_init_flags((sx), (desc), 0)
void sx_init_flags(struct sx *sx, const char *description, int opts);
void sx_destroy(struct sx *sx);
diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h
index 2d6903967e15..cff27b8be316 100644
--- a/sys/sys/syscall.h
+++ b/sys/sys/syscall.h
@@ -535,4 +535,6 @@
#define SYS_inotify_rm_watch 594
#define SYS_getgroups 595
#define SYS_setgroups 596
-#define SYS_MAXSYSCALL 597
+#define SYS_jail_attach_jd 597
+#define SYS_jail_remove_jd 598
+#define SYS_MAXSYSCALL 599
diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk
index d1172c2dc7bf..443dbadcfbff 100644
--- a/sys/sys/syscall.mk
+++ b/sys/sys/syscall.mk
@@ -438,4 +438,6 @@ MIASM = \
inotify_add_watch_at.o \
inotify_rm_watch.o \
getgroups.o \
- setgroups.o
+ setgroups.o \
+ jail_attach_jd.o \
+ jail_remove_jd.o
diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h
index 98311a6dbf94..8dda4b4533ea 100644
--- a/sys/sys/sysproto.h
+++ b/sys/sys/sysproto.h
@@ -1901,6 +1901,12 @@ struct setgroups_args {
char gidsetsize_l_[PADL_(int)]; int gidsetsize; char gidsetsize_r_[PADR_(int)];
char gidset_l_[PADL_(const gid_t *)]; const gid_t * gidset; char gidset_r_[PADR_(const gid_t *)];
};
+struct jail_attach_jd_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+};
+struct jail_remove_jd_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+};
int sys__exit(struct thread *, struct _exit_args *);
int sys_fork(struct thread *, struct fork_args *);
int sys_read(struct thread *, struct read_args *);
@@ -2305,6 +2311,8 @@ int sys_inotify_add_watch_at(struct thread *, struct inotify_add_watch_at_args *
int sys_inotify_rm_watch(struct thread *, struct inotify_rm_watch_args *);
int sys_getgroups(struct thread *, struct getgroups_args *);
int sys_setgroups(struct thread *, struct setgroups_args *);
+int sys_jail_attach_jd(struct thread *, struct jail_attach_jd_args *);
+int sys_jail_remove_jd(struct thread *, struct jail_remove_jd_args *);
#ifdef COMPAT_43
@@ -3301,6 +3309,8 @@ int freebsd14_setgroups(struct thread *, struct freebsd14_setgroups_args *);
#define SYS_AUE_inotify_rm_watch AUE_INOTIFY
#define SYS_AUE_getgroups AUE_GETGROUPS
#define SYS_AUE_setgroups AUE_SETGROUPS
+#define SYS_AUE_jail_attach_jd AUE_JAIL_ATTACH
+#define SYS_AUE_jail_remove_jd AUE_JAIL_REMOVE
#undef PAD_
#undef PADL_
diff --git a/sys/sys/ttycom.h b/sys/sys/ttycom.h
index d7ddc66b09fb..43e8b98a5bc4 100644
--- a/sys/sys/ttycom.h
+++ b/sys/sys/ttycom.h
@@ -69,8 +69,8 @@
/* 89-91 conflicts: tun and tap */
#define TIOCTIMESTAMP _IOR('t', 89, struct timeval) /* enable/get timestamp
* of last input event */
-#define TIOCMGDTRWAIT _IOR('t', 90, int) /* modem: get wait on close */
-#define TIOCMSDTRWAIT _IOW('t', 91, int) /* modem: set wait on close */
+/* TIOCMGDTRWAIT _IOR('t', 90, int) * was modem: get wait on close */
+/* TIOCMSDTRWAIT _IOW('t', 91, int) * was modem: set wait on close */
/* 92-93 tun and tap */
/* 94-97 conflicts: tun and tap */
#define TIOCDRAIN _IO('t', 94) /* wait till output drained */
diff --git a/sys/sys/user.h b/sys/sys/user.h
index 103236b6ed1b..3183f0792256 100644
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -266,6 +266,7 @@ struct user {
#define KF_TYPE_EVENTFD 13
#define KF_TYPE_TIMERFD 14
#define KF_TYPE_INOTIFY 15
+#define KF_TYPE_JAILDESC 16
#define KF_TYPE_UNKNOWN 255
#define KF_VTYPE_VNON 0
@@ -453,6 +454,9 @@ struct kinfo_file {
uint64_t kf_timerfd_addr;
} kf_timerfd;
struct {
+ int32_t kf_jid;
+ } kf_jail;
+ struct {
uint64_t kf_kqueue_addr;
int32_t kf_kqueue_count;
int32_t kf_kqueue_state;
diff --git a/sys/tools/makeobjops.awk b/sys/tools/makeobjops.awk
index 5ea658c5a3b3..522fb04ec4d1 100644
--- a/sys/tools/makeobjops.awk
+++ b/sys/tools/makeobjops.awk
@@ -315,7 +315,7 @@ function handle_method (static, doc)
printh("\t" join(";\n\t", arguments, num_arguments) ";");
}
else {
- prototype = "static __inline " ret " " umname "(";
+ prototype = "static __inline " ret "\n" umname "(";
printh(format_line(prototype argument_list ")",
line_width, length(prototype)));
}
@@ -327,7 +327,7 @@ function handle_method (static, doc)
firstvar = "((kobj_t)" firstvar ")";
if (prolog != "")
printh(prolog);
- printh("\tKOBJOPLOOKUP(" firstvar "->ops," mname ");");
+ printh("\tKOBJOPLOOKUP(" firstvar "->ops, " mname ");");
rceq = (ret != "void") ? "rc = " : "";
printh("\t" rceq "((" mname "_t *) _m)(" varname_list ");");
if (epilog != "")
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index 5189f7405400..679b2e20e88b 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -4017,8 +4017,9 @@ restart:
rr = rdomain == UMA_ANYDOMAIN;
if (rr) {
aflags = (flags & ~M_WAITOK) | M_NOWAIT;
- vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
- &aflags);
+ if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
+ &aflags) != 0)
+ return (NULL);
} else {
aflags = flags;
domain = rdomain;
@@ -5245,8 +5246,9 @@ uma_prealloc(uma_zone_t zone, int items)
slabs = howmany(items, keg->uk_ipers);
while (slabs-- > 0) {
aflags = M_NOWAIT;
- vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
- &aflags);
+ if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
+ &aflags) != 0)
+ panic("%s: Domainset is empty", __func__);
for (;;) {
slab = keg_alloc_slab(keg, zone, domain, M_WAITOK,
aflags);
diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c
index b44bdb96b0d4..9fa17da954f7 100644
--- a/sys/vm/vm_domainset.c
+++ b/sys/vm/vm_domainset.c
@@ -58,6 +58,9 @@
static int vm_domainset_default_stride = 64;
+static bool vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain);
+
+
/*
* Determine which policy is to be used for this allocation.
*/
@@ -93,28 +96,15 @@ vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds,
pindex += (((uintptr_t)obj) / sizeof(*obj));
di->di_offset = pindex;
}
- /* Skip domains below min on the first pass. */
- di->di_minskip = true;
}
static void
vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain)
{
+ /* Grab the next domain in 'ds_order'. */
*domain = di->di_domain->ds_order[
- ++(*di->di_iter) % di->di_domain->ds_cnt];
-}
-
-static void
-vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain)
-{
- int d;
-
- do {
- d = di->di_domain->ds_order[
- ++(*di->di_iter) % di->di_domain->ds_cnt];
- } while (d == di->di_domain->ds_prefer);
- *domain = d;
+ (*di->di_iter)++ % di->di_domain->ds_cnt];
}
static void
@@ -127,79 +117,144 @@ vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain)
*domain = di->di_domain->ds_order[d];
}
-static void
-vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
+/*
+ * Internal function determining the current phase's first candidate domain.
+ *
+ * Returns whether there is an eligible domain, which is returned through
+ * '*domain'. '*domain' can be modified even if there is no eligible domain.
+ *
+ * See herald comment of vm_domainset_iter_first() below about phases.
+ */
+static bool
+vm_domainset_iter_phase_first(struct vm_domainset_iter *di, int *domain)
{
-
- KASSERT(di->di_n > 0, ("%s: Invalid n %d", __func__, di->di_n));
switch (di->di_policy) {
case DOMAINSET_POLICY_FIRSTTOUCH:
- /*
- * To prevent impossible allocations we convert an invalid
- * first-touch to round-robin.
- */
- /* FALLTHROUGH */
- case DOMAINSET_POLICY_INTERLEAVE:
- /* FALLTHROUGH */
+ *domain = PCPU_GET(domain);
+ break;
case DOMAINSET_POLICY_ROUNDROBIN:
vm_domainset_iter_rr(di, domain);
break;
case DOMAINSET_POLICY_PREFER:
- vm_domainset_iter_prefer(di, domain);
+ *domain = di->di_domain->ds_prefer;
+ break;
+ case DOMAINSET_POLICY_INTERLEAVE:
+ vm_domainset_iter_interleave(di, domain);
break;
default:
panic("%s: Unknown policy %d", __func__, di->di_policy);
}
KASSERT(*domain < vm_ndomains,
("%s: Invalid domain %d", __func__, *domain));
+
+ /*
+ * Has the policy's start domain already been visited?
+ */
+ if (!DOMAINSET_ISSET(*domain, &di->di_remain_mask))
+ return (vm_domainset_iter_next(di, domain));
+
+ DOMAINSET_CLR(*domain, &di->di_remain_mask);
+
+ /* Does it have enough free pages (phase 1)? */
+ if (di->di_minskip && vm_page_count_min_domain(*domain)) {
+ /* Mark the domain as eligible for phase 2. */
+ DOMAINSET_SET(*domain, &di->di_min_mask);
+ return (vm_domainset_iter_next(di, domain));
+ }
+
+ return (true);
}
-static void
+/*
+ * Resets an iterator to point to the first candidate domain.
+ *
+ * Returns whether there is an eligible domain to start with. '*domain' may be
+ * modified even if there is none.
+ *
+ * There must have been one call to vm_domainset_iter_init() before.
+ *
+ * This function must be called at least once before calling
+ * vm_domainset_iter_next(). Note that functions wrapping
+ * vm_domainset_iter_init() usually do that themselves.
+ *
+ * This function may be called again to reset the iterator to the policy's first
+ * candidate domain. After each reset, the iterator will visit the same domains
+ * as in the previous iteration minus those on which vm_domainset_iter_ignore()
+ * has been called. Note that the first candidate domain may change at each
+ * reset (at time of this writing, only on the DOMAINSET_POLICY_ROUNDROBIN
+ * policy).
+ *
+ * Domains which have a number of free pages over 'v_free_min' are always
+ * visited first (this is called the "phase 1" in comments, "phase 2" being the
+ * examination of the remaining domains; no domains are ever visited twice).
+ */
+static bool
vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain)
{
+ /* Initialize the mask of domains to visit. */
+ DOMAINSET_COPY(&di->di_valid_mask, &di->di_remain_mask);
+ /*
+ * No candidate domains for phase 2 at start. This will be filled by
+ * phase 1.
+ */
+ DOMAINSET_ZERO(&di->di_min_mask);
+ /* Skip domains below 'v_free_min' on phase 1. */
+ di->di_minskip = true;
- switch (di->di_policy) {
- case DOMAINSET_POLICY_FIRSTTOUCH:
- *domain = PCPU_GET(domain);
- if (DOMAINSET_ISSET(*domain, &di->di_valid_mask)) {
- /*
- * Add an extra iteration because we will visit the
- * current domain a second time in the rr iterator.
- */
- di->di_n = di->di_domain->ds_cnt + 1;
- break;
- }
- /*
- * To prevent impossible allocations we convert an invalid
- * first-touch to round-robin.
- */
- /* FALLTHROUGH */
- case DOMAINSET_POLICY_ROUNDROBIN:
- di->di_n = di->di_domain->ds_cnt;
+ return (vm_domainset_iter_phase_first(di, domain));
+}
+
+/*
+ * Advances the iterator to the next candidate domain.
+ *
+ * Returns whether there was another domain to visit. '*domain' may be modified
+ * even if there is none.
+ *
+ * vm_domainset_iter_first() must have been called at least once before using
+ * this function (see its herald comment for more details on iterators).
+ */
+static bool
+vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
+{
+ /* Loop while there remain domains to visit in the current phase. */
+ while (!DOMAINSET_EMPTY(&di->di_remain_mask)) {
+ /* Grab the next domain in 'ds_order'. */
vm_domainset_iter_rr(di, domain);
- break;
- case DOMAINSET_POLICY_PREFER:
- *domain = di->di_domain->ds_prefer;
- di->di_n = di->di_domain->ds_cnt;
- break;
- case DOMAINSET_POLICY_INTERLEAVE:
- vm_domainset_iter_interleave(di, domain);
- di->di_n = di->di_domain->ds_cnt;
- break;
- default:
- panic("%s: Unknown policy %d", __func__, di->di_policy);
+ KASSERT(*domain < vm_ndomains,
+ ("%s: Invalid domain %d", __func__, *domain));
+
+ if (DOMAINSET_ISSET(*domain, &di->di_remain_mask)) {
+ DOMAINSET_CLR(*domain, &di->di_remain_mask);
+ if (!di->di_minskip || !vm_page_count_min_domain(*domain))
+ return (true);
+ DOMAINSET_SET(*domain, &di->di_min_mask);
+ }
}
- KASSERT(di->di_n > 0, ("%s: Invalid n %d", __func__, di->di_n));
- KASSERT(*domain < vm_ndomains,
- ("%s: Invalid domain %d", __func__, *domain));
+
+ /*
+ * If phase 1 (skip low memory domains) is over, start phase 2 (consider
+ * low memory domains).
+ */
+ if (di->di_minskip) {
+ di->di_minskip = false;
+ /* Browse domains that were under 'v_free_min'. */
+ DOMAINSET_COPY(&di->di_min_mask, &di->di_remain_mask);
+ return (vm_domainset_iter_phase_first(di, domain));
+ }
+
+ return (false);
}
-void
+int
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
- vm_pindex_t pindex, int *domain, int *req, struct pctrie_iter *pages)
+ vm_pindex_t pindex, int *domain, int *req)
{
struct domainset_ref *dr;
+ di->di_flags = *req;
+ *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
+ VM_ALLOC_NOWAIT;
+
/*
* Object policy takes precedence over thread policy. The policies
* are immutable and unsynchronized. Updates can race but pointer
@@ -209,36 +264,21 @@ vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
dr = &obj->domain;
else
dr = &curthread->td_domain;
+
vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex);
- di->di_flags = *req;
- *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
- VM_ALLOC_NOWAIT;
- vm_domainset_iter_first(di, domain);
- if (vm_page_count_min_domain(*domain))
- vm_domainset_iter_page(di, obj, domain, pages);
+ /*
+ * XXXOC: Shouldn't we just panic on 'false' if VM_ALLOC_WAITOK was
+ * passed?
+ */
+ return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM);
}
int
vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
int *domain, struct pctrie_iter *pages)
{
- if (__predict_false(DOMAINSET_EMPTY(&di->di_valid_mask)))
- return (ENOMEM);
-
- /* If there are more domains to visit we run the iterator. */
- while (--di->di_n != 0) {
- vm_domainset_iter_next(di, domain);
- if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) &&
- (!di->di_minskip || !vm_page_count_min_domain(*domain)))
- return (0);
- }
-
- /* If we skipped domains below min restart the search. */
- if (di->di_minskip) {
- di->di_minskip = false;
- vm_domainset_iter_first(di, domain);
+ if (vm_domainset_iter_next(di, domain))
return (0);
- }
/* If we visited all domains and this was a NOWAIT we return error. */
if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
@@ -257,61 +297,43 @@ vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
return (ENOMEM);
/* Restart the search. */
- vm_domainset_iter_first(di, domain);
-
- return (0);
+ /* XXXOC: Shouldn't we just panic on 'false'? */
+ return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM);
}
-static void
+static int
_vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain,
int *flags)
{
-
di->di_flags = *flags;
*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
- vm_domainset_iter_first(di, domain);
- if (vm_page_count_min_domain(*domain))
- vm_domainset_iter_policy(di, domain);
+ /* XXXOC: Shouldn't we just panic on 'false' if M_WAITOK was passed? */
+ return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM);
}
-void
+int
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
struct domainset *ds, int *domain, int *flags)
{
vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0);
- _vm_domainset_iter_policy_init(di, domain, flags);
+ return (_vm_domainset_iter_policy_init(di, domain, flags));
}
-void
+int
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
struct domainset_ref *dr, int *domain, int *flags)
{
vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0);
- _vm_domainset_iter_policy_init(di, domain, flags);
+ return (_vm_domainset_iter_policy_init(di, domain, flags));
}
int
vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{
- if (DOMAINSET_EMPTY(&di->di_valid_mask))
- return (ENOMEM);
-
- /* If there are more domains to visit we run the iterator. */
- while (--di->di_n != 0) {
- vm_domainset_iter_next(di, domain);
- if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) &&
- (!di->di_minskip || !vm_page_count_min_domain(*domain)))
- return (0);
- }
-
- /* If we skipped domains below min restart the search. */
- if (di->di_minskip) {
- di->di_minskip = false;
- vm_domainset_iter_first(di, domain);
+ if (vm_domainset_iter_next(di, domain))
return (0);
- }
/* If we visited all domains and this was a NOWAIT we return error. */
if ((di->di_flags & M_WAITOK) == 0)
@@ -321,9 +343,8 @@ vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
vm_wait_doms(&di->di_valid_mask, 0);
/* Restart the search. */
- vm_domainset_iter_first(di, domain);
-
- return (0);
+ /* XXXOC: Shouldn't we just panic on 'false'? */
+ return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM);
}
void
@@ -345,12 +366,12 @@ vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
return (EJUSTRETURN);
}
-void
+int
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
- vm_pindex_t pindex, int *domain, int *flags, struct pctrie_iter *pages)
+ vm_pindex_t pindex, int *domain, int *flags)
{
-
*domain = 0;
+ return (0);
}
int
@@ -360,20 +381,20 @@ vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
return (EJUSTRETURN);
}
-void
+int
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
struct domainset *ds, int *domain, int *flags)
{
-
*domain = 0;
+ return (0);
}
-void
+int
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
struct domainset_ref *dr, int *domain, int *flags)
{
-
*domain = 0;
+ return (0);
}
void
diff --git a/sys/vm/vm_domainset.h b/sys/vm/vm_domainset.h
index 0d325a642f40..ef86c8ccb5e4 100644
--- a/sys/vm/vm_domainset.h
+++ b/sys/vm/vm_domainset.h
@@ -33,23 +33,26 @@ struct pctrie_iter;
struct vm_domainset_iter {
struct domainset *di_domain;
unsigned int *di_iter;
+ /* Initialized from 'di_domain', initial value after reset. */
domainset_t di_valid_mask;
+ /* Domains to browse in the current phase. */
+ domainset_t di_remain_mask;
+ /* Domains skipped in phase 1 because under 'v_free_min'. */
+ domainset_t di_min_mask;
vm_pindex_t di_offset;
int di_flags;
uint16_t di_policy;
- domainid_t di_n;
bool di_minskip;
};
int vm_domainset_iter_page(struct vm_domainset_iter *, struct vm_object *,
int *, struct pctrie_iter *);
-void vm_domainset_iter_page_init(struct vm_domainset_iter *,
- struct vm_object *, vm_pindex_t, int *, int *,
- struct pctrie_iter *);
+int vm_domainset_iter_page_init(struct vm_domainset_iter *,
+ struct vm_object *, vm_pindex_t, int *, int *);
int vm_domainset_iter_policy(struct vm_domainset_iter *, int *);
-void vm_domainset_iter_policy_init(struct vm_domainset_iter *,
+int vm_domainset_iter_policy_init(struct vm_domainset_iter *,
struct domainset *, int *, int *);
-void vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *,
+int vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *,
struct domainset_ref *, int *, int *);
void vm_domainset_iter_ignore(struct vm_domainset_iter *, int);
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index 93ec6014c27d..1fd6518cf4ed 100644
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -91,6 +91,8 @@ void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t,
vm_ooffset_t *);
int vm_fault_disable_pagefaults(void);
void vm_fault_enable_pagefaults(int save);
+int vm_fault_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
+ vm_prot_t prot, vm_page_t *ma, int max_count, int *ppages_count);
int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
vm_prot_t prot, vm_page_t *ma, int max_count);
int vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 3e57e8d4f1d0..58f8ac16fa0c 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -1995,32 +1995,43 @@ vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra,
}
/*
- * Hold each of the physical pages that are mapped by the specified range of
- * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid
- * and allow the specified types of access, "prot". If all of the implied
- * pages are successfully held, then the number of held pages is returned
- * together with pointers to those pages in the array "ma". However, if any
- * of the pages cannot be held, -1 is returned.
+ * Hold each of the physical pages that are mapped by the specified
+ * range of virtual addresses, ["addr", "addr" + "len"), if those
+ * mappings are valid and allow the specified types of access, "prot".
+ * If all of the implied pages are successfully held, then the number
+ * of held pages is assigned to *ppages_count, together with pointers
+ * to those pages in the array "ma". The returned value is zero.
+ *
+ * However, if any of the pages cannot be held, an error is returned,
+ * and no pages are held.
+ * Error values:
+ * ENOMEM - the range is not valid
+ * EINVAL - the provided vm_page array is too small to hold all pages
+ * EAGAIN - a page was not mapped, and the thread is in nofaulting mode
+ * EFAULT - a page with requested permissions cannot be mapped
+ * (more detailed result from vm_fault() is lost)
*/
int
-vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
- vm_prot_t prot, vm_page_t *ma, int max_count)
+vm_fault_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
+ vm_prot_t prot, vm_page_t *ma, int max_count, int *ppages_count)
{
vm_offset_t end, va;
vm_page_t *mp;
- int count;
+ int count, error;
boolean_t pmap_failed;
- if (len == 0)
+ if (len == 0) {
+ *ppages_count = 0;
return (0);
+ }
end = round_page(addr + len);
addr = trunc_page(addr);
if (!vm_map_range_valid(map, addr, end))
- return (-1);
+ return (ENOMEM);
if (atop(end - addr) > max_count)
- panic("vm_fault_quick_hold_pages: count > max_count");
+ return (EINVAL);
count = atop(end - addr);
/*
@@ -2062,19 +2073,49 @@ vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
* the proper behaviour explicitly.
*/
if ((prot & VM_PROT_QUICK_NOFAULT) != 0 &&
- (curthread->td_pflags & TDP_NOFAULTING) != 0)
- goto error;
- for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE)
+ (curthread->td_pflags & TDP_NOFAULTING) != 0) {
+ error = EAGAIN;
+ goto fail;
+ }
+ for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) {
if (*mp == NULL && vm_fault(map, va, prot,
- VM_FAULT_NORMAL, mp) != KERN_SUCCESS)
- goto error;
+ VM_FAULT_NORMAL, mp) != KERN_SUCCESS) {
+ error = EFAULT;
+ goto fail;
+ }
+ }
}
- return (count);
-error:
+ *ppages_count = count;
+ return (0);
+fail:
for (mp = ma; mp < ma + count; mp++)
if (*mp != NULL)
vm_page_unwire(*mp, PQ_INACTIVE);
- return (-1);
+ return (error);
+}
+
+/*
+ * Hold each of the physical pages that are mapped by the specified range of
+ * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid
+ * and allow the specified types of access, "prot". If all of the implied
+ * pages are successfully held, then the number of held pages is returned
+ * together with pointers to those pages in the array "ma". However, if any
+ * of the pages cannot be held, -1 is returned.
+ */
+int
+vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
+ vm_prot_t prot, vm_page_t *ma, int max_count)
+{
+ int error, pages_count;
+
+ error = vm_fault_hold_pages(map, addr, len, prot, ma,
+ max_count, &pages_count);
+ if (error != 0) {
+ if (error == EINVAL)
+ panic("vm_fault_quick_hold_pages: count > max_count");
+ return (-1);
+ }
+ return (pages_count);
}
/*
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 94df2c2f9a9e..e0f1807a1b32 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -453,7 +453,7 @@ vm_thread_stack_create(struct domainset *ds, int pages)
obj = vm_thread_kstack_size_to_obj(pages);
if (vm_ndomains > 1)
obj->domain.dr_policy = ds;
- vm_domainset_iter_page_init(&di, obj, 0, &domain, &req, NULL);
+ vm_domainset_iter_page_init(&di, obj, 0, &domain, &req);
do {
/*
* Get a kernel virtual address for this thread's kstack.
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index e7d7b6726d2c..ac327aa37b72 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -323,7 +323,9 @@ kmem_alloc_attr_domainset(struct domainset *ds, vm_size_t size, int flags,
start_segind = -1;
- vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
+ if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags) != 0)
+ return (NULL);
+
do {
addr = kmem_alloc_attr_domain(domain, size, flags, low, high,
memattr);
@@ -417,7 +419,9 @@ kmem_alloc_contig_domainset(struct domainset *ds, vm_size_t size, int flags,
start_segind = -1;
- vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
+ if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags))
+ return (NULL);
+
do {
addr = kmem_alloc_contig_domain(domain, size, flags, low, high,
alignment, boundary, memattr);
@@ -517,7 +521,9 @@ kmem_malloc_domainset(struct domainset *ds, vm_size_t size, int flags)
void *addr;
int domain;
- vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
+ if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags) != 0)
+ return (NULL);
+
do {
addr = kmem_malloc_domain(domain, size, flags);
if (addr != NULL)
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index abad5efb8a79..16878604fa11 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2015,8 +2015,9 @@ vm_page_alloc_iter(vm_object_t object, vm_pindex_t pindex, int req,
vm_page_t m;
int domain;
- vm_domainset_iter_page_init(&di, object, pindex, &domain, &req,
- pages);
+ if (vm_domainset_iter_page_init(&di, object, pindex, &domain, &req) != 0)
+ return (NULL);
+
do {
m = vm_page_alloc_domain_iter(object, pindex, domain, req,
pages);
@@ -2268,7 +2269,9 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
start_segind = -1;
- vm_domainset_iter_page_init(&di, object, pindex, &domain, &req, NULL);
+ if (vm_domainset_iter_page_init(&di, object, pindex, &domain, &req) != 0)
+ return (NULL);
+
do {
m = vm_page_alloc_contig_domain(object, pindex, domain, req,
npages, low, high, alignment, boundary, memattr);
@@ -2596,7 +2599,9 @@ vm_page_alloc_noobj(int req)
vm_page_t m;
int domain;
- vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req, NULL);
+ if (vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0)
+ return (NULL);
+
do {
m = vm_page_alloc_noobj_domain(domain, req);
if (m != NULL)
@@ -2615,7 +2620,9 @@ vm_page_alloc_noobj_contig(int req, u_long npages, vm_paddr_t low,
vm_page_t m;
int domain;
- vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req, NULL);
+ if (vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0)
+ return (NULL);
+
do {
m = vm_page_alloc_noobj_contig_domain(domain, req, npages, low,
high, alignment, boundary, memattr);
@@ -3334,7 +3341,9 @@ vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
ret = ERANGE;
- vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req, NULL);
+ if (vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0)
+ return (ret);
+
do {
status = vm_page_reclaim_contig_domain(domain, req, npages, low,
high, alignment, boundary);
diff --git a/sys/x86/iommu/amd_intrmap.c b/sys/x86/iommu/amd_intrmap.c
index a4c1a7836268..f8900fe0561f 100644
--- a/sys/x86/iommu/amd_intrmap.c
+++ b/sys/x86/iommu/amd_intrmap.c
@@ -112,6 +112,8 @@ amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
{
struct amdiommu_ctx *ctx;
struct amdiommu_unit *unit;
+ device_t requester;
+ int error __diagused;
uint16_t rid;
bool is_iommu;
@@ -180,7 +182,8 @@ amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
*addr |= ((uint64_t)cpu & 0xffffff00) << 32;
}
- iommu_get_requester(src, &rid);
+ error = iommu_get_requester(src, &requester, &rid);
+ MPASS(error == 0);
AMDIOMMU_LOCK(unit);
amdiommu_qi_invalidate_ir_locked(unit, rid);
AMDIOMMU_UNLOCK(unit);
@@ -220,6 +223,7 @@ static struct amdiommu_ctx *
amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu)
{
devclass_t src_class;
+ device_t requester;
struct amdiommu_unit *unit;
struct amdiommu_ctx *ctx;
uint32_t edte;
@@ -251,7 +255,8 @@ amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu)
error = amdiommu_find_unit(src, &unit, &rid, &dte, &edte,
bootverbose);
if (error == 0) {
- iommu_get_requester(src, &rid);
+ error = iommu_get_requester(src, &requester, &rid);
+ MPASS(error == 0);
ctx = amdiommu_get_ctx_for_dev(unit, src,
rid, 0, false /* XXXKIB */, false, dte, edte);
}
@@ -266,6 +271,8 @@ amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src,
u_int cookie)
{
struct amdiommu_unit *unit;
+ device_t requester;
+ int error __diagused;
uint16_t rid;
MPASS(ctx != NULL);
@@ -291,7 +298,8 @@ amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src,
atomic_thread_fence_rel();
bzero(irte, sizeof(*irte));
}
- iommu_get_requester(src, &rid);
+ error = iommu_get_requester(src, &requester, &rid);
+ MPASS(error == 0);
AMDIOMMU_LOCK(unit);
amdiommu_qi_invalidate_ir_locked(unit, rid);
AMDIOMMU_UNLOCK(unit);
diff --git a/sys/x86/iommu/intel_intrmap.c b/sys/x86/iommu/intel_intrmap.c
index 06e41523624b..f12a0c9bae9b 100644
--- a/sys/x86/iommu/intel_intrmap.c
+++ b/sys/x86/iommu/intel_intrmap.c
@@ -234,6 +234,8 @@ dmar_ir_find(device_t src, uint16_t *rid, int *is_dmar)
{
devclass_t src_class;
struct dmar_unit *unit;
+ device_t requester;
+ int error __diagused;
/*
* We need to determine if the interrupt source generates FSB
@@ -253,8 +255,10 @@ dmar_ir_find(device_t src, uint16_t *rid, int *is_dmar)
unit = dmar_find_hpet(src, rid);
} else {
unit = dmar_find(src, bootverbose);
- if (unit != NULL && rid != NULL)
- iommu_get_requester(src, rid);
+ if (unit != NULL && rid != NULL) {
+ error = iommu_get_requester(src, &requester, rid);
+ MPASS(error == 0);
+ }
}
return (unit);
}
diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c
index 4d64eaf78b29..7661c82f4394 100644
--- a/sys/x86/x86/identcpu.c
+++ b/sys/x86/x86/identcpu.c
@@ -2613,7 +2613,7 @@ print_vmx_info(void)
"\020EPT#VE" /* EPT-violation #VE */
"\021XSAVES" /* Enable XSAVES/XRSTORS */
);
- printf("\n Exit Controls=0x%b", mask,
+ printf("\n Exit Controls=0x%b", exit,
"\020"
"\003DR" /* Save debug controls */
/* Ignore Host address-space size */
@@ -2625,7 +2625,7 @@ print_vmx_info(void)
"\026EFER-LD" /* Load MSR_EFER */
"\027PTMR-SV" /* Save VMX-preemption timer value */
);
- printf("\n Entry Controls=0x%b", mask,
+ printf("\n Entry Controls=0x%b", entry,
"\020"
"\003DR" /* Save debug controls */
/* Ignore IA-32e mode guest */