Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/conf/GENERIC4
-rw-r--r--sys/amd64/include/vmm.h1
-rw-r--r--sys/amd64/vmm/intel/vmx_support.S8
-rw-r--r--sys/arm/arm/generic_timer.c22
-rw-r--r--sys/arm/conf/TEGRA1242
-rw-r--r--sys/arm64/arm64/cpu_feat.c29
-rw-r--r--sys/arm64/arm64/elf32_machdep.c2
-rw-r--r--sys/arm64/arm64/identcpu.c55
-rw-r--r--sys/arm64/arm64/machdep.c23
-rw-r--r--sys/arm64/arm64/pmap.c140
-rw-r--r--sys/arm64/arm64/ptrauth.c23
-rw-r--r--sys/arm64/arm64/trap.c1
-rw-r--r--sys/arm64/conf/std.arm641
-rw-r--r--sys/arm64/include/cpu.h30
-rw-r--r--sys/arm64/include/cpu_feat.h46
-rw-r--r--sys/arm64/include/pmap.h3
-rw-r--r--sys/arm64/include/proc.h1
-rw-r--r--sys/arm64/include/vmm.h1
-rw-r--r--sys/arm64/vmm/vmm.c6
-rw-r--r--sys/cam/ata/ata_all.c4
-rw-r--r--sys/cam/scsi/scsi_da.c4
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c11
-rw-r--r--sys/compat/freebsd32/freebsd32_syscall.h4
-rw-r--r--sys/compat/freebsd32/freebsd32_syscalls.c2
-rw-r--r--sys/compat/freebsd32/freebsd32_sysent.c2
-rw-r--r--sys/compat/freebsd32/freebsd32_systrace_args.c44
-rw-r--r--sys/compat/lindebugfs/lindebugfs.c8
-rw-r--r--sys/compat/linprocfs/linprocfs.c256
-rw-r--r--sys/compat/linsysfs/linsysfs.c159
-rw-r--r--sys/compat/linsysfs/linsysfs_net.c24
-rw-r--r--sys/compat/linux/linux_netlink.c39
-rw-r--r--sys/compat/linuxkpi/common/include/acpi/acpi.h4
-rw-r--r--sys/compat/linuxkpi/common/include/kunit/static_stub.h15
-rw-r--r--sys/compat/linuxkpi/common/include/linux/cleanup.h49
-rw-r--r--sys/compat/linuxkpi/common/include/linux/compiler.h6
-rw-r--r--sys/compat/linuxkpi/common/include/linux/device.h5
-rw-r--r--sys/compat/linuxkpi/common/include/linux/ieee80211.h34
-rw-r--r--sys/compat/linuxkpi/common/include/linux/math.h2
-rw-r--r--sys/compat/linuxkpi/common/include/linux/math64.h6
-rw-r--r--sys/compat/linuxkpi/common/include/linux/netdevice.h9
-rw-r--r--sys/compat/linuxkpi/common/include/linux/overflow.h180
-rw-r--r--sys/compat/linuxkpi/common/include/linux/pci.h17
-rw-r--r--sys/compat/linuxkpi/common/include/linux/rcupdate.h5
-rw-r--r--sys/compat/linuxkpi/common/include/linux/skbuff.h18
-rw-r--r--sys/compat/linuxkpi/common/include/linux/slab.h4
-rw-r--r--sys/compat/linuxkpi/common/include/linux/string_helpers.h2
-rw-r--r--sys/compat/linuxkpi/common/include/linux/timer.h21
-rw-r--r--sys/compat/linuxkpi/common/include/net/cfg80211.h34
-rw-r--r--sys/compat/linuxkpi/common/include/net/mac80211.h20
-rw-r--r--sys/compat/linuxkpi/common/src/linux_80211.c981
-rw-r--r--sys/compat/linuxkpi/common/src/linux_80211.h39
-rw-r--r--sys/compat/linuxkpi/common/src/linux_80211_macops.c2
-rw-r--r--sys/compat/linuxkpi/common/src/linux_compat.c11
-rw-r--r--sys/compat/linuxkpi/common/src/linux_devres.c26
-rw-r--r--sys/compat/linuxkpi/common/src/linux_pci.c128
-rw-r--r--sys/compat/linuxkpi/dummy/include/kunit/skbuff.h0
-rw-r--r--sys/compat/linuxkpi/dummy/include/kunit/test-bug.h0
-rw-r--r--sys/compat/linuxkpi/dummy/include/kunit/test.h0
-rw-r--r--sys/conf/NOTES10
-rw-r--r--sys/conf/files1
-rw-r--r--sys/conf/files.amd647
-rw-r--r--sys/conf/files.arm2
-rw-r--r--sys/conf/files.powerpc3
-rw-r--r--sys/conf/files.x861
-rw-r--r--sys/conf/kern.pre.mk4
-rw-r--r--sys/conf/newvers.sh4
-rw-r--r--sys/conf/options2
-rw-r--r--sys/contrib/dev/acpica/components/executer/extrace.c2
-rw-r--r--sys/contrib/dev/qat/qat_402xx.binbin0 -> 665360 bytes
-rw-r--r--sys/contrib/dev/qat/qat_402xx_mmp.binbin0 -> 150084 bytes
-rw-r--r--sys/contrib/dev/rtw88/main.c57
-rw-r--r--sys/contrib/dev/rtw89/fw.c4
-rw-r--r--sys/contrib/libnv/nvlist.c10
-rwxr-xr-xsys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py2
-rwxr-xr-xsys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh10
-rwxr-xr-xsys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh14
-rwxr-xr-xsys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh2
-rwxr-xr-xsys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh14
-rw-r--r--sys/contrib/openzfs/.github/workflows/zfs-qemu.yml21
-rw-r--r--sys/contrib/openzfs/META2
-rw-r--r--sys/contrib/openzfs/Makefile.am4
-rw-r--r--sys/contrib/openzfs/cmd/Makefile.am11
-rwxr-xr-xsys/contrib/openzfs/cmd/zarcstat.in (renamed from sys/contrib/openzfs/cmd/arcstat.in)22
-rwxr-xr-xsys/contrib/openzfs/cmd/zarcsummary (renamed from sys/contrib/openzfs/cmd/arc_summary)6
-rw-r--r--sys/contrib/openzfs/cmd/zdb/zdb.c84
-rw-r--r--sys/contrib/openzfs/cmd/zdb/zdb.h2
-rw-r--r--sys/contrib/openzfs/cmd/zdb/zdb_il.c2
-rw-r--r--sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am24
-rwxr-xr-xsys/contrib/openzfs/cmd/zed/zed.d/deadman-sync-slot_off.sh (renamed from sys/contrib/openzfs/cmd/zed/zed.d/deadman-slot_off.sh)0
l---------sys/contrib/openzfs/cmd/zed/zed.d/pool_import-led.sh1
l---------sys/contrib/openzfs/cmd/zed/zed.d/pool_import-sync-led.sh1
-rwxr-xr-xsys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-led.sh (renamed from sys/contrib/openzfs/cmd/zed/zed.d/statechange-led.sh)0
-rwxr-xr-xsys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-slot_off.sh (renamed from sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh)0
l---------sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-led.sh1
l---------sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-sync-led.sh1
l---------sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-led.sh1
l---------sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-sync-led.sh1
-rw-r--r--sys/contrib/openzfs/cmd/zed/zed_exec.c111
-rw-r--r--sys/contrib/openzfs/cmd/zfs/zfs_main.c6
-rw-r--r--sys/contrib/openzfs/cmd/zhack.c201
-rw-r--r--sys/contrib/openzfs/cmd/zpool/Makefile.am5
-rw-r--r--sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.448
-rw-r--r--sys/contrib/openzfs/cmd/zstream/Makefile.am5
-rw-r--r--sys/contrib/openzfs/config/always-arch.m417
-rw-r--r--sys/contrib/openzfs/config/always-compiler-options.m462
-rw-r--r--sys/contrib/openzfs/config/kernel-blkdev.m49
-rw-r--r--sys/contrib/openzfs/config/kernel-dentry-operations.m412
-rw-r--r--sys/contrib/openzfs/config/kernel.m42
-rw-r--r--sys/contrib/openzfs/config/user-statx.m46
-rw-r--r--sys/contrib/openzfs/config/zfs-build.m42
-rw-r--r--sys/contrib/openzfs/contrib/debian/copyright4
-rw-r--r--sys/contrib/openzfs/contrib/debian/not-installed4
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-libnvpair3.install.in2
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.install2
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.postinst2
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-libuutil3.install.in2
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-libzfs-dev.install.in6
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.install.in4
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-libzfsbootenv1.install.in2
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.install.in2
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-zfs-test.install2
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.install2
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install72
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.links3
-rwxr-xr-xsys/contrib/openzfs/contrib/debian/rules.in45
-rwxr-xr-xsys/contrib/openzfs/contrib/debian/tree/zfs-initramfs/usr/share/initramfs-tools/hooks/zdev2
-rw-r--r--sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in9
-rw-r--r--sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c4
-rw-r--r--sys/contrib/openzfs/contrib/pyzfs/libzfs_core/exceptions.py1
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/spl/sys/time.h11
-rw-r--r--sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h26
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h2
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/stat.h2
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/time.h8
-rw-r--r--sys/contrib/openzfs/include/sys/dmu_impl.h4
-rw-r--r--sys/contrib/openzfs/include/sys/dnode.h14
-rw-r--r--sys/contrib/openzfs/include/sys/dsl_deleg.h1
-rw-r--r--sys/contrib/openzfs/include/sys/fm/fs/zfs.h1
-rw-r--r--sys/contrib/openzfs/include/sys/fs/zfs.h3
-rw-r--r--sys/contrib/openzfs/include/sys/vdev_impl.h6
-rw-r--r--sys/contrib/openzfs/include/sys/vdev_raidz.h3
-rw-r--r--sys/contrib/openzfs/include/sys/vdev_raidz_impl.h2
-rw-r--r--sys/contrib/openzfs/include/sys/zfs_file.h2
-rw-r--r--sys/contrib/openzfs/include/sys/zio.h3
-rw-r--r--sys/contrib/openzfs/include/sys/zvol.h2
-rw-r--r--sys/contrib/openzfs/include/zfs_deleg.h1
-rw-r--r--sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h2
-rw-r--r--sys/contrib/openzfs/lib/libspl/include/sys/time.h9
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs.abi4
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs_pool.c14
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs_util.c5
-rw-r--r--sys/contrib/openzfs/lib/libzpool/kernel.c6
-rw-r--r--sys/contrib/openzfs/man/Makefile.am2
-rw-r--r--sys/contrib/openzfs/man/man1/cstyle.12
-rw-r--r--sys/contrib/openzfs/man/man1/zarcstat.1 (renamed from sys/contrib/openzfs/man/man1/arcstat.1)6
-rw-r--r--sys/contrib/openzfs/man/man1/zhack.122
-rw-r--r--sys/contrib/openzfs/man/man1/ztest.12
-rw-r--r--sys/contrib/openzfs/man/man4/spl.42
-rw-r--r--sys/contrib/openzfs/man/man4/zfs.439
-rw-r--r--sys/contrib/openzfs/man/man5/vdev_id.conf.52
-rw-r--r--sys/contrib/openzfs/man/man7/dracut.zfs.72
-rw-r--r--sys/contrib/openzfs/man/man7/vdevprops.741
-rw-r--r--sys/contrib/openzfs/man/man7/zfsconcepts.72
-rw-r--r--sys/contrib/openzfs/man/man7/zfsprops.72
-rw-r--r--sys/contrib/openzfs/man/man7/zpool-features.72
-rw-r--r--sys/contrib/openzfs/man/man7/zpoolconcepts.72
-rw-r--r--sys/contrib/openzfs/man/man7/zpoolprops.72
-rw-r--r--sys/contrib/openzfs/man/man8/zdb.814
-rw-r--r--sys/contrib/openzfs/man/man8/zed.8.in34
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-allow.85
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-bookmark.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-clone.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-create.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-destroy.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-diff.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-hold.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-jail.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-list.847
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-load-key.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in2
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-mount.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-project.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-promote.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-rename.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-rewrite.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-send.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-set.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-share.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-snapshot.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-upgrade.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-userspace.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-wait.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-zone.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs_ids_to_path.82
-rw-r--r--sys/contrib/openzfs/man/man8/zgenhostid.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-attach.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-checkpoint.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-clear.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-create.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-destroy.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-detach.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-events.810
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-export.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-get.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-history.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-import.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-initialize.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-iostat.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-labelclear.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-list.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-offline.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-reguid.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-remove.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-reopen.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-replace.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-resilver.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-scrub.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-split.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-status.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-sync.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-trim.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-upgrade.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-wait.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool.82
-rw-r--r--sys/contrib/openzfs/man/man8/zstream.82
-rw-r--r--sys/contrib/openzfs/module/Kbuild.in2
-rw-r--r--sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c3
-rw-r--r--sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c3
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c244
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c2
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c3
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c23
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c6
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c3
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c55
-rw-r--r--sys/contrib/openzfs/module/zcommon/zfs_deleg.c1
-rw-r--r--sys/contrib/openzfs/module/zcommon/zpool_prop.c6
-rw-r--r--sys/contrib/openzfs/module/zfs/arc.c32
-rw-r--r--sys/contrib/openzfs/module/zfs/dbuf.c21
-rw-r--r--sys/contrib/openzfs/module/zfs/ddt.c8
-rw-r--r--sys/contrib/openzfs/module/zfs/ddt_log.c30
-rw-r--r--sys/contrib/openzfs/module/zfs/dmu.c6
-rw-r--r--sys/contrib/openzfs/module/zfs/dmu_objset.c6
-rw-r--r--sys/contrib/openzfs/module/zfs/dmu_zfetch.c10
-rw-r--r--sys/contrib/openzfs/module/zfs/dnode.c103
-rw-r--r--sys/contrib/openzfs/module/zfs/spa_misc.c6
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev.c119
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_draid.c28
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_file.c3
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_queue.c2
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_raidz.c345
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_removal.c4
-rw-r--r--sys/contrib/openzfs/module/zfs/zfeature.c5
-rw-r--r--sys/contrib/openzfs/module/zfs/zfs_crrd.c7
-rw-r--r--sys/contrib/openzfs/module/zfs/zfs_ioctl.c23
-rw-r--r--sys/contrib/openzfs/module/zfs/zvol.c23
-rw-r--r--sys/contrib/openzfs/rpm/generic/zfs.spec.in6
-rwxr-xr-xsys/contrib/openzfs/scripts/spdxcheck.pl4
-rw-r--r--sys/contrib/openzfs/tests/runfiles/common.run25
-rw-r--r--sys/contrib/openzfs/tests/runfiles/linux.run6
-rw-r--r--sys/contrib/openzfs/tests/runfiles/sanity.run4
-rwxr-xr-xsys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in2
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c3
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg6
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib54
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg8
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am21
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_tunables.ksh2
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/cleanup.ksh43
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/setup.ksh50
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/zfs_send_test.ksh111
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_metaslab_leak.ksh70
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_date_range_002.ksh76
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcstat_001_pos.ksh (renamed from sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arcstat_001_pos.ksh)2
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcsummary_001_pos.ksh (renamed from sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arc_summary_001_pos.ksh)10
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcsummary_002_neg.ksh (renamed from sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arc_summary_002_neg.ksh)6
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/cleanup.ksh43
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/setup.ksh50
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/zfs_send_usertest.ksh145
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_degraded_sit_out.ksh106
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_sit_out.ksh102
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_sit_out_neg.ksh116
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_synchronous_zedlet.ksh149
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh2
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/mount/mount_loopback.ksh111
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh46
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/attach_resilver_sit_out.ksh189
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/replace_resilver_sit_out.ksh199
-rw-r--r--sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c3
-rw-r--r--sys/crypto/openssl/amd64/ossl_aes_gcm_avx512.c232
-rw-r--r--sys/crypto/openssl/arm/ossl_aes_gcm_neon.c (renamed from sys/crypto/openssl/arm/ossl_aes_gcm.c)0
-rw-r--r--sys/crypto/openssl/ossl_aes_gcm.c (renamed from sys/crypto/openssl/amd64/ossl_aes_gcm.c)397
-rw-r--r--sys/crypto/openssl/ossl_ppc.c9
-rw-r--r--sys/crypto/openssl/ossl_x86.c6
-rw-r--r--sys/ddb/db_ps.c11
-rw-r--r--sys/dev/acpica/acpi.c428
-rw-r--r--sys/dev/acpica/acpi_lid.c4
-rw-r--r--sys/dev/acpica/acpivar.h16
-rw-r--r--sys/dev/ahci/ahci_pci.c1
-rw-r--r--sys/dev/amdgpio/amdgpio.c136
-rw-r--r--sys/dev/amdgpio/amdgpio.h9
-rw-r--r--sys/dev/ath/ath_rate/sample/sample.c8
-rw-r--r--sys/dev/ath/if_ath_tx_ht.c6
-rw-r--r--sys/dev/axgbe/if_axgbe_pci.c3
-rw-r--r--sys/dev/bce/if_bce.c2
-rw-r--r--sys/dev/bnxt/bnxt_en/bnxt_hwrm.c2
-rw-r--r--sys/dev/cxgbe/cxgbei/icl_cxgbei.c2
-rw-r--r--sys/dev/cxgbe/iw_cxgbe/qp.c2
-rw-r--r--sys/dev/cxgbe/t4_main.c2
-rw-r--r--sys/dev/cxgbe/tom/t4_cpl_io.c133
-rw-r--r--sys/dev/cxgbe/tom/t4_ddp.c2
-rw-r--r--sys/dev/cxgbe/tom/t4_tls.c69
-rw-r--r--sys/dev/cxgbe/tom/t4_tom.c2
-rw-r--r--sys/dev/cxgbe/tom/t4_tom.h12
-rw-r--r--sys/dev/cyapa/cyapa.c95
-rw-r--r--sys/dev/e1000/if_em.c353
-rw-r--r--sys/dev/e1000/if_em.h19
-rw-r--r--sys/dev/enetc/if_enetc.c3
-rw-r--r--sys/dev/gpio/gpio_if.m26
-rw-r--r--sys/dev/gpio/gpiobus.c101
-rw-r--r--sys/dev/gpio/gpiobus_if.m30
-rw-r--r--sys/dev/gpio/gpiobus_internal.h1
-rw-r--r--sys/dev/gpio/gpioc.c157
-rw-r--r--sys/dev/gpio/gpioled.c108
-rw-r--r--sys/dev/gpio/ofw_gpiobus.c3
-rw-r--r--sys/dev/hid/hkbd.c19
-rw-r--r--sys/dev/hpt27xx/hptintf.h6
-rw-r--r--sys/dev/hwpmc/hwpmc_mod.c21
-rw-r--r--sys/dev/hwt/hwt_ioctl.c9
-rw-r--r--sys/dev/ice/ice_fw_logging.c2
-rw-r--r--sys/dev/ice/ice_lib.c6
-rw-r--r--sys/dev/ichsmb/ichsmb_pci.c3
-rw-r--r--sys/dev/ichwd/i6300esbwd.c245
-rw-r--r--sys/dev/ichwd/i6300esbwd.h46
-rw-r--r--sys/dev/ichwd/ichwd.c2
-rw-r--r--sys/dev/ichwd/ichwd.h3
-rw-r--r--sys/dev/igc/if_igc.c4
-rw-r--r--sys/dev/iicbus/iicbb.c7
-rw-r--r--sys/dev/iommu/busdma_iommu.c54
-rw-r--r--sys/dev/iommu/iommu.h2
-rw-r--r--sys/dev/irdma/irdma_cm.c2
-rw-r--r--sys/dev/irdma/irdma_utils.c4
-rw-r--r--sys/dev/iwx/if_iwx.c7
-rw-r--r--sys/dev/ixgbe/if_ix.c36
-rw-r--r--sys/dev/ixgbe/ixgbe_e610.c34
-rw-r--r--sys/dev/ixl/if_ixl.c4
-rw-r--r--sys/dev/mpi3mr/mpi3mr.c9
-rw-r--r--sys/dev/mpi3mr/mpi3mr_cam.c5
-rw-r--r--sys/dev/mpr/mpr.c10
-rw-r--r--sys/dev/mpr/mpr_mapping.c18
-rw-r--r--sys/dev/mpr/mprvar.h1
-rw-r--r--sys/dev/mwl/if_mwl.c4
-rw-r--r--sys/dev/nvme/nvme.c1
-rw-r--r--sys/dev/nvme/nvme_ahci.c1
-rw-r--r--sys/dev/nvme/nvme_ctrlr.c117
-rw-r--r--sys/dev/nvme/nvme_ctrlr_cmd.c3
-rw-r--r--sys/dev/nvme/nvme_ns.c3
-rw-r--r--sys/dev/nvme/nvme_pci.c1
-rw-r--r--sys/dev/nvme/nvme_private.h3
-rw-r--r--sys/dev/nvme/nvme_qpair.c3
-rw-r--r--sys/dev/nvme/nvme_sim.c1
-rw-r--r--sys/dev/nvme/nvme_sysctl.c1
-rw-r--r--sys/dev/nvme/nvme_util.c23
-rw-r--r--sys/dev/nvmf/nvmf_tcp.c2
-rw-r--r--sys/dev/pci/pci_user.c121
-rw-r--r--sys/dev/puc/pucdata.c43
-rw-r--r--sys/dev/qat/include/common/adf_accel_devices.h4
-rw-r--r--sys/dev/qat/qat_api/include/icp_sal_versions.h2
-rw-r--r--sys/dev/qat/qat_common/adf_gen4_timer.c2
-rw-r--r--sys/dev/qat/qat_common/qat_uclo.c1
-rw-r--r--sys/dev/qat/qat_hw/qat_4xxx/adf_4xxx_hw_data.c58
-rw-r--r--sys/dev/qat/qat_hw/qat_4xxx/adf_4xxx_hw_data.h6
-rw-r--r--sys/dev/qat/qat_hw/qat_4xxx/adf_drv.c15
-rw-r--r--sys/dev/qat/qat_hw/qat_4xxxvf/adf_drv.c15
-rw-r--r--sys/dev/qlnx/qlnxe/ecore_dev.c6
-rw-r--r--sys/dev/qlnx/qlnxe/ecore_mcp.c2
-rw-r--r--sys/dev/qlnx/qlnxe/qlnx_def.h16
-rw-r--r--sys/dev/qlnx/qlnxe/qlnx_os.c25
-rw-r--r--sys/dev/random/random_harvestq.c148
-rw-r--r--sys/dev/random/randomdev.h3
-rw-r--r--sys/dev/re/if_re.c3
-rw-r--r--sys/dev/rtwn/if_rtwn.c3
-rw-r--r--sys/dev/rtwn/if_rtwn_tx.c8
-rw-r--r--sys/dev/rtwn/rtl8192c/r92c_tx.c11
-rw-r--r--sys/dev/rtwn/rtl8812a/r12a_tx.c16
-rw-r--r--sys/dev/sound/pci/hda/hdaa_patches.c6
-rw-r--r--sys/dev/sound/pci/hda/hdac.c11
-rw-r--r--sys/dev/sound/pci/hda/hdac.h2
-rw-r--r--sys/dev/tpm/tpm20.c13
-rw-r--r--sys/dev/tpm/tpm_tis_core.c7
-rw-r--r--sys/dev/uart/uart_bus_pci.c2
-rw-r--r--sys/dev/ufshci/ufshci.h69
-rw-r--r--sys/dev/ufshci/ufshci_ctrlr.c45
-rw-r--r--sys/dev/ufshci/ufshci_dev.c355
-rw-r--r--sys/dev/ufshci/ufshci_pci.c3
-rw-r--r--sys/dev/ufshci/ufshci_private.h15
-rw-r--r--sys/dev/ufshci/ufshci_reg.h2
-rw-r--r--sys/dev/ufshci/ufshci_sysctl.c20
-rw-r--r--sys/dev/ufshci/ufshci_uic_cmd.c19
-rw-r--r--sys/dev/usb/controller/xhci.c85
-rw-r--r--sys/dev/usb/controller/xhci_pci.c2
-rw-r--r--sys/dev/usb/controller/xhcireg.h5
-rw-r--r--sys/dev/usb/net/if_umb.c6
-rw-r--r--sys/dev/usb/serial/udbc.c404
-rw-r--r--sys/dev/usb/usb_hub.c3
-rw-r--r--sys/dev/usb/wlan/if_rsu.c66
-rw-r--r--sys/dev/usb/wlan/if_rsureg.h9
-rw-r--r--sys/dev/usb/wlan/if_run.c14
-rw-r--r--sys/dev/virtio/network/if_vtnet.c443
-rw-r--r--sys/dev/virtio/network/if_vtnetvar.h2
-rw-r--r--sys/dev/vmgenc/vmgenc_acpi.c8
-rw-r--r--sys/dev/vmm/vmm_dev.c1
-rw-r--r--sys/fs/fuse/fuse_vnops.c2
-rw-r--r--sys/fs/msdosfs/bootsect.h2
-rw-r--r--sys/fs/nfs/nfs_commonport.c2
-rw-r--r--sys/fs/nfsclient/nfs_clport.c7
-rw-r--r--sys/fs/nfsclient/nfs_clvnops.c7
-rw-r--r--sys/fs/nfsserver/nfs_nfsdport.c3
-rw-r--r--sys/fs/procfs/procfs.c68
-rw-r--r--sys/fs/procfs/procfs_status.c8
-rw-r--r--sys/fs/pseudofs/pseudofs.c69
-rw-r--r--sys/fs/pseudofs/pseudofs.h19
-rw-r--r--sys/fs/tarfs/tarfs_vnops.c4
-rw-r--r--sys/geom/cache/g_cache.c4
-rw-r--r--sys/geom/concat/g_concat.c6
-rw-r--r--sys/geom/eli/g_eli.c2
-rw-r--r--sys/geom/gate/g_gate.c2
-rw-r--r--sys/geom/geom.h5
-rw-r--r--sys/geom/geom_dev.c2
-rw-r--r--sys/geom/geom_event.c9
-rw-r--r--sys/geom/geom_slice.c2
-rw-r--r--sys/geom/geom_subr.c33
-rw-r--r--sys/geom/journal/g_journal.c2
-rw-r--r--sys/geom/label/g_label.c2
-rw-r--r--sys/geom/linux_lvm/g_linux_lvm.c4
-rw-r--r--sys/geom/mirror/g_mirror.c4
-rw-r--r--sys/geom/mirror/g_mirror_ctl.c2
-rw-r--r--sys/geom/mountver/g_mountver.c2
-rw-r--r--sys/geom/multipath/g_multipath.c7
-rw-r--r--sys/geom/nop/g_nop.c2
-rw-r--r--sys/geom/part/g_part.c6
-rw-r--r--sys/geom/raid/g_raid.c6
-rw-r--r--sys/geom/raid3/g_raid3.c4
-rw-r--r--sys/geom/raid3/g_raid3_ctl.c2
-rw-r--r--sys/geom/shsec/g_shsec.c4
-rw-r--r--sys/geom/stripe/g_stripe.c10
-rw-r--r--sys/geom/union/g_union.c4
-rw-r--r--sys/geom/virstor/g_virstor.c4
-rw-r--r--sys/geom/zero/g_zero.c2
-rw-r--r--sys/i386/conf/GENERIC2
-rw-r--r--sys/kern/imgact_elf.c4
-rw-r--r--sys/kern/init_main.c9
-rw-r--r--sys/kern/init_sysent.c2
-rw-r--r--sys/kern/kern_descrip.c34
-rw-r--r--sys/kern/kern_environment.c32
-rw-r--r--sys/kern/kern_event.c98
-rw-r--r--sys/kern/kern_jail.c404
-rw-r--r--sys/kern/kern_jaildesc.c412
-rw-r--r--sys/kern/kern_malloc.c11
-rw-r--r--sys/kern/kern_mutex.c4
-rw-r--r--sys/kern/kern_proc.c16
-rw-r--r--sys/kern/kern_rmlock.c4
-rw-r--r--sys/kern/kern_rwlock.c4
-rw-r--r--sys/kern/kern_sx.c4
-rw-r--r--sys/kern/kern_thr.c11
-rw-r--r--sys/kern/kern_thread.c6
-rw-r--r--sys/kern/kern_tslog.c10
-rw-r--r--sys/kern/subr_bus.c13
-rw-r--r--sys/kern/subr_power.c130
-rw-r--r--sys/kern/subr_witness.c134
-rw-r--r--sys/kern/sys_procdesc.c2
-rw-r--r--sys/kern/syscalls.c2
-rw-r--r--sys/kern/syscalls.master10
-rw-r--r--sys/kern/systrace_args.c44
-rw-r--r--sys/kern/uipc_usrreq.c4
-rw-r--r--sys/kern/vfs_cache.c162
-rw-r--r--sys/kern/vfs_init.c37
-rw-r--r--sys/kern/vfs_mount.c7
-rw-r--r--sys/kern/vfs_subr.c19
-rw-r--r--sys/libkern/arm64/crc32c_armv8.S8
-rw-r--r--sys/modules/Makefile3
-rw-r--r--sys/modules/dtb/rockchip/Makefile3
-rw-r--r--sys/modules/e6000sw/Makefile2
-rw-r--r--sys/modules/etherswitch/Makefile2
-rw-r--r--sys/modules/evdev/Makefile2
-rw-r--r--sys/modules/gpio/gpioaei/Makefile2
-rw-r--r--sys/modules/gve/Makefile2
-rw-r--r--sys/modules/ichwd/Makefile2
-rw-r--r--sys/modules/if_infiniband/Makefile3
-rw-r--r--sys/modules/if_vlan/Makefile2
-rw-r--r--sys/modules/irdma/Makefile6
-rw-r--r--sys/modules/linux64/Makefile1
-rw-r--r--sys/modules/md/Makefile2
-rw-r--r--sys/modules/miiproxy/Makefile2
-rw-r--r--sys/modules/mlx5/Makefile2
-rw-r--r--sys/modules/mlx5en/Makefile2
-rw-r--r--sys/modules/netgraph/ksocket/Makefile2
-rw-r--r--sys/modules/nvmf/nvmf/Makefile3
-rw-r--r--sys/modules/ossl/Makefile7
-rw-r--r--sys/modules/qatfw/qat_4xxx/Makefile5
-rw-r--r--sys/modules/qlnx/qlnxev/Makefile1
-rw-r--r--sys/modules/sound/driver/hda/Makefile4
-rw-r--r--sys/modules/uinput/Makefile2
-rw-r--r--sys/modules/usb/Makefile7
-rw-r--r--sys/modules/usb/udbc/Makefile9
-rw-r--r--sys/modules/usb/usie/Makefile2
-rw-r--r--sys/modules/usb/wmt/Makefile2
-rw-r--r--sys/modules/zfs/zfs_config.h12
-rw-r--r--sys/modules/zfs/zfs_gitrev.h2
-rw-r--r--sys/net/if.c64
-rw-r--r--sys/net/if_bridge.c56
-rw-r--r--sys/net/if_bridgevar.h2
-rw-r--r--sys/net/if_epair.c62
-rw-r--r--sys/net/if_ethersubr.c2
-rw-r--r--sys/net/if_var.h1
-rw-r--r--sys/net/iflib.c28
-rw-r--r--sys/net/pfvar.h15
-rw-r--r--sys/net80211/ieee80211_ddb.c2
-rw-r--r--sys/net80211/ieee80211_freebsd.h33
-rw-r--r--sys/net80211/ieee80211_ht.c18
-rw-r--r--sys/net80211/ieee80211_node.c4
-rw-r--r--sys/net80211/ieee80211_node.h34
-rw-r--r--sys/net80211/ieee80211_output.c17
-rw-r--r--sys/net80211/ieee80211_phy.c30
-rw-r--r--sys/net80211/ieee80211_phy.h8
-rw-r--r--sys/net80211/ieee80211_sta.c2
-rw-r--r--sys/net80211/ieee80211_var.h3
-rw-r--r--sys/net80211/ieee80211_vht.c20
-rw-r--r--sys/net80211/ieee80211_vht.h4
-rw-r--r--sys/netgraph/bluetooth/include/ng_hci.h2
-rw-r--r--sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c4
-rw-r--r--sys/netgraph/ng_device.c106
-rw-r--r--sys/netgraph/ng_nat.c95
-rw-r--r--sys/netgraph/ng_parse.c4
-rw-r--r--sys/netinet/cc/cc.c2
-rw-r--r--sys/netinet/in.c15
-rw-r--r--sys/netinet/ip_output.c13
-rw-r--r--sys/netinet/ip_var.h1
-rw-r--r--sys/netinet/tcp_hpts.c2
-rw-r--r--sys/netinet/tcp_input.c497
-rw-r--r--sys/netinet/tcp_log_buf.c3
-rw-r--r--sys/netinet/tcp_sack.c21
-rw-r--r--sys/netinet/tcp_stacks/bbr.c2
-rw-r--r--sys/netinet/tcp_stacks/rack.c2
-rw-r--r--sys/netinet/tcp_stacks/rack_bbr_common.c2
-rw-r--r--sys/netinet/tcp_stacks/rack_pcm.c2
-rw-r--r--sys/netinet/tcp_stacks/tailq_hash.c2
-rw-r--r--sys/netinet/tcp_subr.c6
-rw-r--r--sys/netinet/tcp_syncache.c104
-rw-r--r--sys/netinet/udp_usrreq.c45
-rw-r--r--sys/netinet6/in6.c21
-rw-r--r--sys/netinet6/in6_ifattach.c7
-rw-r--r--sys/netinet6/in6_ifattach.h1
-rw-r--r--sys/netinet6/nd6_rtr.c54
-rw-r--r--sys/netlink/route/iface.c1
-rw-r--r--sys/netlink/route/iface_drivers.c47
-rw-r--r--sys/netlink/route/route_var.h1
-rw-r--r--sys/netpfil/pf/if_pfsync.c16
-rw-r--r--sys/netpfil/pf/pf.c189
-rw-r--r--sys/netpfil/pf/pf.h7
-rw-r--r--sys/netpfil/pf/pf_ioctl.c186
-rw-r--r--sys/netpfil/pf/pf_lb.c151
-rw-r--r--sys/netpfil/pf/pf_nl.c13
-rw-r--r--sys/netpfil/pf/pf_nl.h4
-rw-r--r--sys/netpfil/pf/pf_norm.c24
-rw-r--r--sys/netpfil/pf/pf_nv.c7
-rw-r--r--sys/netpfil/pf/pf_osfp.c2
-rw-r--r--sys/netpfil/pf/pf_ruleset.c10
-rw-r--r--sys/powerpc/conf/GENERIC642
-rw-r--r--sys/powerpc/conf/GENERIC64LE2
-rw-r--r--sys/riscv/include/vmm.h1
-rw-r--r--sys/riscv/vmm/vmm.c6
-rw-r--r--sys/sys/bus.h38
-rw-r--r--sys/sys/cpu.h22
-rw-r--r--sys/sys/efi.h67
-rw-r--r--sys/sys/event.h15
-rw-r--r--sys/sys/exterrvar.h2
-rw-r--r--sys/sys/file.h1
-rw-r--r--sys/sys/jail.h20
-rw-r--r--sys/sys/jaildesc.h87
-rw-r--r--sys/sys/kernel.h27
-rw-r--r--sys/sys/mount.h1
-rw-r--r--sys/sys/mutex.h2
-rw-r--r--sys/sys/param.h2
-rw-r--r--sys/sys/pciio.h5
-rw-r--r--sys/sys/power.h55
-rw-r--r--sys/sys/random.h3
-rw-r--r--sys/sys/rmlock.h2
-rw-r--r--sys/sys/rwlock.h2
-rw-r--r--sys/sys/sx.h2
-rw-r--r--sys/sys/syscall.h4
-rw-r--r--sys/sys/syscall.mk4
-rw-r--r--sys/sys/sysproto.h10
-rw-r--r--sys/sys/ttycom.h4
-rw-r--r--sys/sys/user.h4
-rw-r--r--sys/tools/amd64_ia32_vdso.sh2
-rw-r--r--sys/tools/amd64_vdso.sh2
-rw-r--r--sys/tools/makeobjops.awk4
-rw-r--r--sys/ufs/ffs/ffs_rawread.c2
-rw-r--r--sys/vm/uma_core.c10
-rw-r--r--sys/vm/vm_domainset.c265
-rw-r--r--sys/vm/vm_domainset.h15
-rw-r--r--sys/vm/vm_extern.h2
-rw-r--r--sys/vm/vm_fault.c248
-rw-r--r--sys/vm/vm_glue.c2
-rw-r--r--sys/vm/vm_kern.c12
-rw-r--r--sys/vm/vm_page.c21
-rw-r--r--sys/x86/acpica/acpi_apm.c25
-rw-r--r--sys/x86/iommu/amd_intrmap.c14
-rw-r--r--sys/x86/iommu/intel_intrmap.c8
-rw-r--r--sys/x86/x86/identcpu.c4
612 files changed, 12985 insertions, 4355 deletions
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index 81427b5b18b6..d5a1adb2dc65 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -26,7 +26,7 @@ makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support
options SCHED_ULE # ULE scheduler
options NUMA # Non-Uniform Memory Architecture support
options PREEMPTION # Enable kernel thread preemption
-options BLOAT_KERNEL_WITH_EXTERR
+options EXTERR_STRINGS
options VIMAGE # Subsystem virtualization, e.g. VNET
options INET # InterNETworking
options INET6 # IPv6 communications protocols
@@ -287,9 +287,9 @@ device wlan # 802.11 support
options IEEE80211_DEBUG # enable debug msgs
options IEEE80211_SUPPORT_MESH # enable 802.11s draft support
device wlan_wep # 802.11 WEP support
+device wlan_tkip # 802.11 TKIP support
device wlan_ccmp # 802.11 CCMP support
device wlan_gcmp # 802.11 GCMP support
-device wlan_tkip # 802.11 TKIP support
device wlan_amrr # AMRR transmit rate control algorithm
device ath # Atheros CardBus/PCI NICs
device ath_hal # Atheros CardBus/PCI chip support
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 0b3daed4f69e..e35119af8572 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -46,6 +46,7 @@ enum vm_suspend_how {
VM_SUSPEND_POWEROFF,
VM_SUSPEND_HALT,
VM_SUSPEND_TRIPLEFAULT,
+ VM_SUSPEND_DESTROY,
VM_SUSPEND_LAST
};
diff --git a/sys/amd64/vmm/intel/vmx_support.S b/sys/amd64/vmm/intel/vmx_support.S
index 130130b64541..877e377f892d 100644
--- a/sys/amd64/vmm/intel/vmx_support.S
+++ b/sys/amd64/vmm/intel/vmx_support.S
@@ -171,13 +171,11 @@ do_launch:
*/
movq %rsp, %rdi /* point %rdi back to 'vmxctx' */
movl $VMX_VMLAUNCH_ERROR, %eax
- jmp decode_inst_error
-
+ /* FALLTHROUGH */
decode_inst_error:
movl $VM_FAIL_VALID, %r11d
- jz inst_error
- movl $VM_FAIL_INVALID, %r11d
-inst_error:
+ movl $VM_FAIL_INVALID, %esi
+ cmovnzl %esi, %r11d
movl %r11d, VMXCTX_INST_FAIL_STATUS(%rdi)
/*
diff --git a/sys/arm/arm/generic_timer.c b/sys/arm/arm/generic_timer.c
index a8c779dcba6d..dacef8de2257 100644
--- a/sys/arm/arm/generic_timer.c
+++ b/sys/arm/arm/generic_timer.c
@@ -882,32 +882,32 @@ DELAY(int usec)
TSEXIT();
}
-static bool
+static cpu_feat_en
wfxt_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t id_aa64isar2;
if (!get_kernel_reg(ID_AA64ISAR2_EL1, &id_aa64isar2))
- return (false);
- return (ID_AA64ISAR2_WFxT_VAL(id_aa64isar2) != ID_AA64ISAR2_WFxT_NONE);
+ return (FEAT_ALWAYS_DISABLE);
+ if (ID_AA64ISAR2_WFxT_VAL(id_aa64isar2) >= ID_AA64ISAR2_WFxT_IMPL)
+ return (FEAT_DEFAULT_ENABLE);
+
+ return (FEAT_ALWAYS_DISABLE);
}
-static void
+static bool
wfxt_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
u_int errata_count __unused)
{
/* will be called if wfxt_check returns true */
enable_wfxt = true;
+ return (true);
}
-static struct cpu_feat feat_wfxt = {
- .feat_name = "FEAT_WFXT",
- .feat_check = wfxt_check,
- .feat_enable = wfxt_enable,
- .feat_flags = CPU_FEAT_AFTER_DEV | CPU_FEAT_SYSTEM,
-};
-DATA_SET(cpu_feat_set, feat_wfxt);
+CPU_FEAT(feat_wfxt, "WFE and WFI instructions with timeout",
+ wfxt_check, NULL, wfxt_enable,
+ CPU_FEAT_AFTER_DEV | CPU_FEAT_SYSTEM);
#endif
static uint32_t
diff --git a/sys/arm/conf/TEGRA124 b/sys/arm/conf/TEGRA124
index ad5532427eda..ff23e63f77bd 100644
--- a/sys/arm/conf/TEGRA124
+++ b/sys/arm/conf/TEGRA124
@@ -107,9 +107,9 @@ device ums # USB mouse
# Wireless NIC cards
#device wlan # 802.11 support
#device wlan_wep # 802.11 WEP support
+#device wlan_tkip # 802.11 TKIP support
#device wlan_ccmp # 802.11 CCMP support
#device wlan_gcmp # 802.11 GCMP support
-#device wlan_tkip # 802.11 TKIP support
#device wlan_amrr # AMRR transmit rate control algorithm
# PCI
diff --git a/sys/arm64/arm64/cpu_feat.c b/sys/arm64/arm64/cpu_feat.c
index cc262394913d..986d5079e980 100644
--- a/sys/arm64/arm64/cpu_feat.c
+++ b/sys/arm64/arm64/cpu_feat.c
@@ -32,16 +32,21 @@
#include <machine/cpu.h>
#include <machine/cpu_feat.h>
+SYSCTL_NODE(_hw, OID_AUTO, feat, CTLFLAG_RD, 0, "CPU features/errata");
+
/* TODO: Make this a list if we ever grow a callback other than smccc_errata */
static cpu_feat_errata_check_fn cpu_feat_check_cb = NULL;
void
enable_cpu_feat(uint32_t stage)
{
+ char tunable[32];
struct cpu_feat **featp, *feat;
uint32_t midr;
u_int errata_count, *errata_list;
cpu_feat_errata errata_status;
+ cpu_feat_en check_status;
+ bool val;
MPASS((stage & ~CPU_FEAT_STAGE_MASK) == 0);
@@ -58,9 +63,26 @@ enable_cpu_feat(uint32_t stage)
PCPU_GET(cpuid) != 0)
continue;
- if (feat->feat_check != NULL && !feat->feat_check(feat, midr))
+ if (feat->feat_check == NULL)
+ continue;
+
+ check_status = feat->feat_check(feat, midr);
+ /* Ignore features that are not present */
+ if (check_status == FEAT_ALWAYS_DISABLE)
continue;
+ snprintf(tunable, sizeof(tunable), "hw.feat.%s",
+ feat->feat_name);
+ if (TUNABLE_BOOL_FETCH(tunable, &val)) {
+ /* Is the feature disabled by the tunable? */
+ if (!val)
+ continue;
+ /* If enabled by the tunable then enable it */
+ } else if (check_status == FEAT_DEFAULT_DISABLE) {
+ /* No tunable set and disabled by default */
+ continue;
+ }
+
/*
* Check if the feature has any errata that may need a
* workaround applied (or it is to install the workaround for
@@ -97,8 +119,9 @@ enable_cpu_feat(uint32_t stage)
/* Shouldn't be possible */
MPASS(errata_status != ERRATA_UNKNOWN);
- feat->feat_enable(feat, errata_status, errata_list,
- errata_count);
+ if (feat->feat_enable(feat, errata_status, errata_list,
+ errata_count))
+ feat->feat_enabled = true;
}
}
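
A hedged usage sketch for the new control knobs, assuming the tunable names track the first CPU_FEAT() argument (e.g. feat_hafdbs and errata_multi_tlbi as declared later in this commit): a FEAT_DEFAULT_ENABLE feature can be forced off, and a FEAT_DEFAULT_DISABLE erratum workaround opted into, from loader.conf(5); the read-only hw.feat sysctls report whether each enable callback actually ran.

    hw.feat.feat_hafdbs="0"         # opt out of a default-enabled feature
    hw.feat.errata_multi_tlbi="1"   # opt in to a default-disabled workaround

    # after boot, list which features/errata were actually enabled
    sysctl hw.feat
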
diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c
index 5c81c6cdce3d..8f8a934ad520 100644
--- a/sys/arm64/arm64/elf32_machdep.c
+++ b/sys/arm64/arm64/elf32_machdep.c
@@ -225,7 +225,7 @@ freebsd32_fetch_syscall_args(struct thread *td)
sa->args[i] = ap[i];
if (narg > nap) {
if (narg - nap > nitems(args))
- panic("Too many system call arguiments");
+ panic("Too many system call arguments");
error = copyin((void *)td->td_frame->tf_x[13], args,
(narg - nap) * sizeof(int));
if (error != 0)
diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c
index bcacea43ad2f..01b4ece59861 100644
--- a/sys/arm64/arm64/identcpu.c
+++ b/sys/arm64/arm64/identcpu.c
@@ -232,6 +232,10 @@ static const struct cpu_parts cpu_parts_arm[] = {
{ CPU_PART_CORTEX_X2, "Cortex-X2" },
{ CPU_PART_CORTEX_X3, "Cortex-X3" },
{ CPU_PART_CORTEX_X4, "Cortex-X4" },
+ { CPU_PART_C1_NANO, "C1-Nano" },
+ { CPU_PART_C1_PRO, "C1-Pro" },
+ { CPU_PART_C1_PREMIUM, "C1-Premium" },
+ { CPU_PART_C1_ULTRA, "C1-Ultra" },
{ CPU_PART_NEOVERSE_E1, "Neoverse-E1" },
{ CPU_PART_NEOVERSE_N1, "Neoverse-N1" },
{ CPU_PART_NEOVERSE_N2, "Neoverse-N2" },
@@ -2272,37 +2276,25 @@ static const struct mrs_user_reg user_regs[] = {
static bool
user_ctr_has_neoverse_n1_1542419(uint32_t midr, uint64_t ctr)
{
- /* Skip non-Neoverse-N1 */
- if (!CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_ARM,
- CPU_PART_NEOVERSE_N1, 0, 0))
- return (false);
-
- switch (CPU_VAR(midr)) {
- default:
- break;
- case 4:
- /* Fixed in r4p1 */
- if (CPU_REV(midr) > 0)
- break;
- /* FALLTHROUGH */
- case 3:
- /* If DIC is enabled (coherent icache) then we are affected */
- return (CTR_DIC_VAL(ctr) != 0);
- }
-
- return (false);
+ /*
+ * Neoverse-N1 erratum 1542419
+ * Present in r3p0 - r4p0
+ * Fixed in r4p1
+ */
+ return (midr_check_var_part_range(midr, CPU_IMPL_ARM,
+ CPU_PART_NEOVERSE_N1, 3, 0, 4, 0) && CTR_DIC_VAL(ctr) != 0);
}
-static bool
-user_ctr_check(const struct cpu_feat *feat __unused, u_int midr __unused)
+static cpu_feat_en
+user_ctr_check(const struct cpu_feat *feat __unused, u_int midr)
{
if (emulate_ctr)
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
if (user_ctr_has_neoverse_n1_1542419(midr, READ_SPECIALREG(ctr_el0)))
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
- return (false);
+ return (FEAT_ALWAYS_DISABLE);
}
static bool
@@ -2320,7 +2312,7 @@ user_ctr_has_errata(const struct cpu_feat *feat __unused, u_int midr,
return (false);
}
-static void
+static bool
user_ctr_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status, u_int *errata_list, u_int errata_count)
{
@@ -2356,16 +2348,13 @@ user_ctr_enable(const struct cpu_feat *feat __unused,
WRITE_SPECIALREG(sctlr_el1,
READ_SPECIALREG(sctlr_el1) & ~SCTLR_UCT);
isb();
+
+ return (true);
}
-static struct cpu_feat user_ctr = {
- .feat_name = "Trap CTR_EL0",
- .feat_check = user_ctr_check,
- .feat_has_errata = user_ctr_has_errata,
- .feat_enable = user_ctr_enable,
- .feat_flags = CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU,
-};
-DATA_SET(cpu_feat_set, user_ctr);
+CPU_FEAT(trap_ctr, "Trap CTR_EL0",
+ user_ctr_check, user_ctr_has_errata, user_ctr_enable,
+ CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
static bool
user_ctr_handler(uint64_t esr, struct trapframe *frame)
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
index 53856dd90cae..47c701e8588c 100644
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -173,16 +173,19 @@ SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
-static bool
+static cpu_feat_en
pan_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t id_aa64mfr1;
id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
- return (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE);
+ if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) == ID_AA64MMFR1_PAN_NONE)
+ return (FEAT_ALWAYS_DISABLE);
+
+ return (FEAT_DEFAULT_ENABLE);
}
-static void
+static bool
pan_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
u_int errata_count __unused)
@@ -200,15 +203,13 @@ pan_enable(const struct cpu_feat *feat __unused,
".arch_extension pan \n"
"msr pan, #1 \n"
".arch_extension nopan \n");
+
+ return (true);
}
-static struct cpu_feat feat_pan = {
- .feat_name = "FEAT_PAN",
- .feat_check = pan_check,
- .feat_enable = pan_enable,
- .feat_flags = CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU,
-};
-DATA_SET(cpu_feat_set, feat_pan);
+CPU_FEAT(feat_pan, "Privileged access never",
+ pan_check, NULL, pan_enable,
+ CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU);
bool
has_hyp(void)
@@ -857,7 +858,7 @@ initarm(struct arm64_bootparams *abp)
cninit();
set_ttbr0(abp->kern_ttbr0);
- cpu_tlb_flushID();
+ pmap_s1_invalidate_all_kernel();
if (!valid)
panic("Invalid bus configuration: %s",
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index ec89c4573799..8a4395aa1c89 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -190,6 +190,8 @@ pt_entry_t __read_mostly pmap_gp_attr;
#define PMAP_SAN_PTE_BITS (ATTR_AF | ATTR_S1_XN | pmap_sh_attr | \
ATTR_KERN_GP | ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | ATTR_S1_AP(ATTR_S1_AP_RW))
+static bool __read_mostly pmap_multiple_tlbi = false;
+
struct pmap_large_md_page {
struct rwlock pv_lock;
struct md_page pv_page;
@@ -1297,7 +1299,7 @@ pmap_bootstrap_dmap(vm_size_t kernlen)
}
}
- cpu_tlb_flushID();
+ pmap_s1_invalidate_all_kernel();
bs_state.dmap_valid = true;
@@ -1399,7 +1401,7 @@ pmap_bootstrap(void)
/* And the l3 tables for the early devmap */
pmap_bootstrap_l3(VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE));
- cpu_tlb_flushID();
+ pmap_s1_invalidate_all_kernel();
#define alloc_pages(var, np) \
(var) = bs_state.freemempos; \
@@ -1656,14 +1658,17 @@ pmap_init_pv_table(void)
}
}
-static bool
+static cpu_feat_en
pmap_dbm_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t id_aa64mmfr1;
id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
- return (ID_AA64MMFR1_HAFDBS_VAL(id_aa64mmfr1) >=
- ID_AA64MMFR1_HAFDBS_AF_DBS);
+ if (ID_AA64MMFR1_HAFDBS_VAL(id_aa64mmfr1) >=
+ ID_AA64MMFR1_HAFDBS_AF_DBS)
+ return (FEAT_DEFAULT_ENABLE);
+
+ return (FEAT_ALWAYS_DISABLE);
}
static bool
@@ -1671,8 +1676,8 @@ pmap_dbm_has_errata(const struct cpu_feat *feat __unused, u_int midr,
u_int **errata_list, u_int *errata_count)
{
/* Disable on Cortex-A55 for erratum 1024718 - all revisions */
- if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_ARM,
- CPU_PART_CORTEX_A55, 0, 0)) {
+ if (CPU_IMPL(midr) == CPU_IMPL_ARM &&
+ CPU_PART(midr) == CPU_PART_CORTEX_A55) {
static u_int errata_id = 1024718;
*errata_list = &errata_id;
@@ -1681,21 +1686,19 @@ pmap_dbm_has_errata(const struct cpu_feat *feat __unused, u_int midr,
}
/* Disable on Cortex-A510 for erratum 2051678 - r0p0 to r0p2 */
- if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK | CPU_VAR_MASK,
- CPU_IMPL_ARM, CPU_PART_CORTEX_A510, 0, 0)) {
- if (CPU_REV(PCPU_GET(midr)) < 3) {
- static u_int errata_id = 2051678;
+ if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A510,
+ 0, 0, 0, 2)) {
+ static u_int errata_id = 2051678;
- *errata_list = &errata_id;
- *errata_count = 1;
- return (true);
- }
+ *errata_list = &errata_id;
+ *errata_count = 1;
+ return (true);
}
return (false);
}
-static void
+static bool
pmap_dbm_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status, u_int *errata_list __unused,
u_int errata_count)
@@ -1704,7 +1707,7 @@ pmap_dbm_enable(const struct cpu_feat *feat __unused,
/* Skip if there is an erratum affecting DBM */
if (errata_status != ERRATA_NONE)
- return;
+ return (false);
tcr = READ_SPECIALREG(tcr_el1) | TCR_HD;
WRITE_SPECIALREG(tcr_el1, tcr);
@@ -1714,16 +1717,58 @@ pmap_dbm_enable(const struct cpu_feat *feat __unused,
__asm __volatile("tlbi vmalle1");
dsb(nsh);
isb();
+
+ return (true);
}
-static struct cpu_feat feat_dbm = {
- .feat_name = "FEAT_HAFDBS (DBM)",
- .feat_check = pmap_dbm_check,
- .feat_has_errata = pmap_dbm_has_errata,
- .feat_enable = pmap_dbm_enable,
- .feat_flags = CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU,
-};
-DATA_SET(cpu_feat_set, feat_dbm);
+CPU_FEAT(feat_hafdbs, "Hardware management of the Access flag and dirty state",
+ pmap_dbm_check, pmap_dbm_has_errata, pmap_dbm_enable,
+ CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
+
+static cpu_feat_en
+pmap_multiple_tlbi_check(const struct cpu_feat *feat __unused, u_int midr)
+{
+ /*
+ * Cortex-A55 erratum 2441007 (Cat B rare)
+ * Present in all revisions
+ */
+ if (CPU_IMPL(midr) == CPU_IMPL_ARM &&
+ CPU_PART(midr) == CPU_PART_CORTEX_A55)
+ return (FEAT_DEFAULT_DISABLE);
+
+ /*
+ * Cortex-A76 erratum 1286807 (Cat B rare)
+ * Present in r0p0 - r3p0
+ * Fixed in r3p1
+ */
+ if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A76,
+ 0, 0, 3, 0))
+ return (FEAT_DEFAULT_DISABLE);
+
+ /*
+ * Cortex-A510 erratum 2441009 (Cat B rare)
+ * Present in r0p0 - r1p1
+ * Fixed in r1p2
+ */
+ if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A510,
+ 0, 0, 1, 1))
+ return (FEAT_DEFAULT_DISABLE);
+
+ return (FEAT_ALWAYS_DISABLE);
+}
+
+static bool
+pmap_multiple_tlbi_enable(const struct cpu_feat *feat __unused,
+ cpu_feat_errata errata_status, u_int *errata_list __unused,
+ u_int errata_count __unused)
+{
+ pmap_multiple_tlbi = true;
+ return (true);
+}
+
+CPU_FEAT(errata_multi_tlbi, "Multiple TLBI errata",
+ pmap_multiple_tlbi_check, NULL, pmap_multiple_tlbi_enable,
+ CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU);
/*
* Initialize the pmap module.
@@ -1878,9 +1923,17 @@ pmap_s1_invalidate_page(pmap_t pmap, vm_offset_t va, bool final_only)
r = TLBI_VA(va);
if (pmap == kernel_pmap) {
pmap_s1_invalidate_kernel(r, final_only);
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ pmap_s1_invalidate_kernel(r, final_only);
+ }
} else {
r |= ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
pmap_s1_invalidate_user(r, final_only);
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ pmap_s1_invalidate_user(r, final_only);
+ }
}
dsb(ish);
isb();
@@ -1922,12 +1975,24 @@ pmap_s1_invalidate_strided(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
end = TLBI_VA(eva);
for (r = start; r < end; r += TLBI_VA(stride))
pmap_s1_invalidate_kernel(r, final_only);
+
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ for (r = start; r < end; r += TLBI_VA(stride))
+ pmap_s1_invalidate_kernel(r, final_only);
+ }
} else {
start = end = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
start |= TLBI_VA(sva);
end |= TLBI_VA(eva);
for (r = start; r < end; r += TLBI_VA(stride))
pmap_s1_invalidate_user(r, final_only);
+
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ for (r = start; r < end; r += TLBI_VA(stride))
+ pmap_s1_invalidate_user(r, final_only);
+ }
}
dsb(ish);
isb();
@@ -1963,6 +2028,19 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
pmap_s2_invalidate_range(pmap, sva, eva, final_only);
}
+void
+pmap_s1_invalidate_all_kernel(void)
+{
+ dsb(ishst);
+ __asm __volatile("tlbi vmalle1is");
+ dsb(ish);
+ if (pmap_multiple_tlbi) {
+ __asm __volatile("tlbi vmalle1is");
+ dsb(ish);
+ }
+ isb();
+}
+
/*
* Invalidates all cached intermediate- and final-level TLB entries for the
* given virtual address space.
@@ -1977,9 +2055,17 @@ pmap_s1_invalidate_all(pmap_t pmap)
dsb(ishst);
if (pmap == kernel_pmap) {
__asm __volatile("tlbi vmalle1is");
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ __asm __volatile("tlbi vmalle1is");
+ }
} else {
r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
__asm __volatile("tlbi aside1is, %0" : : "r" (r));
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ __asm __volatile("tlbi aside1is, %0" : : "r" (r));
+ }
}
dsb(ish);
isb();
@@ -7967,7 +8053,7 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size)
pa += L2_SIZE;
}
if ((old_l2e & ATTR_DESCR_VALID) != 0)
- pmap_s1_invalidate_all(kernel_pmap);
+ pmap_s1_invalidate_all_kernel();
else {
/*
* Because the old entries were invalid and the new
@@ -8058,7 +8144,7 @@ pmap_unmapbios(void *p, vm_size_t size)
}
}
if (preinit_map) {
- pmap_s1_invalidate_all(kernel_pmap);
+ pmap_s1_invalidate_all_kernel();
return;
}
diff --git a/sys/arm64/arm64/ptrauth.c b/sys/arm64/arm64/ptrauth.c
index dbe0c69b8d60..fdab5414e24c 100644
--- a/sys/arm64/arm64/ptrauth.c
+++ b/sys/arm64/arm64/ptrauth.c
@@ -82,7 +82,7 @@ ptrauth_disable(void)
return (false);
}
-static bool
+static cpu_feat_en
ptrauth_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t isar;
@@ -116,14 +116,14 @@ ptrauth_check(const struct cpu_feat *feat __unused, u_int midr __unused)
if (get_kernel_reg(ID_AA64ISAR1_EL1, &isar)) {
if (ID_AA64ISAR1_APA_VAL(isar) > 0 ||
ID_AA64ISAR1_API_VAL(isar) > 0) {
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
}
}
/* The QARMA3 algorithm is reported in ID_AA64ISAR2_EL1. */
if (get_kernel_reg(ID_AA64ISAR2_EL1, &isar)) {
if (ID_AA64ISAR2_APA3_VAL(isar) > 0) {
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
}
}
@@ -138,10 +138,10 @@ out:
ID_AA64ISAR1_GPI_MASK, 0);
update_special_reg(ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_MASK, 0);
- return (false);
+ return (FEAT_ALWAYS_DISABLE);
}
-static void
+static bool
ptrauth_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
u_int errata_count __unused)
@@ -153,16 +153,13 @@ ptrauth_enable(const struct cpu_feat *feat __unused,
elf64_addr_mask_14.code |= PAC_ADDR_MASK_14;
elf64_addr_mask_14.data |= PAC_ADDR_MASK_14;
#endif
-}
+ return (true);
+}
-static struct cpu_feat feat_pauth = {
- .feat_name = "FEAT_PAuth",
- .feat_check = ptrauth_check,
- .feat_enable = ptrauth_enable,
- .feat_flags = CPU_FEAT_EARLY_BOOT | CPU_FEAT_SYSTEM,
-};
-DATA_SET(cpu_feat_set, feat_pauth);
+CPU_FEAT(feat_pauth, "Pointer Authentication",
+ ptrauth_check, NULL, ptrauth_enable,
+ CPU_FEAT_EARLY_BOOT | CPU_FEAT_SYSTEM);
/* Copy the keys when forking a new process */
void
diff --git a/sys/arm64/arm64/trap.c b/sys/arm64/arm64/trap.c
index bed58095201a..75c9b5f87892 100644
--- a/sys/arm64/arm64/trap.c
+++ b/sys/arm64/arm64/trap.c
@@ -246,6 +246,7 @@ external_abort(struct thread *td, struct trapframe *frame, uint64_t esr,
print_registers(frame);
print_gp_register("far", far);
+ printf(" esr: 0x%.16lx\n", esr);
panic("Unhandled external data abort");
}
diff --git a/sys/arm64/conf/std.arm64 b/sys/arm64/conf/std.arm64
index c83e98c17a33..a0568466cfaf 100644
--- a/sys/arm64/conf/std.arm64
+++ b/sys/arm64/conf/std.arm64
@@ -7,6 +7,7 @@ makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support
options SCHED_ULE # ULE scheduler
options NUMA # Non-Uniform Memory Architecture support
options PREEMPTION # Enable kernel thread preemption
+options EXTERR_STRINGS
options VIMAGE # Subsystem virtualization, e.g. VNET
options INET # InterNETworking
options INET6 # IPv6 communications protocols
diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h
index 59cda36f275e..124da8c215ed 100644
--- a/sys/arm64/include/cpu.h
+++ b/sys/arm64/include/cpu.h
@@ -125,7 +125,11 @@
#define CPU_PART_NEOVERSE_V3 0xD84
#define CPU_PART_CORTEX_X925 0xD85
#define CPU_PART_CORTEX_A725 0xD87
+#define CPU_PART_C1_NANO 0xD8A
+#define CPU_PART_C1_PRO 0xD8B
+#define CPU_PART_C1_ULTRA 0xD8C
#define CPU_PART_NEOVERSE_N3 0xD8E
+#define CPU_PART_C1_PREMIUM 0xD90
/* Cavium Part numbers */
#define CPU_PART_THUNDERX 0x0A1
@@ -193,8 +197,30 @@
(((mask) & PCPU_GET(midr)) == \
((mask) & CPU_ID_RAW((impl), (part), (var), (rev))))
-#define CPU_MATCH_RAW(mask, devid) \
- (((mask) & PCPU_GET(midr)) == ((mask) & (devid)))
+#if !defined(__ASSEMBLER__)
+static inline bool
+midr_check_var_part_range(u_int midr, u_int impl, u_int part, u_int var_low,
+ u_int rev_low, u_int var_high, u_int rev_high)
+{
+ /* Check for the correct part */
+ if (CPU_IMPL(midr) != impl || CPU_PART(midr) != part)
+ return (false);
+
+ /* Check if the variant is between var_low and var_high inclusive */
+ if (CPU_VAR(midr) < var_low || CPU_VAR(midr) > var_high)
+ return (false);
+
+ /* If the variant is the low value, check if the revision is high enough */
+ if (CPU_VAR(midr) == var_low && CPU_REV(midr) < rev_low)
+ return (false);
+
+ /* If the variant is the high value, check if the revision is low enough */
+ if (CPU_VAR(midr) == var_high && CPU_REV(midr) > rev_high)
+ return (false);
+
+ return (true);
+}
+#endif
/*
* Chip-specific errata. This defines are intended to be
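
As a worked example of the range helper above (a sketch; apply_workaround() is a hypothetical placeholder, and the bounds mirror the Neoverse-N1 call site elsewhere in this commit): an erratum present from r3p0 through r4p0 and fixed in r4p1 maps to var_low/rev_low = 3/0 and var_high/rev_high = 4/0.

    /* Affected r3p0 - r4p0, fixed in r4p1 (hypothetical use) */
    if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_NEOVERSE_N1,
        3, 0, 4, 0))
        apply_workaround();
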
diff --git a/sys/arm64/include/cpu_feat.h b/sys/arm64/include/cpu_feat.h
index 9fe6a9dd95d9..6a554b6baedf 100644
--- a/sys/arm64/include/cpu_feat.h
+++ b/sys/arm64/include/cpu_feat.h
@@ -29,6 +29,7 @@
#define _MACHINE_CPU_FEAT_H_
#include <sys/linker_set.h>
+#include <sys/sysctl.h>
typedef enum {
ERRATA_UNKNOWN, /* Unknown erratum */
@@ -39,6 +40,31 @@ typedef enum {
/* kernel component. */
} cpu_feat_errata;
+typedef enum {
+ /*
+ * Don't implement the feature or erratum workaround,
+ * e.g. the feature is not implemented or erratum is
+ * for another CPU.
+ */
+ FEAT_ALWAYS_DISABLE,
+
+ /*
+ * Disable by default, but allow the user to enable,
+ * e.g. For a rare erratum with a workaround, Arm
+ * Category B (rare) or similar.
+ */
+ FEAT_DEFAULT_DISABLE,
+
+ /*
+ * Enabled by default, but allow the user to disable,
+ * e.g. For a common erratum with a workaround, Arm
+ * Category A or B or similar.
+ */
+ FEAT_DEFAULT_ENABLE,
+
+ /* We could add FEAT_ALWAYS_ENABLE if a need was found. */
+} cpu_feat_en;
+
#define CPU_FEAT_STAGE_MASK 0x00000001
#define CPU_FEAT_EARLY_BOOT 0x00000000
#define CPU_FEAT_AFTER_DEV 0x00000001
@@ -49,10 +75,10 @@ typedef enum {
struct cpu_feat;
-typedef bool (cpu_feat_check)(const struct cpu_feat *, u_int);
+typedef cpu_feat_en (cpu_feat_check)(const struct cpu_feat *, u_int);
typedef bool (cpu_feat_has_errata)(const struct cpu_feat *, u_int,
u_int **, u_int *);
-typedef void (cpu_feat_enable)(const struct cpu_feat *, cpu_feat_errata,
+typedef bool (cpu_feat_enable)(const struct cpu_feat *, cpu_feat_errata,
u_int *, u_int);
struct cpu_feat {
@@ -61,9 +87,25 @@ struct cpu_feat {
cpu_feat_has_errata *feat_has_errata;
cpu_feat_enable *feat_enable;
uint32_t feat_flags;
+ bool feat_enabled;
};
SET_DECLARE(cpu_feat_set, struct cpu_feat);
+SYSCTL_DECL(_hw_feat);
+
+#define CPU_FEAT(name, descr, check, has_errata, enable, flags) \
+static struct cpu_feat name = { \
+ .feat_name = #name, \
+ .feat_check = check, \
+ .feat_has_errata = has_errata, \
+ .feat_enable = enable, \
+ .feat_flags = flags, \
+ .feat_enabled = false, \
+}; \
+DATA_SET(cpu_feat_set, name); \
+SYSCTL_BOOL(_hw_feat, OID_AUTO, name, CTLFLAG_RD, &name.feat_enabled, \
+ 0, descr)
+
/*
* Allow drivers to mark an erratum as worked around, e.g. the Errata
* Management ABI may know the workaround isn't needed on a given system.
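
A minimal declaration sketch under the updated interface (hypothetical feature; the callback signatures follow the typedefs above): the check callback reports a cpu_feat_en policy, the enable callback returns whether the feature was really enabled (which feeds the feat_enabled sysctl), and CPU_FEAT() registers both along with the read-only hw.feat entry.

static cpu_feat_en
example_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
	/* Probe the relevant ID register here. */
	return (FEAT_DEFAULT_ENABLE);
}

static bool
example_enable(const struct cpu_feat *feat __unused,
    cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
    u_int errata_count __unused)
{
	/* Set the relevant control bit here. */
	return (true);
}

CPU_FEAT(feat_example, "Hypothetical example feature",
    example_check, NULL, example_enable,
    CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU);
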
diff --git a/sys/arm64/include/pmap.h b/sys/arm64/include/pmap.h
index 0f23f200f0f6..406b6e2c5e0a 100644
--- a/sys/arm64/include/pmap.h
+++ b/sys/arm64/include/pmap.h
@@ -69,6 +69,7 @@ struct md_page {
TAILQ_HEAD(,pv_entry) pv_list;
int pv_gen;
vm_memattr_t pv_memattr;
+ uint8_t pv_reserve[3];
};
enum pmap_stage {
@@ -174,6 +175,8 @@ int pmap_fault(pmap_t, uint64_t, uint64_t);
struct pcb *pmap_switch(struct thread *);
+void pmap_s1_invalidate_all_kernel(void);
+
extern void (*pmap_clean_stage2_tlbi)(void);
extern void (*pmap_stage2_invalidate_range)(uint64_t, vm_offset_t, vm_offset_t,
bool);
diff --git a/sys/arm64/include/proc.h b/sys/arm64/include/proc.h
index 184743d4cc80..b40990e89385 100644
--- a/sys/arm64/include/proc.h
+++ b/sys/arm64/include/proc.h
@@ -75,6 +75,7 @@ struct mdthread {
struct mdproc {
uint64_t md_tcr; /* TCR_EL1 fields to update */
+ uint64_t md_reserved[2];
};
#endif /* !LOCORE */
diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h
index 73b5b4a09591..e839b5dd92c9 100644
--- a/sys/arm64/include/vmm.h
+++ b/sys/arm64/include/vmm.h
@@ -42,6 +42,7 @@ enum vm_suspend_how {
VM_SUSPEND_RESET,
VM_SUSPEND_POWEROFF,
VM_SUSPEND_HALT,
+ VM_SUSPEND_DESTROY,
VM_SUSPEND_LAST
};
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
index 3082d2941221..1dcefa1489e9 100644
--- a/sys/arm64/vmm/vmm.c
+++ b/sys/arm64/vmm/vmm.c
@@ -1342,8 +1342,14 @@ vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
+ struct vm *vm;
+
+ vm = vcpu->vm;
vcpu_lock(vcpu);
while (1) {
+ if (vm->suspend)
+ break;
+
if (vgic_has_pending_irq(vcpu->cookie))
break;
diff --git a/sys/cam/ata/ata_all.c b/sys/cam/ata/ata_all.c
index f9a2b86f0c06..7cd768a9811a 100644
--- a/sys/cam/ata/ata_all.c
+++ b/sys/cam/ata/ata_all.c
@@ -1151,7 +1151,7 @@ ata_zac_mgmt_out(struct ccb_ataio *ataio, uint32_t retries,
/*
* For SEND FPDMA QUEUED, the transfer length is
* encoded in the FEATURE register, and 0 means
- * that 65536 512 byte blocks are to be tranferred.
+ * that 65536 512 byte blocks are to be transferred.
* In practice, it seems unlikely that we'll see
* a transfer that large.
*/
@@ -1220,7 +1220,7 @@ ata_zac_mgmt_in(struct ccb_ataio *ataio, uint32_t retries,
/*
* For RECEIVE FPDMA QUEUED, the transfer length is
* encoded in the FEATURE register, and 0 means
- * that 65536 512 byte blocks are to be tranferred.
+ * that 65536 512 byte blocks are to be transferred.
* In practice, it is unlikely we will see a transfer that
* large.
*/
diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c
index d02750aaacaf..fc8c0413448d 100644
--- a/sys/cam/scsi/scsi_da.c
+++ b/sys/cam/scsi/scsi_da.c
@@ -6830,7 +6830,7 @@ scsi_ata_zac_mgmt_out(struct ccb_scsiio *csio, uint32_t retries,
/*
* For SEND FPDMA QUEUED, the transfer length is
* encoded in the FEATURE register, and 0 means
- * that 65536 512 byte blocks are to be tranferred.
+ * that 65536 512 byte blocks are to be transferred.
* In practice, it seems unlikely that we'll see
* a transfer that large, and it may confuse the
* the SAT layer, because generally that means that
@@ -6916,7 +6916,7 @@ scsi_ata_zac_mgmt_in(struct ccb_scsiio *csio, uint32_t retries,
/*
* For RECEIVE FPDMA QUEUED, the transfer length is
* encoded in the FEATURE register, and 0 means
- * that 65536 512 byte blocks are to be tranferred.
+ * that 65536 512 byte blocks are to be transferred.
* In practice, it seems unlikely that we'll see
* a transfer that large, and it may confuse the
* the SAT layer, because generally that means that
diff --git a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
index 7192df200ae2..8078f3f6d4b1 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
@@ -7761,7 +7761,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
uintptr_t *memref = (uintptr_t *)(uintptr_t) val;
if (!DTRACE_INSCRATCHPTR(&mstate,
- (uintptr_t)memref, 2 * sizeof(uintptr_t))) {
+ (uintptr_t) memref,
+ sizeof (uintptr_t) + sizeof (size_t))) {
*flags |= CPU_DTRACE_BADADDR;
continue;
}
@@ -7773,21 +7774,21 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
* Check if the size exceeds the allocated
* buffer size.
*/
- if (size + sizeof(uintptr_t) > dp->dtdo_rtype.dtdt_size) {
+ if (size + sizeof (size_t) >
+ dp->dtdo_rtype.dtdt_size) {
/* Flag a drop! */
*flags |= CPU_DTRACE_DROP;
continue;
}
/* Store the size in the buffer first. */
- DTRACE_STORE(uintptr_t, tomax,
- valoffs, size);
+ DTRACE_STORE(size_t, tomax, valoffs, size);
/*
* Offset the buffer address to the start
* of the data.
*/
- valoffs += sizeof(uintptr_t);
+ valoffs += sizeof(size_t);
/*
* Reset to the memory address rather than
diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h
index 90cd21a80923..54063150eef9 100644
--- a/sys/compat/freebsd32/freebsd32_syscall.h
+++ b/sys/compat/freebsd32/freebsd32_syscall.h
@@ -515,4 +515,6 @@
#define FREEBSD32_SYS_inotify_rm_watch 594
#define FREEBSD32_SYS_getgroups 595
#define FREEBSD32_SYS_setgroups 596
-#define FREEBSD32_SYS_MAXSYSCALL 597
+#define FREEBSD32_SYS_jail_attach_jd 597
+#define FREEBSD32_SYS_jail_remove_jd 598
+#define FREEBSD32_SYS_MAXSYSCALL 599
diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c
index f0f8d26554b5..f7cc4c284e4d 100644
--- a/sys/compat/freebsd32/freebsd32_syscalls.c
+++ b/sys/compat/freebsd32/freebsd32_syscalls.c
@@ -602,4 +602,6 @@ const char *freebsd32_syscallnames[] = {
"inotify_rm_watch", /* 594 = inotify_rm_watch */
"getgroups", /* 595 = getgroups */
"setgroups", /* 596 = setgroups */
+ "jail_attach_jd", /* 597 = jail_attach_jd */
+ "jail_remove_jd", /* 598 = jail_remove_jd */
};
diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c
index 12f1a346c3e9..18f809ef04e3 100644
--- a/sys/compat/freebsd32/freebsd32_sysent.c
+++ b/sys/compat/freebsd32/freebsd32_sysent.c
@@ -664,4 +664,6 @@ struct sysent freebsd32_sysent[] = {
{ .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
{ .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */
{ .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */
+ { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */
+ { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */
};
diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c
index e471c5148021..29a5497e9efa 100644
--- a/sys/compat/freebsd32/freebsd32_systrace_args.c
+++ b/sys/compat/freebsd32/freebsd32_systrace_args.c
@@ -3413,6 +3413,20 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 2;
break;
}
+ /* jail_attach_jd */
+ case 597: {
+ struct jail_attach_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* jail_remove_jd */
+ case 598: {
+ struct jail_remove_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -9222,6 +9236,26 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
+ /* jail_attach_jd */
+ case 597:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* jail_remove_jd */
+ case 598:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -11130,6 +11164,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* jail_attach_jd */
+ case 597:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* jail_remove_jd */
+ case 598:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/compat/lindebugfs/lindebugfs.c b/sys/compat/lindebugfs/lindebugfs.c
index 50f9377ffec3..8cddc6f390bc 100644
--- a/sys/compat/lindebugfs/lindebugfs.c
+++ b/sys/compat/lindebugfs/lindebugfs.c
@@ -206,7 +206,7 @@ debugfs_create_file(const char *name, umode_t mode,
pnode = debugfs_root;
flags = fops->write ? PFS_RDWR : PFS_RD;
- dnode->d_pfs_node = pfs_create_file(pnode, name, debugfs_fill,
+ pfs_create_file(pnode, &dnode->d_pfs_node, name, debugfs_fill,
debugfs_attr, NULL, debugfs_destroy, flags | PFS_NOWAIT);
if (dnode->d_pfs_node == NULL) {
free(dm, M_DFSINT);
@@ -283,7 +283,8 @@ debugfs_create_dir(const char *name, struct dentry *parent)
else
pnode = debugfs_root;
- dnode->d_pfs_node = pfs_create_dir(pnode, name, debugfs_attr, NULL, debugfs_destroy, PFS_RD | PFS_NOWAIT);
+ pfs_create_dir(pnode, &dnode->d_pfs_node, name, debugfs_attr, NULL,
+ debugfs_destroy, PFS_RD | PFS_NOWAIT);
if (dnode->d_pfs_node == NULL) {
free(dm, M_DFSINT);
return (NULL);
@@ -316,7 +317,8 @@ debugfs_create_symlink(const char *name, struct dentry *parent,
else
pnode = debugfs_root;
- dnode->d_pfs_node = pfs_create_link(pnode, name, &debugfs_fill_data, NULL, NULL, NULL, PFS_NOWAIT);
+ pfs_create_link(pnode, &dnode->d_pfs_node, name, &debugfs_fill_data,
+ NULL, NULL, NULL, PFS_NOWAIT);
if (dnode->d_pfs_node == NULL)
goto fail;
dnode->d_pfs_node->pn_data = dm;
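
The lindebugfs, linprocfs and linsysfs hunks in this change all track the same pseudofs API change: pfs_create_file(), pfs_create_dir() and pfs_create_link() now hand back the new node through an out pointer (which may be NULL when the caller does not need it) and, as the linsysfs hunks suggest, report failure through an int return instead of returning the node. A minimal sketch of the new convention, with hypothetical names:

	struct pfs_node *parent, *dir;
	int error;

	/* Node wanted: pass its address and check the error. */
	error = pfs_create_dir(parent, &dir, "example", NULL, NULL, NULL, 0);
	if (error != 0)
		return (error);

	/* Node not needed: pass NULL for the out pointer. */
	pfs_create_file(dir, NULL, "status", &example_fill, NULL, NULL, NULL,
	    PFS_RD);
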
diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c
index 1c6d64d6b8bc..95b212be1306 100644
--- a/sys/compat/linprocfs/linprocfs.c
+++ b/sys/compat/linprocfs/linprocfs.c
@@ -2320,165 +2320,165 @@ linprocfs_init(PFS_INIT_ARGS)
root = pi->pi_root;
/* /proc/... */
- pfs_create_file(root, "cmdline", &linprocfs_docmdline,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "cpuinfo", &linprocfs_docpuinfo,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "devices", &linprocfs_dodevices,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "filesystems", &linprocfs_dofilesystems,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "loadavg", &linprocfs_doloadavg,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "meminfo", &linprocfs_domeminfo,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "modules", &linprocfs_domodules,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "mounts", &linprocfs_domtab,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "mtab", &linprocfs_domtab,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "partitions", &linprocfs_dopartitions,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_link(root, "self", &procfs_docurproc,
- NULL, NULL, NULL, 0);
- pfs_create_file(root, "stat", &linprocfs_dostat,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "swaps", &linprocfs_doswaps,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "uptime", &linprocfs_douptime,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(root, "version", &linprocfs_doversion,
+ pfs_create_file(root, NULL, "cmdline", &linprocfs_docmdline, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "cpuinfo", &linprocfs_docpuinfo, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "devices", &linprocfs_dodevices, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "filesystems", &linprocfs_dofilesystems,
NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(root, NULL, "loadavg", &linprocfs_doloadavg, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "meminfo", &linprocfs_domeminfo, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "modules", &linprocfs_domodules, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "mounts", &linprocfs_domtab, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "mtab", &linprocfs_domtab, NULL, NULL, NULL,
+ PFS_RD);
+ pfs_create_file(root, NULL, "partitions", &linprocfs_dopartitions, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_link(root, NULL, "self", &procfs_docurproc, NULL, NULL, NULL,
+ 0);
+ pfs_create_file(root, NULL, "stat", &linprocfs_dostat, NULL, NULL, NULL,
+ PFS_RD);
+ pfs_create_file(root, NULL, "swaps", &linprocfs_doswaps, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "uptime", &linprocfs_douptime, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(root, NULL, "version", &linprocfs_doversion, NULL, NULL,
+ NULL, PFS_RD);
/* /proc/bus/... */
- dir = pfs_create_dir(root, "bus", NULL, NULL, NULL, 0);
- dir = pfs_create_dir(dir, "pci", NULL, NULL, NULL, 0);
- dir = pfs_create_dir(dir, "devices", NULL, NULL, NULL, 0);
+ pfs_create_dir(root, &dir, "bus", NULL, NULL, NULL, 0);
+ pfs_create_dir(dir, &dir, "pci", NULL, NULL, NULL, 0);
+ pfs_create_dir(dir, &dir, "devices", NULL, NULL, NULL, 0);
/* /proc/net/... */
- dir = pfs_create_dir(root, "net", NULL, NULL, NULL, 0);
- pfs_create_file(dir, "dev", &linprocfs_donetdev,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "route", &linprocfs_donetroute,
- NULL, NULL, NULL, PFS_RD);
+ pfs_create_dir(root, &dir, "net", NULL, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, "dev", &linprocfs_donetdev, NULL, NULL, NULL,
+ PFS_RD);
+ pfs_create_file(dir, NULL, "route", &linprocfs_donetroute, NULL, NULL,
+ NULL, PFS_RD);
/* /proc/<pid>/... */
- dir = pfs_create_dir(root, "pid", NULL, NULL, NULL, PFS_PROCDEP);
- pfs_create_file(dir, "cmdline", &linprocfs_doproccmdline,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_link(dir, "cwd", &linprocfs_doproccwd,
- NULL, NULL, NULL, 0);
- pfs_create_file(dir, "environ", &linprocfs_doprocenviron,
- NULL, &procfs_candebug, NULL, PFS_RD);
- pfs_create_link(dir, "exe", &procfs_doprocfile,
- NULL, &procfs_notsystem, NULL, 0);
- pfs_create_file(dir, "maps", &linprocfs_doprocmaps,
- NULL, NULL, NULL, PFS_RD | PFS_AUTODRAIN);
- pfs_create_file(dir, "mem", &linprocfs_doprocmem,
- procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
- pfs_create_file(dir, "mountinfo", &linprocfs_doprocmountinfo,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "mounts", &linprocfs_domtab,
+ pfs_create_dir(root, &dir, "pid", NULL, NULL, NULL, PFS_PROCDEP);
+ pfs_create_file(dir, NULL, "cmdline", &linprocfs_doproccmdline, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_link(dir, NULL, "cwd", &linprocfs_doproccwd, NULL, NULL,
+ NULL, 0);
+ pfs_create_file(dir, NULL, "environ", &linprocfs_doprocenviron, NULL,
+ &procfs_candebug, NULL, PFS_RD);
+ pfs_create_link(dir, NULL, "exe", &procfs_doprocfile, NULL,
+ &procfs_notsystem, NULL, 0);
+ pfs_create_file(dir, NULL, "maps", &linprocfs_doprocmaps, NULL, NULL,
+ NULL, PFS_RD | PFS_AUTODRAIN);
+ pfs_create_file(dir, NULL, "mem", &linprocfs_doprocmem, procfs_attr_rw,
+ &procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
+ pfs_create_file(dir, NULL, "mountinfo", &linprocfs_doprocmountinfo,
NULL, NULL, NULL, PFS_RD);
- pfs_create_link(dir, "root", &linprocfs_doprocroot,
- NULL, NULL, NULL, 0);
- pfs_create_file(dir, "stat", &linprocfs_doprocstat,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "statm", &linprocfs_doprocstatm,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "status", &linprocfs_doprocstatus,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_link(dir, "fd", &linprocfs_dofdescfs,
- NULL, NULL, NULL, 0);
- pfs_create_file(dir, "auxv", &linprocfs_doauxv,
- NULL, &procfs_candebug, NULL, PFS_RD|PFS_RAWRD);
- pfs_create_file(dir, "limits", &linprocfs_doproclimits,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "oom_score_adj", &linprocfs_do_oom_score_adj,
+ pfs_create_file(dir, NULL, "mounts", &linprocfs_domtab, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_link(dir, NULL, "root", &linprocfs_doprocroot, NULL, NULL,
+ NULL, 0);
+ pfs_create_file(dir, NULL, "stat", &linprocfs_doprocstat, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "statm", &linprocfs_doprocstatm, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "status", &linprocfs_doprocstatus, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_link(dir, NULL, "fd", &linprocfs_dofdescfs, NULL, NULL, NULL,
+ 0);
+ pfs_create_file(dir, NULL, "auxv", &linprocfs_doauxv, NULL,
+ &procfs_candebug, NULL, PFS_RD | PFS_RAWRD);
+ pfs_create_file(dir, NULL, "limits", &linprocfs_doproclimits, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "oom_score_adj", &linprocfs_do_oom_score_adj,
procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR);
/* /proc/<pid>/task/... */
- dir = pfs_create_dir(dir, "task", linprocfs_dotaskattr, NULL, NULL, 0);
- pfs_create_file(dir, ".dummy", &linprocfs_dotaskdummy,
- NULL, NULL, NULL, PFS_RD);
+ pfs_create_dir(dir, &dir, "task", linprocfs_dotaskattr, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, ".dummy", &linprocfs_dotaskdummy, NULL, NULL,
+ NULL, PFS_RD);
/* /proc/scsi/... */
- dir = pfs_create_dir(root, "scsi", NULL, NULL, NULL, 0);
- pfs_create_file(dir, "device_info", &linprocfs_doscsidevinfo,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "scsi", &linprocfs_doscsiscsi,
+ pfs_create_dir(root, &dir, "scsi", NULL, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, "device_info", &linprocfs_doscsidevinfo,
NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "scsi", &linprocfs_doscsiscsi, NULL, NULL,
+ NULL, PFS_RD);
/* /proc/sys/... */
- sys = pfs_create_dir(root, "sys", NULL, NULL, NULL, 0);
+ pfs_create_dir(root, &sys, "sys", NULL, NULL, NULL, 0);
/* /proc/sys/kernel/... */
- dir = pfs_create_dir(sys, "kernel", NULL, NULL, NULL, 0);
- pfs_create_file(dir, "osrelease", &linprocfs_doosrelease,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "ostype", &linprocfs_doostype,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "version", &linprocfs_doosbuild,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "msgmax", &linprocfs_domsgmax,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "msgmni", &linprocfs_domsgmni,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "msgmnb", &linprocfs_domsgmnb,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "ngroups_max", &linprocfs_dongroups_max,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "pid_max", &linprocfs_dopid_max,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "sem", &linprocfs_dosem,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "shmall", &linprocfs_doshmall,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "shmmax", &linprocfs_doshmmax,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "shmmni", &linprocfs_doshmmni,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "tainted", &linprocfs_dotainted,
+ pfs_create_dir(sys, &dir, "kernel", NULL, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, "osrelease", &linprocfs_doosrelease, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "ostype", &linprocfs_doostype, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "version", &linprocfs_doosbuild, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "msgmax", &linprocfs_domsgmax, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "msgmni", &linprocfs_domsgmni, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "msgmnb", &linprocfs_domsgmnb, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "ngroups_max", &linprocfs_dongroups_max,
NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "pid_max", &linprocfs_dopid_max, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "sem", &linprocfs_dosem, NULL, NULL, NULL,
+ PFS_RD);
+ pfs_create_file(dir, NULL, "shmall", &linprocfs_doshmall, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "shmmax", &linprocfs_doshmmax, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "shmmni", &linprocfs_doshmmni, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "tainted", &linprocfs_dotainted, NULL, NULL,
+ NULL, PFS_RD);
/* /proc/sys/kernel/random/... */
- dir = pfs_create_dir(dir, "random", NULL, NULL, NULL, 0);
- pfs_create_file(dir, "uuid", &linprocfs_douuid,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "boot_id", &linprocfs_doboot_id,
- NULL, NULL, NULL, PFS_RD);
+ pfs_create_dir(dir, &dir, "random", NULL, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, "uuid", &linprocfs_douuid, NULL, NULL, NULL,
+ PFS_RD);
+ pfs_create_file(dir, NULL, "boot_id", &linprocfs_doboot_id, NULL, NULL,
+ NULL, PFS_RD);
/* /proc/sys/vm/.... */
- dir = pfs_create_dir(sys, "vm", NULL, NULL, NULL, 0);
- pfs_create_file(dir, "min_free_kbytes", &linprocfs_dominfree,
+ pfs_create_dir(sys, &dir, "vm", NULL, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, "min_free_kbytes", &linprocfs_dominfree,
NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "max_map_count", &linprocfs_domax_map_cnt,
+ pfs_create_file(dir, NULL, "max_map_count", &linprocfs_domax_map_cnt,
NULL, NULL, NULL, PFS_RD);
/* /proc/sysvipc/... */
- dir = pfs_create_dir(root, "sysvipc", NULL, NULL, NULL, 0);
- pfs_create_file(dir, "msg", &linprocfs_dosysvipc_msg,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "sem", &linprocfs_dosysvipc_sem,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "shm", &linprocfs_dosysvipc_shm,
- NULL, NULL, NULL, PFS_RD);
+ pfs_create_dir(root, &dir, "sysvipc", NULL, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, "msg", &linprocfs_dosysvipc_msg, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "sem", &linprocfs_dosysvipc_sem, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "shm", &linprocfs_dosysvipc_shm, NULL, NULL,
+ NULL, PFS_RD);
/* /proc/sys/fs/... */
- dir = pfs_create_dir(sys, "fs", NULL, NULL, NULL, 0);
+ pfs_create_dir(sys, &dir, "fs", NULL, NULL, NULL, 0);
/* /proc/sys/fs/mqueue/... */
- dir = pfs_create_dir(dir, "mqueue", NULL, NULL, NULL, 0);
- pfs_create_file(dir, "msg_default", &linprocfs_domqueue_msg_default,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "msgsize_default", &linprocfs_domqueue_msgsize_default,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "msg_max", &linprocfs_domqueue_msg_max,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "msgsize_max", &linprocfs_domqueue_msgsize_max,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "queues_max", &linprocfs_domqueue_queues_max,
+ pfs_create_dir(dir, &dir, "mqueue", NULL, NULL, NULL, 0);
+ pfs_create_file(dir, NULL, "msg_default",
+ &linprocfs_domqueue_msg_default, NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "msgsize_default",
+ &linprocfs_domqueue_msgsize_default, NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "msg_max", &linprocfs_domqueue_msg_max, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "msgsize_max",
+ &linprocfs_domqueue_msgsize_max, NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "queues_max", &linprocfs_domqueue_queues_max,
NULL, NULL, NULL, PFS_RD);
return (0);
diff --git a/sys/compat/linsysfs/linsysfs.c b/sys/compat/linsysfs/linsysfs.c
index 7f70221b420d..5a41c5193415 100644
--- a/sys/compat/linsysfs/linsysfs.c
+++ b/sys/compat/linsysfs/linsysfs.c
@@ -267,6 +267,8 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi,
struct pci_devinfo *dinfo;
char *device, *host, *new_path, *devname;
+ children = NULL;
+ device = host = NULL;
new_path = path;
devname = malloc(16, M_TEMP, M_WAITOK);
@@ -292,39 +294,43 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi,
dinfo->cfg.func);
strcat(new_path, "/");
strcat(new_path, device);
- dir = pfs_create_dir(dir, device,
+ error = pfs_create_dir(dir, &dir, device,
NULL, NULL, NULL, 0);
- cur_file = pfs_create_file(dir, "vendor",
+ if (error != 0)
+ goto out;
+ pfs_create_dir(dir, &dir, device, NULL, NULL,
+ NULL, 0);
+ pfs_create_file(dir, &cur_file, "vendor",
&linsysfs_fill_vendor, NULL, NULL, NULL,
PFS_RD);
cur_file->pn_data = (void*)dev;
- cur_file = pfs_create_file(dir, "device",
+ pfs_create_file(dir, &cur_file, "device",
&linsysfs_fill_device, NULL, NULL, NULL,
PFS_RD);
cur_file->pn_data = (void*)dev;
- cur_file = pfs_create_file(dir,
+ pfs_create_file(dir, &cur_file,
"subsystem_vendor",
&linsysfs_fill_subvendor, NULL, NULL, NULL,
PFS_RD);
cur_file->pn_data = (void*)dev;
- cur_file = pfs_create_file(dir,
+ pfs_create_file(dir, &cur_file,
"subsystem_device",
&linsysfs_fill_subdevice, NULL, NULL, NULL,
PFS_RD);
cur_file->pn_data = (void*)dev;
- cur_file = pfs_create_file(dir, "revision",
+ pfs_create_file(dir, &cur_file, "revision",
&linsysfs_fill_revid, NULL, NULL, NULL,
PFS_RD);
cur_file->pn_data = (void*)dev;
- cur_file = pfs_create_file(dir, "config",
+ pfs_create_file(dir, &cur_file, "config",
&linsysfs_fill_config, NULL, NULL, NULL,
PFS_RD);
cur_file->pn_data = (void*)dev;
- cur_file = pfs_create_file(dir, "uevent",
- &linsysfs_fill_uevent_pci, NULL, NULL,
- NULL, PFS_RD);
+ pfs_create_file(dir, &cur_file, "uevent",
+ &linsysfs_fill_uevent_pci, NULL, NULL, NULL,
+ PFS_RD);
cur_file->pn_data = (void*)dev;
- cur_file = pfs_create_link(dir, "subsystem",
+ pfs_create_link(dir, &cur_file, "subsystem",
&linsysfs_fill_data, NULL, NULL, NULL, 0);
/* libdrm just checks that the link ends in "/pci" */
cur_file->pn_data = "/sys/bus/pci";
@@ -334,34 +340,32 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi,
sprintf(host, "host%d", host_number++);
strcat(new_path, "/");
strcat(new_path, host);
- pfs_create_dir(dir, host,
- NULL, NULL, NULL, 0);
+ pfs_create_dir(dir, NULL, host, NULL,
+ NULL, NULL, 0);
scsi_host = malloc(sizeof(
struct scsi_host_queue),
- M_DEVBUF, M_NOWAIT);
+ M_DEVBUF, M_WAITOK);
scsi_host->path = malloc(
strlen(new_path) + 1,
- M_DEVBUF, M_NOWAIT);
+ M_DEVBUF, M_WAITOK);
scsi_host->path[0] = '\000';
bcopy(new_path, scsi_host->path,
strlen(new_path) + 1);
scsi_host->name = "unknown";
- sub_dir = pfs_create_dir(scsi, host,
+ pfs_create_dir(scsi, &sub_dir, host,
NULL, NULL, NULL, 0);
- pfs_create_link(sub_dir, "device",
- &linsysfs_link_scsi_host,
- NULL, NULL, NULL, 0);
- pfs_create_file(sub_dir, "proc_name",
- &linsysfs_scsiname,
+ pfs_create_link(sub_dir, NULL, "device",
+ &linsysfs_link_scsi_host, NULL,
+ NULL, NULL, 0);
+ pfs_create_file(sub_dir, NULL,
+ "proc_name", &linsysfs_scsiname,
NULL, NULL, NULL, PFS_RD);
scsi_host->name
= linux_driver_get_name_dev(dev);
TAILQ_INSERT_TAIL(&scsi_host_q,
scsi_host, scsi_host_next);
}
- free(device, M_TEMP);
- free(host, M_TEMP);
}
}
@@ -374,26 +378,27 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi,
device_get_unit(dev) >= 0) {
dinfo = device_get_ivars(parent);
if (dinfo != NULL && dinfo->cfg.baseclass == PCIC_DISPLAY) {
- pfs_create_dir(dir, "drm", NULL, NULL, NULL, 0);
+ pfs_create_dir(dir, NULL, "drm", NULL, NULL,
+ NULL, 0);
sprintf(devname, "226:%d",
device_get_unit(dev));
- sub_dir = pfs_create_dir(chardev,
- devname, NULL, NULL, NULL, 0);
- cur_file = pfs_create_link(sub_dir,
- "device", &linsysfs_fill_vgapci, NULL,
- NULL, NULL, PFS_RD);
+ pfs_create_dir(chardev, &sub_dir, devname, NULL,
+ NULL, NULL, 0);
+ pfs_create_link(sub_dir, &cur_file, "device",
+ &linsysfs_fill_vgapci, NULL, NULL, NULL,
+ PFS_RD);
cur_file->pn_data = (void*)dir;
- cur_file = pfs_create_file(sub_dir,
- "uevent", &linsysfs_fill_uevent_drm, NULL,
- NULL, NULL, PFS_RD);
+ pfs_create_file(sub_dir, &cur_file, "uevent",
+ &linsysfs_fill_uevent_drm, NULL, NULL, NULL,
+ PFS_RD);
cur_file->pn_data = (void*)dev;
sprintf(devname, "card%d",
device_get_unit(dev));
- sub_dir = pfs_create_dir(drm,
- devname, NULL, NULL, NULL, 0);
- cur_file = pfs_create_link(sub_dir,
- "device", &linsysfs_fill_vgapci, NULL,
- NULL, NULL, PFS_RD);
+ pfs_create_dir(drm, &sub_dir, devname, NULL,
+ NULL, NULL, 0);
+ pfs_create_link(sub_dir, &cur_file, "device",
+ &linsysfs_fill_vgapci, NULL, NULL, NULL,
+ PFS_RD);
cur_file->pn_data = (void*)dir;
}
}
@@ -401,17 +406,37 @@ linsysfs_run_bus(device_t dev, struct pfs_node *dir, struct pfs_node *scsi,
error = device_get_children(dev, &children, &nchildren);
if (error == 0) {
- for (i = 0; i < nchildren; i++)
- if (children[i])
- linsysfs_run_bus(children[i], dir, scsi,
+ for (i = 0; i < nchildren; i++) {
+ if (children[i]) {
+ error = linsysfs_run_bus(children[i], dir, scsi,
chardev, drm, new_path, prefix);
- free(children, M_TEMP);
+ if (error != 0) {
+ printf(
+ "linsysfs_run_bus: %s omitted from sysfs tree, error %d\n",
+ device_get_nameunit(children[i]),
+ error);
+ }
+ }
+ }
+
+ /*
+ * We override the error to avoid cascading failures; the
+ * innermost device that failed in a tree is probably the most
+	 * significant one for diagnostics; its parents would be noise.
+ */
+ error = 0;
}
+
+out:
+ free(host, M_TEMP);
+ free(device, M_TEMP);
+ if (children != NULL)
+ free(children, M_TEMP);
if (new_path != path)
free(new_path, M_TEMP);
free(devname, M_TEMP);
- return (1);
+ return (error);
}
/*
@@ -455,10 +480,10 @@ linsysfs_listcpus(struct pfs_node *dir)
for (i = 0; i < mp_ncpus; ++i) {
/* /sys/devices/system/cpu/cpuX */
sprintf(name, "cpu%d", i);
- cpu = pfs_create_dir(dir, name, NULL, NULL, NULL, 0);
+ pfs_create_dir(dir, &cpu, name, NULL, NULL, NULL, 0);
- pfs_create_file(cpu, "online", &linsysfs_cpuxonline,
- NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(cpu, NULL, "online", &linsysfs_cpuxonline, NULL,
+ NULL, NULL, PFS_RD);
}
free(name, M_TEMP);
}
@@ -485,52 +510,56 @@ linsysfs_init(PFS_INIT_ARGS)
root = pi->pi_root;
/* /sys/bus/... */
- dir = pfs_create_dir(root, "bus", NULL, NULL, NULL, 0);
+ pfs_create_dir(root, &dir, "bus", NULL, NULL, NULL, 0);
/* /sys/class/... */
- class = pfs_create_dir(root, "class", NULL, NULL, NULL, 0);
- scsi = pfs_create_dir(class, "scsi_host", NULL, NULL, NULL, 0);
- drm = pfs_create_dir(class, "drm", NULL, NULL, NULL, 0);
- pfs_create_dir(class, "power_supply", NULL, NULL, NULL, 0);
+ pfs_create_dir(root, &class, "class", NULL, NULL, NULL, 0);
+ pfs_create_dir(class, &scsi, "scsi_host", NULL, NULL, NULL, 0);
+ pfs_create_dir(class, &drm, "drm", NULL, NULL, NULL, 0);
+ pfs_create_dir(class, NULL, "power_supply", NULL, NULL, NULL, 0);
/* /sys/class/net/.. */
- net = pfs_create_dir(class, "net", NULL, NULL, NULL, 0);
+ pfs_create_dir(class, &net, "net", NULL, NULL, NULL, 0);
/* /sys/dev/... */
- devdir = pfs_create_dir(root, "dev", NULL, NULL, NULL, 0);
- chardev = pfs_create_dir(devdir, "char", NULL, NULL, NULL, 0);
+ pfs_create_dir(root, &devdir, "dev", NULL, NULL, NULL, 0);
+ pfs_create_dir(devdir, &chardev, "char", NULL, NULL, NULL, 0);
/* /sys/devices/... */
- dir = pfs_create_dir(root, "devices", NULL, NULL, NULL, 0);
- pci = pfs_create_dir(dir, "pci0000:00", NULL, NULL, NULL, 0);
+ pfs_create_dir(root, &dir, "devices", NULL, NULL, NULL, 0);
+ pfs_create_dir(dir, &pci, "pci0000:00", NULL, NULL, NULL, 0);
devclass = devclass_find("root");
if (devclass == NULL) {
return (0);
}
+ /*
+ * This assumes that the root node is unlikely to error out in
+ * linsysfs_run_bus, which may or may not be true.
+ */
dev = devclass_get_device(devclass, 0);
linsysfs_run_bus(dev, pci, scsi, chardev, drm, "/pci0000:00", "0000");
/* /sys/devices/system */
- sys = pfs_create_dir(dir, "system", NULL, NULL, NULL, 0);
+ pfs_create_dir(dir, &sys, "system", NULL, NULL, NULL, 0);
/* /sys/devices/system/cpu */
- cpu = pfs_create_dir(sys, "cpu", NULL, NULL, NULL, 0);
+ pfs_create_dir(sys, &cpu, "cpu", NULL, NULL, NULL, 0);
- pfs_create_file(cpu, "online", &linsysfs_cpuonline,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(cpu, "possible", &linsysfs_cpuonline,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(cpu, "present", &linsysfs_cpuonline,
- NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(cpu, NULL, "online", &linsysfs_cpuonline, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(cpu, NULL, "possible", &linsysfs_cpuonline, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(cpu, NULL, "present", &linsysfs_cpuonline, NULL, NULL,
+ NULL, PFS_RD);
linsysfs_listcpus(cpu);
/* /sys/kernel */
- kernel = pfs_create_dir(root, "kernel", NULL, NULL, NULL, 0);
+ pfs_create_dir(root, &kernel, "kernel", NULL, NULL, NULL, 0);
/* /sys/kernel/debug, mountpoint for lindebugfs. */
- pfs_create_dir(kernel, "debug", NULL, NULL, NULL, 0);
+ pfs_create_dir(kernel, NULL, "debug", NULL, NULL, NULL, 0);
linsysfs_net_init();
diff --git a/sys/compat/linsysfs/linsysfs_net.c b/sys/compat/linsysfs/linsysfs_net.c
index 73602b0132a4..751dbb5b3713 100644
--- a/sys/compat/linsysfs/linsysfs_net.c
+++ b/sys/compat/linsysfs/linsysfs_net.c
@@ -237,22 +237,22 @@ linsysfs_net_addif(if_t ifp, void *arg)
nic = pfs_find_node(dir, ifname);
if (nic == NULL) {
- nic = pfs_create_dir(dir, ifname, NULL, linsysfs_if_visible,
+ pfs_create_dir(dir, &nic, ifname, NULL, linsysfs_if_visible,
NULL, 0);
- pfs_create_file(nic, "address", &linsysfs_if_addr,
+ pfs_create_file(nic, NULL, "address", &linsysfs_if_addr, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_file(nic, NULL, "addr_len", &linsysfs_if_addrlen,
NULL, NULL, NULL, PFS_RD);
- pfs_create_file(nic, "addr_len", &linsysfs_if_addrlen,
+ pfs_create_file(nic, NULL, "flags", &linsysfs_if_flags, NULL,
+ NULL, NULL, PFS_RD);
+ pfs_create_file(nic, NULL, "ifindex", &linsysfs_if_ifindex,
NULL, NULL, NULL, PFS_RD);
- pfs_create_file(nic, "flags", &linsysfs_if_flags,
+ pfs_create_file(nic, NULL, "mtu", &linsysfs_if_mtu, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(nic, NULL, "tx_queue_len", &linsysfs_if_txq_len,
NULL, NULL, NULL, PFS_RD);
- pfs_create_file(nic, "ifindex", &linsysfs_if_ifindex,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(nic, "mtu", &linsysfs_if_mtu,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(nic, "tx_queue_len", &linsysfs_if_txq_len,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(nic, "type", &linsysfs_if_type,
- NULL, NULL, NULL, PFS_RD);
+ pfs_create_file(nic, NULL, "type", &linsysfs_if_type, NULL,
+ NULL, NULL, PFS_RD);
}
/*
* There is a small window between registering the if_arrival
diff --git a/sys/compat/linux/linux_netlink.c b/sys/compat/linux/linux_netlink.c
index 927f3689e2b6..6aeafe84adc6 100644
--- a/sys/compat/linux/linux_netlink.c
+++ b/sys/compat/linux/linux_netlink.c
@@ -242,9 +242,24 @@ nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw)
}
/*
+ * Translate a FreeBSD interface name to a Linux interface name.
+ */
+static bool
+nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw)
+{
+ char ifname[LINUX_IFNAMSIZ];
+
+ if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname,
+ sizeof(ifname)) <= 0)
+ return (false);
+ return (nlattr_add_string(nw, IFLA_IFNAME, ifname));
+}
+
+#define LINUX_NLA_UNHANDLED -1
+/*
* Translate a FreeBSD attribute to a Linux attribute.
- * Returns false when the attribute is not processed and the caller must take
- * care of it.
+ * Returns LINUX_NLA_UNHANDLED when the attribute is not processed
+ * and the caller must take care of it; otherwise the result is returned.
*/
static int
nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla,
@@ -256,27 +271,22 @@ nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla,
case NL_RTM_DELLINK:
case NL_RTM_GETLINK:
switch (nla->nla_type) {
- case IFLA_IFNAME: {
- char ifname[LINUX_IFNAMSIZ];
-
- if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname,
- sizeof(ifname)) > 0)
- return (true);
- break;
- }
+ case IFLA_IFNAME:
+ return (nlmsg_translate_ifname_nla(nla, nw));
default:
break;
}
default:
break;
}
- return (false);
+ return (LINUX_NLA_UNHANDLED);
}
static bool
nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw)
{
struct nlattr *nla;
+ int ret;
int hdrlen = NETLINK_ALIGN(raw_hdrlen);
int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
@@ -287,12 +297,15 @@ nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw)
if (nla->nla_len < sizeof(struct nlattr)) {
return (false);
}
- if (!nlmsg_translate_all_nla(hdr, nla, nw) &&
- !nlmsg_copy_nla(nla, nw))
+ ret = nlmsg_translate_all_nla(hdr, nla, nw);
+ if (ret == LINUX_NLA_UNHANDLED)
+ ret = nlmsg_copy_nla(nla, nw);
+ if (!ret)
return (false);
}
return (true);
}
+#undef LINUX_NLA_UNHANDLED
static unsigned int
rtnl_if_flags_to_linux(unsigned int if_flags)
diff --git a/sys/compat/linuxkpi/common/include/acpi/acpi.h b/sys/compat/linuxkpi/common/include/acpi/acpi.h
index 1e398d05ba20..9bb435591daa 100644
--- a/sys/compat/linuxkpi/common/include/acpi/acpi.h
+++ b/sys/compat/linuxkpi/common/include/acpi/acpi.h
@@ -37,7 +37,7 @@
/*
* LINUXKPI_WANT_LINUX_ACPI is a temporary workaround to allow drm-kmod
* to update all needed branches without breaking builds.
- * Once that happened and checks are implemented based on __FreeBSD_verison
+ * Once that happened and checks are implemented based on __FreeBSD_version
* we will remove these conditions again.
*/
@@ -131,7 +131,7 @@ acpi_format_exception(ACPI_STATUS Exception)
}
static inline ACPI_STATUS
-acpi_get_handle(ACPI_HANDLE Parent, ACPI_STRING Pathname,
+acpi_get_handle(ACPI_HANDLE Parent, const char *Pathname,
ACPI_HANDLE *RetHandle)
{
return (AcpiGetHandle(Parent, Pathname, RetHandle));
diff --git a/sys/compat/linuxkpi/common/include/kunit/static_stub.h b/sys/compat/linuxkpi/common/include/kunit/static_stub.h
new file mode 100644
index 000000000000..9d425d46dbb0
--- /dev/null
+++ b/sys/compat/linuxkpi/common/include/kunit/static_stub.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Björn Zeeb under sponsorship from
+ * the FreeBSD Foundation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#ifndef _LINUXKPI_KUNIT_STATIC_STUB_H
+#define _LINUXKPI_KUNIT_STATIC_STUB_H
+
+#define KUNIT_STATIC_STUB_REDIRECT(_fn, ...) do { } while(0)
+
+#endif /* _LINUXKPI_KUNIT_STATIC_STUB_H */
diff --git a/sys/compat/linuxkpi/common/include/linux/cleanup.h b/sys/compat/linuxkpi/common/include/linux/cleanup.h
index 01f234f0cbe7..5bb146f082ed 100644
--- a/sys/compat/linuxkpi/common/include/linux/cleanup.h
+++ b/sys/compat/linuxkpi/common/include/linux/cleanup.h
@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
- * Copyright (c) 2024 The FreeBSD Foundation
+ * Copyright (c) 2024-2025 The FreeBSD Foundation
*
* This software was developed by Björn Zeeb under sponsorship from
* the FreeBSD Foundation.
@@ -43,4 +43,51 @@
guard_ ## _n ## _t guard_ ## _n ## _ ## __COUNTER__ \
__cleanup(guard_ ## _n ## _destroy) = guard_ ## _n ## _create
+#define DEFINE_FREE(_n, _t, _f) \
+ static inline void \
+ __free_ ## _n(void *p) \
+ { \
+ _t _T; \
+ \
+ _T = *(_t *)p; \
+ _f; \
+ }
+
+#define __free(_n) __cleanup(__free_##_n)
+
+/*
+ * Given this is a _0 version, it should likely be broken up into parts.
+ * But we have no idea what a _1, _2, ... version would do differently
+ * until we see a call.
+ * This is used for a not-real-type (rcu). We use a bool to "simulate"
+ * the lock held. Also _T is still special and may not always be used,
+ * so tag it with __unused (or better the LinuxKPI __maybe_unused).
+ */
+#define DEFINE_LOCK_GUARD_0(_n, _lock, _unlock, ...) \
+ \
+ typedef struct { \
+ bool lock; \
+ __VA_ARGS__; \
+ } guard_ ## _n ## _t; \
+ \
+ static inline void \
+ guard_ ## _n ## _destroy(guard_ ## _n ## _t *_T) \
+ { \
+ if (_T->lock) { \
+ _unlock; \
+ } \
+ } \
+ \
+ static inline guard_ ## _n ## _t \
+ guard_ ## _n ## _create(void) \
+ { \
+ guard_ ## _n ## _t _tmp; \
+ guard_ ## _n ## _t *_T __maybe_unused; \
+ \
+ _tmp.lock = true; \
+ _T = &_tmp; \
+ _lock; \
+ return (_tmp); \
+ }
+
#endif /* _LINUXKPI_LINUX_CLEANUP_H */
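
The DEFINE_LOCK_GUARD_0() comment above describes the intent; a sketch of the expected consumption, assuming the rcu guard instantiated later in this change in linux/rcupdate.h:

static void
example_rcu_guard(void)
{
	/* Calls rcu_read_lock(); rcu_read_unlock() runs at end of scope. */
	guard(rcu)();

	/* ... RCU read-side critical section ... */
}
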
diff --git a/sys/compat/linuxkpi/common/include/linux/compiler.h b/sys/compat/linuxkpi/common/include/linux/compiler.h
index fb5ad3bf4fe4..948396144ad6 100644
--- a/sys/compat/linuxkpi/common/include/linux/compiler.h
+++ b/sys/compat/linuxkpi/common/include/linux/compiler.h
@@ -130,4 +130,10 @@
#define is_signed_type(t) ((t)-1 < (t)1)
#define is_unsigned_type(t) ((t)-1 > (t)1)
+#if __has_builtin(__builtin_dynamic_object_size)
+#define __struct_size(_s) __builtin_dynamic_object_size(_s, 0)
+#else
+#define __struct_size(_s) __builtin_object_size(_s, 0)
+#endif
+
#endif /* _LINUXKPI_LINUX_COMPILER_H_ */
diff --git a/sys/compat/linuxkpi/common/include/linux/device.h b/sys/compat/linuxkpi/common/include/linux/device.h
index 2556b0c45e49..7dd6340746d2 100644
--- a/sys/compat/linuxkpi/common/include/linux/device.h
+++ b/sys/compat/linuxkpi/common/include/linux/device.h
@@ -4,7 +4,7 @@
* Copyright (c) 2010 Panasas, Inc.
* Copyright (c) 2013-2016 Mellanox Technologies, Ltd.
* All rights reserved.
- * Copyright (c) 2021-2022 The FreeBSD Foundation
+ * Copyright (c) 2021-2025 The FreeBSD Foundation
*
* Portions of this software were developed by Björn Zeeb
* under sponsorship from the FreeBSD Foundation.
@@ -284,7 +284,8 @@ int lkpi_devres_destroy(struct device *, void(*release)(struct device *, void *)
void lkpi_devres_release_free_list(struct device *);
void lkpi_devres_unlink(struct device *, void *);
void lkpi_devm_kmalloc_release(struct device *, void *);
-#define devm_kfree(_d, _p) lkpi_devm_kmalloc_release(_d, _p)
+void lkpi_devm_kfree(struct device *, const void *);
+#define devm_kfree(_d, _p) lkpi_devm_kfree(_d, _p)
static inline const char *
dev_driver_string(const struct device *dev)
diff --git a/sys/compat/linuxkpi/common/include/linux/ieee80211.h b/sys/compat/linuxkpi/common/include/linux/ieee80211.h
index 5851ac08f083..17041bb03ce8 100644
--- a/sys/compat/linuxkpi/common/include/linux/ieee80211.h
+++ b/sys/compat/linuxkpi/common/include/linux/ieee80211.h
@@ -147,9 +147,9 @@ enum ieee80211_vht_max_ampdu_len_exp {
enum wlan_ht_cap_sm_ps {
WLAN_HT_CAP_SM_PS_STATIC = 0,
- WLAN_HT_CAP_SM_PS_DYNAMIC,
- WLAN_HT_CAP_SM_PS_INVALID,
- WLAN_HT_CAP_SM_PS_DISABLED,
+ WLAN_HT_CAP_SM_PS_DYNAMIC = 1,
+ WLAN_HT_CAP_SM_PS_INVALID = 2,
+ WLAN_HT_CAP_SM_PS_DISABLED = 3
};
#define WLAN_MAX_KEY_LEN 32
@@ -408,6 +408,14 @@ enum ieee80211_sta_state {
IEEE80211_STA_AUTHORIZED = 4, /* 802.1x */
};
+enum ieee80211_sta_rx_bandwidth {
+ IEEE80211_STA_RX_BW_20 = 0,
+ IEEE80211_STA_RX_BW_40,
+ IEEE80211_STA_RX_BW_80,
+ IEEE80211_STA_RX_BW_160,
+ IEEE80211_STA_RX_BW_320,
+};
+
enum ieee80211_tx_info_flags {
/* XXX TODO .. right shift numbers - not sure where that came from? */
IEEE80211_TX_CTL_AMPDU = BIT(0),
@@ -524,24 +532,24 @@ struct ieee80211_mgmt {
uint16_t beacon_int;
uint16_t capab_info;
uint8_t variable[0];
- } beacon;
+ } __packed beacon;
/* 9.3.3.5 Association Request frame format */
struct {
uint16_t capab_info;
uint16_t listen_interval;
uint8_t variable[0];
- } assoc_req;
+ } __packed assoc_req;
/* 9.3.3.10 Probe Request frame format */
struct {
uint8_t variable[0];
- } probe_req;
+ } __packed probe_req;
/* 9.3.3.11 Probe Response frame format */
struct {
uint64_t timestamp;
uint16_t beacon_int;
uint16_t capab_info;
uint8_t variable[0];
- } probe_resp;
+ } __packed probe_resp;
/* 9.3.3.14 Action frame format */
struct {
/* 9.4.1.11 Action field */
@@ -557,7 +565,7 @@ struct ieee80211_mgmt {
uint8_t tpc_elem_length;
uint8_t tpc_elem_tx_power;
uint8_t tpc_elem_link_margin;
- } tpc_report;
+ } __packed tpc_report;
/* 9.6.8.33 Fine Timing Measurement frame format */
struct {
uint8_t dialog_token;
@@ -567,7 +575,7 @@ struct ieee80211_mgmt {
uint16_t tod_error;
uint16_t toa_error;
uint8_t variable[0];
- } ftm;
+ } __packed ftm;
/* 802.11-2016, 9.6.5.2 ADDBA Request frame format */
struct {
uint8_t action_code;
@@ -577,16 +585,16 @@ struct ieee80211_mgmt {
uint16_t start_seq_num;
/* Optional follows... */
uint8_t variable[0];
- } addba_req;
+ } __packed addba_req;
/* XXX */
struct {
uint8_t dialog_token;
- } wnm_timing_msr;
+ } __packed wnm_timing_msr;
} u;
- } action;
+ } __packed action;
DECLARE_FLEX_ARRAY(uint8_t, body);
} u;
-};
+} __packed __aligned(2);
struct ieee80211_cts { /* net80211::ieee80211_frame_cts */
__le16 frame_control;
diff --git a/sys/compat/linuxkpi/common/include/linux/math.h b/sys/compat/linuxkpi/common/include/linux/math.h
index 5a348a57747b..1d50e011f66d 100644
--- a/sys/compat/linuxkpi/common/include/linux/math.h
+++ b/sys/compat/linuxkpi/common/include/linux/math.h
@@ -56,7 +56,7 @@
__ret; \
})
-#if defined(LINUXKPI_VERSION) && LINUXKPI_VERSION >= 60600
+#if !defined(LINUXKPI_VERSION) || (LINUXKPI_VERSION >= 60600)
#define abs_diff(x, y) ({ \
__typeof(x) _x = (x); \
__typeof(y) _y = (y); \
diff --git a/sys/compat/linuxkpi/common/include/linux/math64.h b/sys/compat/linuxkpi/common/include/linux/math64.h
index a216d350570f..25ca9da1b622 100644
--- a/sys/compat/linuxkpi/common/include/linux/math64.h
+++ b/sys/compat/linuxkpi/common/include/linux/math64.h
@@ -98,6 +98,12 @@ div64_u64_round_up(uint64_t dividend, uint64_t divisor)
return ((dividend + divisor - 1) / divisor);
}
+static inline uint64_t
+roundup_u64(uint64_t x1, uint32_t x2)
+{
+ return (div_u64(x1 + x2 - 1, x2) * x2);
+}
+
#define DIV64_U64_ROUND_UP(...) \
div64_u64_round_up(__VA_ARGS__)
diff --git a/sys/compat/linuxkpi/common/include/linux/netdevice.h b/sys/compat/linuxkpi/common/include/linux/netdevice.h
index cd7d23077a62..3b808a4a1749 100644
--- a/sys/compat/linuxkpi/common/include/linux/netdevice.h
+++ b/sys/compat/linuxkpi/common/include/linux/netdevice.h
@@ -4,7 +4,7 @@
* Copyright (c) 2010 Panasas, Inc.
* Copyright (c) 2013-2019 Mellanox Technologies, Ltd.
* All rights reserved.
- * Copyright (c) 2020-2021 The FreeBSD Foundation
+ * Copyright (c) 2020-2025 The FreeBSD Foundation
* Copyright (c) 2020-2022 Bjoern A. Zeeb
*
* Portions of this software were developed by Björn Zeeb
@@ -302,6 +302,13 @@ netdev_rss_key_fill(uint32_t *buf, size_t len)
get_random_bytes(buf, len);
}
+static inline void
+__hw_addr_init(struct netdev_hw_addr_list *list)
+{
+ list->count = 0;
+ INIT_LIST_HEAD(&list->addr_list);
+}
+
static inline int
netdev_hw_addr_list_count(struct netdev_hw_addr_list *list)
{
diff --git a/sys/compat/linuxkpi/common/include/linux/overflow.h b/sys/compat/linuxkpi/common/include/linux/overflow.h
index 9ba9b9500f11..e811037b8ecc 100644
--- a/sys/compat/linuxkpi/common/include/linux/overflow.h
+++ b/sys/compat/linuxkpi/common/include/linux/overflow.h
@@ -33,8 +33,10 @@
* credit to Christian Biere.
*/
#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
-#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
-#define type_min(T) ((T)((T)-type_max(T)-(T)1))
+#define __type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
+#define type_max(t) __type_max(typeof(t))
+#define __type_min(T) ((T)((T)-type_max(T)-(T)1))
+#define type_min(t) __type_min(typeof(t))
/*
* Avoids triggering -Wtype-limits compilation warning,
@@ -59,46 +61,123 @@ static inline bool __must_check __must_check_overflow(bool overflow)
* @b: second addend
* @d: pointer to store sum
*
- * Returns 0 on success.
+ * Returns true on wrap-around, false otherwise.
*
- * *@d holds the results of the attempted addition, but is not considered
- * "safe for use" on a non-zero return value, which indicates that the
- * sum has overflowed or been truncated.
+ * *@d holds the results of the attempted addition, regardless of whether
+ * wrap-around occurred.
*/
#define check_add_overflow(a, b, d) \
__must_check_overflow(__builtin_add_overflow(a, b, d))
/**
+ * wrapping_add() - Intentionally perform a wrapping addition
+ * @type: type for result of calculation
+ * @a: first addend
+ * @b: second addend
+ *
+ * Return the potentially wrapped-around addition without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_add(type, a, b) \
+ ({ \
+ type __val; \
+ __builtin_add_overflow(a, b, &__val); \
+ __val; \
+ })
+
+/**
+ * wrapping_assign_add() - Intentionally perform a wrapping increment assignment
+ * @var: variable to be incremented
+ * @offset: amount to add
+ *
+ * Increments @var by @offset with wrap-around. Returns the resulting
+ * value of @var. Will not trip any wrap-around sanitizers.
+ *
+ * Returns the new value of @var.
+ */
+#define wrapping_assign_add(var, offset) \
+ ({ \
+ typeof(var) *__ptr = &(var); \
+ *__ptr = wrapping_add(typeof(var), *__ptr, offset); \
+ })
+
+/**
* check_sub_overflow() - Calculate subtraction with overflow checking
* @a: minuend; value to subtract from
* @b: subtrahend; value to subtract from @a
* @d: pointer to store difference
*
- * Returns 0 on success.
+ * Returns true on wrap-around, false otherwise.
*
- * *@d holds the results of the attempted subtraction, but is not considered
- * "safe for use" on a non-zero return value, which indicates that the
- * difference has underflowed or been truncated.
+ * *@d holds the results of the attempted subtraction, regardless of whether
+ * wrap-around occurred.
*/
#define check_sub_overflow(a, b, d) \
__must_check_overflow(__builtin_sub_overflow(a, b, d))
/**
+ * wrapping_sub() - Intentionally perform a wrapping subtraction
+ * @type: type for result of calculation
+ * @a: minuend; value to subtract from
+ * @b: subtrahend; value to subtract from @a
+ *
+ * Return the potentially wrapped-around subtraction without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_sub(type, a, b) \
+ ({ \
+ type __val; \
+ __builtin_sub_overflow(a, b, &__val); \
+ __val; \
+ })
+
+/**
+ * wrapping_assign_sub() - Intentionally perform a wrapping decrement assign
+ * @var: variable to be decremented
+ * @offset: amount to subtract
+ *
+ * Decrements @var by @offset with wrap-around. Returns the resulting
+ * value of @var. Will not trip any wrap-around sanitizers.
+ *
+ * Returns the new value of @var.
+ */
+#define wrapping_assign_sub(var, offset) \
+ ({ \
+ typeof(var) *__ptr = &(var); \
+ *__ptr = wrapping_sub(typeof(var), *__ptr, offset); \
+ })
+
+/**
* check_mul_overflow() - Calculate multiplication with overflow checking
* @a: first factor
* @b: second factor
* @d: pointer to store product
*
- * Returns 0 on success.
+ * Returns true on wrap-around, false otherwise.
*
- * *@d holds the results of the attempted multiplication, but is not
- * considered "safe for use" on a non-zero return value, which indicates
- * that the product has overflowed or been truncated.
+ * *@d holds the results of the attempted multiplication, regardless of whether
+ * wrap-around occurred.
*/
#define check_mul_overflow(a, b, d) \
__must_check_overflow(__builtin_mul_overflow(a, b, d))
/**
+ * wrapping_mul() - Intentionally perform a wrapping multiplication
+ * @type: type for result of calculation
+ * @a: first factor
+ * @b: second factor
+ *
+ * Return the potentially wrapped-around multiplication without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_mul(type, a, b) \
+ ({ \
+ type __val; \
+ __builtin_mul_overflow(a, b, &__val); \
+ __val; \
+ })
+
+/**
* check_shl_overflow() - Calculate a left-shifted value and check overflow
* @a: Value to be shifted
* @s: How many bits left to shift
@@ -122,7 +201,7 @@ static inline bool __must_check __must_check_overflow(bool overflow)
typeof(a) _a = a; \
typeof(s) _s = s; \
typeof(d) _d = d; \
- u64 _a_full = _a; \
+ unsigned long long _a_full = _a; \
unsigned int _to_shift = \
is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \
*_d = (_a_full << _to_shift); \
@@ -132,10 +211,10 @@ static inline bool __must_check __must_check_overflow(bool overflow)
#define __overflows_type_constexpr(x, T) ( \
is_unsigned_type(typeof(x)) ? \
- (x) > type_max(typeof(T)) : \
+ (x) > type_max(T) : \
is_unsigned_type(typeof(T)) ? \
- (x) < 0 || (x) > type_max(typeof(T)) : \
- (x) < type_min(typeof(T)) || (x) > type_max(typeof(T)))
+ (x) < 0 || (x) > type_max(T) : \
+ (x) < type_min(T) || (x) > type_max(T))
#define __overflows_type(x, T) ({ \
typeof(T) v = 0; \
@@ -312,27 +391,40 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
struct_size((type *)NULL, member, count)
/**
- * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
- * Enables caller macro to pass (different) initializer.
+ * __DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
+ * Enables caller macro to pass arbitrary trailing expressions
*
* @type: structure type name, including "struct" keyword.
* @name: Name for a variable to define.
* @member: Name of the array member.
* @count: Number of elements in the array; must be compile-time const.
- * @initializer: initializer expression (could be empty for no init).
+ * @trailer: Trailing expressions for attributes and/or initializers.
*/
-#define _DEFINE_FLEX(type, name, member, count, initializer) \
+#define __DEFINE_FLEX(type, name, member, count, trailer...) \
_Static_assert(__builtin_constant_p(count), \
"onstack flex array members require compile-time const count"); \
union { \
u8 bytes[struct_size_t(type, member, count)]; \
type obj; \
- } name##_u initializer; \
+ } name##_u trailer; \
type *name = (type *)&name##_u
/**
- * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing
- * flexible array member.
+ * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
+ * Enables caller macro to pass (different) initializer.
+ *
+ * @type: structure type name, including "struct" keyword.
+ * @name: Name for a variable to define.
+ * @member: Name of the array member.
+ * @count: Number of elements in the array; must be compile-time const.
+ * @initializer: Initializer expression (e.g., pass `= { }` at minimum).
+ */
+#define _DEFINE_FLEX(type, name, member, count, initializer...) \
+ __DEFINE_FLEX(type, name, member, count, = { .obj initializer })
+
+/**
+ * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing
+ * flexible array member, when it does not have a __counted_by annotation.
*
* @type: structure type name, including "struct" keyword.
* @name: Name for a variable to define.
@@ -342,8 +434,42 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
* Define a zeroed, on-stack, instance of @type structure with a trailing
* flexible array member.
* Use __struct_size(@name) to get compile-time size of it afterwards.
+ * Use __member_size(@name->member) to get compile-time size of @name members.
+ * Use STACK_FLEX_ARRAY_SIZE(@name, @member) to get compile-time number of
+ * elements in array @member.
+ */
+#define DEFINE_RAW_FLEX(type, name, member, count) \
+ __DEFINE_FLEX(type, name, member, count, = { })
+
+/**
+ * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing
+ * flexible array member.
+ *
+ * @TYPE: structure type name, including "struct" keyword.
+ * @NAME: Name for a variable to define.
+ * @MEMBER: Name of the array member.
+ * @COUNTER: Name of the __counted_by member.
+ * @COUNT: Number of elements in the array; must be compile-time const.
+ *
+ * Define a zeroed, on-stack, instance of @TYPE structure with a trailing
+ * flexible array member.
+ * Use __struct_size(@NAME) to get compile-time size of it afterwards.
+ * Use __member_size(@NAME->member) to get compile-time size of @NAME members.
+ * Use STACK_FLEX_ARRAY_SIZE(@name, @member) to get compile-time number of
+ * elements in array @member.
+ */
+#define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \
+ _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .COUNTER = COUNT, })
+
+/**
+ * STACK_FLEX_ARRAY_SIZE() - helper macro for DEFINE_FLEX() family.
+ * Returns the number of elements in @array.
+ *
+ * @name: Name for a variable defined in DEFINE_RAW_FLEX()/DEFINE_FLEX().
+ * @array: Name of the array member.
*/
-#define DEFINE_FLEX(type, name, member, count) \
- _DEFINE_FLEX(type, name, member, count, = {})
+#define STACK_FLEX_ARRAY_SIZE(name, array) \
+ (__member_size((name)->array) / sizeof(*(name)->array) + \
+ __must_be_array((name)->array))
#endif /* _LINUXKPI_LINUX_OVERFLOW_H */
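
A short sketch of the semantics documented above: check_*_overflow() now reads as "true on wrap-around", wrapping_*() wraps deliberately without tripping sanitizers, and DEFINE_FLEX() builds an on-stack instance of a structure with a trailing flexible array. The structure and variable names below are illustrative.

struct example_msg {
	uint8_t count;		/* the __counted_by-style counter field */
	uint8_t data[];
};

static void
example_overflow_use(void)
{
	uint32_t sum;

	if (check_add_overflow(UINT32_MAX, 1U, &sum)) {
		/* Wrap-around detected; sum holds the wrapped value 0. */
	}
	sum = wrapping_add(uint32_t, UINT32_MAX, 1U);	/* 0, no sanitizer */

	/* On-stack instance with room for 4 elements, .count preset to 4. */
	DEFINE_FLEX(struct example_msg, msg, data, count, 4);
	msg->data[0] = sum & 0xff;
	/* STACK_FLEX_ARRAY_SIZE(msg, data) evaluates to 4 at compile time. */
}
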
diff --git a/sys/compat/linuxkpi/common/include/linux/pci.h b/sys/compat/linuxkpi/common/include/linux/pci.h
index 3fd4191b9917..d891d0df3546 100644
--- a/sys/compat/linuxkpi/common/include/linux/pci.h
+++ b/sys/compat/linuxkpi/common/include/linux/pci.h
@@ -355,7 +355,6 @@ struct pci_dev {
TAILQ_HEAD(, pci_mmio_region) mmio;
};
-int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name);
int pci_alloc_irq_vectors(struct pci_dev *pdev, int minv, int maxv,
unsigned int flags);
bool pci_device_is_present(struct pci_dev *pdev);
@@ -365,10 +364,13 @@ void __iomem **linuxkpi_pcim_iomap_table(struct pci_dev *pdev);
void *linuxkpi_pci_iomap_range(struct pci_dev *, int,
unsigned long, unsigned long);
void *linuxkpi_pci_iomap(struct pci_dev *, int, unsigned long);
+void *linuxkpi_pcim_iomap(struct pci_dev *, int, unsigned long);
void linuxkpi_pci_iounmap(struct pci_dev *pdev, void *res);
int linuxkpi_pcim_iomap_regions(struct pci_dev *pdev, uint32_t mask,
const char *name);
+int linuxkpi_pci_request_region(struct pci_dev *, int, const char *);
int linuxkpi_pci_request_regions(struct pci_dev *pdev, const char *res_name);
+int linuxkpi_pcim_request_all_regions(struct pci_dev *, const char *);
void linuxkpi_pci_release_region(struct pci_dev *pdev, int bar);
void linuxkpi_pci_release_regions(struct pci_dev *pdev);
int linuxkpi_pci_enable_msix(struct pci_dev *pdev, struct msix_entry *entries,
@@ -561,12 +563,16 @@ done:
return (pdev->bus->self);
}
+#define pci_request_region(pdev, bar, res_name) \
+ linuxkpi_pci_request_region(pdev, bar, res_name)
#define pci_release_region(pdev, bar) \
linuxkpi_pci_release_region(pdev, bar)
-#define pci_release_regions(pdev) \
- linuxkpi_pci_release_regions(pdev)
#define pci_request_regions(pdev, res_name) \
linuxkpi_pci_request_regions(pdev, res_name)
+#define pci_release_regions(pdev) \
+ linuxkpi_pci_release_regions(pdev)
+#define pcim_request_all_regions(pdev, name) \
+ linuxkpi_pcim_request_all_regions(pdev, name)
static inline void
lkpi_pci_disable_msix(struct pci_dev *pdev)
@@ -803,6 +809,8 @@ static inline void pci_disable_sriov(struct pci_dev *dev)
linuxkpi_pci_iomap_range(pdev, mmio_bar, mmio_off, mmio_size)
#define pci_iomap(pdev, mmio_bar, mmio_size) \
linuxkpi_pci_iomap(pdev, mmio_bar, mmio_size)
+#define pcim_iomap(pdev, bar, maxlen) \
+ linuxkpi_pcim_iomap(pdev, bar, maxlen)
#define pci_iounmap(pdev, res) \
linuxkpi_pci_iounmap(pdev, res)
@@ -1445,6 +1453,9 @@ linuxkpi_pci_get_device(uint32_t vendor, uint32_t device, struct pci_dev *odev)
return (lkpi_pci_get_device(vendor, device, odev));
}
+#define for_each_pci_dev(_pdev) \
+ while ((_pdev = linuxkpi_pci_get_device(PCI_ANY_ID, PCI_ANY_ID, _pdev)) != NULL)
+
/* This is a FreeBSD extension so we can use bus_*(). */
static inline void
linuxkpi_pcim_want_to_use_bus_functions(struct pci_dev *pdev)
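
The new for_each_pci_dev() iterator above is a thin wrapper around linuxkpi_pci_get_device() with wildcard IDs; a usage sketch (the cursor must start out NULL so the walk begins at the head of the device list):

	struct pci_dev *pdev = NULL;

	for_each_pci_dev(pdev) {
		/* pdev visits every known PCI device in turn. */
	}
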
diff --git a/sys/compat/linuxkpi/common/include/linux/rcupdate.h b/sys/compat/linuxkpi/common/include/linux/rcupdate.h
index 85d766c8dbc9..4aceb7296cd6 100644
--- a/sys/compat/linuxkpi/common/include/linux/rcupdate.h
+++ b/sys/compat/linuxkpi/common/include/linux/rcupdate.h
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2016-2017 Mellanox Technologies, Ltd.
* All rights reserved.
- * Copyright (c) 2024 The FreeBSD Foundation
+ * Copyright (c) 2024-2025 The FreeBSD Foundation
*
* Portions of this software were developed by Björn Zeeb
* under sponsorship from the FreeBSD Foundation.
@@ -35,6 +35,7 @@
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/cleanup.h>
#include <machine/atomic.h>
@@ -162,4 +163,6 @@ void linux_synchronize_rcu(unsigned type);
#define init_rcu_head_on_stack(...)
#define destroy_rcu_head_on_stack(...)
+DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock())
+
#endif /* _LINUXKPI_LINUX_RCUPDATE_H_ */
diff --git a/sys/compat/linuxkpi/common/include/linux/skbuff.h b/sys/compat/linuxkpi/common/include/linux/skbuff.h
index c8ad90281e34..6e41c368a8b8 100644
--- a/sys/compat/linuxkpi/common/include/linux/skbuff.h
+++ b/sys/compat/linuxkpi/common/include/linux/skbuff.h
@@ -1,6 +1,6 @@
/*-
* Copyright (c) 2020-2025 The FreeBSD Foundation
- * Copyright (c) 2021-2023 Bjoern A. Zeeb
+ * Copyright (c) 2021-2025 Bjoern A. Zeeb
*
* This software was developed by Björn Zeeb under sponsorship from
* the FreeBSD Foundation.
@@ -47,13 +47,11 @@
#include <linux/ktime.h>
#include <linux/compiler.h>
-#include "opt_wlan.h"
-
-/* Currently this is only used for wlan so we can depend on that. */
-#if defined(IEEE80211_DEBUG) && !defined(SKB_DEBUG)
-#define SKB_DEBUG
-#endif
-
+/*
+ * At least the net/intel-irdma-kmod port pulls this header in; likely through
+ * if_ether.h (see PR289268). This means we can no longer rely on
+ * IEEE80211_DEBUG (opt_wlan.h) to automatically set SKB_DEBUG.
+ */
/* #define SKB_DEBUG */
#ifdef SKB_DEBUG
@@ -120,7 +118,7 @@ enum sk_checksum_flags {
CHECKSUM_NONE = 0x00,
CHECKSUM_UNNECESSARY = 0x01,
CHECKSUM_PARTIAL = 0x02,
- CHECKSUM_COMPLETE = 0x04,
+ CHECKSUM_COMPLETE = 0x03,
};
struct skb_frag {
@@ -170,7 +168,7 @@ struct sk_buff {
};
};
uint16_t protocol;
- uint8_t ip_summed;
+	uint8_t			ip_summed;	/* 2 bits only. */
/* uint8_t */
/* "Scratch" area for layers to store metadata. */
diff --git a/sys/compat/linuxkpi/common/include/linux/slab.h b/sys/compat/linuxkpi/common/include/linux/slab.h
index 47e3d133eb6c..0e649e1e3c4a 100644
--- a/sys/compat/linuxkpi/common/include/linux/slab.h
+++ b/sys/compat/linuxkpi/common/include/linux/slab.h
@@ -40,8 +40,10 @@
#include <linux/compat.h>
#include <linux/types.h>
#include <linux/gfp.h>
+#include <linux/err.h>
#include <linux/llist.h>
#include <linux/overflow.h>
+#include <linux/cleanup.h>
MALLOC_DECLARE(M_KMALLOC);
@@ -153,6 +155,8 @@ kfree(const void *ptr)
lkpi_kfree(ptr);
}
+DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T))
+
/*
* Other k*alloc() funtions using the above as underlying allocator.
*/
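
Together with the cleanup.h include, DEFINE_FREE(kfree, ...) enables the __free(kfree) scoped-cleanup attribute for kmalloc'ed memory. A small sketch of the intended pattern, assuming the __free()/no_free_ptr() semantics from linux/cleanup.h; the structure is illustrative only:

	struct cfg {
		int	len;
	};

	static int
	cfg_parse(void)
	{
		/*
		 * Freed automatically when c goes out of scope, unless
		 * ownership is handed off with no_free_ptr()/return_ptr().
		 */
		struct cfg *c __free(kfree) = kmalloc(sizeof(*c), GFP_KERNEL);

		if (c == NULL)
			return (-ENOMEM);
		c->len = 0;
		return (0);
	}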
diff --git a/sys/compat/linuxkpi/common/include/linux/string_helpers.h b/sys/compat/linuxkpi/common/include/linux/string_helpers.h
index 1bdff2730361..2c6fe0b1708d 100644
--- a/sys/compat/linuxkpi/common/include/linux/string_helpers.h
+++ b/sys/compat/linuxkpi/common/include/linux/string_helpers.h
@@ -66,4 +66,6 @@ str_enable_disable(bool value)
return "disable";
}
+#define str_disable_enable(_v) str_enable_disable(!(_v))
+
#endif
diff --git a/sys/compat/linuxkpi/common/include/linux/timer.h b/sys/compat/linuxkpi/common/include/linux/timer.h
index a635f0faea59..d48939e28a02 100644
--- a/sys/compat/linuxkpi/common/include/linux/timer.h
+++ b/sys/compat/linuxkpi/common/include/linux/timer.h
@@ -49,8 +49,13 @@ extern unsigned long linux_timer_hz_mask;
#define TIMER_IRQSAFE 0x0001
+#if defined(LINUXKPI_VERSION) && (LINUXKPI_VERSION < 61600)
#define from_timer(var, arg, field) \
container_of(arg, typeof(*(var)), field)
+#else
+#define timer_container_of(var, arg, field) \
+ container_of(arg, typeof(*(var)), field)
+#endif
#define timer_setup(timer, func, flags) do { \
CTASSERT(((flags) & ~TIMER_IRQSAFE) == 0); \
@@ -79,11 +84,23 @@ extern unsigned long linux_timer_hz_mask;
extern int mod_timer(struct timer_list *, unsigned long);
extern void add_timer(struct timer_list *);
extern void add_timer_on(struct timer_list *, int cpu);
-extern int del_timer(struct timer_list *);
-extern int del_timer_sync(struct timer_list *);
+
+extern int timer_delete(struct timer_list *);
extern int timer_delete_sync(struct timer_list *);
extern int timer_shutdown_sync(struct timer_list *);
+static inline int
+del_timer(struct timer_list *tl)
+{
+ return (timer_delete(tl));
+}
+
+static inline int
+del_timer_sync(struct timer_list *tl)
+{
+ return (timer_delete_sync(tl));
+}
+
#define timer_pending(timer) callout_pending(&(timer)->callout)
#define round_jiffies(j) \
((unsigned long)(((j) + linux_timer_hz_mask) & ~linux_timer_hz_mask))
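
The timer API now follows the newer Linux spelling: timer_delete()/timer_delete_sync() are the primitives, del_timer()/del_timer_sync() stay behind as inline wrappers, and timer_container_of() replaces from_timer() for LINUXKPI_VERSION >= 61600. A sketch of a callback written against the new names; the softc layout is hypothetical:

	struct mydrv_softc {
		struct timer_list	watchdog;
		int			ticks;
	};

	static void
	mydrv_watchdog_cb(struct timer_list *t)
	{
		/* Recover the enclosing softc from the embedded timer_list. */
		struct mydrv_softc *sc = timer_container_of(sc, t, watchdog);

		sc->ticks++;
		mod_timer(&sc->watchdog, jiffies + HZ);	/* re-arm in ~1s */
	}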
diff --git a/sys/compat/linuxkpi/common/include/net/cfg80211.h b/sys/compat/linuxkpi/common/include/net/cfg80211.h
index 044f348ef08b..239b4a5ae7b8 100644
--- a/sys/compat/linuxkpi/common/include/net/cfg80211.h
+++ b/sys/compat/linuxkpi/common/include/net/cfg80211.h
@@ -57,8 +57,8 @@ extern int linuxkpi_debug_80211;
#endif
#define TODO(fmt, ...) if (linuxkpi_debug_80211 & D80211_TODO) \
printf("%s:%d: XXX LKPI80211 TODO " fmt "\n", __func__, __LINE__, ##__VA_ARGS__)
-#define IMPROVE(...) if (linuxkpi_debug_80211 & D80211_IMPROVE) \
- printf("%s:%d: XXX LKPI80211 IMPROVE\n", __func__, __LINE__)
+#define IMPROVE(fmt, ...) if (linuxkpi_debug_80211 & D80211_IMPROVE) \
+ printf("%s:%d: XXX LKPI80211 IMPROVE " fmt "\n", __func__, __LINE__, ##__VA_ARGS__)
enum rfkill_hard_block_reasons {
RFKILL_HARD_BLOCK_NOT_OWNER = BIT(0),
@@ -128,19 +128,24 @@ struct ieee80211_txrx_stypes {
uint16_t rx;
};
-/* XXX net80211 has an ieee80211_channel as well. */
+/*
+ * net80211 has an ieee80211_channel as well; we use the linuxkpi_ version
+ * internally in LinuxKPI and re-define ieee80211_channel for the drivers
+ * at the end of the file.
+ */
struct linuxkpi_ieee80211_channel {
- /* TODO FIXME */
- uint32_t hw_value; /* ic_ieee */
- uint32_t center_freq; /* ic_freq */
- enum ieee80211_channel_flags flags; /* ic_flags */
+ uint32_t center_freq;
+ uint16_t hw_value;
+ enum ieee80211_channel_flags flags;
enum nl80211_band band;
- int8_t max_power; /* ic_maxpower */
bool beacon_found;
- int max_antenna_gain, max_reg_power;
- int orig_flags;
- int dfs_cac_ms, dfs_state;
- int orig_mpwr;
+ enum nl80211_dfs_state dfs_state;
+ unsigned int dfs_cac_ms;
+ int max_antenna_gain;
+ int max_power;
+ int max_reg_power;
+ uint32_t orig_flags;
+ int orig_mpwr;
};
struct cfg80211_bitrate_mask {
@@ -1299,10 +1304,9 @@ reg_query_regdb_wmm(uint8_t *alpha2, uint32_t center_freq,
struct ieee80211_reg_rule *rule)
{
- /* ETSI has special rules. FreeBSD regdb needs to learn about them. */
- TODO();
+ IMPROVE("regdomain.xml needs to grow wmm information for at least ETSI");
- return (-ENXIO);
+ return (-ENODATA);
}
static __inline const u8 *
diff --git a/sys/compat/linuxkpi/common/include/net/mac80211.h b/sys/compat/linuxkpi/common/include/net/mac80211.h
index 2ed595095f9e..8de03410c6b6 100644
--- a/sys/compat/linuxkpi/common/include/net/mac80211.h
+++ b/sys/compat/linuxkpi/common/include/net/mac80211.h
@@ -87,6 +87,9 @@ enum mcast_filter_flags {
FIF_PSPOLL = BIT(5),
FIF_CONTROL = BIT(6),
FIF_MCAST_ACTION = BIT(7),
+
+ /* Must stay last. */
+ FIF_FLAGS_MASK = BIT(8)-1,
};
enum ieee80211_bss_changed {
@@ -734,7 +737,7 @@ struct ieee80211_link_sta {
struct ieee80211_he_6ghz_capa he_6ghz_capa;
struct ieee80211_sta_eht_cap eht_cap;
uint8_t rx_nss;
- enum ieee80211_sta_rx_bw bandwidth;
+ enum ieee80211_sta_rx_bandwidth bandwidth;
enum ieee80211_smps_mode smps_mode;
struct ieee80211_sta_agg agg;
struct ieee80211_sta_txpwr txpwr;
@@ -1135,7 +1138,7 @@ extern const struct cfg80211_ops linuxkpi_mac80211cfgops;
struct ieee80211_hw *linuxkpi_ieee80211_alloc_hw(size_t,
const struct ieee80211_ops *);
void linuxkpi_ieee80211_iffree(struct ieee80211_hw *);
-void linuxkpi_set_ieee80211_dev(struct ieee80211_hw *, char *);
+void linuxkpi_set_ieee80211_dev(struct ieee80211_hw *);
int linuxkpi_ieee80211_ifattach(struct ieee80211_hw *);
void linuxkpi_ieee80211_ifdetach(struct ieee80211_hw *);
void linuxkpi_ieee80211_unregister_hw(struct ieee80211_hw *);
@@ -1255,7 +1258,7 @@ SET_IEEE80211_DEV(struct ieee80211_hw *hw, struct device *dev)
{
set_wiphy_dev(hw->wiphy, dev);
- linuxkpi_set_ieee80211_dev(hw, dev_name(dev));
+ linuxkpi_set_ieee80211_dev(hw);
IMPROVE();
}
@@ -1741,12 +1744,15 @@ ieee80211_request_smps(struct ieee80211_vif *vif, u_int link_id,
"SMPS_STATIC",
"SMPS_DYNAMIC",
"SMPS_AUTOMATIC",
- "SMPS_NUM_MODES"
};
- if (linuxkpi_debug_80211 & D80211_TODO)
- printf("%s:%d: XXX LKPI80211 TODO smps %d %s\n",
- __func__, __LINE__, smps, smps_mode_name[smps]);
+ if (vif->type != NL80211_IFTYPE_STATION)
+ return;
+
+ if (smps >= nitems(smps_mode_name))
+ panic("%s: unsupported smps value: %d\n", __func__, smps);
+
+ IMPROVE("XXX LKPI80211 TODO smps %d %s\n", smps, smps_mode_name[smps]);
}
static __inline void
diff --git a/sys/compat/linuxkpi/common/src/linux_80211.c b/sys/compat/linuxkpi/common/src/linux_80211.c
index a7d6003843ba..bc4b334de28e 100644
--- a/sys/compat/linuxkpi/common/src/linux_80211.c
+++ b/sys/compat/linuxkpi/common/src/linux_80211.c
@@ -77,6 +77,8 @@
#include <linux/rculist.h>
#include "linux_80211.h"
+/* #define LKPI_80211_USE_SCANLIST */
+/* #define LKPI_80211_BGSCAN */
#define LKPI_80211_WME
#define LKPI_80211_HW_CRYPTO
#define LKPI_80211_HT
@@ -103,6 +105,10 @@ SYSCTL_DECL(_compat_linuxkpi);
SYSCTL_NODE(_compat_linuxkpi, OID_AUTO, 80211, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"LinuxKPI 802.11 compatibility layer");
+static bool lkpi_order_scanlist = false;
+SYSCTL_BOOL(_compat_linuxkpi_80211, OID_AUTO, order_scanlist, CTLFLAG_RW,
+    &lkpi_order_scanlist, 0, "Enable LinuxKPI 802.11 scan list shuffling");
+
#if defined(LKPI_80211_HW_CRYPTO)
static bool lkpi_hwcrypto = false;
SYSCTL_BOOL(_compat_linuxkpi_80211, OID_AUTO, hw_crypto, CTLFLAG_RDTUN,
@@ -167,6 +173,7 @@ const struct cfg80211_ops linuxkpi_mac80211cfgops = {
static struct lkpi_sta *lkpi_find_lsta_by_ni(struct lkpi_vif *,
struct ieee80211_node *);
#endif
+static void lkpi_sw_scan_task(void *, int);
static void lkpi_80211_txq_tx_one(struct lkpi_sta *, struct mbuf *);
static void lkpi_80211_txq_task(void *, int);
static void lkpi_80211_lhw_rxq_task(void *, int);
@@ -274,48 +281,40 @@ lkpi_nl80211_sta_info_to_str(struct sbuf *s, const char *prefix,
sbuf_printf(s, "\n");
}
-static int
-lkpi_80211_dump_stas(SYSCTL_HANDLER_ARGS)
+static void
+lkpi_80211_dump_lvif_stas(struct lkpi_vif *lvif, struct sbuf *s)
{
struct lkpi_hw *lhw;
struct ieee80211_hw *hw;
struct ieee80211vap *vap;
- struct lkpi_vif *lvif;
struct ieee80211_vif *vif;
struct lkpi_sta *lsta;
struct ieee80211_sta *sta;
struct station_info sinfo;
- struct sbuf s;
int error;
- if (req->newptr)
- return (EPERM);
-
- lvif = (struct lkpi_vif *)arg1;
vif = LVIF_TO_VIF(lvif);
vap = LVIF_TO_VAP(lvif);
lhw = vap->iv_ic->ic_softc;
hw = LHW_TO_HW(lhw);
- sbuf_new_for_sysctl(&s, NULL, 1024, req);
-
wiphy_lock(hw->wiphy);
list_for_each_entry(lsta, &lvif->lsta_list, lsta_list) {
sta = LSTA_TO_STA(lsta);
- sbuf_putc(&s, '\n');
- sbuf_printf(&s, "lsta %p sta %p added_to_drv %d\n", lsta, sta, lsta->added_to_drv);
+ sbuf_putc(s, '\n');
+ sbuf_printf(s, "lsta %p sta %p added_to_drv %d\n", lsta, sta, lsta->added_to_drv);
memset(&sinfo, 0, sizeof(sinfo));
error = lkpi_80211_mo_sta_statistics(hw, vif, sta, &sinfo);
if (error == EEXIST) /* Not added to driver. */
continue;
if (error == ENOTSUPP) {
- sbuf_printf(&s, " sta_statistics not supported\n");
+ sbuf_printf(s, " sta_statistics not supported\n");
continue;
}
if (error != 0) {
- sbuf_printf(&s, " sta_statistics failed: %d\n", error);
+ sbuf_printf(s, " sta_statistics failed: %d\n", error);
continue;
}
@@ -325,51 +324,76 @@ lkpi_80211_dump_stas(SYSCTL_HANDLER_ARGS)
memcpy(&sinfo.rxrate, &lsta->sinfo.rxrate, sizeof(sinfo.rxrate));
sinfo.filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE);
}
+ /* If no CHAIN_SIGNAL is reported, try to fill it in from the lsta sinfo. */
+ if ((sinfo.filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) == 0 &&
+ (lsta->sinfo.filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) != 0) {
+ sinfo.chains = lsta->sinfo.chains;
+ memcpy(sinfo.chain_signal, lsta->sinfo.chain_signal,
+ sizeof(sinfo.chain_signal));
+ sinfo.filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL);
+ }
- lkpi_nl80211_sta_info_to_str(&s, " nl80211_sta_info (valid fields)", sinfo.filled);
- sbuf_printf(&s, " connected_time %u inactive_time %u\n",
+ lkpi_nl80211_sta_info_to_str(s, " nl80211_sta_info (valid fields)", sinfo.filled);
+ sbuf_printf(s, " connected_time %u inactive_time %u\n",
sinfo.connected_time, sinfo.inactive_time);
- sbuf_printf(&s, " rx_bytes %ju rx_packets %u rx_dropped_misc %u\n",
+ sbuf_printf(s, " rx_bytes %ju rx_packets %u rx_dropped_misc %u\n",
(uintmax_t)sinfo.rx_bytes, sinfo.rx_packets, sinfo.rx_dropped_misc);
- sbuf_printf(&s, " rx_duration %ju rx_beacon %u rx_beacon_signal_avg %d\n",
+ sbuf_printf(s, " rx_duration %ju rx_beacon %u rx_beacon_signal_avg %d\n",
(uintmax_t)sinfo.rx_duration, sinfo.rx_beacon, (int8_t)sinfo.rx_beacon_signal_avg);
- sbuf_printf(&s, " tx_bytes %ju tx_packets %u tx_failed %u\n",
+ sbuf_printf(s, " tx_bytes %ju tx_packets %u tx_failed %u\n",
(uintmax_t)sinfo.tx_bytes, sinfo.tx_packets, sinfo.tx_failed);
- sbuf_printf(&s, " tx_duration %ju tx_retries %u\n",
+ sbuf_printf(s, " tx_duration %ju tx_retries %u\n",
(uintmax_t)sinfo.tx_duration, sinfo.tx_retries);
- sbuf_printf(&s, " signal %d signal_avg %d ack_signal %d avg_ack_signal %d\n",
+ sbuf_printf(s, " signal %d signal_avg %d ack_signal %d avg_ack_signal %d\n",
sinfo.signal, sinfo.signal_avg, sinfo.ack_signal, sinfo.avg_ack_signal);
-
- sbuf_printf(&s, " generation %d assoc_req_ies_len %zu chains %d\n",
+ sbuf_printf(s, " generation %d assoc_req_ies_len %zu chains %#04x\n",
sinfo.generation, sinfo.assoc_req_ies_len, sinfo.chains);
- for (int i = 0; i < sinfo.chains && i < IEEE80211_MAX_CHAINS; i++) {
- sbuf_printf(&s, " chain[%d] signal %d signal_avg %d\n",
+ for (int i = 0; i < nitems(sinfo.chain_signal) && i < IEEE80211_MAX_CHAINS; i++) {
+ if (!(sinfo.chains & BIT(i)))
+ continue;
+ sbuf_printf(s, " chain[%d] signal %d signal_avg %d\n",
i, (int8_t)sinfo.chain_signal[i], (int8_t)sinfo.chain_signal_avg[i]);
}
/* assoc_req_ies, bss_param, sta_flags */
- sbuf_printf(&s, " rxrate: flags %b bw %u(%s) legacy %u kbit/s mcs %u nss %u\n",
+ sbuf_printf(s, " rxrate: flags %b bw %u(%s) legacy %u kbit/s mcs %u nss %u\n",
sinfo.rxrate.flags, CFG80211_RATE_INFO_FLAGS_BITS,
sinfo.rxrate.bw, lkpi_rate_info_bw_to_str(sinfo.rxrate.bw),
sinfo.rxrate.legacy * 100,
sinfo.rxrate.mcs, sinfo.rxrate.nss);
- sbuf_printf(&s, " he_dcm %u he_gi %u he_ru_alloc %u eht_gi %u\n",
+ sbuf_printf(s, " he_dcm %u he_gi %u he_ru_alloc %u eht_gi %u\n",
sinfo.rxrate.he_dcm, sinfo.rxrate.he_gi, sinfo.rxrate.he_ru_alloc,
sinfo.rxrate.eht_gi);
- sbuf_printf(&s, " txrate: flags %b bw %u(%s) legacy %u kbit/s mcs %u nss %u\n",
+ sbuf_printf(s, " txrate: flags %b bw %u(%s) legacy %u kbit/s mcs %u nss %u\n",
sinfo.txrate.flags, CFG80211_RATE_INFO_FLAGS_BITS,
sinfo.txrate.bw, lkpi_rate_info_bw_to_str(sinfo.txrate.bw),
sinfo.txrate.legacy * 100,
sinfo.txrate.mcs, sinfo.txrate.nss);
- sbuf_printf(&s, " he_dcm %u he_gi %u he_ru_alloc %u eht_gi %u\n",
+ sbuf_printf(s, " he_dcm %u he_gi %u he_ru_alloc %u eht_gi %u\n",
sinfo.txrate.he_dcm, sinfo.txrate.he_gi, sinfo.txrate.he_ru_alloc,
sinfo.txrate.eht_gi);
}
wiphy_unlock(hw->wiphy);
+}
+
+static int
+lkpi_80211_dump_stas(SYSCTL_HANDLER_ARGS)
+{
+ struct lkpi_vif *lvif;
+ struct sbuf s;
+
+ if (req->newptr)
+ return (EPERM);
+
+ lvif = (struct lkpi_vif *)arg1;
+
+ sbuf_new_for_sysctl(&s, NULL, 1024, req);
+
+ lkpi_80211_dump_lvif_stas(lvif, &s);
sbuf_finish(&s);
sbuf_delete(&s);
@@ -377,7 +401,7 @@ lkpi_80211_dump_stas(SYSCTL_HANDLER_ARGS)
return (0);
}
-static enum ieee80211_sta_rx_bw
+static enum ieee80211_sta_rx_bandwidth
lkpi_cw_to_rx_bw(enum nl80211_chan_width cw)
{
switch (cw) {
@@ -401,7 +425,7 @@ lkpi_cw_to_rx_bw(enum nl80211_chan_width cw)
}
static enum nl80211_chan_width
-lkpi_rx_bw_to_cw(enum ieee80211_sta_rx_bw rx_bw)
+lkpi_rx_bw_to_cw(enum ieee80211_sta_rx_bandwidth rx_bw)
{
switch (rx_bw) {
case IEEE80211_STA_RX_BW_20:
@@ -422,7 +446,7 @@ lkpi_sync_chanctx_cw_from_rx_bw(struct ieee80211_hw *hw,
struct ieee80211_vif *vif, struct ieee80211_sta *sta)
{
struct ieee80211_chanctx_conf *chanctx_conf;
- enum ieee80211_sta_rx_bw old_bw;
+ enum ieee80211_sta_rx_bandwidth old_bw;
uint32_t changed;
chanctx_conf = rcu_dereference_protected(vif->bss_conf.chanctx_conf,
@@ -527,7 +551,7 @@ static void
lkpi_sta_sync_vht_from_ni(struct ieee80211_vif *vif, struct ieee80211_sta *sta,
struct ieee80211_node *ni)
{
- enum ieee80211_sta_rx_bw bw;
+ enum ieee80211_sta_rx_bandwidth bw;
uint32_t width;
int rx_nss;
uint16_t rx_mcs_map;
@@ -938,6 +962,30 @@ lkpi_nl80211_band_to_net80211_band(enum nl80211_band band)
return (0x00);
}
+#ifdef LINUXKPI_DEBUG_80211
+static const char *
+lkpi_nl80211_band_name(enum nl80211_band band)
+{
+ switch (band) {
+ case NL80211_BAND_2GHZ:
+ return "2Ghz";
+ break;
+ case NL80211_BAND_5GHZ:
+ return "5Ghz";
+ break;
+ case NL80211_BAND_60GHZ:
+ return "60Ghz";
+ break;
+ case NL80211_BAND_6GHZ:
+ return "6Ghz";
+ break;
+ default:
+ panic("%s: unsupported band %u\n", __func__, band);
+ break;
+ }
+}
+#endif
+
#if 0
static enum ieee80211_ac_numbers
lkpi_ac_net_to_l80211(int ac)
@@ -1162,7 +1210,7 @@ lkpi_find_lkpi80211_chan(struct lkpi_hw *lhw,
channels = hw->wiphy->bands[band]->channels;
for (i = 0; i < nchans; i++) {
- if (channels[i].hw_value == c->ic_ieee)
+ if (channels[i].center_freq == c->ic_freq)
return (&channels[i]);
}
@@ -1302,6 +1350,7 @@ lkpi_iv_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k)
lhw = ic->ic_softc;
hw = LHW_TO_HW(lhw);
lvif = VAP_TO_LVIF(vap);
+ vif = LVIF_TO_VIF(lvif);
/*
* Make sure we do not make it here without going through
@@ -1309,6 +1358,23 @@ lkpi_iv_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k)
*/
lockdep_assert_wiphy(hw->wiphy);
+ /*
+ * While we are assoc we may still send packets. We cannot delete the
+ * keys as otherwise packets could go out unencrypted. Some firmware
+ * does not like this and will fire an assert.
+ * net80211 needs to drive this better but given we want the disassoc
+	 * net80211 needs to drive this better, but given we want the disassoc
+	 * frame out and have to unlock, we are currently open to a race.
+ * How to test: run 800Mbit/s UDP traffic and during that restart your
+ * supplicant. You want to survive that.
+ */
+ if (vif->cfg.assoc) {
+ if (linuxkpi_debug_80211 & D80211_TRACE_HW_CRYPTO)
+ ic_printf(ic, "%d %lu %s: vif still assoc; not deleting keys\n",
+ curthread->td_tid, jiffies, __func__);
+ return (0);
+ }
+
if (IEEE80211_KEY_UNDEFINED(k)) {
ic_printf(ic, "%s: vap %p key %p is undefined: %p %u\n",
__func__, vap, k, k->wk_cipher, k->wk_keyix);
@@ -1353,7 +1419,6 @@ lkpi_iv_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k)
kc->keyidx, kc->hw_key_idx, kc->flags, IEEE80211_KEY_FLAG_BITS);
#endif
- vif = LVIF_TO_VIF(lvif);
error = lkpi_80211_mo_set_key(hw, DISABLE_KEY, vif, sta, kc);
if (error != 0) {
ic_printf(ic, "%d %lu %s: set_key cmd %d(%s) for sta %6D failed: %d\n",
@@ -1700,6 +1765,24 @@ lkpi_iv_key_update_end(struct ieee80211vap *vap)
}
#endif
+static void
+lkpi_cleanup_mcast_list_locked(struct lkpi_hw *lhw)
+{
+ struct list_head *le, *next;
+ struct netdev_hw_addr *addr;
+
+ if (lhw->mc_list.count != 0) {
+ list_for_each_safe(le, next, &lhw->mc_list.addr_list) {
+ addr = list_entry(le, struct netdev_hw_addr, addr_list);
+ list_del(le);
+ lhw->mc_list.count--;
+ free(addr, M_LKPI80211);
+ }
+ }
+ KASSERT(lhw->mc_list.count == 0, ("%s: mc_list %p count %d != 0\n",
+ __func__, &lhw->mc_list, lhw->mc_list.count));
+}
+
static u_int
lkpi_ic_update_mcast_copy(void *arg, struct sockaddr_dl *sdl, u_int cnt)
{
@@ -1736,16 +1819,13 @@ lkpi_ic_update_mcast_copy(void *arg, struct sockaddr_dl *sdl, u_int cnt)
}
static void
-lkpi_update_mcast_filter(struct ieee80211com *ic, bool force)
+lkpi_update_mcast_filter(struct ieee80211com *ic)
{
struct lkpi_hw *lhw;
struct ieee80211_hw *hw;
- struct netdev_hw_addr_list mc_list;
- struct list_head *le, *next;
- struct netdev_hw_addr *addr;
- struct ieee80211vap *vap;
u64 mc;
- unsigned int changed_flags, total_flags;
+ unsigned int changed_flags, flags;
+ bool scanning;
lhw = ic->ic_softc;
@@ -1753,44 +1833,32 @@ lkpi_update_mcast_filter(struct ieee80211com *ic, bool force)
lhw->ops->configure_filter == NULL)
return;
- if (!lhw->update_mc && !force)
- return;
+ LKPI_80211_LHW_SCAN_LOCK(lhw);
+ scanning = (lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0;
+ LKPI_80211_LHW_SCAN_UNLOCK(lhw);
- changed_flags = total_flags = 0;
- mc_list.count = 0;
- INIT_LIST_HEAD(&mc_list.addr_list);
- if (ic->ic_allmulti == 0) {
- TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next)
- if_foreach_llmaddr(vap->iv_ifp,
- lkpi_ic_update_mcast_copy, &mc_list);
- } else {
- changed_flags |= FIF_ALLMULTI;
- }
+ LKPI_80211_LHW_MC_LOCK(lhw);
+
+ flags = 0;
+ if (scanning)
+ flags |= FIF_BCN_PRBRESP_PROMISC;
+ if (lhw->mc_all_multi)
+ flags |= FIF_ALLMULTI;
hw = LHW_TO_HW(lhw);
- mc = lkpi_80211_mo_prepare_multicast(hw, &mc_list);
- /*
- * XXX-BZ make sure to get this sorted what is a change,
- * what gets all set; what was already set?
- */
- total_flags = changed_flags;
- lkpi_80211_mo_configure_filter(hw, changed_flags, &total_flags, mc);
+ mc = lkpi_80211_mo_prepare_multicast(hw, &lhw->mc_list);
+
+ changed_flags = (lhw->mc_flags ^ flags) & FIF_FLAGS_MASK;
+ lkpi_80211_mo_configure_filter(hw, changed_flags, &flags, mc);
+ lhw->mc_flags = flags;
#ifdef LINUXKPI_DEBUG_80211
if (linuxkpi_debug_80211 & D80211_TRACE)
- printf("%s: changed_flags %#06x count %d total_flags %#010x\n",
- __func__, changed_flags, mc_list.count, total_flags);
+ printf("%s: changed_flags %#06x count %d mc_flags %#010x\n",
+ __func__, changed_flags, lhw->mc_list.count, lhw->mc_flags);
#endif
- if (mc_list.count != 0) {
- list_for_each_safe(le, next, &mc_list.addr_list) {
- addr = list_entry(le, struct netdev_hw_addr, addr_list);
- free(addr, M_LKPI80211);
- mc_list.count--;
- }
- }
- KASSERT(mc_list.count == 0, ("%s: mc_list %p count %d != 0\n",
- __func__, &mc_list, mc_list.count));
+ LKPI_80211_LHW_MC_UNLOCK(lhw);
}
static enum ieee80211_bss_changed
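
The rework keeps the flags last handed to the driver in lhw->mc_flags, so configure_filter() only ever sees the delta masked to valid FIF_* bits. A quick illustration of that delta computation, with values picked purely for the example:

	unsigned int old_flags, cur_flags, changed;

	old_flags = FIF_ALLMULTI;		/* previously programmed */
	cur_flags = FIF_BCN_PRBRESP_PROMISC;	/* wanted now (scanning) */
	changed = (old_flags ^ cur_flags) & FIF_FLAGS_MASK;
	/* changed has both bits set: each one flipped state. */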
@@ -1822,13 +1890,31 @@ lkpi_update_dtim_tsf(struct ieee80211_vif *vif, struct ieee80211_node *ni,
vif->bss_conf.beacon_int = 16;
bss_changed |= BSS_CHANGED_BEACON_INT;
}
- if (vif->bss_conf.dtim_period != vap->iv_dtim_period &&
- vap->iv_dtim_period > 0) {
- vif->bss_conf.dtim_period = vap->iv_dtim_period;
+
+ /*
+ * lkpi_iv_sta_recv_mgmt() will directly call into this function.
+ * iwlwifi(4) in iwl_mvm_bss_info_changed_station_common() will
+	 * stop session protection the moment it sees
+	 * BSS_CHANGED_BEACON_INFO (with the expectation that it was
+	 * "a beacon from the associated AP"). It will also update
+	 * the beacon filter in that case. This is the only place
+	 * we set BSS_CHANGED_BEACON_INFO on the non-teardown
+	 * path, so make sure we only run this check once we are
+	 * assoc. (*iv_recv_mgmt)() will be called before we enter
+	 * here so the ni will be updated with information from the
+	 * beacon via net80211::sta_recv_mgmt(). We also need to
+	 * make sure we do not do this on every beacon we may still
+	 * get, so only do it if something changed. vif->bss_conf.dtim_period
+	 * should be 0 as we start up (we also reset it on teardown).
+ */
+ if (vif->cfg.assoc &&
+ vif->bss_conf.dtim_period != ni->ni_dtim_period &&
+ ni->ni_dtim_period > 0) {
+ vif->bss_conf.dtim_period = ni->ni_dtim_period;
bss_changed |= BSS_CHANGED_BEACON_INFO;
}
- vif->bss_conf.sync_dtim_count = vap->iv_dtim_count;
+ vif->bss_conf.sync_dtim_count = ni->ni_dtim_count;
vif->bss_conf.sync_tsf = le64toh(ni->ni_tstamp.tsf);
/* vif->bss_conf.sync_device_ts = set in linuxkpi_ieee80211_rx. */
@@ -1856,6 +1942,8 @@ lkpi_stop_hw_scan(struct lkpi_hw *lhw, struct ieee80211_vif *vif)
int error;
bool cancel;
+ TRACE_SCAN(lhw->ic, "scan_flags %b", lhw->scan_flags, LKPI_LHW_SCAN_BITS);
+
LKPI_80211_LHW_SCAN_LOCK(lhw);
cancel = (lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0;
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
@@ -1909,19 +1997,19 @@ lkpi_disassoc(struct ieee80211_sta *sta, struct ieee80211_vif *vif,
struct lkpi_hw *lhw)
{
enum ieee80211_bss_changed changed;
+ struct lkpi_vif *lvif;
changed = 0;
sta->aid = 0;
if (vif->cfg.assoc) {
- lhw->update_mc = true;
- lkpi_update_mcast_filter(lhw->ic, true);
-
vif->cfg.assoc = false;
vif->cfg.aid = 0;
changed |= BSS_CHANGED_ASSOC;
IMPROVE();
+ lkpi_update_mcast_filter(lhw->ic);
+
/*
* Executing the bss_info_changed(BSS_CHANGED_ASSOC) with
* assoc = false right away here will remove the sta from
@@ -1932,6 +2020,9 @@ lkpi_disassoc(struct ieee80211_sta *sta, struct ieee80211_vif *vif,
* bss_info_changed() update.
* See lkpi_sta_run_to_init() for more detailed comment.
*/
+
+ lvif = VIF_TO_LVIF(vif);
+ lvif->beacons = 0;
}
return (changed);
@@ -2202,6 +2293,7 @@ lkpi_sta_scan_to_auth(struct ieee80211vap *vap, enum ieee80211_state nstate, int
/* vif->bss_conf.basic_rates ? Where exactly? */
+ lvif->beacons = 0;
/* Should almost assert it is this. */
vif->cfg.assoc = false;
vif->cfg.aid = 0;
@@ -2391,6 +2483,7 @@ lkpi_sta_auth_to_scan(struct ieee80211vap *vap, enum ieee80211_state nstate, int
struct lkpi_sta *lsta;
struct ieee80211_sta *sta;
struct ieee80211_prep_tx_info prep_tx_info;
+ enum ieee80211_bss_changed bss_changed;
int error;
lhw = vap->iv_ic->ic_softc;
@@ -2462,6 +2555,11 @@ lkpi_sta_auth_to_scan(struct ieee80211vap *vap, enum ieee80211_state nstate, int
lsta->added_to_drv = false; /* mo manages. */
#endif
+ bss_changed = 0;
+ vif->bss_conf.dtim_period = 0; /* go back to 0. */
+ bss_changed |= BSS_CHANGED_BEACON_INFO;
+ lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, bss_changed);
+
lkpi_lsta_dump(lsta, ni, __func__, __LINE__);
LKPI_80211_LVIF_LOCK(lvif);
@@ -2470,12 +2568,6 @@ lkpi_sta_auth_to_scan(struct ieee80211vap *vap, enum ieee80211_state nstate, int
lvif->lvif_bss_synched = false;
LKPI_80211_LVIF_UNLOCK(lvif);
lkpi_lsta_remove(lsta, lvif);
- /*
- * The very last release the reference on the ni for the ni/lsta on
- * lvif->lvif_bss. Upon return from this both ni and lsta are invalid
- * and potentially freed.
- */
- ieee80211_free_node(ni);
/* conf_tx */
@@ -2484,6 +2576,18 @@ lkpi_sta_auth_to_scan(struct ieee80211vap *vap, enum ieee80211_state nstate, int
out:
wiphy_unlock(hw->wiphy);
IEEE80211_LOCK(vap->iv_ic);
+ if (error == 0) {
+ /*
+		 * We do this outside the wiphy lock as net80211::node_free() may call
+		 * into crypto code to delete keys and we would otherwise get a
+		 * "recursed on non-recursive sx" panic. Also only do this if we get
+		 * here w/o error.
+		 *
+		 * This is the very last release of the reference on the ni for the
+		 * ni/lsta on lvif->lvif_bss. Upon return from this both ni and lsta
+		 * are invalid and potentially freed.
+ */
+ ieee80211_free_node(ni);
+ }
return (error);
}
@@ -2768,6 +2872,14 @@ _lkpi_sta_assoc_to_down(struct ieee80211vap *vap, enum ieee80211_state nstate, i
bss_changed = 0;
bss_changed |= lkpi_disassoc(sta, vif, lhw);
+#ifdef LKPI_80211_HW_CRYPTO
+ /*
+ * In theory we remove keys here but there must not exist any for this
+ * state change until we clean them up again into small steps and no
+ * code duplication.
+ */
+#endif
+
lkpi_lsta_dump(lsta, ni, __func__, __LINE__);
/* Adjust sta and change state (from NONE) to NOTEXIST. */
@@ -2790,6 +2902,8 @@ _lkpi_sta_assoc_to_down(struct ieee80211vap *vap, enum ieee80211_state nstate, i
vif->cfg.ssid_len = 0;
memset(vif->cfg.ssid, '\0', sizeof(vif->cfg.ssid));
bss_changed |= BSS_CHANGED_BSSID;
+ vif->bss_conf.dtim_period = 0; /* go back to 0. */
+ bss_changed |= BSS_CHANGED_BEACON_INFO;
lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, bss_changed);
LKPI_80211_LVIF_LOCK(lvif);
@@ -2798,12 +2912,6 @@ _lkpi_sta_assoc_to_down(struct ieee80211vap *vap, enum ieee80211_state nstate, i
lvif->lvif_bss_synched = false;
LKPI_80211_LVIF_UNLOCK(lvif);
lkpi_lsta_remove(lsta, lvif);
- /*
- * The very last release the reference on the ni for the ni/lsta on
- * lvif->lvif_bss. Upon return from this both ni and lsta are invalid
- * and potentially freed.
- */
- ieee80211_free_node(ni);
/* conf_tx */
@@ -2813,6 +2921,18 @@ _lkpi_sta_assoc_to_down(struct ieee80211vap *vap, enum ieee80211_state nstate, i
out:
wiphy_unlock(hw->wiphy);
IEEE80211_LOCK(vap->iv_ic);
+ if (error == EALREADY) {
+ /*
+		 * We do this outside the wiphy lock as net80211::node_free() may call
+		 * into crypto code to delete keys and we would otherwise get a
+		 * "recursed on non-recursive sx" panic. Also only do this if we get
+		 * here w/o error.
+		 *
+		 * This is the very last release of the reference on the ni for the
+		 * ni/lsta on lvif->lvif_bss. Upon return from this both ni and lsta
+		 * are invalid and potentially freed.
+ */
+ ieee80211_free_node(ni);
+ }
outni:
return (error);
}
@@ -2922,6 +3042,7 @@ lkpi_sta_assoc_to_run(struct ieee80211vap *vap, enum ieee80211_state nstate, int
bss_changed |= lkpi_wme_update(lhw, vap, true);
#endif
if (!vif->cfg.assoc || vif->cfg.aid != IEEE80211_NODE_AID(ni)) {
+ lvif->beacons = 0;
vif->cfg.assoc = true;
vif->cfg.aid = IEEE80211_NODE_AID(ni);
bss_changed |= BSS_CHANGED_ASSOC;
@@ -2970,9 +3091,6 @@ lkpi_sta_assoc_to_run(struct ieee80211vap *vap, enum ieee80211_state nstate, int
* - set_key (?)
* - ipv6_addr_change (?)
*/
- /* Prepare_multicast && configure_filter. */
- lhw->update_mc = true;
- lkpi_update_mcast_filter(vap->iv_ic, true);
if (!ieee80211_node_is_authorized(ni)) {
IMPROVE("net80211 does not consider node authorized");
@@ -3011,6 +3129,9 @@ lkpi_sta_assoc_to_run(struct ieee80211vap *vap, enum ieee80211_state nstate, int
bss_changed |= lkpi_update_dtim_tsf(vif, ni, vap, __func__, __LINE__);
lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, bss_changed);
+ /* Prepare_multicast && configure_filter. */
+ lkpi_update_mcast_filter(vap->iv_ic);
+
out:
wiphy_unlock(hw->wiphy);
IEEE80211_LOCK(vap->iv_ic);
@@ -3300,6 +3421,16 @@ lkpi_sta_run_to_init(struct ieee80211vap *vap, enum ieee80211_state nstate, int
#ifdef LKPI_80211_HW_CRYPTO
if (lkpi_hwcrypto) {
+ /*
+ * In theory we only need to do this if we changed assoc.
+ * If we were not assoc, there should be no keys and we
+ * should not be here.
+ */
+#ifdef notyet
+ KASSERT((bss_changed & BSS_CHANGED_ASSOC) != 0, ("%s: "
+ "trying to remove keys but were not assoc: %#010jx, lvif %p\n",
+ __func__, (uintmax_t)bss_changed, lvif));
+#endif
error = lkpi_sta_del_keys(hw, vif, lsta);
if (error != 0) {
ic_printf(vap->iv_ic, "%s:%d: lkpi_sta_del_keys "
@@ -3361,6 +3492,9 @@ lkpi_sta_run_to_init(struct ieee80211vap *vap, enum ieee80211_state nstate, int
* 4) call unassign_vif_chanctx
* 5) call lkpi_hw_conf_idle
* 6) call remove_chanctx
+ *
+ * Note: vif->driver_flags & IEEE80211_VIF_REMOVE_AP_AFTER_DISASSOC
+ * might change this.
*/
bss_changed |= lkpi_disassoc(sta, vif, lhw);
@@ -3391,6 +3525,8 @@ lkpi_sta_run_to_init(struct ieee80211vap *vap, enum ieee80211_state nstate, int
vif->bss_conf.use_short_preamble = false;
vif->bss_conf.qos = false;
/* XXX BSS_CHANGED_???? */
+ vif->bss_conf.dtim_period = 0; /* go back to 0. */
+ bss_changed |= BSS_CHANGED_BEACON_INFO;
lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, bss_changed);
LKPI_80211_LVIF_LOCK(lvif);
@@ -3398,12 +3534,6 @@ lkpi_sta_run_to_init(struct ieee80211vap *vap, enum ieee80211_state nstate, int
lvif->lvif_bss = NULL;
lvif->lvif_bss_synched = false;
LKPI_80211_LVIF_UNLOCK(lvif);
- /*
- * The very last release the reference on the ni for the ni/lsta on
- * lvif->lvif_bss. Upon return from this both ni and lsta are invalid
- * and potentially freed.
- */
- ieee80211_free_node(ni);
/* conf_tx */
@@ -3413,6 +3543,18 @@ lkpi_sta_run_to_init(struct ieee80211vap *vap, enum ieee80211_state nstate, int
out:
wiphy_unlock(hw->wiphy);
IEEE80211_LOCK(vap->iv_ic);
+ if (error == EALREADY) {
+ /*
+		 * We do this outside the wiphy lock as net80211::node_free() may call
+		 * into crypto code to delete keys and we would otherwise get a
+		 * "recursed on non-recursive sx" panic. Also only do this if we get
+		 * here w/o error.
+		 *
+		 * This is the very last release of the reference on the ni for the
+		 * ni/lsta on lvif->lvif_bss. Upon return from this both ni and lsta
+		 * are invalid and potentially freed.
+ */
+ ieee80211_free_node(ni);
+ }
outni:
return (error);
}
@@ -3510,7 +3652,7 @@ lkpi_iv_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg)
vif = LVIF_TO_VIF(lvif);
/* No need to replicate this in most state handlers. */
- if (ostate == IEEE80211_S_SCAN && nstate != IEEE80211_S_SCAN)
+ if (nstate > IEEE80211_S_SCAN)
lkpi_stop_hw_scan(lhw, vif);
s = sta_state_fsm;
@@ -3693,6 +3835,48 @@ lkpi_ic_wme_update(struct ieee80211com *ic)
return (0); /* unused */
}
+static void
+lkpi_iv_sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0,
+ int subtype, const struct ieee80211_rx_stats *rxs, int rssi, int nf)
+{
+ struct lkpi_hw *lhw;
+ struct ieee80211_hw *hw;
+ struct lkpi_vif *lvif;
+ struct ieee80211_vif *vif;
+ enum ieee80211_bss_changed bss_changed;
+
+ lvif = VAP_TO_LVIF(ni->ni_vap);
+ vif = LVIF_TO_VIF(lvif);
+
+ lvif->iv_recv_mgmt(ni, m0, subtype, rxs, rssi, nf);
+
+ switch (subtype) {
+ case IEEE80211_FC0_SUBTYPE_PROBE_RESP:
+ break;
+ case IEEE80211_FC0_SUBTYPE_BEACON:
+ /*
+ * Only count beacons when assoc. SCAN has its own logging.
+		 * This is mostly for debugging connection/beacon loss and session
+		 * protection when trying to get into a stable RUN state.
+ */
+ if (vif->cfg.assoc)
+ lvif->beacons++;
+ break;
+ default:
+ return;
+ }
+
+ lhw = ni->ni_ic->ic_softc;
+ hw = LHW_TO_HW(lhw);
+
+ /*
+ * If this direct call to mo_bss_info_changed will not work due to
+ * locking, see if queue_work() is fast enough.
+ */
+ bss_changed = lkpi_update_dtim_tsf(vif, ni, ni->ni_vap, __func__, __LINE__);
+ lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, bss_changed);
+}
+
/*
* Change link-layer address on the vif (if the vap is not started/"UP").
* This can happen if a user changes 'ether' using ifconfig.
@@ -3753,6 +3937,7 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ],
lvif = malloc(len, M_80211_VAP, M_WAITOK | M_ZERO);
mtx_init(&lvif->mtx, "lvif", NULL, MTX_DEF);
+ TASK_INIT(&lvif->sw_scan_task, 0, lkpi_sw_scan_task, lvif);
INIT_LIST_HEAD(&lvif->lsta_list);
lvif->lvif_bss = NULL;
refcount_init(&lvif->nt_unlocked, 0);
@@ -3826,8 +4011,10 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ],
IMPROVE();
+ wiphy_lock(hw->wiphy);
error = lkpi_80211_mo_start(hw);
if (error != 0) {
+ wiphy_unlock(hw->wiphy);
ic_printf(ic, "%s: failed to start hw: %d\n", __func__, error);
mtx_destroy(&lvif->mtx);
free(lvif, M_80211_VAP);
@@ -3837,11 +4024,13 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ],
error = lkpi_80211_mo_add_interface(hw, vif);
if (error != 0) {
IMPROVE(); /* XXX-BZ mo_stop()? */
+ wiphy_unlock(hw->wiphy);
ic_printf(ic, "%s: failed to add interface: %d\n", __func__, error);
mtx_destroy(&lvif->mtx);
free(lvif, M_80211_VAP);
return (NULL);
}
+ wiphy_unlock(hw->wiphy);
LKPI_80211_LHW_LVIF_LOCK(lhw);
TAILQ_INSERT_TAIL(&lhw->lvif_head, lvif, lvif_entry);
@@ -3871,17 +4060,21 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ],
lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, changed);
/* Force MC init. */
- lkpi_update_mcast_filter(ic, true);
-
- IMPROVE();
+ lkpi_update_mcast_filter(ic);
ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid);
+ /* Now we have a valid vap->iv_ifp. Any checksum offloading goes below. */
+
+ IMPROVE();
+
/* Override with LinuxKPI method so we can drive mac80211/cfg80211. */
lvif->iv_newstate = vap->iv_newstate;
vap->iv_newstate = lkpi_iv_newstate;
lvif->iv_update_bss = vap->iv_update_bss;
vap->iv_update_bss = lkpi_iv_update_bss;
+ lvif->iv_recv_mgmt = vap->iv_recv_mgmt;
+ vap->iv_recv_mgmt = lkpi_iv_sta_recv_mgmt;
#ifdef LKPI_80211_HW_CRYPTO
/* Key management. */
@@ -3908,13 +4101,9 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ],
* Modern chipset/fw/drv will do A-MPDU in drv/fw and fail
* to do so if they cannot do the crypto too.
*/
- if (!lkpi_hwcrypto && ieee80211_hw_check(hw, AMPDU_AGGREGATION))
+ if (!lkpi_hwcrypto && IEEE80211_CONF_AMPDU_OFFLOAD(ic))
vap->iv_flags_ht &= ~IEEE80211_FHT_AMPDU_RX;
#endif
-#if defined(LKPI_80211_HT)
- /* 20250125-BZ Keep A-MPDU TX cleared until we sorted out AddBA for all drivers. */
- vap->iv_flags_ht &= ~IEEE80211_FHT_AMPDU_TX;
-#endif
if (hw->max_listen_interval == 0)
hw->max_listen_interval = 7 * (ic->ic_lintval / ic->ic_bintval);
@@ -3983,6 +4172,8 @@ lkpi_ic_vap_delete(struct ieee80211vap *vap)
/* Clear up per-VIF/VAP sysctls. */
sysctl_ctx_free(&lvif->sysctl_ctx);
+ ieee80211_draintask(ic, &lvif->sw_scan_task);
+
LKPI_80211_LHW_LVIF_LOCK(lhw);
TAILQ_REMOVE(&lhw->lvif_head, lvif, lvif_entry);
LKPI_80211_LHW_LVIF_UNLOCK(lhw);
@@ -4004,8 +4195,30 @@ lkpi_ic_vap_delete(struct ieee80211vap *vap)
static void
lkpi_ic_update_mcast(struct ieee80211com *ic)
{
+ struct ieee80211vap *vap;
+ struct lkpi_hw *lhw;
+
+ lhw = ic->ic_softc;
+ if (lhw->ops->prepare_multicast == NULL ||
+ lhw->ops->configure_filter == NULL)
+ return;
+
+ LKPI_80211_LHW_MC_LOCK(lhw);
+ /* Cleanup anything on the current list. */
+ lkpi_cleanup_mcast_list_locked(lhw);
+
+ /* Build up the new list (or allmulti). */
+ if (ic->ic_allmulti == 0) {
+ TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next)
+ if_foreach_llmaddr(vap->iv_ifp,
+ lkpi_ic_update_mcast_copy, &lhw->mc_list);
+ lhw->mc_all_multi = false;
+ } else {
+ lhw->mc_all_multi = true;
+ }
+ LKPI_80211_LHW_MC_UNLOCK(lhw);
- lkpi_update_mcast_filter(ic, false);
+ lkpi_update_mcast_filter(ic);
TRACEOK();
}
@@ -4202,6 +4415,113 @@ lkpi_scan_ies_add(uint8_t *p, struct ieee80211_scan_ies *scan_ies,
}
static void
+lkpi_enable_hw_scan(struct lkpi_hw *lhw)
+{
+
+ if (lhw->ops->hw_scan) {
+ /*
+ * Advertise full-offload scanning.
+ *
+ * Not limiting to SINGLE_SCAN_ON_ALL_BANDS here as otherwise
+ * we essentially disable hw_scan for all drivers not setting
+ * the flag.
+ */
+ lhw->ic->ic_flags_ext |= IEEE80211_FEXT_SCAN_OFFLOAD;
+ lhw->scan_flags |= LKPI_LHW_SCAN_HW;
+ }
+}
+
+#ifndef LKPI_80211_USE_SCANLIST
+static const uint32_t chan_pri[] = {
+ 5180, 5500, 5745,
+ 5260, 5580, 5660, 5825,
+ 5220, 5300, 5540, 5620, 5700, 5785, 5865,
+ 2437, 2412, 2422, 2462, 2472, 2432, 2452
+};
+
+static int
+lkpi_scan_chan_list_idx(const struct linuxkpi_ieee80211_channel *lc)
+{
+ int i;
+
+ for (i = 0; i < nitems(chan_pri); i++) {
+ if (lc->center_freq == chan_pri[i])
+ return (i);
+ }
+
+ return (-1);
+}
+
+static int
+lkpi_scan_chan_list_comp(const struct linuxkpi_ieee80211_channel *lc1,
+ const struct linuxkpi_ieee80211_channel *lc2)
+{
+ int idx1, idx2;
+
+ /* Find index in list. */
+ idx1 = lkpi_scan_chan_list_idx(lc1);
+ idx2 = lkpi_scan_chan_list_idx(lc2);
+
+ if (idx1 == -1 && idx2 != -1)
+ return (1);
+ if (idx1 != -1 && idx2 == -1)
+ return (-1);
+
+ /* Neither on the list, use center_freq. */
+ if (idx1 == -1 && idx2 == -1)
+ return (lc1->center_freq - lc2->center_freq);
+
+ /* Whichever is first in the list. */
+ return (idx1 - idx2);
+}
+
+static void
+lkpi_scan_chan_list_resort(struct linuxkpi_ieee80211_channel **cpp, size_t nchan)
+{
+ struct linuxkpi_ieee80211_channel *lc, *nc;
+ size_t i, j;
+ int rc;
+
+ for (i = (nchan - 1); i > 0; i--) {
+ for (j = i; j > 0 ; j--) {
+ lc = *(cpp + j);
+ nc = *(cpp + j - 1);
+ rc = lkpi_scan_chan_list_comp(lc, nc);
+ if (rc < 0) {
+ *(cpp + j) = nc;
+ *(cpp + j - 1) = lc;
+ }
+ }
+ }
+}
+
+static bool
+lkpi_scan_chan(struct linuxkpi_ieee80211_channel *c,
+ struct ieee80211com *ic, bool log)
+{
+
+ if ((c->flags & IEEE80211_CHAN_DISABLED) != 0) {
+ if (log)
+ TRACE_SCAN(ic, "Skipping disabled chan "
+ "on band %s [%#x/%u/%#x]",
+ lkpi_nl80211_band_name(c->band), c->hw_value,
+ c->center_freq, c->flags);
+ return (false);
+ }
+ if (isclr(ic->ic_chan_active, ieee80211_mhz2ieee(c->center_freq,
+ lkpi_nl80211_band_to_net80211_band(c->band)))) {
+ if (log)
+ TRACE_SCAN(ic, "Skipping !active chan "
+ "on band %s [%#x/%u/%#x]",
+ lkpi_nl80211_band_name(c->band), c->hw_value,
+ c->center_freq, c->flags);
+ return (false);
+ }
+ return (true);
+}
+#endif
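+
+/*
+ * chan_pri[] front-loads a handful of commonly used primary channels; with
+ * ordering enabled, lkpi_scan_chan_list_resort() places channels found in
+ * that table first (in table order) and sorts the remainder by frequency.
+ * A hypothetical example of the effect:
+ *
+ *   Input order (center_freq, MHz):  2412, 2437, 5180, 5200, 5745
+ *   After resort with compat.linuxkpi.80211.order_scanlist=1:
+ *                                    5180, 5745, 2437, 2412, 5200
+ */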
+
+static void
lkpi_ic_scan_start(struct ieee80211com *ic)
{
struct lkpi_hw *lhw;
@@ -4214,33 +4534,52 @@ lkpi_ic_scan_start(struct ieee80211com *ic)
bool is_hw_scan;
lhw = ic->ic_softc;
+ ss = ic->ic_scan;
+ vap = ss->ss_vap;
+ TRACE_SCAN(ic, "scan_flags %b", lhw->scan_flags, LKPI_LHW_SCAN_BITS);
+
LKPI_80211_LHW_SCAN_LOCK(lhw);
if ((lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0) {
/* A scan is still running. */
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
+ TRACE_SCAN(ic, "Trying to start new scan while still running; "
+ "cancelling new net80211 scan; scan_flags %b",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS);
+ ieee80211_cancel_scan(vap);
return;
}
is_hw_scan = (lhw->scan_flags & LKPI_LHW_SCAN_HW) != 0;
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
- ss = ic->ic_scan;
- vap = ss->ss_vap;
+#if 0
if (vap->iv_state != IEEE80211_S_SCAN) {
- IMPROVE("We need to be able to scan if not in S_SCAN");
+ TODO("We need to be able to scan if not in S_SCAN");
+ TRACE_SCAN(ic, "scan_flags %b iv_state %d",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS, vap->iv_state);
+ ieee80211_cancel_scan(vap);
return;
}
+#endif
hw = LHW_TO_HW(lhw);
if (!is_hw_scan) {
/* If hw_scan is cleared clear FEXT_SCAN_OFFLOAD too. */
vap->iv_flags_ext &= ~IEEE80211_FEXT_SCAN_OFFLOAD;
-sw_scan:
+
lvif = VAP_TO_LVIF(vap);
vif = LVIF_TO_VIF(lvif);
if (vap->iv_state == IEEE80211_S_SCAN)
lkpi_hw_conf_idle(hw, false);
+ LKPI_80211_LHW_SCAN_LOCK(lhw);
+ lhw->scan_flags |= LKPI_LHW_SCAN_RUNNING;
+ LKPI_80211_LHW_SCAN_UNLOCK(lhw);
+
+ lkpi_update_mcast_filter(ic);
+
+ TRACE_SCAN(vap->iv_ic, "Starting SW_SCAN: scan_flags %b",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS);
lkpi_80211_mo_sw_scan_start(hw, vif, vif->addr);
/* net80211::scan_start() handled PS for us. */
IMPROVE();
@@ -4255,6 +4594,9 @@ sw_scan:
struct cfg80211_scan_6ghz_params *s6gp;
size_t chan_len, nchan, ssids_len, s6ghzlen;
int band, i, ssid_count, common_ie_len;
+#ifndef LKPI_80211_USE_SCANLIST
+ int n;
+#endif
uint32_t band_mask;
uint8_t *ie, *ieend;
bool running;
@@ -4266,7 +4608,8 @@ sw_scan:
band_mask = 0;
nchan = 0;
if (ieee80211_hw_check(hw, SINGLE_SCAN_ON_ALL_BANDS)) {
-#if 0 /* Avoid net80211 scan lists until it has proper scan offload support. */
+#ifdef LKPI_80211_USE_SCANLIST
+ /* Avoid net80211 scan lists until it has proper scan offload support. */
for (i = ss->ss_next; i < ss->ss_last; i++) {
nchan++;
band = lkpi_net80211_chan_to_nl80211_band(
@@ -4284,8 +4627,17 @@ sw_scan:
continue;
}
if (hw->wiphy->bands[band] != NULL) {
- nchan += hw->wiphy->bands[band]->n_channels;
+ struct linuxkpi_ieee80211_channel *channels;
+ int n;
+
band_mask |= (1 << band);
+
+ channels = hw->wiphy->bands[band]->channels;
+ n = hw->wiphy->bands[band]->n_channels;
+ for (i = 0; i < n; i++) {
+ if (lkpi_scan_chan(&channels[i], ic, true))
+ nchan++;
+ }
}
}
#endif
@@ -4324,11 +4676,32 @@ sw_scan:
/* hw_req->req.wdev */
hw_req->req.wiphy = hw->wiphy;
hw_req->req.no_cck = false; /* XXX */
-#if 0
- /* This seems to pessimise default scanning behaviour. */
- hw_req->req.duration_mandatory = TICKS_2_USEC(ss->ss_mindwell);
- hw_req->req.duration = TICKS_2_USEC(ss->ss_maxdwell);
-#endif
+
+ /*
+ * In general setting duration[_mandatory] seems to pessimise
+	 * default scanning behaviour. We only use it for BGSCANning
+	 * to keep the dwell times small.
+	 * Setting duration_mandatory makes this the maximum dwell
+	 * time (otherwise it may be shorter). Duration is in TU.
+ */
+ if ((ic->ic_flags_ext & IEEE80211_FEXT_BGSCAN) != 0) {
+ unsigned long dwell;
+
+ if ((ic->ic_caps & IEEE80211_C_BGSCAN) == 0 ||
+ (vap->iv_flags & IEEE80211_F_BGSCAN) == 0)
+ ic_printf(ic, "BGSCAN despite off: %b, %b, %b\n",
+ ic->ic_flags_ext, IEEE80211_FEXT_BITS,
+ vap->iv_flags, IEEE80211_F_BITS,
+ ic->ic_caps, IEEE80211_C_BITS);
+
+ dwell = ss->ss_mindwell;
+ if (dwell == 0)
+ dwell = msecs_to_ticks(20);
+
+ hw_req->req.duration_mandatory = true;
+ hw_req->req.duration = TICKS_2_USEC(dwell) / 1024;
+ }
+
#ifdef __notyet__
hw_req->req.flags |= NL80211_SCAN_FLAG_RANDOM_ADDR;
memcpy(hw_req->req.mac_addr, xxx, IEEE80211_ADDR_LEN);
@@ -4339,16 +4712,16 @@ sw_scan:
hw_req->req.n_channels = nchan;
cpp = (struct linuxkpi_ieee80211_channel **)(hw_req + 1);
lc = (struct linuxkpi_ieee80211_channel *)(cpp + nchan);
+#ifdef LKPI_80211_USE_SCANLIST
for (i = 0; i < nchan; i++) {
*(cpp + i) =
(struct linuxkpi_ieee80211_channel *)(lc + i);
}
-#if 0 /* Avoid net80211 scan lists until it has proper scan offload support. */
+ /* Avoid net80211 scan lists until it has proper scan offload support. */
for (i = 0; i < nchan; i++) {
struct ieee80211_channel *c;
c = ss->ss_chans[ss->ss_next + i];
- lc->hw_value = c->ic_ieee;
lc->center_freq = c->ic_freq; /* XXX */
/* lc->flags */
lc->band = lkpi_net80211_chan_to_nl80211_band(c);
@@ -4357,7 +4730,9 @@ sw_scan:
lc++;
}
#else
- for (band = 0; band < NUM_NL80211_BANDS; band++) {
+ /* Add bands in reverse order for scanning. */
+ n = 0;
+ for (band = NUM_NL80211_BANDS - 1; band >= 0; band--) {
struct ieee80211_supported_band *supband;
struct linuxkpi_ieee80211_channel *channels;
@@ -4372,9 +4747,27 @@ sw_scan:
channels = supband->channels;
for (i = 0; i < supband->n_channels; i++) {
- *lc = channels[i];
- lc++;
+ if (lkpi_scan_chan(&channels[i], ic, false))
+ *(cpp + n++) = &channels[i];
+ }
+ }
+ if (lkpi_order_scanlist)
+ lkpi_scan_chan_list_resort(cpp, nchan);
+
+ if ((linuxkpi_debug_80211 & D80211_SCAN) != 0) {
+ printf("%s:%d: %s SCAN Channel List (nchan=%zu): ",
+ __func__, __LINE__, ic->ic_name, nchan);
+ for (i = 0; i < nchan; i++) {
+ struct linuxkpi_ieee80211_channel *xc;
+
+ xc = *(cpp + i);
+ printf(" %d(%d)",
+ ieee80211_mhz2ieee(xc->center_freq,
+ lkpi_nl80211_band_to_net80211_band(
+ xc->band)),
+ xc->center_freq);
}
+ printf("\n");
}
#endif
@@ -4404,6 +4797,7 @@ sw_scan:
ieend = lkpi_scan_ies_add(ie, &hw_req->ies, band_mask, vap, hw);
hw_req->req.ie = ie;
hw_req->req.ie_len = ieend - ie;
+ hw_req->req.scan_start = jiffies;
lvif = VAP_TO_LVIF(vap);
vif = LVIF_TO_VIF(lvif);
@@ -4421,11 +4815,30 @@ sw_scan:
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
if (running) {
free(hw_req, M_LKPI80211);
+ TRACE_SCAN(ic, "Trying to start new scan while still "
+ "running (2); cancelling new net80211 scan; "
+ "scan_flags %b",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS);
+ ieee80211_cancel_scan(vap);
return;
}
+ lkpi_update_mcast_filter(ic);
+ TRACE_SCAN(ic, "Starting HW_SCAN: scan_flags %b, "
+ "ie_len %d, n_ssids %d, n_chan %d, common_ie_len %d [%d, %d]",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS, hw_req->req.ie_len,
+ hw_req->req.n_ssids, hw_req->req.n_channels,
+ hw_req->ies.common_ie_len,
+ hw_req->ies.len[NL80211_BAND_2GHZ],
+ hw_req->ies.len[NL80211_BAND_5GHZ]);
+
error = lkpi_80211_mo_hw_scan(hw, vif, hw_req);
if (error != 0) {
+ bool scan_done;
+ int e;
+
+ TRACE_SCAN(ic, "hw_scan failed; scan_flags %b, error %d",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS, error);
ieee80211_cancel_scan(vap);
/*
@@ -4442,13 +4855,35 @@ sw_scan:
* So we cannot rely on that behaviour and have to check
* and balance between both code paths.
*/
+ e = 0;
+ scan_done = true;
LKPI_80211_LHW_SCAN_LOCK(lhw);
if ((lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0) {
+
free(lhw->hw_req, M_LKPI80211);
lhw->hw_req = NULL;
+ /*
+ * The ieee80211_cancel_scan() above runs in a
+ * taskq and it may take ages for the previous
+			 * scan to clear; if we start a new one right
+			 * away we run into the problem that the old
+			 * one is still active.
+ */
+ e = msleep(lhw, &lhw->scan_mtx, 0, "lhwscanstop", hz);
+ scan_done = (lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0;
+
+ /*
+ * Now we can clear running if no one else did.
+ */
lhw->scan_flags &= ~LKPI_LHW_SCAN_RUNNING;
}
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
+ lkpi_update_mcast_filter(ic);
+ if (!scan_done) {
+ ic_printf(ic, "ERROR: %s: timeout/error to wait "
+ "for ieee80211_cancel_scan: %d\n", __func__, e);
+ return;
+ }
/*
* XXX-SIGH magic number.
@@ -4456,24 +4891,15 @@ sw_scan:
* not possible. Fall back to sw scan in that case.
*/
if (error == 1) {
- LKPI_80211_LHW_SCAN_LOCK(lhw);
- lhw->scan_flags &= ~LKPI_LHW_SCAN_HW;
- LKPI_80211_LHW_SCAN_UNLOCK(lhw);
/*
- * XXX If we clear this now and later a driver
- * thinks it * can do a hw_scan again, we will
- * currently not re-enable it?
+			 * We need to put this into some deferred context as
+			 * the net80211 scan may not be done yet
+			 * (ic_flags & IEEE80211_F_SCAN) and we cannot
+			 * wait here; if we do, scan_curchan_task always
+ * runs after our timeout to finalize the scan.
*/
- vap->iv_flags_ext &= ~IEEE80211_FEXT_SCAN_OFFLOAD;
- ieee80211_start_scan(vap,
- IEEE80211_SCAN_ACTIVE |
- IEEE80211_SCAN_NOPICK |
- IEEE80211_SCAN_ONCE,
- IEEE80211_SCAN_FOREVER,
- ss->ss_mindwell ? ss->ss_mindwell : msecs_to_ticks(20),
- ss->ss_maxdwell ? ss->ss_maxdwell : msecs_to_ticks(200),
- vap->iv_des_nssid, vap->iv_des_ssid);
- goto sw_scan;
+ ieee80211_runtask(ic, &lvif->sw_scan_task);
+ return;
}
ic_printf(ic, "ERROR: %s: hw_scan returned %d\n",
@@ -4483,12 +4909,50 @@ sw_scan:
}
static void
+lkpi_sw_scan_task(void *arg, int pending __unused)
+{
+ struct lkpi_hw *lhw;
+ struct lkpi_vif *lvif;
+ struct ieee80211vap *vap;
+ struct ieee80211_scan_state *ss;
+
+ lvif = arg;
+ vap = LVIF_TO_VAP(lvif);
+ lhw = vap->iv_ic->ic_softc;
+ ss = vap->iv_ic->ic_scan;
+
+ LKPI_80211_LHW_SCAN_LOCK(lhw);
+ /*
+	 * We will re-enable this at scan_end by calling lkpi_enable_hw_scan().
+ * IEEE80211_FEXT_SCAN_OFFLOAD will be cleared by lkpi_ic_scan_start.
+ */
+ lhw->scan_flags &= ~LKPI_LHW_SCAN_HW;
+ LKPI_80211_LHW_SCAN_UNLOCK(lhw);
+
+ TRACE_SCAN(vap->iv_ic, "Triggering SW_SCAN: pending %d, scan_flags %b",
+ pending, lhw->scan_flags, LKPI_LHW_SCAN_BITS);
+
+ /*
+ * This will call ic_scan_start() and we will get into the right path
+ * unless other scans started in between.
+ */
+ ieee80211_start_scan(vap,
+ IEEE80211_SCAN_ONCE,
+ msecs_to_ticks(10000), /* 10000 ms (=~ 50 chan * 200 ms) */
+ ss->ss_mindwell ? ss->ss_mindwell : msecs_to_ticks(20),
+ ss->ss_maxdwell ? ss->ss_maxdwell : msecs_to_ticks(200),
+ vap->iv_des_nssid, vap->iv_des_ssid);
+}
+
+static void
lkpi_ic_scan_end(struct ieee80211com *ic)
{
struct lkpi_hw *lhw;
bool is_hw_scan;
lhw = ic->ic_softc;
+ TRACE_SCAN(ic, "scan_flags %b", lhw->scan_flags, LKPI_LHW_SCAN_BITS);
+
LKPI_80211_LHW_SCAN_LOCK(lhw);
if ((lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) == 0) {
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
@@ -4517,6 +4981,16 @@ lkpi_ic_scan_end(struct ieee80211com *ic)
if (vap->iv_state == IEEE80211_S_SCAN)
lkpi_hw_conf_idle(hw, true);
}
+
+ /*
+ * In case we disabled the hw_scan in lkpi_ic_scan_start() and
+ * switched to swscan, re-enable hw_scan if available.
+ */
+ lkpi_enable_hw_scan(lhw);
+
+ LKPI_80211_LHW_SCAN_LOCK(lhw);
+ wakeup(lhw);
+ LKPI_80211_LHW_SCAN_UNLOCK(lhw);
}
static void
@@ -4527,6 +5001,10 @@ lkpi_ic_scan_curchan(struct ieee80211_scan_state *ss,
bool is_hw_scan;
lhw = ss->ss_ic->ic_softc;
+ TRACE_SCAN(ss->ss_ic, "scan_flags %b chan %d maxdwell %lu",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS,
+ ss->ss_ic->ic_curchan->ic_ieee, maxdwell);
+
LKPI_80211_LHW_SCAN_LOCK(lhw);
is_hw_scan = (lhw->scan_flags & LKPI_LHW_SCAN_HW) != 0;
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
@@ -4541,6 +5019,10 @@ lkpi_ic_scan_mindwell(struct ieee80211_scan_state *ss)
bool is_hw_scan;
lhw = ss->ss_ic->ic_softc;
+ TRACE_SCAN(ss->ss_ic, "scan_flags %b chan %d mindwell %lu",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS,
+ ss->ss_ic->ic_curchan->ic_ieee, ss->ss_mindwell);
+
LKPI_80211_LHW_SCAN_LOCK(lhw);
is_hw_scan = (lhw->scan_flags & LKPI_LHW_SCAN_HW) != 0;
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
@@ -5571,6 +6053,12 @@ lkpi_ic_ampdu_rx_start(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap
return (-ENXIO);
}
+ if (lsta->state != IEEE80211_STA_AUTHORIZED) {
+ ic_printf(ic, "%s: lsta %p ni %p vap %p, sta %p state %d not AUTHORIZED\n",
+ __func__, lsta, ni, vap, sta, lsta->state);
+ return (-ENXIO);
+ }
+
params.sta = sta;
params.action = IEEE80211_AMPDU_RX_START;
params.buf_size = _IEEE80211_MASKSHIFT(le16toh(baparamset), IEEE80211_BAPS_BUFSIZ);
@@ -5647,13 +6135,35 @@ lkpi_ic_ampdu_rx_stop(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap)
lvif = VAP_TO_LVIF(vap);
vif = LVIF_TO_VIF(lvif);
lsta = ni->ni_drv_data;
+ if (lsta == NULL) {
+ ic_printf(ic, "%s: lsta %p ni %p vap %p, lsta is NULL\n",
+ __func__, lsta, ni, vap);
+ goto net80211_only;
+ }
sta = LSTA_TO_STA(lsta);
+ if (!lsta->added_to_drv) {
+ ic_printf(ic, "%s: lsta %p ni %p vap %p, sta %p not added to firmware\n",
+ __func__, lsta, ni, vap, sta);
+ goto net80211_only;
+ }
+
+ if (lsta->state != IEEE80211_STA_AUTHORIZED) {
+ ic_printf(ic, "%s: lsta %p ni %p vap %p, sta %p state %d not AUTHORIZED\n",
+ __func__, lsta, ni, vap, sta, lsta->state);
+ goto net80211_only;
+ }
+
IMPROVE_HT("This really should be passed from ht_recv_action_ba_delba.");
for (tid = 0; tid < WME_NUM_TID; tid++) {
if (&ni->ni_rx_ampdu[tid] == rap)
break;
}
+ if (tid == WME_NUM_TID) {
+ ic_printf(ic, "%s: lsta %p ni %p vap %p, sta %p TID not found\n",
+ __func__, lsta, ni, vap, sta);
+ goto net80211_only;
+ }
params.sta = sta;
params.action = IEEE80211_AMPDU_RX_STOP;
@@ -5788,8 +6298,9 @@ lkpi_ic_getradiocaps(struct ieee80211com *ic, int maxchan,
cflags &= ~NET80211_CBW_FLAG_HT40;
error = ieee80211_add_channel_cbw(c, maxchan, n,
- channels[i].hw_value, channels[i].center_freq,
- channels[i].max_power,
+ ieee80211_mhz2ieee(channels[i].center_freq,
+ lkpi_nl80211_band_to_net80211_band(channels[i].band)),
+ channels[i].center_freq, channels[i].max_power,
nflags, bands, cflags);
/* net80211::ENOBUFS: *n >= maxchans */
if (error != 0 && error != ENOBUFS)
@@ -5860,8 +6371,9 @@ lkpi_ic_getradiocaps(struct ieee80211com *ic, int maxchan,
cflags &= ~NET80211_CBW_FLAG_HT40;
error = ieee80211_add_channel_cbw(c, maxchan, n,
- channels[i].hw_value, channels[i].center_freq,
- channels[i].max_power,
+ ieee80211_mhz2ieee(channels[i].center_freq,
+ lkpi_nl80211_band_to_net80211_band(channels[i].band)),
+ channels[i].center_freq, channels[i].max_power,
nflags, bands, cflags);
/* net80211::ENOBUFS: *n >= maxchans */
if (error != 0 && error != ENOBUFS)
@@ -5908,8 +6420,11 @@ linuxkpi_ieee80211_alloc_hw(size_t priv_len, const struct ieee80211_ops *ops)
LKPI_80211_LHW_SCAN_LOCK_INIT(lhw);
LKPI_80211_LHW_TXQ_LOCK_INIT(lhw);
+ spin_lock_init(&lhw->txq_lock);
sx_init_flags(&lhw->lvif_sx, "lhw-lvif", SX_RECURSE | SX_DUPOK);
+ LKPI_80211_LHW_MC_LOCK_INIT(lhw);
TAILQ_INIT(&lhw->lvif_head);
+ __hw_addr_init(&lhw->mc_list);
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
lhw->txq_generation[ac] = 1;
TAILQ_INIT(&lhw->scheduled_txqs[ac]);
@@ -6006,25 +6521,44 @@ linuxkpi_ieee80211_iffree(struct ieee80211_hw *hw)
}
}
+ LKPI_80211_LHW_MC_LOCK(lhw);
+ lkpi_cleanup_mcast_list_locked(lhw);
+ LKPI_80211_LHW_MC_UNLOCK(lhw);
+
/* Cleanup more of lhw here or in wiphy_free()? */
+ spin_lock_destroy(&lhw->txq_lock);
LKPI_80211_LHW_TXQ_LOCK_DESTROY(lhw);
LKPI_80211_LHW_SCAN_LOCK_DESTROY(lhw);
sx_destroy(&lhw->lvif_sx);
+ LKPI_80211_LHW_MC_LOCK_DESTROY(lhw)
IMPROVE();
}
void
-linuxkpi_set_ieee80211_dev(struct ieee80211_hw *hw, char *name)
+linuxkpi_set_ieee80211_dev(struct ieee80211_hw *hw)
{
struct lkpi_hw *lhw;
struct ieee80211com *ic;
+ struct device *dev;
lhw = HW_TO_LHW(hw);
ic = lhw->ic;
- /* Now set a proper name before ieee80211_ifattach(). */
+ /* Save the backpointer from net80211 to LinuxKPI. */
ic->ic_softc = lhw;
- ic->ic_name = name;
+
+ /*
+ * Set a proper name before ieee80211_ifattach() if dev is set.
+	 * The ath1xk drivers also unset the dev so we need to check.
+ */
+ dev = wiphy_dev(hw->wiphy);
+ if (dev != NULL) {
+ ic->ic_name = dev_name(dev);
+ } else {
+ TODO("adjust arguments to still have the old dev or go through "
+ "the hoops of getting the bsddev from hw and detach; "
+ "or do in XXX; check ath1kx drivers");
+ }
/* XXX-BZ do we also need to set wiphy name? */
}
@@ -6101,26 +6635,26 @@ linuxkpi_ieee80211_ifattach(struct ieee80211_hw *hw)
IEEE80211_C_SHSLOT | /* short slot time supported */
IEEE80211_C_SHPREAMBLE /* short preamble supported */
;
-#if 0
- /* Scanning is a different kind of beast to re-work. */
- ic->ic_caps |= IEEE80211_C_BGSCAN;
+
+#ifdef LKPI_80211_BGSCAN
+ if (lhw->ops->hw_scan)
+ ic->ic_caps |= IEEE80211_C_BGSCAN;
#endif
- if (lhw->ops->hw_scan) {
- /*
- * Advertise full-offload scanning.
- *
- * Not limiting to SINGLE_SCAN_ON_ALL_BANDS here as otherwise
- * we essentially disable hw_scan for all drivers not setting
- * the flag.
- */
- ic->ic_flags_ext |= IEEE80211_FEXT_SCAN_OFFLOAD;
- lhw->scan_flags |= LKPI_LHW_SCAN_HW;
- }
+
+ lkpi_enable_hw_scan(lhw);
/* Does HW support Fragmentation offload? */
if (ieee80211_hw_check(hw, SUPPORTS_TX_FRAG))
ic->ic_flags_ext |= IEEE80211_FEXT_FRAG_OFFLOAD;
+ /* Does HW support full AMPDU[-TX] offload? */
+ if (ieee80211_hw_check(hw, AMPDU_AGGREGATION))
+ ic->ic_flags_ext |= IEEE80211_FEXT_AMPDU_OFFLOAD;
+#ifdef __notyet__
+ if (ieee80211_hw_check(hw, TX_AMSDU))
+ if (ieee80211_hw_check(hw, SUPPORTS_AMSDU_IN_AMPDU))
+#endif
+
/*
* The wiphy variables report bitmasks of avail antennas.
* (*get_antenna) get the current bitmask sets which can be
@@ -6332,8 +6866,10 @@ linuxkpi_ieee80211_ifattach(struct ieee80211_hw *hw)
hw->wiphy->max_scan_ie_len -= lhw->scan_ie_len;
}
- if (bootverbose)
+ if (bootverbose) {
+ ic_printf(ic, "netdev_features %b\n", hw->netdev_features, NETIF_F_BITS);
ieee80211_announce(ic);
+ }
return (0);
err:
@@ -6570,13 +7106,19 @@ linuxkpi_ieee80211_scan_completed(struct ieee80211_hw *hw,
ic = lhw->ic;
ss = ic->ic_scan;
+ TRACE_SCAN(ic, "scan_flags %b info { %ju, %6D, aborted %d }",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS,
+ (uintmax_t)info->scan_start_tsf, info->tsf_bssid, ":",
+ info->aborted);
+
ieee80211_scan_done(ss->ss_vap);
LKPI_80211_LHW_SCAN_LOCK(lhw);
free(lhw->hw_req, M_LKPI80211);
lhw->hw_req = NULL;
lhw->scan_flags &= ~LKPI_LHW_SCAN_RUNNING;
- wakeup(lhw);
+ /* The wakeup(lhw) will be called from lkpi_ic_scan_end(). */
+ /* wakeup(lhw); */
LKPI_80211_LHW_SCAN_UNLOCK(lhw);
return;
@@ -6832,11 +7374,76 @@ lkpi_convert_rx_status(struct ieee80211_hw *hw, struct lkpi_sta *lsta,
rx_stats->c_pktflags |= IEEE80211_RX_F_FAIL_FCSCRC;
#endif
+	/* Fill in some sinfo bits to cover gaps not reported by the driver. */
if (lsta != NULL) {
memcpy(&lsta->sinfo.rxrate, &rxrate, sizeof(rxrate));
lsta->sinfo.filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE);
+
+ if (rx_status->chains != 0) {
+ lsta->sinfo.chains = rx_status->chains;
+ memcpy(lsta->sinfo.chain_signal, rx_status->chain_signal,
+ sizeof(lsta->sinfo.chain_signal));
+ lsta->sinfo.filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL);
+ }
+ }
+}
+
+#ifdef LINUXKPI_DEBUG_80211
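+/*
+ * Debug helper: pull the SSID out of a received beacon and log the key
+ * header fields via TRACE_SCAN_BEACON().
+ */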
+static void
+lkpi_rx_log_beacon(struct mbuf *m, struct lkpi_hw *lhw,
+ struct ieee80211_rx_status *rx_status)
+{
+ struct ieee80211_mgmt *f;
+ uint8_t *e;
+ char ssid[IEEE80211_NWID_LEN * 4 + 1];
+
+ memset(ssid, '\0', sizeof(ssid));
+
+ f = mtod(m, struct ieee80211_mgmt *);
+ e = f->u.beacon.variable;
+ /*
+	 * The SSID usually follows the fixed part; for debugging purposes it is
+	 * fine if we miss it when it does not.
+ */
+ while ((e - (uint8_t *)f) < m->m_len) {
+ if (*e == IEEE80211_ELEMID_SSID)
+ break;
+ e += (2 + *(e + 1));
+ }
+ if (*e == IEEE80211_ELEMID_SSID) {
+ int i, len;
+ char *p;
+
+ p = ssid;
+ len = m->m_len - ((e + 2) - (uint8_t *)f);
+ if (len > *(e + 1))
+ len = *(e + 1);
+ e += 2;
+ for (i = 0; i < len; i++) {
+ /* Printable character? */
+ if (*e >= 0x20 && *e < 0x7f) {
+ *p++ = *e++;
+ } else {
+ snprintf(p, 5, "%#04x", *e++);
+ p += 4;
+ }
+ }
+ *p = '\0';
}
+
+ /* We print skb, skb->data, m as we are seeing 'ghost beacons'. */
+ TRACE_SCAN_BEACON(lhw->ic, "Beacon: scan_flags %b, band %s freq %u chan %-4d "
+ "len %d { %#06x %#06x %6D %6D %6D %#06x %ju %u %#06x SSID '%s' }",
+ lhw->scan_flags, LKPI_LHW_SCAN_BITS,
+ lkpi_nl80211_band_name(rx_status->band), rx_status->freq,
+ linuxkpi_ieee80211_frequency_to_channel(rx_status->freq, 0),
+ m->m_pkthdr.len, f->frame_control, f->duration_id,
+ f->da, ":", f->sa, ":", f->bssid, ":", f->seq_ctrl,
+ (uintmax_t)le64_to_cpu(f->u.beacon.timestamp),
+ le16_to_cpu(f->u.beacon.beacon_int),
+ le16_to_cpu(f->u.beacon.capab_info), ssid);
}
+#endif
/* For %list see comment towards the end of the function. */
void
@@ -6894,7 +7501,15 @@ linuxkpi_ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
is_beacon = ieee80211_is_beacon(hdr->frame_control);
#ifdef LINUXKPI_DEBUG_80211
- if (is_beacon && (linuxkpi_debug_80211 & D80211_TRACE_RX_BEACONS) == 0)
+ /*
+ * We use the mbuf here as otherwise the variable part might
+ * be in skb frags.
+ */
+ if (is_beacon && ((linuxkpi_debug_80211 & D80211_SCAN_BEACON) != 0))
+ lkpi_rx_log_beacon(m, lhw, rx_status);
+
+ if (is_beacon && (linuxkpi_debug_80211 & D80211_TRACE_RX_BEACONS) == 0 &&
+ (linuxkpi_debug_80211 & D80211_SCAN_BEACON) == 0)
goto no_trace_beacons;
if (linuxkpi_debug_80211 & D80211_TRACE_RX)
@@ -6909,7 +7524,8 @@ linuxkpi_ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
hexdump(mtod(m, const void *), m->m_len, "RX (raw) ", 0);
/* Implement a dump_rxcb() !!! */
- if (linuxkpi_debug_80211 & D80211_TRACE_RX)
+ if ((linuxkpi_debug_80211 & D80211_TRACE_RX) != 0 ||
+ (linuxkpi_debug_80211 & D80211_SCAN_BEACON) != 0)
printf("TRACE-RX: %s: RXCB: %ju %ju %u, %b, %u, %#0x, %#0x, "
"%u band %u, %u { %d %d %d %d }, %d, %#x %#x %#x %#x %u %u %u\n",
__func__,
@@ -7216,7 +7832,7 @@ lkpi_wiphy_delayed_work_timer(struct timer_list *tl)
{
struct wiphy_delayed_work *wdwk;
- wdwk = from_timer(wdwk, tl, timer);
+ wdwk = timer_container_of(wdwk, tl, timer);
wiphy_work_queue(wdwk->wiphy, &wdwk->work);
}
@@ -7791,8 +8407,11 @@ linuxkpi_ieee80211_connection_loss(struct ieee80211_vif *vif)
nstate = IEEE80211_S_INIT;
arg = 0; /* Not a valid reason. */
- ic_printf(vap->iv_ic, "%s: vif %p vap %p state %s\n", __func__,
- vif, vap, ieee80211_state_name[vap->iv_state]);
+ ic_printf(vap->iv_ic, "%s: vif %p vap %p state %s (synched %d, assoc %d "
+ "beacons %d dtim_period %d)\n", __func__, vif, vap,
+ ieee80211_state_name[vap->iv_state],
+ lvif->lvif_bss_synched, vif->cfg.assoc, lvif->beacons,
+ vif->bss_conf.dtim_period);
ieee80211_new_state(vap, nstate, arg);
}
@@ -7805,8 +8424,11 @@ linuxkpi_ieee80211_beacon_loss(struct ieee80211_vif *vif)
lvif = VIF_TO_LVIF(vif);
vap = LVIF_TO_VAP(lvif);
- ic_printf(vap->iv_ic, "%s: vif %p vap %p state %s\n", __func__,
- vif, vap, ieee80211_state_name[vap->iv_state]);
+ ic_printf(vap->iv_ic, "%s: vif %p vap %p state %s (synched %d, assoc %d "
+ "beacons %d dtim_period %d)\n", __func__, vif, vap,
+ ieee80211_state_name[vap->iv_state],
+ lvif->lvif_bss_synched, vif->cfg.assoc, lvif->beacons,
+ vif->bss_conf.dtim_period);
ieee80211_beacon_miss(vap->iv_ic);
}
@@ -7954,21 +8576,30 @@ lkpi_ieee80211_wake_queues_locked(struct ieee80211_hw *hw)
void
linuxkpi_ieee80211_wake_queues(struct ieee80211_hw *hw)
{
- wiphy_lock(hw->wiphy);
+ struct lkpi_hw *lhw;
+ unsigned long flags;
+
+ lhw = HW_TO_LHW(hw);
+
+ spin_lock_irqsave(&lhw->txq_lock, flags);
lkpi_ieee80211_wake_queues_locked(hw);
- wiphy_unlock(hw->wiphy);
+ spin_unlock_irqrestore(&lhw->txq_lock, flags);
}
void
linuxkpi_ieee80211_wake_queue(struct ieee80211_hw *hw, int qnum)
{
+ struct lkpi_hw *lhw;
+ unsigned long flags;
KASSERT(qnum < hw->queues, ("%s: qnum %d >= hw->queues %d, hw %p\n",
__func__, qnum, hw->queues, hw));
- wiphy_lock(hw->wiphy);
+ lhw = HW_TO_LHW(hw);
+
+ spin_lock_irqsave(&lhw->txq_lock, flags);
lkpi_ieee80211_wake_queues(hw, qnum);
- wiphy_unlock(hw->wiphy);
+ spin_unlock_irqrestore(&lhw->txq_lock, flags);
}
/* This is just hardware queues. */
diff --git a/sys/compat/linuxkpi/common/src/linux_80211.h b/sys/compat/linuxkpi/common/src/linux_80211.h
index 89afec1235bd..0dfcd7646c34 100644
--- a/sys/compat/linuxkpi/common/src/linux_80211.h
+++ b/sys/compat/linuxkpi/common/src/linux_80211.h
@@ -59,6 +59,8 @@
#define D80211_IMPROVE_TXQ 0x00000004
#define D80211_TRACE 0x00000010
#define D80211_TRACEOK 0x00000020
+#define D80211_SCAN 0x00000040
+#define D80211_SCAN_BEACON 0x00000080
#define D80211_TRACE_TX 0x00000100
#define D80211_TRACE_TX_DUMP 0x00000200
#define D80211_TRACE_RX 0x00001000
@@ -75,6 +77,20 @@
#define D80211_TRACE_MODE_HE 0x04000000
#define D80211_TRACE_MODE_EHT 0x08000000
+#ifdef LINUXKPI_DEBUG_80211
+#define TRACE_SCAN(ic, fmt, ...) \
+ if (linuxkpi_debug_80211 & D80211_SCAN) \
+ printf("%s:%d: %s SCAN " fmt "\n", \
+ __func__, __LINE__, ic->ic_name, ##__VA_ARGS__)
+#define TRACE_SCAN_BEACON(ic, fmt, ...) \
+ if (linuxkpi_debug_80211 & D80211_SCAN_BEACON) \
+ printf("%s:%d: %s SCAN " fmt "\n", \
+ __func__, __LINE__, ic->ic_name, ##__VA_ARGS__)
+#else
+#define TRACE_SCAN(...) do {} while (0)
+#define TRACE_SCAN_BEACON(...) do {} while (0)
+#endif
+
#define IMPROVE_TXQ(...) \
if (linuxkpi_debug_80211 & D80211_IMPROVE_TXQ) \
printf("%s:%d: XXX LKPI80211 IMPROVE_TXQ\n", __func__, __LINE__)
@@ -187,6 +203,12 @@ struct lkpi_vif {
enum ieee80211_state, int);
struct ieee80211_node * (*iv_update_bss)(struct ieee80211vap *,
struct ieee80211_node *);
+ void (*iv_recv_mgmt)(struct ieee80211_node *,
+ struct mbuf *, int,
+ const struct ieee80211_rx_stats *,
+ int, int);
+ struct task sw_scan_task;
+
struct list_head lsta_list;
struct lkpi_sta *lvif_bss;
@@ -194,6 +216,7 @@ struct lkpi_vif {
struct ieee80211_node *key_update_iv_bss;
int ic_unlocked; /* Count of ic unlocks pending (*mo_set_key) */
int nt_unlocked; /* Count of nt unlocks pending (*mo_set_key) */
+ int beacons; /* # of beacons since assoc */
bool lvif_bss_synched;
bool added_to_drv; /* Driver knows; i.e. we called add_interface(). */
@@ -223,10 +246,14 @@ struct lkpi_hw { /* name it mac80211_sc? */
struct sx lvif_sx;
struct list_head lchanctx_list;
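+	/*
+	 * Multicast address list and related state; mc_sx backs the
+	 * LKPI_80211_LHW_MC_LOCK() macros.
+	 */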
+ struct netdev_hw_addr_list mc_list;
+ unsigned int mc_flags;
+ struct sx mc_sx;
struct mtx txq_mtx;
uint32_t txq_generation[IEEE80211_NUM_ACS];
TAILQ_HEAD(, lkpi_txq) scheduled_txqs[IEEE80211_NUM_ACS];
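+	/* Taken around queue wake handling (linuxkpi_ieee80211_wake_queue*()). */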
+ spinlock_t txq_lock;
/* Deferred RX path. */
struct task rxq_task;
@@ -279,7 +306,7 @@ struct lkpi_hw { /* name it mac80211_sc? */
int max_rates; /* Maximum number of bitrates supported in any channel. */
int scan_ie_len; /* Length of common per-band scan IEs. */
- bool update_mc;
+ bool mc_all_multi;
bool update_wme;
bool rxq_stopped;
@@ -289,6 +316,9 @@ struct lkpi_hw { /* name it mac80211_sc? */
#define LHW_TO_HW(_lhw) (&(_lhw)->hw)
#define HW_TO_LHW(_hw) container_of(_hw, struct lkpi_hw, hw)
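+/* printf(9) %b bit-description string for lhw->scan_flags. */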
+#define LKPI_LHW_SCAN_BITS \
+    "\010\1RUNNING\2HW"
+
struct lkpi_chanctx {
struct list_head entry;
@@ -369,6 +399,13 @@ struct lkpi_wiphy {
#define LKPI_80211_LHW_LVIF_LOCK(_lhw) sx_xlock(&(_lhw)->lvif_sx)
#define LKPI_80211_LHW_LVIF_UNLOCK(_lhw) sx_xunlock(&(_lhw)->lvif_sx)
+#define	LKPI_80211_LHW_MC_LOCK_INIT(_lhw)	\
+    sx_init_flags(&(_lhw)->mc_sx, "lhw-mc", 0)
+#define	LKPI_80211_LHW_MC_LOCK_DESTROY(_lhw)	\
+    sx_destroy(&(_lhw)->mc_sx)
+#define LKPI_80211_LHW_MC_LOCK(_lhw) sx_xlock(&(_lhw)->mc_sx)
+#define LKPI_80211_LHW_MC_UNLOCK(_lhw) sx_xunlock(&(_lhw)->mc_sx)
+
#define LKPI_80211_LVIF_LOCK(_lvif) mtx_lock(&(_lvif)->mtx)
#define LKPI_80211_LVIF_UNLOCK(_lvif) mtx_unlock(&(_lvif)->mtx)
diff --git a/sys/compat/linuxkpi/common/src/linux_80211_macops.c b/sys/compat/linuxkpi/common/src/linux_80211_macops.c
index 78b2120f2d8c..1046b753574f 100644
--- a/sys/compat/linuxkpi/common/src/linux_80211_macops.c
+++ b/sys/compat/linuxkpi/common/src/linux_80211_macops.c
@@ -53,6 +53,8 @@ lkpi_80211_mo_start(struct ieee80211_hw *hw)
struct lkpi_hw *lhw;
int error;
+ lockdep_assert_wiphy(hw->wiphy);
+
lhw = HW_TO_LHW(hw);
if (lhw->ops->start == NULL) {
error = EOPNOTSUPP;
diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c
index dcdec0dfcc78..458744a9fec6 100644
--- a/sys/compat/linuxkpi/common/src/linux_compat.c
+++ b/sys/compat/linuxkpi/common/src/linux_compat.c
@@ -2120,7 +2120,7 @@ add_timer_on(struct timer_list *timer, int cpu)
}
int
-del_timer(struct timer_list *timer)
+timer_delete(struct timer_list *timer)
{
if (callout_stop(&(timer)->callout) == -1)
@@ -2129,7 +2129,7 @@ del_timer(struct timer_list *timer)
}
int
-del_timer_sync(struct timer_list *timer)
+timer_delete_sync(struct timer_list *timer)
{
if (callout_drain(&(timer)->callout) == -1)
@@ -2138,13 +2138,6 @@ del_timer_sync(struct timer_list *timer)
}
int
-timer_delete_sync(struct timer_list *timer)
-{
-
- return (del_timer_sync(timer));
-}
-
-int
timer_shutdown_sync(struct timer_list *timer)
{
diff --git a/sys/compat/linuxkpi/common/src/linux_devres.c b/sys/compat/linuxkpi/common/src/linux_devres.c
index 84f03ba0dd7d..23c91cb5ab2f 100644
--- a/sys/compat/linuxkpi/common/src/linux_devres.c
+++ b/sys/compat/linuxkpi/common/src/linux_devres.c
@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
- * Copyright (c) 2020-2021 The FreeBSD Foundation
+ * Copyright (c) 2020-2025 The FreeBSD Foundation
*
* This software was developed by Bj\xc3\xb6rn Zeeb under sponsorship from
* the FreeBSD Foundation.
@@ -223,6 +223,30 @@ lkpi_devm_kmalloc_release(struct device *dev __unused, void *p __unused)
/* Nothing to do. Freed with the devres. */
}
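+/*
+ * devres match callback: true when the managed allocation is the one
+ * passed to lkpi_devm_kfree().
+ */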
+static int
+lkpi_devm_kmalloc_match(struct device *dev __unused, void *p, void *mp)
+{
+ return (p == mp);
+}
+
+void
+lkpi_devm_kfree(struct device *dev, const void *p)
+{
+ void *mp;
+ int error;
+
+ if (p == NULL)
+ return;
+
+ /* I assume Linux simply casts the const away... */
+ mp = __DECONST(void *, p);
+ error = lkpi_devres_destroy(dev, lkpi_devm_kmalloc_release,
+ lkpi_devm_kmalloc_match, mp);
+ if (error != 0)
+ dev_warn(dev, "%s: lkpi_devres_destroy failed with %d\n",
+ __func__, error);
+}
+
struct devres_action {
void *data;
void (*action)(void *);
diff --git a/sys/compat/linuxkpi/common/src/linux_pci.c b/sys/compat/linuxkpi/common/src/linux_pci.c
index d5bbbea1eb2c..43fd6ad28ac4 100644
--- a/sys/compat/linuxkpi/common/src/linux_pci.c
+++ b/sys/compat/linuxkpi/common/src/linux_pci.c
@@ -111,6 +111,9 @@ static device_method_t pci_methods[] = {
DEVMETHOD(pci_iov_uninit, linux_pci_iov_uninit),
DEVMETHOD(pci_iov_add_vf, linux_pci_iov_add_vf),
+ /* Bus interface. */
+ DEVMETHOD(bus_add_child, bus_generic_add_child),
+
/* backlight interface */
DEVMETHOD(backlight_update_status, linux_backlight_update_status),
DEVMETHOD(backlight_get_status, linux_backlight_get_status),
@@ -145,6 +148,23 @@ struct linux_dma_priv {
#define DMA_PRIV_LOCK(priv) mtx_lock(&(priv)->lock)
#define DMA_PRIV_UNLOCK(priv) mtx_unlock(&(priv)->lock)
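+/*
+ * Record a mapped BAR in the pcim devres: mmio_table holds the mapped
+ * address handed back to drivers, res_table keeps the resource so it can
+ * be released later.
+ */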
+static void
+lkpi_set_pcim_iomap_devres(struct pcim_iomap_devres *dr, int bar,
+ void *res)
+{
+ dr->mmio_table[bar] = (void *)rman_get_bushandle(res);
+ dr->res_table[bar] = res;
+}
+
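+/* Standard PCI BAR indices run from 0 to PCIR_MAX_BAR_0. */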
+static bool
+lkpi_pci_bar_id_valid(int bar)
+{
+ if (bar < 0 || bar > PCIR_MAX_BAR_0)
+ return (false);
+
+ return (true);
+}
+
static int
linux_pdev_dma_uninit(struct pci_dev *pdev)
{
@@ -289,12 +309,18 @@ lkpi_pci_get_device(uint32_t vendor, uint32_t device, struct pci_dev *odev)
{
struct pci_dev *pdev, *found;
- KASSERT(odev == NULL, ("%s: odev argument not yet supported\n", __func__));
-
found = NULL;
spin_lock(&pci_lock);
list_for_each_entry(pdev, &pci_devices, links) {
- if (pdev->vendor == vendor && pdev->device == device) {
+ /* Walk until we find odev. */
+ if (odev != NULL) {
+ if (pdev == odev)
+ odev = NULL;
+ continue;
+ }
+
+ if ((pdev->vendor == vendor || vendor == PCI_ANY_ID) &&
+ (pdev->device == device || device == PCI_ANY_ID)) {
found = pdev;
break;
}
@@ -757,6 +783,9 @@ _lkpi_pci_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen __unused)
struct pci_mmio_region *mmio, *p;
int type;
+ if (!lkpi_pci_bar_id_valid(bar))
+ return (NULL);
+
type = pci_resource_type(pdev, bar);
if (type < 0) {
device_printf(pdev->dev.bsddev, "%s: bar %d type %d\n",
@@ -797,6 +826,9 @@ linuxkpi_pci_iomap_range(struct pci_dev *pdev, int bar,
{
struct resource *res;
+ if (!lkpi_pci_bar_id_valid(bar))
+ return (NULL);
+
res = _lkpi_pci_iomap(pdev, bar, maxlen);
if (res == NULL)
return (NULL);
@@ -810,9 +842,41 @@ linuxkpi_pci_iomap_range(struct pci_dev *pdev, int bar,
void *
linuxkpi_pci_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen)
{
+ if (!lkpi_pci_bar_id_valid(bar))
+ return (NULL);
+
return (linuxkpi_pci_iomap_range(pdev, bar, 0, maxlen));
}
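+/*
+ * Managed (devres-tracked) variant of pci_iomap(): re-use an already
+ * recorded mapping for the BAR if there is one, otherwise map it and
+ * record it in the pcim devres.
+ */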
+void *
+linuxkpi_pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen)
+{
+ struct pcim_iomap_devres *dr;
+ void *res;
+
+ if (!lkpi_pci_bar_id_valid(bar))
+ return (NULL);
+
+ dr = lkpi_pcim_iomap_devres_find(pdev);
+ if (dr == NULL)
+ return (NULL);
+
+ if (dr->res_table[bar] != NULL)
+ return (dr->res_table[bar]);
+
+ res = linuxkpi_pci_iomap(pdev, bar, maxlen);
+ if (res == NULL) {
+ /*
+ * Do not free the devres in case there were
+ * other valid mappings before already.
+ */
+ return (NULL);
+ }
+ lkpi_set_pcim_iomap_devres(dr, bar, res);
+
+ return (res);
+}
+
void
linuxkpi_pci_iounmap(struct pci_dev *pdev, void *res)
{
@@ -864,8 +928,7 @@ linuxkpi_pcim_iomap_regions(struct pci_dev *pdev, uint32_t mask, const char *nam
res = _lkpi_pci_iomap(pdev, bar, 0);
if (res == NULL)
goto err;
- dr->mmio_table[bar] = (void *)rman_get_bushandle(res);
- dr->res_table[bar] = res;
+ lkpi_set_pcim_iomap_devres(dr, bar, res);
mappings |= (1 << bar);
}
@@ -1099,8 +1162,9 @@ pci_resource_len(struct pci_dev *pdev, int bar)
return (rle->count);
}
-int
-pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
+static int
+lkpi_pci_request_region(struct pci_dev *pdev, int bar, const char *res_name,
+ bool managed)
{
struct resource *res;
struct pci_devres *dr;
@@ -1108,9 +1172,20 @@ pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
int rid;
int type;
+ if (!lkpi_pci_bar_id_valid(bar))
+ return (-EINVAL);
+
+ /*
+	 * If the BAR is not implemented (zero length), return success without
+	 * adding it; otherwise linuxkpi_pcim_request_all_regions() would fail
+	 * on every absent BAR.
+ */
+ if (pci_resource_len(pdev, bar) == 0)
+ return (0);
+ /* Likewise if it is neither IO nor MEM, nothing to do for us. */
type = pci_resource_type(pdev, bar);
if (type < 0)
- return (-ENODEV);
+ return (0);
+
rid = PCIR_BAR(bar);
res = bus_alloc_resource_any(pdev->dev.bsddev, type, &rid,
RF_ACTIVE|RF_SHAREABLE);
@@ -1123,11 +1198,16 @@ pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
/*
* It seems there is an implicit devres tracking on these if the device
- * is managed; otherwise the resources are not automatiaclly freed on
- * FreeBSD/LinuxKPI tough they should be/are expected to be by Linux
- * drivers.
+ * is managed (lkpi_pci_devres_find() case); otherwise the resources are
+ * not automatically freed on FreeBSD/LinuxKPI though they should be/are
+ * expected to be by Linux drivers.
+	 * If we are called from a pcim-function with the managed argument set,
+	 * we need to track devres independently of pdev->managed.
*/
- dr = lkpi_pci_devres_find(pdev);
+ if (managed)
+ dr = lkpi_pci_devres_get_alloc(pdev);
+ else
+ dr = lkpi_pci_devres_find(pdev);
if (dr != NULL) {
dr->region_mask |= (1 << bar);
dr->region_table[bar] = res;
@@ -1144,6 +1224,12 @@ pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
}
int
+linuxkpi_pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
+{
+ return (lkpi_pci_request_region(pdev, bar, res_name, false));
+}
+
+int
linuxkpi_pci_request_regions(struct pci_dev *pdev, const char *res_name)
{
int error;
@@ -1159,6 +1245,24 @@ linuxkpi_pci_request_regions(struct pci_dev *pdev, const char *res_name)
return (0);
}
+int
+linuxkpi_pcim_request_all_regions(struct pci_dev *pdev, const char *res_name)
+{
+ int bar, error;
+
+ for (bar = 0; bar <= PCIR_MAX_BAR_0; bar++) {
+ error = lkpi_pci_request_region(pdev, bar, res_name, true);
+ if (error != 0) {
+ device_printf(pdev->dev.bsddev, "%s: bar %d res_name '%s': "
+ "lkpi_pci_request_region returned %d\n", __func__,
+ bar, res_name, error);
+ pci_release_regions(pdev);
+ return (error);
+ }
+ }
+ return (0);
+}
+
void
linuxkpi_pci_release_region(struct pci_dev *pdev, int bar)
{
diff --git a/sys/compat/linuxkpi/dummy/include/kunit/skbuff.h b/sys/compat/linuxkpi/dummy/include/kunit/skbuff.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/sys/compat/linuxkpi/dummy/include/kunit/skbuff.h
diff --git a/sys/compat/linuxkpi/dummy/include/kunit/test-bug.h b/sys/compat/linuxkpi/dummy/include/kunit/test-bug.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/sys/compat/linuxkpi/dummy/include/kunit/test-bug.h
diff --git a/sys/compat/linuxkpi/dummy/include/kunit/test.h b/sys/compat/linuxkpi/dummy/include/kunit/test.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/sys/compat/linuxkpi/dummy/include/kunit/test.h
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 92e98aa57ebf..c7a8862fb906 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -888,13 +888,13 @@ options IEEE80211_DEBUG_REFCNT
options IEEE80211_SUPPORT_MESH #enable 802.11s D3.0 support
options IEEE80211_SUPPORT_TDMA #enable TDMA support
-# The `wlan_wep', `wlan_tkip', and `wlan_ccmp' devices provide
-# support for WEP, TKIP, AES-CCMP and AES-GCMP crypto protocols optionally
-# used with 802.11 devices that depend on the `wlan' module.
+# The `wlan_wep', `wlan_tkip', `wlan_ccmp', and `wlan_gcmp' devices provide
+# support for WEP, TKIP, AES-CCMP and AES-GCMP crypto protocols optionally used
+# with 802.11 devices that depend on the `wlan' module.
device wlan_wep
+device wlan_tkip
device wlan_ccmp
device wlan_gcmp
-device wlan_tkip
# The `wlan_xauth' device provides support for external (i.e. user-mode)
# authenticators for use with 802.11 drivers that use the `wlan'
@@ -1249,7 +1249,7 @@ options MAC
options MAC_BIBA
options MAC_BSDEXTENDED
options MAC_DDB
-options MAC_DO
+options MAC_DO
options MAC_IFOFF
options MAC_IPACL
options MAC_LOMAC
diff --git a/sys/conf/files b/sys/conf/files
index d89813c70355..9661bafea8f9 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3808,6 +3808,7 @@ kern/kern_hhook.c standard
kern/kern_idle.c standard
kern/kern_intr.c standard
kern/kern_jail.c standard
+kern/kern_jaildesc.c standard
kern/kern_jailmeta.c standard
kern/kern_kcov.c optional kcov \
compile-with "${NOSAN_C} ${MSAN_CFLAGS}"
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 9b6ba03b78df..a342242ac66e 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -13,14 +13,14 @@ include "conf/files.x86"
#
elf-vdso.so.o standard \
dependency "$S/amd64/amd64/sigtramp.S assym.inc $S/conf/vdso_amd64.ldscript $S/tools/amd64_vdso.sh" \
- compile-with "env AWK='${AWK}' NM='${NM}' LD='${LD}' CC='${CC}' DEBUG='${DEBUG}' OBJCOPY='${OBJCOPY}' ELFDUMP='${ELFDUMP}' S='${S}' sh $S/tools/amd64_vdso.sh" \
+ compile-with "env AWK='${AWK}' NM='${NM}' LD='${LD}' CC='${CC}' OBJCOPY='${OBJCOPY}' ELFDUMP='${ELFDUMP}' S='${S}' sh $S/tools/amd64_vdso.sh" \
no-ctfconvert \
no-implicit-rule before-depend \
clean "elf-vdso.so.o elf-vdso.so.1 vdso_offsets.h sigtramp.pico"
#
elf-vdso32.so.o optional compat_freebsd32 \
dependency "$S/amd64/ia32/ia32_sigtramp.S ia32_assym.h $S/conf/vdso_amd64_ia32.ldscript $S/tools/amd64_ia32_vdso.sh" \
- compile-with "env AWK='${AWK}' NM='${NM}' LD='${LD}' CC='${CC}' DEBUG='${DEBUG}' OBJCOPY='${OBJCOPY}' ELFDUMP='${ELFDUMP}' S='${S}' sh $S/tools/amd64_ia32_vdso.sh" \
+ compile-with "env AWK='${AWK}' NM='${NM}' LD='${LD}' CC='${CC}' OBJCOPY='${OBJCOPY}' ELFDUMP='${ELFDUMP}' S='${S}' sh $S/tools/amd64_ia32_vdso.sh" \
no-ctfconvert \
no-implicit-rule before-depend \
clean "elf-vdso32.so.o elf-vdso32.so.1 vdso_ia32_offsets.h ia32_sigtramp.pico"
@@ -107,7 +107,8 @@ crypto/openssl/amd64/poly1305-x86_64.S optional ossl
crypto/openssl/amd64/sha1-x86_64.S optional ossl
crypto/openssl/amd64/sha256-x86_64.S optional ossl
crypto/openssl/amd64/sha512-x86_64.S optional ossl
-crypto/openssl/amd64/ossl_aes_gcm.c optional ossl
+crypto/openssl/amd64/ossl_aes_gcm_avx512.c optional ossl
+crypto/openssl/ossl_aes_gcm.c optional ossl
dev/amdgpio/amdgpio.c optional amdgpio
dev/axgbe/if_axgbe_pci.c optional axp
dev/axgbe/xgbe-desc.c optional axp
diff --git a/sys/conf/files.arm b/sys/conf/files.arm
index 91b01845519e..880e804b6c95 100644
--- a/sys/conf/files.arm
+++ b/sys/conf/files.arm
@@ -132,7 +132,7 @@ libkern/udivdi3.c standard
libkern/umoddi3.c standard
crypto/openssl/ossl_arm.c optional ossl
-crypto/openssl/arm/ossl_aes_gcm.c optional ossl
+crypto/openssl/arm/ossl_aes_gcm_neon.c optional ossl
crypto/openssl/arm/aes-armv4.S optional ossl \
compile-with "${NORMAL_C} -I${SRCTOP}/sys/crypto/openssl"
crypto/openssl/arm/bsaes-armv7.S optional ossl \
diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc
index d2c3aa260cd9..0deada385f31 100644
--- a/sys/conf/files.powerpc
+++ b/sys/conf/files.powerpc
@@ -31,9 +31,11 @@ zfs-sha512-ppc.o optional zfs \
# openssl ppc common files
crypto/openssl/ossl_ppc.c optional ossl powerpc64 | ossl powerpc64le
+crypto/openssl/ossl_aes_gcm.c optional ossl powerpc64 | ossl powerpc64le
# openssl assembly files (powerpc64le)
crypto/openssl/powerpc64le/aes-ppc.S optional ossl powerpc64le
+crypto/openssl/powerpc64le/aes-gcm-ppc.S optional ossl powerpc64le
crypto/openssl/powerpc64le/aesp8-ppc.S optional ossl powerpc64le
crypto/openssl/powerpc64le/chacha-ppc.S optional ossl powerpc64le
crypto/openssl/powerpc64le/ecp_nistz256-ppc64.S optional ossl powerpc64le
@@ -54,6 +56,7 @@ crypto/openssl/powerpc64le/x25519-ppc64.S optional ossl powerpc64le
# openssl assembly files (powerpc64)
crypto/openssl/powerpc64/aes-ppc.S optional ossl powerpc64
+crypto/openssl/powerpc64/aes-gcm-ppc.S optional ossl powerpc64
crypto/openssl/powerpc64/aesp8-ppc.S optional ossl powerpc64
crypto/openssl/powerpc64/chacha-ppc.S optional ossl powerpc64
crypto/openssl/powerpc64/ecp_nistz256-ppc64.S optional ossl powerpc64
diff --git a/sys/conf/files.x86 b/sys/conf/files.x86
index 9976e9cfec5d..953da7dd1284 100644
--- a/sys/conf/files.x86
+++ b/sys/conf/files.x86
@@ -146,6 +146,7 @@ dev/hyperv/vmbus/vmbus_et.c optional hyperv
dev/hyperv/vmbus/vmbus_if.m optional hyperv
dev/hyperv/vmbus/vmbus_res.c optional hyperv
dev/hyperv/vmbus/vmbus_xact.c optional hyperv
+dev/ichwd/i6300esbwd.c optional ichwd
dev/ichwd/ichwd.c optional ichwd
dev/imcsmb/imcsmb.c optional imcsmb
dev/imcsmb/imcsmb_pci.c optional imcsmb pci
diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk
index 1fcfd6467e7f..0251486247da 100644
--- a/sys/conf/kern.pre.mk
+++ b/sys/conf/kern.pre.mk
@@ -8,7 +8,11 @@
# the rest of /usr/src, but they still always process SRCCONF even though
# the normal mechanisms to prevent that (compiling out of tree) won't
# work. To ensure they do work, we have to duplicate these few lines here.
+.if exists(${SRCTOP}/src.conf)
+SRCCONF?= ${SRCTOP}/src.conf
+.else
SRCCONF?= /etc/src.conf
+.endif
.if (exists(${SRCCONF}) || ${SRCCONF} != "/etc/src.conf") && !target(_srcconf_included_)
.include "${SRCCONF}"
_srcconf_included_:
diff --git a/sys/conf/newvers.sh b/sys/conf/newvers.sh
index 8b60da95741e..145377c1e75e 100644
--- a/sys/conf/newvers.sh
+++ b/sys/conf/newvers.sh
@@ -50,8 +50,8 @@
#
TYPE="FreeBSD"
-REVISION="15.0"
-BRANCH="PRERELEASE"
+REVISION="16.0"
+BRANCH="CURRENT"
if [ -n "${BRANCH_OVERRIDE}" ]; then
BRANCH=${BRANCH_OVERRIDE}
fi
diff --git a/sys/conf/options b/sys/conf/options
index 4009ba2b4843..66f7f2ee2d7e 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -53,7 +53,7 @@ DDB_CAPTURE_MAXBUFSIZE opt_ddb.h
DDB_CTF opt_ddb.h
DDB_NUMSYM opt_ddb.h
EARLY_PRINTF opt_global.h
-BLOAT_KERNEL_WITH_EXTERR opt_global.h
+EXTERR_STRINGS opt_global.h
FULL_BUF_TRACKING opt_global.h
GDB
KDB opt_global.h
diff --git a/sys/contrib/dev/acpica/components/executer/extrace.c b/sys/contrib/dev/acpica/components/executer/extrace.c
index d54d4908ca65..b48a5fcb289b 100644
--- a/sys/contrib/dev/acpica/components/executer/extrace.c
+++ b/sys/contrib/dev/acpica/components/executer/extrace.c
@@ -301,7 +301,7 @@ AcpiExTraceArgs(ACPI_OPERAND_OBJECT **Params, UINT32 Count)
switch (obj_desc->Common.Type)
{
case ACPI_TYPE_INTEGER:
- ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, "%lx", obj_desc->Integer.Value));
+ ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, "%jx", (uintmax_t)obj_desc->Integer.Value));
break;
case ACPI_TYPE_STRING:
diff --git a/sys/contrib/dev/qat/qat_402xx.bin b/sys/contrib/dev/qat/qat_402xx.bin
new file mode 100644
index 000000000000..74151547edce
--- /dev/null
+++ b/sys/contrib/dev/qat/qat_402xx.bin
Binary files differ
diff --git a/sys/contrib/dev/qat/qat_402xx_mmp.bin b/sys/contrib/dev/qat/qat_402xx_mmp.bin
new file mode 100644
index 000000000000..6404eb009d2f
--- /dev/null
+++ b/sys/contrib/dev/qat/qat_402xx_mmp.bin
Binary files differ
diff --git a/sys/contrib/dev/rtw88/main.c b/sys/contrib/dev/rtw88/main.c
index 021d076808e0..963b73f35350 100644
--- a/sys/contrib/dev/rtw88/main.c
+++ b/sys/contrib/dev/rtw88/main.c
@@ -57,6 +57,62 @@ module_param_named(support_vht, rtw_vht_support, bool, 0644);
MODULE_PARM_DESC(support_vht, "Set to Y to enable VHT support");
#endif
+#if defined(__FreeBSD__)
+/* Macros based on rtw89::core.c. */
+#define RTW88_DEF_CHAN(_freq, _hw_val, _flags, _band) \
+ { .center_freq = _freq, .hw_value = _hw_val, .flags = _flags, .band = _band, }
+#define RTW88_DEF_CHAN_2G(_freq, _hw_val) \
+ RTW88_DEF_CHAN(_freq, _hw_val, 0, NL80211_BAND_2GHZ)
+#define RTW88_DEF_CHAN_5G(_freq, _hw_val) \
+ RTW88_DEF_CHAN(_freq, _hw_val, 0, NL80211_BAND_5GHZ)
+#define RTW88_DEF_CHAN_5G_NO_HT40MINUS(_freq, _hw_val) \
+ RTW88_DEF_CHAN(_freq, _hw_val, IEEE80211_CHAN_NO_HT40MINUS, NL80211_BAND_5GHZ)
+
+static struct ieee80211_channel rtw_channeltable_2g[] = {
+ RTW88_DEF_CHAN_2G(2412, 1),
+ RTW88_DEF_CHAN_2G(2417, 2),
+ RTW88_DEF_CHAN_2G(2422, 3),
+ RTW88_DEF_CHAN_2G(2427, 4),
+ RTW88_DEF_CHAN_2G(2432, 5),
+ RTW88_DEF_CHAN_2G(2437, 6),
+ RTW88_DEF_CHAN_2G(2442, 7),
+ RTW88_DEF_CHAN_2G(2447, 8),
+ RTW88_DEF_CHAN_2G(2452, 9),
+ RTW88_DEF_CHAN_2G(2457, 10),
+ RTW88_DEF_CHAN_2G(2462, 11),
+ RTW88_DEF_CHAN_2G(2467, 12),
+ RTW88_DEF_CHAN_2G(2472, 13),
+ RTW88_DEF_CHAN_2G(2484, 14),
+};
+
+static struct ieee80211_channel rtw_channeltable_5g[] = {
+ RTW88_DEF_CHAN_5G(5180, 36),
+ RTW88_DEF_CHAN_5G(5200, 40),
+ RTW88_DEF_CHAN_5G(5220, 44),
+ RTW88_DEF_CHAN_5G(5240, 48),
+ RTW88_DEF_CHAN_5G(5260, 52),
+ RTW88_DEF_CHAN_5G(5280, 56),
+ RTW88_DEF_CHAN_5G(5300, 60),
+ RTW88_DEF_CHAN_5G(5320, 64),
+ RTW88_DEF_CHAN_5G(5500, 100),
+ RTW88_DEF_CHAN_5G(5520, 104),
+ RTW88_DEF_CHAN_5G(5540, 108),
+ RTW88_DEF_CHAN_5G(5560, 112),
+ RTW88_DEF_CHAN_5G(5580, 116),
+ RTW88_DEF_CHAN_5G(5600, 120),
+ RTW88_DEF_CHAN_5G(5620, 124),
+ RTW88_DEF_CHAN_5G(5640, 128),
+ RTW88_DEF_CHAN_5G(5660, 132),
+ RTW88_DEF_CHAN_5G(5680, 136),
+ RTW88_DEF_CHAN_5G(5700, 140),
+ RTW88_DEF_CHAN_5G(5720, 144),
+ RTW88_DEF_CHAN_5G(5745, 149),
+ RTW88_DEF_CHAN_5G(5765, 153),
+ RTW88_DEF_CHAN_5G(5785, 157),
+ RTW88_DEF_CHAN_5G(5805, 161),
+ RTW88_DEF_CHAN_5G_NO_HT40MINUS(5825, 165),
+};
+#elif defined(__linux__)
static struct ieee80211_channel rtw_channeltable_2g[] = {
{.center_freq = 2412, .hw_value = 1,},
{.center_freq = 2417, .hw_value = 2,},
@@ -102,6 +158,7 @@ static struct ieee80211_channel rtw_channeltable_5g[] = {
{.center_freq = 5825, .hw_value = 165,
.flags = IEEE80211_CHAN_NO_HT40MINUS},
};
+#endif
static struct ieee80211_rate rtw_ratetable[] = {
{.bitrate = 10, .hw_value = 0x00,},
diff --git a/sys/contrib/dev/rtw89/fw.c b/sys/contrib/dev/rtw89/fw.c
index e360f27c2ade..b4c0f864bc75 100644
--- a/sys/contrib/dev/rtw89/fw.c
+++ b/sys/contrib/dev/rtw89/fw.c
@@ -908,11 +908,7 @@ int rtw89_build_phy_tbl_from_elm(struct rtw89_dev *rtwdev,
case RTW89_FW_ELEMENT_ID_RADIO_B:
case RTW89_FW_ELEMENT_ID_RADIO_C:
case RTW89_FW_ELEMENT_ID_RADIO_D:
-#if defined(__linux__)
rf_path = arg.rf_path;
-#elif defined(__FreeBSD__)
- rf_path = __DECONST(enum rtw89_rf_path, arg.rf_path);
-#endif
idx = elm->u.reg2.idx;
elm_info->rf_radio[idx] = tbl;
diff --git a/sys/contrib/libnv/nvlist.c b/sys/contrib/libnv/nvlist.c
index 41edc72322c3..73226ee51a78 100644
--- a/sys/contrib/libnv/nvlist.c
+++ b/sys/contrib/libnv/nvlist.c
@@ -478,7 +478,7 @@ nvlist_dump_error_check(const nvlist_t *nvl, int fd, int level)
void
nvlist_dump(const nvlist_t *nvl, int fd)
{
- const nvlist_t *tmpnvl;
+ const nvlist_t *tmpnvl, *top;
nvpair_t *nvp, *tmpnvp;
void *cookie;
int level;
@@ -487,6 +487,7 @@ nvlist_dump(const nvlist_t *nvl, int fd)
if (nvlist_dump_error_check(nvl, fd, level))
return;
+ top = nvl;
nvp = nvlist_first_nvpair(nvl);
while (nvp != NULL) {
dprintf(fd, "%*s%s (%s):", level * 4, "", nvpair_name(nvp),
@@ -645,6 +646,8 @@ nvlist_dump(const nvlist_t *nvl, int fd)
while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) {
do {
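+			/* Do not unwind past the nvlist we started dumping from. */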
+ if (nvl == top)
+ return;
cookie = NULL;
if (nvlist_in_array(nvl))
dprintf(fd, "%*s,\n", level * 4, "");
@@ -847,7 +850,7 @@ nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep)
{
unsigned char *buf, *ptr;
size_t left, size;
- const nvlist_t *tmpnvl;
+ const nvlist_t *tmpnvl, *top;
nvpair_t *nvp, *tmpnvp;
void *cookie;
@@ -868,6 +871,7 @@ nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep)
ptr = nvlist_pack_header(nvl, ptr, &left);
+ top = nvl;
nvp = nvlist_first_nvpair(nvl);
while (nvp != NULL) {
NVPAIR_ASSERT(nvp);
@@ -958,6 +962,8 @@ nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep)
goto fail;
while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) {
do {
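+			/* Stop once we are back at the nvlist being packed. */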
+ if (nvl == top)
+ goto out;
cookie = NULL;
if (nvlist_in_array(nvl)) {
ptr = nvpair_pack_nvlist_array_next(ptr,
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py b/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py
index b49255e8381d..08021aabcb61 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py
+++ b/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py
@@ -65,7 +65,7 @@ if __name__ == '__main__':
# check last (HEAD) commit message
last_commit_message_raw = subprocess.run([
- 'git', 'show', '-s', '--format=%B', 'HEAD'
+ 'git', 'show', '-s', '--format=%B', head
], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
for line in last_commit_message_raw.stdout.decode().splitlines():
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh
index de29ad1f57b6..0278264d9279 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh
@@ -6,6 +6,13 @@
set -eu
+# We've been seeing this script take over 15min to run. This may or
+# may not be normal. Just to get a little more insight, print out
+# a message to stdout with the top running process, and do this every
+# 30 seconds. We can delete this watchdog later once we get a better
+# handle on what the timeout value should be.
+(while [ 1 ] ; do sleep 30 && echo "[watchdog: $(ps -eo cmd --sort=-pcpu | head -n 2 | tail -n 1)]"; done) &
+
# install needed packages
export DEBIAN_FRONTEND="noninteractive"
sudo apt-get -y update
@@ -65,3 +72,6 @@ sudo zpool create -f -o ashift=12 zpool $SSD1 $SSD2 -O relatime=off \
for i in /sys/block/s*/queue/scheduler; do
echo "none" | sudo tee $i
done
+
+# Kill off our watchdog
+kill $(jobs -p)
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh
index 70a2364f1fc6..62e06926e268 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh
@@ -25,6 +25,10 @@ UBMIRROR="https://cloud-images.ubuntu.com"
# default nic model for vm's
NIC="virtio"
+# additional options for virt-install
+OPTS[0]=""
+OPTS[1]=""
+
case "$OS" in
almalinux8)
OSNAME="AlmaLinux 8"
@@ -61,6 +65,14 @@ case "$OS" in
OSNAME="Debian 12"
URL="https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-generic-amd64.qcow2"
;;
+ debian13)
+ OSNAME="Debian 13"
+ # TODO: Overwrite OSv to debian13 for virt-install until it's added to osinfo
+    # TODO: Keep OSv at debian12 for virt-install until debian13 is added to osinfo
+ URL="https://cloud.debian.org/images/cloud/trixie/latest/debian-13-generic-amd64.qcow2"
+ OPTS[0]="--boot"
+ OPTS[1]="uefi=on"
+ ;;
fedora41)
OSNAME="Fedora 41"
OSv="fedora-unknown"
@@ -242,7 +254,7 @@ sudo virt-install \
--network bridge=virbr0,model=$NIC,mac='52:54:00:83:79:00' \
--cloud-init user-data=/tmp/user-data \
--disk $DISK,bus=virtio,cache=none,format=raw,driver.discard=unmap \
- --import --noautoconsole >/dev/null
+ --import --noautoconsole ${OPTS[0]} ${OPTS[1]} >/dev/null
# Give the VMs hostnames so we don't have to refer to them with
# hardcoded IP addresses.
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh
index c41ecd09d52e..ee058b488088 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh
@@ -41,7 +41,7 @@ function debian() {
libelf-dev libffi-dev libmount-dev libpam0g-dev libselinux-dev libssl-dev \
libtool libtool-bin libudev-dev libunwind-dev linux-headers-$(uname -r) \
lsscsi nfs-kernel-server pamtester parted python3 python3-all-dev \
- python3-cffi python3-dev python3-distlib python3-packaging \
+ python3-cffi python3-dev python3-distlib python3-packaging libtirpc-dev \
python3-setuptools python3-sphinx qemu-guest-agent rng-tools rpm2cpio \
rsync samba sysstat uuid-dev watchdog wget xfslibs-dev xxhash zlib1g-dev
echo "##[endgroup]"
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh
index 6bf10024a1a6..0adcad2a99bc 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh
@@ -12,16 +12,26 @@ source /var/tmp/env.txt
# wait for poweroff to succeed
PID=$(pidof /usr/bin/qemu-system-x86_64)
tail --pid=$PID -f /dev/null
-sudo virsh undefine openzfs
+sudo virsh undefine --nvram openzfs
# cpu pinning
CPUSET=("0,1" "2,3")
+# additional options for virt-install
+OPTS[0]=""
+OPTS[1]=""
+
case "$OS" in
freebsd*)
# FreeBSD needs only 6GiB
RAM=6
;;
+ debian13)
+ RAM=8
+ # Boot Debian 13 with uefi=on and secureboot=off (ZFS Kernel Module not signed)
+ OPTS[0]="--boot"
+ OPTS[1]="firmware=efi,firmware.feature0.name=secure-boot,firmware.feature0.enabled=no"
+ ;;
*)
# Linux needs more memory, but can be optimized to share it via KSM
RAM=8
@@ -79,7 +89,7 @@ EOF
--network bridge=virbr0,model=$NIC,mac="52:54:00:83:79:0$i" \
--disk $DISK-system,bus=virtio,cache=none,format=$FORMAT,driver.discard=unmap \
--disk $DISK-tests,bus=virtio,cache=none,format=$FORMAT,driver.discard=unmap \
- --import --noautoconsole >/dev/null
+ --import --noautoconsole ${OPTS[0]} ${OPTS[1]}
done
# generate some memory stats
diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
index cda620313189..69349678d84c 100644
--- a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
+++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
@@ -29,7 +29,7 @@ jobs:
- name: Generate OS config and CI type
id: os
run: |
- FULL_OS='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "fedora41", "fedora42", "freebsd13-5r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]'
+ FULL_OS='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora41", "fedora42", "freebsd13-5r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]'
QUICK_OS='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora42", "freebsd14-3s", "ubuntu24"]'
# determine CI type when running on PR
ci_type="full"
@@ -44,7 +44,7 @@ jobs:
os_selection="$FULL_OS"
fi
- if [ ${{ github.event.inputs.fedora_kernel_ver }} != "" ] ; then
+ if ${{ github.event.inputs.fedora_kernel_ver != '' }}; then
# They specified a custom kernel version for Fedora. Use only
# Fedora runners.
os_json=$(echo ${os_selection} | jq -c '[.[] | select(startswith("fedora"))]')
@@ -53,9 +53,8 @@ jobs:
os_json=$(echo ${os_selection} | jq -c)
fi
- echo $os_json
- echo "os=$os_json" >> $GITHUB_OUTPUT
- echo "ci_type=$ci_type" >> $GITHUB_OUTPUT
+ echo "os=$os_json" | tee -a $GITHUB_OUTPUT
+ echo "ci_type=$ci_type" | tee -a $GITHUB_OUTPUT
qemu-vm:
name: qemu-x86
@@ -63,8 +62,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- # rhl: almalinux8, almalinux9, centos-stream9, fedora41
- # debian: debian11, debian12, ubuntu22, ubuntu24
+ # rhl: almalinux8, almalinux9, centos-stream9, fedora4x
+ # debian: debian12, debian13, ubuntu22, ubuntu24
# misc: archlinux, tumbleweed
# FreeBSD variants of 2025-06:
# FreeBSD Release: freebsd13-5r, freebsd14-2r, freebsd14-3r
@@ -78,8 +77,12 @@ jobs:
ref: ${{ github.event.pull_request.head.sha }}
- name: Setup QEMU
- timeout-minutes: 10
- run: .github/workflows/scripts/qemu-1-setup.sh
+ timeout-minutes: 20
+ run: |
+ # Add a timestamp to each line to debug timeouts
+ while IFS=$'\n' read -r line; do
+ echo "$(date +'%H:%M:%S') $line"
+ done < <(.github/workflows/scripts/qemu-1-setup.sh)
- name: Start build machine
timeout-minutes: 10
diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META
index 1a9c671feac6..5704b5c6de8a 100644
--- a/sys/contrib/openzfs/META
+++ b/sys/contrib/openzfs/META
@@ -1,7 +1,7 @@
Meta: 1
Name: zfs
Branch: 1.0
-Version: 2.3.99
+Version: 2.4.99
Release: 1
Release-Tags: relext
License: CDDL
diff --git a/sys/contrib/openzfs/Makefile.am b/sys/contrib/openzfs/Makefile.am
index 5f09d170e730..30f78e490b78 100644
--- a/sys/contrib/openzfs/Makefile.am
+++ b/sys/contrib/openzfs/Makefile.am
@@ -1,6 +1,7 @@
CLEANFILES =
dist_noinst_DATA =
INSTALL_DATA_HOOKS =
+INSTALL_EXEC_HOOKS =
ALL_LOCAL =
CLEAN_LOCAL =
CHECKS = shellcheck checkbashisms
@@ -71,6 +72,9 @@ all: gitrev
PHONY += install-data-hook $(INSTALL_DATA_HOOKS)
install-data-hook: $(INSTALL_DATA_HOOKS)
+PHONY += install-exec-hook $(INSTALL_EXEC_HOOKS)
+install-exec-hook: $(INSTALL_EXEC_HOOKS)
+
PHONY += maintainer-clean-local
maintainer-clean-local:
-$(RM) $(GITREV)
diff --git a/sys/contrib/openzfs/cmd/Makefile.am b/sys/contrib/openzfs/cmd/Makefile.am
index 96040976e53e..ca94f6b77e06 100644
--- a/sys/contrib/openzfs/cmd/Makefile.am
+++ b/sys/contrib/openzfs/cmd/Makefile.am
@@ -98,17 +98,16 @@ endif
if USING_PYTHON
-bin_SCRIPTS += arc_summary arcstat dbufstat zilstat
-CLEANFILES += arc_summary arcstat dbufstat zilstat
-dist_noinst_DATA += %D%/arc_summary %D%/arcstat.in %D%/dbufstat.in %D%/zilstat.in
+bin_SCRIPTS += zarcsummary zarcstat dbufstat zilstat
+CLEANFILES += zarcsummary zarcstat dbufstat zilstat
+dist_noinst_DATA += %D%/zarcsummary %D%/zarcstat.in %D%/dbufstat.in %D%/zilstat.in
-$(call SUBST,arcstat,%D%/)
+$(call SUBST,zarcstat,%D%/)
$(call SUBST,dbufstat,%D%/)
$(call SUBST,zilstat,%D%/)
-arc_summary: %D%/arc_summary
+zarcsummary: %D%/zarcsummary
$(AM_V_at)cp $< $@
endif
-
PHONY += cmd
cmd: $(bin_SCRIPTS) $(bin_PROGRAMS) $(sbin_SCRIPTS) $(sbin_PROGRAMS) $(dist_bin_SCRIPTS) $(zfsexec_PROGRAMS) $(mounthelper_PROGRAMS)
diff --git a/sys/contrib/openzfs/cmd/arcstat.in b/sys/contrib/openzfs/cmd/zarcstat.in
index 6f9abb39c3fb..8ffd20481166 100755
--- a/sys/contrib/openzfs/cmd/arcstat.in
+++ b/sys/contrib/openzfs/cmd/zarcstat.in
@@ -2,7 +2,7 @@
# SPDX-License-Identifier: CDDL-1.0
#
# Print out ZFS ARC Statistics exported via kstat(1)
-# For a definition of fields, or usage, use arcstat -v
+# For a definition of fields, or usage, use zarcstat -v
#
# This script was originally a fork of the original arcstat.pl (0.1)
# by Neelakanth Nadgir, originally published on his Sun blog on
@@ -56,6 +56,7 @@ import time
import getopt
import re
import copy
+import os
from signal import signal, SIGINT, SIGWINCH, SIG_DFL
@@ -171,7 +172,7 @@ cols = {
"zactive": [7, 1000, "zfetch prefetches active per second"],
}
-# ARC structural breakdown from arc_summary
+# ARC structural breakdown from zarcsummary
structfields = {
"cmp": ["compressed", "Compressed"],
"ovh": ["overhead", "Overhead"],
@@ -187,7 +188,7 @@ structstats = { # size stats
"sz": ["_size", "size"],
}
-# ARC types breakdown from arc_summary
+# ARC types breakdown from zarcsummary
typefields = {
"data": ["data", "ARC data"],
"meta": ["metadata", "ARC metadata"],
@@ -198,7 +199,7 @@ typestats = { # size stats
"sz": ["_size", "size"],
}
-# ARC states breakdown from arc_summary
+# ARC states breakdown from zarcsummary
statefields = {
"ano": ["anon", "Anonymous"],
"mfu": ["mfu", "MFU"],
@@ -261,7 +262,7 @@ hdr_intr = 20 # Print header every 20 lines of output
opfile = None
sep = " " # Default separator is 2 spaces
l2exist = False
-cmd = ("Usage: arcstat [-havxp] [-f fields] [-o file] [-s string] [interval "
+cmd = ("Usage: zarcstat [-havxp] [-f fields] [-o file] [-s string] [interval "
"[count]]\n")
cur = {}
d = {}
@@ -348,10 +349,10 @@ def usage():
"character or string\n")
sys.stderr.write("\t -p : Disable auto-scaling of numerical fields\n")
sys.stderr.write("\nExamples:\n")
- sys.stderr.write("\tarcstat -o /tmp/a.log 2 10\n")
- sys.stderr.write("\tarcstat -s \",\" -o /tmp/a.log 2 10\n")
- sys.stderr.write("\tarcstat -v\n")
- sys.stderr.write("\tarcstat -f time,hit%,dh%,ph%,mh% 1\n")
+ sys.stderr.write("\tzarcstat -o /tmp/a.log 2 10\n")
+ sys.stderr.write("\tzarcstat -s \",\" -o /tmp/a.log 2 10\n")
+ sys.stderr.write("\tzarcstat -v\n")
+ sys.stderr.write("\tzarcstat -f time,hit%,dh%,ph%,mh% 1\n")
sys.stderr.write("\n")
sys.exit(1)
@@ -366,7 +367,7 @@ def snap_stats():
cur = kstat
- # fill in additional values from arc_summary
+ # fill in additional values from zarcsummary
cur["caches_size"] = caches_size = cur["anon_data"]+cur["anon_metadata"]+\
cur["mfu_data"]+cur["mfu_metadata"]+cur["mru_data"]+cur["mru_metadata"]+\
cur["uncached_data"]+cur["uncached_metadata"]
@@ -766,6 +767,7 @@ def calculate():
def main():
+
global sint
global count
global hdr_intr
diff --git a/sys/contrib/openzfs/cmd/arc_summary b/sys/contrib/openzfs/cmd/zarcsummary
index e60c6b64e8a1..24a129d9ca70 100755
--- a/sys/contrib/openzfs/cmd/arc_summary
+++ b/sys/contrib/openzfs/cmd/zarcsummary
@@ -34,7 +34,7 @@ Provides basic information on the ARC, its efficiency, the L2ARC (if present),
the Data Management Unit (DMU), Virtual Devices (VDEVs), and tunables. See
the in-source documentation and code at
https://github.com/openzfs/zfs/blob/master/module/zfs/arc.c for details.
-The original introduction to arc_summary can be found at
+The original introduction to zarcsummary can be found at
http://cuddletech.com/?p=454
"""
@@ -161,7 +161,7 @@ elif sys.platform.startswith('linux'):
return get_params(TUNABLES_PATH)
def get_version_impl(request):
- # The original arc_summary called /sbin/modinfo/{spl,zfs} to get
+ # The original zarcsummary called /sbin/modinfo/{spl,zfs} to get
# the version information. We switch to /sys/module/{spl,zfs}/version
# to make sure we get what is really loaded in the kernel
try:
@@ -439,7 +439,7 @@ def print_header():
"""
# datetime is now recommended over time but we keep the exact formatting
- # from the older version of arc_summary in case there are scripts
+ # from the older version of zarcsummary in case there are scripts
# that expect it in this way
daydate = time.strftime(DATE_FORMAT)
spc_date = LINE_LENGTH-len(daydate)
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c
index a5f23be2aaaf..70096b809656 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.c
@@ -107,7 +107,9 @@ extern uint_t zfs_reconstruct_indirect_combinations_max;
extern uint_t zfs_btree_verify_intensity;
static const char cmdname[] = "zdb";
-uint8_t dump_opt[256];
+uint8_t dump_opt[512];
+
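+/* Long-option-only values start above the 8-bit short-option range. */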
+#define ALLOCATED_OPT 256
typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
@@ -1586,9 +1588,8 @@ dump_spacemap(objset_t *os, space_map_t *sm)
continue;
}
- uint8_t words;
char entry_type;
- uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID;
+ uint64_t entry_off, entry_run, entry_vdev;
if (sm_entry_is_single_word(word)) {
entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
@@ -1596,35 +1597,43 @@ dump_spacemap(objset_t *os, space_map_t *sm)
entry_off = (SM_OFFSET_DECODE(word) << mapshift) +
sm->sm_start;
entry_run = SM_RUN_DECODE(word) << mapshift;
- words = 1;
+
+ (void) printf("\t [%6llu] %c "
+ "range: %012llx-%012llx size: %08llx\n",
+ (u_longlong_t)entry_id, entry_type,
+ (u_longlong_t)entry_off,
+ (u_longlong_t)(entry_off + entry_run - 1),
+ (u_longlong_t)entry_run);
} else {
/* it is a two-word entry so we read another word */
ASSERT(sm_entry_is_double_word(word));
uint64_t extra_word;
offset += sizeof (extra_word);
+ ASSERT3U(offset, <, space_map_length(sm));
VERIFY0(dmu_read(os, space_map_object(sm), offset,
sizeof (extra_word), &extra_word,
DMU_READ_PREFETCH));
- ASSERT3U(offset, <=, space_map_length(sm));
-
entry_run = SM2_RUN_DECODE(word) << mapshift;
entry_vdev = SM2_VDEV_DECODE(word);
entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ?
'A' : 'F';
entry_off = (SM2_OFFSET_DECODE(extra_word) <<
mapshift) + sm->sm_start;
- words = 2;
- }
- (void) printf("\t [%6llu] %c range:"
- " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n",
- (u_longlong_t)entry_id,
- entry_type, (u_longlong_t)entry_off,
- (u_longlong_t)(entry_off + entry_run),
- (u_longlong_t)entry_run,
- (u_longlong_t)entry_vdev, words);
+ if (zopt_metaslab_args == 0 ||
+ zopt_metaslab[0] == entry_vdev) {
+ (void) printf("\t [%6llu] %c "
+ "range: %012llx-%012llx size: %08llx "
+ "vdev: %llu\n",
+ (u_longlong_t)entry_id, entry_type,
+ (u_longlong_t)entry_off,
+ (u_longlong_t)(entry_off + entry_run - 1),
+ (u_longlong_t)entry_run,
+ (u_longlong_t)entry_vdev);
+ }
+ }
if (entry_type == 'A')
alloc += entry_run;
@@ -1660,6 +1669,16 @@ dump_metaslab_stats(metaslab_t *msp)
}
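+/*
+ * zfs_range_tree_walk() callback over ms_allocatable (a metaslab's free
+ * space): "arg" tracks where the previous free segment ended, so any gap
+ * before the current segment is allocated space and gets printed.
+ */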
static void
+dump_allocated(void *arg, uint64_t start, uint64_t size)
+{
+ uint64_t *off = arg;
+ if (*off != start)
+ (void) printf("ALLOC: %"PRIu64" %"PRIu64"\n", *off,
+ start - *off);
+ *off = start + size;
+}
+
+static void
dump_metaslab(metaslab_t *msp)
{
vdev_t *vd = msp->ms_group->mg_vd;
@@ -1675,13 +1694,24 @@ dump_metaslab(metaslab_t *msp)
(u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
(u_longlong_t)space_map_object(sm), freebuf);
- if (dump_opt['m'] > 2 && !dump_opt['L']) {
+ if (dump_opt[ALLOCATED_OPT] ||
+ (dump_opt['m'] > 2 && !dump_opt['L'])) {
mutex_enter(&msp->ms_lock);
VERIFY0(metaslab_load(msp));
+ }
+
+ if (dump_opt['m'] > 2 && !dump_opt['L']) {
zfs_range_tree_stat_verify(msp->ms_allocatable);
dump_metaslab_stats(msp);
- metaslab_unload(msp);
- mutex_exit(&msp->ms_lock);
+ }
+
+ if (dump_opt[ALLOCATED_OPT]) {
+ uint64_t off = msp->ms_start;
+ zfs_range_tree_walk(msp->ms_allocatable, dump_allocated,
+ &off);
+ if (off != msp->ms_start + msp->ms_size)
+ (void) printf("ALLOC: %"PRIu64" %"PRIu64"\n", off,
+ msp->ms_size - off);
}
if (dump_opt['m'] > 1 && sm != NULL &&
@@ -1696,6 +1726,12 @@ dump_metaslab(metaslab_t *msp)
SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
}
+ if (dump_opt[ALLOCATED_OPT] ||
+ (dump_opt['m'] > 2 && !dump_opt['L'])) {
+ metaslab_unload(msp);
+ mutex_exit(&msp->ms_lock);
+ }
+
if (vd->vdev_ops == &vdev_draid_ops)
ASSERT3U(msp->ms_size, <=, 1ULL << vd->vdev_ms_shift);
else
@@ -1732,8 +1768,9 @@ print_vdev_metaslab_header(vdev_t *vd)
}
}
- (void) printf("\tvdev %10llu %s",
- (u_longlong_t)vd->vdev_id, bias_str);
+ (void) printf("\tvdev %10llu\t%s metaslab shift %4llu",
+ (u_longlong_t)vd->vdev_id, bias_str,
+ (u_longlong_t)vd->vdev_ms_shift);
if (ms_flush_data_obj != 0) {
(void) printf(" ms_unflushed_phys object %llu",
@@ -1873,7 +1910,7 @@ dump_metaslabs(spa_t *spa)
(void) printf("\nMetaslabs:\n");
- if (!dump_opt['d'] && zopt_metaslab_args > 0) {
+ if (zopt_metaslab_args > 0) {
c = zopt_metaslab[0];
if (c >= children)
@@ -2628,7 +2665,7 @@ print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb,
if (BP_GET_LEVEL(bp) != zb->zb_level) {
(void) printf(" (ERROR: Block pointer level "
"(%llu) does not match bookmark level (%lld))",
- BP_GET_LEVEL(bp), (u_longlong_t)zb->zb_level);
+ BP_GET_LEVEL(bp), (longlong_t)zb->zb_level);
corruption_found = B_TRUE;
}
}
@@ -9368,6 +9405,8 @@ main(int argc, char **argv)
{"all-reconstruction", no_argument, NULL, 'Y'},
{"livelist", no_argument, NULL, 'y'},
{"zstd-headers", no_argument, NULL, 'Z'},
+ {"allocated-map", no_argument, NULL,
+ ALLOCATED_OPT},
{0, 0, 0, 0}
};
@@ -9398,6 +9437,7 @@ main(int argc, char **argv)
case 'u':
case 'y':
case 'Z':
+ case ALLOCATED_OPT:
dump_opt[c]++;
dump_all = 0;
break;
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.h b/sys/contrib/openzfs/cmd/zdb/zdb.h
index 6b6c9169816b..48b561eb202c 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb.h
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.h
@@ -29,6 +29,6 @@
#define _ZDB_H
void dump_intent_log(zilog_t *);
-extern uint8_t dump_opt[256];
+extern uint8_t dump_opt[512];
#endif /* _ZDB_H */
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb_il.c b/sys/contrib/openzfs/cmd/zdb/zdb_il.c
index 62e290cd122c..3d91fb28a4c7 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb_il.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb_il.c
@@ -48,8 +48,6 @@
#include "zdb.h"
-extern uint8_t dump_opt[256];
-
static char tab_prefix[4] = "\t\t\t";
static void
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am b/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am
index 093a04c4636a..c0b161ecf248 100644
--- a/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am
@@ -9,18 +9,18 @@ dist_zedexec_SCRIPTS = \
%D%/all-debug.sh \
%D%/all-syslog.sh \
%D%/data-notify.sh \
- %D%/deadman-slot_off.sh \
+ %D%/deadman-sync-slot_off.sh \
%D%/generic-notify.sh \
- %D%/pool_import-led.sh \
+ %D%/pool_import-sync-led.sh \
%D%/resilver_finish-notify.sh \
%D%/resilver_finish-start-scrub.sh \
%D%/scrub_finish-notify.sh \
- %D%/statechange-led.sh \
+ %D%/statechange-sync-led.sh \
%D%/statechange-notify.sh \
- %D%/statechange-slot_off.sh \
+ %D%/statechange-sync-slot_off.sh \
%D%/trim_finish-notify.sh \
- %D%/vdev_attach-led.sh \
- %D%/vdev_clear-led.sh
+ %D%/vdev_attach-sync-led.sh \
+ %D%/vdev_clear-sync-led.sh
nodist_zedexec_SCRIPTS = \
%D%/history_event-zfs-list-cacher.sh
@@ -30,17 +30,17 @@ SUBSTFILES += $(nodist_zedexec_SCRIPTS)
zedconfdefaults = \
all-syslog.sh \
data-notify.sh \
- deadman-slot_off.sh \
+ deadman-sync-slot_off.sh \
history_event-zfs-list-cacher.sh \
- pool_import-led.sh \
+ pool_import-sync-led.sh \
resilver_finish-notify.sh \
resilver_finish-start-scrub.sh \
scrub_finish-notify.sh \
- statechange-led.sh \
+ statechange-sync-led.sh \
statechange-notify.sh \
- statechange-slot_off.sh \
- vdev_attach-led.sh \
- vdev_clear-led.sh
+ statechange-sync-slot_off.sh \
+ vdev_attach-sync-led.sh \
+ vdev_clear-sync-led.sh
dist_noinst_DATA += %D%/README
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/deadman-slot_off.sh b/sys/contrib/openzfs/cmd/zed/zed.d/deadman-sync-slot_off.sh
index 7b339b3add01..7b339b3add01 100755
--- a/sys/contrib/openzfs/cmd/zed/zed.d/deadman-slot_off.sh
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/deadman-sync-slot_off.sh
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-led.sh
deleted file mode 120000
index 7d7404398a4a..000000000000
--- a/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-led.sh
+++ /dev/null
@@ -1 +0,0 @@
-statechange-led.sh \ No newline at end of file
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-sync-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-sync-led.sh
new file mode 120000
index 000000000000..8b9c10c11ebb
--- /dev/null
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-sync-led.sh
@@ -0,0 +1 @@
+statechange-sync-led.sh \ No newline at end of file
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-led.sh
index 40cb61f17307..40cb61f17307 100755
--- a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-led.sh
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-led.sh
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-slot_off.sh
index 06acce93b8aa..06acce93b8aa 100755
--- a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-slot_off.sh
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-led.sh
deleted file mode 120000
index 7d7404398a4a..000000000000
--- a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-led.sh
+++ /dev/null
@@ -1 +0,0 @@
-statechange-led.sh \ No newline at end of file
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-sync-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-sync-led.sh
new file mode 120000
index 000000000000..8b9c10c11ebb
--- /dev/null
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-sync-led.sh
@@ -0,0 +1 @@
+statechange-sync-led.sh \ No newline at end of file
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-led.sh
deleted file mode 120000
index 7d7404398a4a..000000000000
--- a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-led.sh
+++ /dev/null
@@ -1 +0,0 @@
-statechange-led.sh \ No newline at end of file
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-sync-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-sync-led.sh
new file mode 120000
index 000000000000..8b9c10c11ebb
--- /dev/null
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-sync-led.sh
@@ -0,0 +1 @@
+statechange-sync-led.sh \ No newline at end of file
diff --git a/sys/contrib/openzfs/cmd/zed/zed_exec.c b/sys/contrib/openzfs/cmd/zed/zed_exec.c
index 036081decd64..a14af4f20a85 100644
--- a/sys/contrib/openzfs/cmd/zed/zed_exec.c
+++ b/sys/contrib/openzfs/cmd/zed/zed_exec.c
@@ -196,37 +196,29 @@ _nop(int sig)
(void) sig;
}
-static void *
-_reap_children(void *arg)
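+/*
+ * Reap exited zedlets.  With "wait" set, block until all outstanding
+ * children have been collected (used as a barrier around synchronous
+ * zedlets); with "do_pause" set, sleep in pause() whenever there is
+ * nothing left to reap (the reaper thread case).
+ */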
+static void
+wait_for_children(boolean_t do_pause, boolean_t wait)
{
- (void) arg;
- struct launched_process_node node, *pnode;
pid_t pid;
- int status;
struct rusage usage;
- struct sigaction sa = {};
-
- (void) sigfillset(&sa.sa_mask);
- (void) sigdelset(&sa.sa_mask, SIGCHLD);
- (void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL);
-
- (void) sigemptyset(&sa.sa_mask);
- sa.sa_handler = _nop;
- sa.sa_flags = SA_NOCLDSTOP;
- (void) sigaction(SIGCHLD, &sa, NULL);
+ int status;
+ struct launched_process_node node, *pnode;
for (_reap_children_stop = B_FALSE; !_reap_children_stop; ) {
(void) pthread_mutex_lock(&_launched_processes_lock);
- pid = wait4(0, &status, WNOHANG, &usage);
-
+ pid = wait4(0, &status, wait ? 0 : WNOHANG, &usage);
if (pid == 0 || pid == (pid_t)-1) {
(void) pthread_mutex_unlock(&_launched_processes_lock);
- if (pid == 0 || errno == ECHILD)
- pause();
- else if (errno != EINTR)
+ if ((pid == 0) || (errno == ECHILD)) {
+ if (do_pause)
+ pause();
+ } else if (errno != EINTR)
zed_log_msg(LOG_WARNING,
"Failed to wait for children: %s",
strerror(errno));
+ if (!do_pause)
+ return;
+
} else {
memset(&node, 0, sizeof (node));
node.pid = pid;
@@ -278,6 +270,25 @@ _reap_children(void *arg)
}
}
+}
+
+static void *
+_reap_children(void *arg)
+{
+ (void) arg;
+ struct sigaction sa = {};
+
+ (void) sigfillset(&sa.sa_mask);
+ (void) sigdelset(&sa.sa_mask, SIGCHLD);
+ (void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL);
+
+ (void) sigemptyset(&sa.sa_mask);
+ sa.sa_handler = _nop;
+ sa.sa_flags = SA_NOCLDSTOP;
+ (void) sigaction(SIGCHLD, &sa, NULL);
+
+ wait_for_children(B_TRUE, B_FALSE);
+
return (NULL);
}
@@ -307,6 +318,45 @@ zed_exec_fini(void)
}
/*
+ * Check whether the zedlet name marks it as a synchronous zedlet
+ *
+ * Synchronous zedlets have a "-sync-" immediately following the event name in
+ * their zedlet filename, like:
+ *
+ * EVENT_NAME-sync-ZEDLETNAME.sh
+ *
+ * For example, if you wanted a synchronous statechange script:
+ *
+ * statechange-sync-myzedlet.sh
+ *
+ * Synchronous zedlets are guaranteed to be the only zedlet running. No other
+ * zedlets may run in parallel with a synchronous zedlet. A synchronous
+ * zedlet will wait for all previously spawned zedlets to finish before running.
+ * Users should be careful to only use synchronous zedlets when needed, since
+ * they decrease parallelism.
+ */
+static boolean_t
+zedlet_is_sync(const char *zedlet, const char *event)
+{
+ const char *sync_str = "-sync-";
+ size_t sync_str_len;
+ size_t zedlet_len;
+ size_t event_len;
+
+ sync_str_len = strlen(sync_str);
+ zedlet_len = strlen(zedlet);
+ event_len = strlen(event);
+
+ if (event_len + sync_str_len >= zedlet_len)
+ return (B_FALSE);
+
+ if (strncmp(&zedlet[event_len], sync_str, sync_str_len) == 0)
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
+/*
* Process the event [eid] by synchronously invoking all zedlets with a
* matching class prefix.
*
@@ -368,9 +418,28 @@ zed_exec_process(uint64_t eid, const char *class, const char *subclass,
z = zed_strings_next(zcp->zedlets)) {
for (csp = class_strings; *csp; csp++) {
n = strlen(*csp);
- if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n]))
+ if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n])) {
+ boolean_t is_sync = zedlet_is_sync(z, *csp);
+
+ if (is_sync) {
+ /*
+ * Wait for previous zedlets to
+ * finish
+ */
+ wait_for_children(B_FALSE, B_TRUE);
+ }
+
_zed_exec_fork_child(eid, zcp->zedlet_dir,
z, e, zcp->zevent_fd, zcp->do_foreground);
+
+ if (is_sync) {
+ /*
+ * Wait for sync zedlet we just launched
+ * to finish.
+ */
+ wait_for_children(B_FALSE, B_TRUE);
+ }
+ }
}
}
free(e);
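
The refactoring above splits the reaping loop so that zed_exec_process() can reuse it: the reaper thread still calls wait_for_children(B_TRUE, B_FALSE) (non-blocking wait4() plus pause()), while a synchronous zedlet drains outstanding children with wait_for_children(B_FALSE, B_TRUE) (blocking wait4(), returning once no children remain). As a quick illustration of the "-sync-" naming rule documented in zedlet_is_sync(), the following self-contained sketch (hypothetical main(), not part of the patch) reproduces the check:

    #include <stdio.h>
    #include <string.h>

    /* Return 1 if "-sync-" immediately follows the event-name prefix. */
    static int
    is_sync_zedlet(const char *zedlet, const char *event)
    {
            const char *sync_str = "-sync-";
            size_t sync_len = strlen(sync_str);
            size_t event_len = strlen(event);

            if (event_len + sync_len >= strlen(zedlet))
                    return (0);
            return (strncmp(&zedlet[event_len], sync_str, sync_len) == 0);
    }

    int
    main(void)
    {
            /* Prints 1 then 0: only the first name carries the "-sync-" marker. */
            printf("%d\n", is_sync_zedlet("statechange-sync-led.sh", "statechange"));
            printf("%d\n", is_sync_zedlet("statechange-led.sh", "statechange"));
            return (0);
    }
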
diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
index 235f011af953..f7a627a2fee6 100644
--- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c
+++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
@@ -5303,6 +5303,7 @@ zfs_do_receive(int argc, char **argv)
#define ZFS_DELEG_PERM_MOUNT "mount"
#define ZFS_DELEG_PERM_SHARE "share"
#define ZFS_DELEG_PERM_SEND "send"
+#define ZFS_DELEG_PERM_SEND_RAW "send:raw"
#define ZFS_DELEG_PERM_RECEIVE "receive"
#define ZFS_DELEG_PERM_RECEIVE_APPEND "receive:append"
#define ZFS_DELEG_PERM_ALLOW "allow"
@@ -5345,6 +5346,7 @@ static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = {
{ ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME },
{ ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK },
{ ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND },
+ { ZFS_DELEG_PERM_SEND_RAW, ZFS_DELEG_NOTE_SEND_RAW },
{ ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
{ ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT },
{ ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK },
@@ -5929,6 +5931,10 @@ deleg_perm_comment(zfs_deleg_note_t note)
case ZFS_DELEG_NOTE_SEND:
str = gettext("");
break;
+ case ZFS_DELEG_NOTE_SEND_RAW:
+ str = gettext("Allow sending ONLY encrypted (raw) replication"
+ "\n\t\t\t\tstreams");
+ break;
case ZFS_DELEG_NOTE_SHARE:
str = gettext("Allows sharing file systems over NFS or SMB"
"\n\t\t\t\tprotocols");
diff --git a/sys/contrib/openzfs/cmd/zhack.c b/sys/contrib/openzfs/cmd/zhack.c
index 2bd3051dce7b..edf9dfa2cece 100644
--- a/sys/contrib/openzfs/cmd/zhack.c
+++ b/sys/contrib/openzfs/cmd/zhack.c
@@ -54,6 +54,7 @@
#include <sys/dmu_tx.h>
#include <zfeature_common.h>
#include <libzutil.h>
+#include <sys/metaslab_impl.h>
static importargs_t g_importargs;
static char *g_pool;
@@ -69,7 +70,8 @@ static __attribute__((noreturn)) void
usage(void)
{
(void) fprintf(stderr,
- "Usage: zhack [-c cachefile] [-d dir] <subcommand> <args> ...\n"
+ "Usage: zhack [-o tunable] [-c cachefile] [-d dir] <subcommand> "
+ "<args> ...\n"
"where <subcommand> <args> is one of the following:\n"
"\n");
@@ -93,7 +95,10 @@ usage(void)
" -c repair corrupted label checksums\n"
" -u restore the label on a detached device\n"
"\n"
- " <device> : path to vdev\n");
+ " <device> : path to vdev\n"
+ "\n"
+ " metaslab leak <pool>\n"
+ " apply allocation map from zdb to specified pool\n");
exit(1);
}
@@ -363,10 +368,12 @@ feature_incr_sync(void *arg, dmu_tx_t *tx)
zfeature_info_t *feature = arg;
uint64_t refcount;
+ mutex_enter(&spa->spa_feat_stats_lock);
VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
feature_sync(spa, feature, refcount + 1, tx);
spa_history_log_internal(spa, "zhack feature incr", tx,
"name=%s", feature->fi_guid);
+ mutex_exit(&spa->spa_feat_stats_lock);
}
static void
@@ -376,10 +383,12 @@ feature_decr_sync(void *arg, dmu_tx_t *tx)
zfeature_info_t *feature = arg;
uint64_t refcount;
+ mutex_enter(&spa->spa_feat_stats_lock);
VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
feature_sync(spa, feature, refcount - 1, tx);
spa_history_log_internal(spa, "zhack feature decr", tx,
"name=%s", feature->fi_guid);
+ mutex_exit(&spa->spa_feat_stats_lock);
}
static void
@@ -496,6 +505,186 @@ zhack_do_feature(int argc, char **argv)
return (0);
}
+static boolean_t
+strstarts(const char *a, const char *b)
+{
+ return (strncmp(a, b, strlen(b)) == 0);
+}
+
+static void
+metaslab_force_alloc(metaslab_t *msp, uint64_t start, uint64_t size,
+ dmu_tx_t *tx)
+{
+ ASSERT(msp->ms_disabled);
+ ASSERT(MUTEX_HELD(&msp->ms_lock));
+ uint64_t txg = dmu_tx_get_txg(tx);
+
+ uint64_t off = start;
+ while (off < start + size) {
+ uint64_t ostart, osize;
+ boolean_t found = zfs_range_tree_find_in(msp->ms_allocatable,
+ off, start + size - off, &ostart, &osize);
+ if (!found)
+ break;
+ zfs_range_tree_remove(msp->ms_allocatable, ostart, osize);
+
+ if (zfs_range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK]))
+ vdev_dirty(msp->ms_group->mg_vd, VDD_METASLAB, msp,
+ txg);
+
+ zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK], ostart,
+ osize);
+ msp->ms_allocating_total += osize;
+ off = ostart + osize;
+ }
+}
+
+static void
+zhack_do_metaslab_leak(int argc, char **argv)
+{
+ int c;
+ char *target;
+ spa_t *spa;
+
+ optind = 1;
+ boolean_t force = B_FALSE;
+ while ((c = getopt(argc, argv, "f")) != -1) {
+ switch (c) {
+ case 'f':
+ force = B_TRUE;
+ break;
+ default:
+ usage();
+ break;
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 1) {
+ (void) fprintf(stderr, "error: missing pool name\n");
+ usage();
+ }
+ target = argv[0];
+
+ zhack_spa_open(target, B_FALSE, FTAG, &spa);
+ spa_config_enter(spa, SCL_VDEV | SCL_ALLOC, FTAG, RW_READER);
+
+ char *line = NULL;
+ size_t cap = 0;
+
+ vdev_t *vd = NULL;
+ metaslab_t *prev = NULL;
+ dmu_tx_t *tx = NULL;
+ while (getline(&line, &cap, stdin) > 0) {
+ if (strstarts(line, "\tvdev ")) {
+ uint64_t vdev_id, ms_shift;
+ if (sscanf(line,
+ "\tvdev %10"PRIu64"\t%*s metaslab shift %4"PRIu64,
+ &vdev_id, &ms_shift) == 1) {
+ VERIFY3U(sscanf(line, "\tvdev %"PRIu64
+ "\t metaslab shift %4"PRIu64,
+ &vdev_id, &ms_shift), ==, 2);
+ }
+ vd = vdev_lookup_top(spa, vdev_id);
+ if (vd == NULL) {
+ fprintf(stderr, "error: no such vdev with "
+ "id %"PRIu64"\n", vdev_id);
+ break;
+ }
+ if (tx) {
+ dmu_tx_commit(tx);
+ mutex_exit(&prev->ms_lock);
+ metaslab_enable(prev, B_FALSE, B_FALSE);
+ tx = NULL;
+ prev = NULL;
+ }
+ if (vd->vdev_ms_shift != ms_shift) {
+ fprintf(stderr, "error: ms_shift mismatch: %"
+ PRIu64" != %"PRIu64"\n", vd->vdev_ms_shift,
+ ms_shift);
+ break;
+ }
+ } else if (strstarts(line, "\tmetaslabs ")) {
+ uint64_t ms_count;
+ VERIFY3U(sscanf(line, "\tmetaslabs %"PRIu64, &ms_count),
+ ==, 1);
+ ASSERT(vd);
+ if (!force && vd->vdev_ms_count != ms_count) {
+ fprintf(stderr, "error: ms_count mismatch: %"
+ PRIu64" != %"PRIu64"\n", vd->vdev_ms_count,
+ ms_count);
+ break;
+ }
+ } else if (strstarts(line, "ALLOC:")) {
+ uint64_t start, size;
+ VERIFY3U(sscanf(line, "ALLOC: %"PRIu64" %"PRIu64"\n",
+ &start, &size), ==, 2);
+
+ ASSERT(vd);
+ metaslab_t *cur =
+ vd->vdev_ms[start >> vd->vdev_ms_shift];
+ if (prev != cur) {
+ if (prev) {
+ dmu_tx_commit(tx);
+ mutex_exit(&prev->ms_lock);
+ metaslab_enable(prev, B_FALSE, B_FALSE);
+ }
+ ASSERT(cur);
+ metaslab_disable(cur);
+ mutex_enter(&cur->ms_lock);
+ metaslab_load(cur);
+ prev = cur;
+ tx = dmu_tx_create_dd(
+ spa_get_dsl(vd->vdev_spa)->dp_root_dir);
+ dmu_tx_assign(tx, DMU_TX_WAIT);
+ }
+
+ metaslab_force_alloc(cur, start, size, tx);
+ } else {
+ continue;
+ }
+ }
+ if (tx) {
+ dmu_tx_commit(tx);
+ mutex_exit(&prev->ms_lock);
+ metaslab_enable(prev, B_FALSE, B_FALSE);
+ tx = NULL;
+ prev = NULL;
+ }
+ if (line)
+ free(line);
+
+ spa_config_exit(spa, SCL_VDEV | SCL_ALLOC, FTAG);
+ spa_close(spa, FTAG);
+}
+
+static int
+zhack_do_metaslab(int argc, char **argv)
+{
+ char *subcommand;
+
+ argc--;
+ argv++;
+ if (argc == 0) {
+ (void) fprintf(stderr,
+ "error: no metaslab operation specified\n");
+ usage();
+ }
+
+ subcommand = argv[0];
+ if (strcmp(subcommand, "leak") == 0) {
+ zhack_do_metaslab_leak(argc, argv);
+ } else {
+ (void) fprintf(stderr, "error: unknown subcommand: %s\n",
+ subcommand);
+ usage();
+ }
+
+ return (0);
+}
+
#define ASHIFT_UBERBLOCK_SHIFT(ashift) \
MIN(MAX(ashift, UBERBLOCK_SHIFT), \
MAX_UBERBLOCK_SHIFT)
@@ -981,7 +1170,7 @@ main(int argc, char **argv)
dprintf_setup(&argc, argv);
zfs_prop_init();
- while ((c = getopt(argc, argv, "+c:d:")) != -1) {
+ while ((c = getopt(argc, argv, "+c:d:o:")) != -1) {
switch (c) {
case 'c':
g_importargs.cachefile = optarg;
@@ -990,6 +1179,10 @@ main(int argc, char **argv)
assert(g_importargs.paths < MAX_NUM_PATHS);
g_importargs.path[g_importargs.paths++] = optarg;
break;
+ case 'o':
+ if (handle_tunable_option(optarg, B_FALSE) != 0)
+ exit(1);
+ break;
default:
usage();
break;
@@ -1011,6 +1204,8 @@ main(int argc, char **argv)
rv = zhack_do_feature(argc, argv);
} else if (strcmp(subcommand, "label") == 0) {
return (zhack_do_label(argc, argv));
+ } else if (strcmp(subcommand, "metaslab") == 0) {
+ rv = zhack_do_metaslab(argc, argv);
} else {
(void) fprintf(stderr, "error: unknown subcommand: %s\n",
subcommand);
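
The new metaslab leak subcommand replays an allocation map read from standard input. Judging from the sscanf() patterns above, the accepted input consists of three line forms (placeholders are illustrative; the zdb invocation that produces this output is not shown in this diff):

    <TAB>vdev <id>	... metaslab shift <shift>
    <TAB>metaslabs <count>
    ALLOC: <offset> <size>

Any other line is ignored. Each ALLOC offset is a byte offset within the current top-level vdev, so offset >> shift selects the metaslab whose allocatable range is force-allocated, and -f skips the metaslab-count consistency check.
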
diff --git a/sys/contrib/openzfs/cmd/zpool/Makefile.am b/sys/contrib/openzfs/cmd/zpool/Makefile.am
index 2f962408e5a3..5bb6d8160b18 100644
--- a/sys/contrib/openzfs/cmd/zpool/Makefile.am
+++ b/sys/contrib/openzfs/cmd/zpool/Makefile.am
@@ -148,6 +148,7 @@ dist_zpoolcompat_DATA = \
%D%/compatibility.d/openzfs-2.1-linux \
%D%/compatibility.d/openzfs-2.2 \
%D%/compatibility.d/openzfs-2.3 \
+ %D%/compatibility.d/openzfs-2.4 \
%D%/compatibility.d/openzfsonosx-1.7.0 \
%D%/compatibility.d/openzfsonosx-1.8.1 \
%D%/compatibility.d/openzfsonosx-1.9.3 \
@@ -187,7 +188,9 @@ zpoolcompatlinks = \
"openzfs-2.2 openzfs-2.2-linux" \
"openzfs-2.2 openzfs-2.2-freebsd" \
"openzfs-2.3 openzfs-2.3-linux" \
- "openzfs-2.3 openzfs-2.3-freebsd"
+ "openzfs-2.3 openzfs-2.3-freebsd" \
+ "openzfs-2.4 openzfs-2.4-linux" \
+ "openzfs-2.4 openzfs-2.4-freebsd"
zpoolconfdir = $(sysconfdir)/zfs/zpool.d
INSTALL_DATA_HOOKS += zpool-install-data-hook
diff --git a/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4 b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4
new file mode 100644
index 000000000000..3fbd91014c95
--- /dev/null
+++ b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4
@@ -0,0 +1,48 @@
+# Features supported by OpenZFS 2.4 on Linux and FreeBSD
+allocation_classes
+async_destroy
+blake3
+block_cloning
+block_cloning_endian
+bookmark_v2
+bookmark_written
+bookmarks
+device_rebuild
+device_removal
+draid
+dynamic_gang_header
+edonr
+embedded_data
+empty_bpobj
+enabled_txg
+encryption
+extensible_dataset
+fast_dedup
+filesystem_limits
+head_errlog
+hole_birth
+large_blocks
+large_dnode
+large_microzap
+livelist
+log_spacemap
+longname
+lz4_compress
+multi_vdev_crash_dump
+obsolete_counts
+physical_rewrite
+project_quota
+raidz_expansion
+redacted_datasets
+redaction_bookmarks
+redaction_list_spill
+resilver_defer
+sha512
+skein
+spacemap_histogram
+spacemap_v2
+userobj_accounting
+vdev_zaps_v2
+zilsaxattr
+zpool_checkpoint
+zstd_compress
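
A pool can be pinned to this feature set with the compatibility property, for example at creation time: zpool create -o compatibility=openzfs-2.4 tank <vdevs> (the pool name and vdevs are placeholders).
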
diff --git a/sys/contrib/openzfs/cmd/zstream/Makefile.am b/sys/contrib/openzfs/cmd/zstream/Makefile.am
index be3539fe905d..80ef1ea7ca11 100644
--- a/sys/contrib/openzfs/cmd/zstream/Makefile.am
+++ b/sys/contrib/openzfs/cmd/zstream/Makefile.am
@@ -18,6 +18,7 @@ zstream_LDADD = \
libzpool.la \
libnvpair.la
-PHONY += install-exec-hook
-install-exec-hook:
+cmd-zstream-install-exec-hook:
cd $(DESTDIR)$(sbindir) && $(LN_S) -f zstream zstreamdump
+
+INSTALL_EXEC_HOOKS += cmd-zstream-install-exec-hook
diff --git a/sys/contrib/openzfs/config/always-arch.m4 b/sys/contrib/openzfs/config/always-arch.m4
index 9f413eeddf95..1ee6099ca8b2 100644
--- a/sys/contrib/openzfs/config/always-arch.m4
+++ b/sys/contrib/openzfs/config/always-arch.m4
@@ -39,3 +39,20 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_ARCH], [
AM_CONDITIONAL([TARGET_CPU_SPARC64], test $TARGET_CPU = sparc64)
AM_CONDITIONAL([TARGET_CPU_ARM], test $TARGET_CPU = arm)
])
+dnl #
+dnl # Check for conflicting environment variables
+dnl #
+dnl # If the ARCH environment variable is set, the kernel Makefile in /usr/src/kernel
+dnl # can misbehave during the zfs ./configure module-compilation test.
+AC_DEFUN([ZFS_AC_CONFIG_CHECK_ARCH_VAR], [
+ AC_MSG_CHECKING([for conflicting environment variables])
+ if test -n "$ARCH"; then
+ AC_MSG_RESULT([warning])
+ AC_MSG_WARN(m4_normalize([ARCH environment variable is set to "$ARCH".
+ This can cause the kernel module build support check to fail.
+ Please unset it.]))
+ else
+ AC_MSG_RESULT([done])
+ fi
+])
+
diff --git a/sys/contrib/openzfs/config/always-compiler-options.m4 b/sys/contrib/openzfs/config/always-compiler-options.m4
index 6383b12506ee..37fa079e0f4c 100644
--- a/sys/contrib/openzfs/config/always-compiler-options.m4
+++ b/sys/contrib/openzfs/config/always-compiler-options.m4
@@ -156,6 +156,34 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH], [
])
dnl #
+dnl # Check if kernel cc supports -Wno-format-zero-length option.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_FORMAT_ZERO_LENGTH], [
+ saved_cc="$CC"
+ AS_IF(
+ [ test -n "$KERNEL_CC" ], [ CC="$KERNEL_CC" ],
+ [ test -n "$KERNEL_LLVM" ], [ CC="clang" ],
+ [ CC="gcc" ]
+ )
+ AC_MSG_CHECKING([whether $CC supports -Wno-format-zero-length])
+
+ saved_flags="$CFLAGS"
+ CFLAGS="$CFLAGS -Werror -Wno-format-zero-length"
+
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
+ KERNEL_NO_FORMAT_ZERO_LENGTH=-Wno-format-zero-length
+ AC_MSG_RESULT([yes])
+ ], [
+ KERNEL_NO_FORMAT_ZERO_LENGTH=
+ AC_MSG_RESULT([no])
+ ])
+
+ CC="$saved_cc"
+ CFLAGS="$saved_flags"
+ AC_SUBST([KERNEL_NO_FORMAT_ZERO_LENGTH])
+])
+
+dnl #
dnl # Check if cc supports -Wno-clobbered option.
dnl #
dnl # We actually invoke it with the -Wclobbered option
@@ -231,20 +259,17 @@ dnl #
dnl # Check if kernel cc supports -Winfinite-recursion option.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_INFINITE_RECURSION], [
- AC_MSG_CHECKING([whether $KERNEL_CC supports -Winfinite-recursion])
-
saved_cc="$CC"
+ AS_IF(
+ [ test -n "$KERNEL_CC" ], [ CC="$KERNEL_CC" ],
+ [ test -n "$KERNEL_LLVM" ], [ CC="clang" ],
+ [ CC="gcc" ]
+ )
+ AC_MSG_CHECKING([whether $CC supports -Winfinite-recursion])
+
saved_flags="$CFLAGS"
- CC="gcc"
CFLAGS="$CFLAGS -Werror -Winfinite-recursion"
- AS_IF([ test -n "$KERNEL_CC" ], [
- CC="$KERNEL_CC"
- ])
- AS_IF([ test -n "$KERNEL_LLVM" ], [
- CC="clang"
- ])
-
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
KERNEL_INFINITE_RECURSION=-Winfinite-recursion
AC_DEFINE([HAVE_KERNEL_INFINITE_RECURSION], 1,
@@ -329,20 +354,17 @@ dnl #
dnl # Check if kernel cc supports -fno-ipa-sra option.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA], [
- AC_MSG_CHECKING([whether $KERNEL_CC supports -fno-ipa-sra])
-
saved_cc="$CC"
+ AS_IF(
+ [ test -n "$KERNEL_CC" ], [ CC="$KERNEL_CC" ],
+ [ test -n "$KERNEL_LLVM" ], [ CC="clang" ],
+ [ CC="gcc" ]
+ )
+ AC_MSG_CHECKING([whether $CC supports -fno-ipa-sra])
+
saved_flags="$CFLAGS"
- CC="gcc"
CFLAGS="$CFLAGS -Werror -fno-ipa-sra"
- AS_IF([ test -n "$KERNEL_CC" ], [
- CC="$KERNEL_CC"
- ])
- AS_IF([ test -n "$KERNEL_LLVM" ], [
- CC="clang"
- ])
-
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
KERNEL_NO_IPA_SRA=-fno-ipa-sra
AC_MSG_RESULT([yes])
diff --git a/sys/contrib/openzfs/config/kernel-blkdev.m4 b/sys/contrib/openzfs/config/kernel-blkdev.m4
index 83190c6fbe3f..02011bf39fb2 100644
--- a/sys/contrib/openzfs/config/kernel-blkdev.m4
+++ b/sys/contrib/openzfs/config/kernel-blkdev.m4
@@ -29,9 +29,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG], [
const char *path = "path";
fmode_t mode = 0;
void *holder = NULL;
- struct blk_holder_ops h;
- bdev = blkdev_get_by_path(path, mode, holder, &h);
+ bdev = blkdev_get_by_path(path, mode, holder, NULL);
])
])
@@ -48,9 +47,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_OPEN_BY_PATH], [
const char *path = "path";
fmode_t mode = 0;
void *holder = NULL;
- struct blk_holder_ops h;
- bdh = bdev_open_by_path(path, mode, holder, &h);
+ bdh = bdev_open_by_path(path, mode, holder, NULL);
])
])
@@ -68,9 +66,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_FILE_OPEN_BY_PATH], [
const char *path = "path";
fmode_t mode = 0;
void *holder = NULL;
- struct blk_holder_ops h;
- file = bdev_file_open_by_path(path, mode, holder, &h);
+ file = bdev_file_open_by_path(path, mode, holder, NULL);
])
])
diff --git a/sys/contrib/openzfs/config/kernel-dentry-operations.m4 b/sys/contrib/openzfs/config/kernel-dentry-operations.m4
index aa5a9f2aff39..6d87ad0e0710 100644
--- a/sys/contrib/openzfs/config/kernel-dentry-operations.m4
+++ b/sys/contrib/openzfs/config/kernel-dentry-operations.m4
@@ -24,6 +24,9 @@ dnl #
dnl # 2.6.38 API change
dnl # Added d_set_d_op() helper function.
dnl #
+dnl # 6.17 API change
+dnl # d_set_d_op() removed. No direct replacement.
+dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_D_SET_D_OP], [
ZFS_LINUX_TEST_SRC([d_set_d_op], [
#include <linux/dcache.h>
@@ -34,22 +37,21 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_D_SET_D_OP], [
AC_DEFUN([ZFS_AC_KERNEL_D_SET_D_OP], [
AC_MSG_CHECKING([whether d_set_d_op() is available])
- ZFS_LINUX_TEST_RESULT_SYMBOL([d_set_d_op],
- [d_set_d_op], [fs/dcache.c], [
+ ZFS_LINUX_TEST_RESULT([d_set_d_op], [
AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_D_SET_D_OP, 1,
+ [Define if d_set_d_op() is available])
], [
- ZFS_LINUX_TEST_ERROR([d_set_d_op])
+ AC_MSG_RESULT(no)
])
])
AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY], [
ZFS_AC_KERNEL_SRC_D_OBTAIN_ALIAS
ZFS_AC_KERNEL_SRC_D_SET_D_OP
- ZFS_AC_KERNEL_SRC_S_D_OP
])
AC_DEFUN([ZFS_AC_KERNEL_DENTRY], [
ZFS_AC_KERNEL_D_OBTAIN_ALIAS
ZFS_AC_KERNEL_D_SET_D_OP
- ZFS_AC_KERNEL_S_D_OP
])
diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4
index e3e7625db7d8..35819e4d68c5 100644
--- a/sys/contrib/openzfs/config/kernel.m4
+++ b/sys/contrib/openzfs/config/kernel.m4
@@ -70,6 +70,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_COMMIT_METADATA
ZFS_AC_KERNEL_SRC_SETATTR_PREPARE
ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED
+ ZFS_AC_KERNEL_SRC_DENTRY
ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE
ZFS_AC_KERNEL_SRC_SECURITY_INODE
ZFS_AC_KERNEL_SRC_FST_MOUNT
@@ -188,6 +189,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_COMMIT_METADATA
ZFS_AC_KERNEL_SETATTR_PREPARE
ZFS_AC_KERNEL_INSERT_INODE_LOCKED
+ ZFS_AC_KERNEL_DENTRY
ZFS_AC_KERNEL_TRUNCATE_SETSIZE
ZFS_AC_KERNEL_SECURITY_INODE
ZFS_AC_KERNEL_FST_MOUNT
diff --git a/sys/contrib/openzfs/config/user-statx.m4 b/sys/contrib/openzfs/config/user-statx.m4
index 0315f93e0c20..1ba74a40e9b8 100644
--- a/sys/contrib/openzfs/config/user-statx.m4
+++ b/sys/contrib/openzfs/config/user-statx.m4
@@ -2,7 +2,7 @@ dnl #
dnl # Check for statx() function and STATX_MNT_ID availability
dnl #
AC_DEFUN([ZFS_AC_CONFIG_USER_STATX], [
- AC_CHECK_HEADERS([linux/stat.h],
+ AC_CHECK_HEADERS([sys/stat.h],
[have_stat_headers=yes],
[have_stat_headers=no])
@@ -14,7 +14,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_STATX], [
AC_MSG_CHECKING([for STATX_MNT_ID])
AC_COMPILE_IFELSE([
AC_LANG_PROGRAM([[
- #include <linux/stat.h>
+ #include <sys/stat.h>
]], [[
struct statx stx;
int mask = STATX_MNT_ID;
@@ -29,6 +29,6 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_STATX], [
])
])
], [
- AC_MSG_WARN([linux/stat.h not found; skipping statx support])
+ AC_MSG_WARN([sys/stat.h not found; skipping statx support])
])
]) dnl end AC_DEFUN
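
Switching the probe (and the SPL/libspl headers later in this diff) from <linux/stat.h> to <sys/stat.h> presumably avoids mixing the kernel uapi definitions with glibc's own statx declarations. A user-space rendition of the configure test above, assuming a glibc new enough to expose statx() and STATX_MNT_ID through <sys/stat.h>:

    #include <fcntl.h>
    #include <sys/stat.h>

    int
    has_mnt_id(const char *path)
    {
            struct statx stx;

            if (statx(AT_FDCWD, path, 0, STATX_MNT_ID, &stx) != 0)
                    return (-1);
            return ((stx.stx_mask & STATX_MNT_ID) != 0);
    }
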
diff --git a/sys/contrib/openzfs/config/zfs-build.m4 b/sys/contrib/openzfs/config/zfs-build.m4
index 7cf1b02d8757..adf6576f3193 100644
--- a/sys/contrib/openzfs/config/zfs-build.m4
+++ b/sys/contrib/openzfs/config/zfs-build.m4
@@ -256,6 +256,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [
ZFS_AC_CONFIG_ALWAYS_CC_FRAME_LARGER_THAN
ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_TRUNCATION
ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH
+ ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_FORMAT_ZERO_LENGTH
ZFS_AC_CONFIG_ALWAYS_CC_FORMAT_OVERFLOW
ZFS_AC_CONFIG_ALWAYS_CC_NO_OMIT_FRAME_POINTER
ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA
@@ -265,6 +266,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [
ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD
ZFS_AC_CONFIG_ALWAYS_SYSTEM
ZFS_AC_CONFIG_ALWAYS_ARCH
+ ZFS_AC_CONFIG_CHECK_ARCH_VAR
ZFS_AC_CONFIG_ALWAYS_PYTHON
ZFS_AC_CONFIG_ALWAYS_PYZFS
ZFS_AC_CONFIG_ALWAYS_SED
diff --git a/sys/contrib/openzfs/contrib/debian/copyright b/sys/contrib/openzfs/contrib/debian/copyright
index 65c7d209d8eb..006f32fdf924 100644
--- a/sys/contrib/openzfs/contrib/debian/copyright
+++ b/sys/contrib/openzfs/contrib/debian/copyright
@@ -4,7 +4,7 @@ The detailed contributor information can be found in [2][3].
Files: contrib/debian/*
Copyright:
- 2013-2016, Aron Xu <aron@debian.org>
+ 2013-2025, Aron Xu <aron@debian.org>
2016, Petter Reinholdtsen <pere@hungry.com>
2013, Carlos Alberto Lopez Perez <clopez@igalia.com>
2013, Turbo Fredriksson <turbo@bayour.com>
@@ -12,6 +12,8 @@ Copyright:
2011-2013, Darik Horn <dajhorn@vanadac.com>
2018-2019, Mo Zhou <cdluminate@gmail.com>
2018-2020, Mo Zhou <lumin@debian.org>
+ 2023-2024, Shengqi Chen <harry-chen@outlook.com>
+ 2024-2025, Shengqi Chen <harry@debian.org>
License: GPL-2+
[1] https://tracker.debian.org/pkg/zfs-linux
diff --git a/sys/contrib/openzfs/contrib/debian/not-installed b/sys/contrib/openzfs/contrib/debian/not-installed
index 88557f76fcae..9c08da5a6a7b 100644
--- a/sys/contrib/openzfs/contrib/debian/not-installed
+++ b/sys/contrib/openzfs/contrib/debian/not-installed
@@ -1,4 +1,4 @@
-usr/bin/arc_summary.py
+usr/bin/zarcsummary.py
usr/share/zfs/zfs-helpers.sh
etc/default/zfs
etc/init.d
@@ -9,4 +9,4 @@ etc/zfs/vdev_id.conf.sas_direct.example
etc/zfs/vdev_id.conf.sas_switch.example
etc/zfs/vdev_id.conf.scsi.example
etc/zfs/zfs-functions
-lib/systemd/system/zfs-import.service
+usr/lib/systemd/system/zfs-import.service
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libnvpair3.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libnvpair3.install.in
index ed7b541e3607..fce542270dd8 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-libnvpair3.install.in
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-libnvpair3.install.in
@@ -1 +1 @@
-lib/@DEB_HOST_MULTIARCH@/libnvpair.so.*
+usr/lib/@DEB_HOST_MULTIARCH@/libnvpair.so.*
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.install b/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.install
index c33123f69a8d..bafdebe9bb91 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.install
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.install
@@ -1,2 +1,2 @@
-lib/*/security/pam_zfs_key.so
+usr/lib/*/security/pam_zfs_key.so
usr/share/pam-configs/zfs_key
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.postinst b/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.postinst
index 03893454eee9..db4db73d6d5a 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.postinst
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-libpam-zfs.postinst
@@ -1,7 +1,7 @@
#!/bin/sh
set -e
-if ! $(ldd "/lib/$(dpkg-architecture -qDEB_HOST_MULTIARCH)/security/pam_zfs_key.so" | grep -q "libasan") ; then
+if ! $(ldd "/usr/lib/$(dpkg-architecture -qDEB_HOST_MULTIARCH)/security/pam_zfs_key.so" | grep -q "libasan") ; then
pam-auth-update --package
fi
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libuutil3.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libuutil3.install.in
index a197d030d743..bb33386791e1 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-libuutil3.install.in
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-libuutil3.install.in
@@ -1 +1 @@
-lib/@DEB_HOST_MULTIARCH@/libuutil.so.*
+usr/lib/@DEB_HOST_MULTIARCH@/libuutil.so.*
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs-dev.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs-dev.install.in
index eaa8c3925e24..5673e2661c6a 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs-dev.install.in
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs-dev.install.in
@@ -1,3 +1,5 @@
-lib/@DEB_HOST_MULTIARCH@/*.a usr/lib/@DEB_HOST_MULTIARCH@
+usr/lib/@DEB_HOST_MULTIARCH@/*.a
+usr/lib/@DEB_HOST_MULTIARCH@/*.so
+usr/lib/@DEB_HOST_MULTIARCH@/pkgconfig
usr/include
-usr/lib/@DEB_HOST_MULTIARCH@
+
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.install.in
index 6765aaee59cc..a9054c14cc73 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.install.in
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.install.in
@@ -1,2 +1,2 @@
-lib/@DEB_HOST_MULTIARCH@/libzfs.so.*
-lib/@DEB_HOST_MULTIARCH@/libzfs_core.so.*
+usr/lib/@DEB_HOST_MULTIARCH@/libzfs.so.*
+usr/lib/@DEB_HOST_MULTIARCH@/libzfs_core.so.*
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzfsbootenv1.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libzfsbootenv1.install.in
index 49216742433f..b61b8ab63265 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-libzfsbootenv1.install.in
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzfsbootenv1.install.in
@@ -1 +1 @@
-lib/@DEB_HOST_MULTIARCH@/libzfsbootenv.so.*
+usr/lib/@DEB_HOST_MULTIARCH@/libzfsbootenv.so.*
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.install.in
index b9e872df9ba8..0e087a2709b3 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.install.in
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.install.in
@@ -1 +1 @@
-lib/@DEB_HOST_MULTIARCH@/libzpool.so.*
+usr/lib/@DEB_HOST_MULTIARCH@/libzpool.so.*
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfs-test.install b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-test.install
index b3afef50dbd4..496cab2ad5e4 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-zfs-test.install
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-test.install
@@ -1,4 +1,4 @@
-sbin/ztest
+usr/sbin/ztest
usr/bin/raidz_test
usr/share/man/man1/raidz_test.1
usr/share/man/man1/test-runner.1
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.install b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.install
index a348ba828ee5..30699a8a98da 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.install
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.install
@@ -1,5 +1,5 @@
etc/zfs/zed.d/*
-lib/systemd/system/zfs-zed.service
+usr/lib/systemd/system/zfs-zed.service
usr/lib/zfs-linux/zed.d/*
usr/sbin/zed
usr/share/man/man8/zed.8
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
index 37284a78ad18..6810108f2c5d 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
@@ -1,48 +1,48 @@
etc/default/zfs
etc/zfs/zfs-functions
etc/zfs/zpool.d/
-lib/systemd/system-generators/
-lib/systemd/system-preset/
-lib/systemd/system/zfs-import-cache.service
-lib/systemd/system/zfs-import-scan.service
-lib/systemd/system/zfs-import.target
-lib/systemd/system/zfs-load-key.service
-lib/systemd/system/zfs-mount.service
-lib/systemd/system/zfs-mount@.service
-lib/systemd/system/zfs-scrub-monthly@.timer
-lib/systemd/system/zfs-scrub-weekly@.timer
-lib/systemd/system/zfs-scrub@.service
-lib/systemd/system/zfs-trim-monthly@.timer
-lib/systemd/system/zfs-trim-weekly@.timer
-lib/systemd/system/zfs-trim@.service
-lib/systemd/system/zfs-share.service
-lib/systemd/system/zfs-volume-wait.service
-lib/systemd/system/zfs-volumes.target
-lib/systemd/system/zfs.target
-lib/udev/
-sbin/fsck.zfs
-sbin/mount.zfs
-sbin/zdb
-sbin/zfs
-sbin/zfs_ids_to_path
-sbin/zgenhostid
-sbin/zhack
-sbin/zinject
-sbin/zpool
-sbin/zstream
-sbin/zstreamdump
+usr/lib/systemd/system-generators/
+usr/lib/systemd/system-preset/
+usr/lib/systemd/system/zfs-import-cache.service
+usr/lib/systemd/system/zfs-import-scan.service
+usr/lib/systemd/system/zfs-import.target
+usr/lib/systemd/system/zfs-load-key.service
+usr/lib/systemd/system/zfs-mount.service
+usr/lib/systemd/system/zfs-mount@.service
+usr/lib/systemd/system/zfs-scrub-monthly@.timer
+usr/lib/systemd/system/zfs-scrub-weekly@.timer
+usr/lib/systemd/system/zfs-scrub@.service
+usr/lib/systemd/system/zfs-trim-monthly@.timer
+usr/lib/systemd/system/zfs-trim-weekly@.timer
+usr/lib/systemd/system/zfs-trim@.service
+usr/lib/systemd/system/zfs-share.service
+usr/lib/systemd/system/zfs-volume-wait.service
+usr/lib/systemd/system/zfs-volumes.target
+usr/lib/systemd/system/zfs.target
+usr/lib/udev/
+usr/sbin/fsck.zfs
+usr/sbin/mount.zfs
+usr/sbin/zdb
+usr/sbin/zfs
+usr/sbin/zfs_ids_to_path
+usr/sbin/zgenhostid
+usr/sbin/zhack
+usr/sbin/zinject
+usr/sbin/zpool
+usr/sbin/zstream
+usr/sbin/zstreamdump
usr/bin/zvol_wait
-usr/lib/modules-load.d/ lib/
+usr/lib/modules-load.d/
usr/lib/zfs-linux/zpool.d/
usr/lib/zfs-linux/zpool_influxdb
usr/lib/zfs-linux/zfs_prepare_disk
-usr/sbin/arc_summary
-usr/sbin/arcstat
-usr/sbin/dbufstat
-usr/sbin/zilstat
+usr/bin/zarcsummary
+usr/bin/zarcstat
+usr/bin/dbufstat usr/sbin
+usr/bin/zilstat
usr/share/zfs/compatibility.d/
usr/share/bash-completion/completions
-usr/share/man/man1/arcstat.1
+usr/share/man/man1/zarcstat.1
usr/share/man/man1/zhack.1
usr/share/man/man1/zvol_wait.1
usr/share/man/man5/
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.links b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.links
new file mode 100644
index 000000000000..54099e6573b0
--- /dev/null
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.links
@@ -0,0 +1,3 @@
+usr/sbin/zfs usr/bin/zfs
+usr/sbin/zpool usr/bin/zpool
+usr/lib/zfs-linux/zpool_influxdb usr/bin/zpool_influxdb
diff --git a/sys/contrib/openzfs/contrib/debian/rules.in b/sys/contrib/openzfs/contrib/debian/rules.in
index 3226d604546c..5087a7e18e16 100755
--- a/sys/contrib/openzfs/contrib/debian/rules.in
+++ b/sys/contrib/openzfs/contrib/debian/rules.in
@@ -37,18 +37,19 @@ override_dh_auto_configure:
@# Build the userland, but don't build the kernel modules.
dh_auto_configure -- @CFGOPTS@ \
--bindir=/usr/bin \
- --sbindir=/sbin \
- --libdir=/lib/"$(DEB_HOST_MULTIARCH)" \
- --with-udevdir=/lib/udev \
+ --sbindir=/usr/sbin \
+ --with-mounthelperdir=/usr/sbin \
+ --libdir=/usr/lib/"$(DEB_HOST_MULTIARCH)" \
+ --with-udevdir=/usr/lib/udev \
--with-zfsexecdir=/usr/lib/zfs-linux \
--enable-systemd \
--enable-pyzfs \
--with-python=python3 \
- --with-pammoduledir='/lib/$(DEB_HOST_MULTIARCH)/security' \
+ --with-pammoduledir='/usr/lib/$(DEB_HOST_MULTIARCH)/security' \
--with-pkgconfigdir='/usr/lib/$(DEB_HOST_MULTIARCH)/pkgconfig' \
- --with-systemdunitdir=/lib/systemd/system \
- --with-systemdpresetdir=/lib/systemd/system-preset \
- --with-systemdgeneratordir=/lib/systemd/system-generators \
+ --with-systemdunitdir=/usr/lib/systemd/system \
+ --with-systemdpresetdir=/usr/lib/systemd/system-preset \
+ --with-systemdgeneratordir=/usr/lib/systemd/system-generators \
--with-config=user
for i in $(wildcard $(CURDIR)/debian/*.install.in) ; do \
@@ -77,23 +78,10 @@ override_dh_auto_install:
@# Install the utilities.
$(MAKE) install DESTDIR='$(CURDIR)/debian/tmp'
- # Move from bin_dir to /usr/sbin
- # Remove suffix (.py) as per policy 10.4 - Scripts
- # https://www.debian.org/doc/debian-policy/ch-files.html#s-scripts
- mkdir -p '$(CURDIR)/debian/tmp/usr/sbin/'
- mv '$(CURDIR)/debian/tmp/usr/bin/arc_summary' '$(CURDIR)/debian/tmp/usr/sbin/arc_summary'
- mv '$(CURDIR)/debian/tmp/usr/bin/arcstat' '$(CURDIR)/debian/tmp/usr/sbin/arcstat'
- mv '$(CURDIR)/debian/tmp/usr/bin/dbufstat' '$(CURDIR)/debian/tmp/usr/sbin/dbufstat'
- mv '$(CURDIR)/debian/tmp/usr/bin/zilstat' '$(CURDIR)/debian/tmp/usr/sbin/zilstat'
-
- @# Zed has dependencies outside of the system root.
- mv '$(CURDIR)/debian/tmp/sbin/zed' '$(CURDIR)/debian/tmp/usr/sbin/zed'
- sed -i 's|ExecStart=/sbin/|ExecStart=/usr/sbin/|g' '$(CURDIR)/debian/tmp/lib/systemd/system/zfs-zed.service'
-
@# Install the DKMS source.
@# We only want the files needed to build the modules
install -D -t '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)/scripts' \
- '$(CURDIR)/scripts/dkms.postbuild'
+ '$(CURDIR)/scripts/dkms.postbuild' '$(CURDIR)/scripts/objtool-wrapper.in'
$(foreach file,$(DKMSFILES),mv '$(CURDIR)/$(NAME)-$(DEB_VERSION_UPSTREAM)/$(file)' '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)' || exit 1;)
@# Only ever build Linux modules
@@ -108,8 +96,8 @@ override_dh_auto_install:
@# - zfs.release$
@# * Takes care of spaces and tabs
@# * Remove reference to ZFS_AC_PACKAGE
- awk '/^AC_CONFIG_FILES\(\[/,/^\]\)/ {\
- if ($$0 !~ /^(AC_CONFIG_FILES\(\[([ \t]+)?$$|\]\)([ \t]+)?$$|([ \t]+)?(include\/(Makefile|sys|os\/(Makefile|linux))|module\/|Makefile([ \t]+)?$$|zfs\.release([ \t]+)?$$))/) \
+ awk '/^AC_CONFIG_FILES\(\[/,/\]\)/ {\
+ if ($$0 !~ /^(AC_CONFIG_FILES\(\[([ \t]+)?$$|\]\)([ \t]+)?$$|([ \t]+)?(include\/(Makefile|sys|os\/(Makefile|linux))|module\/|Makefile([ \t]+)?$$|zfs\.release([ \t]+)?$$))|scripts\/objtool-wrapper.*\]\)$$/) \
{next} } {print}' \
'$(CURDIR)/$(NAME)-$(DEB_VERSION_UPSTREAM)/configure.ac' | sed '/ZFS_AC_PACKAGE/d' > '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)/configure.ac'
@# Set "SUBDIRS = module include" for CONFIG_KERNEL and remove SUBDIRS for all other configs.
@@ -131,11 +119,6 @@ override_dh_auto_install:
cd '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)'; ./autogen.sh
rm -fr '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)/autom4te.cache'
- for i in `ls $(CURDIR)/debian/tmp/lib/$(DEB_HOST_MULTIARCH)/*.so`; do \
- ln -s '/lib/$(DEB_HOST_MULTIARCH)/'`readlink $${i}` '$(CURDIR)/debian/tmp/usr/lib/$(DEB_HOST_MULTIARCH)/'`basename $${i}`; \
- rm $${i}; \
- done
-
chmod a-x '$(CURDIR)/debian/tmp/etc/zfs/zfs-functions'
chmod a-x '$(CURDIR)/debian/tmp/etc/default/zfs'
@@ -159,7 +142,7 @@ override_dh_auto_clean:
@if test -e META.orig; then mv META.orig META; fi
override_dh_install:
- find debian/tmp/lib -name '*.la' -delete
+ find debian/tmp/usr/lib -name '*.la' -delete
dh_install
override_dh_missing:
@@ -173,8 +156,8 @@ override_dh_installinit:
dh_installinit -R --name zfs-zed
override_dh_installsystemd:
- mkdir -p debian/openzfs-zfsutils/lib/systemd/system
- ln -sr /dev/null debian/openzfs-zfsutils/lib/systemd/system/zfs-import.service
+ mkdir -p debian/openzfs-zfsutils/usr/lib/systemd/system
+ ln -sr /dev/null debian/openzfs-zfsutils/usr/lib/systemd/system/zfs-import.service
dh_installsystemd --no-stop-on-upgrade -X zfs-zed.service
dh_installsystemd --name zfs-zed
diff --git a/sys/contrib/openzfs/contrib/debian/tree/zfs-initramfs/usr/share/initramfs-tools/hooks/zdev b/sys/contrib/openzfs/contrib/debian/tree/zfs-initramfs/usr/share/initramfs-tools/hooks/zdev
index 0cf21a4211a8..d4f968aed8f2 100755
--- a/sys/contrib/openzfs/contrib/debian/tree/zfs-initramfs/usr/share/initramfs-tools/hooks/zdev
+++ b/sys/contrib/openzfs/contrib/debian/tree/zfs-initramfs/usr/share/initramfs-tools/hooks/zdev
@@ -5,7 +5,7 @@
PREREQ="udev"
PREREQ_UDEV_RULES="60-zvol.rules 69-vdev.rules"
-COPY_EXEC_LIST="/lib/udev/zvol_id /lib/udev/vdev_id"
+COPY_EXEC_LIST="/usr/lib/udev/zvol_id /usr/lib/udev/vdev_id"
# Generic result code.
RC=0
diff --git a/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in b/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in
index 4776087d9a76..db9bf0e20274 100644
--- a/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in
+++ b/sys/contrib/openzfs/contrib/initramfs/hooks/zfsunlock.in
@@ -8,3 +8,12 @@ fi
. /usr/share/initramfs-tools/hook-functions
copy_exec /usr/share/initramfs-tools/zfsunlock /usr/bin/zfsunlock
+
+if [ -f /etc/initramfs-tools/etc/motd ]; then
+ copy_file text /etc/initramfs-tools/etc/motd /etc/motd
+else
+ tmpf=$(mktemp)
+ echo "If you use zfs encrypted root filesystems, you can use \`zfsunlock\` to manually unlock them" > "$tmpf"
+ copy_file text "$tmpf" /etc/motd
+ rm -f "$tmpf"
+fi
diff --git a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
index a0bc172c6f44..88698dedabbc 100644
--- a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
+++ b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
@@ -391,7 +391,11 @@ static int
zfs_key_config_load(pam_handle_t *pamh, zfs_key_config_t *config,
int argc, const char **argv)
{
+#if defined(__FreeBSD__)
+ config->homes_prefix = strdup("zroot/home");
+#else
config->homes_prefix = strdup("rpool/home");
+#endif
if (config->homes_prefix == NULL) {
pam_syslog(pamh, LOG_ERR, "strdup failure");
return (PAM_SERVICE_ERR);
diff --git a/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/exceptions.py b/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/exceptions.py
index b26a37f5de10..26d66a452726 100644
--- a/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/exceptions.py
+++ b/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/exceptions.py
@@ -604,5 +604,4 @@ class RaidzExpansionRunning(ZFSError):
errno = ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS
message = "A raidz device is currently expanding"
-
# vim: softtabstop=4 tabstop=4 expandtab shiftwidth=4
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/time.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/time.h
index 2f5fe4619ef7..14b42f2e7087 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/time.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/time.h
@@ -63,6 +63,17 @@ typedef longlong_t hrtime_t;
#define NSEC_TO_TICK(nsec) ((nsec) / (NANOSEC / hz))
static __inline hrtime_t
+getlrtime(void)
+{
+ struct timespec ts;
+ hrtime_t nsec;
+
+ getnanouptime(&ts);
+ nsec = ((hrtime_t)ts.tv_sec * NANOSEC) + ts.tv_nsec;
+ return (nsec);
+}
+
+static __inline hrtime_t
gethrtime(void)
{
struct timespec ts;
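
getlrtime() is a coarse, low-cost counterpart to gethrtime(): it reads getnanouptime() here and ktime_get_coarse_ts64() in the Linux SPL version later in this diff, trading resolution for a cheaper clock read. A minimal sketch of the kind of caller this suits, using only the SPL time API shown here (the rate-limiting use case is illustrative, not taken from the patch):

    /* Fire at most once per second, using the cheap coarse clock. */
    static hrtime_t last_fire;

    static void
    maybe_fire(void)
    {
            hrtime_t now = getlrtime();

            if (now - last_fire >= NANOSEC) {
                    last_fire = now;
                    /* ... rate-limited work ... */
            }
    }
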
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h
index 16e8a319a5f8..152e5a606f0e 100644
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h
@@ -61,32 +61,6 @@
#endif
/*
- * 2.6.30 API change,
- * The const keyword was added to the 'struct dentry_operations' in
- * the dentry structure. To handle this we define an appropriate
- * dentry_operations_t typedef which can be used.
- */
-typedef const struct dentry_operations dentry_operations_t;
-
-/*
- * 2.6.38 API addition,
- * Added d_clear_d_op() helper function which clears some flags and the
- * registered dentry->d_op table. This is required because d_set_d_op()
- * issues a warning when the dentry operations table is already set.
- * For the .zfs control directory to work properly we must be able to
- * override the default operations table and register custom .d_automount
- * and .d_revalidate callbacks.
- */
-static inline void
-d_clear_d_op(struct dentry *dentry)
-{
- dentry->d_op = NULL;
- dentry->d_flags &= ~(
- DCACHE_OP_HASH | DCACHE_OP_COMPARE |
- DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE);
-}
-
-/*
* Walk and invalidate all dentry aliases of an inode
* unless it's a mountpoint
*/
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h b/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h
index 563e0a19663d..c883836c2f83 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h
@@ -130,7 +130,7 @@ RW_READ_HELD(krwlock_t *rwp)
/*
* The Linux rwsem implementation does not require a matching destroy.
*/
-#define rw_destroy(rwp) ((void) 0)
+#define rw_destroy(rwp) ASSERT(!(RW_LOCK_HELD(rwp)))
/*
* Upgrading a rwsem from a reader to a writer is not supported by the
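
rw_destroy() on Linux was previously a no-op; it now asserts that the rwsem is no longer held, catching teardown-order bugs. A minimal sketch of the expected lifecycle, using the standard SPL rwlock API (the surrounding context is illustrative, not from the patch):

    krwlock_t lock;

    rw_init(&lock, NULL, RW_DEFAULT, NULL);
    rw_enter(&lock, RW_WRITER);
    /* ... */
    rw_exit(&lock);          /* drop the lock first ...           */
    rw_destroy(&lock);       /* ... or the new ASSERT will trip   */
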
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h b/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h
index 087389b57b34..ad2815e46394 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h
@@ -25,6 +25,6 @@
#ifndef _SPL_STAT_H
#define _SPL_STAT_H
-#include <linux/stat.h>
+#include <sys/stat.h>
#endif /* SPL_STAT_H */
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/time.h b/sys/contrib/openzfs/include/os/linux/spl/sys/time.h
index 33b273b53996..4edc42a8aef9 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/time.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/time.h
@@ -80,6 +80,14 @@ gethrestime_sec(void)
}
static inline hrtime_t
+getlrtime(void)
+{
+ inode_timespec_t ts;
+ ktime_get_coarse_ts64(&ts);
+ return (((hrtime_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec);
+}
+
+static inline hrtime_t
gethrtime(void)
{
struct timespec64 ts;
diff --git a/sys/contrib/openzfs/include/sys/dmu_impl.h b/sys/contrib/openzfs/include/sys/dmu_impl.h
index 21a8b16a3ee6..bae872bd1907 100644
--- a/sys/contrib/openzfs/include/sys/dmu_impl.h
+++ b/sys/contrib/openzfs/include/sys/dmu_impl.h
@@ -168,12 +168,10 @@ extern "C" {
* dn_allocated_txg
* dn_free_txg
* dn_assigned_txg
- * dn_dirty_txg
+ * dn_dirtycnt
* dd_assigned_tx
* dn_notxholds
* dn_nodnholds
- * dn_dirtyctx
- * dn_dirtyctx_firstset
* (dn_phys copy fields?)
* (dn_phys contents?)
* held from:
diff --git a/sys/contrib/openzfs/include/sys/dnode.h b/sys/contrib/openzfs/include/sys/dnode.h
index 76218c8b09ca..8bd1db5b7165 100644
--- a/sys/contrib/openzfs/include/sys/dnode.h
+++ b/sys/contrib/openzfs/include/sys/dnode.h
@@ -141,12 +141,6 @@ struct dmu_buf_impl;
struct objset;
struct zio;
-enum dnode_dirtycontext {
- DN_UNDIRTIED,
- DN_DIRTY_OPEN,
- DN_DIRTY_SYNC
-};
-
/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
#define DNODE_FLAG_USED_BYTES (1 << 0)
#define DNODE_FLAG_USERUSED_ACCOUNTED (1 << 1)
@@ -340,11 +334,9 @@ struct dnode {
uint64_t dn_allocated_txg;
uint64_t dn_free_txg;
uint64_t dn_assigned_txg;
- uint64_t dn_dirty_txg; /* txg dnode was last dirtied */
+ uint8_t dn_dirtycnt;
kcondvar_t dn_notxholds;
kcondvar_t dn_nodnholds;
- enum dnode_dirtycontext dn_dirtyctx;
- const void *dn_dirtyctx_firstset; /* dbg: contents meaningless */
/* protected by own devices */
zfs_refcount_t dn_tx_holds;
@@ -440,7 +432,6 @@ void dnode_rele_and_unlock(dnode_t *dn, const void *tag, boolean_t evicting);
int dnode_try_claim(objset_t *os, uint64_t object, int slots);
boolean_t dnode_is_dirty(dnode_t *dn);
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
-void dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, const void *tag);
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
@@ -468,9 +459,6 @@ void dnode_free_interior_slots(dnode_t *dn);
void dnode_set_storage_type(dnode_t *dn, dmu_object_type_t type);
-#define DNODE_IS_DIRTY(_dn) \
- ((_dn)->dn_dirty_txg >= spa_syncing_txg((_dn)->dn_objset->os_spa))
-
#define DNODE_LEVEL_IS_CACHEABLE(_dn, _level) \
((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \
(((_level) > 0 || DMU_OT_IS_METADATA((_dn)->dn_type)) && \
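
The per-dnode dirty bookkeeping is slimmed down: the last-dirtied txg (dn_dirty_txg), the dirty-context enum, and the DNODE_IS_DIRTY() macro are replaced by a small dn_dirtycnt counter and the existing dnode_is_dirty() helper. A hedged sketch of what the check presumably reduces to (the counter's exact semantics are an assumption based on the field rename; the implementation is not part of this excerpt):

    /* Assumed: dn_dirtycnt counts txgs in which the dnode is currently dirty. */
    static inline boolean_t
    dnode_is_dirty_sketch(dnode_t *dn)
    {
            return (dn->dn_dirtycnt != 0);
    }
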
diff --git a/sys/contrib/openzfs/include/sys/dsl_deleg.h b/sys/contrib/openzfs/include/sys/dsl_deleg.h
index ae729b9f32ff..36dd6211219d 100644
--- a/sys/contrib/openzfs/include/sys/dsl_deleg.h
+++ b/sys/contrib/openzfs/include/sys/dsl_deleg.h
@@ -46,6 +46,7 @@ extern "C" {
#define ZFS_DELEG_PERM_MOUNT "mount"
#define ZFS_DELEG_PERM_SHARE "share"
#define ZFS_DELEG_PERM_SEND "send"
+#define ZFS_DELEG_PERM_SEND_RAW "send:raw"
#define ZFS_DELEG_PERM_RECEIVE "receive"
#define ZFS_DELEG_PERM_RECEIVE_APPEND "receive:append"
#define ZFS_DELEG_PERM_ALLOW "allow"
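
With ZFS_DELEG_PERM_SEND_RAW defined here and wired into the delegation tables in zfs_main.c above (ZFS_DELEG_NOTE_SEND_RAW), a user can be delegated permission to produce only raw (encrypted) send streams, separately from the broader send permission. A hypothetical usage example (user and dataset names are illustrative): zfs allow backupuser send:raw tank/encrypted.
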
diff --git a/sys/contrib/openzfs/include/sys/fm/fs/zfs.h b/sys/contrib/openzfs/include/sys/fm/fs/zfs.h
index 659c64bf15a6..a771b11420fd 100644
--- a/sys/contrib/openzfs/include/sys/fm/fs/zfs.h
+++ b/sys/contrib/openzfs/include/sys/fm/fs/zfs.h
@@ -58,6 +58,7 @@ extern "C" {
#define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure"
#define FM_EREPORT_ZFS_LOG_REPLAY "log_replay"
#define FM_EREPORT_ZFS_CONFIG_CACHE_WRITE "config_cache_write"
+#define FM_EREPORT_ZFS_SITOUT "sitout"
#define FM_EREPORT_PAYLOAD_ZFS_POOL "pool"
#define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode"
diff --git a/sys/contrib/openzfs/include/sys/fs/zfs.h b/sys/contrib/openzfs/include/sys/fs/zfs.h
index fc359c10365a..49ab9d3db795 100644
--- a/sys/contrib/openzfs/include/sys/fs/zfs.h
+++ b/sys/contrib/openzfs/include/sys/fs/zfs.h
@@ -385,6 +385,8 @@ typedef enum {
VDEV_PROP_TRIM_SUPPORT,
VDEV_PROP_TRIM_ERRORS,
VDEV_PROP_SLOW_IOS,
+ VDEV_PROP_SIT_OUT,
+ VDEV_PROP_AUTOSIT,
VDEV_NUM_PROPS
} vdev_prop_t;
@@ -1673,6 +1675,7 @@ typedef enum {
ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS,
ZFS_ERR_ASHIFT_MISMATCH,
ZFS_ERR_STREAM_LARGE_MICROZAP,
+ ZFS_ERR_TOO_MANY_SITOUTS,
} zfs_errno_t;
/*
diff --git a/sys/contrib/openzfs/include/sys/vdev_impl.h b/sys/contrib/openzfs/include/sys/vdev_impl.h
index 4ab472bd6742..5a8c2f846be2 100644
--- a/sys/contrib/openzfs/include/sys/vdev_impl.h
+++ b/sys/contrib/openzfs/include/sys/vdev_impl.h
@@ -279,10 +279,12 @@ struct vdev {
uint64_t vdev_noalloc; /* device is passivated? */
uint64_t vdev_removing; /* device is being removed? */
uint64_t vdev_failfast; /* device failfast setting */
+ boolean_t vdev_autosit; /* automatic sitout management */
boolean_t vdev_rz_expanding; /* raidz is being expanded? */
boolean_t vdev_ishole; /* is a hole in the namespace */
uint64_t vdev_top_zap;
vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias */
+ uint64_t vdev_last_latency_check;
/* pool checkpoint related */
space_map_t *vdev_checkpoint_sm; /* contains reserved blocks */
@@ -431,6 +433,10 @@ struct vdev {
hrtime_t vdev_mmp_pending; /* 0 if write finished */
uint64_t vdev_mmp_kstat_id; /* to find kstat entry */
uint64_t vdev_expansion_time; /* vdev's last expansion time */
+ /* used to calculate average read latency */
+ uint64_t *vdev_prev_histo;
+ int64_t vdev_outlier_count; /* read outlier amongst peers */
+ hrtime_t vdev_read_sit_out_expire; /* end of sit out period */
list_node_t vdev_leaf_node; /* leaf vdev list */
/*
diff --git a/sys/contrib/openzfs/include/sys/vdev_raidz.h b/sys/contrib/openzfs/include/sys/vdev_raidz.h
index 3b02728cdbf3..df8c2aed4045 100644
--- a/sys/contrib/openzfs/include/sys/vdev_raidz.h
+++ b/sys/contrib/openzfs/include/sys/vdev_raidz.h
@@ -61,6 +61,9 @@ void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);
struct raidz_row *vdev_raidz_row_alloc(int, zio_t *);
void vdev_raidz_reflow_copy_scratch(spa_t *);
void raidz_dtl_reassessed(vdev_t *);
+boolean_t vdev_sit_out_reads(vdev_t *, zio_flag_t);
+void vdev_raidz_sit_child(vdev_t *, uint64_t);
+void vdev_raidz_unsit_child(vdev_t *);
extern const zio_vsd_ops_t vdev_raidz_vsd_ops;
diff --git a/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h b/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
index debce6f09a22..8c8dcfb077f6 100644
--- a/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
+++ b/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
@@ -119,6 +119,7 @@ typedef struct raidz_col {
uint8_t rc_need_orig_restore:1; /* need to restore from orig_data? */
uint8_t rc_force_repair:1; /* Write good data to this column */
uint8_t rc_allow_repair:1; /* Allow repair I/O to this column */
+ uint8_t rc_latency_outlier:1; /* Latency outlier for this device */
int rc_shadow_devidx; /* for double write during expansion */
int rc_shadow_error; /* for double write during expansion */
uint64_t rc_shadow_offset; /* for double write during expansion */
@@ -133,6 +134,7 @@ typedef struct raidz_row {
int rr_firstdatacol; /* First data column/parity count */
abd_t *rr_abd_empty; /* dRAID empty sector buffer */
int rr_nempty; /* empty sectors included in parity */
+ int rr_outlier_cnt; /* Count of latency outlier devices */
#ifdef ZFS_DEBUG
uint64_t rr_offset; /* Logical offset for *_io_verify() */
uint64_t rr_size; /* Physical size for *_io_verify() */
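
Taken together, the new vdev fields (vdev_read_sit_out_expire, vdev_outlier_count, vdev_prev_histo, vdev_autosit), the raidz column/row flags (rc_latency_outlier, rr_outlier_cnt), and the vdev_sit_out_reads()/vdev_raidz_sit_child()/vdev_raidz_unsit_child() declarations sketch a read "sit out" mechanism: a leaf that is a persistent read-latency outlier among its raidz peers can be skipped for normal reads for a period, with its data reconstructed from parity, either manually via the new sit_out vdev property or automatically via autosit. This summary is inferred from the declarations and the vdev_read_sit_out_secs tunable documented at the end of this diff; the implementation itself is not part of the excerpt.
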
diff --git a/sys/contrib/openzfs/include/sys/zfs_file.h b/sys/contrib/openzfs/include/sys/zfs_file.h
index a1f344c2bb79..67abe9988aaa 100644
--- a/sys/contrib/openzfs/include/sys/zfs_file.h
+++ b/sys/contrib/openzfs/include/sys/zfs_file.h
@@ -46,7 +46,7 @@ void zfs_file_close(zfs_file_t *fp);
int zfs_file_write(zfs_file_t *fp, const void *buf, size_t len, ssize_t *resid);
int zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t len, loff_t off,
- ssize_t *resid);
+ uint8_t ashift, ssize_t *resid);
int zfs_file_read(zfs_file_t *fp, void *buf, size_t len, ssize_t *resid);
int zfs_file_pread(zfs_file_t *fp, void *buf, size_t len, loff_t off,
ssize_t *resid);
diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h
index 353805fcb969..a8acb83b4c2f 100644
--- a/sys/contrib/openzfs/include/sys/zio.h
+++ b/sys/contrib/openzfs/include/sys/zio.h
@@ -82,7 +82,8 @@ gbh_nblkptrs(uint64_t size) {
static inline zio_eck_t *
gbh_eck(zio_gbh_phys_t *gbh, uint64_t size) {
ASSERT(IS_P2ALIGNED(size, sizeof (blkptr_t)));
- return ((zio_eck_t *)((uintptr_t)gbh + (size_t)size - sizeof (zio_eck_t)));
+ return ((zio_eck_t *)((uintptr_t)gbh + (size_t)size -
+ sizeof (zio_eck_t)));
}
static inline blkptr_t *
diff --git a/sys/contrib/openzfs/include/sys/zvol.h b/sys/contrib/openzfs/include/sys/zvol.h
index cdc9dba2a28d..5791246e99e4 100644
--- a/sys/contrib/openzfs/include/sys/zvol.h
+++ b/sys/contrib/openzfs/include/sys/zvol.h
@@ -53,7 +53,7 @@ extern int zvol_set_volsize(const char *, uint64_t);
extern int zvol_set_volthreading(const char *, boolean_t);
extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t);
extern int zvol_set_ro(const char *, boolean_t);
-extern zvol_state_handle_t *zvol_suspend(const char *);
+extern int zvol_suspend(const char *, zvol_state_handle_t **);
extern int zvol_resume(zvol_state_handle_t *);
extern void *zvol_tag(zvol_state_handle_t *);
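
zvol_suspend() now reports failure explicitly instead of returning a handle (or NULL). A minimal sketch of the new calling convention, based only on the prototype above (the surrounding context is illustrative):

    zvol_state_handle_t *zv = NULL;
    int error = zvol_suspend(name, &zv);

    if (error == 0) {
            /* zvol is quiesced; safe to operate on the dataset */
            /* ... */
            (void) zvol_resume(zv);
    }
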
diff --git a/sys/contrib/openzfs/include/zfs_deleg.h b/sys/contrib/openzfs/include/zfs_deleg.h
index f80fe46d35f8..a7bbf1620ad5 100644
--- a/sys/contrib/openzfs/include/zfs_deleg.h
+++ b/sys/contrib/openzfs/include/zfs_deleg.h
@@ -55,6 +55,7 @@ typedef enum {
ZFS_DELEG_NOTE_PROMOTE,
ZFS_DELEG_NOTE_RENAME,
ZFS_DELEG_NOTE_SEND,
+ ZFS_DELEG_NOTE_SEND_RAW,
ZFS_DELEG_NOTE_RECEIVE,
ZFS_DELEG_NOTE_ALLOW,
ZFS_DELEG_NOTE_USERPROP,
diff --git a/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h b/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h
index a605af962a6d..13cc0b46ac93 100644
--- a/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h
+++ b/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/stat.h
@@ -33,7 +33,7 @@
#ifdef HAVE_STATX
#include <fcntl.h>
-#include <linux/stat.h>
+#include <sys/stat.h>
#endif
/*
diff --git a/sys/contrib/openzfs/lib/libspl/include/sys/time.h b/sys/contrib/openzfs/lib/libspl/include/sys/time.h
index da80a5852ae5..062c6ec979fc 100644
--- a/sys/contrib/openzfs/lib/libspl/include/sys/time.h
+++ b/sys/contrib/openzfs/lib/libspl/include/sys/time.h
@@ -98,6 +98,15 @@ gethrestime_sec(void)
}
static inline hrtime_t
+getlrtime(void)
+{
+ struct timeval tv;
+ (void) gettimeofday(&tv, NULL);
+ return ((((uint64_t)tv.tv_sec) * NANOSEC) +
+ ((uint64_t)tv.tv_usec * NSEC_PER_USEC));
+}
+
+static inline hrtime_t
gethrtime(void)
{
struct timespec ts;
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
index ba161d1ef10f..184ea4a55b43 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
@@ -6117,7 +6117,9 @@
<enumerator name='VDEV_PROP_TRIM_SUPPORT' value='49'/>
<enumerator name='VDEV_PROP_TRIM_ERRORS' value='50'/>
<enumerator name='VDEV_PROP_SLOW_IOS' value='51'/>
- <enumerator name='VDEV_NUM_PROPS' value='52'/>
+ <enumerator name='VDEV_PROP_SIT_OUT' value='52'/>
+ <enumerator name='VDEV_PROP_AUTOSIT' value='53'/>
+ <enumerator name='VDEV_NUM_PROPS' value='54'/>
</enum-decl>
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
index 10b42720e963..ce154ae1a4cd 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
@@ -5549,6 +5549,8 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name,
/* Only use if provided by the RAIDZ VDEV above */
if (prop == VDEV_PROP_RAIDZ_EXPANDING)
return (ENOENT);
+ if (prop == VDEV_PROP_SIT_OUT)
+ return (ENOENT);
}
if (vdev_prop_index_to_string(prop, intval,
(const char **)&strval) != 0)
@@ -5718,8 +5720,16 @@ zpool_set_vdev_prop(zpool_handle_t *zhp, const char *vdevname,
nvlist_free(nvl);
nvlist_free(outnvl);
- if (ret)
- (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
+ if (ret) {
+ if (errno == ENOTSUP) {
+ zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
+ "property not supported for this vdev"));
+ (void) zfs_error(zhp->zpool_hdl, EZFS_PROPTYPE, errbuf);
+ } else {
+ (void) zpool_standard_error(zhp->zpool_hdl, errno,
+ errbuf);
+ }
+ }
return (ret);
}
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
index 4edddc2a759b..26f5135dff62 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
@@ -776,6 +776,11 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case ZFS_ERR_ASHIFT_MISMATCH:
zfs_verror(hdl, EZFS_ASHIFT_MISMATCH, fmt, ap);
break;
+ case ZFS_ERR_TOO_MANY_SITOUTS:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "too many disks "
+ "already sitting out"));
+ zfs_verror(hdl, EZFS_BUSY, fmt, ap);
+ break;
default:
zfs_error_aux(hdl, "%s", zfs_strerror(error));
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
diff --git a/sys/contrib/openzfs/lib/libzpool/kernel.c b/sys/contrib/openzfs/lib/libzpool/kernel.c
index fea2f81458f9..8ed374627264 100644
--- a/sys/contrib/openzfs/lib/libzpool/kernel.c
+++ b/sys/contrib/openzfs/lib/libzpool/kernel.c
@@ -1238,7 +1238,7 @@ zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
*/
int
zfs_file_pwrite(zfs_file_t *fp, const void *buf,
- size_t count, loff_t pos, ssize_t *resid)
+ size_t count, loff_t pos, uint8_t ashift, ssize_t *resid)
{
ssize_t rc, split, done;
int sectors;
@@ -1248,8 +1248,8 @@ zfs_file_pwrite(zfs_file_t *fp, const void *buf,
* system calls so that the process can be killed in between.
* This is used by ztest to simulate realistic failure modes.
*/
- sectors = count >> SPA_MINBLOCKSHIFT;
- split = (sectors > 0 ? rand() % sectors : 0) << SPA_MINBLOCKSHIFT;
+ sectors = count >> ashift;
+ split = (sectors > 0 ? rand() % sectors : 0) << ashift;
rc = pwrite64(fp->f_fd, buf, split, pos);
if (rc != -1) {
done = rc;
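
With the ashift argument threaded through zfs_file_pwrite(), the simulated partial write is split on a device-sector boundary: for ashift=12 the split point is a multiple of 4096 bytes (1 << 12) rather than the previous fixed 512-byte SPA_MINBLOCKSHIFT granularity, so injected short writes line up with the emulated sector size.
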
diff --git a/sys/contrib/openzfs/man/Makefile.am b/sys/contrib/openzfs/man/Makefile.am
index 6a7b2d3e46b7..9fb8fdb175b0 100644
--- a/sys/contrib/openzfs/man/Makefile.am
+++ b/sys/contrib/openzfs/man/Makefile.am
@@ -2,7 +2,7 @@ dist_noinst_man_MANS = \
%D%/man1/cstyle.1
dist_man_MANS = \
- %D%/man1/arcstat.1 \
+ %D%/man1/zarcstat.1 \
%D%/man1/raidz_test.1 \
%D%/man1/test-runner.1 \
%D%/man1/zhack.1 \
diff --git a/sys/contrib/openzfs/man/man1/cstyle.1 b/sys/contrib/openzfs/man/man1/cstyle.1
index 241c82edd5a8..8f29129ce175 100644
--- a/sys/contrib/openzfs/man/man1/cstyle.1
+++ b/sys/contrib/openzfs/man/man1/cstyle.1
@@ -21,7 +21,7 @@
.\"
.\" CDDL HEADER END
.\"
-.Dd May 26, 2021
+.Dd April 4, 2022
.Dt CSTYLE 1
.Os
.
diff --git a/sys/contrib/openzfs/man/man1/arcstat.1 b/sys/contrib/openzfs/man/man1/zarcstat.1
index f2474fbb701f..3633c5d417fe 100644
--- a/sys/contrib/openzfs/man/man1/arcstat.1
+++ b/sys/contrib/openzfs/man/man1/zarcstat.1
@@ -13,12 +13,12 @@
.\" Copyright (c) 2015 by Delphix. All rights reserved.
.\" Copyright (c) 2020 by AJ Jordan. All rights reserved.
.\"
-.Dd December 23, 2022
-.Dt ARCSTAT 1
+.Dd September 19, 2024
+.Dt ZARCSTAT 1
.Os
.
.Sh NAME
-.Nm arcstat
+.Nm zarcstat
.Nd report ZFS ARC and L2ARC statistics
.Sh SYNOPSIS
.Nm
diff --git a/sys/contrib/openzfs/man/man1/zhack.1 b/sys/contrib/openzfs/man/man1/zhack.1
index f58c0527649b..63658cf930e9 100644
--- a/sys/contrib/openzfs/man/man1/zhack.1
+++ b/sys/contrib/openzfs/man/man1/zhack.1
@@ -23,7 +23,7 @@
.\"
.\" lint-ok: WARNING: sections out of conventional order: Sh SYNOPSIS
.\"
-.Dd May 26, 2021
+.Dd May 3, 2023
.Dt ZHACK 1
.Os
.
@@ -122,6 +122,24 @@ Example:
.Nm zhack Cm label repair Fl cu Ar device
Fix checksums and undetach a device
.
+.It Xo
+.Nm zhack
+.Cm metaslab leak
+.Op Fl f
+.Ar pool
+.Xc
+Apply a fragmentation profile generated by
+.Sy zdb
+to the specified
+.Ar pool Ns
+\&.
+.Pp
+The
+.Fl f
+flag forces the profile to be applied even if the vdevs in the
+.Ar pool
+do not have the same number of metaslabs as the fragmentation profile.
+.
.El
.
.Sh GLOBAL OPTIONS
@@ -143,6 +161,8 @@ Search for
members in
.Ar dir .
Can be specified more than once.
+.It Fl o Ar var Ns = Ns Ar value
+Set the given tunable to the provided value.
.El
.
.Sh EXAMPLES
diff --git a/sys/contrib/openzfs/man/man1/ztest.1 b/sys/contrib/openzfs/man/man1/ztest.1
index febbb62b1664..ae857bfea29c 100644
--- a/sys/contrib/openzfs/man/man1/ztest.1
+++ b/sys/contrib/openzfs/man/man1/ztest.1
@@ -24,7 +24,7 @@
.\" reserved.
.\" Copyright (c) 2017, Intel Corporation.
.\"
-.Dd May 26, 2021
+.Dd July 12, 2025
.Dt ZTEST 1
.Os
.
diff --git a/sys/contrib/openzfs/man/man4/spl.4 b/sys/contrib/openzfs/man/man4/spl.4
index 683f8e2b631f..61dfe42e463d 100644
--- a/sys/contrib/openzfs/man/man4/spl.4
+++ b/sys/contrib/openzfs/man/man4/spl.4
@@ -15,7 +15,7 @@
.\"
.\" Copyright 2013 Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
.\"
-.Dd August 24, 2020
+.Dd May 7, 2025
.Dt SPL 4
.Os
.
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
index 5c7958667f92..7f1adaceb408 100644
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -4,6 +4,7 @@
.\" Copyright (c) 2019, 2021 by Delphix. All rights reserved.
.\" Copyright (c) 2019 Datto Inc.
.\" Copyright (c) 2023, 2024, 2025, Klara, Inc.
+.\"
.\" The contents of this file are subject to the terms of the Common Development
.\" and Distribution License (the "License"). You may not use this file except
.\" in compliance with the License. You can obtain a copy of the license at
@@ -17,7 +18,7 @@
.\" own identifying information:
.\" Portions Copyright [yyyy] [name of copyright owner]
.\"
-.Dd May 29, 2025
+.Dd August 14, 2025
.Dt ZFS 4
.Os
.
@@ -601,6 +602,42 @@ new format when enabling the
feature.
The default is to convert all log entries.
.
+.It Sy vdev_read_sit_out_secs Ns = Ns Sy 600 Ns s Po 10 min Pc Pq ulong
+When a slow disk outlier is detected, it is placed in a sit out state.
+While sitting out, the disk will not participate in normal reads; instead, its
+data will be reconstructed as needed from parity.
+Scrub operations will always read from a disk, even if it is sitting out.
+Multiple disks in a RAID-Z or dRAID vdev may sit out at the same time, up
+to the number of parity devices.
+Writes will still be issued to a disk which is sitting out, to maintain full
+redundancy.
+Defaults to 600 seconds; a value of zero disables disk sit-outs entirely,
+including slow disk outlier detection.
+.
+.It Sy vdev_raidz_outlier_check_interval_ms Ns = Ns Sy 1000 Ns ms Po 1 sec Pc Pq ulong
+How often each RAID-Z and dRAID vdev will check for slow disk outliers.
+Increasing this interval will reduce the sensitivity of detection (since all
+I/Os since the last check are included in the statistics), but will slow the
+response to a disk developing a problem.
+Defaults to once per second; setting extremely small values may cause negative
+performance effects.
+.
+.It Sy vdev_raidz_outlier_insensitivity Ns = Ns Sy 50 Pq uint
+When performing slow outlier checks for RAID-Z and dRAID vdevs, this value is
+used to determine how far out an outlier must be before it counts as an event
+worth considering.
+This is phrased as "insensitivity" because larger values result in fewer
+detections.
+Smaller values will result in more aggressive sitting out of disks that may have
+problems, but may significantly increase the rate of spurious sit-outs.
+.Pp
+To provide a more technical definition of this parameter, this is the multiple
+of the inter-quartile range (IQR) that is being used in a Tukey's Fence
+detection algorithm.
+This is much higher than a normal Tukey's Fence k-value, because the
+distribution under consideration is probably an extreme-value distribution,
+rather than a more typical Gaussian distribution.
+.
.It Sy vdev_removal_max_span Ns = Ns Sy 32768 Ns B Po 32 KiB Pc Pq uint
During top-level vdev removal, chunks of data are copied from the vdev
which may include free space in order to trade bandwidth for IOPS.
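To make the vdev_raidz_outlier_insensitivity description above concrete, the following is a minimal sketch of a Tukey's Fence check over per-disk latency samples. It is not the OpenZFS implementation; the function names and the simple qsort()-based quartile lookup are assumptions made for the example.

#include <stdint.h>
#include <stdlib.h>

static int
cmp_u64(const void *a, const void *b)
{
	uint64_t x = *(const uint64_t *)a;
	uint64_t y = *(const uint64_t *)b;

	return ((x > y) - (x < y));
}

/*
 * Return nonzero if 'sample' lies above Q3 + k * IQR of the n latency
 * samples in 'lat'; k plays the role of the insensitivity tunable.
 */
static int
latency_is_outlier(uint64_t *lat, int n, uint64_t sample, uint64_t k)
{
	qsort(lat, n, sizeof (uint64_t), cmp_u64);
	uint64_t q1 = lat[n / 4];
	uint64_t q3 = lat[(3 * n) / 4];

	return (sample > q3 + k * (q3 - q1));
}

With the default insensitivity of 50, a disk's latency must exceed the third quartile by fifty times the inter-quartile range before it is treated as an outlier, which is why larger values produce fewer detections.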
diff --git a/sys/contrib/openzfs/man/man5/vdev_id.conf.5 b/sys/contrib/openzfs/man/man5/vdev_id.conf.5
index d2f817631c15..299a23720201 100644
--- a/sys/contrib/openzfs/man/man5/vdev_id.conf.5
+++ b/sys/contrib/openzfs/man/man5/vdev_id.conf.5
@@ -9,7 +9,7 @@
.\" source. A copy of the CDDL is also available via the Internet at
.\" http://www.illumos.org/license/CDDL.
.\"
-.Dd May 26, 2021
+.Dd October 8, 2024
.Dt VDEV_ID.CONF 5
.Os
.
diff --git a/sys/contrib/openzfs/man/man7/dracut.zfs.7 b/sys/contrib/openzfs/man/man7/dracut.zfs.7
index fb5da553af6e..3d051d4d3343 100644
--- a/sys/contrib/openzfs/man/man7/dracut.zfs.7
+++ b/sys/contrib/openzfs/man/man7/dracut.zfs.7
@@ -1,7 +1,7 @@
.\" SPDX-License-Identifier: CDDL-1.0
.\" SPDX-License-Identifier: 0BSD
.\"
-.Dd March 28, 2023
+.Dd July 13, 2024
.Dt DRACUT.ZFS 7
.Os
.
diff --git a/sys/contrib/openzfs/man/man7/vdevprops.7 b/sys/contrib/openzfs/man/man7/vdevprops.7
index acabe6b6613a..0fb28d7db13c 100644
--- a/sys/contrib/openzfs/man/man7/vdevprops.7
+++ b/sys/contrib/openzfs/man/man7/vdevprops.7
@@ -19,9 +19,9 @@
.\"
.\" CDDL HEADER END
.\"
-.\" Copyright (c) 2021 Klara, Inc.
+.\" Copyright (c) 2021, 2025, Klara, Inc.
.\"
-.Dd October 30, 2022
+.Dd July 23, 2024
.Dt VDEVPROPS 7
.Os
.
@@ -106,11 +106,17 @@ The number of children belonging to this vdev
.It Sy read_errors , write_errors , checksum_errors , initialize_errors , trim_errors
The number of errors of each type encountered by this vdev
.It Sy slow_ios
-The number of slow I/Os encountered by this vdev,
-These represent I/O operations that didn't complete in
+The number of slow I/O operations encountered by this vdev.
+A slow I/O is defined as an operation that did not complete within the
.Sy zio_slow_io_ms
-milliseconds
+threshold in milliseconds
.Pq Sy 30000 No by default .
+For
+.Sy RAIDZ
+and
+.Sy DRAID
+configurations, this value also represents the number of times the vdev was
+identified as an outlier and excluded from participating in read I/O operations.
.It Sy null_ops , read_ops , write_ops , free_ops , claim_ops , trim_ops
The number of I/O operations of each type performed by this vdev
.It Xo
@@ -150,6 +156,31 @@ The amount of space to reserve for the EFI system partition
.It Sy failfast
If this device should propagate BIO errors back to ZFS, used to disable
failfast.
+.It Sy sit_out
+Only valid for
+.Sy RAIDZ
+and
+.Sy DRAID
+vdevs.
+True when a slow disk outlier was detected and the vdev is currently in a sit
+out state.
+This property can be manually set to cause vdevs to sit out.
+It will also be automatically set by the
+.Sy autosit
+logic if that is enabled.
+While sitting out, the vdev will not participate in normal reads; instead, its
+data will be reconstructed as needed from parity.
+.It Sy autosit
+Only valid for
+.Sy RAIDZ
+and
+.Sy DRAID
+vdevs.
+If set, this enables the kernel-level slow disk detection logic.
+This logic automatically causes any vdevs that are significant negative
+performance outliers to sit out, as described in the
+.Sy sit_out
+property.
.It Sy path
The path to the device for this vdev
.It Sy allocating
diff --git a/sys/contrib/openzfs/man/man7/zfsconcepts.7 b/sys/contrib/openzfs/man/man7/zfsconcepts.7
index 5c736e53670d..bb2178d85bcd 100644
--- a/sys/contrib/openzfs/man/man7/zfsconcepts.7
+++ b/sys/contrib/openzfs/man/man7/zfsconcepts.7
@@ -31,7 +31,7 @@
.\" Copyright 2019 Joyent, Inc.
.\" Copyright 2023 Klara, Inc.
.\"
-.Dd October 6, 2023
+.Dd October 2, 2024
.Dt ZFSCONCEPTS 7
.Os
.
diff --git a/sys/contrib/openzfs/man/man7/zfsprops.7 b/sys/contrib/openzfs/man/man7/zfsprops.7
index ac3152cb5d51..0930771c9fce 100644
--- a/sys/contrib/openzfs/man/man7/zfsprops.7
+++ b/sys/contrib/openzfs/man/man7/zfsprops.7
@@ -39,7 +39,7 @@
.\" Copyright (c) 2019, Kjeld Schouten-Lebbing
.\" Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
.\"
-.Dd June 29, 2024
+.Dd August 6, 2025
.Dt ZFSPROPS 7
.Os
.
diff --git a/sys/contrib/openzfs/man/man7/zpool-features.7 b/sys/contrib/openzfs/man/man7/zpool-features.7
index 10dfd1f92936..b4404a6eb58d 100644
--- a/sys/contrib/openzfs/man/man7/zpool-features.7
+++ b/sys/contrib/openzfs/man/man7/zpool-features.7
@@ -19,7 +19,7 @@
.\" Copyright (c) 2019, Allan Jude
.\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
.\"
-.Dd October 2, 2024
+.Dd July 23, 2025
.Dt ZPOOL-FEATURES 7
.Os
.
diff --git a/sys/contrib/openzfs/man/man7/zpoolconcepts.7 b/sys/contrib/openzfs/man/man7/zpoolconcepts.7
index dafe3bffc453..b9c8926d835d 100644
--- a/sys/contrib/openzfs/man/man7/zpoolconcepts.7
+++ b/sys/contrib/openzfs/man/man7/zpoolconcepts.7
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd April 7, 2023
+.Dd August 6, 2025
.Dt ZPOOLCONCEPTS 7
.Os
.
diff --git a/sys/contrib/openzfs/man/man7/zpoolprops.7 b/sys/contrib/openzfs/man/man7/zpoolprops.7
index 5d84753193ee..d3b4c2376943 100644
--- a/sys/contrib/openzfs/man/man7/zpoolprops.7
+++ b/sys/contrib/openzfs/man/man7/zpoolprops.7
@@ -29,7 +29,7 @@
.\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
.\" Copyright (c) 2023, Klara Inc.
.\"
-.Dd November 18, 2024
+.Dd December 4, 2024
.Dt ZPOOLPROPS 7
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zdb.8 b/sys/contrib/openzfs/man/man8/zdb.8
index 0a5b6af73fdb..c3290ea14769 100644
--- a/sys/contrib/openzfs/man/man8/zdb.8
+++ b/sys/contrib/openzfs/man/man8/zdb.8
@@ -15,7 +15,7 @@
.\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
.\" Copyright (c) 2017 Intel Corporation.
.\"
-.Dd April 23, 2025
+.Dd August 12, 2025
.Dt ZDB 8
.Os
.
@@ -69,6 +69,13 @@
.Op Fl U Ar cache
.Ar poolname Op Ar vdev Oo Ar metaslab Oc Ns …
.Nm
+.Fl -allocated-map
+.Op Fl mAFLPXY
+.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
+.Op Fl t Ar txg
+.Op Fl U Ar cache
+.Ar poolname Op Ar vdev Oo Ar metaslab Oc Ns …
+.Nm
.Fl O
.Op Fl K Ar key
.Ar dataset path
@@ -128,6 +135,11 @@ that zdb may interpret inconsistent pool data and behave erratically.
.Sh OPTIONS
Display options:
.Bl -tag -width Ds
+.It Fl Sy -allocated-map
+Print out a list of all the allocated regions in the pool.
+Primarily intended for use with the
+.Nm zhack metaslab leak
+subcommand.
.It Fl b , -block-stats
Display statistics regarding the number, size
.Pq logical, physical and allocated
diff --git a/sys/contrib/openzfs/man/man8/zed.8.in b/sys/contrib/openzfs/man/man8/zed.8.in
index c90a1834403b..2d19f2d8496b 100644
--- a/sys/contrib/openzfs/man/man8/zed.8.in
+++ b/sys/contrib/openzfs/man/man8/zed.8.in
@@ -13,7 +13,7 @@
.\"
.\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049)
.\"
-.Dd May 26, 2021
+.Dd August 22, 2022
.Dt ZED 8
.Os
.
@@ -158,6 +158,8 @@ Multiple ZEDLETs may be invoked for a given zevent.
ZEDLETs are executables invoked by the ZED in response to a given zevent.
They should be written under the presumption they can be invoked concurrently,
and they should use appropriate locking to access any shared resources.
+The one exception to this is "synchronous zedlets", which are described later
+in this page.
Common variables used by ZEDLETs can be stored in the default rc file which
is sourced by scripts; these variables should be prefixed with
.Sy ZED_ .
@@ -233,6 +235,36 @@ and
.Sy ZPOOL .
These variables may be overridden in the rc file.
.
+.Sh SYNCHRONOUS ZEDLETS
+ZED's normal behavior is to spawn off zedlets in parallel and ignore their
+completion order.
+This means that ZED can potentially
+have zedlets for event ID number 2 starting before zedlets for event ID number
+1 have finished.
+Most of the time this is fine, and it actually helps when the system is getting
+hammered with hundreds of events.
+.Pp
+However, there are times when you want your zedlets to be executed in sequence
+with the event ID.
+That is where synchronous zedlets come in.
+.Pp
+ZED will wait for all previously spawned zedlets to finish before running
+a synchronous zedlet.
+A synchronous zedlet is guaranteed to be the only
+zedlet running.
+No other zedlets may run in parallel with a synchronous zedlet.
+Users should be careful to only use synchronous zedlets when needed, since
+they decrease parallelism.
+.Pp
+To make a zedlet synchronous, simply add a "-sync-" immediately following the
+event name in the zedlet's file name:
+.Pp
+.Sy EVENT_NAME-sync-ZEDLETNAME.sh
+.Pp
+For example, if you wanted a synchronous statechange script:
+.Pp
+.Sy statechange-sync-myzedlet.sh
+.
.Sh FILES
.Bl -tag -width "-c"
.It Pa @sysconfdir@/zfs/zed.d
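The naming convention added above, EVENT_NAME-sync-ZEDLETNAME.sh, is straightforward to test for programmatically. A hypothetical helper, not ZED's actual code, that decides whether a zedlet filename requests synchronous execution for a given event might look like this:

#include <string.h>

/*
 * Return nonzero if 'filename' has the form "<event>-sync-<name>", i.e.
 * the zedlet asks to be run synchronously for 'event'.
 */
static int
zedlet_is_sync(const char *filename, const char *event)
{
	size_t evlen = strlen(event);

	return (strncmp(filename, event, evlen) == 0 &&
	    strncmp(filename + evlen, "-sync-", 6) == 0);
}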
diff --git a/sys/contrib/openzfs/man/man8/zfs-allow.8 b/sys/contrib/openzfs/man/man8/zfs-allow.8
index 5a8e80bf6a43..e3b0e1ab3e12 100644
--- a/sys/contrib/openzfs/man/man8/zfs-allow.8
+++ b/sys/contrib/openzfs/man/man8/zfs-allow.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd September 8, 2025
.Dt ZFS-ALLOW 8
.Os
.
@@ -212,7 +212,8 @@ receive subcommand Must also have the \fBmount\fR and \fBcreate\fR ability, requ
release subcommand Allows releasing a user hold which might destroy the snapshot
rename subcommand Must also have the \fBmount\fR and \fBcreate\fR ability in the new parent
rollback subcommand Must also have the \fBmount\fR ability
-send subcommand
+send subcommand Allows sending a replication stream of a dataset.
+send:raw subcommand Only allows sending raw replication streams, preventing encrypted datasets being sent in decrypted form.
share subcommand Allows sharing file systems over NFS or SMB protocols
snapshot subcommand Must also have the \fBmount\fR ability
diff --git a/sys/contrib/openzfs/man/man8/zfs-bookmark.8 b/sys/contrib/openzfs/man/man8/zfs-bookmark.8
index 083ff46d241b..5a0933820020 100644
--- a/sys/contrib/openzfs/man/man8/zfs-bookmark.8
+++ b/sys/contrib/openzfs/man/man8/zfs-bookmark.8
@@ -31,7 +31,7 @@
.\" Copyright 2019 Joyent, Inc.
.\" Copyright (c) 2019, 2020 by Christian Schwarz. All Rights Reserved.
.\"
-.Dd May 12, 2022
+.Dd July 11, 2022
.Dt ZFS-BOOKMARK 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-clone.8 b/sys/contrib/openzfs/man/man8/zfs-clone.8
index cd412815f5fe..9609cf2ce36a 100644
--- a/sys/contrib/openzfs/man/man8/zfs-clone.8
+++ b/sys/contrib/openzfs/man/man8/zfs-clone.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZFS-CLONE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-create.8 b/sys/contrib/openzfs/man/man8/zfs-create.8
index 91878056cc7d..58bde5799240 100644
--- a/sys/contrib/openzfs/man/man8/zfs-create.8
+++ b/sys/contrib/openzfs/man/man8/zfs-create.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd June 2, 2023
.Dt ZFS-CREATE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-destroy.8 b/sys/contrib/openzfs/man/man8/zfs-destroy.8
index 38359be02430..6a6791f7a44e 100644
--- a/sys/contrib/openzfs/man/man8/zfs-destroy.8
+++ b/sys/contrib/openzfs/man/man8/zfs-destroy.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd February 5, 2025
.Dt ZFS-DESTROY 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-diff.8 b/sys/contrib/openzfs/man/man8/zfs-diff.8
index d4c48f4109be..5b94ea524666 100644
--- a/sys/contrib/openzfs/man/man8/zfs-diff.8
+++ b/sys/contrib/openzfs/man/man8/zfs-diff.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZFS-DIFF 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-hold.8 b/sys/contrib/openzfs/man/man8/zfs-hold.8
index 0c88937f0dc8..a877e428f88b 100644
--- a/sys/contrib/openzfs/man/man8/zfs-hold.8
+++ b/sys/contrib/openzfs/man/man8/zfs-hold.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd June 30, 2019
+.Dd November 8, 2022
.Dt ZFS-HOLD 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-jail.8 b/sys/contrib/openzfs/man/man8/zfs-jail.8
index 53499a279d05..569f5f57eab4 100644
--- a/sys/contrib/openzfs/man/man8/zfs-jail.8
+++ b/sys/contrib/openzfs/man/man8/zfs-jail.8
@@ -37,7 +37,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd May 27, 2021
+.Dd July 11, 2022
.Dt ZFS-JAIL 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-list.8 b/sys/contrib/openzfs/man/man8/zfs-list.8
index 677d8292e207..42eff94f9762 100644
--- a/sys/contrib/openzfs/man/man8/zfs-list.8
+++ b/sys/contrib/openzfs/man/man8/zfs-list.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd February 8, 2024
+.Dd August 25, 2025
.Dt ZFS-LIST 8
.Os
.
@@ -50,27 +50,25 @@
.Oo Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Oc Ns …
.
.Sh DESCRIPTION
-If specified, you can list property information by the absolute pathname or the
-relative pathname.
-By default, all file systems and volumes are displayed.
+By default, all file systems and volumes are displayed, with the following
+fields:
+.Sy name , Sy used , Sy available , Sy referenced , Sy mountpoint .
Snapshots are displayed if the
.Sy listsnapshots
pool property is
.Sy on
.Po the default is
.Sy off
-.Pc ,
+.Pc
or if the
.Fl t Sy snapshot
or
.Fl t Sy all
options are specified.
-The following fields are displayed:
-.Sy name , Sy used , Sy available , Sy referenced , Sy mountpoint .
.Bl -tag -width "-H"
.It Fl H
Used for scripting mode.
-Do not print headers and separate fields by a single tab instead of arbitrary
+Do not print headers, and separate fields by a single tab instead of arbitrary
white space.
.It Fl j , -json Op Ar --json-int
Print the output in JSON format.
@@ -87,7 +85,7 @@ of
will display only the dataset and its direct children.
.It Fl o Ar property
A comma-separated list of properties to display.
-The property must be:
+Each property must be:
.Bl -bullet -compact
.It
One of the properties described in the
@@ -125,30 +123,41 @@ section of
or the value
.Sy name
to sort by the dataset name.
-Multiple properties can be specified at one time using multiple
+Multiple properties can be specified as sort keys by combining multiple
.Fl s
-property options.
+or
+.Fl S
+options.
Multiple
.Fl s
-options are evaluated from left to right in decreasing order of importance.
-The following is a list of sorting criteria:
+and
+.Fl S
+options are evaluated from left to right to supply sort keys in
+decreasing order of priority.
+Property types operate as follows:
.Bl -bullet -compact
.It
Numeric types sort in numeric order.
.It
String types sort in alphabetical order.
.It
-Types inappropriate for a row sort that row to the literal bottom, regardless of
-the specified ordering.
+Types inappropriate for a row sort that row to the literal bottom,
+regardless of the specified ordering.
.El
.Pp
-If no sorting options are specified the existing behavior of
-.Nm zfs Cm list
-is preserved.
+If no sort columns are specified, or if two lines of output would sort
+equally across all specified columns, then datasets and bookmarks are
+sorted by name, whereas snapshots are sorted first by the name of their
+dataset and then by the time of their creation.
+When no sort columns are specified but snapshots are listed, this
+default behavior causes snapshots to be grouped under their datasets in
+chronological order by creation time.
.It Fl S Ar property
Same as
.Fl s ,
-but sorts by property in descending order.
+but sorts by
+.Ar property
+in descending order.
.It Fl t Ar type
A comma-separated list of types to display, where
.Ar type
diff --git a/sys/contrib/openzfs/man/man8/zfs-load-key.8 b/sys/contrib/openzfs/man/man8/zfs-load-key.8
index 7838c46d9e77..3a11cea99fd6 100644
--- a/sys/contrib/openzfs/man/man8/zfs-load-key.8
+++ b/sys/contrib/openzfs/man/man8/zfs-load-key.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd January 13, 2020
+.Dd July 11, 2022
.Dt ZFS-LOAD-KEY 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in b/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in
index ea470247daac..9e44ea30c636 100644
--- a/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in
+++ b/sys/contrib/openzfs/man/man8/zfs-mount-generator.8.in
@@ -23,7 +23,7 @@
.\" OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
.\" WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
.\"
-.Dd May 31, 2021
+.Dd November 30, 2021
.Dt ZFS-MOUNT-GENERATOR 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-mount.8 b/sys/contrib/openzfs/man/man8/zfs-mount.8
index 9fca6fffd5bb..2689b6dc345b 100644
--- a/sys/contrib/openzfs/man/man8/zfs-mount.8
+++ b/sys/contrib/openzfs/man/man8/zfs-mount.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd February 16, 2019
+.Dd October 12, 2024
.Dt ZFS-MOUNT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-project.8 b/sys/contrib/openzfs/man/man8/zfs-project.8
index 36547680f53e..4ebfdf6ffe4f 100644
--- a/sys/contrib/openzfs/man/man8/zfs-project.8
+++ b/sys/contrib/openzfs/man/man8/zfs-project.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd May 27, 2021
+.Dd July 11, 2022
.Dt ZFS-PROJECT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-promote.8 b/sys/contrib/openzfs/man/man8/zfs-promote.8
index 767045812607..435a7a5d0144 100644
--- a/sys/contrib/openzfs/man/man8/zfs-promote.8
+++ b/sys/contrib/openzfs/man/man8/zfs-promote.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZFS-PROMOTE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-rename.8 b/sys/contrib/openzfs/man/man8/zfs-rename.8
index 4cf192c0682b..8fedc67469e6 100644
--- a/sys/contrib/openzfs/man/man8/zfs-rename.8
+++ b/sys/contrib/openzfs/man/man8/zfs-rename.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZFS-RENAME 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-rewrite.8 b/sys/contrib/openzfs/man/man8/zfs-rewrite.8
index a3a037f3794a..ca5340c7e5eb 100644
--- a/sys/contrib/openzfs/man/man8/zfs-rewrite.8
+++ b/sys/contrib/openzfs/man/man8/zfs-rewrite.8
@@ -21,7 +21,7 @@
.\"
.\" Copyright (c) 2025 iXsystems, Inc.
.\"
-.Dd May 6, 2025
+.Dd July 23, 2025
.Dt ZFS-REWRITE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-send.8 b/sys/contrib/openzfs/man/man8/zfs-send.8
index f7c6b840303c..6c5f6b94afd5 100644
--- a/sys/contrib/openzfs/man/man8/zfs-send.8
+++ b/sys/contrib/openzfs/man/man8/zfs-send.8
@@ -31,7 +31,7 @@
.\" Copyright 2019 Joyent, Inc.
.\" Copyright (c) 2024, Klara, Inc.
.\"
-.Dd October 2, 2024
+.Dd August 29, 2025
.Dt ZFS-SEND 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-set.8 b/sys/contrib/openzfs/man/man8/zfs-set.8
index 67f4d6eba171..08daf09d05f8 100644
--- a/sys/contrib/openzfs/man/man8/zfs-set.8
+++ b/sys/contrib/openzfs/man/man8/zfs-set.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd April 20, 2024
+.Dd October 12, 2024
.Dt ZFS-SET 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-share.8 b/sys/contrib/openzfs/man/man8/zfs-share.8
index f7a09a189182..e9c32a44b0c7 100644
--- a/sys/contrib/openzfs/man/man8/zfs-share.8
+++ b/sys/contrib/openzfs/man/man8/zfs-share.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd May 17, 2021
+.Dd July 11, 2022
.Dt ZFS-SHARE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-snapshot.8 b/sys/contrib/openzfs/man/man8/zfs-snapshot.8
index 3ddd1273c8e8..8f4b2c335f09 100644
--- a/sys/contrib/openzfs/man/man8/zfs-snapshot.8
+++ b/sys/contrib/openzfs/man/man8/zfs-snapshot.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZFS-SNAPSHOT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-upgrade.8 b/sys/contrib/openzfs/man/man8/zfs-upgrade.8
index bac74e37aef9..a5ce2b760da4 100644
--- a/sys/contrib/openzfs/man/man8/zfs-upgrade.8
+++ b/sys/contrib/openzfs/man/man8/zfs-upgrade.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd June 30, 2019
+.Dd July 11, 2022
.Dt ZFS-UPGRADE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-userspace.8 b/sys/contrib/openzfs/man/man8/zfs-userspace.8
index d7a4d18e83b1..c255d911740d 100644
--- a/sys/contrib/openzfs/man/man8/zfs-userspace.8
+++ b/sys/contrib/openzfs/man/man8/zfs-userspace.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd June 30, 2019
+.Dd July 11, 2022
.Dt ZFS-USERSPACE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-wait.8 b/sys/contrib/openzfs/man/man8/zfs-wait.8
index 554a67455c60..e5c60010d2f9 100644
--- a/sys/contrib/openzfs/man/man8/zfs-wait.8
+++ b/sys/contrib/openzfs/man/man8/zfs-wait.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 31, 2021
+.Dd July 11, 2022
.Dt ZFS-WAIT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs-zone.8 b/sys/contrib/openzfs/man/man8/zfs-zone.8
index 7ad0ac89463c..a56a304e82b2 100644
--- a/sys/contrib/openzfs/man/man8/zfs-zone.8
+++ b/sys/contrib/openzfs/man/man8/zfs-zone.8
@@ -38,7 +38,7 @@
.\" Copyright 2019 Joyent, Inc.
.\" Copyright 2021 Klara, Inc.
.\"
-.Dd June 3, 2022
+.Dd July 11, 2022
.Dt ZFS-ZONE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs.8 b/sys/contrib/openzfs/man/man8/zfs.8
index e16a3a82b672..b7566a727469 100644
--- a/sys/contrib/openzfs/man/man8/zfs.8
+++ b/sys/contrib/openzfs/man/man8/zfs.8
@@ -37,7 +37,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd April 18, 2025
+.Dd May 12, 2025
.Dt ZFS 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8 b/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8
index eef0ce68f17b..465e336d170c 100644
--- a/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8
+++ b/sys/contrib/openzfs/man/man8/zfs_ids_to_path.8
@@ -21,7 +21,7 @@
.\"
.\" Copyright (c) 2020 by Delphix. All rights reserved.
.\"
-.Dd April 17, 2020
+.Dd July 11, 2022
.Dt ZFS_IDS_TO_PATH 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zgenhostid.8 b/sys/contrib/openzfs/man/man8/zgenhostid.8
index 2b5b4fc18216..ff564880f97d 100644
--- a/sys/contrib/openzfs/man/man8/zgenhostid.8
+++ b/sys/contrib/openzfs/man/man8/zgenhostid.8
@@ -21,7 +21,7 @@
.\"
.\" Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
.\"
-.Dd May 26, 2021
+.Dd July 11, 2022
.Dt ZGENHOSTID 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-attach.8 b/sys/contrib/openzfs/man/man8/zpool-attach.8
index 51d876767666..f120350a5190 100644
--- a/sys/contrib/openzfs/man/man8/zpool-attach.8
+++ b/sys/contrib/openzfs/man/man8/zpool-attach.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd June 28, 2023
+.Dd November 8, 2023
.Dt ZPOOL-ATTACH 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-checkpoint.8 b/sys/contrib/openzfs/man/man8/zpool-checkpoint.8
index d97d10d5df6e..b654f669cfa2 100644
--- a/sys/contrib/openzfs/man/man8/zpool-checkpoint.8
+++ b/sys/contrib/openzfs/man/man8/zpool-checkpoint.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 27, 2021
+.Dd July 11, 2022
.Dt ZPOOL-CHECKPOINT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-clear.8 b/sys/contrib/openzfs/man/man8/zpool-clear.8
index 19cd4be36408..70cd8325bd0e 100644
--- a/sys/contrib/openzfs/man/man8/zpool-clear.8
+++ b/sys/contrib/openzfs/man/man8/zpool-clear.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 27, 2021
+.Dd April 29, 2024
.Dt ZPOOL-CLEAR 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-create.8 b/sys/contrib/openzfs/man/man8/zpool-create.8
index 490c67629a20..a36ae260a158 100644
--- a/sys/contrib/openzfs/man/man8/zpool-create.8
+++ b/sys/contrib/openzfs/man/man8/zpool-create.8
@@ -28,7 +28,7 @@
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZPOOL-CREATE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-destroy.8 b/sys/contrib/openzfs/man/man8/zpool-destroy.8
index f49f29804ad7..82f3f3e203d6 100644
--- a/sys/contrib/openzfs/man/man8/zpool-destroy.8
+++ b/sys/contrib/openzfs/man/man8/zpool-destroy.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZPOOL-DESTROY 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-detach.8 b/sys/contrib/openzfs/man/man8/zpool-detach.8
index ae02dbc2d5b8..79a44310110d 100644
--- a/sys/contrib/openzfs/man/man8/zpool-detach.8
+++ b/sys/contrib/openzfs/man/man8/zpool-detach.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd August 9, 2019
+.Dd July 11, 2022
.Dt ZPOOL-DETACH 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-events.8 b/sys/contrib/openzfs/man/man8/zpool-events.8
index 2d32dce2bb65..36a9864dc73b 100644
--- a/sys/contrib/openzfs/man/man8/zpool-events.8
+++ b/sys/contrib/openzfs/man/man8/zpool-events.8
@@ -190,6 +190,16 @@ Issued when a scrub is resumed on a pool.
.It Sy scrub.paused
Issued when a scrub is paused on a pool.
.It Sy bootfs.vdev.attach
+.It Sy sitout
+Issued when a
+.Sy RAIDZ
+or
+.Sy DRAID
+vdev triggers the
+.Sy autosit
+logic.
+This logic detects when a disk in such a vdev is significantly slower than its
+peers, and sits it out temporarily to preserve the performance of the pool.
.El
.
.Sh PAYLOADS
diff --git a/sys/contrib/openzfs/man/man8/zpool-export.8 b/sys/contrib/openzfs/man/man8/zpool-export.8
index 171a7541c6d2..02495c088f94 100644
--- a/sys/contrib/openzfs/man/man8/zpool-export.8
+++ b/sys/contrib/openzfs/man/man8/zpool-export.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZPOOL-EXPORT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-get.8 b/sys/contrib/openzfs/man/man8/zpool-get.8
index 1d6d1f08afa6..bfe1bae7619f 100644
--- a/sys/contrib/openzfs/man/man8/zpool-get.8
+++ b/sys/contrib/openzfs/man/man8/zpool-get.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd August 9, 2019
+.Dd October 12, 2024
.Dt ZPOOL-GET 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-history.8 b/sys/contrib/openzfs/man/man8/zpool-history.8
index f15086eabc47..f02168951ff2 100644
--- a/sys/contrib/openzfs/man/man8/zpool-history.8
+++ b/sys/contrib/openzfs/man/man8/zpool-history.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd August 9, 2019
+.Dd July 11, 2022
.Dt ZPOOL-HISTORY 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-import.8 b/sys/contrib/openzfs/man/man8/zpool-import.8
index 9076f5c34929..c6d5f222b6b2 100644
--- a/sys/contrib/openzfs/man/man8/zpool-import.8
+++ b/sys/contrib/openzfs/man/man8/zpool-import.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZPOOL-IMPORT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-initialize.8 b/sys/contrib/openzfs/man/man8/zpool-initialize.8
index 39579a58010e..5299a897cb97 100644
--- a/sys/contrib/openzfs/man/man8/zpool-initialize.8
+++ b/sys/contrib/openzfs/man/man8/zpool-initialize.8
@@ -28,7 +28,7 @@
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2025 Hewlett Packard Enterprise Development LP.
.\"
-.Dd May 27, 2021
+.Dd July 30, 2025
.Dt ZPOOL-INITIALIZE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-iostat.8 b/sys/contrib/openzfs/man/man8/zpool-iostat.8
index d8c21d0cfc6c..5dd9c9d55e20 100644
--- a/sys/contrib/openzfs/man/man8/zpool-iostat.8
+++ b/sys/contrib/openzfs/man/man8/zpool-iostat.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd March 16, 2022
+.Dd January 29, 2024
.Dt ZPOOL-IOSTAT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-labelclear.8 b/sys/contrib/openzfs/man/man8/zpool-labelclear.8
index ba3d1509aa75..b807acaaede3 100644
--- a/sys/contrib/openzfs/man/man8/zpool-labelclear.8
+++ b/sys/contrib/openzfs/man/man8/zpool-labelclear.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 31, 2021
+.Dd July 11, 2022
.Dt ZPOOL-LABELCLEAR 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-list.8 b/sys/contrib/openzfs/man/man8/zpool-list.8
index b720e203c1c9..106399941f98 100644
--- a/sys/contrib/openzfs/man/man8/zpool-list.8
+++ b/sys/contrib/openzfs/man/man8/zpool-list.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd March 16, 2022
+.Dd October 12, 2024
.Dt ZPOOL-LIST 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-offline.8 b/sys/contrib/openzfs/man/man8/zpool-offline.8
index 49b1f34ad5d5..388c7634acce 100644
--- a/sys/contrib/openzfs/man/man8/zpool-offline.8
+++ b/sys/contrib/openzfs/man/man8/zpool-offline.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd August 9, 2019
+.Dd December 21, 2023
.Dt ZPOOL-OFFLINE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-reguid.8 b/sys/contrib/openzfs/man/man8/zpool-reguid.8
index 77101fc07326..b98c88e320de 100644
--- a/sys/contrib/openzfs/man/man8/zpool-reguid.8
+++ b/sys/contrib/openzfs/man/man8/zpool-reguid.8
@@ -29,7 +29,7 @@
.\" Copyright (c) 2024, Klara Inc.
.\" Copyright (c) 2024, Mateusz Piotrowski
.\"
-.Dd June 21, 2023
+.Dd August 26, 2024
.Dt ZPOOL-REGUID 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-remove.8 b/sys/contrib/openzfs/man/man8/zpool-remove.8
index d10a92e49bbe..4d5fc431d332 100644
--- a/sys/contrib/openzfs/man/man8/zpool-remove.8
+++ b/sys/contrib/openzfs/man/man8/zpool-remove.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd March 16, 2022
+.Dd November 19, 2024
.Dt ZPOOL-REMOVE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-reopen.8 b/sys/contrib/openzfs/man/man8/zpool-reopen.8
index 594cff3d16d8..c4e10f0a546e 100644
--- a/sys/contrib/openzfs/man/man8/zpool-reopen.8
+++ b/sys/contrib/openzfs/man/man8/zpool-reopen.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd June 2, 2021
+.Dd July 11, 2022
.Dt ZPOOL-REOPEN 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-replace.8 b/sys/contrib/openzfs/man/man8/zpool-replace.8
index 9f3156eeb3ef..651af13b19b8 100644
--- a/sys/contrib/openzfs/man/man8/zpool-replace.8
+++ b/sys/contrib/openzfs/man/man8/zpool-replace.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 29, 2021
+.Dd July 11, 2022
.Dt ZPOOL-REPLACE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-resilver.8 b/sys/contrib/openzfs/man/man8/zpool-resilver.8
index 2161d77f62ed..59c4be5db209 100644
--- a/sys/contrib/openzfs/man/man8/zpool-resilver.8
+++ b/sys/contrib/openzfs/man/man8/zpool-resilver.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 27, 2021
+.Dd July 11, 2022
.Dt ZPOOL-RESILVER 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-scrub.8 b/sys/contrib/openzfs/man/man8/zpool-scrub.8
index 0ecf8bd3851f..cf7ead5788bf 100644
--- a/sys/contrib/openzfs/man/man8/zpool-scrub.8
+++ b/sys/contrib/openzfs/man/man8/zpool-scrub.8
@@ -28,7 +28,7 @@
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2025 Hewlett Packard Enterprise Development LP.
.\"
-.Dd December 11, 2024
+.Dd August 6, 2025
.Dt ZPOOL-SCRUB 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-split.8 b/sys/contrib/openzfs/man/man8/zpool-split.8
index a67c865cf30c..ee4c6384cf23 100644
--- a/sys/contrib/openzfs/man/man8/zpool-split.8
+++ b/sys/contrib/openzfs/man/man8/zpool-split.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd June 2, 2021
+.Dd July 11, 2022
.Dt ZPOOL-SPLIT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-status.8 b/sys/contrib/openzfs/man/man8/zpool-status.8
index a7f3e088043b..108a1067b384 100644
--- a/sys/contrib/openzfs/man/man8/zpool-status.8
+++ b/sys/contrib/openzfs/man/man8/zpool-status.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd February 14, 2024
+.Dd May 20, 2025
.Dt ZPOOL-STATUS 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-sync.8 b/sys/contrib/openzfs/man/man8/zpool-sync.8
index 8f438f363e83..d1dc05d0c202 100644
--- a/sys/contrib/openzfs/man/man8/zpool-sync.8
+++ b/sys/contrib/openzfs/man/man8/zpool-sync.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd August 9, 2019
+.Dd July 11, 2022
.Dt ZPOOL-SYNC 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-trim.8 b/sys/contrib/openzfs/man/man8/zpool-trim.8
index 18723e1be0d2..c4e849019789 100644
--- a/sys/contrib/openzfs/man/man8/zpool-trim.8
+++ b/sys/contrib/openzfs/man/man8/zpool-trim.8
@@ -28,7 +28,7 @@
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2025 Hewlett Packard Enterprise Development LP.
.\"
-.Dd May 27, 2021
+.Dd July 30, 2025
.Dt ZPOOL-TRIM 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-upgrade.8 b/sys/contrib/openzfs/man/man8/zpool-upgrade.8
index 20632ae4bba0..cf69060da5ce 100644
--- a/sys/contrib/openzfs/man/man8/zpool-upgrade.8
+++ b/sys/contrib/openzfs/man/man8/zpool-upgrade.8
@@ -28,7 +28,7 @@
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
.\"
-.Dd March 16, 2022
+.Dd July 11, 2022
.Dt ZPOOL-UPGRADE 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-wait.8 b/sys/contrib/openzfs/man/man8/zpool-wait.8
index 0ffb4badfb7b..28a51d29a913 100644
--- a/sys/contrib/openzfs/man/man8/zpool-wait.8
+++ b/sys/contrib/openzfs/man/man8/zpool-wait.8
@@ -28,7 +28,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 27, 2021
+.Dd January 29, 2024
.Dt ZPOOL-WAIT 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zpool.8 b/sys/contrib/openzfs/man/man8/zpool.8
index b96944050594..3bfef780b298 100644
--- a/sys/contrib/openzfs/man/man8/zpool.8
+++ b/sys/contrib/openzfs/man/man8/zpool.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd February 14, 2024
+.Dd November 19, 2024
.Dt ZPOOL 8
.Os
.
diff --git a/sys/contrib/openzfs/man/man8/zstream.8 b/sys/contrib/openzfs/man/man8/zstream.8
index 03a8479c9e6a..5b3d063bc4a5 100644
--- a/sys/contrib/openzfs/man/man8/zstream.8
+++ b/sys/contrib/openzfs/man/man8/zstream.8
@@ -21,7 +21,7 @@
.\"
.\" Copyright (c) 2020 by Delphix. All rights reserved.
.\"
-.Dd October 4, 2022
+.Dd November 10, 2022
.Dt ZSTREAM 8
.Os
.
diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in
index 362d2295e091..58a80dc4402c 100644
--- a/sys/contrib/openzfs/module/Kbuild.in
+++ b/sys/contrib/openzfs/module/Kbuild.in
@@ -4,7 +4,7 @@
ZFS_MODULE_CFLAGS += -std=gnu99 -Wno-declaration-after-statement
ZFS_MODULE_CFLAGS += -Wmissing-prototypes
-ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @NO_FORMAT_ZERO_LENGTH@
+ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @KERNEL_NO_FORMAT_ZERO_LENGTH@
ifneq ($(KBUILD_EXTMOD),)
zfs_include = @abs_top_srcdir@/include
diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c
index 6d3bcca9f995..dcb0a391dda4 100644
--- a/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c
+++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c
@@ -38,11 +38,14 @@
kfpu_begin(); E(s, d, b); kfpu_end(); \
}
+#if defined(__x86_64) || defined(__aarch64__) || defined(__arm__) || \
+ defined(__PPC64__)
/* some implementation is always okay */
static inline boolean_t sha2_is_supported(void)
{
return (B_TRUE);
}
+#endif
#if defined(__x86_64)
diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c
index 2efd9fcf4c99..a85a71a83df4 100644
--- a/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c
+++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c
@@ -38,11 +38,14 @@
kfpu_begin(); E(s, d, b); kfpu_end(); \
}
+#if defined(__x86_64) || defined(__aarch64__) || defined(__arm__) || \
+	defined(__PPC64__)
/* some implementation is always okay */
static inline boolean_t sha2_is_supported(void)
{
return (B_TRUE);
}
+#endif
#if defined(__x86_64)
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
index ace2360c032d..393bfaa65ff5 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
@@ -188,11 +188,6 @@ param_set_arc_max(SYSCTL_HANDLER_ARGS)
return (0);
}
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max,
- CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_max, "LU",
- "Maximum ARC size in bytes (LEGACY)");
-
int
param_set_arc_min(SYSCTL_HANDLER_ARGS)
{
@@ -217,11 +212,6 @@ param_set_arc_min(SYSCTL_HANDLER_ARGS)
return (0);
}
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min,
- CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_min, "LU",
- "Minimum ARC size in bytes (LEGACY)");
-
extern uint_t zfs_arc_free_target;
int
@@ -245,16 +235,6 @@ param_set_arc_free_target(SYSCTL_HANDLER_ARGS)
return (0);
}
-/*
- * NOTE: This sysctl is CTLFLAG_RW not CTLFLAG_RWTUN due to its dependency on
- * pagedaemon initialization.
- */
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
- CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_free_target, "IU",
- "Desired number of free pages below which ARC triggers reclaim"
- " (LEGACY)");
-
int
param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
{
@@ -273,187 +253,6 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
return (0);
}
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift,
- CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_no_grow_shift, "I",
- "log2(fraction of ARC which must be free to allow growing) (LEGACY)");
-
-extern uint64_t l2arc_write_max;
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max,
- CTLFLAG_RWTUN, &l2arc_write_max, 0,
- "Max write bytes per interval (LEGACY)");
-
-extern uint64_t l2arc_write_boost;
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost,
- CTLFLAG_RWTUN, &l2arc_write_boost, 0,
- "Extra write bytes during device warmup (LEGACY)");
-
-extern uint64_t l2arc_headroom;
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom,
- CTLFLAG_RWTUN, &l2arc_headroom, 0,
- "Number of max device writes to precache (LEGACY)");
-
-extern uint64_t l2arc_headroom_boost;
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom_boost,
- CTLFLAG_RWTUN, &l2arc_headroom_boost, 0,
- "Compressed l2arc_headroom multiplier (LEGACY)");
-
-extern uint64_t l2arc_feed_secs;
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs,
- CTLFLAG_RWTUN, &l2arc_feed_secs, 0,
- "Seconds between L2ARC writing (LEGACY)");
-
-extern uint64_t l2arc_feed_min_ms;
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms,
- CTLFLAG_RWTUN, &l2arc_feed_min_ms, 0,
- "Min feed interval in milliseconds (LEGACY)");
-
-extern int l2arc_noprefetch;
-
-SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch,
- CTLFLAG_RWTUN, &l2arc_noprefetch, 0,
- "Skip caching prefetched buffers (LEGACY)");
-
-extern int l2arc_feed_again;
-
-SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again,
- CTLFLAG_RWTUN, &l2arc_feed_again, 0,
- "Turbo L2ARC warmup (LEGACY)");
-
-extern int l2arc_norw;
-
-SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw,
- CTLFLAG_RWTUN, &l2arc_norw, 0,
- "No reads during writes (LEGACY)");
-
-static int
-param_get_arc_state_size(SYSCTL_HANDLER_ARGS)
-{
- arc_state_t *state = (arc_state_t *)arg1;
- int64_t val;
-
- val = zfs_refcount_count(&state->arcs_size[ARC_BUFC_DATA]) +
- zfs_refcount_count(&state->arcs_size[ARC_BUFC_METADATA]);
- return (sysctl_handle_64(oidp, &val, 0, req));
-}
-
-extern arc_state_t ARC_anon;
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, anon_size,
- CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &ARC_anon, 0, param_get_arc_state_size, "Q",
- "size of anonymous state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD,
- &ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
- "size of evictable metadata in anonymous state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD,
- &ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
- "size of evictable data in anonymous state");
-
-extern arc_state_t ARC_mru;
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_size,
- CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &ARC_mru, 0, param_get_arc_state_size, "Q",
- "size of mru state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD,
- &ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
- "size of evictable metadata in mru state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD,
- &ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
- "size of evictable data in mru state");
-
-extern arc_state_t ARC_mru_ghost;
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_ghost_size,
- CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &ARC_mru_ghost, 0, param_get_arc_state_size, "Q",
- "size of mru ghost state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD,
- &ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
- "size of evictable metadata in mru ghost state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD,
- &ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
- "size of evictable data in mru ghost state");
-
-extern arc_state_t ARC_mfu;
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_size,
- CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &ARC_mfu, 0, param_get_arc_state_size, "Q",
- "size of mfu state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD,
- &ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
- "size of evictable metadata in mfu state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD,
- &ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
- "size of evictable data in mfu state");
-
-extern arc_state_t ARC_mfu_ghost;
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_ghost_size,
- CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &ARC_mfu_ghost, 0, param_get_arc_state_size, "Q",
- "size of mfu ghost state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD,
- &ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
- "size of evictable metadata in mfu ghost state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD,
- &ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
- "size of evictable data in mfu ghost state");
-
-extern arc_state_t ARC_uncached;
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, uncached_size,
- CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &ARC_uncached, 0, param_get_arc_state_size, "Q",
- "size of uncached state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_metadata_esize, CTLFLAG_RD,
- &ARC_uncached.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
- "size of evictable metadata in uncached state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_data_esize, CTLFLAG_RD,
- &ARC_uncached.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
- "size of evictable data in uncached state");
-
-extern arc_state_t ARC_l2c_only;
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, l2c_only_size,
- CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &ARC_l2c_only, 0, param_get_arc_state_size, "Q",
- "size of l2c_only state");
-
-/* dbuf.c */
-
-/* dmu.c */
-
-/* dmu_zfetch.c */
-
-SYSCTL_NODE(_vfs_zfs, OID_AUTO, zfetch, CTLFLAG_RW, 0, "ZFS ZFETCH (LEGACY)");
-
-extern uint32_t zfetch_max_distance;
-
-SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance,
- CTLFLAG_RWTUN, &zfetch_max_distance, 0,
- "Max bytes to prefetch per stream (LEGACY)");
-
-extern uint32_t zfetch_max_idistance;
-
-SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance,
- CTLFLAG_RWTUN, &zfetch_max_idistance, 0,
- "Max bytes to prefetch indirects for per stream (LEGACY)");
-
-/* dsl_pool.c */
-
-/* dnode.c */
-
-/* dsl_scan.c */
-
/* metaslab.c */
int
@@ -514,19 +313,6 @@ SYSCTL_UINT(_vfs_zfs, OID_AUTO, condense_pct,
"Condense on-disk spacemap when it is more than this many percents"
" of in-memory counterpart");
-extern uint_t zfs_remove_max_segment;
-
-SYSCTL_UINT(_vfs_zfs, OID_AUTO, remove_max_segment,
- CTLFLAG_RWTUN, &zfs_remove_max_segment, 0,
- "Largest contiguous segment ZFS will attempt to allocate when removing"
- " a device");
-
-extern int zfs_removal_suspend_progress;
-
-SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress,
- CTLFLAG_RWTUN, &zfs_removal_suspend_progress, 0,
- "Ensures certain actions can happen while in the middle of a removal");
-
/*
* Minimum size which forces the dynamic allocator to change
* it's allocation strategy. Once the space map cannot satisfy
@@ -749,12 +535,6 @@ param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS)
return (0);
}
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
- CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- &zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift),
- param_set_min_auto_ashift, "IU",
- "Min ashift used when creating new top-level vdev. (LEGACY)");
-
int
param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
{
@@ -774,13 +554,6 @@ param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
return (0);
}
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
- CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- &zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift),
- param_set_max_auto_ashift, "IU",
- "Max ashift used when optimizing for logical -> physical sector size on"
- " new top-level vdevs. (LEGACY)");
-
/*
* Since the DTL space map of a vdev is not expected to have a lot of
* entries, we default its block size to 4K.
@@ -802,23 +575,6 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz,
CTLFLAG_RDTUN, &zfs_vdev_standard_sm_blksz, 0,
"Block size for standard space map. Power of 2 greater than 4096.");
-extern int vdev_validate_skip;
-
-SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip,
- CTLFLAG_RDTUN, &vdev_validate_skip, 0,
- "Enable to bypass vdev_validate().");
-
-/* vdev_mirror.c */
-
-/* vdev_queue.c */
-
-extern uint_t zfs_vdev_max_active;
-
-SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight,
- CTLFLAG_RWTUN, &zfs_vdev_max_active, 0,
- "The maximum number of I/Os of all types active for each device."
- " (LEGACY)");
-
/* zio.c */
SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata,
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
index b15a3e6e38c0..cb5787269db2 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
@@ -1175,7 +1175,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
int count = 0;
zfs_acl_phys_t acl_phys;
- if (zp->z_zfsvfs->z_replay == B_FALSE) {
+ if (ZTOV(zp) != NULL && zp->z_zfsvfs->z_replay == B_FALSE) {
ASSERT_VOP_IN_SEQC(ZTOV(zp));
}
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
index 21e5f7938f9f..ca13569a1235 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
@@ -164,8 +164,9 @@ zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
int
zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off,
- ssize_t *resid)
+ uint8_t ashift, ssize_t *resid)
{
+ (void) ashift;
return (zfs_file_write_impl(fp, buf, count, &off, resid));
}
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
index 174141a5deab..8dce97baba66 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -61,6 +61,7 @@
#include <sys/fs/zfs.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
+#include <sys/dsl_dataset.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/dbuf.h>
@@ -388,7 +389,9 @@ zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
error = vn_lock(vp, LK_EXCLUSIVE);
if (error)
return (error);
+ vn_seqc_write_begin(vp);
error = zfs_ioctl_setxattr(vp, fsx, cred);
+ vn_seqc_write_end(vp);
VOP_UNLOCK(vp);
return (error);
}
@@ -2205,6 +2208,7 @@ zfs_setattr_dir(znode_t *dzp)
if (err)
break;
+ vn_seqc_write_begin(ZTOV(zp));
mutex_enter(&dzp->z_lock);
if (zp->z_uid != dzp->z_uid) {
@@ -2254,6 +2258,7 @@ sa_add_projid_err:
dmu_tx_abort(tx);
}
tx = NULL;
+ vn_seqc_write_end(ZTOV(zp));
if (err != 0 && err != ENOENT)
break;
@@ -5729,6 +5734,9 @@ zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
{
ulong_t val;
int error;
+#ifdef _PC_CLONE_BLKSIZE
+ zfsvfs_t *zfsvfs;
+#endif
error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
curthread->td_ucred, NULL);
@@ -5775,6 +5783,21 @@ zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
*ap->a_retval = 1;
return (0);
#endif
+#ifdef _PC_CLONE_BLKSIZE
+ case _PC_CLONE_BLKSIZE:
+ zfsvfs = (zfsvfs_t *)ap->a_vp->v_mount->mnt_data;
+ if (zfs_bclone_enabled &&
+ spa_feature_is_enabled(dmu_objset_spa(zfsvfs->z_os),
+ SPA_FEATURE_BLOCK_CLONING))
+ *ap->a_retval = dsl_dataset_feature_is_active(
+ zfsvfs->z_os->os_dsl_dataset,
+ SPA_FEATURE_LARGE_BLOCKS) ?
+ SPA_MAXBLOCKSIZE :
+ SPA_OLD_MAXBLOCKSIZE;
+ else
+ *ap->a_retval = 0;
+ return (0);
+#endif
default:
return (vop_stdpathconf(ap));
}
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c
index 7cd0a153577c..649022ab5bcb 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c
@@ -817,6 +817,10 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
(*zpp)->z_dnodesize = dnodesize;
(*zpp)->z_projid = projid;
+ vnode_t *vp = ZTOV(*zpp);
+ if (!(flag & IS_ROOT_NODE))
+ vn_seqc_write_begin(vp);
+
if (vap->va_mask & AT_XVATTR)
zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);
@@ -825,7 +829,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
}
if (!(flag & IS_ROOT_NODE)) {
- vnode_t *vp = ZTOV(*zpp);
+ vn_seqc_write_end(vp);
vp->v_vflag |= VV_FORCEINSMQ;
int err = insmntque(vp, zfsvfs->z_vfs);
vp->v_vflag &= ~VV_FORCEINSMQ;
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
index c729947369c2..3fdcdbac6f68 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
@@ -115,8 +115,9 @@ zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
*/
int
zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off,
- ssize_t *resid)
+ uint8_t ashift, ssize_t *resid)
{
+ (void) ashift;
ssize_t rc;
rc = kernel_write(fp, buf, count, &off);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
index 48dae79a2373..81ac26cb0c93 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
@@ -202,7 +202,7 @@ zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags)
return (!!dentry->d_inode);
}
-static dentry_operations_t zpl_dops_snapdirs = {
+static const struct dentry_operations zpl_dops_snapdirs = {
/*
* Auto mounting of snapshots is only supported for 2.6.37 and
* newer kernels. Prior to this kernel the ops->follow_link()
@@ -215,6 +215,51 @@ static dentry_operations_t zpl_dops_snapdirs = {
.d_revalidate = zpl_snapdir_revalidate,
};
+/*
+ * For the .zfs control directory to work properly we must be able to override
+ * the default operations table and register custom .d_automount and
+ * .d_revalidate callbacks.
+ */
+static void
+set_snapdir_dentry_ops(struct dentry *dentry, unsigned int extraflags) {
+ static const unsigned int op_flags =
+ DCACHE_OP_HASH | DCACHE_OP_COMPARE |
+ DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE |
+ DCACHE_OP_PRUNE | DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_REAL;
+
+#ifdef HAVE_D_SET_D_OP
+ /*
+ * d_set_d_op() will set the DCACHE_OP_ flags according to what it
+ * finds in the passed dentry_operations, so we don't have to.
+ *
+ * We clear the flags and the old op table before calling d_set_d_op()
+	 * because it issues a warning when the dentry operations table is already
+ * set.
+ */
+ dentry->d_op = NULL;
+ dentry->d_flags &= ~op_flags;
+ d_set_d_op(dentry, &zpl_dops_snapdirs);
+ dentry->d_flags |= extraflags;
+#else
+ /*
+ * Since 6.17 there's no exported way to modify dentry ops, so we have
+ * to reach in and do it ourselves. This should be safe for our very
+ * narrow use case, which is to create or splice in an entry to give
+ * access to a snapshot.
+ *
+ * We need to set the op flags directly. We hardcode
+ * DCACHE_OP_REVALIDATE because that's the only operation we have; if
+ * we ever extend zpl_dops_snapdirs we will need to update the op flags
+ * to match.
+ */
+ spin_lock(&dentry->d_lock);
+ dentry->d_op = &zpl_dops_snapdirs;
+ dentry->d_flags &= ~op_flags;
+ dentry->d_flags |= DCACHE_OP_REVALIDATE | extraflags;
+ spin_unlock(&dentry->d_lock);
+#endif
+}
+
static struct dentry *
zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
unsigned int flags)
@@ -236,10 +281,7 @@ zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
return (ERR_PTR(error));
ASSERT(error == 0 || ip == NULL);
- d_clear_d_op(dentry);
- d_set_d_op(dentry, &zpl_dops_snapdirs);
- dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
-
+ set_snapdir_dentry_ops(dentry, DCACHE_NEED_AUTOMOUNT);
return (d_splice_alias(ip, dentry));
}
@@ -373,8 +415,7 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0);
if (error == 0) {
- d_clear_d_op(dentry);
- d_set_d_op(dentry, &zpl_dops_snapdirs);
+ set_snapdir_dentry_ops(dentry, 0);
d_instantiate(dentry, ip);
}
diff --git a/sys/contrib/openzfs/module/zcommon/zfs_deleg.c b/sys/contrib/openzfs/module/zcommon/zfs_deleg.c
index 49bb534ca26c..87596558c9a1 100644
--- a/sys/contrib/openzfs/module/zcommon/zfs_deleg.c
+++ b/sys/contrib/openzfs/module/zcommon/zfs_deleg.c
@@ -59,6 +59,7 @@ const zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = {
{ZFS_DELEG_PERM_SNAPSHOT},
{ZFS_DELEG_PERM_SHARE},
{ZFS_DELEG_PERM_SEND},
+ {ZFS_DELEG_PERM_SEND_RAW},
{ZFS_DELEG_PERM_USERPROP},
{ZFS_DELEG_PERM_USERQUOTA},
{ZFS_DELEG_PERM_GROUPQUOTA},
diff --git a/sys/contrib/openzfs/module/zcommon/zpool_prop.c b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
index 04ae9f986d8f..07819ba2be8b 100644
--- a/sys/contrib/openzfs/module/zcommon/zpool_prop.c
+++ b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
@@ -467,9 +467,15 @@ vdev_prop_init(void)
zprop_register_index(VDEV_PROP_RAIDZ_EXPANDING, "raidz_expanding", 0,
PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "RAIDZ_EXPANDING",
boolean_table, sfeatures);
+ zprop_register_index(VDEV_PROP_SIT_OUT, "sit_out", 0,
+ PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "SIT_OUT", boolean_table,
+ sfeatures);
zprop_register_index(VDEV_PROP_TRIM_SUPPORT, "trim_support", 0,
PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "TRIMSUP",
boolean_table, sfeatures);
+ zprop_register_index(VDEV_PROP_AUTOSIT, "autosit", 0,
+ PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "AUTOSIT", boolean_table,
+ sfeatures);
/* default index properties */
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index df41e3b49204..bd6dc8edd8ca 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -486,13 +486,13 @@ static taskq_t *arc_flush_taskq;
static uint_t zfs_arc_evict_threads = 0;
/* The 7 states: */
-arc_state_t ARC_anon;
-arc_state_t ARC_mru;
-arc_state_t ARC_mru_ghost;
-arc_state_t ARC_mfu;
-arc_state_t ARC_mfu_ghost;
-arc_state_t ARC_l2c_only;
-arc_state_t ARC_uncached;
+static arc_state_t ARC_anon;
+/* */ arc_state_t ARC_mru;
+static arc_state_t ARC_mru_ghost;
+/* */ arc_state_t ARC_mfu;
+static arc_state_t ARC_mfu_ghost;
+static arc_state_t ARC_l2c_only;
+static arc_state_t ARC_uncached;
arc_stats_t arc_stats = {
{ "hits", KSTAT_DATA_UINT64 },
@@ -832,15 +832,15 @@ typedef struct arc_async_flush {
#define L2ARC_FEED_TYPES 4
/* L2ARC Performance Tunables */
-uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */
-uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */
-uint64_t l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */
-uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
-uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */
-uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */
-int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */
-int l2arc_feed_again = B_TRUE; /* turbo warmup */
-int l2arc_norw = B_FALSE; /* no reads during writes */
+static uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */
+static uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */
+static uint64_t l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */
+static uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
+static uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */
+static uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */
+static int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */
+static int l2arc_feed_again = B_TRUE; /* turbo warmup */
+static int l2arc_norw = B_FALSE; /* no reads during writes */
static uint_t l2arc_meta_percent = 33; /* limit on headers size */
/*
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index 7403f10d91b7..fccc4c5b5b94 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -2270,14 +2270,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
if (dn->dn_objset->os_dsl_dataset != NULL)
rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, FTAG);
#endif
- /*
- * We make this assert for private objects as well, but after we
- * check if we're already dirty. They are allowed to re-dirty
- * in syncing context.
- */
- ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
- dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
- (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
mutex_enter(&db->db_mtx);
/*
@@ -2289,12 +2281,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
db->db_state == DB_CACHED || db->db_state == DB_FILL ||
db->db_state == DB_NOFILL);
- mutex_enter(&dn->dn_mtx);
- dnode_set_dirtyctx(dn, tx, db);
- if (tx->tx_txg > dn->dn_dirty_txg)
- dn->dn_dirty_txg = tx->tx_txg;
- mutex_exit(&dn->dn_mtx);
-
if (db->db_blkid == DMU_SPILL_BLKID)
dn->dn_have_spill = B_TRUE;
@@ -2313,13 +2299,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
return (dr_next);
}
- /*
- * Only valid if not already dirty.
- */
- ASSERT(dn->dn_object == 0 ||
- dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
- (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
-
ASSERT3U(dn->dn_nlevels, >, db->db_level);
/*
diff --git a/sys/contrib/openzfs/module/zfs/ddt.c b/sys/contrib/openzfs/module/zfs/ddt.c
index d6658375f810..0dc9adc7fd4f 100644
--- a/sys/contrib/openzfs/module/zfs/ddt.c
+++ b/sys/contrib/openzfs/module/zfs/ddt.c
@@ -1701,9 +1701,11 @@ ddt_load(spa_t *spa)
}
}
- error = ddt_log_load(ddt);
- if (error != 0 && error != ENOENT)
- return (error);
+ if (ddt->ddt_flags & DDT_FLAG_LOG) {
+ error = ddt_log_load(ddt);
+ if (error != 0 && error != ENOENT)
+ return (error);
+ }
DDT_KSTAT_SET(ddt, dds_log_active_entries,
avl_numnodes(&ddt->ddt_log_active->ddl_tree));
diff --git a/sys/contrib/openzfs/module/zfs/ddt_log.c b/sys/contrib/openzfs/module/zfs/ddt_log.c
index 3d30e244c1f7..c7a2426f3a77 100644
--- a/sys/contrib/openzfs/module/zfs/ddt_log.c
+++ b/sys/contrib/openzfs/module/zfs/ddt_log.c
@@ -176,11 +176,13 @@ ddt_log_update_stats(ddt_t *ddt)
* that's reasonable to expect anyway.
*/
dmu_object_info_t doi;
- uint64_t nblocks;
- dmu_object_info(ddt->ddt_os, ddt->ddt_log_active->ddl_object, &doi);
- nblocks = doi.doi_physical_blocks_512;
- dmu_object_info(ddt->ddt_os, ddt->ddt_log_flushing->ddl_object, &doi);
- nblocks += doi.doi_physical_blocks_512;
+ uint64_t nblocks = 0;
+ if (dmu_object_info(ddt->ddt_os, ddt->ddt_log_active->ddl_object,
+ &doi) == 0)
+ nblocks += doi.doi_physical_blocks_512;
+ if (dmu_object_info(ddt->ddt_os, ddt->ddt_log_flushing->ddl_object,
+ &doi) == 0)
+ nblocks += doi.doi_physical_blocks_512;
ddt_object_t *ddo = &ddt->ddt_log_stats;
ddo->ddo_count =
@@ -243,6 +245,13 @@ ddt_log_alloc_entry(ddt_t *ddt)
}
static void
+ddt_log_free_entry(ddt_t *ddt, ddt_log_entry_t *ddle)
+{
+ kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ?
+ ddt_log_entry_flat_cache : ddt_log_entry_trad_cache, ddle);
+}
+
+static void
ddt_log_update_entry(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe)
{
/* Create the log tree entry from a live or stored entry */
@@ -347,8 +356,7 @@ ddt_log_take_first(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe)
ddt_histogram_sub_entry(ddt, &ddt->ddt_log_histogram, ddlwe);
avl_remove(&ddl->ddl_tree, ddle);
- kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ?
- ddt_log_entry_flat_cache : ddt_log_entry_trad_cache, ddle);
+ ddt_log_free_entry(ddt, ddle);
return (B_TRUE);
}
@@ -365,8 +373,7 @@ ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl, const ddt_key_t *ddk)
ddt_histogram_sub_entry(ddt, &ddt->ddt_log_histogram, &ddlwe);
avl_remove(&ddl->ddl_tree, ddle);
- kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ?
- ddt_log_entry_flat_cache : ddt_log_entry_trad_cache, ddle);
+ ddt_log_free_entry(ddt, ddle);
return (B_TRUE);
}
@@ -527,8 +534,7 @@ ddt_log_empty(ddt_t *ddt, ddt_log_t *ddl)
IMPLY(ddt->ddt_version == UINT64_MAX, avl_is_empty(&ddl->ddl_tree));
while ((ddle =
avl_destroy_nodes(&ddl->ddl_tree, &cookie)) != NULL) {
- kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ?
- ddt_log_entry_flat_cache : ddt_log_entry_trad_cache, ddle);
+ ddt_log_free_entry(ddt, ddle);
}
ASSERT(avl_is_empty(&ddl->ddl_tree));
}
@@ -727,7 +733,7 @@ ddt_log_load(ddt_t *ddt)
ddle = fe;
fe = AVL_NEXT(fl, fe);
avl_remove(fl, ddle);
-
+ ddt_log_free_entry(ddt, ddle);
ddle = ae;
ae = AVL_NEXT(al, ae);
}
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
index f7f808d5b8f7..a7a5c89bdafb 100644
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -759,6 +759,8 @@ dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
*/
uint8_t ibps = ibs - SPA_BLKPTRSHIFT;
limit = P2ROUNDUP(dmu_prefetch_max, 1 << ibs) >> ibs;
+ if (limit == 0)
+ end2 = start2;
do {
level2++;
start2 = P2ROUNDUP(start2, 1 << ibps) >> ibps;
@@ -1689,8 +1691,8 @@ dmu_object_cached_size(objset_t *os, uint64_t object,
dmu_object_info_from_dnode(dn, &doi);
- for (uint64_t off = 0; off < doi.doi_max_offset;
- off += dmu_prefetch_max) {
+ for (uint64_t off = 0; off < doi.doi_max_offset &&
+ dmu_prefetch_max > 0; off += dmu_prefetch_max) {
/* dbuf_read doesn't prefetch L1 blocks. */
dmu_prefetch_by_dnode(dn, 1, off,
dmu_prefetch_max, ZIO_PRIORITY_SYNC_READ);
diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c
index a77f338bdfd3..8e6b569c2100 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_objset.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c
@@ -2037,6 +2037,8 @@ userquota_updates_task(void *arg)
dn->dn_id_flags |= DN_ID_CHKED_BONUS;
}
dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
+ ASSERT3U(dn->dn_dirtycnt, >, 0);
+ dn->dn_dirtycnt--;
mutex_exit(&dn->dn_mtx);
multilist_sublist_remove(list, dn);
@@ -2070,6 +2072,10 @@ dnode_rele_task(void *arg)
dnode_t *dn;
while ((dn = multilist_sublist_head(list)) != NULL) {
+ mutex_enter(&dn->dn_mtx);
+ ASSERT3U(dn->dn_dirtycnt, >, 0);
+ dn->dn_dirtycnt--;
+ mutex_exit(&dn->dn_mtx);
multilist_sublist_remove(list, dn);
dnode_rele(dn, &os->os_synced_dnodes);
}
diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
index 51165d0bf723..3d3a9c713568 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
@@ -57,19 +57,19 @@ static unsigned int zfetch_max_sec_reap = 2;
/* min bytes to prefetch per stream (default 2MB) */
static unsigned int zfetch_min_distance = 2 * 1024 * 1024;
/* max bytes to prefetch per stream (default 8MB) */
-unsigned int zfetch_max_distance = 8 * 1024 * 1024;
+static unsigned int zfetch_max_distance = 8 * 1024 * 1024;
#else
/* min bytes to prefetch per stream (default 4MB) */
static unsigned int zfetch_min_distance = 4 * 1024 * 1024;
/* max bytes to prefetch per stream (default 64MB) */
-unsigned int zfetch_max_distance = 64 * 1024 * 1024;
+static unsigned int zfetch_max_distance = 64 * 1024 * 1024;
#endif
/* max bytes to prefetch indirects for per stream (default 128MB) */
-unsigned int zfetch_max_idistance = 128 * 1024 * 1024;
+static unsigned int zfetch_max_idistance = 128 * 1024 * 1024;
/* max request reorder distance within a stream (default 16MB) */
-unsigned int zfetch_max_reorder = 16 * 1024 * 1024;
+static unsigned int zfetch_max_reorder = 16 * 1024 * 1024;
/* Max log2 fraction of holes in a stream */
-unsigned int zfetch_hole_shift = 2;
+static unsigned int zfetch_hole_shift = 2;
typedef struct zfetch_stats {
kstat_named_t zfetchstat_hits;
diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c
index 963ff41232a3..6c150d31c669 100644
--- a/sys/contrib/openzfs/module/zfs/dnode.c
+++ b/sys/contrib/openzfs/module/zfs/dnode.c
@@ -173,9 +173,7 @@ dnode_cons(void *arg, void *unused, int kmflag)
dn->dn_allocated_txg = 0;
dn->dn_free_txg = 0;
dn->dn_assigned_txg = 0;
- dn->dn_dirty_txg = 0;
- dn->dn_dirtyctx = 0;
- dn->dn_dirtyctx_firstset = NULL;
+ dn->dn_dirtycnt = 0;
dn->dn_bonus = NULL;
dn->dn_have_spill = B_FALSE;
dn->dn_zio = NULL;
@@ -229,9 +227,7 @@ dnode_dest(void *arg, void *unused)
ASSERT0(dn->dn_allocated_txg);
ASSERT0(dn->dn_free_txg);
ASSERT0(dn->dn_assigned_txg);
- ASSERT0(dn->dn_dirty_txg);
- ASSERT0(dn->dn_dirtyctx);
- ASSERT0P(dn->dn_dirtyctx_firstset);
+ ASSERT0(dn->dn_dirtycnt);
ASSERT0P(dn->dn_bonus);
ASSERT(!dn->dn_have_spill);
ASSERT0P(dn->dn_zio);
@@ -692,10 +688,8 @@ dnode_destroy(dnode_t *dn)
dn->dn_allocated_txg = 0;
dn->dn_free_txg = 0;
dn->dn_assigned_txg = 0;
- dn->dn_dirty_txg = 0;
+ dn->dn_dirtycnt = 0;
- dn->dn_dirtyctx = 0;
- dn->dn_dirtyctx_firstset = NULL;
if (dn->dn_bonus != NULL) {
mutex_enter(&dn->dn_bonus->db_mtx);
dbuf_destroy(dn->dn_bonus);
@@ -800,11 +794,9 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
dn->dn_bonuslen = bonuslen;
dn->dn_checksum = ZIO_CHECKSUM_INHERIT;
dn->dn_compress = ZIO_COMPRESS_INHERIT;
- dn->dn_dirtyctx = 0;
dn->dn_free_txg = 0;
- dn->dn_dirtyctx_firstset = NULL;
- dn->dn_dirty_txg = 0;
+ dn->dn_dirtycnt = 0;
dn->dn_allocated_txg = tx->tx_txg;
dn->dn_id_flags = 0;
@@ -955,9 +947,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
ndn->dn_allocated_txg = odn->dn_allocated_txg;
ndn->dn_free_txg = odn->dn_free_txg;
ndn->dn_assigned_txg = odn->dn_assigned_txg;
- ndn->dn_dirty_txg = odn->dn_dirty_txg;
- ndn->dn_dirtyctx = odn->dn_dirtyctx;
- ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset;
+ ndn->dn_dirtycnt = odn->dn_dirtycnt;
ASSERT0(zfs_refcount_count(&odn->dn_tx_holds));
zfs_refcount_transfer(&ndn->dn_holds, &odn->dn_holds);
ASSERT(avl_is_empty(&ndn->dn_dbufs));
@@ -1020,9 +1010,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
odn->dn_allocated_txg = 0;
odn->dn_free_txg = 0;
odn->dn_assigned_txg = 0;
- odn->dn_dirty_txg = 0;
- odn->dn_dirtyctx = 0;
- odn->dn_dirtyctx_firstset = NULL;
+ odn->dn_dirtycnt = 0;
odn->dn_have_spill = B_FALSE;
odn->dn_zio = NULL;
odn->dn_oldused = 0;
@@ -1273,8 +1261,8 @@ dnode_check_slots_free(dnode_children_t *children, int idx, int slots)
} else if (DN_SLOT_IS_PTR(dn)) {
mutex_enter(&dn->dn_mtx);
boolean_t can_free = (dn->dn_type == DMU_OT_NONE &&
- zfs_refcount_is_zero(&dn->dn_holds) &&
- !DNODE_IS_DIRTY(dn));
+ dn->dn_dirtycnt == 0 &&
+ zfs_refcount_is_zero(&dn->dn_holds));
mutex_exit(&dn->dn_mtx);
if (!can_free)
@@ -1757,17 +1745,23 @@ dnode_hold(objset_t *os, uint64_t object, const void *tag, dnode_t **dnp)
* reference on the dnode. Returns FALSE if unable to add a
* new reference.
*/
+static boolean_t
+dnode_add_ref_locked(dnode_t *dn, const void *tag)
+{
+ ASSERT(MUTEX_HELD(&dn->dn_mtx));
+ if (zfs_refcount_is_zero(&dn->dn_holds))
+ return (FALSE);
+ VERIFY(1 < zfs_refcount_add(&dn->dn_holds, tag));
+ return (TRUE);
+}
+
boolean_t
dnode_add_ref(dnode_t *dn, const void *tag)
{
mutex_enter(&dn->dn_mtx);
- if (zfs_refcount_is_zero(&dn->dn_holds)) {
- mutex_exit(&dn->dn_mtx);
- return (FALSE);
- }
- VERIFY(1 < zfs_refcount_add(&dn->dn_holds, tag));
+ boolean_t r = dnode_add_ref_locked(dn, tag);
mutex_exit(&dn->dn_mtx);
- return (TRUE);
+ return (r);
}
void
@@ -1830,31 +1824,20 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots)
}
/*
- * Checks if the dnode itself is dirty, or is carrying any uncommitted records.
- * It is important to check both conditions, as some operations (eg appending
- * to a file) can dirty both as a single logical unit, but they are not synced
- * out atomically, so checking one and not the other can result in an object
- * appearing to be clean mid-way through a commit.
+ * Test if the dnode is dirty, or carrying uncommitted records.
*
- * Do not change this lightly! If you get it wrong, dmu_offset_next() can
- * detect a hole where there is really data, leading to silent corruption.
+ * dn_dirtycnt is the number of txgs this dnode is dirty on. It's incremented
+ * in dnode_setdirty() the first time the dnode is dirtied on a txg, and
+ * decremented in either dnode_rele_task() or userquota_updates_task() when the
+ * txg is synced out.
*/
boolean_t
dnode_is_dirty(dnode_t *dn)
{
mutex_enter(&dn->dn_mtx);
-
- for (int i = 0; i < TXG_SIZE; i++) {
- if (multilist_link_active(&dn->dn_dirty_link[i]) ||
- !list_is_empty(&dn->dn_dirty_records[i])) {
- mutex_exit(&dn->dn_mtx);
- return (B_TRUE);
- }
- }
-
+ boolean_t dirty = (dn->dn_dirtycnt != 0);
mutex_exit(&dn->dn_mtx);
-
- return (B_FALSE);
+ return (dirty);
}
void
@@ -1916,7 +1899,11 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
* dnode will hang around after we finish processing its
* children.
*/
- VERIFY(dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg));
+ mutex_enter(&dn->dn_mtx);
+ VERIFY(dnode_add_ref_locked(dn, (void *)(uintptr_t)tx->tx_txg));
+ dn->dn_dirtycnt++;
+ ASSERT3U(dn->dn_dirtycnt, <=, 3);
+ mutex_exit(&dn->dn_mtx);
(void) dbuf_dirty(dn->dn_dbuf, tx);
@@ -2221,32 +2208,6 @@ dnode_dirty_l1range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
mutex_exit(&dn->dn_dbufs_mtx);
}
-void
-dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, const void *tag)
-{
- /*
- * Don't set dirtyctx to SYNC if we're just modifying this as we
- * initialize the objset.
- */
- if (dn->dn_dirtyctx == DN_UNDIRTIED) {
- dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
-
- if (ds != NULL) {
- rrw_enter(&ds->ds_bp_rwlock, RW_READER, tag);
- }
- if (!BP_IS_HOLE(dn->dn_objset->os_rootbp)) {
- if (dmu_tx_is_syncing(tx))
- dn->dn_dirtyctx = DN_DIRTY_SYNC;
- else
- dn->dn_dirtyctx = DN_DIRTY_OPEN;
- dn->dn_dirtyctx_firstset = tag;
- }
- if (ds != NULL) {
- rrw_exit(&ds->ds_bp_rwlock, tag);
- }
- }
-}
-
static void
dnode_partial_zero(dnode_t *dn, uint64_t off, uint64_t blkoff, uint64_t len,
dmu_tx_t *tx)
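
The dn_dirtycnt counter introduced above is a per-dnode count of how many open txgs the dnode is currently dirty in. A hypothetical timeline (illustrative only, not taken from the patch) under the usual limit of three concurrently open txgs:

    dnode_setdirty(dn, txg 100)                  dn_dirtycnt: 0 -> 1
    dnode_setdirty(dn, txg 101)                  dn_dirtycnt: 1 -> 2   (the ASSERT keeps it <= 3)
    txg 100 syncs; dnode_rele_task() or
        userquota_updates_task() runs            dn_dirtycnt: 2 -> 1
    txg 101 syncs                                dn_dirtycnt: 1 -> 0   (dnode_is_dirty() is now B_FALSE)
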
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
index dceafbc27556..6f7c060f97f8 100644
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -251,11 +251,11 @@ spa_mode_t spa_mode_global = SPA_MODE_UNINIT;
#ifdef ZFS_DEBUG
/*
- * Everything except dprintf, set_error, spa, and indirect_remap is on
- * by default in debug builds.
+ * Everything except dprintf, set_error, indirect_remap, and raidz_reconstruct
+ * is on by default in debug builds.
*/
int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SET_ERROR |
- ZFS_DEBUG_INDIRECT_REMAP);
+ ZFS_DEBUG_INDIRECT_REMAP | ZFS_DEBUG_RAIDZ_RECONSTRUCT);
#else
int zfs_flags = 0;
#endif
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index 9cf35e379000..fc6d445f9785 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -29,7 +29,7 @@
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, Datto Inc. All rights reserved.
- * Copyright (c) 2021, Klara Inc.
+ * Copyright (c) 2021, 2025, Klara, Inc.
* Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP.
*/
@@ -100,7 +100,7 @@ static uint_t zfs_vdev_default_ms_shift = 29;
/* upper limit for metaslab size (16G) */
static uint_t zfs_vdev_max_ms_shift = 34;
-int vdev_validate_skip = B_FALSE;
+static int vdev_validate_skip = B_FALSE;
/*
* Since the DTL space map of a vdev is not expected to have a lot of
@@ -1086,6 +1086,10 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
}
}
+ if (top_level && (ops == &vdev_raidz_ops || ops == &vdev_draid_ops))
+ vd->vdev_autosit =
+ vdev_prop_default_numeric(VDEV_PROP_AUTOSIT);
+
/*
* Add ourselves to the parent's list of children.
*/
@@ -1187,6 +1191,9 @@ vdev_free(vdev_t *vd)
spa_spare_remove(vd);
if (vd->vdev_isl2cache)
spa_l2cache_remove(vd);
+ if (vd->vdev_prev_histo)
+ kmem_free(vd->vdev_prev_histo,
+ sizeof (uint64_t) * VDEV_L_HISTO_BUCKETS);
txg_list_destroy(&vd->vdev_ms_list);
txg_list_destroy(&vd->vdev_dtl_list);
@@ -3857,6 +3864,26 @@ vdev_load(vdev_t *vd)
}
}
+ if (vd == vd->vdev_top && vd->vdev_top_zap != 0) {
+ spa_t *spa = vd->vdev_spa;
+ uint64_t autosit;
+
+ error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
+ vdev_prop_to_name(VDEV_PROP_AUTOSIT), sizeof (autosit),
+ 1, &autosit);
+ if (error == 0) {
+ vd->vdev_autosit = autosit == 1;
+ } else if (error == ENOENT) {
+ vd->vdev_autosit = vdev_prop_default_numeric(
+ VDEV_PROP_AUTOSIT);
+ } else {
+ vdev_dbgmsg(vd,
+ "vdev_load: zap_lookup(top_zap=%llu) "
+ "failed [error=%d]",
+ (u_longlong_t)vd->vdev_top_zap, error);
+ }
+ }
+
/*
* Load any rebuild state from the top-level vdev zap.
*/
@@ -4616,6 +4643,8 @@ vdev_clear(spa_t *spa, vdev_t *vd)
vd->vdev_stat.vs_checksum_errors = 0;
vd->vdev_stat.vs_dio_verify_errors = 0;
vd->vdev_stat.vs_slow_ios = 0;
+ atomic_store_64(&vd->vdev_outlier_count, 0);
+ vd->vdev_read_sit_out_expire = 0;
for (int c = 0; c < vd->vdev_children; c++)
vdev_clear(spa, vd->vdev_child[c]);
@@ -6107,6 +6136,56 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
}
vd->vdev_failfast = intval & 1;
break;
+ case VDEV_PROP_SIT_OUT:
+ /* Only expose this for a draid or raidz leaf */
+ if (!vd->vdev_ops->vdev_op_leaf ||
+ vd->vdev_top == NULL ||
+ (vd->vdev_top->vdev_ops != &vdev_raidz_ops &&
+ vd->vdev_top->vdev_ops != &vdev_draid_ops)) {
+ error = ENOTSUP;
+ break;
+ }
+ if (nvpair_value_uint64(elem, &intval) != 0) {
+ error = EINVAL;
+ break;
+ }
+ if (intval == 1) {
+ vdev_t *ancestor = vd;
+ while (ancestor->vdev_parent != vd->vdev_top)
+ ancestor = ancestor->vdev_parent;
+ vdev_t *pvd = vd->vdev_top;
+ uint_t sitouts = 0;
+ for (int i = 0; i < pvd->vdev_children; i++) {
+ if (pvd->vdev_child[i] == ancestor)
+ continue;
+ if (vdev_sit_out_reads(
+ pvd->vdev_child[i], 0)) {
+ sitouts++;
+ }
+ }
+ if (sitouts >= vdev_get_nparity(pvd)) {
+ error = ZFS_ERR_TOO_MANY_SITOUTS;
+ break;
+ }
+ if (error == 0)
+ vdev_raidz_sit_child(vd,
+ INT64_MAX - gethrestime_sec());
+ } else {
+ vdev_raidz_unsit_child(vd);
+ }
+ break;
+ case VDEV_PROP_AUTOSIT:
+ if (vd->vdev_ops != &vdev_raidz_ops &&
+ vd->vdev_ops != &vdev_draid_ops) {
+ error = ENOTSUP;
+ break;
+ }
+ if (nvpair_value_uint64(elem, &intval) != 0) {
+ error = EINVAL;
+ break;
+ }
+ vd->vdev_autosit = intval == 1;
+ break;
case VDEV_PROP_CHECKSUM_N:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
@@ -6456,6 +6535,19 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
ZPROP_SRC_NONE);
}
continue;
+ case VDEV_PROP_SIT_OUT:
+ /* Only expose this for a draid or raidz leaf */
+ if (vd->vdev_ops->vdev_op_leaf &&
+ vd->vdev_top != NULL &&
+ (vd->vdev_top->vdev_ops ==
+ &vdev_raidz_ops ||
+ vd->vdev_top->vdev_ops ==
+ &vdev_draid_ops)) {
+ vdev_prop_add_list(outnvl, propname,
+ NULL, vdev_sit_out_reads(vd, 0),
+ ZPROP_SRC_NONE);
+ }
+ continue;
case VDEV_PROP_TRIM_SUPPORT:
/* only valid for leaf vdevs */
if (vd->vdev_ops->vdev_op_leaf) {
@@ -6506,6 +6598,29 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
vdev_prop_add_list(outnvl, propname, strval,
intval, src);
break;
+ case VDEV_PROP_AUTOSIT:
+			/* Only raidz and draid vdevs have this property */
+ if (vd->vdev_ops != &vdev_raidz_ops &&
+ vd->vdev_ops != &vdev_draid_ops) {
+ src = ZPROP_SRC_NONE;
+ intval = ZPROP_BOOLEAN_NA;
+ } else {
+ err = vdev_prop_get_int(vd, prop,
+ &intval);
+ if (err && err != ENOENT)
+ break;
+
+ if (intval ==
+ vdev_prop_default_numeric(prop))
+ src = ZPROP_SRC_DEFAULT;
+ else
+ src = ZPROP_SRC_LOCAL;
+ }
+
+ vdev_prop_add_list(outnvl, propname, NULL,
+ intval, src);
+ break;
+
case VDEV_PROP_CHECKSUM_N:
case VDEV_PROP_CHECKSUM_T:
case VDEV_PROP_IO_N:
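
As a worked reading of the VDEV_PROP_SIT_OUT guard above: on a raidz2 top-level vdev, vdev_get_nparity() is 2, so a request to sit out a leaf succeeds while fewer than two of its siblings are already sitting out; once two siblings are sitting out, a further request fails with ZFS_ERR_TOO_MANY_SITOUTS, because sitting out a third child would exceed the parity available to reconstruct its data.
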
diff --git a/sys/contrib/openzfs/module/zfs/vdev_draid.c b/sys/contrib/openzfs/module/zfs/vdev_draid.c
index a05289102af2..8588cfee3f7d 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_draid.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_draid.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2018 Intel Corporation.
* Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2025, Klara, Inc.
*/
#include <sys/zfs_context.h>
@@ -1996,6 +1997,33 @@ vdev_draid_io_start_read(zio_t *zio, raidz_row_t *rr)
rc->rc_allow_repair = 1;
}
}
+
+ if (vdev_sit_out_reads(cvd, zio->io_flags)) {
+ rr->rr_outlier_cnt++;
+ ASSERT0(rc->rc_latency_outlier);
+ rc->rc_latency_outlier = 1;
+ }
+ }
+
+ /*
+ * When the row contains a latency outlier and sufficient parity
+ * exists to reconstruct the column data, then skip reading the
+ * known slow child vdev as a performance optimization.
+ */
+ if (rr->rr_outlier_cnt > 0 &&
+ (rr->rr_firstdatacol - rr->rr_missingparity) >=
+ (rr->rr_missingdata + 1)) {
+
+ for (int c = rr->rr_cols - 1; c >= rr->rr_firstdatacol; c--) {
+ raidz_col_t *rc = &rr->rr_col[c];
+
+ if (rc->rc_error == 0 && rc->rc_latency_outlier) {
+ rr->rr_missingdata++;
+ rc->rc_error = SET_ERROR(EAGAIN);
+ rc->rc_skipped = 1;
+ break;
+ }
+ }
}
/*
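
A brief worked reading of the skip condition above (interpreting rr_firstdatacol as the number of parity columns in the row): for a double-parity row with nothing else missing, 2 - 0 >= 0 + 1 holds, so one latency outlier can be marked EAGAIN and reconstructed from parity; with one data column already missing, 2 - 0 >= 1 + 1 still holds; for a single-parity row that is already missing a column, 1 - 0 >= 1 + 1 fails, so the slow child is read normally rather than skipped.
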
diff --git a/sys/contrib/openzfs/module/zfs/vdev_file.c b/sys/contrib/openzfs/module/zfs/vdev_file.c
index f457669bc809..20b4db65ec06 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_file.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_file.c
@@ -228,7 +228,8 @@ vdev_file_io_strategy(void *arg)
abd_return_buf_copy(zio->io_abd, buf, size);
} else {
buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
- err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid);
+ err = zfs_file_pwrite(vf->vf_file, buf, size, off,
+ vd->vdev_ashift, &resid);
abd_return_buf(zio->io_abd, buf, size);
}
zio->io_error = err;
diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c
index c12713b107bf..e69e5598939e 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_queue.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c
@@ -122,7 +122,7 @@
* The maximum number of i/os active to each device. Ideally, this will be >=
* the sum of each queue's max_active.
*/
-uint_t zfs_vdev_max_active = 1000;
+static uint_t zfs_vdev_max_active = 1000;
/*
* Per-queue limits on the number of i/os active to each device. If the
diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
index b597d6daefde..56b8e3b60b22 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
@@ -24,6 +24,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
* Copyright (c) 2016 Gvozden Nešković. All rights reserved.
+ * Copyright (c) 2025, Klara, Inc.
*/
#include <sys/zfs_context.h>
@@ -356,6 +357,32 @@ unsigned long raidz_expand_max_reflow_bytes = 0;
uint_t raidz_expand_pause_point = 0;
/*
+ * The duration in seconds for which a slow drive will sit out of reads.
+ */
+static unsigned long vdev_read_sit_out_secs = 600;
+
+/*
+ * How often each RAID-Z and dRAID vdev will check for slow disk outliers.
+ * Increasing this interval will reduce the sensitivity of detection (since all
+ * I/Os since the last check are included in the statistics), but will slow the
+ * response to a disk developing a problem.
+ *
+ * Defaults to once per second; setting extremely small values may cause
+ * negative performance effects.
+ */
+static hrtime_t vdev_raidz_outlier_check_interval_ms = 1000;
+
+/*
+ * When performing slow outlier checks for RAID-Z and dRAID vdevs, this value is
+ * used to determine how far out an outlier must be before it counts as an event
+ * worth considering.
+ *
+ * Smaller values will result in more aggressive sitting out of disks that may
+ * have problems, but may significantly increase the rate of spurious sit-outs.
+ */
+static uint32_t vdev_raidz_outlier_insensitivity = 50;
+
+/*
* Maximum amount of copy io's outstanding at once.
*/
#ifdef _ILP32
@@ -2311,6 +2338,41 @@ vdev_raidz_min_asize(vdev_t *vd)
vd->vdev_children);
}
+/*
+ * Return B_TRUE if a read should be skipped due to being too slow.
+ *
+ * In vdev_child_slow_outlier() we look for outliers based on disk
+ * latency from the most recent child reads. Here we're checking if,
+ * over time, a disk has been an outlier too many times and is
+ * now in a sit out period.
+ */
+boolean_t
+vdev_sit_out_reads(vdev_t *vd, zio_flag_t io_flags)
+{
+ if (vdev_read_sit_out_secs == 0)
+ return (B_FALSE);
+
+ /* Avoid skipping a data column read when scrubbing */
+ if (io_flags & ZIO_FLAG_SCRUB)
+ return (B_FALSE);
+
+ if (!vd->vdev_ops->vdev_op_leaf) {
+ boolean_t sitting = B_FALSE;
+ for (int c = 0; c < vd->vdev_children; c++) {
+ sitting |= vdev_sit_out_reads(vd->vdev_child[c],
+ io_flags);
+ }
+ return (sitting);
+ }
+
+ if (vd->vdev_read_sit_out_expire >= gethrestime_sec())
+ return (B_TRUE);
+
+ vd->vdev_read_sit_out_expire = 0;
+
+ return (B_FALSE);
+}
+
void
vdev_raidz_child_done(zio_t *zio)
{
@@ -2475,6 +2537,45 @@ vdev_raidz_io_start_read_row(zio_t *zio, raidz_row_t *rr, boolean_t forceparity)
rc->rc_skipped = 1;
continue;
}
+
+ if (vdev_sit_out_reads(cvd, zio->io_flags)) {
+ rr->rr_outlier_cnt++;
+ ASSERT0(rc->rc_latency_outlier);
+ rc->rc_latency_outlier = 1;
+ }
+ }
+
+ /*
+ * When the row contains a latency outlier and sufficient parity
+ * exists to reconstruct the column data, then skip reading the
+ * known slow child vdev as a performance optimization.
+ */
+ if (rr->rr_outlier_cnt > 0 &&
+ (rr->rr_firstdatacol - rr->rr_missingparity) >=
+ (rr->rr_missingdata + 1)) {
+
+ for (int c = rr->rr_cols - 1; c >= 0; c--) {
+ raidz_col_t *rc = &rr->rr_col[c];
+
+ if (rc->rc_error == 0 && rc->rc_latency_outlier) {
+ if (c >= rr->rr_firstdatacol)
+ rr->rr_missingdata++;
+ else
+ rr->rr_missingparity++;
+ rc->rc_error = SET_ERROR(EAGAIN);
+ rc->rc_skipped = 1;
+ break;
+ }
+ }
+ }
+
+ for (int c = rr->rr_cols - 1; c >= 0; c--) {
+ raidz_col_t *rc = &rr->rr_col[c];
+ vdev_t *cvd = vd->vdev_child[rc->rc_devidx];
+
+ if (rc->rc_error || rc->rc_size == 0)
+ continue;
+
if (forceparity ||
c >= rr->rr_firstdatacol || rr->rr_missingdata > 0 ||
(zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) {
@@ -2498,6 +2599,7 @@ vdev_raidz_io_start_read_phys_cols(zio_t *zio, raidz_map_t *rm)
ASSERT3U(prc->rc_devidx, ==, i);
vdev_t *cvd = vd->vdev_child[i];
+
if (!vdev_readable(cvd)) {
prc->rc_error = SET_ERROR(ENXIO);
prc->rc_tried = 1; /* don't even try */
@@ -2774,6 +2876,239 @@ vdev_raidz_worst_error(raidz_row_t *rr)
return (error);
}
+/*
+ * Find the median value from a set of n values
+ */
+static uint64_t
+latency_median_value(const uint64_t *data, size_t n)
+{
+ uint64_t m;
+
+ if (n % 2 == 0)
+ m = (data[(n >> 1) - 1] + data[n >> 1]) >> 1;
+ else
+ m = data[((n + 1) >> 1) - 1];
+
+ return (m);
+}
+
+/*
+ * Calculate the outlier fence from a set of n latency values
+ *
+ * fence = Q3 + vdev_raidz_outlier_insensitivity x (Q3 - Q1)
+ */
+static uint64_t
+latency_quartiles_fence(const uint64_t *data, size_t n, uint64_t *iqr)
+{
+ uint64_t q1 = latency_median_value(&data[0], n >> 1);
+ uint64_t q3 = latency_median_value(&data[(n + 1) >> 1], n >> 1);
+
+ /*
+	 * To avoid detecting false positive outliers when N is small and
+	 * the latency values are very close, make sure the IQR
+	 * is at least 25% of Q1.
+ */
+ *iqr = MAX(q3 - q1, q1 / 4);
+
+ return (q3 + (*iqr * vdev_raidz_outlier_insensitivity));
+}
+#define LAT_CHILDREN_MIN 5
+#define LAT_OUTLIER_LIMIT 20
+
+static int
+latency_compare(const void *arg1, const void *arg2)
+{
+ const uint64_t *l1 = (uint64_t *)arg1;
+ const uint64_t *l2 = (uint64_t *)arg2;
+
+ return (TREE_CMP(*l1, *l2));
+}
+
+void
+vdev_raidz_sit_child(vdev_t *svd, uint64_t secs)
+{
+ for (int c = 0; c < svd->vdev_children; c++)
+ vdev_raidz_sit_child(svd->vdev_child[c], secs);
+
+ if (!svd->vdev_ops->vdev_op_leaf)
+ return;
+
+ /* Begin a sit out period for this slow drive */
+ svd->vdev_read_sit_out_expire = gethrestime_sec() +
+ secs;
+
+ /* Count each slow io period */
+ mutex_enter(&svd->vdev_stat_lock);
+ svd->vdev_stat.vs_slow_ios++;
+ mutex_exit(&svd->vdev_stat_lock);
+}
+
+void
+vdev_raidz_unsit_child(vdev_t *vd)
+{
+ for (int c = 0; c < vd->vdev_children; c++)
+ vdev_raidz_unsit_child(vd->vdev_child[c]);
+
+ if (!vd->vdev_ops->vdev_op_leaf)
+ return;
+
+ vd->vdev_read_sit_out_expire = 0;
+}
+
+/*
+ * Check for any latency outlier from latest set of child reads.
+ *
+ * Uses a Tukey's fence, with K = 50, for detecting extreme outliers. This
+ * rule defines extreme outliers as data points outside the fence of the
+ * third quartile plus fifty times the Interquartile Range (IQR). This range
+ * is the distance between the first and third quartile.
+ *
+ * Fifty is an extremely large value for Tukey's fence, but the outliers we're
+ * attempting to detect here are orders of magnitude larger than the
+ * median. This large value should capture any truly faulty disk quickly,
+ * without causing spurious sit-outs.
+ *
+ * To further avoid spurious sit-outs, vdevs must be detected multiple times
+ * as an outlier before they are sat, and outlier counts will gradually decay.
+ * Every nchildren times we have detected an outlier, we subtract 2 from the
+ * outlier count of all children. If detected outliers are close to uniformly
+ * distributed, this will result in the outlier count remaining close to 0
+ * (in expectation; over long enough time-scales, spurious sit-outs are still
+ * possible).
+ */
+static void
+vdev_child_slow_outlier(zio_t *zio)
+{
+ vdev_t *vd = zio->io_vd;
+ if (!vd->vdev_autosit || vdev_read_sit_out_secs == 0 ||
+ vd->vdev_children < LAT_CHILDREN_MIN)
+ return;
+
+ hrtime_t now = getlrtime();
+ uint64_t last = atomic_load_64(&vd->vdev_last_latency_check);
+
+ if ((now - last) < MSEC2NSEC(vdev_raidz_outlier_check_interval_ms))
+ return;
+
+ /* Allow a single winner when there are racing callers. */
+ if (atomic_cas_64(&vd->vdev_last_latency_check, last, now) != last)
+ return;
+
+ int children = vd->vdev_children;
+ uint64_t *lat_data = kmem_alloc(sizeof (uint64_t) * children, KM_SLEEP);
+
+ for (int c = 0; c < children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+ if (cvd->vdev_prev_histo == NULL) {
+ mutex_enter(&cvd->vdev_stat_lock);
+ size_t size =
+ sizeof (cvd->vdev_stat_ex.vsx_disk_histo[0]);
+ cvd->vdev_prev_histo = kmem_zalloc(size, KM_SLEEP);
+ memcpy(cvd->vdev_prev_histo,
+ cvd->vdev_stat_ex.vsx_disk_histo[ZIO_TYPE_READ],
+ size);
+ mutex_exit(&cvd->vdev_stat_lock);
+ }
+ }
+ uint64_t max = 0;
+ vdev_t *svd = NULL;
+ uint_t sitouts = 0;
+ boolean_t skip = B_FALSE, svd_sitting = B_FALSE;
+ for (int c = 0; c < children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+ boolean_t sitting = vdev_sit_out_reads(cvd, 0) ||
+ cvd->vdev_state != VDEV_STATE_HEALTHY;
+
+ /* We can't sit out more disks than we have parity */
+ if (sitting && ++sitouts >= vdev_get_nparity(vd))
+ skip = B_TRUE;
+
+ mutex_enter(&cvd->vdev_stat_lock);
+
+ uint64_t *prev_histo = cvd->vdev_prev_histo;
+ uint64_t *histo =
+ cvd->vdev_stat_ex.vsx_disk_histo[ZIO_TYPE_READ];
+ if (skip) {
+ size_t size =
+ sizeof (cvd->vdev_stat_ex.vsx_disk_histo[0]);
+ memcpy(prev_histo, histo, size);
+ mutex_exit(&cvd->vdev_stat_lock);
+ continue;
+ }
+ uint64_t count = 0;
+ lat_data[c] = 0;
+ for (int i = 0; i < VDEV_L_HISTO_BUCKETS; i++) {
+ uint64_t this_count = histo[i] - prev_histo[i];
+ lat_data[c] += (1ULL << i) * this_count;
+ count += this_count;
+ }
+ size_t size = sizeof (cvd->vdev_stat_ex.vsx_disk_histo[0]);
+ memcpy(prev_histo, histo, size);
+ mutex_exit(&cvd->vdev_stat_lock);
+ lat_data[c] /= MAX(1, count);
+
+ /* Wait until all disks have been read from */
+ if (lat_data[c] == 0 && !sitting) {
+ skip = B_TRUE;
+ continue;
+ }
+
+ /* Keep track of the vdev with largest value */
+ if (lat_data[c] > max) {
+ max = lat_data[c];
+ svd = cvd;
+ svd_sitting = sitting;
+ }
+ }
+
+ if (skip) {
+ kmem_free(lat_data, sizeof (uint64_t) * children);
+ return;
+ }
+
+ qsort((void *)lat_data, children, sizeof (uint64_t), latency_compare);
+
+ uint64_t iqr;
+ uint64_t fence = latency_quartiles_fence(lat_data, children, &iqr);
+
+ ASSERT3U(lat_data[children - 1], ==, max);
+ if (max > fence && !svd_sitting) {
+ ASSERT3U(iqr, >, 0);
+ uint64_t incr = MAX(1, MIN((max - fence) / iqr,
+ LAT_OUTLIER_LIMIT / 4));
+ vd->vdev_outlier_count += incr;
+ if (vd->vdev_outlier_count >= children) {
+ for (int c = 0; c < children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+ cvd->vdev_outlier_count -= 2;
+ cvd->vdev_outlier_count = MAX(0,
+ cvd->vdev_outlier_count);
+ }
+ vd->vdev_outlier_count = 0;
+ }
+ /*
+ * Keep track of how many times this child has had
+		 * an outlier read. A disk that persistently has a
+		 * higher outlier count than its peers will be considered
+		 * a slow disk.
+ */
+ svd->vdev_outlier_count += incr;
+ if (svd->vdev_outlier_count > LAT_OUTLIER_LIMIT) {
+ ASSERT0(svd->vdev_read_sit_out_expire);
+ vdev_raidz_sit_child(svd, vdev_read_sit_out_secs);
+ (void) zfs_ereport_post(FM_EREPORT_ZFS_SITOUT,
+ zio->io_spa, svd, NULL, NULL, 0);
+ vdev_dbgmsg(svd, "begin read sit out for %d secs",
+ (int)vdev_read_sit_out_secs);
+
+ for (int c = 0; c < vd->vdev_children; c++)
+ vd->vdev_child[c]->vdev_outlier_count = 0;
+ }
+ }
+
+ kmem_free(lat_data, sizeof (uint64_t) * children);
+}
+
static void
vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
{
@@ -3515,6 +3850,9 @@ vdev_raidz_io_done(zio_t *zio)
raidz_row_t *rr = rm->rm_row[i];
vdev_raidz_io_done_verified(zio, rr);
}
+ /* Periodically check for a read outlier */
+ if (zio->io_type == ZIO_TYPE_READ)
+ vdev_child_slow_outlier(zio);
zio_checksum_verified(zio);
} else {
/*
@@ -5155,3 +5493,10 @@ ZFS_MODULE_PARAM(zfs_vdev, raidz_, io_aggregate_rows, ULONG, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, scrub_after_expand, INT, ZMOD_RW,
"For expanded RAIDZ, automatically start a pool scrub when expansion "
"completes");
+ZFS_MODULE_PARAM(zfs_vdev, vdev_, read_sit_out_secs, ULONG, ZMOD_RW,
+ "Raidz/draid slow disk sit out time period in seconds");
+ZFS_MODULE_PARAM(zfs_vdev, vdev_, raidz_outlier_check_interval_ms, U64,
+ ZMOD_RW, "Interval to check for slow raidz/draid children");
+ZFS_MODULE_PARAM(zfs_vdev, vdev_, raidz_outlier_insensitivity, UINT,
+ ZMOD_RW, "How insensitive the slow raidz/draid child check should be");
+/* END CSTYLED */
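
To make the fence arithmetic above concrete, here is a small standalone sketch (not part of the patch; the median() helper, the OUTLIER_INSENSITIVITY constant and the latency values are hypothetical stand-ins mirroring latency_median_value() and latency_quartiles_fence() with the default insensitivity of 50):

    #include <stdio.h>
    #include <stdint.h>

    #define	OUTLIER_INSENSITIVITY	50	/* default vdev_raidz_outlier_insensitivity */

    /* Median of n already-sorted values, mirroring latency_median_value(). */
    static uint64_t
    median(const uint64_t *d, size_t n)
    {
    	if (n % 2 == 0)
    		return ((d[n / 2 - 1] + d[n / 2]) / 2);
    	return (d[n / 2]);
    }

    int
    main(void)
    {
    	/* Sorted per-child average read latencies, in nanoseconds. */
    	uint64_t lat[] = { 1000000, 1000000, 1000000, 1000000,
    	    2000000, 2000000, 2000000, 800000000 };
    	size_t n = sizeof (lat) / sizeof (lat[0]);

    	uint64_t q1 = median(&lat[0], n / 2);		/* 1 ms */
    	uint64_t q3 = median(&lat[(n + 1) / 2], n / 2);	/* 2 ms */
    	uint64_t iqr = (q3 - q1 > q1 / 4) ? (q3 - q1) : (q1 / 4);
    	uint64_t fence = q3 + iqr * OUTLIER_INSENSITIVITY;

    	/* fence is 52 ms, so the 800 ms child counts as an outlier. */
    	printf("fence=%llu ns, max=%llu ns, outlier=%s\n",
    	    (unsigned long long)fence, (unsigned long long)lat[n - 1],
    	    lat[n - 1] > fence ? "yes" : "no");
    	return (0);
    }

With these numbers the per-check increment is also capped: MIN((800 ms - 52 ms) / 1 ms, LAT_OUTLIER_LIMIT / 4) gives 5, so the slow child still needs several consecutive detections before its count exceeds LAT_OUTLIER_LIMIT and it is sat out.
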
diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c
index 2f7a739da241..2ce0121324ad 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_removal.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c
@@ -105,7 +105,7 @@ static const uint_t zfs_remove_max_copy_bytes = 64 * 1024 * 1024;
*
* See also the accessor function spa_remove_max_segment().
*/
-uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE;
+static uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE;
/*
* Ignore hard IO errors during device removal. When set if a device
@@ -137,7 +137,7 @@ uint_t vdev_removal_max_span = 32 * 1024;
* This is used by the test suite so that it can ensure that certain
* actions happen while in the middle of a removal.
*/
-int zfs_removal_suspend_progress = 0;
+static int zfs_removal_suspend_progress = 0;
#define VDEV_REMOVAL_ZAP_OBJS "lzap"
diff --git a/sys/contrib/openzfs/module/zfs/zfeature.c b/sys/contrib/openzfs/module/zfs/zfeature.c
index 0816ea134bf3..4cf9e0dbb405 100644
--- a/sys/contrib/openzfs/module/zfs/zfeature.c
+++ b/sys/contrib/openzfs/module/zfs/zfeature.c
@@ -308,6 +308,7 @@ feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount,
ASSERT(VALID_FEATURE_OR_NONE(feature->fi_feature));
uint64_t zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ?
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
+ ASSERT(MUTEX_HELD(&spa->spa_feat_stats_lock));
VERIFY0(zap_update(spa->spa_meta_objset, zapobj, feature->fi_guid,
sizeof (uint64_t), 1, &refcount, tx));
@@ -360,7 +361,9 @@ feature_enable_sync(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
feature->fi_guid, 1, strlen(feature->fi_desc) + 1,
feature->fi_desc, tx));
+ mutex_enter(&spa->spa_feat_stats_lock);
feature_sync(spa, feature, initial_refcount, tx);
+ mutex_exit(&spa->spa_feat_stats_lock);
if (spa_feature_is_enabled(spa, SPA_FEATURE_ENABLED_TXG)) {
uint64_t enabling_txg = dmu_tx_get_txg(tx);
@@ -416,6 +419,7 @@ feature_do_action(spa_t *spa, spa_feature_t fid, feature_action_t action,
ASSERT(dmu_tx_is_syncing(tx));
ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
+ mutex_enter(&spa->spa_feat_stats_lock);
VERIFY3U(feature_get_refcount(spa, feature, &refcount), !=, ENOTSUP);
switch (action) {
@@ -433,6 +437,7 @@ feature_do_action(spa_t *spa, spa_feature_t fid, feature_action_t action,
}
feature_sync(spa, feature, refcount, tx);
+ mutex_exit(&spa->spa_feat_stats_lock);
}
void
diff --git a/sys/contrib/openzfs/module/zfs/zfs_crrd.c b/sys/contrib/openzfs/module/zfs/zfs_crrd.c
index f9267ed41d71..30d4c7c36897 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_crrd.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_crrd.c
@@ -162,9 +162,9 @@ dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg)
daydiff = time - rrd_tail(&db->dbr_days);
monthdiff = time - rrd_tail(&db->dbr_months);
- if (monthdiff >= 0 && monthdiff >= SEC2NSEC(30 * 24 * 60 * 60))
+ if (monthdiff >= 0 && monthdiff >= 30 * 24 * 60 * 60)
rrd_add(&db->dbr_months, time, txg);
- else if (daydiff >= 0 && daydiff >= SEC2NSEC(24 * 60 * 60))
+ else if (daydiff >= 0 && daydiff >= 24 * 60 * 60)
rrd_add(&db->dbr_days, time, txg);
else if (minutedif >= 0)
rrd_add(&db->dbr_minutes, time, txg);
@@ -208,7 +208,8 @@ dbrrd_closest(hrtime_t tv, const rrd_data_t *r1, const rrd_data_t *r2)
if (r2 == NULL)
return (r1);
- return (ABS(tv - r1->rrdd_time) < ABS(tv - r2->rrdd_time) ? r1 : r2);
+ return (ABS(tv - (hrtime_t)r1->rrdd_time) <
+ ABS(tv - (hrtime_t)r2->rrdd_time) ? r1 : r2);
}
uint64_t
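
For reference, the corrected comparisons are in whole seconds, which matches the SEC2NSEC() removal (assuming the rrd tail timestamps are second-resolution): the day threshold is 24 * 60 * 60 = 86,400 and the month threshold is 30 * 24 * 60 * 60 = 2,592,000; wrapping these in SEC2NSEC() inflated them by a factor of 10^9, so the day and month rings would essentially never have been updated.
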
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index 121b966b9864..5ca7c2320c4e 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -683,6 +683,7 @@ zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
dsl_dataset_t *ds;
const char *cp;
int error;
+ boolean_t rawok = (zc->zc_flags & 0x8);
/*
* Generate the current snapshot name from the given objsetid, then
@@ -705,6 +706,10 @@ zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
ZFS_DELEG_PERM_SEND, cr);
+ if (error != 0 && rawok == B_TRUE) {
+ error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
+ ZFS_DELEG_PERM_SEND_RAW, cr);
+ }
dsl_dataset_rele(ds, FTAG);
dsl_pool_rele(dp, FTAG);
@@ -714,9 +719,17 @@ zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
static int
zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
+ boolean_t rawok = nvlist_exists(innvl, "rawok");
+ int error;
+
(void) innvl;
- return (zfs_secpolicy_write_perms(zc->zc_name,
- ZFS_DELEG_PERM_SEND, cr));
+ error = zfs_secpolicy_write_perms(zc->zc_name,
+ ZFS_DELEG_PERM_SEND, cr);
+ if (error != 0 && rawok == B_TRUE) {
+ error = zfs_secpolicy_write_perms(zc->zc_name,
+ ZFS_DELEG_PERM_SEND_RAW, cr);
+ }
+ return (error);
}
static int
@@ -4726,7 +4739,7 @@ zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
error = error ? error : resume_err;
}
zfs_vfs_rele(zfsvfs);
- } else if ((zv = zvol_suspend(fsname)) != NULL) {
+ } else if (zvol_suspend(fsname, &zv) == 0) {
error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
outnvl);
zvol_resume(zv);
@@ -5448,7 +5461,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, const char *origin,
}
error = error ? error : end_err;
zfs_vfs_rele(zfsvfs);
- } else if ((zv = zvol_suspend(tofs)) != NULL) {
+ } else if (zvol_suspend(tofs, &zv) == 0) {
error = dmu_recv_end(&drc, zvol_tag(zv));
zvol_resume(zv);
} else {
@@ -7619,7 +7632,7 @@ zfs_ioctl_init(void)
zfs_ioctl_register("scrub", ZFS_IOC_POOL_SCRUB,
zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME,
- POOL_CHECK_NONE, B_TRUE, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
zfs_keys_pool_scrub, ARRAY_SIZE(zfs_keys_pool_scrub));
zfs_ioctl_register("get_props", ZFS_IOC_POOL_GET_PROPS,
diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c
index 2fd3e1c37045..faced0db7e9e 100644
--- a/sys/contrib/openzfs/module/zfs/zvol.c
+++ b/sys/contrib/openzfs/module/zfs/zvol.c
@@ -1145,20 +1145,34 @@ zvol_tag(zvol_state_t *zv)
/*
* Suspend the zvol for recv and rollback.
*/
-zvol_state_t *
-zvol_suspend(const char *name)
+int
+zvol_suspend(const char *name, zvol_state_t **zvp)
{
zvol_state_t *zv;
zv = zvol_find_by_name(name, RW_WRITER);
if (zv == NULL)
- return (NULL);
+ return (SET_ERROR(ENOENT));
/* block all I/O, release in zvol_resume. */
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
ASSERT(RW_WRITE_HELD(&zv->zv_suspend_lock));
+ /*
+ * If it's being removed, unlock and return error. It doesn't make any
+ * sense to try to suspend a zvol being removed, but being here also
+ * means that zvol_remove_minors_impl() is about to call zvol_remove()
+ * and then destroy the zvol_state_t, so returning a pointer to it for
+ * the caller to mess with would be a disaster anyway.
+ */
+ if (zv->zv_flags & ZVOL_REMOVING) {
+ mutex_exit(&zv->zv_state_lock);
+ rw_exit(&zv->zv_suspend_lock);
+ /* NB: Returning EIO here to match zfsvfs_teardown() */
+ return (SET_ERROR(EIO));
+ }
+
atomic_inc(&zv->zv_suspend_ref);
if (zv->zv_open_count > 0)
@@ -1171,7 +1185,8 @@ zvol_suspend(const char *name)
mutex_exit(&zv->zv_state_lock);
/* zv_suspend_lock is released in zvol_resume() */
- return (zv);
+ *zvp = zv;
+ return (0);
}
int
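
A minimal sketch of the new zvol_suspend() caller contract (hypothetical caller code; the error values follow the implementation above):

    	zvol_state_t *zv;
    	int error = zvol_suspend(name, &zv);

    	if (error == 0) {
    		/* Found and suspended; zv_suspend_lock is held until zvol_resume(). */
    		/* ... perform the rollback or receive work ... */
    		zvol_resume(zv);
    	} else if (error == ENOENT) {
    		/* No zvol by that name; fall back to the filesystem path. */
    	} else {
    		/* EIO: the zvol exists but is being removed; fail the request. */
    	}
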
diff --git a/sys/contrib/openzfs/rpm/generic/zfs.spec.in b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
index 1ce668e7b86d..8986e29eb7fb 100644
--- a/sys/contrib/openzfs/rpm/generic/zfs.spec.in
+++ b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
@@ -433,7 +433,7 @@ make install DESTDIR=%{?buildroot}
find %{?buildroot}%{_libdir} -name '*.la' -exec rm -f {} \;
%if 0%{!?__brp_mangle_shebangs:1}
find %{?buildroot}%{_bindir} \
- \( -name arc_summary -or -name arcstat -or -name dbufstat \
+ \( -name zarcsummary -or -name zarcstat -or -name dbufstat \
-or -name zilstat \) \
-exec %{__sed} -i 's|^#!.*|#!%{__python}|' {} \;
find %{?buildroot}%{_datadir} \
@@ -508,8 +508,8 @@ systemctl --system daemon-reload >/dev/null || true
%{_bindir}/raidz_test
%{_bindir}/zvol_wait
# Optional Python 3 scripts
-%{_bindir}/arc_summary
-%{_bindir}/arcstat
+%{_bindir}/zarcsummary
+%{_bindir}/zarcstat
%{_bindir}/dbufstat
%{_bindir}/zilstat
# Man pages
diff --git a/sys/contrib/openzfs/scripts/spdxcheck.pl b/sys/contrib/openzfs/scripts/spdxcheck.pl
index cdab5368f19c..4d4e14368beb 100755
--- a/sys/contrib/openzfs/scripts/spdxcheck.pl
+++ b/sys/contrib/openzfs/scripts/spdxcheck.pl
@@ -83,8 +83,8 @@ my $tagged_patterns = q(
man/man?/*.?.in
# Unsuffixed programs (or generated of same)
- cmd/arcstat.in
- cmd/arc_summary
+ cmd/zarcstat.in
+ cmd/zarcsummary
cmd/dbufstat.in
cmd/zilstat.in
cmd/zpool/zpool.d/*
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
index 2da46458289a..4dd700ef361c 100644
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -323,6 +323,10 @@ tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos',
'zfs_send_raw', 'zfs_send_sparse', 'zfs_send-b', 'zfs_send_skip_missing']
tags = ['functional', 'cli_root', 'zfs_send']
+[tests/functional/cli_root/zfs_send_delegation]
+tests = ['zfs_send_test']
+tags = ['functional', 'cli_root', 'zfs_send_delegation']
+
[tests/functional/cli_root/zfs_set]
tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos',
'canmount_002_pos', 'canmount_003_pos', 'canmount_004_pos',
@@ -379,7 +383,7 @@ tags = ['functional', 'cli_root', 'zfs_wait']
[tests/functional/cli_root/zhack]
tests = ['zhack_label_repair_001', 'zhack_label_repair_002',
- 'zhack_label_repair_003', 'zhack_label_repair_004']
+ 'zhack_label_repair_003', 'zhack_label_repair_004', 'zhack_metaslab_leak']
pre =
post =
tags = ['functional', 'cli_root', 'zhack']
@@ -546,7 +550,7 @@ tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos',
'zpool_scrub_multiple_pools',
'zpool_error_scrub_001_pos', 'zpool_error_scrub_002_pos',
'zpool_error_scrub_003_pos', 'zpool_error_scrub_004_pos',
- 'zpool_scrub_date_range_001']
+ 'zpool_scrub_date_range_001', 'zpool_scrub_date_range_002']
tags = ['functional', 'cli_root', 'zpool_scrub']
[tests/functional/cli_root/zpool_set]
@@ -624,8 +628,8 @@ tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg',
'zpool_history_001_neg', 'zpool_import_001_neg', 'zpool_import_002_neg',
'zpool_offline_001_neg', 'zpool_online_001_neg', 'zpool_remove_001_neg',
'zpool_replace_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg',
- 'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos',
- 'arc_summary_001_pos', 'arc_summary_002_neg', 'zpool_wait_privilege',
+ 'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'zarcstat_001_pos',
+ 'zarcsummary_001_pos', 'zarcsummary_002_neg', 'zpool_wait_privilege',
'zilstat_001_pos']
user =
tags = ['functional', 'cli_user', 'misc']
@@ -637,6 +641,10 @@ tests = ['zfs_list_001_pos', 'zfs_list_002_pos', 'zfs_list_003_pos',
user =
tags = ['functional', 'cli_user', 'zfs_list']
+[tests/functional/cli_root/zfs_send_delegation_user]
+tests = ['zfs_send_usertest']
+tags = ['functional', 'cli_root', 'zfs_send_delegation_user']
+
[tests/functional/cli_user/zpool_iostat]
tests = ['zpool_iostat_001_neg', 'zpool_iostat_002_pos',
'zpool_iostat_003_neg', 'zpool_iostat_004_pos',
@@ -940,10 +948,11 @@ tags = ['functional', 'rename_dirs']
[tests/functional/replacement]
tests = ['attach_import', 'attach_multiple', 'attach_rebuild',
- 'attach_resilver', 'detach', 'rebuild_disabled_feature',
- 'rebuild_multiple', 'rebuild_raidz', 'replace_import', 'replace_rebuild',
- 'replace_resilver', 'resilver_restart_001', 'resilver_restart_002',
- 'scrub_cancel']
+ 'attach_resilver', 'attach_resilver_sit_out', 'detach',
+ 'rebuild_disabled_feature', 'rebuild_multiple', 'rebuild_raidz',
+ 'replace_import', 'replace_rebuild', 'replace_resilver',
+ 'replace_resilver_sit_out', 'resilver_restart_001',
+ 'resilver_restart_002', 'scrub_cancel']
tags = ['functional', 'replacement']
[tests/functional/reservation]
diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run
index f3d56acffde0..339361cc2762 100644
--- a/sys/contrib/openzfs/tests/runfiles/linux.run
+++ b/sys/contrib/openzfs/tests/runfiles/linux.run
@@ -109,7 +109,9 @@ tags = ['functional', 'direct']
[tests/functional/events:Linux]
tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill',
'zed_cksum_reported', 'zed_cksum_config', 'zed_io_config',
- 'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple']
+ 'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple',
+ 'zed_synchronous_zedlet', 'slow_vdev_sit_out', 'slow_vdev_sit_out_neg',
+ 'slow_vdev_degraded_sit_out']
tags = ['functional', 'events']
[tests/functional/fallocate:Linux]
@@ -161,7 +163,7 @@ tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval',
tags = ['functional', 'mmp']
[tests/functional/mount:Linux]
-tests = ['umount_unlinked_drain']
+tests = ['umount_unlinked_drain', 'mount_loopback']
tags = ['functional', 'mount']
[tests/functional/pam:Linux]
diff --git a/sys/contrib/openzfs/tests/runfiles/sanity.run b/sys/contrib/openzfs/tests/runfiles/sanity.run
index 7767c0c2d535..b56ffc3a4a2d 100644
--- a/sys/contrib/openzfs/tests/runfiles/sanity.run
+++ b/sys/contrib/openzfs/tests/runfiles/sanity.run
@@ -400,8 +400,8 @@ tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg',
'zpool_detach_001_neg', 'zpool_export_001_neg', 'zpool_get_001_neg',
'zpool_history_001_neg', 'zpool_offline_001_neg', 'zpool_online_001_neg',
'zpool_remove_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg',
- 'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos',
- 'arc_summary_001_pos', 'arc_summary_002_neg', 'zpool_wait_privilege',
+ 'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'zarcstat_001_pos',
+ 'zarcsummary_001_pos', 'zarcsummary_002_neg', 'zpool_wait_privilege',
'zilstat_001_pos']
user =
tags = ['functional', 'cli_user', 'misc']
diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
index 001970120148..5bc65f993734 100755
--- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
+++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
@@ -232,7 +232,7 @@ maybe = {
'cli_root/zpool_trim/zpool_trim_fault_export_import_online':
['FAIL', known_reason],
'cli_root/zpool_upgrade/zpool_upgrade_004_pos': ['FAIL', 6141],
- 'cli_user/misc/arc_summary_001_pos': ['FAIL', known_reason],
+ 'cli_user/misc/zarcsummary_001_pos': ['FAIL', known_reason],
'delegate/setup': ['SKIP', exec_reason],
'events/zed_cksum_config': ['FAIL', known_reason],
'fault/auto_replace_002_pos': ['FAIL', known_reason],
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c
index cbebd33e0bf6..c8d8622c7571 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c
@@ -863,7 +863,8 @@ test_result(const crypto_test_t *test, int encrypt_rv, uint8_t *encrypt_buf,
return (pass);
/* print summary of test result */
- printf("%s[%lu]: encrypt=%s decrypt=%s\n", test->fileloc, test->id,
+ printf("%s[%ju]: encrypt=%s decrypt=%s\n", test->fileloc,
+ (uintmax_t)test->id,
encrypt_pass ? "PASS" : "FAIL",
decrypt_pass ? "PASS" : "FAIL");
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
index 884a99d785bc..1c4d25e152a7 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
@@ -100,6 +100,7 @@ export SYSTEM_FILES_COMMON='awk
uniq
vmstat
wc
+ which
xargs
xxh128sum'
@@ -146,6 +147,7 @@ export SYSTEM_FILES_LINUX='attr
lscpu
lsmod
lsscsi
+ mkfs.xfs
mkswap
modprobe
mountpoint
@@ -169,8 +171,8 @@ export ZFS_FILES='zdb
zpool
ztest
raidz_test
- arc_summary
- arcstat
+ zarcsummary
+ zarcstat
zilstat
dbufstat
mount.zfs
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
index 23e89599cae0..6b0f8b18c4b6 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
@@ -1112,6 +1112,16 @@ function get_pool_prop # property pool
zpool get -Hpo value "$prop" "$pool" || log_fail "zpool get $prop $pool"
}
+# Get the specified vdev property in parsable format or fail
+function get_vdev_prop
+{
+ typeset prop="$1"
+ typeset pool="$2"
+ typeset vdev="$3"
+
+ zpool get -Hpo value "$prop" "$pool" "$vdev" || log_fail "zpool get $prop $pool $vdev"
+}
+
# Return 0 if a pool exists; $? otherwise
#
# $1 - pool name
@@ -1971,6 +1981,28 @@ function wait_vdev_state # pool disk state timeout
}
#
+# Wait for vdev 'sit_out' property to be cleared.
+#
+# $1 pool name
+# $2 vdev name
+# $3 timeout
+#
+function wait_sit_out #pool vdev timeout
+{
+ typeset pool=${1:-$TESTPOOL}
+ typeset vdev="$2"
+ typeset timeout=${3:-300}
+ for (( timer = 0; timer < $timeout; timer++ )); do
+ if [ "$(get_vdev_prop sit_out "$pool" "$vdev")" = "off" ]; then
+ return 0
+ fi
+ sleep 1;
+ done
+
+ return 1
+}
+
+#
# Check the output of 'zpool status -v <pool>',
# and to see if the content of <token> contain the <keyword> specified.
#
@@ -2881,6 +2913,28 @@ function user_run
log_note "user: $user"
log_note "cmd: $*"
+ if ! sudo -Eu $user test -x $PATH ; then
+ log_note "-------------------------------------------------"
+ log_note "Warning: $user doesn't have permissions on $PATH"
+ log_note ""
+ log_note "This usually happens when you're running ZTS locally"
+ log_note "from inside the ZFS source dir, and are attempting to"
+ log_note "run a test that calls user_run. The ephemeral user"
+ log_note "($user) that ZTS is creating does not have permission"
+ log_note "to traverse to $PATH, or the binaries in $PATH are"
+ log_note "not the right permissions."
+ log_note ""
+ log_note "To get around this, copy your ZFS source directory"
+ log_note "to a world-accessible location (like /tmp), and "
+ log_note "change the permissions on your ZFS source dir "
+ log_note "to allow access."
+ log_note ""
+ log_note "Also, verify that /dev/zfs is RW for others:"
+ log_note ""
+ log_note " sudo chmod o+rw /dev/zfs"
+ log_note "-------------------------------------------------"
+ fi
+
typeset out=$TEST_BASE_DIR/out
typeset err=$TEST_BASE_DIR/err
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
index e273c9f85c28..54b50c9dba77 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
@@ -72,9 +72,12 @@ MULTIHOST_INTERVAL multihost.interval zfs_multihost_interval
OVERRIDE_ESTIMATE_RECORDSIZE send.override_estimate_recordsize zfs_override_estimate_recordsize
PREFETCH_DISABLE prefetch.disable zfs_prefetch_disable
RAIDZ_EXPAND_MAX_REFLOW_BYTES vdev.expand_max_reflow_bytes raidz_expand_max_reflow_bytes
+READ_SIT_OUT_SECS vdev.read_sit_out_secs vdev_read_sit_out_secs
+SIT_OUT_CHECK_INTERVAL vdev.raidz_outlier_check_interval_ms vdev_raidz_outlier_check_interval_ms
+SIT_OUT_INSENSITIVITY vdev.raidz_outlier_insensitivity vdev_raidz_outlier_insensitivity
REBUILD_SCRUB_ENABLED rebuild_scrub_enabled zfs_rebuild_scrub_enabled
-REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress
-REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment
+REMOVAL_SUSPEND_PROGRESS vdev.removal_suspend_progress zfs_removal_suspend_progress
+REMOVE_MAX_SEGMENT vdev.remove_max_segment zfs_remove_max_segment
RESILVER_MIN_TIME_MS resilver_min_time_ms zfs_resilver_min_time_ms
RESILVER_DEFER_PERCENT resilver_defer_percent zfs_resilver_defer_percent
SCAN_LEGACY scan_legacy zfs_scan_legacy
@@ -88,6 +91,7 @@ SPA_DISCARD_MEMORY_LIMIT spa.discard_memory_limit zfs_spa_discard_memory_limit
SPA_LOAD_VERIFY_DATA spa.load_verify_data spa_load_verify_data
SPA_LOAD_VERIFY_METADATA spa.load_verify_metadata spa_load_verify_metadata
SPA_NOTE_TXG_TIME spa.note_txg_time spa_note_txg_time
+SPA_FLUSH_TXG_TIME spa.flush_txg_time spa_flush_txg_time
TRIM_EXTENT_BYTES_MIN trim.extent_bytes_min zfs_trim_extent_bytes_min
TRIM_METASLAB_SKIP trim.metaslab_skip zfs_trim_metaslab_skip
TRIM_TXG_BATCH trim.txg_batch zfs_trim_txg_batch
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
index 41e7b45ef4ec..1517f90e99a5 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -892,6 +892,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zfs_send/zfs_send_raw.ksh \
functional/cli_root/zfs_send/zfs_send_skip_missing.ksh \
functional/cli_root/zfs_send/zfs_send_sparse.ksh \
+ functional/cli_root/zfs_send_delegation/cleanup.ksh \
+ functional/cli_root/zfs_send_delegation/setup.ksh \
+ functional/cli_root/zfs_send_delegation/zfs_send_test.ksh \
functional/cli_root/zfs_set/cache_001_pos.ksh \
functional/cli_root/zfs_set/cache_002_neg.ksh \
functional/cli_root/zfs_set/canmount_001_pos.ksh \
@@ -1009,6 +1012,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zhack/zhack_label_repair_002.ksh \
functional/cli_root/zhack/zhack_label_repair_003.ksh \
functional/cli_root/zhack/zhack_label_repair_004.ksh \
+ functional/cli_root/zhack/zhack_metaslab_leak.ksh \
functional/cli_root/zpool_add/add_nested_replacing_spare.ksh \
functional/cli_root/zpool_add/add-o_ashift.ksh \
functional/cli_root/zpool_add/add_prop_ashift.ksh \
@@ -1247,6 +1251,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_scrub/zpool_scrub_print_repairing.ksh \
functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh \
functional/cli_root/zpool_scrub/zpool_scrub_date_range_001.ksh \
+ functional/cli_root/zpool_scrub/zpool_scrub_date_range_002.ksh \
functional/cli_root/zpool_scrub/zpool_error_scrub_001_pos.ksh \
functional/cli_root/zpool_scrub/zpool_error_scrub_002_pos.ksh \
functional/cli_root/zpool_scrub/zpool_error_scrub_003_pos.ksh \
@@ -1351,9 +1356,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool/zpool_002_pos.ksh \
functional/cli_root/zpool/zpool_003_pos.ksh \
functional/cli_root/zpool/zpool_colors.ksh \
- functional/cli_user/misc/arcstat_001_pos.ksh \
- functional/cli_user/misc/arc_summary_001_pos.ksh \
- functional/cli_user/misc/arc_summary_002_neg.ksh \
+ functional/cli_user/misc/zarcstat_001_pos.ksh \
+ functional/cli_user/misc/zarcsummary_001_pos.ksh \
+ functional/cli_user/misc/zarcsummary_002_neg.ksh \
functional/cli_user/misc/zilstat_001_pos.ksh \
functional/cli_user/misc/cleanup.ksh \
functional/cli_user/misc/setup.ksh \
@@ -1408,6 +1413,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_user/zfs_list/zfs_list_005_neg.ksh \
functional/cli_user/zfs_list/zfs_list_007_pos.ksh \
functional/cli_user/zfs_list/zfs_list_008_neg.ksh \
+ functional/cli_user/zfs_send_delegation_user/cleanup.ksh \
+ functional/cli_user/zfs_send_delegation_user/setup.ksh \
+ functional/cli_user/zfs_send_delegation_user/zfs_send_usertest.ksh \
functional/cli_user/zpool_iostat/cleanup.ksh \
functional/cli_user/zpool_iostat/setup.ksh \
functional/cli_user/zpool_iostat/zpool_iostat_001_neg.ksh \
@@ -1524,6 +1532,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/events/events_001_pos.ksh \
functional/events/events_002_pos.ksh \
functional/events/setup.ksh \
+ functional/events/slow_vdev_degraded_sit_out.ksh \
+ functional/events/slow_vdev_sit_out.ksh \
+ functional/events/slow_vdev_sit_out_neg.ksh \
functional/events/zed_cksum_config.ksh \
functional/events/zed_cksum_reported.ksh \
functional/events/zed_diagnose_multiple.ksh \
@@ -1532,6 +1543,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/events/zed_rc_filter.ksh \
functional/events/zed_slow_io.ksh \
functional/events/zed_slow_io_many_vdevs.ksh \
+ functional/events/zed_synchronous_zedlet.ksh \
functional/exec/cleanup.ksh \
functional/exec/exec_001_pos.ksh \
functional/exec/exec_002_neg.ksh \
@@ -1706,6 +1718,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/mmp/setup.ksh \
functional/mount/cleanup.ksh \
functional/mount/setup.ksh \
+ functional/mount/mount_loopback.ksh \
functional/mount/umount_001.ksh \
functional/mount/umountall_001.ksh \
functional/mount/umount_unlinked_drain.ksh \
@@ -1935,6 +1948,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/replacement/attach_multiple.ksh \
functional/replacement/attach_rebuild.ksh \
functional/replacement/attach_resilver.ksh \
+ functional/replacement/attach_resilver_sit_out.ksh \
functional/replacement/cleanup.ksh \
functional/replacement/detach.ksh \
functional/replacement/rebuild_disabled_feature.ksh \
@@ -1943,6 +1957,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/replacement/replace_import.ksh \
functional/replacement/replace_rebuild.ksh \
functional/replacement/replace_resilver.ksh \
+ functional/replacement/replace_resilver_sit_out.ksh \
functional/replacement/resilver_restart_001.ksh \
functional/replacement/resilver_restart_002.ksh \
functional/replacement/scrub_cancel.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_tunables.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_tunables.ksh
index 46965aa7cc37..b89790d5d525 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_tunables.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_tunables.ksh
@@ -63,7 +63,7 @@ log_mustnot_expect 'no such tunable: 0' zdb -o show=0
log_mustnot_expect 'no such tunable: 1' zdb -o info=1
# can set multiple in same command
-log_must eval 'zdb -o zfs_recover=1 -o zfs_flags=512 | xargs | grep -qE "^zfs_recover: 0 -> 1 zfs_flags: 4294965758 -> 512$"'
+log_must eval 'zdb -o zfs_recover=1 -o zfs_flags=512 | xargs | grep -qE "^zfs_recover: 0 -> 1 zfs_flags: 4294932990 -> 512$"'
# can set and show in same command
log_must eval 'zdb -o zfs_recover=1 -o zfs_recover -o zfs_recover=0 | xargs | grep -qE "^zfs_recover: 0 -> 1 zfs_recover: 1 zfs_recover: 1 -> 0$"'
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/cleanup.ksh
new file mode 100755
index 000000000000..4a59e15cc693
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/cleanup.ksh
@@ -0,0 +1,43 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025, Klara Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib
+
+
+poolexists $TESTPOOL1 && \
+ destroy_pool $TESTPOOL1
+
+del_user $STAFF1
+del_user $STAFF2
+del_group $STAFF_GROUP
+
+del_user $OTHER1
+del_user $OTHER2
+del_group $OTHER_GROUP
+
+default_cleanup
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/setup.ksh
new file mode 100755
index 000000000000..0978193eddc4
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/setup.ksh
@@ -0,0 +1,50 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025, Klara Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib
+
+# Create staff group and add two users to it
+log_must add_group $STAFF_GROUP
+if ! id $STAFF1 > /dev/null 2>&1; then
+ log_must add_user $STAFF_GROUP $STAFF1
+fi
+if ! id $STAFF2 > /dev/null 2>&1; then
+ log_must add_user $STAFF_GROUP $STAFF2
+fi
+
+# Create other group and add two users to it
+log_must add_group $OTHER_GROUP
+if ! id $OTHER1 > /dev/null 2>&1; then
+ log_must add_user $OTHER_GROUP $OTHER1
+fi
+if ! id $OTHER2 > /dev/null 2>&1; then
+ log_must add_user $OTHER_GROUP $OTHER2
+fi
+DISK=${DISKS%% *}
+
+default_raidz_setup $DISKS
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/zfs_send_test.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/zfs_send_test.ksh
new file mode 100755
index 000000000000..d018f313fae1
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_send_delegation/zfs_send_test.ksh
@@ -0,0 +1,111 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025, Klara Inc.
+#
+
+# STRATEGY:
+# 1. Create a pool (this is done by the test framework)
+# 2. Create an encrypted dataset
+# 3. Write random data to the encrypted dataset
+# 4. Snapshot the dataset
+# 5. As root: attempt a send and raw send (both should succeed)
+# 6. Create a delegation (zfs allow -u user send testpool/encrypted_dataset)
+# 7. As root: attempt a send and raw send (both should succeed)
+# 8. Create a delegation (zfs allow -u user send:raw testpool/encrypted_dataset)
+# 9. As root: attempt a send and raw send (both should succeed)
+# 10. Disable delegation (zfs unallow)
+# 11. As root: attempt a send and raw send (both should succeed)
+# 12. Clean up (handled by framework)
+#
+# Tested as a user under ../cli_user/zfs_send_delegation_user/
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib
+. $STF_SUITE/tests/functional/cli_root/zfs_create/properties.kshlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+. $STF_SUITE/tests/functional/delegate/delegate.cfg
+
+# create encrypted dataset
+
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on -o keyformat=passphrase $TESTPOOL/$TESTFS1"
+
+# create target dataset for receives
+if ! zfs list | grep testfs2 >/dev/null 2>&1; then
+ dataset_created="TRUE"
+ log_must zfs create $TESTPOOL/$TESTFS2
+fi
+
+# create user and group
+typeset perms="snapshot,reservation,compression,checksum,userprop,receive"
+
+log_note "Added permissions to the $OTHER1 user."
+log_must zfs allow $OTHER1 $perms $TESTPOOL/$TESTFS1
+log_must zfs allow $OTHER1 $perms $TESTPOOL/$TESTFS2
+
+# create random data
+log_must fill_fs $TESTPOOL/$TESTFS1/child 1 2047 1024 1 R
+
+# snapshot
+log_must zfs snapshot $TESTPOOL/$TESTFS1@snap1
+
+
+# check baseline send abilities (should pass)
+log_must eval "zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv0_datastream.$$"
+log_must eval "zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv0raw_datastream.$$"
+
+
+# create delegation
+log_must zfs allow $OTHER1 send $TESTPOOL/$TESTFS1
+
+# attempt send with full allow
+
+log_must eval "zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv1_datastream.$$"
+log_must eval "zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv1raw_datastream.$$"
+
+# create raw delegation
+log_must zfs allow $OTHER1 send:raw $TESTPOOL/$TESTFS1
+log_must zfs unallow $OTHER1 send $TESTPOOL/$TESTFS1
+
+# test new send abilities (should pass)
+log_must eval "zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv2_datastream.$$"
+log_must eval "zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv2raw_datastream.$$"
+
+
+# disable raw delegation
+zfs unallow $OTHER1 send:raw $TESTPOOL/$TESTFS1
+zfs allow $OTHER1 send $TESTPOOL/$TESTFS1
+
+# verify original send abilities (should pass)
+log_must eval "zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv3_datastream.$$"
+log_must eval "zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv3raw_datastream.$$"
+
+
+function cleanup
+{
+	datasetexists $TESTPOOL/$TESTFS1 && \
+	    destroy_dataset $TESTPOOL/$TESTFS1 -r
+	datasetexists $TESTPOOL/$TESTFS2 && \
+	    destroy_dataset $TESTPOOL/$TESTFS2 -r
+}
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_metaslab_leak.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_metaslab_leak.ksh
new file mode 100755
index 000000000000..0d2a39be6b5a
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_metaslab_leak.ksh
@@ -0,0 +1,70 @@
+#!/bin/ksh
+# SPDX-License-Identifier: CDDL-1.0
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+
+#
+# Description:
+#
+# Test whether zhack metaslab leak functions correctly
+#
+# Strategy:
+#
+# 1. Create pool on a loopback device with some test data
+# 2. Gather pool capacity stats
+# 3. Generate fragmentation data with zdb
+# 4. Destroy the pool
+# 5. Create a new pool with the same configuration
+# 6. Export the pool
+# 7. Apply the fragmentation information with zhack metaslab leak
+# 8. Import the pool
+# 9. Verify that pool capacity stats match
+
+. "$STF_SUITE"/include/libtest.shlib
+
+verify_runnable "global"
+
+function cleanup
+{
+ zpool destroy $TESTPOOL
+ rm $tmp
+}
+
+log_onexit cleanup
+log_assert "zhack metaslab leak leaks the right amount of space"
+
+typeset tmp=$(mktemp)
+
+log_must zpool create $TESTPOOL $DISKS
+for i in `seq 1 16`; do
+ log_must dd if=/dev/urandom of=/$TESTPOOL/f$i bs=1M count=16
+ log_must zpool sync $TESTPOOL
+done
+for i in `seq 2 2 16`; do
+ log_must rm /$TESTPOOL/f$i
+done
+for i in `seq 1 16`; do
+ log_must touch /$TESTPOOL/g$i
+ log_must zpool sync $TESTPOOL
+done
+
+alloc=$(zpool get -Hpo value alloc $TESTPOOL)
+log_must eval "zdb -m --allocated-map $TESTPOOL > $tmp"
+log_must zpool destroy $TESTPOOL
+
+log_must zpool create $TESTPOOL $DISKS
+log_must zpool export $TESTPOOL
+log_must eval "zhack metaslab leak $TESTPOOL < $tmp"
+log_must zpool import $TESTPOOL
+
+alloc2=$(zpool get -Hpo value alloc $TESTPOOL)
+
+within_percent $alloc $alloc2 98 ||
+ log_fail "space usage changed too much: $alloc to $alloc2"
+
+log_pass "zhack metaslab leak behaved correctly"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_date_range_002.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_date_range_002.ksh
new file mode 100755
index 000000000000..9327df81a5c5
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_date_range_002.ksh
@@ -0,0 +1,76 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2025 Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg
+
+#
+# DESCRIPTION:
+# Verify that the timestamp database updates all the tables as expected.
+#
+# STRATEGY:
+# 1. Decrease the note and flush frequency of the txg database.
+# 2. Force the pool to sync several txgs
+# 3. Verify that there are entries in each of the "month", "day", and
+# "minute" tables.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ log_must restore_tunable SPA_NOTE_TXG_TIME
+ log_must restore_tunable SPA_FLUSH_TXG_TIME
+ rm /$TESTPOOL/f1
+}
+
+log_onexit cleanup
+
+log_assert "Verifiy timestamp databases all update as expected."
+
+log_must save_tunable SPA_NOTE_TXG_TIME
+log_must set_tunable64 SPA_NOTE_TXG_TIME 1
+log_must save_tunable SPA_FLUSH_TXG_TIME
+log_must set_tunable64 SPA_FLUSH_TXG_TIME 1
+
+log_must touch /$TESTPOOL/f1
+log_must zpool sync $TESTPOOL
+sleep 1
+log_must touch /$TESTPOOL/f1
+log_must zpool sync $TESTPOOL
+sleep 1
+log_must touch /$TESTPOOL/f1
+log_must zpool sync $TESTPOOL
+
+mos_zap="$(zdb -dddd $TESTPOOL 1)"
+minutes_entries=$(echo "$mos_zap" | grep "txg_log_time:minutes" | awk '{print $5}')
+days_entries=$(echo "$mos_zap" | grep "txg_log_time:days" | awk '{print $5}')
+months_entries=$(echo "$mos_zap" | grep "txg_log_time:months" | awk '{print $5}')
+
+[[ "$minutes_entries" -ne "0" ]] || log_fail "0 entries in the minutes table"
+[[ "$days_entries" -ne "0" ]] || log_fail "0 entries in the days table"
+[[ "$months_entries" -ne "0" ]] || log_fail "0 entries in the months table"
+
+log_pass "Verified all timestamp databases had entries as expected."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arcstat_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcstat_001_pos.ksh
index 700bd9a6f529..d63b72f8039d 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arcstat_001_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcstat_001_pos.ksh
@@ -37,7 +37,7 @@ log_assert "arcstat generates output and doesn't return an error code"
typeset -i i=0
while [[ $i -lt ${#args[*]} ]]; do
- log_must eval "arcstat ${args[i]} > /dev/null"
+ log_must eval "zarcstat ${args[i]} > /dev/null"
((i = i + 1))
done
log_pass "arcstat generates output and doesn't return an error code"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arc_summary_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcsummary_001_pos.ksh
index 0840878fdb0d..b7faac5243c9 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arc_summary_001_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcsummary_001_pos.ksh
@@ -30,16 +30,16 @@
is_freebsd && ! python3 -c 'import sysctl' 2>/dev/null && log_unsupported "python3 sysctl module missing"
-log_assert "arc_summary generates output and doesn't return an error code"
+log_assert "zarcsummary generates output and doesn't return an error code"
# Without this, the below checks aren't going to work the way we hope...
set -o pipefail
for arg in "" "-a" "-d" "-p 1" "-g" "-s arc" "-r"; do
- log_must eval "arc_summary $arg > /dev/null"
+ log_must eval "zarcsummary $arg > /dev/null"
done
-log_must eval "arc_summary | head > /dev/null"
-log_must eval "arc_summary | head -1 > /dev/null"
+log_must eval "zarcsummary | head > /dev/null"
+log_must eval "zarcsummary | head -1 > /dev/null"
-log_pass "arc_summary generates output and doesn't return an error code"
+log_pass "zarcsummary generates output and doesn't return an error code"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arc_summary_002_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcsummary_002_neg.ksh
index ec4abe35409d..227646777ba0 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/arc_summary_002_neg.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zarcsummary_002_neg.ksh
@@ -30,10 +30,10 @@
is_freebsd && ! python3 -c 'import sysctl' 2>/dev/null && log_unsupported "python3 sysctl module missing"
-log_assert "arc_summary generates an error code with invalid options"
+log_assert "zarcsummary generates an error code with invalid options"
for arg in "-x" "-5" "-p 7" "--err" "-@"; do
- log_mustnot eval "arc_summary $arg > /dev/null"
+ log_mustnot eval "zarcsummary $arg > /dev/null"
done
-log_pass "arc_summary generates an error code with invalid options"
+log_pass "zarcsummary generates an error code with invalid options"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/cleanup.ksh
new file mode 100755
index 000000000000..4a59e15cc693
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/cleanup.ksh
@@ -0,0 +1,43 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025, Klara Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib
+
+
+poolexists $TESTPOOL1 && \
+ destroy_pool $TESTPOOL1
+
+del_user $STAFF1
+del_user $STAFF2
+del_group $STAFF_GROUP
+
+del_user $OTHER1
+del_user $OTHER2
+del_group $OTHER_GROUP
+
+default_cleanup
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/setup.ksh
new file mode 100755
index 000000000000..0978193eddc4
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/setup.ksh
@@ -0,0 +1,50 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025, Klara Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib
+
+# Create staff group and add two users to it
+log_must add_group $STAFF_GROUP
+if ! id $STAFF1 > /dev/null 2>&1; then
+ log_must add_user $STAFF_GROUP $STAFF1
+fi
+if ! id $STAFF2 > /dev/null 2>&1; then
+ log_must add_user $STAFF_GROUP $STAFF2
+fi
+
+# Create other group and add two users to it
+log_must add_group $OTHER_GROUP
+if ! id $OTHER1 > /dev/null 2>&1; then
+ log_must add_user $OTHER_GROUP $OTHER1
+fi
+if ! id $OTHER2 > /dev/null 2>&1; then
+ log_must add_user $OTHER_GROUP $OTHER2
+fi
+DISK=${DISKS%% *}
+
+default_raidz_setup $DISKS
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/zfs_send_usertest.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/zfs_send_usertest.ksh
new file mode 100755
index 000000000000..f62f2b07929c
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zfs_send_delegation_user/zfs_send_usertest.ksh
@@ -0,0 +1,145 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025, Klara Inc.
+#
+
+# STRATEGY:
+# 1. Create a pool (this is done by the test framework)
+# 2. Create a user
+# 3. Create an encrypted dataset
+# 4. Write random data to the encrypted dataset
+# 5. Snapshot the dataset
+# 6. As root: attempt a send and raw send (both should succeed)
+# 7. As user: attempt a send and raw send (both should fail, no permission)
+# 8. Create a delegation (zfs allow -u user send testpool/encrypted_dataset)
+# 9. As root: attempt a send and raw send (both should succeed)
+# 10. As user: attempt a send and raw send (both should succeed)
+# 11. Create a delegation (zfs allow -u user send:raw testpool/encrypted_dataset)
+# 12. As root: attempt a send and raw send (both should succeed)
+# 13. As user: attempt a send and raw send (send should fail, raw send should succeed)
+# 14. Disable delegation (zfs unallow)
+# 15. As root: attempt a send and raw send (both should succeed)
+# 16. As user: attempt a send and raw send (both should fail, no permission)
+# 17. Clean up (handled by framework)
+# Root tests verifying this doesn't affect the root user are under ../cli_root/zfs_send_delegation/
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib
+. $STF_SUITE/tests/functional/cli_root/zfs_create/properties.kshlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+. $STF_SUITE/tests/functional/delegate/delegate.cfg
+
+# create encrypted dataset
+
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on -o keyformat=passphrase $TESTPOOL/$TESTFS1"
+
+# create target dataset for receives
+log_must zfs create $TESTPOOL/$TESTFS2
+
+# set user perms
+# need to run chown for fs permissions for $OTHER1
+typeset perms="snapshot,reservation,compression,checksum,userprop,receive,mount,create"
+
+log_must zfs allow $OTHER1 $perms $TESTPOOL/$TESTFS1
+log_must zfs allow $OTHER1 $perms $TESTPOOL/$TESTFS2
+log_must chown ${OTHER1}:${OTHER_GROUP} /$TESTPOOL/$TESTFS2
+
+# create random data
+log_must fill_fs $TESTPOOL/$TESTFS1/child 1 2047 1024 1 R
+
+# snapshot
+log_must zfs snapshot $TESTPOOL/$TESTFS1@snap1
+
+# Note:
+# We need to use `sh -c` here because the quoting on <<<"$*" in the user_run wrapper breaks once pipes and redirects are involved.
+
+# check baseline send abilities (should fail)
+log_mustnot user_run $OTHER1 sh -c "'zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv0_user_datastream.$$'"
+# verify nothing went through
+if [ -s $TESTPOOL/$TESTFS2/zfsrecv0_user_datastream.$$ ]
+then
+	log_fail "A zfs receive was completed in $TESTPOOL/$TESTFS2/zfsrecv0_user_datastream !"
+fi
+log_mustnot user_run $OTHER1 sh -c "'zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv0raw_user_datastream.$$'"
+# verify nothing went through
+if [ -s $TESTPOOL/$TESTFS2/zfsrecv0raw_user_datastream.$$ ]
+then
+	log_fail "A zfs receive was completed in $TESTPOOL/$TESTFS2/zfsrecv0raw_user_datastream !"
+fi
+
+# create delegation
+log_must zfs allow $OTHER1 send $TESTPOOL/$TESTFS1
+
+# attempt send with full allow (should pass)
+log_must user_run $OTHER1 sh -c "'zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv1_user_datastream.$$'"
+log_must user_run $OTHER1 sh -c "'zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv1raw_user_datastream.$$'"
+
+
+# create raw delegation
+log_must zfs allow $OTHER1 send:raw $TESTPOOL/$TESTFS1
+# We have to remove 'send' to confirm 'send raw' only allows what we want
+log_must zfs unallow -u $OTHER1 send $TESTPOOL/$TESTFS1
+
+# test new sendraw abilities (send should fail, sendraw should pass)
+log_mustnot user_run $OTHER1 sh -c "'zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv2_user_datastream.$$'"
+# verify nothing went through
+if [ -s $TESTPOOL/$TESTFS2/zfsrecv2_user_datastream.$$ ]
+then
+	log_fail "A zfs receive was completed in $TESTPOOL/$TESTFS2/zfsrecv2_user_datastream !"
+fi
+log_must user_run $OTHER1 sh -c "'zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv2raw_user_datastream.$$'"
+
+# disable raw delegation
+log_must zfs unallow -u $OTHER1 send:raw $TESTPOOL/$TESTFS1
+log_must zfs allow $OTHER1 send $TESTPOOL/$TESTFS1
+
+# test with raw taken away (should pass)
+log_must user_run $OTHER1 sh -c "'zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv3_user_datastream.$$'"
+log_must user_run $OTHER1 sh -c "'zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv3raw_user_datastream.$$'"
+
+# disable send abilities
+log_must zfs unallow -u $OTHER1 send $TESTPOOL/$TESTFS1
+
+# verify original send abilities (should fail)
+log_mustnot user_run $OTHER1 sh -c "'zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv4_user_datastream.$$'"
+# verify nothing went through
+if [ -s $TESTPOOL/$TESTFS2/zfsrecv4_user_datastream.$$ ]
+then
+	log_fail "A zfs receive was completed in $TESTPOOL/$TESTFS2/zfsrecv4_user_datastream !"
+fi
+log_mustnot user_run $OTHER1 sh -c "'zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive -u $TESTPOOL/$TESTFS2/zfsrecv4raw_user_datastream.$$'"
+# verify nothing went through
+if [ -s $TESTPOOL/$TESTFS2/zfsrecv4raw_user_datastream.$$ ]
+then
+	log_fail "A zfs receive was completed in $TESTPOOL/$TESTFS2/zfsrecv4raw_user_datastream !"
+fi
+
+
+function cleanup
+{
+	datasetexists $TESTPOOL/$TESTFS1 && \
+	    destroy_dataset $TESTPOOL/$TESTFS1 -r
+	datasetexists $TESTPOOL/$TESTFS2 && \
+	    destroy_dataset $TESTPOOL/$TESTFS2 -r
+}
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_degraded_sit_out.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_degraded_sit_out.ksh
new file mode 100755
index 000000000000..d5feb6936b4b
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_degraded_sit_out.ksh
@@ -0,0 +1,106 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2025 by Klara, Inc.
+
+# DESCRIPTION:
+# Verify that vdevs 'sit out' when they are slow
+#
+# STRATEGY:
+# 1. Create various raidz/draid pools
+# 2. Degrade/fault one of the disks.
+# 3. Inject delays into one of the disks
+# 4. Verify disk is set to 'sit out' for a while.
+# 5. Wait for READ_SIT_OUT_SECS and verify sit out state is lifted.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+function cleanup
+{
+ restore_tunable READ_SIT_OUT_SECS
+ restore_tunable SIT_OUT_CHECK_INTERVAL
+ log_must zinject -c all
+ log_must zpool events -c
+ destroy_pool $TESTPOOL2
+ log_must rm -f $TEST_BASE_DIR/vdev.$$.*
+}
+
+log_assert "Verify sit_out works"
+
+log_onexit cleanup
+
+# shorten sit out period for testing
+save_tunable READ_SIT_OUT_SECS
+set_tunable32 READ_SIT_OUT_SECS 5
+
+save_tunable SIT_OUT_CHECK_INTERVAL
+set_tunable64 SIT_OUT_CHECK_INTERVAL 20
+
+log_must truncate -s 150M $TEST_BASE_DIR/vdev.$$.{0..9}
+
+for raidtype in raidz2 raidz3 draid2 draid3 ; do
+ log_must zpool create $TESTPOOL2 $raidtype $TEST_BASE_DIR/vdev.$$.{0..9}
+ log_must zpool set autosit=on $TESTPOOL2 "${raidtype}-0"
+ log_must dd if=/dev/urandom of=/$TESTPOOL2/bigfile bs=1M count=400
+ log_must zpool export $TESTPOOL2
+ log_must zpool import -d $TEST_BASE_DIR $TESTPOOL2
+
+ BAD_VDEV=$TEST_BASE_DIR/vdev.$$.9
+ SLOW_VDEV=$TEST_BASE_DIR/vdev.$$.8
+
+ # Initial state should not be sitting out
+ log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)" == "off" ]]
+
+ # Delay our reads 200ms to trigger sit out
+ log_must zinject -d $SLOW_VDEV -D200:1 -T read $TESTPOOL2
+ type=$((RANDOM % 2))
+ [[ "$type" -eq "0" ]] && action="degrade" || action="fault"
+ log_must zinject -d $BAD_VDEV -A $action -T read $TESTPOOL2
+
+ # Do some reads and wait for us to sit out
+ for i in {0..99} ; do
+ dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null &
+ dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null
+
+ sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)
+ if [[ "$sit_out" == "on" ]] ; then
+ break
+ fi
+ done
+
+ log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)" == "on"
+
+ # Clear fault injection
+ log_must zinject -c all
+
+ # Wait for us to exit our sit out period
+ log_must wait_sit_out $TESTPOOL2 $SLOW_VDEV 10
+
+ log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)" == "off"
+ destroy_pool $TESTPOOL2
+ log_must zpool labelclear -f $BAD_VDEV
+done
+
+log_pass "sit_out works correctly"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_sit_out.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_sit_out.ksh
new file mode 100755
index 000000000000..37f616cf56ee
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_sit_out.ksh
@@ -0,0 +1,102 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
+
+# DESCRIPTION:
+# Verify that vdevs 'sit out' when they are slow
+#
+# STRATEGY:
+# 1. Create various raidz/draid pools
+# 2. Inject delays into one of the disks
+# 3. Verify disk is set to 'sit out' for a while.
+# 4. Wait for READ_SIT_OUT_SECS and verify sit out state is lifted.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+function cleanup
+{
+ restore_tunable READ_SIT_OUT_SECS
+ restore_tunable SIT_OUT_CHECK_INTERVAL
+ log_must zinject -c all
+ log_must zpool events -c
+ destroy_pool $TESTPOOL2
+ log_must rm -f $TEST_BASE_DIR/vdev.$$.*
+}
+
+log_assert "Verify sit_out works"
+
+log_onexit cleanup
+
+# shorten sit out period for testing
+save_tunable READ_SIT_OUT_SECS
+set_tunable32 READ_SIT_OUT_SECS 5
+
+save_tunable SIT_OUT_CHECK_INTERVAL
+set_tunable64 SIT_OUT_CHECK_INTERVAL 20
+
+log_must truncate -s200M $TEST_BASE_DIR/vdev.$$.{0..9}
+
+for raidtype in raidz raidz2 raidz3 draid1 draid2 draid3 ; do
+ log_must zpool create $TESTPOOL2 $raidtype $TEST_BASE_DIR/vdev.$$.{0..9}
+ log_must zpool set autosit=on $TESTPOOL2 "${raidtype}-0"
+ log_must dd if=/dev/urandom of=/$TESTPOOL2/bigfile bs=1M count=600
+ log_must zpool export $TESTPOOL2
+ log_must zpool import -d $TEST_BASE_DIR $TESTPOOL2
+
+ BAD_VDEV=$TEST_BASE_DIR/vdev.$$.9
+
+ # Initial state should not be sitting out
+ log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)" == "off" ]]
+
+ # Delay our reads 200ms to trigger sit out
+ log_must zinject -d $BAD_VDEV -D200:1 -T read $TESTPOOL2
+
+ # Do some reads and wait for us to sit out
+ for i in {0..99} ; do
+ dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null &
+ dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null &
+ dd if=/$TESTPOOL2/bigfile skip=$((i + 200)) bs=2M count=1 of=/dev/null
+
+ sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)
+ if [[ "$sit_out" == "on" ]] ; then
+ break
+ fi
+ done
+
+ log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)" == "on"
+
+ # Clear fault injection
+ log_must zinject -c all
+
+ # Wait for us to exit our sit out period
+ log_must wait_sit_out $TESTPOOL2 $BAD_VDEV 10
+
+ # Verify sit_out was cleared during wait_sit_out
+ log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)" == "off"
+
+ destroy_pool $TESTPOOL2
+done
+
+log_pass "sit_out works correctly"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_sit_out_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_sit_out_neg.ksh
new file mode 100755
index 000000000000..457105a66453
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/slow_vdev_sit_out_neg.ksh
@@ -0,0 +1,116 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2025 by Klara, Inc.
+
+# DESCRIPTION:
+# Verify that we don't sit out too many vdevs
+#
+# STRATEGY:
+# 1. Create draid2 pool
+# 2. Inject delays into three of the disks
+# 3. Do reads to trigger sit-outs
+# 4. Verify exactly 2 disks sit out
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+function cleanup
+{
+ restore_tunable READ_SIT_OUT_SECS
+ restore_tunable SIT_OUT_CHECK_INTERVAL
+ log_must zinject -c all
+ log_must zpool events -c
+ destroy_pool $TESTPOOL2
+ log_must rm -f $TEST_BASE_DIR/vdev.$$.*
+}
+
+log_assert "Verify sit_out works"
+
+log_onexit cleanup
+
+save_tunable SIT_OUT_CHECK_INTERVAL
+set_tunable64 SIT_OUT_CHECK_INTERVAL 20
+
+log_must truncate -s 150M $TEST_BASE_DIR/vdev.$$.{0..9}
+
+log_must zpool create $TESTPOOL2 draid2 $TEST_BASE_DIR/vdev.$$.{0..9}
+log_must zpool set autosit=on $TESTPOOL2 draid2-0
+log_must dd if=/dev/urandom of=/$TESTPOOL2/bigfile bs=1M count=400
+log_must zpool export $TESTPOOL2
+log_must zpool import -d $TEST_BASE_DIR $TESTPOOL2
+
+BAD_VDEV1=$TEST_BASE_DIR/vdev.$$.7
+BAD_VDEV2=$TEST_BASE_DIR/vdev.$$.8
+BAD_VDEV3=$TEST_BASE_DIR/vdev.$$.9
+
+# Initial state should not be sitting out
+log_must eval [[ "$(get_vdev_prop autosit $TESTPOOL2 draid2-0)" == "on" ]]
+log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV1)" == "off" ]]
+log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV2)" == "off" ]]
+log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV3)" == "off" ]]
+
+# Delay our reads 200ms to trigger sit out
+log_must zinject -d $BAD_VDEV1 -D200:1 -T read $TESTPOOL2
+
+# Do some reads and wait for us to sit out
+for i in {0..99} ; do
+ dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null &
+ dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null
+
+ sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV1)
+ if [[ "$sit_out" == "on" ]] ; then
+ break
+ fi
+done
+log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV1)" == "on"
+
+log_must zinject -d $BAD_VDEV2 -D200:1 -T read $TESTPOOL2
+# Do some reads and wait for us to sit out
+for i in {0..99} ; do
+ dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null &
+ dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null
+
+ sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV2)
+ if [[ "$sit_out" == "on" ]] ; then
+ break
+ fi
+done
+log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV2)" == "on"
+
+log_must zinject -d $BAD_VDEV3 -D200:1 -T read $TESTPOOL2
+# Do some reads and wait for us to sit out
+for i in {0..99} ; do
+ dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null &
+ dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null
+
+ sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV3)
+ if [[ "$sit_out" == "on" ]] ; then
+ break
+ fi
+done
+log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV3)" == "off"
+
+
+log_pass "sit_out works correctly"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_synchronous_zedlet.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_synchronous_zedlet.ksh
new file mode 100755
index 000000000000..6b732ea96d0c
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_synchronous_zedlet.ksh
@@ -0,0 +1,149 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025 by Lawrence Livermore National Security, LLC.
+#
+
+# DESCRIPTION:
+# Verify ZED synchronous zedlets work as expected
+#
+# STRATEGY:
+# 1. Create a scrub_start zedlet that runs quickly
+# 2. Create a scrub_start zedlet that runs slowly (takes seconds)
+# 3. Create a scrub_finish zedlet that is synchronous and runs slowly
+# 4. Create a trim_start zedlet that runs quickly
+# 5. Scrub the pool
+# 6. Trim the pool
+# 7. Verify the synchronous scrub_finish zedlet waited for the scrub_start
+# zedlets to finish (including the slow one). If the scrub_finish zedlet
+# was not synchronous, it would have completed before the slow scrub_start
+# zedlet.
+# 8. Verify the trim_start zedlet waited for the slow synchronous scrub_finish
+# zedlet to complete.
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/events/events_common.kshlib
+
+verify_runnable "both"
+
+OUR_ZEDLETS="scrub_start-async.sh scrub_start-slow.sh scrub_finish-sync-slow.sh trim_start-async.sh"
+
+OUTFILE="$TEST_BASE_DIR/zed_synchronous_zedlet_lines"
+TESTPOOL2=testpool2
+
+function cleanup
+{
+ zed_stop
+
+ for i in $OUR_ZEDLETS ; do
+ log_must rm -f $ZEDLET_DIR/$i
+ done
+ destroy_pool $TESTPOOL2
+ log_must rm -f $TEST_BASE_DIR/vdev-file-sync-zedlet
+ log_must rm -f $OUTFILE
+}
+
+log_assert "Verify ZED synchronous zedlets work as expected"
+
+log_onexit cleanup
+
+# Make a pool
+log_must truncate -s 100M $TEST_BASE_DIR/vdev-file-sync-zedlet
+log_must zpool create $TESTPOOL2 $TEST_BASE_DIR/vdev-file-sync-zedlet
+
+# Do an initial scrub
+log_must zpool scrub -w $TESTPOOL2
+
+log_must zpool events -c
+
+mkdir -p $ZEDLET_DIR
+
+# Create zedlets
+cat << EOF > $ZEDLET_DIR/scrub_start-async.sh
+#!/bin/ksh -p
+echo "\$(date) \$(basename \$0)" >> $OUTFILE
+EOF
+
+cat << EOF > $ZEDLET_DIR/scrub_start-slow.sh
+#!/bin/ksh -p
+sleep 3
+echo "\$(date) \$(basename \$0)" >> $OUTFILE
+EOF
+
+cat << EOF > $ZEDLET_DIR/scrub_finish-sync-slow.sh
+#!/bin/ksh -p
+sleep 3
+echo "\$(date) \$(basename \$0)" >> $OUTFILE
+EOF
+
+cat << EOF > $ZEDLET_DIR/trim_start-async.sh
+#!/bin/ksh -p
+echo "\$(date) \$(basename \$0)" >> $OUTFILE
+EOF
+
+for i in $OUR_ZEDLETS ; do
+ log_must chmod +x $ZEDLET_DIR/$i
+done
+
+log_must zed_start
+
+# Do a scrub - it should be instantaneous.
+log_must zpool scrub -w $TESTPOOL2
+
+# Start off a trim immediately after scrubbing. The trim should be
+# instantaneous and generate a trim_start event. This will happen in parallel
+# with the slow 'scrub_finish-sync-slow.sh' zedlet still running.
+log_must zpool trim -w $TESTPOOL2
+
+# Wait for the trim_finish event to happen for sanity. This is the *event*, not
+# the completion of zedlets for the event.
+log_must file_wait_event $ZED_DEBUG_LOG 'sysevent\.fs\.zfs\.trim_finish' 10
+
+# At a minimum, scrub_start-slow.sh + scrub_finish-sync-slow.sh will take a
+# total of 6 seconds to run, so wait 7 sec to be sure.
+sleep 7
+
+# If our zedlets were run in the right order, with sync correctly honored, you
+# will see this ordering in $OUTFILE:
+#
+# Fri May 16 12:04:23 PDT 2025 scrub_start-async.sh
+# Fri May 16 12:04:26 PDT 2025 scrub_start-slow.sh
+# Fri May 16 12:04:31 PDT 2025 scrub_finish-sync-slow.sh
+# Fri May 16 12:04:31 PDT 2025 trim_start-async.sh
+#
+# Check for this ordering
+
+# Get a list of just the script names in the order they were executed
+# from OUTFILE
+lines="$(echo $(grep -Eo '(scrub|trim)_.+\.sh$' $OUTFILE))"
+
+# Compare it to the ordering we expect
+expected="\
+scrub_start-async.sh \
+scrub_start-slow.sh \
+scrub_finish-sync-slow.sh \
+trim_start-async.sh"
+log_must test "$lines" == "$expected"
+
+log_pass "Verified synchronous zedlets"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh
index 1b3310edb98b..45b041503e22 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/fault_limits.ksh
@@ -67,7 +67,7 @@ log_must zpool create -f ${TESTPOOL} raidz${PARITY} ${disks[1..$((VDEV_CNT - 1))
# Add some data to the pool
log_must zfs create $TESTPOOL/fs
MNTPOINT="$(get_prop mountpoint $TESTPOOL/fs)"
-log_must fill_fs $MNTPOINT $PARITY 200 32768 1000 Z
+log_must fill_fs $MNTPOINT $PARITY 200 32768 100 R
sync_pool $TESTPOOL
# Replace the last child vdev to form a replacing vdev
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mount/mount_loopback.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mount/mount_loopback.ksh
new file mode 100755
index 000000000000..86adef7ea032
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mount/mount_loopback.ksh
@@ -0,0 +1,111 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+# Copyright (c) 2025 by Lawrence Livermore National Security, LLC.
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify that we can make an xfs filesystem on a ZFS-backed loopback device.
+#
+# See:
+# https://github.com/openzfs/zfs/pull/17298
+# https://github.com/openzfs/zfs/issues/17277
+#
+# STRATEGY:
+# 1. Make a pool
+# 2. Make a file on the pool or create a zvol
+# 3. Mount the file/zvol behind a loopback device
+# 4. Create & mount an xfs filesystem on the loopback device
+
+function cleanup
+{
+ if [ -d $TEST_BASE_DIR/mnt ] ; then
+ umount $TEST_BASE_DIR/mnt
+ log_must rmdir $TEST_BASE_DIR/mnt
+ fi
+ if [ -n "$DEV" ] ; then
+ log_must losetup -d $DEV
+ fi
+ destroy_pool $TESTPOOL2
+ log_must rm -f $TEST_BASE_DIR/file1
+}
+
+if [ ! -x "$(which mkfs.xfs)" ] ; then
+ log_unsupported "No mkfs.xfs binary"
+fi
+
+if [ ! -d /lib/modules/$(uname -r)/kernel/fs/xfs ] && \
+ ! grep -qE '\sxfs$' /proc/filesystems ; then
+ log_unsupported "No XFS kernel support"
+fi
+
+log_assert "Make an xfs filesystem on a ZFS-backed loopback device"
+log_onexit cleanup
+
+# fio options
+export NUMJOBS=2
+export RUNTIME=3
+export PERF_RANDSEED=1234
+export PERF_COMPPERCENT=66
+export PERF_COMPCHUNK=0
+export BLOCKSIZE=128K
+export SYNC_TYPE=0
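+# Use small 1 MiB data files for the fio runs.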
+export FILE_SIZE=$(( 1024 * 1024 ))
+
+function do_test
+{
+ imgfile=$1
+ log_note "Running test on $imgfile"
+ log_must losetup -f $imgfile
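+	# Find the loop device that was just attached to the image file.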
+ DEV=$(losetup --associated $imgfile | grep -Eo '^/dev/loop[0-9]+')
+ log_must mkfs.xfs $DEV
+ mkdir $TEST_BASE_DIR/mnt
+ log_must mount $DEV $TEST_BASE_DIR/mnt
+ export DIRECTORY=$TEST_BASE_DIR/mnt
+
+ for d in 0 1 ; do
+ # fio options
+ export DIRECT=$d
+ log_must fio $FIO_SCRIPTS/mkfiles.fio
+ log_must fio $FIO_SCRIPTS/random_reads.fio
+ done
+ log_must umount $TEST_BASE_DIR/mnt
+ log_must rmdir $TEST_BASE_DIR/mnt
+ log_must losetup -d $DEV
+ DEV=""
+}
+
+log_must truncate -s 1G $TEST_BASE_DIR/file1
+log_must zpool create $TESTPOOL2 $TEST_BASE_DIR/file1
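+# Test a loopback device backed by a regular file on the pool.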
+log_must truncate -s 512M /$TESTPOOL2/img
+do_test /$TESTPOOL2/img
+log_must rm /$TESTPOOL2/img
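+
+# Repeat the test with a loopback device backed by a zvol.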
+log_must zfs create -V 512M $TESTPOOL2/vol
+
+blkdev="$ZVOL_DEVDIR/$TESTPOOL2/vol"
+block_device_wait $blkdev
+do_test $blkdev
+
+log_pass "Verified xfs filesystem on a ZFS-backed loopback device"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh
index 3249bd93d5ce..a1da4a8631e1 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh
@@ -17,6 +17,7 @@
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/refreserv/refreserv.cfg
+. $STF_SUITE/tests/functional/zvol/zvol_misc/zvol_misc_common.kshlib
#
# DESCRIPTION:
@@ -24,7 +25,7 @@
#
# STRATEGY:
# 1. Create a pool with a single raidz vdev
-# 2. For each block size [512b, 1k, 128k] or [4k, 8k, 128k]
+# 2. For each block size [4k, 8k, 128k]
# - create a volume
# - fully overwrite it
# - verify that referenced is less than or equal to reservation
@@ -38,6 +39,7 @@
# 1. This test will use up to 14 disks but can cover the key concepts with
# 5 disks.
# 2. If the disks are a mixture of 4Kn and 512n/512e, failures are likely.
+# Therefore, when creating the pool we specify 4Kn sectors.
#
verify_runnable "global"
@@ -60,29 +62,10 @@ log_onexit cleanup
poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL"
-# Testing tiny block sizes on ashift=12 pools causes so much size inflation
-# that small test disks may fill before creating small volumes. However,
-# testing 512b and 1K blocks on ashift=9 pools is an ok approximation for
-# testing the problems that arise from 4K and 8K blocks on ashift=12 pools.
-if is_freebsd; then
- bps=$(diskinfo -v ${alldisks[0]} | awk '/sectorsize/ { print $1 }')
-elif is_linux; then
- bps=$(lsblk -nrdo min-io /dev/${alldisks[0]})
-fi
-log_must test "$bps" -eq 512 -o "$bps" -eq 4096
-case "$bps" in
-512)
- allshifts=(9 10 17)
- maxpct=151
- ;;
-4096)
- allshifts=(12 13 17)
- maxpct=110
- ;;
-*)
- log_fail "bytes/sector: $bps != (512|4096)"
- ;;
-esac
+ashift=12
+allshifts=(12 13 17)
+maxpct=110
+
log_note "Testing in ashift=${allshifts[0]} mode"
# This loop handles all iterations of steps 1 through 4 described in strategy
@@ -99,18 +82,21 @@ for parity in 1 2 3; do
continue
fi
- log_must zpool create -O compression=off "$TESTPOOL" "$raid" "${disks[@]}"
+ log_must zpool create -o ashift=$ashift "$TESTPOOL" "$raid" "${disks[@]}"
for bits in "${allshifts[@]}"; do
vbs=$((1 << bits))
log_note "Testing $raid-$ndisks volblocksize=$vbs"
- vol=$TESTPOOL/$TESTVOL
+ vol=$TESTPOOL/$TESTVOL-$vbs
+ zdev=$ZVOL_DEVDIR/$vol
log_must zfs create -V ${volsize}m \
-o volblocksize=$vbs "$vol"
- block_device_wait "/dev/zvol/$vol"
- log_must dd if=/dev/zero of=/dev/zvol/$vol \
- bs=1024k count=$volsize
+ block_device_wait $zdev
+ blockdev_exists $zdev
+
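+		# Fully overwrite the volume with incompressible random data.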
+ log_must timeout 120 dd if=/dev/urandom of=$zdev \
+ bs=1024k count=$volsize status=progress
sync_pool $TESTPOOL
ref=$(zfs get -Hpo value referenced "$vol")
@@ -126,7 +112,7 @@ for parity in 1 2 3; do
log_must test "$deltapct" -le $maxpct
log_must_busy zfs destroy "$vol"
- block_device_wait
+ blockdev_missing $zdev
done
log_must_busy zpool destroy "$TESTPOOL"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/attach_resilver_sit_out.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/attach_resilver_sit_out.ksh
new file mode 100755
index 000000000000..6820aba184b7
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/attach_resilver_sit_out.ksh
@@ -0,0 +1,189 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2025, Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# DESCRIPTION:
+# Attaching disks while a disk is sitting out reads should pass
+#
+# STRATEGY:
+# 1. Create raidz pools
+# 2. Make one disk slower and trigger a read sit out for that disk
+# 3. Start some random I/O
+# 4. Attach a disk to the pool.
+# 5. Verify the integrity of the file system and the resilvering.
+
+verify_runnable "global"
+
+save_tunable READ_SIT_OUT_SECS
+set_tunable32 READ_SIT_OUT_SECS 120
+save_tunable SIT_OUT_CHECK_INTERVAL
+set_tunable64 SIT_OUT_CHECK_INTERVAL 20
+
+function cleanup
+{
+ restore_tunable READ_SIT_OUT_SECS
+ restore_tunable SIT_OUT_CHECK_INTERVAL
+ log_must zinject -c all
+ log_must zpool events -c
+
+ if [[ -n "$child_pids" ]]; then
+ for wait_pid in $child_pids; do
+ kill $wait_pid
+ done
+ fi
+
+ if poolexists $TESTPOOL1; then
+ destroy_pool $TESTPOOL1
+ fi
+
+ [[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
+}
+
+log_assert "Replacing a disk during I/O with a sit out completes."
+
+options=""
+options_display="default options"
+
+log_onexit cleanup
+
+[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE "
+
+[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE "
+
+[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT "
+
+[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED "
+
+[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET "
+
+options="$options -r "
+
+[[ -n "$options" ]] && options_display=$options
+
+child_pids=""
+
+function attach_test
+{
+ typeset vdev=$1
+ typeset disk=$2
+
+	typeset -i iters=2
+	typeset i=0
+ while [[ $i -lt $iters ]]; do
+ log_note "Invoking file_trunc with: $options_display on $TESTFILE.$i"
+ file_trunc $options $TESTDIR/$TESTFILE.$i &
+ typeset pid=$!
+
+ sleep 1
+
+ child_pids="$child_pids $pid"
+ ((i = i + 1))
+ done
+
+ # attach disk with a slow drive still present
+ SECONDS=0
+ log_must zpool attach -w $TESTPOOL1 $vdev $disk
+ log_note took $SECONDS seconds to attach disk
+
+ for wait_pid in $child_pids
+ do
+ kill $wait_pid
+ done
+ child_pids=""
+
+ log_must zinject -c all
+ log_must zpool export $TESTPOOL1
+ log_must zpool import -d $TESTDIR $TESTPOOL1
+ log_must zfs umount $TESTPOOL1/$TESTFS1
+ log_must zdb -cdui $TESTPOOL1/$TESTFS1
+ log_must zfs mount $TESTPOOL1/$TESTFS1
+ verify_pool $TESTPOOL1
+}
+
+DEVSIZE="150M"
+specials_list=""
+i=0
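+# Create the file-backed vdevs for the raidz pools.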
+while [[ $i != 10 ]]; do
+ truncate -s $DEVSIZE $TESTDIR/$TESTFILE1.$i
+ specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
+
+ ((i = i + 1))
+done
+
+slow_disk=$TESTDIR/$TESTFILE1.3
+log_must truncate -s $DEVSIZE $TESTDIR/$REPLACEFILE
+
+# Test file size in MB
+count=200
+
+for type in "raidz1" "raidz2" "raidz3" ; do
+ create_pool $TESTPOOL1 $type $specials_list
+ log_must zpool set autosit=on $TESTPOOL1 "${type}-0"
+ log_must zfs create -o primarycache=none -o recordsize=512K \
+ $TESTPOOL1/$TESTFS1
+ log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
+
+ log_must dd if=/dev/urandom of=/$TESTDIR1/bigfile bs=1M count=$count
+
+ # Make one disk 100ms slower to trigger a sit out
+ log_must zinject -d $slow_disk -D100:1 -T read $TESTPOOL1
+
+ # Do some reads and wait for sit out on slow disk
+ SECONDS=0
+ typeset -i size=0
+ for i in $(seq 1 $count) ; do
+ dd if=/$TESTDIR1/bigfile skip=$i bs=1M count=1 of=/dev/null
+ size=$i
+
+ sit_out=$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk)
+ if [[ "$sit_out" == "on" ]] ; then
+ break
+ fi
+ done
+
+ log_must test "$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk)" == "on"
+ log_note took $SECONDS seconds to reach sit out reading ${size}M
+ log_must zpool status -s $TESTPOOL1
+
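+	# Attach a new disk to the top-level raidz vdev while the slow disk
+	# is still sitting out.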
+ typeset top=$(zpool status -j | jq -r ".pools.$TESTPOOL1.vdevs[].vdevs[].name")
+ attach_test $top $TESTDIR/$REPLACEFILE
+
+ log_must eval "zpool iostat -v $TESTPOOL1 | grep \"$REPLACEFILE\""
+
+ destroy_pool $TESTPOOL1
+ log_must rm -rf /$TESTPOOL1
+done
+
+log_pass
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/replace_resilver_sit_out.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/replace_resilver_sit_out.ksh
new file mode 100755
index 000000000000..4109dbaf45ac
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/replace_resilver_sit_out.ksh
@@ -0,0 +1,199 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2025, Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# DESCRIPTION:
+# Replacing disks while a disk is sitting out reads should pass
+#
+# STRATEGY:
+# 1. Create raidz and draid pools
+# 2. Make one disk slower and trigger a read sit out for that disk
+# 3. Start some random I/O
+# 4. Replace a disk in the pool with another disk.
+# 5. Verify the integrity of the file system and the resilvering.
+#
+
+verify_runnable "global"
+
+save_tunable READ_SIT_OUT_SECS
+set_tunable32 READ_SIT_OUT_SECS 120
+save_tunable SIT_OUT_CHECK_INTERVAL
+set_tunable64 SIT_OUT_CHECK_INTERVAL 20
+
+function cleanup
+{
+ restore_tunable READ_SIT_OUT_SECS
+ restore_tunable SIT_OUT_CHECK_INTERVAL
+ log_must zinject -c all
+ log_must zpool events -c
+
+ if [[ -n "$child_pids" ]]; then
+ for wait_pid in $child_pids
+ do
+ kill $wait_pid
+ done
+ fi
+
+ if poolexists $TESTPOOL1; then
+ destroy_pool $TESTPOOL1
+ fi
+
+ [[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
+}
+
+log_assert "Replacing a disk during I/O with a sit out completes."
+
+options=""
+options_display="default options"
+
+log_onexit cleanup
+
+[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE "
+
+[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE "
+
+[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT "
+
+[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED "
+
+[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET "
+
+options="$options -r "
+
+[[ -n "$options" ]] && options_display=$options
+
+child_pids=""
+
+function replace_test
+{
+ typeset -i iters=2
+ typeset disk1=$1
+ typeset disk2=$2
+ typeset repl_type=$3
+
+ typeset i=0
+ while [[ $i -lt $iters ]]; do
+ log_note "Invoking file_trunc with: $options_display on $TESTFILE.$i"
+ file_trunc $options $TESTDIR/$TESTFILE.$i &
+ typeset pid=$!
+
+ sleep 1
+
+ child_pids="$child_pids $pid"
+ ((i = i + 1))
+ done
+
+ typeset repl_flag="-w"
+ if [[ "$repl_type" == "seq" ]]; then
+ repl_flag="-ws"
+ fi
+ # replace disk with a slow drive still present
+ SECONDS=0
+ log_must zpool replace $repl_flag $TESTPOOL1 $disk1 $disk2
+ log_note took $SECONDS seconds to replace disk
+
+ for wait_pid in $child_pids
+ do
+ kill $wait_pid
+ done
+ child_pids=""
+
+ log_must zinject -c all
+ log_must zpool export $TESTPOOL1
+ log_must zpool import -d $TESTDIR $TESTPOOL1
+ log_must zfs umount $TESTPOOL1/$TESTFS1
+ log_must zdb -cdui $TESTPOOL1/$TESTFS1
+ log_must zfs mount $TESTPOOL1/$TESTFS1
+ verify_pool $TESTPOOL1
+}
+
+DEVSIZE="150M"
+specials_list=""
+i=0
+while [[ $i != 10 ]]; do
+ log_must truncate -s $DEVSIZE $TESTDIR/$TESTFILE1.$i
+ specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
+
+ ((i = i + 1))
+done
+
+slow_disk=$TESTDIR/$TESTFILE1.3
+log_must truncate -s $DEVSIZE $TESTDIR/$REPLACEFILE
+
+# Test file size in MB
+count=400
+
+for type in "raidz2" "raidz3" "draid2"; do
+ create_pool $TESTPOOL1 $type $specials_list
+ log_must zpool set autosit=on $TESTPOOL1 "${type}-0"
+ log_must zfs create -o primarycache=none -o recordsize=512K \
+ $TESTPOOL1/$TESTFS1
+ log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
+
+ log_must dd if=/dev/urandom of=/$TESTDIR1/bigfile bs=1M count=$count
+
+ # Make one disk 100ms slower to trigger a sit out
+ log_must zinject -d $slow_disk -D100:1 -T read $TESTPOOL1
+
+ # Do some reads and wait for sit out on slow disk
+ SECONDS=0
+ typeset -i size=0
+ for i in $(seq 1 $count) ; do
+ dd if=/$TESTDIR1/bigfile skip=$i bs=1M count=1 of=/dev/null
+ size=$i
+
+ sit_out=$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk)
+ if [[ "$sit_out" == "on" ]] ; then
+ break
+ fi
+ done
+ log_must test "$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk)" == "on"
+ log_note took $SECONDS seconds to reach sit out reading ${size}M
+ log_must zpool status -s $TESTPOOL1
+
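+	# For draid pools, randomly use a sequential (rebuild) replacement
+	# instead of a healing resilver.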
+ typeset repl_type="replace"
+ if [[ "$type" == "draid2" && $((RANDOM % 2)) -eq 0 ]]; then
+ repl_type="seq"
+ fi
+ replace_test $TESTDIR/$TESTFILE1.1 $TESTDIR/$REPLACEFILE $repl_type
+
+ log_must eval "zpool iostat -v $TESTPOOL1 | grep \"$REPLACEFILE\""
+
+ destroy_pool $TESTPOOL1
+ log_must rm -rf /$TESTPOOL1
+done
+
+log_pass
diff --git a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c
index ab8981e25cb2..0150ce72f0a4 100644
--- a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c
+++ b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c
@@ -464,7 +464,8 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
(type == PAGELIST_READ ? VM_PROT_WRITE : 0 ) | VM_PROT_READ, pages, num_pages);
if (actual_pages != num_pages) {
- vm_page_unhold_pages(pages, actual_pages);
+ if (actual_pages > 0)
+ vm_page_unhold_pages(pages, actual_pages);
free(pagelist, M_VCPAGELIST);
return (-ENOMEM);
}
diff --git a/sys/crypto/openssl/amd64/ossl_aes_gcm_avx512.c b/sys/crypto/openssl/amd64/ossl_aes_gcm_avx512.c
new file mode 100644
index 000000000000..694ed4fc8b32
--- /dev/null
+++ b/sys/crypto/openssl/amd64/ossl_aes_gcm_avx512.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright (c) 2021, Intel Corporation. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+/*
+ * This file contains an AES-GCM wrapper implementation from OpenSSL, using
+ * VAES extensions. It was ported from cipher_aes_gcm_hw_vaes_avx512.inc.
+ */
+
+#include <sys/endian.h>
+#include <sys/systm.h>
+
+#include <crypto/openssl/ossl.h>
+#include <crypto/openssl/ossl_aes_gcm.h>
+#include <crypto/openssl/ossl_cipher.h>
+
+#include <opencrypto/cryptodev.h>
+
+_Static_assert(
+ sizeof(struct ossl_gcm_context) <= sizeof(struct ossl_cipher_context),
+ "ossl_gcm_context too large");
+
+void aesni_set_encrypt_key(const void *key, int bits, void *ctx);
+
+static void
+gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
+{
+ KASSERT(keylen == 128 || keylen == 192 || keylen == 256,
+ ("%s: invalid key length %zu", __func__, keylen));
+
+ memset(&ctx->gcm, 0, sizeof(ctx->gcm));
+ memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks));
+ aesni_set_encrypt_key(key, keylen, &ctx->aes_ks);
+ ctx->ops->init(ctx, key, keylen);
+}
+
+static void
+gcm_tag(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len)
+{
+ (void)ctx->ops->finish(ctx, NULL, 0);
+ memcpy(tag, ctx->gcm.Xi.c, len);
+}
+
+void ossl_gcm_gmult_avx512(uint64_t Xi[2], void *gcm128ctx);
+void ossl_aes_gcm_init_avx512(const void *ks, void *gcm128ctx);
+void ossl_aes_gcm_setiv_avx512(const void *ks, void *gcm128ctx,
+ const unsigned char *iv, size_t ivlen);
+void ossl_aes_gcm_update_aad_avx512(void *gcm128ctx, const unsigned char *aad,
+ size_t len);
+void ossl_aes_gcm_encrypt_avx512(const void *ks, void *gcm128ctx,
+ unsigned int *pblocklen, const unsigned char *in, size_t len,
+ unsigned char *out);
+void ossl_aes_gcm_decrypt_avx512(const void *ks, void *gcm128ctx,
+ unsigned int *pblocklen, const unsigned char *in, size_t len,
+ unsigned char *out);
+void ossl_aes_gcm_finalize_avx512(void *gcm128ctx, unsigned int pblocklen);
+
+static void
+gcm_init_avx512(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
+{
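+	/* The AES key schedule was already expanded by gcm_init(). */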
+ ossl_aes_gcm_init_avx512(&ctx->aes_ks, &ctx->gcm);
+}
+
+static void
+gcm_setiv_avx512(struct ossl_gcm_context *ctx, const unsigned char *iv,
+ size_t len)
+{
+ KASSERT(len == AES_GCM_IV_LEN,
+ ("%s: invalid IV length %zu", __func__, len));
+
+ ctx->gcm.Yi.u[0] = 0; /* Current counter */
+ ctx->gcm.Yi.u[1] = 0;
+ ctx->gcm.Xi.u[0] = 0; /* AAD hash */
+ ctx->gcm.Xi.u[1] = 0;
+ ctx->gcm.len.u[0] = 0; /* AAD length */
+ ctx->gcm.len.u[1] = 0; /* Message length */
+ ctx->gcm.ares = 0;
+ ctx->gcm.mres = 0;
+
+ ossl_aes_gcm_setiv_avx512(&ctx->aes_ks, ctx, iv, len);
+}
+
+static int
+gcm_aad_avx512(struct ossl_gcm_context *ctx, const unsigned char *aad,
+ size_t len)
+{
+ uint64_t alen = ctx->gcm.len.u[0];
+ size_t lenblks;
+ unsigned int ares;
+
+ /* Bad sequence: call of AAD update after message processing */
+ if (ctx->gcm.len.u[1])
+ return -2;
+
+ alen += len;
+ /* AAD is limited by 2^64 bits, thus 2^61 bytes */
+ if (alen > (1ull << 61) || (sizeof(len) == 8 && alen < len))
+ return -1;
+ ctx->gcm.len.u[0] = alen;
+
+ ares = ctx->gcm.ares;
+ /* Partial AAD block left from previous AAD update calls */
+ if (ares > 0) {
+ /*
+ * Fill partial block buffer till full block
+ * (note, the hash is stored reflected)
+ */
+ while (ares > 0 && len > 0) {
+ ctx->gcm.Xi.c[15 - ares] ^= *(aad++);
+ --len;
+ ares = (ares + 1) % AES_BLOCK_LEN;
+ }
+ /* Full block gathered */
+ if (ares == 0) {
+ ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
+ } else { /* no more AAD */
+ ctx->gcm.ares = ares;
+ return 0;
+ }
+ }
+
+ /* Bulk AAD processing */
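+	/* Round the remaining length down to a multiple of the block size. */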
+ lenblks = len & ((size_t)(-AES_BLOCK_LEN));
+ if (lenblks > 0) {
+ ossl_aes_gcm_update_aad_avx512(ctx, aad, lenblks);
+ aad += lenblks;
+ len -= lenblks;
+ }
+
+ /* Add remaining AAD to the hash (note, the hash is stored reflected) */
+ if (len > 0) {
+ ares = (unsigned int)len;
+ for (size_t i = 0; i < len; ++i)
+ ctx->gcm.Xi.c[15 - i] ^= aad[i];
+ }
+
+ ctx->gcm.ares = ares;
+
+ return 0;
+}
+
+static int
+_gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
+ unsigned char *out, size_t len, bool encrypt)
+{
+ uint64_t mlen = ctx->gcm.len.u[1];
+
+ mlen += len;
+ if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
+ return -1;
+
+ ctx->gcm.len.u[1] = mlen;
+
+ /* Finalize GHASH(AAD) if AAD partial blocks left unprocessed */
+ if (ctx->gcm.ares > 0) {
+ ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
+ ctx->gcm.ares = 0;
+ }
+
+ if (encrypt) {
+ ossl_aes_gcm_encrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
+ in, len, out);
+ } else {
+ ossl_aes_gcm_decrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
+ in, len, out);
+ }
+
+ return 0;
+}
+
+static int
+gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
+ unsigned char *out, size_t len)
+{
+ return _gcm_encrypt_avx512(ctx, in, out, len, true);
+}
+
+static int
+gcm_decrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
+ unsigned char *out, size_t len)
+{
+ return _gcm_encrypt_avx512(ctx, in, out, len, false);
+}
+
+static int
+gcm_finish_avx512(struct ossl_gcm_context *ctx, const unsigned char *tag,
+ size_t len)
+{
+ unsigned int *res = &ctx->gcm.mres;
+
+ /* Finalize AAD processing */
+ if (ctx->gcm.ares > 0)
+ res = &ctx->gcm.ares;
+
+ ossl_aes_gcm_finalize_avx512(ctx, *res);
+
+ ctx->gcm.ares = ctx->gcm.mres = 0;
+
+ if (tag != NULL)
+ return timingsafe_bcmp(ctx->gcm.Xi.c, tag, len);
+ return 0;
+}
+
+static const struct ossl_aes_gcm_ops gcm_ops_avx512 = {
+ .init = gcm_init_avx512,
+ .setiv = gcm_setiv_avx512,
+ .aad = gcm_aad_avx512,
+ .encrypt = gcm_encrypt_avx512,
+ .decrypt = gcm_decrypt_avx512,
+ .finish = gcm_finish_avx512,
+ .tag = gcm_tag,
+};
+
+int ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen, void *_ctx);
+
+int
+ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen,
+ void *_ctx)
+{
+ struct ossl_gcm_context *ctx;
+
+ ctx = _ctx;
+ ctx->ops = &gcm_ops_avx512;
+ gcm_init(ctx, key, klen);
+ return (0);
+}
diff --git a/sys/crypto/openssl/arm/ossl_aes_gcm.c b/sys/crypto/openssl/arm/ossl_aes_gcm_neon.c
index e51b7b4fbc04..e51b7b4fbc04 100644
--- a/sys/crypto/openssl/arm/ossl_aes_gcm.c
+++ b/sys/crypto/openssl/arm/ossl_aes_gcm_neon.c
diff --git a/sys/crypto/openssl/amd64/ossl_aes_gcm.c b/sys/crypto/openssl/ossl_aes_gcm.c
index d08b2ac8a759..e1cdc710c3aa 100644
--- a/sys/crypto/openssl/amd64/ossl_aes_gcm.c
+++ b/sys/crypto/openssl/ossl_aes_gcm.c
@@ -1,6 +1,7 @@
/*
* Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
* Copyright (c) 2021, Intel Corporation. All Rights Reserved.
+ * Copyright (c) 2023, Raptor Engineering, LLC. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -9,11 +10,10 @@
*/
/*
- * This file contains 2 AES-GCM wrapper implementations from OpenSSL, using
- * AES-NI and VAES extensions respectively. These were ported from
- * cipher_aes_gcm_hw_aesni.inc and cipher_aes_gcm_hw_vaes_avx512.inc. The
- * AES-NI implementation makes use of a generic C implementation for partial
- * blocks, ported from gcm128.c with OPENSSL_SMALL_FOOTPRINT defined.
+ * This file contains an AES-GCM wrapper implementation from OpenSSL, using
+ * AES-NI (x86) or POWER8 Crypto Extensions (ppc). It was ported from
+ * cipher_aes_gcm_hw_aesni.inc and it makes use of a generic C implementation
+ * for partial blocks, ported from gcm128.c with OPENSSL_SMALL_FOOTPRINT defined.
*/
#include <sys/endian.h>
@@ -29,225 +29,152 @@ _Static_assert(
sizeof(struct ossl_gcm_context) <= sizeof(struct ossl_cipher_context),
"ossl_gcm_context too large");
-void aesni_set_encrypt_key(const void *key, int bits, void *ctx);
-
-static void
-gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
-{
- KASSERT(keylen == 128 || keylen == 192 || keylen == 256,
- ("%s: invalid key length %zu", __func__, keylen));
-
- memset(&ctx->gcm, 0, sizeof(ctx->gcm));
- memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks));
- aesni_set_encrypt_key(key, keylen, &ctx->aes_ks);
- ctx->ops->init(ctx, key, keylen);
-}
-
-static void
-gcm_tag(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len)
-{
- (void)ctx->ops->finish(ctx, NULL, 0);
- memcpy(tag, ctx->gcm.Xi.c, len);
-}
+#if defined(__amd64__) || defined(__i386__)
+#define AES_set_encrypt_key aesni_set_encrypt_key
+#define AES_gcm_encrypt aesni_gcm_encrypt
+#define AES_gcm_decrypt aesni_gcm_decrypt
+#define AES_encrypt aesni_encrypt
+#define AES_ctr32_encrypt_blocks aesni_ctr32_encrypt_blocks
+#define GCM_init gcm_init_avx
+#define GCM_gmult gcm_gmult_avx
+#define GCM_ghash gcm_ghash_avx
+
+void AES_set_encrypt_key(const void *key, int bits, void *ctx);
+size_t AES_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len,
+ const void *key, unsigned char ivec[16], uint64_t *Xi);
+size_t AES_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len,
+ const void *key, unsigned char ivec[16], uint64_t *Xi);
+void AES_encrypt(const unsigned char *in, unsigned char *out, void *ks);
+void AES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
+ size_t blocks, void *ks, const unsigned char *iv);
-void ossl_gcm_gmult_avx512(uint64_t Xi[2], void *gcm128ctx);
-void ossl_aes_gcm_init_avx512(const void *ks, void *gcm128ctx);
-void ossl_aes_gcm_setiv_avx512(const void *ks, void *gcm128ctx,
- const unsigned char *iv, size_t ivlen);
-void ossl_aes_gcm_update_aad_avx512(void *gcm128ctx, const unsigned char *aad,
+void GCM_init(__uint128_t Htable[16], uint64_t Xi[2]);
+void GCM_gmult(uint64_t Xi[2], const __uint128_t Htable[16]);
+void GCM_ghash(uint64_t Xi[2], const __uint128_t Htable[16], const void *in,
size_t len);
-void ossl_aes_gcm_encrypt_avx512(const void *ks, void *gcm128ctx,
- unsigned int *pblocklen, const unsigned char *in, size_t len,
- unsigned char *out);
-void ossl_aes_gcm_decrypt_avx512(const void *ks, void *gcm128ctx,
- unsigned int *pblocklen, const unsigned char *in, size_t len,
- unsigned char *out);
-void ossl_aes_gcm_finalize_avx512(void *gcm128ctx, unsigned int pblocklen);
-
-static void
-gcm_init_avx512(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
-{
- ossl_aes_gcm_init_avx512(&ctx->aes_ks, &ctx->gcm);
-}
-static void
-gcm_setiv_avx512(struct ossl_gcm_context *ctx, const unsigned char *iv,
- size_t len)
-{
- KASSERT(len == AES_GCM_IV_LEN,
- ("%s: invalid IV length %zu", __func__, len));
+#elif defined(__powerpc64__)
+#define AES_set_encrypt_key aes_p8_set_encrypt_key
+#define AES_gcm_encrypt(i,o,l,k,v,x) ppc_aes_gcm_crypt(i,o,l,k,v,x,1)
+#define AES_gcm_decrypt(i,o,l,k,v,x) ppc_aes_gcm_crypt(i,o,l,k,v,x,0)
+#define AES_encrypt aes_p8_encrypt
+#define AES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks
+#define GCM_init gcm_init_p8
+#define GCM_gmult gcm_gmult_p8
+#define GCM_ghash gcm_ghash_p8
+
+size_t ppc_aes_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len,
+ const void *key, unsigned char ivec[16], uint64_t *Xi);
+size_t ppc_aes_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len,
+ const void *key, unsigned char ivec[16], uint64_t *Xi);
- ctx->gcm.Yi.u[0] = 0; /* Current counter */
- ctx->gcm.Yi.u[1] = 0;
- ctx->gcm.Xi.u[0] = 0; /* AAD hash */
- ctx->gcm.Xi.u[1] = 0;
- ctx->gcm.len.u[0] = 0; /* AAD length */
- ctx->gcm.len.u[1] = 0; /* Message length */
- ctx->gcm.ares = 0;
- ctx->gcm.mres = 0;
+void AES_set_encrypt_key(const void *key, int bits, void *ctx);
+void AES_encrypt(const unsigned char *in, unsigned char *out, void *ks);
+void AES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
+ size_t blocks, void *ks, const unsigned char *iv);
- ossl_aes_gcm_setiv_avx512(&ctx->aes_ks, ctx, iv, len);
-}
+void GCM_init(__uint128_t Htable[16], uint64_t Xi[2]);
+void GCM_gmult(uint64_t Xi[2], const __uint128_t Htable[16]);
+void GCM_ghash(uint64_t Xi[2], const __uint128_t Htable[16], const void *in,
+ size_t len);
-static int
-gcm_aad_avx512(struct ossl_gcm_context *ctx, const unsigned char *aad,
- size_t len)
+static size_t
+ppc_aes_gcm_crypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key, unsigned char ivec_[16], uint64_t *Xi,
+ int encrypt)
{
- uint64_t alen = ctx->gcm.len.u[0];
- size_t lenblks;
- unsigned int ares;
-
- /* Bad sequence: call of AAD update after message processing */
- if (ctx->gcm.len.u[1])
- return -2;
-
- alen += len;
- /* AAD is limited by 2^64 bits, thus 2^61 bytes */
- if (alen > (1ull << 61) || (sizeof(len) == 8 && alen < len))
- return -1;
- ctx->gcm.len.u[0] = alen;
+ union {
+ uint32_t d[4];
+ uint8_t c[16];
+ } *ivec = (void *)ivec_;
+ int s = 0;
+ int ndone = 0;
+ int ctr_reset = 0;
+ uint32_t ivec_val;
+ uint64_t blocks_unused;
+ uint64_t nb = len / 16;
+ uint64_t next_ctr = 0;
+ unsigned char ctr_saved[12];
+
+ memcpy(ctr_saved, ivec, 12);
+
+ while (nb) {
+ ivec_val = ivec->d[3];
+#if BYTE_ORDER == LITTLE_ENDIAN
+ ivec_val = bswap32(ivec_val);
+#endif
- ares = ctx->gcm.ares;
- /* Partial AAD block left from previous AAD update calls */
- if (ares > 0) {
- /*
- * Fill partial block buffer till full block
- * (note, the hash is stored reflected)
- */
- while (ares > 0 && len > 0) {
- ctx->gcm.Xi.c[15 - ares] ^= *(aad++);
- --len;
- ares = (ares + 1) % AES_BLOCK_LEN;
- }
- /* Full block gathered */
- if (ares == 0) {
- ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
- } else { /* no more AAD */
- ctx->gcm.ares = ares;
- return 0;
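+		/* Blocks left before the 32-bit block counter wraps. */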
+ blocks_unused = (uint64_t)0xffffffffU + 1 - (uint64_t)ivec_val;
+ if (nb > blocks_unused) {
+ len = blocks_unused * 16;
+ nb -= blocks_unused;
+ next_ctr = blocks_unused;
+ ctr_reset = 1;
+ } else {
+ len = nb * 16;
+ next_ctr = nb;
+ nb = 0;
}
- }
- /* Bulk AAD processing */
- lenblks = len & ((size_t)(-AES_BLOCK_LEN));
- if (lenblks > 0) {
- ossl_aes_gcm_update_aad_avx512(ctx, aad, lenblks);
- aad += lenblks;
- len -= lenblks;
- }
+ s = encrypt ? ppc_aes_gcm_encrypt(in, out, len, key, ivec->c, Xi) :
+ ppc_aes_gcm_decrypt(in, out, len, key, ivec->c, Xi);
- /* Add remaining AAD to the hash (note, the hash is stored reflected) */
- if (len > 0) {
- ares = (unsigned int)len;
- for (size_t i = 0; i < len; ++i)
- ctx->gcm.Xi.c[15 - i] ^= aad[i];
+ /* add counter to ivec */
+#if BYTE_ORDER == LITTLE_ENDIAN
+ ivec->d[3] = bswap32(ivec_val + next_ctr);
+#else
+ ivec->d[3] += next_ctr;
+#endif
+ if (ctr_reset) {
+ ctr_reset = 0;
+ in += len;
+ out += len;
+ }
+ memcpy(ivec, ctr_saved, 12);
+ ndone += s;
}
- ctx->gcm.ares = ares;
-
- return 0;
+ return ndone;
}
-static int
-_gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
- unsigned char *out, size_t len, bool encrypt)
-{
- uint64_t mlen = ctx->gcm.len.u[1];
-
- mlen += len;
- if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
- return -1;
-
- ctx->gcm.len.u[1] = mlen;
-
- /* Finalize GHASH(AAD) if AAD partial blocks left unprocessed */
- if (ctx->gcm.ares > 0) {
- ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
- ctx->gcm.ares = 0;
- }
-
- if (encrypt) {
- ossl_aes_gcm_encrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
- in, len, out);
- } else {
- ossl_aes_gcm_decrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
- in, len, out);
- }
-
- return 0;
-}
+#else
+#error "Unsupported architecture!"
+#endif
-static int
-gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
- unsigned char *out, size_t len)
+static void
+gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
{
- return _gcm_encrypt_avx512(ctx, in, out, len, true);
-}
+ KASSERT(keylen == 128 || keylen == 192 || keylen == 256,
+ ("%s: invalid key length %zu", __func__, keylen));
-static int
-gcm_decrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
- unsigned char *out, size_t len)
-{
- return _gcm_encrypt_avx512(ctx, in, out, len, false);
+ memset(&ctx->gcm, 0, sizeof(ctx->gcm));
+ memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks));
+ AES_set_encrypt_key(key, keylen, &ctx->aes_ks);
+ ctx->ops->init(ctx, key, keylen);
}
-static int
-gcm_finish_avx512(struct ossl_gcm_context *ctx, const unsigned char *tag,
- size_t len)
+static void
+gcm_tag_op(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len)
{
- unsigned int *res = &ctx->gcm.mres;
-
- /* Finalize AAD processing */
- if (ctx->gcm.ares > 0)
- res = &ctx->gcm.ares;
-
- ossl_aes_gcm_finalize_avx512(ctx, *res);
-
- ctx->gcm.ares = ctx->gcm.mres = 0;
-
- if (tag != NULL)
- return timingsafe_bcmp(ctx->gcm.Xi.c, tag, len);
- return 0;
+ (void)ctx->ops->finish(ctx, NULL, 0);
+ memcpy(tag, ctx->gcm.Xi.c, len);
}
-static const struct ossl_aes_gcm_ops gcm_ops_avx512 = {
- .init = gcm_init_avx512,
- .setiv = gcm_setiv_avx512,
- .aad = gcm_aad_avx512,
- .encrypt = gcm_encrypt_avx512,
- .decrypt = gcm_decrypt_avx512,
- .finish = gcm_finish_avx512,
- .tag = gcm_tag,
-};
-
-size_t aesni_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len,
- const void *key, unsigned char ivec[16], uint64_t *Xi);
-size_t aesni_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len,
- const void *key, unsigned char ivec[16], uint64_t *Xi);
-void aesni_encrypt(const unsigned char *in, unsigned char *out, void *ks);
-void aesni_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
- size_t blocks, void *ks, const unsigned char *iv);
-
-void gcm_init_avx(__uint128_t Htable[16], uint64_t Xi[2]);
-void gcm_gmult_avx(uint64_t Xi[2], const __uint128_t Htable[16]);
-void gcm_ghash_avx(uint64_t Xi[2], const __uint128_t Htable[16], const void *in,
- size_t len);
-
static void
-gcm_init_aesni(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
+gcm_init_op(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
{
- aesni_encrypt(ctx->gcm.H.c, ctx->gcm.H.c, &ctx->aes_ks);
+ AES_encrypt(ctx->gcm.H.c, ctx->gcm.H.c, &ctx->aes_ks);
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.H.u[0] = bswap64(ctx->gcm.H.u[0]);
ctx->gcm.H.u[1] = bswap64(ctx->gcm.H.u[1]);
#endif
- gcm_init_avx(ctx->gcm.Htable, ctx->gcm.H.u);
+ GCM_init(ctx->gcm.Htable, ctx->gcm.H.u);
}
static void
-gcm_setiv_aesni(struct ossl_gcm_context *ctx, const unsigned char *iv,
+gcm_setiv_op(struct ossl_gcm_context *ctx, const unsigned char *iv,
size_t len)
{
uint32_t ctr;
@@ -269,7 +196,7 @@ gcm_setiv_aesni(struct ossl_gcm_context *ctx, const unsigned char *iv,
ctx->gcm.Xi.u[0] = 0;
ctx->gcm.Xi.u[1] = 0;
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c, &ctx->aes_ks);
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c, &ctx->aes_ks);
ctr++;
#if BYTE_ORDER == LITTLE_ENDIAN
@@ -280,7 +207,7 @@ gcm_setiv_aesni(struct ossl_gcm_context *ctx, const unsigned char *iv,
}
static int
-gcm_aad_aesni(struct ossl_gcm_context *ctx, const unsigned char *aad,
+gcm_aad_op(struct ossl_gcm_context *ctx, const unsigned char *aad,
size_t len)
{
size_t i;
@@ -303,14 +230,14 @@ gcm_aad_aesni(struct ossl_gcm_context *ctx, const unsigned char *aad,
n = (n + 1) % 16;
}
if (n == 0)
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
else {
ctx->gcm.ares = n;
return 0;
}
}
if ((i = (len & (size_t)-AES_BLOCK_LEN))) {
- gcm_ghash_avx(ctx->gcm.Xi.u, ctx->gcm.Htable, aad, i);
+ GCM_ghash(ctx->gcm.Xi.u, ctx->gcm.Htable, aad, i);
aad += i;
len -= i;
}
@@ -341,7 +268,7 @@ gcm_encrypt(struct ossl_gcm_context *ctx, const unsigned char *in,
if (ctx->gcm.ares) {
/* First call to encrypt finalizes GHASH(AAD) */
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.ares = 0;
}
@@ -354,7 +281,7 @@ gcm_encrypt(struct ossl_gcm_context *ctx, const unsigned char *in,
n = mres % 16;
for (i = 0; i < len; ++i) {
if (n == 0) {
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
&ctx->aes_ks);
++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
@@ -366,7 +293,7 @@ gcm_encrypt(struct ossl_gcm_context *ctx, const unsigned char *in,
ctx->gcm.Xi.c[n] ^= out[i] = in[i] ^ ctx->gcm.EKi.c[n];
mres = n = (n + 1) % 16;
if (n == 0)
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
}
ctx->gcm.mres = mres;
@@ -390,7 +317,7 @@ gcm_encrypt_ctr32(struct ossl_gcm_context *ctx, const unsigned char *in,
if (ctx->gcm.ares) {
/* First call to encrypt finalizes GHASH(AAD) */
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.ares = 0;
}
@@ -408,7 +335,7 @@ gcm_encrypt_ctr32(struct ossl_gcm_context *ctx, const unsigned char *in,
n = (n + 1) % 16;
}
if (n == 0) {
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
mres = 0;
} else {
ctx->gcm.mres = n;
@@ -418,7 +345,7 @@ gcm_encrypt_ctr32(struct ossl_gcm_context *ctx, const unsigned char *in,
if ((i = (len & (size_t)-16))) {
size_t j = i / 16;
- aesni_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
+ AES_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
ctr += (unsigned int)j;
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.Yi.d[3] = bswap32(ctr);
@@ -430,12 +357,12 @@ gcm_encrypt_ctr32(struct ossl_gcm_context *ctx, const unsigned char *in,
while (j--) {
for (i = 0; i < 16; ++i)
ctx->gcm.Xi.c[i] ^= out[i];
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
out += 16;
}
}
if (len) {
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.Yi.d[3] = bswap32(ctr);
@@ -453,7 +380,7 @@ gcm_encrypt_ctr32(struct ossl_gcm_context *ctx, const unsigned char *in,
}
static int
-gcm_encrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in,
+gcm_encrypt_op(struct ossl_gcm_context *ctx, const unsigned char *in,
unsigned char *out, size_t len)
{
size_t bulk = 0, res;
@@ -463,7 +390,7 @@ gcm_encrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in,
if ((error = gcm_encrypt(ctx, in, out, res)) != 0)
return error;
- bulk = aesni_gcm_encrypt(in + res, out + res, len - res,
+ bulk = AES_gcm_encrypt(in + res, out + res, len - res,
&ctx->aes_ks, ctx->gcm.Yi.c, ctx->gcm.Xi.u);
ctx->gcm.len.u[1] += bulk;
bulk += res;
@@ -492,7 +419,7 @@ gcm_decrypt(struct ossl_gcm_context *ctx, const unsigned char *in,
if (ctx->gcm.ares) {
/* First call to encrypt finalizes GHASH(AAD) */
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.ares = 0;
}
@@ -506,7 +433,7 @@ gcm_decrypt(struct ossl_gcm_context *ctx, const unsigned char *in,
for (i = 0; i < len; ++i) {
uint8_t c;
if (n == 0) {
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
&ctx->aes_ks);
++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
@@ -520,7 +447,7 @@ gcm_decrypt(struct ossl_gcm_context *ctx, const unsigned char *in,
ctx->gcm.Xi.c[n] ^= c;
mres = n = (n + 1) % 16;
if (n == 0)
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
}
ctx->gcm.mres = mres;
@@ -544,7 +471,7 @@ gcm_decrypt_ctr32(struct ossl_gcm_context *ctx, const unsigned char *in,
if (ctx->gcm.ares) {
/* First call to decrypt finalizes GHASH(AAD) */
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.ares = 0;
}
@@ -564,7 +491,7 @@ gcm_decrypt_ctr32(struct ossl_gcm_context *ctx, const unsigned char *in,
n = (n + 1) % 16;
}
if (n == 0) {
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
mres = 0;
} else {
ctx->gcm.mres = n;
@@ -578,12 +505,12 @@ gcm_decrypt_ctr32(struct ossl_gcm_context *ctx, const unsigned char *in,
size_t k;
for (k = 0; k < 16; ++k)
ctx->gcm.Xi.c[k] ^= in[k];
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
in += 16;
}
j = i / 16;
in -= i;
- aesni_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
+ AES_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
ctr += (unsigned int)j;
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.Yi.d[3] = bswap32(ctr);
@@ -595,7 +522,7 @@ gcm_decrypt_ctr32(struct ossl_gcm_context *ctx, const unsigned char *in,
len -= i;
}
if (len) {
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.Yi.d[3] = bswap32(ctr);
@@ -615,7 +542,7 @@ gcm_decrypt_ctr32(struct ossl_gcm_context *ctx, const unsigned char *in,
}
static int
-gcm_decrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in,
+gcm_decrypt_op(struct ossl_gcm_context *ctx, const unsigned char *in,
unsigned char *out, size_t len)
{
size_t bulk = 0, res;
@@ -625,7 +552,7 @@ gcm_decrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in,
if ((error = gcm_decrypt(ctx, in, out, res)) != 0)
return error;
- bulk = aesni_gcm_decrypt(in + res, out + res, len - res, &ctx->aes_ks,
+ bulk = AES_gcm_decrypt(in + res, out + res, len - res, &ctx->aes_ks,
ctx->gcm.Yi.c, ctx->gcm.Xi.u);
ctx->gcm.len.u[1] += bulk;
bulk += res;
@@ -637,14 +564,14 @@ gcm_decrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in,
}
static int
-gcm_finish_aesni(struct ossl_gcm_context *ctx, const unsigned char *tag,
+gcm_finish_op(struct ossl_gcm_context *ctx, const unsigned char *tag,
size_t len)
{
uint64_t alen = ctx->gcm.len.u[0] << 3;
uint64_t clen = ctx->gcm.len.u[1] << 3;
if (ctx->gcm.mres || ctx->gcm.ares)
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
#if BYTE_ORDER == LITTLE_ENDIAN
alen = bswap64(alen);
@@ -653,7 +580,7 @@ gcm_finish_aesni(struct ossl_gcm_context *ctx, const unsigned char *tag,
ctx->gcm.Xi.u[0] ^= alen;
ctx->gcm.Xi.u[1] ^= clen;
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.Xi.u[0] ^= ctx->gcm.EK0.u[0];
ctx->gcm.Xi.u[1] ^= ctx->gcm.EK0.u[1];
@@ -663,40 +590,26 @@ gcm_finish_aesni(struct ossl_gcm_context *ctx, const unsigned char *tag,
return 0;
}
-static const struct ossl_aes_gcm_ops gcm_ops_aesni = {
- .init = gcm_init_aesni,
- .setiv = gcm_setiv_aesni,
- .aad = gcm_aad_aesni,
- .encrypt = gcm_encrypt_aesni,
- .decrypt = gcm_decrypt_aesni,
- .finish = gcm_finish_aesni,
- .tag = gcm_tag,
+static const struct ossl_aes_gcm_ops gcm_ops = {
+ .init = gcm_init_op,
+ .setiv = gcm_setiv_op,
+ .aad = gcm_aad_op,
+ .encrypt = gcm_encrypt_op,
+ .decrypt = gcm_decrypt_op,
+ .finish = gcm_finish_op,
+ .tag = gcm_tag_op,
};
-int ossl_aes_gcm_setkey_aesni(const unsigned char *key, int klen, void *_ctx);
-
-int
-ossl_aes_gcm_setkey_aesni(const unsigned char *key, int klen,
- void *_ctx)
-{
- struct ossl_gcm_context *ctx;
-
- ctx = _ctx;
- ctx->ops = &gcm_ops_aesni;
- gcm_init(ctx, key, klen);
- return (0);
-}
-
-int ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen, void *_ctx);
+int ossl_aes_gcm_setkey(const unsigned char *key, int klen, void *_ctx);
int
-ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen,
+ossl_aes_gcm_setkey(const unsigned char *key, int klen,
void *_ctx)
{
struct ossl_gcm_context *ctx;
ctx = _ctx;
- ctx->ops = &gcm_ops_avx512;
+ ctx->ops = &gcm_ops;
gcm_init(ctx, key, klen);
return (0);
}
diff --git a/sys/crypto/openssl/ossl_ppc.c b/sys/crypto/openssl/ossl_ppc.c
index 0951745c4b43..980211f46a76 100644
--- a/sys/crypto/openssl/ossl_ppc.c
+++ b/sys/crypto/openssl/ossl_ppc.c
@@ -38,9 +38,12 @@ unsigned int OPENSSL_ppccap_P = 0;
ossl_cipher_setkey_t aes_p8_set_encrypt_key;
ossl_cipher_setkey_t aes_p8_set_decrypt_key;
+
ossl_cipher_setkey_t vpaes_set_encrypt_key;
ossl_cipher_setkey_t vpaes_set_decrypt_key;
+ossl_cipher_setkey_t ossl_aes_gcm_setkey;
+
void
ossl_cpuid(struct ossl_softc *sc)
{
@@ -75,7 +78,11 @@ ossl_cpuid(struct ossl_softc *sc)
ossl_cipher_aes_cbc.set_encrypt_key = aes_p8_set_encrypt_key;
ossl_cipher_aes_cbc.set_decrypt_key = aes_p8_set_decrypt_key;
sc->has_aes = true;
- } else if (OPENSSL_ppccap_P & PPC_ALTIVEC) {
+
+ ossl_cipher_aes_gcm.set_encrypt_key = ossl_aes_gcm_setkey;
+ ossl_cipher_aes_gcm.set_decrypt_key = ossl_aes_gcm_setkey;
+ sc->has_aes_gcm = true;
+ } else if (OPENSSL_ppccap_P & PPC_ALTIVEC) {
ossl_cipher_aes_cbc.set_encrypt_key = vpaes_set_encrypt_key;
ossl_cipher_aes_cbc.set_decrypt_key = vpaes_set_decrypt_key;
sc->has_aes = true;
diff --git a/sys/crypto/openssl/ossl_x86.c b/sys/crypto/openssl/ossl_x86.c
index 9161eaf39beb..bf034f05b783 100644
--- a/sys/crypto/openssl/ossl_x86.c
+++ b/sys/crypto/openssl/ossl_x86.c
@@ -56,7 +56,7 @@ ossl_cipher_setkey_t aesni_set_decrypt_key;
#ifdef __amd64__
int ossl_vaes_vpclmulqdq_capable(void);
-ossl_cipher_setkey_t ossl_aes_gcm_setkey_aesni;
+ossl_cipher_setkey_t ossl_aes_gcm_setkey;
ossl_cipher_setkey_t ossl_aes_gcm_setkey_avx512;
#endif
@@ -141,8 +141,8 @@ ossl_cpuid(struct ossl_softc *sc)
} else if ((cpu_feature2 &
(CPUID2_AVX | CPUID2_PCLMULQDQ | CPUID2_MOVBE)) ==
(CPUID2_AVX | CPUID2_PCLMULQDQ | CPUID2_MOVBE)) {
- ossl_cipher_aes_gcm.set_encrypt_key = ossl_aes_gcm_setkey_aesni;
- ossl_cipher_aes_gcm.set_decrypt_key = ossl_aes_gcm_setkey_aesni;
+ ossl_cipher_aes_gcm.set_encrypt_key = ossl_aes_gcm_setkey;
+ ossl_cipher_aes_gcm.set_decrypt_key = ossl_aes_gcm_setkey;
sc->has_aes_gcm = true;
} else {
sc->has_aes_gcm = false;
diff --git a/sys/ddb/db_ps.c b/sys/ddb/db_ps.c
index 733c440f5ee3..a26cf8161294 100644
--- a/sys/ddb/db_ps.c
+++ b/sys/ddb/db_ps.c
@@ -459,12 +459,11 @@ DB_SHOW_COMMAND(proc, db_show_proc)
db_printf("??? (%#x)\n", p->p_state);
}
if (p->p_ucred != NULL) {
- db_printf(" uid: %d gids: ", p->p_ucred->cr_uid);
- for (i = 0; i < p->p_ucred->cr_ngroups; i++) {
- db_printf("%d", p->p_ucred->cr_groups[i]);
- if (i < (p->p_ucred->cr_ngroups - 1))
- db_printf(", ");
- }
+ db_printf(" uid: %d gid: %d supp gids: ",
+ p->p_ucred->cr_uid, p->p_ucred->cr_gid);
+ for (i = 0; i < p->p_ucred->cr_ngroups; i++)
+ db_printf(i == 0 ? "%d" : ", %d",
+ p->p_ucred->cr_groups[i]);
db_printf("\n");
}
if (p->p_pptr != NULL)
diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c
index a2159b12876f..175bfe835e6f 100644
--- a/sys/dev/acpica/acpi.c
+++ b/sys/dev/acpica/acpi.c
@@ -4,6 +4,10 @@
* Copyright (c) 2000, 2001 Michael Smith
* Copyright (c) 2000 BSDi
* All rights reserved.
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Aymeric Wibo
+ * <obiwac@freebsd.org> under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -107,8 +111,9 @@ struct callout acpi_sleep_timer;
/* Bitmap of device quirks. */
int acpi_quirks;
-/* Supported sleep states. */
-static BOOLEAN acpi_sleep_states[ACPI_S_STATE_COUNT];
+/* Supported sleep states and types. */
+static bool acpi_supported_stypes[POWER_STYPE_COUNT];
+static bool acpi_supported_sstates[ACPI_S_STATE_COUNT];
static void acpi_lookup(void *arg, const char *name, device_t *dev);
static int acpi_modevent(struct module *mod, int event, void *junk);
@@ -165,23 +170,30 @@ static ACPI_STATUS acpi_probe_child(ACPI_HANDLE handle, UINT32 level,
void *context, void **status);
static void acpi_sleep_enable(void *arg);
static ACPI_STATUS acpi_sleep_disable(struct acpi_softc *sc);
-static ACPI_STATUS acpi_EnterSleepState(struct acpi_softc *sc, int state);
+static ACPI_STATUS acpi_EnterSleepState(struct acpi_softc *sc,
+ enum power_stype stype);
static void acpi_shutdown_final(void *arg, int howto);
static void acpi_enable_fixed_events(struct acpi_softc *sc);
static void acpi_resync_clock(struct acpi_softc *sc);
-static int acpi_wake_sleep_prep(ACPI_HANDLE handle, int sstate);
-static int acpi_wake_run_prep(ACPI_HANDLE handle, int sstate);
-static int acpi_wake_prep_walk(int sstate);
+static int acpi_wake_sleep_prep(ACPI_HANDLE handle,
+ enum power_stype stype);
+static int acpi_wake_run_prep(ACPI_HANDLE handle, enum power_stype stype);
+static int acpi_wake_prep_walk(enum power_stype stype);
static int acpi_wake_sysctl_walk(device_t dev);
static int acpi_wake_set_sysctl(SYSCTL_HANDLER_ARGS);
-static void acpi_system_eventhandler_sleep(void *arg, int state);
-static void acpi_system_eventhandler_wakeup(void *arg, int state);
-static int acpi_sname2sstate(const char *sname);
-static const char *acpi_sstate2sname(int sstate);
static int acpi_supported_sleep_state_sysctl(SYSCTL_HANDLER_ARGS);
+static void acpi_system_eventhandler_sleep(void *arg,
+ enum power_stype stype);
+static void acpi_system_eventhandler_wakeup(void *arg,
+ enum power_stype stype);
+static enum power_stype acpi_sstate_to_stype(int sstate);
+static int acpi_sname_to_sstate(const char *sname);
+static const char *acpi_sstate_to_sname(int sstate);
static int acpi_sleep_state_sysctl(SYSCTL_HANDLER_ARGS);
+static int acpi_stype_sysctl(SYSCTL_HANDLER_ARGS);
static int acpi_debug_objects_sysctl(SYSCTL_HANDLER_ARGS);
-static int acpi_pm_func(u_long cmd, void *arg, ...);
+static int acpi_stype_to_sstate(struct acpi_softc *sc, enum power_stype stype);
+static int acpi_pm_func(u_long cmd, void *arg, enum power_stype stype);
static void acpi_enable_pcie(void);
static void acpi_reset_interfaces(device_t dev);
@@ -472,6 +484,7 @@ acpi_attach(device_t dev)
UINT32 flags;
UINT8 TypeA, TypeB;
char *env;
+ enum power_stype stype;
ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
@@ -584,31 +597,30 @@ acpi_attach(device_t dev)
SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
OID_AUTO, "power_button_state",
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
- &sc->acpi_power_button_sx, 0, acpi_sleep_state_sysctl, "A",
+ &sc->acpi_power_button_stype, 0, acpi_stype_sysctl, "A",
"Power button ACPI sleep state.");
SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
OID_AUTO, "sleep_button_state",
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
- &sc->acpi_sleep_button_sx, 0, acpi_sleep_state_sysctl, "A",
+ &sc->acpi_sleep_button_stype, 0, acpi_stype_sysctl, "A",
"Sleep button ACPI sleep state.");
SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
OID_AUTO, "lid_switch_state",
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
- &sc->acpi_lid_switch_sx, 0, acpi_sleep_state_sysctl, "A",
- "Lid ACPI sleep state. Set to S3 if you want to suspend your laptop when close the Lid.");
+ &sc->acpi_lid_switch_stype, 0, acpi_stype_sysctl, "A",
+ "Lid ACPI sleep state. Set to s2idle or s2mem if you want to suspend "
+	    "your laptop when closing the lid.");
SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
OID_AUTO, "standby_state",
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
- &sc->acpi_standby_sx, 0, acpi_sleep_state_sysctl, "A", "");
- SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
- OID_AUTO, "suspend_state",
- CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
- &sc->acpi_suspend_sx, 0, acpi_sleep_state_sysctl, "A", "");
+ &sc->acpi_standby_sx, 0, acpi_sleep_state_sysctl, "A",
+ "ACPI Sx state to use when going standby (S1 or S2).");
SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
OID_AUTO, "sleep_delay", CTLFLAG_RW, &sc->acpi_sleep_delay, 0,
"sleep delay in seconds");
SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
- OID_AUTO, "s4bios", CTLFLAG_RW, &sc->acpi_s4bios, 0, "S4BIOS mode");
+ OID_AUTO, "s4bios", CTLFLAG_RW, &sc->acpi_s4bios, 0,
+ "Use S4BIOS when hibernating.");
SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
OID_AUTO, "verbose", CTLFLAG_RW, &sc->acpi_verbose, 0, "verbose mode");
SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
@@ -654,31 +666,38 @@ acpi_attach(device_t dev)
sc->acpi_s4bios = 1;
#endif
- /* Probe all supported sleep states. */
- acpi_sleep_states[ACPI_STATE_S0] = TRUE;
- for (state = ACPI_STATE_S1; state < ACPI_S_STATE_COUNT; state++)
+ /*
+ * Probe all supported ACPI sleep states. Awake (S0) is always supported.
+ */
+ acpi_supported_sstates[ACPI_STATE_S0] = TRUE;
+ acpi_supported_stypes[POWER_STYPE_AWAKE] = true;
+ for (state = ACPI_STATE_S1; state <= ACPI_STATE_S5; state++)
if (ACPI_SUCCESS(AcpiEvaluateObject(ACPI_ROOT_OBJECT,
__DECONST(char *, AcpiGbl_SleepStateNames[state]), NULL, NULL)) &&
- ACPI_SUCCESS(AcpiGetSleepTypeData(state, &TypeA, &TypeB)))
- acpi_sleep_states[state] = TRUE;
+ ACPI_SUCCESS(AcpiGetSleepTypeData(state, &TypeA, &TypeB))) {
+ acpi_supported_sstates[state] = TRUE;
+ acpi_supported_stypes[acpi_sstate_to_stype(state)] = true;
+ }
/*
- * Dispatch the default sleep state to devices. The lid switch is set
+ * Dispatch the default sleep type to devices. The lid switch is set
* to UNKNOWN by default to avoid surprising users.
*/
- sc->acpi_power_button_sx = acpi_sleep_states[ACPI_STATE_S5] ?
- ACPI_STATE_S5 : ACPI_STATE_UNKNOWN;
- sc->acpi_lid_switch_sx = ACPI_STATE_UNKNOWN;
- sc->acpi_standby_sx = acpi_sleep_states[ACPI_STATE_S1] ?
- ACPI_STATE_S1 : ACPI_STATE_UNKNOWN;
- sc->acpi_suspend_sx = acpi_sleep_states[ACPI_STATE_S3] ?
- ACPI_STATE_S3 : ACPI_STATE_UNKNOWN;
-
- /* Pick the first valid sleep state for the sleep button default. */
- sc->acpi_sleep_button_sx = ACPI_STATE_UNKNOWN;
- for (state = ACPI_STATE_S1; state <= ACPI_STATE_S4; state++)
- if (acpi_sleep_states[state]) {
- sc->acpi_sleep_button_sx = state;
+ sc->acpi_power_button_stype = acpi_supported_stypes[POWER_STYPE_POWEROFF] ?
+ POWER_STYPE_POWEROFF : POWER_STYPE_UNKNOWN;
+ sc->acpi_lid_switch_stype = POWER_STYPE_UNKNOWN;
+
+ sc->acpi_standby_sx = ACPI_STATE_UNKNOWN;
+ if (acpi_supported_sstates[ACPI_STATE_S1])
+ sc->acpi_standby_sx = ACPI_STATE_S1;
+ else if (acpi_supported_sstates[ACPI_STATE_S2])
+ sc->acpi_standby_sx = ACPI_STATE_S2;
+
+ /* Pick the first valid sleep type for the sleep button default. */
+ sc->acpi_sleep_button_stype = POWER_STYPE_UNKNOWN;
+ for (stype = POWER_STYPE_STANDBY; stype <= POWER_STYPE_HIBERNATE; stype++)
+ if (acpi_supported_stypes[stype]) {
+ sc->acpi_sleep_button_stype = stype;
break;
}
@@ -703,7 +722,7 @@ acpi_attach(device_t dev)
/* Flag our initial states. */
sc->acpi_enabled = TRUE;
- sc->acpi_sstate = ACPI_STATE_S0;
+ sc->acpi_stype = POWER_STYPE_AWAKE;
sc->acpi_sleep_disabled = TRUE;
/* Create the control device */
@@ -715,7 +734,8 @@ acpi_attach(device_t dev)
goto out;
/* Register ACPI again to pass the correct argument of pm_func. */
- power_pm_register(POWER_PM_TYPE_ACPI, acpi_pm_func, sc);
+ power_pm_register(POWER_PM_TYPE_ACPI, acpi_pm_func, sc,
+ acpi_supported_stypes);
acpi_platform_osc(dev);
@@ -741,6 +761,58 @@ acpi_attach(device_t dev)
return_VALUE (error);
}
+static int
+acpi_stype_to_sstate(struct acpi_softc *sc, enum power_stype stype)
+{
+ switch (stype) {
+ case POWER_STYPE_AWAKE:
+ return (ACPI_STATE_S0);
+ case POWER_STYPE_STANDBY:
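+		/* Standby maps to whichever of S1/S2 the platform supports. */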
+ return (sc->acpi_standby_sx);
+ case POWER_STYPE_SUSPEND_TO_MEM:
+ return (ACPI_STATE_S3);
+ case POWER_STYPE_HIBERNATE:
+ return (ACPI_STATE_S4);
+ case POWER_STYPE_POWEROFF:
+ return (ACPI_STATE_S5);
+ case POWER_STYPE_SUSPEND_TO_IDLE:
+ case POWER_STYPE_COUNT:
+ case POWER_STYPE_UNKNOWN:
+ return (ACPI_STATE_UNKNOWN);
+ }
+ return (ACPI_STATE_UNKNOWN);
+}
+
+/*
+ * XXX It would be nice if we didn't need this function, but we'd need
+ * acpi_EnterSleepState and acpi_ReqSleepState to take in actual ACPI S-states,
+ * which isn't possible at the moment because suspend-to-idle (which is not
+ * an ACPI S-state and does not map to one) will be implemented here.
+ *
+ * In the future, we should make much of the logic in these functions generic
+ * to enable suspend-to-idle on non-ACPI builds, and then make
+ * acpi_EnterSleepState and acpi_ReqSleepState truly take in ACPI S-states
+ * again.
+ */
+static enum power_stype
+acpi_sstate_to_stype(int sstate)
+{
+ switch (sstate) {
+ case ACPI_STATE_S0:
+ return (POWER_STYPE_AWAKE);
+ case ACPI_STATE_S1:
+ case ACPI_STATE_S2:
+ return (POWER_STYPE_STANDBY);
+ case ACPI_STATE_S3:
+ return (POWER_STYPE_SUSPEND_TO_MEM);
+ case ACPI_STATE_S4:
+ return (POWER_STYPE_HIBERNATE);
+ case ACPI_STATE_S5:
+ return (POWER_STYPE_POWEROFF);
+ }
+ return (POWER_STYPE_UNKNOWN);
+}
+
static void
acpi_set_power_children(device_t dev, int state)
{
@@ -2036,7 +2108,7 @@ acpi_device_pwr_for_sleep(device_t bus, device_t dev, int *dstate)
* Note illegal _S0D is evaluated because some systems expect this.
*/
sc = device_get_softc(bus);
- snprintf(sxd, sizeof(sxd), "_S%dD", sc->acpi_sstate);
+ snprintf(sxd, sizeof(sxd), "_S%dD", acpi_stype_to_sstate(sc, sc->acpi_stype));
status = acpi_GetInteger(handle, sxd, dstate);
if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
device_printf(dev, "failed to get %s on %s: %s\n", sxd,
@@ -3134,9 +3206,9 @@ acpi_sleep_force_task(void *context)
{
struct acpi_softc *sc = (struct acpi_softc *)context;
- if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
- device_printf(sc->acpi_dev, "force sleep state S%d failed\n",
- sc->acpi_next_sstate);
+ if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_stype)))
+ device_printf(sc->acpi_dev, "force sleep state %s failed\n",
+ power_stype_to_name(sc->acpi_next_stype));
}
static void
@@ -3163,24 +3235,24 @@ acpi_sleep_force(void *arg)
* acks are in.
*/
int
-acpi_ReqSleepState(struct acpi_softc *sc, int state)
+acpi_ReqSleepState(struct acpi_softc *sc, enum power_stype stype)
{
#if defined(__amd64__) || defined(__i386__)
struct apm_clone_data *clone;
ACPI_STATUS status;
- if (state < ACPI_STATE_S1 || state > ACPI_S_STATES_MAX)
+ if (stype <= POWER_STYPE_AWAKE || stype >= POWER_STYPE_COUNT)
return (EINVAL);
- if (!acpi_sleep_states[state])
+ if (!acpi_supported_stypes[stype])
return (EOPNOTSUPP);
/*
* If a reboot/shutdown/suspend request is already in progress or
* suspend is blocked due to an upcoming shutdown, just return.
*/
- if (rebooting || sc->acpi_next_sstate != 0 || suspend_blocked) {
+ if (rebooting || sc->acpi_next_stype != POWER_STYPE_AWAKE ||
+ suspend_blocked)
return (0);
- }
/* Wait until sleep is enabled. */
while (sc->acpi_sleep_disabled) {
@@ -3189,12 +3261,12 @@ acpi_ReqSleepState(struct acpi_softc *sc, int state)
ACPI_LOCK(acpi);
- sc->acpi_next_sstate = state;
+ sc->acpi_next_stype = stype;
/* S5 (soft-off) should be entered directly with no waiting. */
- if (state == ACPI_STATE_S5) {
+ if (stype == POWER_STYPE_POWEROFF) {
ACPI_UNLOCK(acpi);
- status = acpi_EnterSleepState(sc, state);
+ status = acpi_EnterSleepState(sc, stype);
return (ACPI_SUCCESS(status) ? 0 : ENXIO);
}
@@ -3210,7 +3282,7 @@ acpi_ReqSleepState(struct acpi_softc *sc, int state)
/* If devd(8) is not running, immediately enter the sleep state. */
if (!devctl_process_running()) {
ACPI_UNLOCK(acpi);
- status = acpi_EnterSleepState(sc, state);
+ status = acpi_EnterSleepState(sc, stype);
return (ACPI_SUCCESS(status) ? 0 : ENXIO);
}
@@ -3225,7 +3297,7 @@ acpi_ReqSleepState(struct acpi_softc *sc, int state)
ACPI_UNLOCK(acpi);
/* Now notify devd(8) also. */
- acpi_UserNotify("Suspend", ACPI_ROOT_OBJECT, state);
+ acpi_UserNotify("Suspend", ACPI_ROOT_OBJECT, stype);
return (0);
#else
@@ -3248,17 +3320,17 @@ acpi_AckSleepState(struct apm_clone_data *clone, int error)
struct acpi_softc *sc;
int ret, sleeping;
- /* If no pending sleep state, return an error. */
+ /* If no pending sleep type, return an error. */
ACPI_LOCK(acpi);
sc = clone->acpi_sc;
- if (sc->acpi_next_sstate == 0) {
+ if (sc->acpi_next_stype == POWER_STYPE_AWAKE) {
ACPI_UNLOCK(acpi);
return (ENXIO);
}
/* Caller wants to abort suspend process. */
if (error) {
- sc->acpi_next_sstate = 0;
+ sc->acpi_next_stype = POWER_STYPE_AWAKE;
callout_stop(&sc->susp_force_to);
device_printf(sc->acpi_dev,
"listener on %s cancelled the pending suspend\n",
@@ -3288,7 +3360,7 @@ acpi_AckSleepState(struct apm_clone_data *clone, int error)
ACPI_UNLOCK(acpi);
ret = 0;
if (sleeping) {
- if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
+ if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_stype)))
ret = ENODEV;
}
return (ret);
@@ -3345,7 +3417,7 @@ enum acpi_sleep_state {
* Currently we support S1-S5 but S4 is only S4BIOS
*/
static ACPI_STATUS
-acpi_EnterSleepState(struct acpi_softc *sc, int state)
+acpi_EnterSleepState(struct acpi_softc *sc, enum power_stype stype)
{
register_t intr;
ACPI_STATUS status;
@@ -3353,13 +3425,13 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state)
enum acpi_sleep_state slp_state;
int sleep_result;
- ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, state);
+ ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, stype);
- if (state < ACPI_STATE_S1 || state > ACPI_S_STATES_MAX)
+ if (stype <= POWER_STYPE_AWAKE || stype >= POWER_STYPE_COUNT)
return_ACPI_STATUS (AE_BAD_PARAMETER);
- if (!acpi_sleep_states[state]) {
- device_printf(sc->acpi_dev, "Sleep state S%d not supported by BIOS\n",
- state);
+ if (!acpi_supported_stypes[stype]) {
+ device_printf(sc->acpi_dev, "Sleep type %s not supported on this "
+ "platform\n", power_stype_to_name(stype));
return (AE_SUPPORT);
}
@@ -3371,7 +3443,7 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state)
return (status);
}
- if (state == ACPI_STATE_S5) {
+ if (stype == POWER_STYPE_POWEROFF) {
/*
* Shut down cleanly and power off. This will call us back through the
* shutdown handlers.
@@ -3399,16 +3471,16 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state)
#endif
/*
- * Be sure to hold Giant across DEVICE_SUSPEND/RESUME
+ * Be sure to hold bus topology lock across DEVICE_SUSPEND/RESUME.
*/
bus_topo_lock();
slp_state = ACPI_SS_NONE;
- sc->acpi_sstate = state;
+ sc->acpi_stype = stype;
/* Enable any GPEs as appropriate and requested by the user. */
- acpi_wake_prep_walk(state);
+ acpi_wake_prep_walk(stype);
slp_state = ACPI_SS_GPE_SET;
/*
@@ -3425,7 +3497,7 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state)
}
slp_state = ACPI_SS_DEV_SUSPEND;
- status = AcpiEnterSleepStatePrep(state);
+ status = AcpiEnterSleepStatePrep(stype);
if (ACPI_FAILURE(status)) {
device_printf(sc->acpi_dev, "AcpiEnterSleepStatePrep failed - %s\n",
AcpiFormatException(status));
@@ -3438,9 +3510,9 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state)
suspendclock();
intr = intr_disable();
- if (state != ACPI_STATE_S1) {
- sleep_result = acpi_sleep_machdep(sc, state);
- acpi_wakeup_machdep(sc, state, sleep_result, 0);
+ if (stype != POWER_STYPE_STANDBY) {
+ sleep_result = acpi_sleep_machdep(sc, stype);
+ acpi_wakeup_machdep(sc, stype, sleep_result, 0);
/*
* XXX According to ACPI specification SCI_EN bit should be restored
@@ -3451,10 +3523,10 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state)
* This hack is picked up from Linux, which claims that it follows
* Windows behavior.
*/
- if (sleep_result == 1 && state != ACPI_STATE_S4)
+ if (sleep_result == 1 && stype != POWER_STYPE_HIBERNATE)
AcpiWriteBitRegister(ACPI_BITREG_SCI_ENABLE, ACPI_ENABLE_EVENT);
- if (sleep_result == 1 && state == ACPI_STATE_S3) {
+ if (sleep_result == 1 && stype == POWER_STYPE_SUSPEND_TO_MEM) {
/*
* Prevent mis-interpretation of the wakeup by power button
* as a request for power off.
@@ -3480,20 +3552,20 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state)
intr_restore(intr);
/* call acpi_wakeup_machdep() again with interrupt enabled */
- acpi_wakeup_machdep(sc, state, sleep_result, 1);
+ acpi_wakeup_machdep(sc, stype, sleep_result, 1);
- AcpiLeaveSleepStatePrep(state);
+ AcpiLeaveSleepStatePrep(stype);
if (sleep_result == -1)
goto backout;
- /* Re-enable ACPI hardware on wakeup from sleep state 4. */
- if (state == ACPI_STATE_S4)
+ /* Re-enable ACPI hardware on wakeup from hibernate. */
+ if (stype == POWER_STYPE_HIBERNATE)
AcpiEnable();
} else {
- status = AcpiEnterSleepState(state);
+ status = AcpiEnterSleepState(stype);
intr_restore(intr);
- AcpiLeaveSleepStatePrep(state);
+ AcpiLeaveSleepStatePrep(stype);
if (ACPI_FAILURE(status)) {
device_printf(sc->acpi_dev, "AcpiEnterSleepState failed - %s\n",
AcpiFormatException(status));
@@ -3510,13 +3582,13 @@ backout:
if (slp_state >= ACPI_SS_SLP_PREP)
resumeclock();
if (slp_state >= ACPI_SS_GPE_SET) {
- acpi_wake_prep_walk(state);
- sc->acpi_sstate = ACPI_STATE_S0;
+ acpi_wake_prep_walk(stype);
+ sc->acpi_stype = POWER_STYPE_AWAKE;
}
if (slp_state >= ACPI_SS_DEV_SUSPEND)
DEVICE_RESUME(root_bus);
if (slp_state >= ACPI_SS_SLP_PREP)
- AcpiLeaveSleepState(state);
+ AcpiLeaveSleepState(stype);
if (slp_state >= ACPI_SS_SLEPT) {
#if defined(__i386__) || defined(__amd64__)
/* NB: we are still using ACPI timecounter at this point. */
@@ -3525,7 +3597,7 @@ backout:
acpi_resync_clock(sc);
acpi_enable_fixed_events(sc);
}
- sc->acpi_next_sstate = 0;
+ sc->acpi_next_stype = POWER_STYPE_AWAKE;
bus_topo_unlock();
@@ -3551,7 +3623,7 @@ backout:
/* Run /etc/rc.resume after we are back. */
if (devctl_process_running())
- acpi_UserNotify("Resume", ACPI_ROOT_OBJECT, state);
+ acpi_UserNotify("Resume", ACPI_ROOT_OBJECT, stype);
return_ACPI_STATUS (status);
}
@@ -3602,16 +3674,21 @@ acpi_wake_set_enable(device_t dev, int enable)
}
static int
-acpi_wake_sleep_prep(ACPI_HANDLE handle, int sstate)
+acpi_wake_sleep_prep(ACPI_HANDLE handle, enum power_stype stype)
{
+ int sstate;
struct acpi_prw_data prw;
device_t dev;
+ struct acpi_softc *sc;
/* Check that this is a wake-capable device and get its GPE. */
if (acpi_parse_prw(handle, &prw) != 0)
return (ENXIO);
dev = acpi_get_device(handle);
+ sc = device_get_softc(dev);
+ sstate = acpi_stype_to_sstate(sc, stype);
+
/*
* The destination sleep state must be less than (i.e., higher power)
* or equal to the value specified by _PRW. If this GPE cannot be
@@ -3622,24 +3699,26 @@ acpi_wake_sleep_prep(ACPI_HANDLE handle, int sstate)
if (sstate > prw.lowest_wake) {
AcpiSetGpeWakeMask(prw.gpe_handle, prw.gpe_bit, ACPI_GPE_DISABLE);
if (bootverbose)
- device_printf(dev, "wake_prep disabled wake for %s (S%d)\n",
- acpi_name(handle), sstate);
+ device_printf(dev, "wake_prep disabled wake for %s (%s)\n",
+ acpi_name(handle), power_stype_to_name(stype));
} else if (dev && (acpi_get_flags(dev) & ACPI_FLAG_WAKE_ENABLED) != 0) {
acpi_pwr_wake_enable(handle, 1);
acpi_SetInteger(handle, "_PSW", 1);
if (bootverbose)
- device_printf(dev, "wake_prep enabled for %s (S%d)\n",
- acpi_name(handle), sstate);
+ device_printf(dev, "wake_prep enabled for %s (%s)\n",
+ acpi_name(handle), power_stype_to_name(stype));
}
return (0);
}
static int
-acpi_wake_run_prep(ACPI_HANDLE handle, int sstate)
+acpi_wake_run_prep(ACPI_HANDLE handle, enum power_stype stype)
{
+ int sstate;
struct acpi_prw_data prw;
device_t dev;
+ struct acpi_softc *sc;
/*
* Check that this is a wake-capable device and get its GPE. Return
@@ -3651,6 +3730,9 @@ acpi_wake_run_prep(ACPI_HANDLE handle, int sstate)
if (dev == NULL || (acpi_get_flags(dev) & ACPI_FLAG_WAKE_ENABLED) == 0)
return (0);
+ sc = device_get_softc(dev);
+ sstate = acpi_stype_to_sstate(sc, stype);
+
/*
* If this GPE couldn't be enabled for the previous sleep state, it was
* disabled before going to sleep so re-enable it. If it was enabled,
@@ -3674,26 +3756,26 @@ acpi_wake_run_prep(ACPI_HANDLE handle, int sstate)
static ACPI_STATUS
acpi_wake_prep(ACPI_HANDLE handle, UINT32 level, void *context, void **status)
{
- int sstate;
+ enum power_stype stype;
/* If suspending, run the sleep prep function, otherwise wake. */
- sstate = *(int *)context;
+ stype = *(enum power_stype *)context;
if (AcpiGbl_SystemAwakeAndRunning)
- acpi_wake_sleep_prep(handle, sstate);
+ acpi_wake_sleep_prep(handle, stype);
else
- acpi_wake_run_prep(handle, sstate);
+ acpi_wake_run_prep(handle, stype);
return (AE_OK);
}
/* Walk the tree rooted at acpi0 to prep devices for suspend/resume. */
static int
-acpi_wake_prep_walk(int sstate)
+acpi_wake_prep_walk(enum power_stype stype)
{
ACPI_HANDLE sb_handle;
if (ACPI_SUCCESS(AcpiGetHandle(ACPI_ROOT_OBJECT, "\\_SB_", &sb_handle)))
AcpiWalkNamespace(ACPI_TYPE_DEVICE, sb_handle, 100,
- acpi_wake_prep, NULL, &sstate, NULL);
+ acpi_wake_prep, NULL, &stype, NULL);
return (0);
}
@@ -3852,31 +3934,35 @@ out:
/* System Event Handlers (registered by EVENTHANDLER_REGISTER) */
static void
-acpi_system_eventhandler_sleep(void *arg, int state)
+acpi_system_eventhandler_sleep(void *arg, enum power_stype stype)
{
struct acpi_softc *sc = (struct acpi_softc *)arg;
int ret;
- ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, state);
+ ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, stype);
/* Check if button action is disabled or unknown. */
- if (state == ACPI_STATE_UNKNOWN)
+ if (stype == POWER_STYPE_UNKNOWN)
return;
- /* Request that the system prepare to enter the given suspend state. */
- ret = acpi_ReqSleepState(sc, state);
+ /* Request that the system prepare to enter the given sleep type. */
+ ret = acpi_ReqSleepState(sc, stype);
if (ret != 0)
device_printf(sc->acpi_dev,
- "request to enter state S%d failed (err %d)\n", state, ret);
+ "request to enter state %s failed (err %d)\n",
+ power_stype_to_name(stype), ret);
return_VOID;
}
static void
-acpi_system_eventhandler_wakeup(void *arg, int state)
+acpi_system_eventhandler_wakeup(void *arg, enum power_stype stype)
{
- ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, state);
+ ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, stype);
/* Currently, nothing to do for wakeup. */
@@ -3890,14 +3976,14 @@ static void
acpi_invoke_sleep_eventhandler(void *context)
{
- EVENTHANDLER_INVOKE(acpi_sleep_event, *(int *)context);
+ EVENTHANDLER_INVOKE(acpi_sleep_event, *(enum power_stype *)context);
}
static void
acpi_invoke_wake_eventhandler(void *context)
{
- EVENTHANDLER_INVOKE(acpi_wakeup_event, *(int *)context);
+ EVENTHANDLER_INVOKE(acpi_wakeup_event, *(enum power_stype *)context);
}
UINT32
@@ -3913,7 +3999,7 @@ acpi_event_power_button_sleep(void *context)
#if defined(__amd64__) || defined(__i386__)
if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
- acpi_invoke_sleep_eventhandler, &sc->acpi_power_button_sx)))
+ acpi_invoke_sleep_eventhandler, &sc->acpi_power_button_stype)))
return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
#else
shutdown_nice(RB_POWEROFF);
@@ -3930,7 +4016,7 @@ acpi_event_power_button_wake(void *context)
ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
- acpi_invoke_wake_eventhandler, &sc->acpi_power_button_sx)))
+ acpi_invoke_wake_eventhandler, &sc->acpi_power_button_stype)))
return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
return_VALUE (ACPI_INTERRUPT_HANDLED);
}
@@ -3943,7 +4029,7 @@ acpi_event_sleep_button_sleep(void *context)
ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
- acpi_invoke_sleep_eventhandler, &sc->acpi_sleep_button_sx)))
+ acpi_invoke_sleep_eventhandler, &sc->acpi_sleep_button_stype)))
return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
return_VALUE (ACPI_INTERRUPT_HANDLED);
}
@@ -3956,7 +4042,7 @@ acpi_event_sleep_button_wake(void *context)
ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
- acpi_invoke_wake_eventhandler, &sc->acpi_sleep_button_sx)))
+ acpi_invoke_wake_eventhandler, &sc->acpi_sleep_button_stype)))
return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
return_VALUE (ACPI_INTERRUPT_HANDLED);
}
@@ -4152,7 +4238,8 @@ acpiioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *t
{
struct acpi_softc *sc;
struct acpi_ioctl_hook *hp;
- int error, state;
+ int error;
+ int sstate;
error = 0;
hp = NULL;
@@ -4182,9 +4269,9 @@ acpiioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *t
/* Core system ioctls. */
switch (cmd) {
case ACPIIO_REQSLPSTATE:
- state = *(int *)addr;
- if (state != ACPI_STATE_S5)
- return (acpi_ReqSleepState(sc, state));
+ sstate = *(int *)addr;
+ if (sstate != ACPI_STATE_S5)
+ return (acpi_ReqSleepState(sc, acpi_sstate_to_stype(sstate)));
device_printf(sc->acpi_dev, "power off via acpi ioctl not supported\n");
error = EOPNOTSUPP;
break;
@@ -4193,12 +4280,12 @@ acpiioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *t
error = acpi_AckSleepState(sc->acpi_clone, error);
break;
case ACPIIO_SETSLPSTATE: /* DEPRECATED */
- state = *(int *)addr;
- if (state < ACPI_STATE_S0 || state > ACPI_S_STATES_MAX)
+ sstate = *(int *)addr;
+ if (sstate < ACPI_STATE_S0 || sstate > ACPI_STATE_S5)
return (EINVAL);
- if (!acpi_sleep_states[state])
+ if (!acpi_supported_sstates[sstate])
return (EOPNOTSUPP);
- if (ACPI_FAILURE(acpi_SetSleepState(sc, state)))
+ if (ACPI_FAILURE(acpi_SetSleepState(sc, acpi_sstate_to_stype(sstate))))
error = ENXIO;
break;
default:
@@ -4210,7 +4297,7 @@ acpiioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *t
}
static int
-acpi_sname2sstate(const char *sname)
+acpi_sname_to_sstate(const char *sname)
{
int sstate;
@@ -4225,14 +4312,15 @@ acpi_sname2sstate(const char *sname)
}
static const char *
-acpi_sstate2sname(int sstate)
+acpi_sstate_to_sname(int state)
{
- static const char *snames[] = { "S0", "S1", "S2", "S3", "S4", "S5" };
+ static const char *snames[ACPI_S_STATE_COUNT] = {"S0", "S1", "S2", "S3",
+ "S4", "S5"};
- if (sstate >= ACPI_STATE_S0 && sstate <= ACPI_STATE_S5)
- return (snames[sstate]);
- else if (sstate == ACPI_STATE_UNKNOWN)
+ if (state == ACPI_STATE_UNKNOWN)
return ("NONE");
+ if (state >= ACPI_STATE_S0 && state < ACPI_S_STATE_COUNT)
+ return (snames[state]);
return (NULL);
}
@@ -4245,8 +4333,8 @@ acpi_supported_sleep_state_sysctl(SYSCTL_HANDLER_ARGS)
sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
for (state = ACPI_STATE_S1; state < ACPI_S_STATE_COUNT; state++)
- if (acpi_sleep_states[state])
- sbuf_printf(&sb, "%s ", acpi_sstate2sname(state));
+ if (acpi_supported_sstates[state])
+ sbuf_printf(&sb, "%s ", acpi_sstate_to_sname(state));
sbuf_trim(&sb);
sbuf_finish(&sb);
error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
@@ -4254,27 +4342,64 @@ acpi_supported_sleep_state_sysctl(SYSCTL_HANDLER_ARGS)
return (error);
}
+
static int
acpi_sleep_state_sysctl(SYSCTL_HANDLER_ARGS)
{
char sleep_state[10];
- int error, new_state, old_state;
+ int error;
+ int new_sstate, old_sstate;
- old_state = *(int *)oidp->oid_arg1;
- strlcpy(sleep_state, acpi_sstate2sname(old_state), sizeof(sleep_state));
+ old_sstate = *(int *)oidp->oid_arg1;
+ strlcpy(sleep_state, acpi_sstate_to_sname(old_sstate), sizeof(sleep_state));
error = sysctl_handle_string(oidp, sleep_state, sizeof(sleep_state), req);
if (error == 0 && req->newptr != NULL) {
- new_state = acpi_sname2sstate(sleep_state);
- if (new_state < ACPI_STATE_S1)
+ new_sstate = acpi_sname_to_sstate(sleep_state);
+ if (new_sstate < 0)
return (EINVAL);
- if (new_state < ACPI_S_STATE_COUNT && !acpi_sleep_states[new_state])
+ if (new_sstate < ACPI_S_STATE_COUNT &&
+ !acpi_supported_sstates[new_sstate])
return (EOPNOTSUPP);
- if (new_state != old_state)
- *(int *)oidp->oid_arg1 = new_state;
+ if (new_sstate != old_sstate)
+ *(int *)oidp->oid_arg1 = new_sstate;
}
return (error);
}
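+/*
+ * Sysctl handler for sleep type variables.  Accepts a sleep type name; for
+ * backwards compatibility an ACPI S-state name is also accepted and
+ * translated to the corresponding sleep type.
+ */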
+static int
+acpi_stype_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ char name[10];
+ int err;
+ int sstate;
+ enum power_stype new_stype, old_stype;
+
+ old_stype = *(enum power_stype *)oidp->oid_arg1;
+ strlcpy(name, power_stype_to_name(old_stype), sizeof(name));
+ err = sysctl_handle_string(oidp, name, sizeof(name), req);
+ if (err != 0 || req->newptr == NULL)
+ return (err);
+
+ new_stype = power_name_to_stype(name);
+ if (new_stype == POWER_STYPE_UNKNOWN) {
+ sstate = acpi_sname_to_sstate(name);
+ if (sstate < 0)
+ return (EINVAL);
+ printf("warning: this sysctl expects a sleep type, but an ACPI S-state has "
+ "been passed to it. This functionality is deprecated; see acpi(4).\n");
+ MPASS(sstate < ACPI_S_STATE_COUNT);
+ if (acpi_supported_sstates[sstate] == false)
+ return (EOPNOTSUPP);
+ new_stype = acpi_sstate_to_stype(sstate);
+ }
+
+ if (acpi_supported_stypes[new_stype] == false)
+ return (EOPNOTSUPP);
+ if (new_stype != old_stype)
+ *(enum power_stype *)oidp->oid_arg1 = new_stype;
+ return (0);
+}
+
/* Inform devctl(4) when we receive a Notify. */
void
acpi_UserNotify(const char *subsystem, ACPI_HANDLE h, uint8_t notify)
@@ -4621,12 +4746,10 @@ acpi_reset_interfaces(device_t dev)
}
static int
-acpi_pm_func(u_long cmd, void *arg, ...)
+acpi_pm_func(u_long cmd, void *arg, enum power_stype stype)
{
- int state, acpi_state;
int error;
struct acpi_softc *sc;
- va_list ap;
error = 0;
switch (cmd) {
@@ -4636,27 +4759,7 @@ acpi_pm_func(u_long cmd, void *arg, ...)
error = EINVAL;
goto out;
}
-
- va_start(ap, arg);
- state = va_arg(ap, int);
- va_end(ap);
-
- switch (state) {
- case POWER_SLEEP_STATE_STANDBY:
- acpi_state = sc->acpi_standby_sx;
- break;
- case POWER_SLEEP_STATE_SUSPEND:
- acpi_state = sc->acpi_suspend_sx;
- break;
- case POWER_SLEEP_STATE_HIBERNATE:
- acpi_state = ACPI_STATE_S4;
- break;
- default:
- error = EINVAL;
- goto out;
- }
-
- if (ACPI_FAILURE(acpi_EnterSleepState(sc, acpi_state)))
+ if (ACPI_FAILURE(acpi_EnterSleepState(sc, stype)))
error = ENXIO;
break;
default:
@@ -4674,7 +4777,8 @@ acpi_pm_register(void *arg)
if (!cold || resource_disabled("acpi", 0))
return;
- power_pm_register(POWER_PM_TYPE_ACPI, acpi_pm_func, NULL);
+ power_pm_register(POWER_PM_TYPE_ACPI, acpi_pm_func, NULL,
+ acpi_supported_stypes);
}
SYSINIT(power, SI_SUB_KLD, SI_ORDER_ANY, acpi_pm_register, NULL);
diff --git a/sys/dev/acpica/acpi_lid.c b/sys/dev/acpica/acpi_lid.c
index 142791f7282a..fb8755d9f0fe 100644
--- a/sys/dev/acpica/acpi_lid.c
+++ b/sys/dev/acpica/acpi_lid.c
@@ -235,9 +235,9 @@ acpi_lid_notify_status_changed(void *arg)
sc->lid_status ? "opened" : "closed");
if (sc->lid_status == 0)
- EVENTHANDLER_INVOKE(acpi_sleep_event, acpi_sc->acpi_lid_switch_sx);
+ EVENTHANDLER_INVOKE(acpi_sleep_event, acpi_sc->acpi_lid_switch_stype);
else
- EVENTHANDLER_INVOKE(acpi_wakeup_event, acpi_sc->acpi_lid_switch_sx);
+ EVENTHANDLER_INVOKE(acpi_wakeup_event, acpi_sc->acpi_lid_switch_stype);
out:
ACPI_SERIAL_END(lid);
diff --git a/sys/dev/acpica/acpivar.h b/sys/dev/acpica/acpivar.h
index 7495a010432b..4c789dd3e9f2 100644
--- a/sys/dev/acpica/acpivar.h
+++ b/sys/dev/acpica/acpivar.h
@@ -40,6 +40,7 @@
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
+#include <sys/power.h>
#include <sys/selinfo.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
@@ -53,20 +54,19 @@ struct acpi_softc {
struct cdev *acpi_dev_t;
int acpi_enabled;
- int acpi_sstate;
+ enum power_stype acpi_stype;
int acpi_sleep_disabled;
struct sysctl_ctx_list acpi_sysctl_ctx;
struct sysctl_oid *acpi_sysctl_tree;
- int acpi_power_button_sx;
- int acpi_sleep_button_sx;
- int acpi_lid_switch_sx;
+ enum power_stype acpi_power_button_stype;
+ enum power_stype acpi_sleep_button_stype;
+ enum power_stype acpi_lid_switch_stype;
int acpi_standby_sx;
- int acpi_suspend_sx;
+ int acpi_s4bios;
int acpi_sleep_delay;
- int acpi_s4bios;
int acpi_do_disable;
int acpi_verbose;
int acpi_handle_reboot;
@@ -74,7 +74,7 @@ struct acpi_softc {
vm_offset_t acpi_wakeaddr;
vm_paddr_t acpi_wakephys;
- int acpi_next_sstate; /* Next suspend Sx state. */
+ enum power_stype acpi_next_stype; /* Next suspend sleep type. */
struct apm_clone_data *acpi_clone; /* Pseudo-dev for devd(8). */
STAILQ_HEAD(,apm_clone_data) apm_cdevs; /* All apm/apmctl/acpi cdevs. */
struct callout susp_force_to; /* Force suspend if no acks. */
@@ -411,7 +411,7 @@ ACPI_STATUS acpi_EvaluateOSC(ACPI_HANDLE handle, uint8_t *uuid,
uint32_t *caps_out, bool query);
ACPI_STATUS acpi_OverrideInterruptLevel(UINT32 InterruptNumber);
ACPI_STATUS acpi_SetIntrModel(int model);
-int acpi_ReqSleepState(struct acpi_softc *sc, int state);
+int acpi_ReqSleepState(struct acpi_softc *sc, enum power_stype stype);
int acpi_AckSleepState(struct apm_clone_data *clone, int error);
ACPI_STATUS acpi_SetSleepState(struct acpi_softc *sc, int state);
int acpi_wake_set_enable(device_t dev, int enable);
diff --git a/sys/dev/ahci/ahci_pci.c b/sys/dev/ahci/ahci_pci.c
index f29d803e99a8..82f56fc0d19e 100644
--- a/sys/dev/ahci/ahci_pci.c
+++ b/sys/dev/ahci/ahci_pci.c
@@ -195,6 +195,7 @@ static const struct {
{0x1f3f8086, 0x00, "Intel Avoton (RAID)", 0},
{0x23a38086, 0x00, "Intel Coleto Creek", 0},
{0x31e38086, 0x00, "Intel Gemini Lake", 0},
+ {0x4b638086, 0x00, "Intel Elkhart Lake", 0},
{0x5ae38086, 0x00, "Intel Apollo Lake", 0},
{0x7ae28086, 0x00, "Intel Alder Lake", 0},
{0x8c028086, 0x00, "Intel Lynx Point", 0},
diff --git a/sys/dev/amdgpio/amdgpio.c b/sys/dev/amdgpio/amdgpio.c
index 2bd455c612b8..20589ff71b0b 100644
--- a/sys/dev/amdgpio/amdgpio.c
+++ b/sys/dev/amdgpio/amdgpio.c
@@ -3,6 +3,10 @@
*
* Copyright (c) 2018 Advanced Micro Devices
* All rights reserved.
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Aymeric Wibo
+ * <obiwac@freebsd.org> under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -51,11 +55,11 @@
#include <dev/acpica/acpivar.h>
#include <dev/gpio/gpiobusvar.h>
-#include "gpio_if.h"
#include "amdgpio.h"
static struct resource_spec amdgpio_spec[] = {
- { SYS_RES_MEMORY, 0, RF_ACTIVE },
+ { SYS_RES_MEMORY, 0, RF_ACTIVE },
+ { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE },
{ -1, 0, 0 }
};
@@ -196,7 +200,7 @@ static int
amdgpio_pin_setflags(device_t dev, uint32_t pin, uint32_t flags)
{
struct amdgpio_softc *sc;
- uint32_t reg, val, allowed;
+ uint32_t reg, val;
sc = device_get_softc(dev);
@@ -204,18 +208,19 @@ amdgpio_pin_setflags(device_t dev, uint32_t pin, uint32_t flags)
if (!amdgpio_valid_pin(sc, pin))
return (EINVAL);
- allowed = GPIO_PIN_INPUT | GPIO_PIN_OUTPUT;
+ if ((flags & ~AMDGPIO_DEFAULT_CAPS) != 0) {
+ device_printf(dev, "disallowed flags (0x%x) trying to be set "
+ "(allowed is 0x%x)\n", flags, AMDGPIO_DEFAULT_CAPS);
+ return (EINVAL);
+ }
- /*
- * Only directtion flag allowed
- */
- if (flags & ~allowed)
+ /* Either input or output must be selected. */
+ if ((flags & (GPIO_PIN_INPUT | GPIO_PIN_OUTPUT)) == 0)
return (EINVAL);
- /*
- * Not both directions simultaneously
- */
- if ((flags & allowed) == allowed)
+ /* Not both directions simultaneously. */
+ if ((flags & (GPIO_PIN_INPUT | GPIO_PIN_OUTPUT)) ==
+ (GPIO_PIN_INPUT | GPIO_PIN_OUTPUT))
return (EINVAL);
/* Set the GPIO mode and state */
@@ -224,16 +229,21 @@ amdgpio_pin_setflags(device_t dev, uint32_t pin, uint32_t flags)
reg = AMDGPIO_PIN_REGISTER(pin);
val = amdgpio_read_4(sc, reg);
- if (flags & GPIO_PIN_INPUT) {
+ if ((flags & GPIO_PIN_INPUT) != 0)
val &= ~BIT(OUTPUT_ENABLE_OFF);
- sc->sc_gpio_pins[pin].gp_flags = GPIO_PIN_INPUT;
- } else {
+ else
val |= BIT(OUTPUT_ENABLE_OFF);
- sc->sc_gpio_pins[pin].gp_flags = GPIO_PIN_OUTPUT;
- }
+
+ val &= ~(BIT(PULL_DOWN_ENABLE_OFF) | BIT(PULL_UP_ENABLE_OFF));
+
+ if ((flags & GPIO_PIN_PULLDOWN) != 0)
+ val |= BIT(PULL_DOWN_ENABLE_OFF);
+ if ((flags & GPIO_PIN_PULLUP) != 0)
+ val |= BIT(PULL_UP_ENABLE_OFF);
amdgpio_write_4(sc, reg, val);
+ sc->sc_gpio_pins[pin].gp_flags = flags;
dprintf("pin %d flags 0x%x val 0x%x gp_flags 0x%x\n",
pin, flags, val, sc->sc_gpio_pins[pin].gp_flags);
@@ -359,11 +369,73 @@ amdgpio_probe(device_t dev)
return (rv);
}
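+/*
+ * Signal end of interrupt to the controller by setting the EOI bit in
+ * WAKE_INT_MASTER_REG.
+ */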
+static void
+amdgpio_eoi_locked(struct amdgpio_softc *sc)
+{
+ uint32_t master_reg = amdgpio_read_4(sc, WAKE_INT_MASTER_REG);
+
+ AMDGPIO_ASSERT_LOCKED(sc);
+ master_reg |= EOI_MASK;
+ amdgpio_write_4(sc, WAKE_INT_MASTER_REG, master_reg);
+}
+
+static void
+amdgpio_eoi(struct amdgpio_softc *sc)
+{
+ AMDGPIO_LOCK(sc);
+ amdgpio_eoi_locked(sc);
+ AMDGPIO_UNLOCK(sc);
+}
+
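+/*
+ * Interrupt filter: clear any pending interrupt/wake status bits on the
+ * exposed pins, then issue an EOI to the controller.
+ */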
+static int
+amdgpio_intr_filter(void *arg)
+{
+ struct amdgpio_softc *sc = arg;
+ int off, rv = FILTER_STRAY;
+ uint32_t reg;
+
+ /* We can lock in the filter routine as it is MTX_SPIN. */
+ AMDGPIO_LOCK(sc);
+
+ /*
+ * TODO Instead of just reading the registers of all pins, we should
+ * read WAKE_INT_STATUS_REG0/1. A bit set in here denotes a group of
+ * 4 pins where at least one has an interrupt for us. Then we can just
+ * iterate over those 4 pins.
+ *
+ * See GPIO_Interrupt_Status_Index_0 in BKDG.
+ */
+ for (size_t pin = 0; pin < AMD_GPIO_PINS_EXPOSED; pin++) {
+ off = AMDGPIO_PIN_REGISTER(pin);
+ reg = amdgpio_read_4(sc, off);
+ if ((reg & UNSERVICED_INTERRUPT_MASK) == 0)
+ continue;
+ /*
+ * Must write 1's to wake/interrupt status bits to clear them.
+ * We can do this simply by writing back to the register.
+ */
+ amdgpio_write_4(sc, off, reg);
+ }
+
+ amdgpio_eoi_locked(sc);
+ AMDGPIO_UNLOCK(sc);
+
+ rv = FILTER_HANDLED;
+ return (rv);
+}
+
+static void
+amdgpio_intr_handler(void *arg)
+{
+ /* TODO */
+}
+
static int
amdgpio_attach(device_t dev)
{
struct amdgpio_softc *sc;
- int i, pin, bank;
+ int i, pin, bank, reg;
+ uint32_t flags;
sc = device_get_softc(dev);
sc->sc_dev = dev;
@@ -386,6 +458,14 @@ amdgpio_attach(device_t dev)
sc->sc_bst = rman_get_bustag(sc->sc_res[0]);
sc->sc_bsh = rman_get_bushandle(sc->sc_res[0]);
+ /* Set up interrupt handler. */
+ if (bus_setup_intr(dev, sc->sc_res[1], INTR_TYPE_MISC | INTR_MPSAFE,
+ amdgpio_intr_filter, amdgpio_intr_handler, sc, &sc->sc_intr_handle)
+ != 0) {
+ device_printf(dev, "couldn't set up interrupt\n");
+ goto err_intr;
+ }
+
/* Initialize all possible pins to be Invalid */
for (i = 0; i < AMD_GPIO_PINS_MAX ; i++) {
snprintf(sc->sc_gpio_pins[i].gp_name, GPIOMAXNAME,
@@ -395,7 +475,12 @@ amdgpio_attach(device_t dev)
sc->sc_gpio_pins[i].gp_flags = 0;
}
- /* Initialize only driver exposed pins with appropriate capabilities */
+ /*
+ * Initialize only driver exposed pins with appropriate capabilities.
+ *
+ * XXX Also mask and disable interrupts on all pins, since we don't
+ * support them at the moment.
+ */
for (i = 0; i < AMD_GPIO_PINS_EXPOSED ; i++) {
pin = kernzp_pins[i].pin_num;
bank = pin/AMD_GPIO_PINS_PER_BANK;
@@ -406,7 +491,14 @@ amdgpio_attach(device_t dev)
sc->sc_gpio_pins[pin].gp_flags =
amdgpio_is_pin_output(sc, pin) ?
GPIO_PIN_OUTPUT : GPIO_PIN_INPUT;
+
+ reg = AMDGPIO_PIN_REGISTER(pin);
+ flags = amdgpio_read_4(sc, reg);
+ flags &= ~(1 << INTERRUPT_ENABLE_OFF);
+ flags &= ~(1 << INTERRUPT_MASK_OFF);
+ amdgpio_write_4(sc, reg, flags);
}
+ amdgpio_eoi(sc);
sc->sc_busdev = gpiobus_add_bus(dev);
if (sc->sc_busdev == NULL) {
@@ -418,8 +510,9 @@ amdgpio_attach(device_t dev)
return (0);
err_bus:
+ bus_teardown_intr(dev, sc->sc_res[1], sc->sc_intr_handle);
+err_intr:
bus_release_resources(dev, amdgpio_spec, sc->sc_res);
-
err_rsrc:
AMDGPIO_LOCK_DESTROY(sc);
@@ -434,7 +527,8 @@ amdgpio_detach(device_t dev)
if (sc->sc_busdev)
gpiobus_detach_bus(dev);
-
+ if (sc->sc_intr_handle)
+ bus_teardown_intr(dev, sc->sc_res[1], sc->sc_intr_handle);
bus_release_resources(dev, amdgpio_spec, sc->sc_res);
AMDGPIO_LOCK_DESTROY(sc);
diff --git a/sys/dev/amdgpio/amdgpio.h b/sys/dev/amdgpio/amdgpio.h
index aca3039bfc98..3743eba23e17 100644
--- a/sys/dev/amdgpio/amdgpio.h
+++ b/sys/dev/amdgpio/amdgpio.h
@@ -50,7 +50,8 @@
AMD_GPIO_PINS_BANK1 + \
AMD_GPIO_PINS_BANK2 + \
AMD_GPIO_PINS_BANK3)
-#define AMDGPIO_DEFAULT_CAPS (GPIO_PIN_INPUT | GPIO_PIN_OUTPUT)
+#define AMDGPIO_DEFAULT_CAPS (GPIO_PIN_INPUT | GPIO_PIN_OUTPUT | \
+ GPIO_PIN_PULLDOWN | GPIO_PIN_PULLUP)
/* Register related macros */
#define AMDGPIO_PIN_REGISTER(pin) (pin * 4)
@@ -84,6 +85,9 @@
#define INTERRUPT_STS_OFF 28
#define WAKE_STS_OFF 29
+#define UNSERVICED_INTERRUPT_MASK \
+ ((1 << INTERRUPT_STS_OFF) | (1 << WAKE_STS_OFF))
+
#define DB_TMR_OUT_MASK 0xFUL
#define DB_CNTRL_MASK 0x3UL
#define ACTIVE_LEVEL_MASK 0x3UL
@@ -316,12 +320,13 @@ struct amdgpio_softc {
int sc_npins;
int sc_ngroups;
struct mtx sc_mtx;
- struct resource *sc_res[AMD_GPIO_NUM_PIN_BANK + 1];
+ struct resource *sc_res[2];
bus_space_tag_t sc_bst;
bus_space_handle_t sc_bsh;
struct gpio_pin sc_gpio_pins[AMD_GPIO_PINS_MAX];
const struct pin_info *sc_pin_info;
const struct amd_pingroup *sc_groups;
+ void *sc_intr_handle;
};
struct amdgpio_sysctl {
diff --git a/sys/dev/ath/ath_rate/sample/sample.c b/sys/dev/ath/ath_rate/sample/sample.c
index 291d1ec64ed7..79bf08678249 100644
--- a/sys/dev/ath/ath_rate/sample/sample.c
+++ b/sys/dev/ath/ath_rate/sample/sample.c
@@ -179,7 +179,7 @@ ath_rate_sample_find_min_pktlength(struct ath_softc *sc,
const struct txschedule *sched = &sn->sched[rix0];
int max_pkt_length = 65530; // ATH_AGGR_MAXSIZE
// Note: this may not be true in all cases; need to check?
- int is_ht40 = (an->an_node.ni_chw == IEEE80211_STA_RX_BW_40);
+ int is_ht40 = (an->an_node.ni_chw == NET80211_STA_RX_BW_40);
// Note: not great, but good enough..
int idx = is_ht40 ? MCS_HT40 : MCS_HT20;
@@ -979,7 +979,7 @@ update_stats(struct ath_softc *sc, struct ath_node *an,
const int size_bin = size_to_bin(frame_size);
const int size = bin_to_size(size_bin);
int tt;
- int is_ht40 = (an->an_node.ni_chw == IEEE80211_STA_RX_BW_40);
+ int is_ht40 = (an->an_node.ni_chw == NET80211_STA_RX_BW_40);
int pct;
if (!IS_RATE_DEFINED(sn, rix0))
@@ -1365,7 +1365,7 @@ ath_rate_ctl_reset(struct ath_softc *sc, struct ieee80211_node *ni)
continue;
printf(" %d %s/%d", dot11rate(rt, rix), dot11rate_label(rt, rix),
calc_usecs_unicast_packet(sc, 1600, rix, 0,0,
- (ni->ni_chw == IEEE80211_STA_RX_BW_40)));
+ (ni->ni_chw == NET80211_STA_RX_BW_40)));
}
printf("\n");
}
@@ -1396,7 +1396,7 @@ ath_rate_ctl_reset(struct ath_softc *sc, struct ieee80211_node *ni)
sn->stats[y][rix].perfect_tx_time =
calc_usecs_unicast_packet(sc, size, rix, 0, 0,
- (ni->ni_chw == IEEE80211_STA_RX_BW_40));
+ (ni->ni_chw == NET80211_STA_RX_BW_40));
sn->stats[y][rix].average_tx_time =
sn->stats[y][rix].perfect_tx_time;
}
diff --git a/sys/dev/ath/if_ath_tx_ht.c b/sys/dev/ath/if_ath_tx_ht.c
index e7ee029fecf0..f42058bacb0d 100644
--- a/sys/dev/ath/if_ath_tx_ht.c
+++ b/sys/dev/ath/if_ath_tx_ht.c
@@ -283,7 +283,7 @@ ath_tx_rate_fill_rcflags(struct ath_softc *sc, struct ath_buf *bf)
if (IS_HT_RATE(rate)) {
rc[i].flags |= ATH_RC_HT_FLAG;
- if (ni->ni_chw == IEEE80211_STA_RX_BW_40)
+ if (ni->ni_chw == NET80211_STA_RX_BW_40)
rc[i].flags |= ATH_RC_CW40_FLAG;
/*
@@ -295,13 +295,13 @@ ath_tx_rate_fill_rcflags(struct ath_softc *sc, struct ath_buf *bf)
* and doesn't return the fractional part, so
* we are always "out" by some amount.
*/
- if (ni->ni_chw == IEEE80211_STA_RX_BW_40 &&
+ if (ni->ni_chw == NET80211_STA_RX_BW_40 &&
ieee80211_ht_check_tx_shortgi_40(ni) &&
(bf->bf_flags & ATH_BUF_TOA_PROBE) == 0) {
rc[i].flags |= ATH_RC_SGI_FLAG;
}
- if (ni->ni_chw == IEEE80211_STA_RX_BW_20 &&
+ if (ni->ni_chw == NET80211_STA_RX_BW_20 &&
ieee80211_ht_check_tx_shortgi_20(ni) &&
(bf->bf_flags & ATH_BUF_TOA_PROBE) == 0) {
rc[i].flags |= ATH_RC_SGI_FLAG;
diff --git a/sys/dev/axgbe/if_axgbe_pci.c b/sys/dev/axgbe/if_axgbe_pci.c
index 290156ff11ca..6bc4bd33e162 100644
--- a/sys/dev/axgbe/if_axgbe_pci.c
+++ b/sys/dev/axgbe/if_axgbe_pci.c
@@ -2415,7 +2415,8 @@ axgbe_if_get_counter(if_ctx_t ctx, ift_counter cnt)
case IFCOUNTER_OPACKETS:
return (pstats->txframecount_gb);
case IFCOUNTER_OERRORS:
- return (pstats->txframecount_gb - pstats->txframecount_g);
+ return (if_get_counter_default(ifp, cnt) +
+ pstats->txframecount_gb - pstats->txframecount_g);
case IFCOUNTER_IBYTES:
return (pstats->rxoctetcount_gb);
case IFCOUNTER_OBYTES:
diff --git a/sys/dev/bce/if_bce.c b/sys/dev/bce/if_bce.c
index 16bfce5338a7..6cf39e035ea6 100644
--- a/sys/dev/bce/if_bce.c
+++ b/sys/dev/bce/if_bce.c
@@ -1221,7 +1221,7 @@ bce_attach(device_t dev)
sc->bce_bc_ver[j++] = '.';
}
- /* Check if any management firwmare is enabled. */
+ /* Check if any management firmware is enabled. */
val = bce_shmem_rd(sc, BCE_PORT_FEATURE);
if (val & BCE_PORT_FEATURE_ASF_ENABLED) {
sc->bce_flags |= BCE_MFW_ENABLE_FLAG;
diff --git a/sys/dev/bnxt/bnxt_en/bnxt_hwrm.c b/sys/dev/bnxt/bnxt_en/bnxt_hwrm.c
index 7dd555cfaadb..9e7f4614d9f9 100644
--- a/sys/dev/bnxt/bnxt_en/bnxt_hwrm.c
+++ b/sys/dev/bnxt/bnxt_en/bnxt_hwrm.c
@@ -714,7 +714,7 @@ int bnxt_hwrm_func_backing_store_cfg(struct bnxt_softc *softc, uint32_t enables)
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, pg_attr, pg_dir);
}
req.flags = cpu_to_le32(flags);
- return hwrm_send_message(softc, &req, sizeof(req));
+ return hwrm_send_message(softc, &req, req_len);
}
int bnxt_hwrm_func_resc_qcaps(struct bnxt_softc *softc, bool all)
diff --git a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
index c8592807f843..d805642541d3 100644
--- a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
+++ b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
@@ -1053,6 +1053,8 @@ send_iscsi_flowc_wr(struct adapter *sc, struct toepcb *toep, int maxlen)
flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_TXDATAPLEN_MAX;
flowc->mnemval[0].val = htobe32(maxlen);
+ KASSERT(howmany(flowclen, 16) <= MAX_OFLD_TX_SDESC_CREDITS,
+ ("%s: tx_credits %u too large", __func__, howmany(flowclen, 16)));
txsd->tx_credits = howmany(flowclen, 16);
txsd->plen = 0;
KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
diff --git a/sys/dev/cxgbe/iw_cxgbe/qp.c b/sys/dev/cxgbe/iw_cxgbe/qp.c
index 0e374bc961c4..cbf4bae00a60 100644
--- a/sys/dev/cxgbe/iw_cxgbe/qp.c
+++ b/sys/dev/cxgbe/iw_cxgbe/qp.c
@@ -1326,6 +1326,8 @@ creds(struct toepcb *toep, struct inpcb *inp, size_t wrsize)
return (EINVAL);
}
txsd = &toep->txsd[toep->txsd_pidx];
+ KASSERT(howmany(wrsize, 16) <= MAX_OFLD_TX_SDESC_CREDITS,
+ ("%s: tx_credits %zu too large", __func__, howmany(wrsize, 16)));
txsd->tx_credits = howmany(wrsize, 16);
txsd->plen = 0;
KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 9e91250cb61c..9756a6945384 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -9016,7 +9016,7 @@ sysctl_loadavg(SYSCTL_HANDLER_ARGS)
rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4lavg");
if (rc)
return (rc);
- if (hw_all_ok(sc))
+ if (!hw_all_ok(sc))
rc = ENXIO;
else {
param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 7a6b1cbdd736..be20ea42474e 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -148,6 +148,8 @@ send_flowc_wr(struct toepcb *toep, struct tcpcb *tp)
KASSERT(paramidx == nparams, ("nparams mismatch"));
+ KASSERT(howmany(flowclen, 16) <= MAX_OFLD_TX_SDESC_CREDITS,
+ ("%s: tx_credits %u too large", __func__, howmany(flowclen, 16)));
txsd->tx_credits = howmany(flowclen, 16);
txsd->plen = 0;
KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
@@ -215,6 +217,8 @@ update_tx_rate_limit(struct adapter *sc, struct toepcb *toep, u_int Bps)
else
flowc->mnemval[0].val = htobe32(tc_idx);
+ KASSERT(flowclen16 <= MAX_OFLD_TX_SDESC_CREDITS,
+ ("%s: tx_credits %u too large", __func__, flowclen16));
txsd->tx_credits = flowclen16;
txsd->plen = 0;
toep->tx_credits -= txsd->tx_credits;
@@ -491,6 +495,9 @@ t4_close_conn(struct adapter *sc, struct toepcb *toep)
#define MIN_TX_CREDITS(iso) \
(MIN_OFLD_TX_CREDITS + ((iso) ? MIN_ISO_TX_CREDITS : 0))
+_Static_assert(MAX_OFLD_TX_CREDITS <= MAX_OFLD_TX_SDESC_CREDITS,
+ "MAX_OFLD_TX_SDESC_CREDITS too small");
+
/* Maximum amount of immediate data we could stuff in a WR */
static inline int
max_imm_payload(int tx_credits, int iso)
@@ -612,6 +619,48 @@ write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n)
__func__, nsegs, start, stop));
}
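+/*
+ * Try to transmit a pre-formed raw work request mbuf on the connection's
+ * offload queue.  Returns false, leaving the mbuf untouched, if there are
+ * not enough tx credits or a work request could not be allocated.
+ */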
+bool
+t4_push_raw_wr(struct adapter *sc, struct toepcb *toep, struct mbuf *m)
+{
+#ifdef INVARIANTS
+ struct inpcb *inp = toep->inp;
+#endif
+ struct wrqe *wr;
+ struct ofld_tx_sdesc *txsd;
+ u_int credits, plen;
+
+ INP_WLOCK_ASSERT(inp);
+ MPASS(mbuf_raw_wr(m));
+ plen = m->m_pkthdr.len;
+ credits = howmany(plen, 16);
+ if (credits > toep->tx_credits)
+ return (false);
+
+ wr = alloc_wrqe(roundup2(plen, 16), &toep->ofld_txq->wrq);
+ if (wr == NULL)
+ return (false);
+
+ m_copydata(m, 0, plen, wrtod(wr));
+ m_freem(m);
+
+ toep->tx_credits -= credits;
+ if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
+ toep->flags |= TPF_TX_SUSPENDED;
+
+ KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
+ KASSERT(credits <= MAX_OFLD_TX_SDESC_CREDITS,
+ ("%s: tx_credits %u too large", __func__, credits));
+ txsd = &toep->txsd[toep->txsd_pidx];
+ txsd->plen = 0;
+ txsd->tx_credits = credits;
+ if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
+ toep->txsd_pidx = 0;
+ toep->txsd_avail--;
+
+ t4_wrq_tx(sc, wr);
+ return (true);
+}
+
/*
* Max number of SGL entries an offload tx work request can have. This is 41
* (1 + 40) for a full 512B work request.
@@ -644,6 +693,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
struct tcpcb *tp = intotcpcb(inp);
struct socket *so = inp->inp_socket;
struct sockbuf *sb = &so->so_snd;
+ struct mbufq *pduq = &toep->ulp_pduq;
int tx_credits, shove, compl, sowwakeup;
struct ofld_tx_sdesc *txsd;
bool nomap_mbuf_seen;
@@ -688,6 +738,19 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
max_imm = max_imm_payload(tx_credits, 0);
max_nsegs = max_dsgl_nsegs(tx_credits, 0);
+ if (__predict_false((sndptr = mbufq_first(pduq)) != NULL)) {
+ if (!t4_push_raw_wr(sc, toep, sndptr)) {
+ toep->flags |= TPF_TX_SUSPENDED;
+ return;
+ }
+
+ m = mbufq_dequeue(pduq);
+ MPASS(m == sndptr);
+
+ txsd = &toep->txsd[toep->txsd_pidx];
+ continue;
+ }
+
SOCKBUF_LOCK(sb);
sowwakeup = drop;
if (drop) {
@@ -705,6 +768,8 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
if ((m->m_flags & M_NOTREADY) != 0)
break;
+ if (plen + m->m_len > MAX_OFLD_TX_SDESC_PLEN)
+ break;
if (m->m_flags & M_EXTPG) {
#ifdef KERN_TLS
if (m->m_epg_tls != NULL) {
@@ -870,6 +935,8 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
toep->flags |= TPF_TX_SUSPENDED;
KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
+ KASSERT(plen <= MAX_OFLD_TX_SDESC_PLEN,
+ ("%s: plen %u too large", __func__, plen));
txsd->plen = plen;
txsd->tx_credits = credits;
txsd++;
@@ -1211,6 +1278,8 @@ t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop)
toep->flags |= TPF_TX_SUSPENDED;
KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
+ KASSERT(plen <= MAX_OFLD_TX_SDESC_PLEN,
+ ("%s: plen %u too large", __func__, plen));
txsd->plen = plen;
txsd->tx_credits = credits;
txsd++;
@@ -1240,6 +1309,35 @@ t4_push_data(struct adapter *sc, struct toepcb *toep, int drop)
t4_push_frames(sc, toep, drop);
}
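+/*
+ * Send a raw work request mbuf, queueing it behind any raw WRs already
+ * pending on the connection so that FIFO ordering is preserved.
+ */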
+void
+t4_raw_wr_tx(struct adapter *sc, struct toepcb *toep, struct mbuf *m)
+{
+#ifdef INVARIANTS
+ struct inpcb *inp = toep->inp;
+#endif
+
+ INP_WLOCK_ASSERT(inp);
+
+ /*
+ * If there are other raw WRs enqueued, enqueue to preserve
+ * FIFO ordering.
+ */
+ if (!mbufq_empty(&toep->ulp_pduq)) {
+ mbufq_enqueue(&toep->ulp_pduq, m);
+ return;
+ }
+
+ /*
+ * Cannot call t4_push_data here as that will lock so_snd and
+ * some callers of this run in rx handlers with so_rcv locked.
+ * Instead, just try to transmit this WR.
+ */
+ if (!t4_push_raw_wr(sc, toep, m)) {
+ mbufq_enqueue(&toep->ulp_pduq, m);
+ toep->flags |= TPF_TX_SUSPENDED;
+ }
+}
+
int
t4_tod_output(struct toedev *tod, struct tcpcb *tp)
{
@@ -1941,25 +2039,16 @@ do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
}
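+/*
+ * Write a CPL_SET_TCB_FIELD request for the given connection into the
+ * buffer at 'dst'.
+ */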
void
-t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, struct toepcb *toep,
+write_set_tcb_field(struct adapter *sc, void *dst, struct toepcb *toep,
uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie)
{
- struct wrqe *wr;
- struct cpl_set_tcb_field *req;
- struct ofld_tx_sdesc *txsd;
+ struct cpl_set_tcb_field *req = dst;
MPASS((cookie & ~M_COOKIE) == 0);
if (reply) {
MPASS(cookie != CPL_COOKIE_RESERVED);
}
- wr = alloc_wrqe(sizeof(*req), wrq);
- if (wr == NULL) {
- /* XXX */
- panic("%s: allocation failure.", __func__);
- }
- req = wrtod(wr);
-
INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid);
req->reply_ctrl = htobe16(V_QUEUENO(toep->ofld_rxq->iq.abs_id));
if (reply == 0)
@@ -1967,9 +2056,29 @@ t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, struct toepcb *toep,
req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie));
req->mask = htobe64(mask);
req->val = htobe64(val);
+}
+
+void
+t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, struct toepcb *toep,
+ uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie)
+{
+ struct wrqe *wr;
+ struct ofld_tx_sdesc *txsd;
+ const u_int len = sizeof(struct cpl_set_tcb_field);
+
+ wr = alloc_wrqe(len, wrq);
+ if (wr == NULL) {
+ /* XXX */
+ panic("%s: allocation failure.", __func__);
+ }
+ write_set_tcb_field(sc, wrtod(wr), toep, word, mask, val, reply,
+ cookie);
+
if (wrq->eq.type == EQ_OFLD) {
txsd = &toep->txsd[toep->txsd_pidx];
- txsd->tx_credits = howmany(sizeof(*req), 16);
+ _Static_assert(howmany(sizeof(struct cpl_set_tcb_field), 16) <=
+ MAX_OFLD_TX_SDESC_CREDITS,
+ "MAX_OFLD_TX_SDESC_CREDITS too small");
+ txsd->tx_credits = howmany(len, 16);
txsd->plen = 0;
KASSERT(toep->tx_credits >= txsd->tx_credits &&
toep->txsd_avail > 0,
diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c
index 2fee8fa91dac..da0753296532 100644
--- a/sys/dev/cxgbe/tom/t4_ddp.c
+++ b/sys/dev/cxgbe/tom/t4_ddp.c
@@ -1785,7 +1785,7 @@ t4_write_page_pods_for_rcvbuf(struct adapter *sc, struct sge_wrq *wrq, int tid,
return (0);
}
-static struct mbuf *
+struct mbuf *
alloc_raw_wr_mbuf(int len)
{
struct mbuf *m;
diff --git a/sys/dev/cxgbe/tom/t4_tls.c b/sys/dev/cxgbe/tom/t4_tls.c
index 27c16b9988ae..ad72c6a6b025 100644
--- a/sys/dev/cxgbe/tom/t4_tls.c
+++ b/sys/dev/cxgbe/tom/t4_tls.c
@@ -61,11 +61,21 @@
static void
t4_set_tls_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask,
- uint64_t val)
+ uint64_t val, int reply, int cookie)
{
struct adapter *sc = td_adapter(toep->td);
+ struct mbuf *m;
+
+ m = alloc_raw_wr_mbuf(sizeof(struct cpl_set_tcb_field));
+ if (m == NULL) {
+ /* XXX */
+ panic("%s: out of memory", __func__);
+ }
- t4_set_tcb_field(sc, &toep->ofld_txq->wrq, toep, word, mask, val, 0, 0);
+ write_set_tcb_field(sc, mtod(m, void *), toep, word, mask, val, reply,
+ cookie);
+
+ t4_raw_wr_tx(sc, toep, m);
}
/* TLS and DTLS common routines */
@@ -88,10 +98,9 @@ tls_tx_key(struct toepcb *toep)
static void
t4_set_rx_quiesce(struct toepcb *toep)
{
- struct adapter *sc = td_adapter(toep->td);
- t4_set_tcb_field(sc, &toep->ofld_txq->wrq, toep, W_TCB_T_FLAGS,
- V_TF_RX_QUIESCE(1), V_TF_RX_QUIESCE(1), 1, CPL_COOKIE_TOM);
+ t4_set_tls_tcb_field(toep, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1),
+ V_TF_RX_QUIESCE(1), 1, CPL_COOKIE_TOM);
}
/* Clear TF_RX_QUIESCE to re-enable receive. */
@@ -99,7 +108,7 @@ static void
t4_clear_rx_quiesce(struct toepcb *toep)
{
- t4_set_tls_tcb_field(toep, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1), 0);
+ t4_set_tls_tcb_field(toep, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1), 0, 0, 0);
}
/* TLS/DTLS content type for CPL SFO */
@@ -145,16 +154,15 @@ get_tp_plen_max(struct ktls_session *tls)
return (tls->params.max_frame_len <= 8192 ? plen : FC_TP_PLEN_MAX);
}
-/* Send request to get the key-id */
+/* Send request to save the key in on-card memory. */
static int
tls_program_key_id(struct toepcb *toep, struct ktls_session *tls,
int direction)
{
struct tls_ofld_info *tls_ofld = &toep->tls;
struct adapter *sc = td_adapter(toep->td);
- struct ofld_tx_sdesc *txsd;
int keyid;
- struct wrqe *wr;
+ struct mbuf *m;
struct tls_key_req *kwr;
struct tls_keyctx *kctx;
@@ -173,12 +181,12 @@ tls_program_key_id(struct toepcb *toep, struct ktls_session *tls,
return (ENOSPC);
}
- wr = alloc_wrqe(TLS_KEY_WR_SZ, &toep->ofld_txq->wrq);
- if (wr == NULL) {
+ m = alloc_raw_wr_mbuf(TLS_KEY_WR_SZ);
+ if (m == NULL) {
t4_free_tls_keyid(sc, keyid);
return (ENOMEM);
}
- kwr = wrtod(wr);
+ kwr = mtod(m, struct tls_key_req *);
memset(kwr, 0, TLS_KEY_WR_SZ);
t4_write_tlskey_wr(tls, direction, toep->tid, F_FW_WR_COMPL, keyid,
@@ -190,15 +198,7 @@ tls_program_key_id(struct toepcb *toep, struct ktls_session *tls,
tls_ofld->rx_key_addr = keyid;
t4_tls_key_ctx(tls, direction, kctx);
- txsd = &toep->txsd[toep->txsd_pidx];
- txsd->tx_credits = DIV_ROUND_UP(TLS_KEY_WR_SZ, 16);
- txsd->plen = 0;
- toep->tx_credits -= txsd->tx_credits;
- if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
- toep->txsd_pidx = 0;
- toep->txsd_avail--;
-
- t4_wrq_tx(sc, wr);
+ t4_raw_wr_tx(sc, toep, m);
return (0);
}
@@ -494,6 +494,7 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
struct tcpcb *tp = intotcpcb(inp);
struct socket *so = inp->inp_socket;
struct sockbuf *sb = &so->so_snd;
+ struct mbufq *pduq = &toep->ulp_pduq;
int tls_size, tx_credits, shove, sowwakeup;
struct ofld_tx_sdesc *txsd;
char *buf;
@@ -536,6 +537,18 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
for (;;) {
tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
+ if (__predict_false((m = mbufq_first(pduq)) != NULL)) {
+ if (!t4_push_raw_wr(sc, toep, m)) {
+ toep->flags |= TPF_TX_SUSPENDED;
+ return;
+ }
+
+ (void)mbufq_dequeue(pduq);
+
+ txsd = &toep->txsd[toep->txsd_pidx];
+ continue;
+ }
+
SOCKBUF_LOCK(sb);
sowwakeup = drop;
if (drop) {
@@ -694,6 +707,8 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
toep->flags |= TPF_TX_SUSPENDED;
KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
+ KASSERT(m->m_len <= MAX_OFLD_TX_SDESC_PLEN,
+ ("%s: plen %u too large", __func__, m->m_len));
txsd->plen = m->m_len;
txsd->tx_credits = credits;
txsd++;
@@ -1082,7 +1097,7 @@ out:
static void
tls_update_tcb(struct adapter *sc, struct toepcb *toep, uint64_t seqno)
{
- struct wrqe *wr;
+ struct mbuf *m;
struct work_request_hdr *wrh;
struct ulp_txpkt *ulpmc;
int fields, key_offset, len;
@@ -1111,14 +1126,14 @@ tls_update_tcb(struct adapter *sc, struct toepcb *toep, uint64_t seqno)
KASSERT(len <= SGE_MAX_WR_LEN,
("%s: WR with %d TCB field updates too large", __func__, fields));
- wr = alloc_wrqe(len, toep->ctrlq);
- if (wr == NULL) {
+ m = alloc_raw_wr_mbuf(len);
+ if (m == NULL) {
/* XXX */
panic("%s: out of memory", __func__);
}
- wrh = wrtod(wr);
- INIT_ULPTX_WRH(wrh, len, 1, 0); /* atomic */
+ wrh = mtod(m, struct work_request_hdr *);
+ INIT_ULPTX_WRH(wrh, len, 1, toep->tid); /* atomic */
ulpmc = (struct ulp_txpkt *)(wrh + 1);
/*
@@ -1163,7 +1178,7 @@ tls_update_tcb(struct adapter *sc, struct toepcb *toep, uint64_t seqno)
ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_T_FLAGS,
V_TF_RX_QUIESCE(1), 0);
- t4_wrq_tx(sc, wr);
+ t4_raw_wr_tx(sc, toep, m);
}
/*
diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c
index 9b09facd05a7..0b54fdaa5c80 100644
--- a/sys/dev/cxgbe/tom/t4_tom.c
+++ b/sys/dev/cxgbe/tom/t4_tom.c
@@ -882,6 +882,8 @@ send_mss_flowc_wr(struct adapter *sc, struct toepcb *toep)
flowc->mnemval[0].val = htobe32(toep->params.emss);
txsd = &toep->txsd[toep->txsd_pidx];
+ KASSERT(flowclen16 <= MAX_OFLD_TX_SDESC_CREDITS,
+ ("MAX_OFLD_TX_SDESC_CREDITS too small"));
txsd->tx_credits = flowclen16;
txsd->plen = 0;
toep->tx_credits -= txsd->tx_credits;
diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h
index 6295a3484b9f..4fb87d92d91e 100644
--- a/sys/dev/cxgbe/tom/t4_tom.h
+++ b/sys/dev/cxgbe/tom/t4_tom.h
@@ -122,10 +122,13 @@ struct conn_params {
};
struct ofld_tx_sdesc {
- uint32_t plen; /* payload length */
- uint8_t tx_credits; /* firmware tx credits (unit is 16B) */
+ uint32_t plen : 26; /* payload length */
+ uint32_t tx_credits : 6; /* firmware tx credits (unit is 16B) */
};
+#define MAX_OFLD_TX_SDESC_PLEN ((1u << 26) - 1)
+#define MAX_OFLD_TX_SDESC_CREDITS ((1u << 6) - 1)
+
struct ppod_region {
u_int pr_start;
u_int pr_len;
@@ -526,6 +529,10 @@ int t4_send_rst(struct toedev *, struct tcpcb *);
void t4_set_tcb_field(struct adapter *, struct sge_wrq *, struct toepcb *,
uint16_t, uint64_t, uint64_t, int, int);
void t4_push_pdus(struct adapter *, struct toepcb *, int);
+bool t4_push_raw_wr(struct adapter *, struct toepcb *, struct mbuf *);
+void t4_raw_wr_tx(struct adapter *, struct toepcb *, struct mbuf *);
+void write_set_tcb_field(struct adapter *, void *, struct toepcb *, uint16_t,
+ uint64_t, uint64_t, int, int);
/* t4_ddp.c */
int t4_init_ppod_region(struct ppod_region *, struct t4_range *, u_int,
@@ -551,6 +558,7 @@ int t4_aio_queue_ddp(struct socket *, struct kaiocb *);
int t4_enable_ddp_rcv(struct socket *, struct toepcb *);
void t4_ddp_mod_load(void);
void t4_ddp_mod_unload(void);
+struct mbuf *alloc_raw_wr_mbuf(int);
void ddp_assert_empty(struct toepcb *);
void ddp_uninit_toep(struct toepcb *);
void ddp_queue_toep(struct toepcb *);
diff --git a/sys/dev/cyapa/cyapa.c b/sys/dev/cyapa/cyapa.c
index 50fa4faa560a..ed755f992949 100644
--- a/sys/dev/cyapa/cyapa.c
+++ b/sys/dev/cyapa/cyapa.c
@@ -761,42 +761,60 @@ again:
/*
* Generate report
*/
- c0 = 0;
- if (delta_x < 0)
- c0 |= 0x10;
- if (delta_y < 0)
- c0 |= 0x20;
- c0 |= 0x08;
- if (but & CYAPA_FNGR_LEFT)
- c0 |= 0x01;
- if (but & CYAPA_FNGR_MIDDLE)
- c0 |= 0x04;
- if (but & CYAPA_FNGR_RIGHT)
- c0 |= 0x02;
-
- fifo_write_char(sc, &sc->rfifo, c0);
- fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_x);
- fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_y);
- switch(sc->zenabled) {
- case 1:
- /* Z axis all 8 bits */
- fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_z);
- break;
- case 2:
- /*
- * Z axis low 4 bits + 4th button and 5th button
- * (high 2 bits must be left 0). Auto-scale
- * delta_z to fit to avoid a wrong-direction
- * overflow (don't try to retain the remainder).
- */
- while (delta_z > 7 || delta_z < -8)
- delta_z >>= 1;
- c0 = (uint8_t)delta_z & 0x0F;
+ if (sc->mode.level == 1) {
+ c0 = MOUSE_SYS_SYNC;
+ if (but & CYAPA_FNGR_LEFT)
+ c0 |= MOUSE_SYS_BUTTON1UP;
+ if (but & CYAPA_FNGR_MIDDLE)
+ c0 |= MOUSE_SYS_BUTTON2UP;
+ if (but & CYAPA_FNGR_RIGHT)
+ c0 |= MOUSE_SYS_BUTTON3UP;
fifo_write_char(sc, &sc->rfifo, c0);
- break;
- default:
- /* basic PS/2 */
- break;
+ fifo_write_char(sc, &sc->rfifo, delta_x >> 1);
+ fifo_write_char(sc, &sc->rfifo, delta_y >> 1);
+ fifo_write_char(sc, &sc->rfifo, delta_x - (delta_x >> 1));
+ fifo_write_char(sc, &sc->rfifo, delta_y - (delta_y >> 1));
+ fifo_write_char(sc, &sc->rfifo, delta_z >> 1);
+ fifo_write_char(sc, &sc->rfifo, delta_z - (delta_z >> 1));
+ fifo_write_char(sc, &sc->rfifo, MOUSE_SYS_EXTBUTTONS);
+ } else {
+ c0 = 0;
+ if (delta_x < 0)
+ c0 |= 0x10;
+ if (delta_y < 0)
+ c0 |= 0x20;
+ c0 |= 0x08;
+ if (but & CYAPA_FNGR_LEFT)
+ c0 |= 0x01;
+ if (but & CYAPA_FNGR_MIDDLE)
+ c0 |= 0x04;
+ if (but & CYAPA_FNGR_RIGHT)
+ c0 |= 0x02;
+
+ fifo_write_char(sc, &sc->rfifo, c0);
+ fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_x);
+ fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_y);
+ switch(sc->zenabled) {
+ case 1:
+ /* Z axis all 8 bits */
+ fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_z);
+ break;
+ case 2:
+ /*
+ * Z axis low 4 bits + 4th button and 5th button
+ * (high 2 bits must be left 0). Auto-scale
+ * delta_z to fit to avoid a wrong-direction
+ * overflow (don't try to retain the remainder).
+ */
+ while (delta_z > 7 || delta_z < -8)
+ delta_z >>= 1;
+ c0 = (uint8_t)delta_z & 0x0F;
+ fifo_write_char(sc, &sc->rfifo, c0);
+ break;
+ default:
+ /* basic PS/2 */
+ break;
+ }
}
cyapa_notify(sc);
}
@@ -1205,6 +1223,11 @@ cyapaioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread
((mousemode_t *)data)->packetsize =
MOUSE_PS2_PACKETSIZE;
break;
+ case 1:
+ ((mousemode_t *)data)->protocol = MOUSE_PROTO_SYSMOUSE;
+ ((mousemode_t *)data)->packetsize =
+ MOUSE_SYS_PACKETSIZE;
+ break;
case 2:
((mousemode_t *)data)->protocol = MOUSE_PROTO_PS2;
((mousemode_t *)data)->packetsize =
@@ -1223,7 +1246,7 @@ cyapaioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread
error = EINVAL;
break;
}
- sc->mode.level = *(int *)data ? 2 : 0;
+ sc->mode.level = *(int *)data;
sc->zenabled = sc->mode.level ? 1 : 0;
break;
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index 9c5ae2806f75..20df466b4e76 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -407,6 +407,7 @@ static int em_if_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int,
int);
static void em_if_queues_free(if_ctx_t);
+static uint64_t em_if_get_vf_counter(if_ctx_t, ift_counter);
static uint64_t em_if_get_counter(if_ctx_t, ift_counter);
static void em_if_init(if_ctx_t);
static void em_if_stop(if_ctx_t);
@@ -440,6 +441,7 @@ static int igb_if_tx_queue_intr_enable(if_ctx_t, uint16_t);
static void em_if_multi_set(if_ctx_t);
static void em_if_update_admin_status(if_ctx_t);
static void em_if_debug(if_ctx_t);
+static void em_update_vf_stats_counters(struct e1000_softc *);
static void em_update_stats_counters(struct e1000_softc *);
static void em_add_hw_stats(struct e1000_softc *);
static int em_if_set_promisc(if_ctx_t, int);
@@ -1377,6 +1379,11 @@ em_if_attach_post(if_ctx_t ctx)
em_reset(ctx);
/* Initialize statistics */
+ if (sc->vf_ifp)
+ sc->ustats.vf_stats = (struct e1000_vf_stats){};
+ else
+ sc->ustats.stats = (struct e1000_hw_stats){};
+
em_update_stats_counters(sc);
hw->mac.get_link_status = 1;
em_if_update_admin_status(ctx);
@@ -4668,122 +4675,175 @@ em_disable_aspm(struct e1000_softc *sc)
static void
em_update_stats_counters(struct e1000_softc *sc)
{
- u64 prev_xoffrxc = sc->stats.xoffrxc;
+ struct e1000_hw_stats *stats;
+ u64 prev_xoffrxc;
+
+ if (sc->vf_ifp) {
+ em_update_vf_stats_counters(sc);
+ return;
+ }
+
+ stats = &sc->ustats.stats;
+ prev_xoffrxc = stats->xoffrxc;
if(sc->hw.phy.media_type == e1000_media_type_copper ||
(E1000_READ_REG(&sc->hw, E1000_STATUS) & E1000_STATUS_LU)) {
- sc->stats.symerrs += E1000_READ_REG(&sc->hw, E1000_SYMERRS);
- sc->stats.sec += E1000_READ_REG(&sc->hw, E1000_SEC);
- }
- sc->stats.crcerrs += E1000_READ_REG(&sc->hw, E1000_CRCERRS);
- sc->stats.mpc += E1000_READ_REG(&sc->hw, E1000_MPC);
- sc->stats.scc += E1000_READ_REG(&sc->hw, E1000_SCC);
- sc->stats.ecol += E1000_READ_REG(&sc->hw, E1000_ECOL);
-
- sc->stats.mcc += E1000_READ_REG(&sc->hw, E1000_MCC);
- sc->stats.latecol += E1000_READ_REG(&sc->hw, E1000_LATECOL);
- sc->stats.colc += E1000_READ_REG(&sc->hw, E1000_COLC);
- sc->stats.dc += E1000_READ_REG(&sc->hw, E1000_DC);
- sc->stats.rlec += E1000_READ_REG(&sc->hw, E1000_RLEC);
- sc->stats.xonrxc += E1000_READ_REG(&sc->hw, E1000_XONRXC);
- sc->stats.xontxc += E1000_READ_REG(&sc->hw, E1000_XONTXC);
- sc->stats.xoffrxc += E1000_READ_REG(&sc->hw, E1000_XOFFRXC);
+ stats->symerrs += E1000_READ_REG(&sc->hw, E1000_SYMERRS);
+ stats->sec += E1000_READ_REG(&sc->hw, E1000_SEC);
+ }
+ stats->crcerrs += E1000_READ_REG(&sc->hw, E1000_CRCERRS);
+ stats->mpc += E1000_READ_REG(&sc->hw, E1000_MPC);
+ stats->scc += E1000_READ_REG(&sc->hw, E1000_SCC);
+ stats->ecol += E1000_READ_REG(&sc->hw, E1000_ECOL);
+
+ stats->mcc += E1000_READ_REG(&sc->hw, E1000_MCC);
+ stats->latecol += E1000_READ_REG(&sc->hw, E1000_LATECOL);
+ stats->colc += E1000_READ_REG(&sc->hw, E1000_COLC);
+ stats->dc += E1000_READ_REG(&sc->hw, E1000_DC);
+ stats->rlec += E1000_READ_REG(&sc->hw, E1000_RLEC);
+ stats->xonrxc += E1000_READ_REG(&sc->hw, E1000_XONRXC);
+ stats->xontxc += E1000_READ_REG(&sc->hw, E1000_XONTXC);
+ stats->xoffrxc += E1000_READ_REG(&sc->hw, E1000_XOFFRXC);
/*
** For watchdog management we need to know if we have been
** paused during the last interval, so capture that here.
*/
- if (sc->stats.xoffrxc != prev_xoffrxc)
+ if (stats->xoffrxc != prev_xoffrxc)
sc->shared->isc_pause_frames = 1;
- sc->stats.xofftxc += E1000_READ_REG(&sc->hw, E1000_XOFFTXC);
- sc->stats.fcruc += E1000_READ_REG(&sc->hw, E1000_FCRUC);
- sc->stats.prc64 += E1000_READ_REG(&sc->hw, E1000_PRC64);
- sc->stats.prc127 += E1000_READ_REG(&sc->hw, E1000_PRC127);
- sc->stats.prc255 += E1000_READ_REG(&sc->hw, E1000_PRC255);
- sc->stats.prc511 += E1000_READ_REG(&sc->hw, E1000_PRC511);
- sc->stats.prc1023 += E1000_READ_REG(&sc->hw, E1000_PRC1023);
- sc->stats.prc1522 += E1000_READ_REG(&sc->hw, E1000_PRC1522);
- sc->stats.gprc += E1000_READ_REG(&sc->hw, E1000_GPRC);
- sc->stats.bprc += E1000_READ_REG(&sc->hw, E1000_BPRC);
- sc->stats.mprc += E1000_READ_REG(&sc->hw, E1000_MPRC);
- sc->stats.gptc += E1000_READ_REG(&sc->hw, E1000_GPTC);
+ stats->xofftxc += E1000_READ_REG(&sc->hw, E1000_XOFFTXC);
+ stats->fcruc += E1000_READ_REG(&sc->hw, E1000_FCRUC);
+ stats->prc64 += E1000_READ_REG(&sc->hw, E1000_PRC64);
+ stats->prc127 += E1000_READ_REG(&sc->hw, E1000_PRC127);
+ stats->prc255 += E1000_READ_REG(&sc->hw, E1000_PRC255);
+ stats->prc511 += E1000_READ_REG(&sc->hw, E1000_PRC511);
+ stats->prc1023 += E1000_READ_REG(&sc->hw, E1000_PRC1023);
+ stats->prc1522 += E1000_READ_REG(&sc->hw, E1000_PRC1522);
+ stats->gprc += E1000_READ_REG(&sc->hw, E1000_GPRC);
+ stats->bprc += E1000_READ_REG(&sc->hw, E1000_BPRC);
+ stats->mprc += E1000_READ_REG(&sc->hw, E1000_MPRC);
+ stats->gptc += E1000_READ_REG(&sc->hw, E1000_GPTC);
/* For the 64-bit byte counters the low dword must be read first. */
/* Both registers clear on the read of the high dword */
- sc->stats.gorc += E1000_READ_REG(&sc->hw, E1000_GORCL) +
+ stats->gorc += E1000_READ_REG(&sc->hw, E1000_GORCL) +
((u64)E1000_READ_REG(&sc->hw, E1000_GORCH) << 32);
- sc->stats.gotc += E1000_READ_REG(&sc->hw, E1000_GOTCL) +
+ stats->gotc += E1000_READ_REG(&sc->hw, E1000_GOTCL) +
((u64)E1000_READ_REG(&sc->hw, E1000_GOTCH) << 32);
- sc->stats.rnbc += E1000_READ_REG(&sc->hw, E1000_RNBC);
- sc->stats.ruc += E1000_READ_REG(&sc->hw, E1000_RUC);
- sc->stats.rfc += E1000_READ_REG(&sc->hw, E1000_RFC);
- sc->stats.roc += E1000_READ_REG(&sc->hw, E1000_ROC);
- sc->stats.rjc += E1000_READ_REG(&sc->hw, E1000_RJC);
-
- sc->stats.mgprc += E1000_READ_REG(&sc->hw, E1000_MGTPRC);
- sc->stats.mgpdc += E1000_READ_REG(&sc->hw, E1000_MGTPDC);
- sc->stats.mgptc += E1000_READ_REG(&sc->hw, E1000_MGTPTC);
-
- sc->stats.tor += E1000_READ_REG(&sc->hw, E1000_TORH);
- sc->stats.tot += E1000_READ_REG(&sc->hw, E1000_TOTH);
-
- sc->stats.tpr += E1000_READ_REG(&sc->hw, E1000_TPR);
- sc->stats.tpt += E1000_READ_REG(&sc->hw, E1000_TPT);
- sc->stats.ptc64 += E1000_READ_REG(&sc->hw, E1000_PTC64);
- sc->stats.ptc127 += E1000_READ_REG(&sc->hw, E1000_PTC127);
- sc->stats.ptc255 += E1000_READ_REG(&sc->hw, E1000_PTC255);
- sc->stats.ptc511 += E1000_READ_REG(&sc->hw, E1000_PTC511);
- sc->stats.ptc1023 += E1000_READ_REG(&sc->hw, E1000_PTC1023);
- sc->stats.ptc1522 += E1000_READ_REG(&sc->hw, E1000_PTC1522);
- sc->stats.mptc += E1000_READ_REG(&sc->hw, E1000_MPTC);
- sc->stats.bptc += E1000_READ_REG(&sc->hw, E1000_BPTC);
+ stats->rnbc += E1000_READ_REG(&sc->hw, E1000_RNBC);
+ stats->ruc += E1000_READ_REG(&sc->hw, E1000_RUC);
+ stats->rfc += E1000_READ_REG(&sc->hw, E1000_RFC);
+ stats->roc += E1000_READ_REG(&sc->hw, E1000_ROC);
+ stats->rjc += E1000_READ_REG(&sc->hw, E1000_RJC);
+
+ stats->mgprc += E1000_READ_REG(&sc->hw, E1000_MGTPRC);
+ stats->mgpdc += E1000_READ_REG(&sc->hw, E1000_MGTPDC);
+ stats->mgptc += E1000_READ_REG(&sc->hw, E1000_MGTPTC);
+
+ stats->tor += E1000_READ_REG(&sc->hw, E1000_TORH);
+ stats->tot += E1000_READ_REG(&sc->hw, E1000_TOTH);
+
+ stats->tpr += E1000_READ_REG(&sc->hw, E1000_TPR);
+ stats->tpt += E1000_READ_REG(&sc->hw, E1000_TPT);
+ stats->ptc64 += E1000_READ_REG(&sc->hw, E1000_PTC64);
+ stats->ptc127 += E1000_READ_REG(&sc->hw, E1000_PTC127);
+ stats->ptc255 += E1000_READ_REG(&sc->hw, E1000_PTC255);
+ stats->ptc511 += E1000_READ_REG(&sc->hw, E1000_PTC511);
+ stats->ptc1023 += E1000_READ_REG(&sc->hw, E1000_PTC1023);
+ stats->ptc1522 += E1000_READ_REG(&sc->hw, E1000_PTC1522);
+ stats->mptc += E1000_READ_REG(&sc->hw, E1000_MPTC);
+ stats->bptc += E1000_READ_REG(&sc->hw, E1000_BPTC);
/* Interrupt Counts */
- sc->stats.iac += E1000_READ_REG(&sc->hw, E1000_IAC);
- sc->stats.icrxptc += E1000_READ_REG(&sc->hw, E1000_ICRXPTC);
- sc->stats.icrxatc += E1000_READ_REG(&sc->hw, E1000_ICRXATC);
- sc->stats.ictxptc += E1000_READ_REG(&sc->hw, E1000_ICTXPTC);
- sc->stats.ictxatc += E1000_READ_REG(&sc->hw, E1000_ICTXATC);
- sc->stats.ictxqec += E1000_READ_REG(&sc->hw, E1000_ICTXQEC);
- sc->stats.ictxqmtc += E1000_READ_REG(&sc->hw, E1000_ICTXQMTC);
- sc->stats.icrxdmtc += E1000_READ_REG(&sc->hw, E1000_ICRXDMTC);
- sc->stats.icrxoc += E1000_READ_REG(&sc->hw, E1000_ICRXOC);
+ stats->iac += E1000_READ_REG(&sc->hw, E1000_IAC);
+ stats->icrxptc += E1000_READ_REG(&sc->hw, E1000_ICRXPTC);
+ stats->icrxatc += E1000_READ_REG(&sc->hw, E1000_ICRXATC);
+ stats->ictxptc += E1000_READ_REG(&sc->hw, E1000_ICTXPTC);
+ stats->ictxatc += E1000_READ_REG(&sc->hw, E1000_ICTXATC);
+ stats->ictxqec += E1000_READ_REG(&sc->hw, E1000_ICTXQEC);
+ stats->ictxqmtc += E1000_READ_REG(&sc->hw, E1000_ICTXQMTC);
+ stats->icrxdmtc += E1000_READ_REG(&sc->hw, E1000_ICRXDMTC);
+ stats->icrxoc += E1000_READ_REG(&sc->hw, E1000_ICRXOC);
if (sc->hw.mac.type >= e1000_82543) {
- sc->stats.algnerrc +=
+ stats->algnerrc +=
E1000_READ_REG(&sc->hw, E1000_ALGNERRC);
- sc->stats.rxerrc +=
+ stats->rxerrc +=
E1000_READ_REG(&sc->hw, E1000_RXERRC);
- sc->stats.tncrs +=
+ stats->tncrs +=
E1000_READ_REG(&sc->hw, E1000_TNCRS);
- sc->stats.cexterr +=
+ stats->cexterr +=
E1000_READ_REG(&sc->hw, E1000_CEXTERR);
- sc->stats.tsctc +=
+ stats->tsctc +=
E1000_READ_REG(&sc->hw, E1000_TSCTC);
- sc->stats.tsctfc +=
+ stats->tsctfc +=
E1000_READ_REG(&sc->hw, E1000_TSCTFC);
}
}
+static void
+em_update_vf_stats_counters(struct e1000_softc *sc)
+{
+ struct e1000_vf_stats *stats;
+
+ if (sc->link_speed == 0)
+ return;
+
+ stats = &sc->ustats.vf_stats;
+
+ UPDATE_VF_REG(E1000_VFGPRC,
+ stats->last_gprc, stats->gprc);
+ UPDATE_VF_REG(E1000_VFGORC,
+ stats->last_gorc, stats->gorc);
+ UPDATE_VF_REG(E1000_VFGPTC,
+ stats->last_gptc, stats->gptc);
+ UPDATE_VF_REG(E1000_VFGOTC,
+ stats->last_gotc, stats->gotc);
+ UPDATE_VF_REG(E1000_VFMPRC,
+ stats->last_mprc, stats->mprc);
+}
+
+static uint64_t
+em_if_get_vf_counter(if_ctx_t ctx, ift_counter cnt)
+{
+ struct e1000_softc *sc = iflib_get_softc(ctx);
+ if_t ifp = iflib_get_ifp(ctx);
+
+ switch (cnt) {
+ case IFCOUNTER_IERRORS:
+ return (sc->dropped_pkts);
+ case IFCOUNTER_OERRORS:
+ return (sc->watchdog_events);
+ default:
+ return (if_get_counter_default(ifp, cnt));
+ }
+}
+
static uint64_t
em_if_get_counter(if_ctx_t ctx, ift_counter cnt)
{
struct e1000_softc *sc = iflib_get_softc(ctx);
+ struct e1000_hw_stats *stats;
if_t ifp = iflib_get_ifp(ctx);
+ if (sc->vf_ifp)
+ return (em_if_get_vf_counter(ctx, cnt));
+
+ stats = &sc->ustats.stats;
+
switch (cnt) {
case IFCOUNTER_COLLISIONS:
- return (sc->stats.colc);
+ return (stats->colc);
case IFCOUNTER_IERRORS:
- return (sc->dropped_pkts + sc->stats.rxerrc +
- sc->stats.crcerrs + sc->stats.algnerrc +
- sc->stats.ruc + sc->stats.roc +
- sc->stats.mpc + sc->stats.cexterr);
+ return (sc->dropped_pkts + stats->rxerrc +
+ stats->crcerrs + stats->algnerrc +
+ stats->ruc + stats->roc +
+ stats->mpc + stats->cexterr);
case IFCOUNTER_OERRORS:
- return (sc->stats.ecol + sc->stats.latecol +
- sc->watchdog_events);
+ return (if_get_counter_default(ifp, cnt) +
+ stats->ecol + stats->latecol + sc->watchdog_events);
default:
return (if_get_counter_default(ifp, cnt));
}
@@ -4884,7 +4944,7 @@ em_add_hw_stats(struct e1000_softc *sc)
struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
struct sysctl_oid *tree = device_get_sysctl_tree(dev);
struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
- struct e1000_hw_stats *stats = &sc->stats;
+ struct e1000_hw_stats *stats;
struct sysctl_oid *stat_node, *queue_node, *int_node;
struct sysctl_oid_list *stat_list, *queue_list, *int_list;
@@ -4975,6 +5035,33 @@ em_add_hw_stats(struct e1000_softc *sc)
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics");
stat_list = SYSCTL_CHILDREN(stat_node);
+ /*
+ ** The VF adapter has a very limited set of stats
+ ** since it's not managing the metal, so to speak.
+ */
+ if (sc->vf_ifp) {
+ struct e1000_vf_stats *vfstats = &sc->ustats.vf_stats;
+
+ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
+ CTLFLAG_RD, &vfstats->gprc,
+ "Good Packets Received");
+ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
+ CTLFLAG_RD, &vfstats->gptc,
+ "Good Packets Transmitted");
+ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
+ CTLFLAG_RD, &vfstats->gorc,
+ "Good Octets Received");
+ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
+ CTLFLAG_RD, &vfstats->gotc,
+ "Good Octets Transmitted");
+ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
+ CTLFLAG_RD, &vfstats->mprc,
+ "Multicast Packets Received");
+ return;
+ }
+
+ stats = &sc->ustats.stats;
+
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
CTLFLAG_RD, &stats->ecol,
"Excessive collisions");
@@ -4991,147 +5078,147 @@ em_add_hw_stats(struct e1000_softc *sc)
CTLFLAG_RD, &stats->colc,
"Collision Count");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
- CTLFLAG_RD, &sc->stats.symerrs,
+ CTLFLAG_RD, &stats->symerrs,
"Symbol Errors");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
- CTLFLAG_RD, &sc->stats.sec,
+ CTLFLAG_RD, &stats->sec,
"Sequence Errors");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
- CTLFLAG_RD, &sc->stats.dc,
+ CTLFLAG_RD, &stats->dc,
"Defer Count");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
- CTLFLAG_RD, &sc->stats.mpc,
+ CTLFLAG_RD, &stats->mpc,
"Missed Packets");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
- CTLFLAG_RD, &sc->stats.rlec,
+ CTLFLAG_RD, &stats->rlec,
"Receive Length Errors");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
- CTLFLAG_RD, &sc->stats.rnbc,
+ CTLFLAG_RD, &stats->rnbc,
"Receive No Buffers");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
- CTLFLAG_RD, &sc->stats.ruc,
+ CTLFLAG_RD, &stats->ruc,
"Receive Undersize");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
- CTLFLAG_RD, &sc->stats.rfc,
+ CTLFLAG_RD, &stats->rfc,
"Fragmented Packets Received ");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
- CTLFLAG_RD, &sc->stats.roc,
+ CTLFLAG_RD, &stats->roc,
"Oversized Packets Received");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
- CTLFLAG_RD, &sc->stats.rjc,
+ CTLFLAG_RD, &stats->rjc,
"Recevied Jabber");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
- CTLFLAG_RD, &sc->stats.rxerrc,
+ CTLFLAG_RD, &stats->rxerrc,
"Receive Errors");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
- CTLFLAG_RD, &sc->stats.crcerrs,
+ CTLFLAG_RD, &stats->crcerrs,
"CRC errors");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
- CTLFLAG_RD, &sc->stats.algnerrc,
+ CTLFLAG_RD, &stats->algnerrc,
"Alignment Errors");
/* On 82575 these are collision counts */
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
- CTLFLAG_RD, &sc->stats.cexterr,
+ CTLFLAG_RD, &stats->cexterr,
"Collision/Carrier extension errors");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
- CTLFLAG_RD, &sc->stats.xonrxc,
+ CTLFLAG_RD, &stats->xonrxc,
"XON Received");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
- CTLFLAG_RD, &sc->stats.xontxc,
+ CTLFLAG_RD, &stats->xontxc,
"XON Transmitted");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
- CTLFLAG_RD, &sc->stats.xoffrxc,
+ CTLFLAG_RD, &stats->xoffrxc,
"XOFF Received");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
- CTLFLAG_RD, &sc->stats.xofftxc,
+ CTLFLAG_RD, &stats->xofftxc,
"XOFF Transmitted");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
- CTLFLAG_RD, &sc->stats.fcruc,
+ CTLFLAG_RD, &stats->fcruc,
"Unsupported Flow Control Received");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
- CTLFLAG_RD, &sc->stats.mgprc,
+ CTLFLAG_RD, &stats->mgprc,
"Management Packets Received");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
- CTLFLAG_RD, &sc->stats.mgpdc,
+ CTLFLAG_RD, &stats->mgpdc,
"Management Packets Dropped");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
- CTLFLAG_RD, &sc->stats.mgptc,
+ CTLFLAG_RD, &stats->mgptc,
"Management Packets Transmitted");
/* Packet Reception Stats */
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
- CTLFLAG_RD, &sc->stats.tpr,
+ CTLFLAG_RD, &stats->tpr,
"Total Packets Received ");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
- CTLFLAG_RD, &sc->stats.gprc,
+ CTLFLAG_RD, &stats->gprc,
"Good Packets Received");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
- CTLFLAG_RD, &sc->stats.bprc,
+ CTLFLAG_RD, &stats->bprc,
"Broadcast Packets Received");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
- CTLFLAG_RD, &sc->stats.mprc,
+ CTLFLAG_RD, &stats->mprc,
"Multicast Packets Received");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
- CTLFLAG_RD, &sc->stats.prc64,
+ CTLFLAG_RD, &stats->prc64,
"64 byte frames received ");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
- CTLFLAG_RD, &sc->stats.prc127,
+ CTLFLAG_RD, &stats->prc127,
"65-127 byte frames received");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
- CTLFLAG_RD, &sc->stats.prc255,
+ CTLFLAG_RD, &stats->prc255,
"128-255 byte frames received");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
- CTLFLAG_RD, &sc->stats.prc511,
+ CTLFLAG_RD, &stats->prc511,
"256-511 byte frames received");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
- CTLFLAG_RD, &sc->stats.prc1023,
+ CTLFLAG_RD, &stats->prc1023,
"512-1023 byte frames received");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
- CTLFLAG_RD, &sc->stats.prc1522,
+ CTLFLAG_RD, &stats->prc1522,
"1023-1522 byte frames received");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
- CTLFLAG_RD, &sc->stats.gorc,
+ CTLFLAG_RD, &stats->gorc,
"Good Octets Received");
/* Packet Transmission Stats */
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
- CTLFLAG_RD, &sc->stats.gotc,
+ CTLFLAG_RD, &stats->gotc,
"Good Octets Transmitted");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
- CTLFLAG_RD, &sc->stats.tpt,
+ CTLFLAG_RD, &stats->tpt,
"Total Packets Transmitted");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
- CTLFLAG_RD, &sc->stats.gptc,
+ CTLFLAG_RD, &stats->gptc,
"Good Packets Transmitted");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
- CTLFLAG_RD, &sc->stats.bptc,
+ CTLFLAG_RD, &stats->bptc,
"Broadcast Packets Transmitted");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
- CTLFLAG_RD, &sc->stats.mptc,
+ CTLFLAG_RD, &stats->mptc,
"Multicast Packets Transmitted");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
- CTLFLAG_RD, &sc->stats.ptc64,
+ CTLFLAG_RD, &stats->ptc64,
"64 byte frames transmitted ");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
- CTLFLAG_RD, &sc->stats.ptc127,
+ CTLFLAG_RD, &stats->ptc127,
"65-127 byte frames transmitted");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
- CTLFLAG_RD, &sc->stats.ptc255,
+ CTLFLAG_RD, &stats->ptc255,
"128-255 byte frames transmitted");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
- CTLFLAG_RD, &sc->stats.ptc511,
+ CTLFLAG_RD, &stats->ptc511,
"256-511 byte frames transmitted");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
- CTLFLAG_RD, &sc->stats.ptc1023,
+ CTLFLAG_RD, &stats->ptc1023,
"512-1023 byte frames transmitted");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
- CTLFLAG_RD, &sc->stats.ptc1522,
+ CTLFLAG_RD, &stats->ptc1522,
"1024-1522 byte frames transmitted");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
- CTLFLAG_RD, &sc->stats.tsctc,
+ CTLFLAG_RD, &stats->tsctc,
"TSO Contexts Transmitted");
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
- CTLFLAG_RD, &sc->stats.tsctfc,
+ CTLFLAG_RD, &stats->tsctfc,
"TSO Contexts Failed");
/* Interrupt Stats */
@@ -5140,39 +5227,39 @@ em_add_hw_stats(struct e1000_softc *sc)
int_list = SYSCTL_CHILDREN(int_node);
SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
- CTLFLAG_RD, &sc->stats.iac,
+ CTLFLAG_RD, &stats->iac,
"Interrupt Assertion Count");
SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
- CTLFLAG_RD, &sc->stats.icrxptc,
+ CTLFLAG_RD, &stats->icrxptc,
"Interrupt Cause Rx Pkt Timer Expire Count");
SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
- CTLFLAG_RD, &sc->stats.icrxatc,
+ CTLFLAG_RD, &stats->icrxatc,
"Interrupt Cause Rx Abs Timer Expire Count");
SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
- CTLFLAG_RD, &sc->stats.ictxptc,
+ CTLFLAG_RD, &stats->ictxptc,
"Interrupt Cause Tx Pkt Timer Expire Count");
SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
- CTLFLAG_RD, &sc->stats.ictxatc,
+ CTLFLAG_RD, &stats->ictxatc,
"Interrupt Cause Tx Abs Timer Expire Count");
SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
- CTLFLAG_RD, &sc->stats.ictxqec,
+ CTLFLAG_RD, &stats->ictxqec,
"Interrupt Cause Tx Queue Empty Count");
SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
- CTLFLAG_RD, &sc->stats.ictxqmtc,
+ CTLFLAG_RD, &stats->ictxqmtc,
"Interrupt Cause Tx Queue Min Thresh Count");
SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
- CTLFLAG_RD, &sc->stats.icrxdmtc,
+ CTLFLAG_RD, &stats->icrxdmtc,
"Interrupt Cause Rx Desc Min Thresh Count");
SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
- CTLFLAG_RD, &sc->stats.icrxoc,
+ CTLFLAG_RD, &stats->icrxoc,
"Interrupt Cause Receiver Overrun Count");
}
diff --git a/sys/dev/e1000/if_em.h b/sys/dev/e1000/if_em.h
index 52bfed0f9a42..582e8d9c6327 100644
--- a/sys/dev/e1000/if_em.h
+++ b/sys/dev/e1000/if_em.h
@@ -370,6 +370,19 @@
#define EM_NVM_MSIX_N_MASK (0x7 << EM_NVM_MSIX_N_SHIFT)
#define EM_NVM_MSIX_N_SHIFT 7
+/*
+ * VFs use 32-bit counters that roll over.
+ */
+#define UPDATE_VF_REG(reg, last, cur) \
+do { \
+ u32 new = E1000_READ_REG(&sc->hw, reg); \
+ if (new < last) \
+ cur += 0x100000000LL; \
+ last = new; \
+ cur &= 0xFFFFFFFF00000000LL; \
+ cur |= new; \
+} while (0)
+
struct e1000_softc;
struct em_int_delay_info {
@@ -546,7 +559,11 @@ struct e1000_softc {
unsigned long rx_overruns;
unsigned long watchdog_events;
- struct e1000_hw_stats stats;
+ union {
+ struct e1000_hw_stats stats; /* !sc->vf_ifp */
+ struct e1000_vf_stats vf_stats; /* sc->vf_ifp */
+ } ustats;
+
u16 vf_ifp;
};
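A short worked example of the UPDATE_VF_REG rollover handling above (values are illustrative): if the saved snapshot is last = 0xfffffff0 and the VF register now reads new = 0x00000010, then new < last, so 0x100000000 is added to the 64-bit accumulator before its low word is replaced with new. The 0x20 frames counted by hardware across the wrap therefore land in the running total instead of making it go backwards.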
diff --git a/sys/dev/enetc/if_enetc.c b/sys/dev/enetc/if_enetc.c
index 3a5d6ec23282..808397b229a7 100644
--- a/sys/dev/enetc/if_enetc.c
+++ b/sys/dev/enetc/if_enetc.c
@@ -1343,7 +1343,8 @@ enetc_get_counter(if_ctx_t ctx, ift_counter cnt)
case IFCOUNTER_IERRORS:
return (ENETC_PORT_RD8(sc, ENETC_PM0_RERR));
case IFCOUNTER_OERRORS:
- return (ENETC_PORT_RD8(sc, ENETC_PM0_TERR));
+ return (if_get_counter_default(ifp, cnt) +
+ ENETC_PORT_RD8(sc, ENETC_PM0_TERR));
default:
return (if_get_counter_default(ifp, cnt));
}
diff --git a/sys/dev/gpio/gpio_if.m b/sys/dev/gpio/gpio_if.m
index 5501b2b5c0e7..0b6988ceba79 100644
--- a/sys/dev/gpio/gpio_if.m
+++ b/sys/dev/gpio/gpio_if.m
@@ -62,6 +62,22 @@ CODE {
return (0);
}
+
+ static int
+ gpio_default_get_pin_list(device_t dev, uint32_t *pin_list)
+ {
+ uint32_t maxpin;
+ int err;
+
+ err = GPIO_PIN_MAX(dev, &maxpin);
+ if (err != 0)
+ return (ENXIO);
+
+ for (int i = 0; i <= maxpin; i++)
+ pin_list[i] = i;
+
+ return (0);
+ }
};
HEADER {
@@ -185,3 +201,13 @@ METHOD int pin_config_32 {
uint32_t num_pins;
uint32_t *pin_flags;
} DEFAULT gpio_default_nosupport;
+
+#
+# Get the controller's pin numbers. pin_list is expected to be an array with
+# at least GPIO_PIN_MAX() + 1 elements. The default implementation populates
+# pin_list with pin numbers 0 through GPIO_PIN_MAX() inclusive.
+#
+METHOD int get_pin_list {
+ device_t dev;
+ uint32_t *pin_list;
+} DEFAULT gpio_default_get_pin_list;
diff --git a/sys/dev/gpio/gpiobus.c b/sys/dev/gpio/gpiobus.c
index 5f1f6532a79b..698b5e5fdd01 100644
--- a/sys/dev/gpio/gpiobus.c
+++ b/sys/dev/gpio/gpiobus.c
@@ -319,10 +319,6 @@ gpiobus_add_bus(device_t dev)
busdev = device_add_child(dev, "gpiobus", DEVICE_UNIT_ANY);
if (busdev == NULL)
return (NULL);
- if (device_add_child(dev, "gpioc", DEVICE_UNIT_ANY) == NULL) {
- device_delete_child(dev, busdev);
- return (NULL);
- }
#ifdef FDT
ofw_gpiobus_register_provider(dev);
#endif
@@ -372,6 +368,37 @@ gpiobus_init_softc(device_t dev)
}
int
+gpiobus_add_gpioc(device_t dev)
+{
+ struct gpiobus_ivar *devi;
+ struct gpiobus_softc *sc;
+ device_t gpioc;
+ int err;
+
+ gpioc = BUS_ADD_CHILD(dev, 0, "gpioc", DEVICE_UNIT_ANY);
+ if (gpioc == NULL)
+ return (ENXIO);
+
+ sc = device_get_softc(dev);
+ devi = device_get_ivars(gpioc);
+
+ devi->npins = sc->sc_npins;
+ err = gpiobus_alloc_ivars(devi);
+ if (err != 0) {
+ device_delete_child(dev, gpioc);
+ return (err);
+ }
+
+ err = GPIO_GET_PIN_LIST(sc->sc_dev, devi->pins);
+ if (err != 0) {
+ device_delete_child(dev, gpioc);
+ gpiobus_free_ivars(devi);
+ }
+
+ return (err);
+}
+
+int
gpiobus_alloc_ivars(struct gpiobus_ivar *devi)
{
@@ -562,6 +589,10 @@ gpiobus_attach(device_t dev)
if (err != 0)
return (err);
+ err = gpiobus_add_gpioc(dev);
+ if (err != 0)
+ return (err);
+
/*
* Get parent's pins and mark them as unmapped
*/
@@ -961,7 +992,7 @@ gpiobus_pin_getflags(device_t dev, device_t child, uint32_t pin,
if (pin >= devi->npins)
return (EINVAL);
- return GPIO_PIN_GETFLAGS(sc->sc_dev, devi->pins[pin], flags);
+ return (GPIO_PIN_GETFLAGS(sc->sc_dev, devi->pins[pin], flags));
}
static int
@@ -974,7 +1005,7 @@ gpiobus_pin_getcaps(device_t dev, device_t child, uint32_t pin,
if (pin >= devi->npins)
return (EINVAL);
- return GPIO_PIN_GETCAPS(sc->sc_dev, devi->pins[pin], caps);
+ return (GPIO_PIN_GETCAPS(sc->sc_dev, devi->pins[pin], caps));
}
static int
@@ -987,7 +1018,7 @@ gpiobus_pin_set(device_t dev, device_t child, uint32_t pin,
if (pin >= devi->npins)
return (EINVAL);
- return GPIO_PIN_SET(sc->sc_dev, devi->pins[pin], value);
+ return (GPIO_PIN_SET(sc->sc_dev, devi->pins[pin], value));
}
static int
@@ -1000,7 +1031,7 @@ gpiobus_pin_get(device_t dev, device_t child, uint32_t pin,
if (pin >= devi->npins)
return (EINVAL);
- return GPIO_PIN_GET(sc->sc_dev, devi->pins[pin], value);
+ return (GPIO_PIN_GET(sc->sc_dev, devi->pins[pin], value));
}
static int
@@ -1012,7 +1043,57 @@ gpiobus_pin_toggle(device_t dev, device_t child, uint32_t pin)
if (pin >= devi->npins)
return (EINVAL);
- return GPIO_PIN_TOGGLE(sc->sc_dev, devi->pins[pin]);
+ return (GPIO_PIN_TOGGLE(sc->sc_dev, devi->pins[pin]));
+}
+
+/*
+ * Verify that a child has all the pins it is requesting
+ * to access in its ivars.
+ */
+static bool
+gpiobus_pin_verify_32(struct gpiobus_ivar *devi, uint32_t first_pin,
+ uint32_t num_pins)
+{
+ if (first_pin + num_pins > devi->npins)
+ return (false);
+
+ /* Make sure the pins are consecutive. */
+ for (uint32_t pin = first_pin; pin < first_pin + num_pins - 1; pin++) {
+ if (devi->pins[pin] + 1 != devi->pins[pin + 1])
+ return (false);
+ }
+
+ return (true);
+}
+
+static int
+gpiobus_pin_access_32(device_t dev, device_t child, uint32_t first_pin,
+ uint32_t clear_pins, uint32_t change_pins, uint32_t *orig_pins)
+{
+ struct gpiobus_softc *sc = GPIOBUS_SOFTC(dev);
+ struct gpiobus_ivar *devi = GPIOBUS_IVAR(child);
+
+ if (!gpiobus_pin_verify_32(devi, first_pin, 32))
+ return (EINVAL);
+
+ return (GPIO_PIN_ACCESS_32(sc->sc_dev, devi->pins[first_pin],
+ clear_pins, change_pins, orig_pins));
+}
+
+static int
+gpiobus_pin_config_32(device_t dev, device_t child, uint32_t first_pin,
+ uint32_t num_pins, uint32_t *pin_flags)
+{
+ struct gpiobus_softc *sc = GPIOBUS_SOFTC(dev);
+ struct gpiobus_ivar *devi = GPIOBUS_IVAR(child);
+
+ if (num_pins > 32)
+ return (EINVAL);
+ if (!gpiobus_pin_verify_32(devi, first_pin, num_pins))
+ return (EINVAL);
+
+ return (GPIO_PIN_CONFIG_32(sc->sc_dev,
+ devi->pins[first_pin], num_pins, pin_flags));
}
static int
@@ -1093,6 +1174,8 @@ static device_method_t gpiobus_methods[] = {
DEVMETHOD(gpiobus_pin_get, gpiobus_pin_get),
DEVMETHOD(gpiobus_pin_set, gpiobus_pin_set),
DEVMETHOD(gpiobus_pin_toggle, gpiobus_pin_toggle),
+ DEVMETHOD(gpiobus_pin_access_32,gpiobus_pin_access_32),
+ DEVMETHOD(gpiobus_pin_config_32,gpiobus_pin_config_32),
DEVMETHOD(gpiobus_pin_getname, gpiobus_pin_getname),
DEVMETHOD(gpiobus_pin_setname, gpiobus_pin_setname),
diff --git a/sys/dev/gpio/gpiobus_if.m b/sys/dev/gpio/gpiobus_if.m
index 8bf29839ef4e..890738c4e809 100644
--- a/sys/dev/gpio/gpiobus_if.m
+++ b/sys/dev/gpio/gpiobus_if.m
@@ -107,6 +107,36 @@ METHOD int pin_setflags {
};
#
+# Simultaneously read and/or change up to 32 adjacent pins.
+# If the device cannot change the pins simultaneously, returns EOPNOTSUPP.
+#
+# More details about using this interface can be found in sys/gpio.h
+#
+METHOD int pin_access_32 {
+ device_t dev;
+ device_t child;
+ uint32_t first_pin;
+ uint32_t clear_pins;
+ uint32_t change_pins;
+ uint32_t *orig_pins;
+};
+
+#
+# Simultaneously configure up to 32 adjacent pins.
+# This is intended to change the configuration of all the pins simultaneously,
+# but unlike pin_access_32, this will not fail if the hardware can't do so.
+#
+# More details about using this interface can be found in sys/gpio.h
+#
+METHOD int pin_config_32 {
+ device_t dev;
+ device_t child;
+ uint32_t first_pin;
+ uint32_t num_pins;
+ uint32_t *pin_flags;
+};
+
+#
# Get the pin name
#
METHOD int pin_getname {
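A sketch of how a gpiobus child might use the new bus methods above (the child driver, pin indices, and masks are illustrative; GPIOBUS_PIN_ACCESS_32 is the accessor generated from pin_access_32, and the exact clear/change semantics are documented in sys/gpio.h):

	/* Atomically manipulate a window of 32 consecutive child pins
	 * starting at pin 0 and capture their previous state; fails with
	 * EINVAL unless the child maps 32 consecutive pins there, and with
	 * EOPNOTSUPP if the controller cannot change them simultaneously. */
	static int
	foo_update_bank(device_t dev, uint32_t *orig)
	{
		device_t bus = device_get_parent(dev);

		return (GPIOBUS_PIN_ACCESS_32(bus, dev, 0 /* first_pin */,
		    0x000000ff /* clear_pins */, 0x0000ff00 /* change_pins */,
		    orig));
	}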
diff --git a/sys/dev/gpio/gpiobus_internal.h b/sys/dev/gpio/gpiobus_internal.h
index c198e5f79989..58f862343403 100644
--- a/sys/dev/gpio/gpiobus_internal.h
+++ b/sys/dev/gpio/gpiobus_internal.h
@@ -44,6 +44,7 @@ int gpiobus_acquire_pin(device_t, uint32_t);
void gpiobus_release_pin(device_t, uint32_t);
int gpiobus_child_location(device_t, device_t, struct sbuf *);
device_t gpiobus_add_child_common(device_t, u_int, const char *, int, size_t);
+int gpiobus_add_gpioc(device_t);
extern driver_t gpiobus_driver;
#endif
diff --git a/sys/dev/gpio/gpioc.c b/sys/dev/gpio/gpioc.c
index 87fed38ebe3e..5a60f939dc78 100644
--- a/sys/dev/gpio/gpioc.c
+++ b/sys/dev/gpio/gpioc.c
@@ -45,7 +45,6 @@
#include <dev/gpio/gpiobusvar.h>
-#include "gpio_if.h"
#include "gpiobus_if.h"
#undef GPIOC_DEBUG
@@ -59,7 +58,7 @@
struct gpioc_softc {
device_t sc_dev; /* gpiocX dev */
- device_t sc_pdev; /* gpioX dev */
+ device_t sc_pdev; /* gpiobusX dev */
struct cdev *sc_ctl_dev; /* controller device */
int sc_unit;
int sc_npins;
@@ -69,6 +68,7 @@ struct gpioc_softc {
struct gpioc_pin_intr {
struct gpioc_softc *sc;
gpio_pin_t pin;
+ uint32_t intr_mode;
bool config_locked;
int intr_rid;
struct resource *intr_res;
@@ -112,8 +112,10 @@ struct gpioc_pin_event {
static MALLOC_DEFINE(M_GPIOC, "gpioc", "gpioc device data");
-static int gpioc_allocate_pin_intr(struct gpioc_pin_intr*, uint32_t);
-static int gpioc_release_pin_intr(struct gpioc_pin_intr*);
+static int gpioc_allocate_pin_intr(struct gpioc_softc*,
+ struct gpioc_pin_intr*, uint32_t, uint32_t);
+static int gpioc_release_pin_intr(struct gpioc_softc*,
+ struct gpioc_pin_intr*);
static int gpioc_attach_priv_pin(struct gpioc_cdevpriv*,
struct gpioc_pin_intr*);
static int gpioc_detach_priv_pin(struct gpioc_cdevpriv*,
@@ -191,27 +193,36 @@ number_of_events(struct gpioc_cdevpriv *priv)
}
static int
-gpioc_allocate_pin_intr(struct gpioc_pin_intr *intr_conf, uint32_t flags)
+gpioc_allocate_pin_intr(struct gpioc_softc *sc,
+ struct gpioc_pin_intr *intr_conf, uint32_t pin, uint32_t flags)
{
int err;
intr_conf->config_locked = true;
mtx_unlock(&intr_conf->mtx);
- intr_conf->intr_res = gpio_alloc_intr_resource(intr_conf->pin->dev,
+ MPASS(intr_conf->pin == NULL);
+ err = gpio_pin_get_by_bus_pinnum(sc->sc_pdev, pin, &intr_conf->pin);
+ if (err != 0)
+ goto error_exit;
+
+ intr_conf->intr_res = gpio_alloc_intr_resource(sc->sc_dev,
&intr_conf->intr_rid, RF_ACTIVE, intr_conf->pin, flags);
if (intr_conf->intr_res == NULL) {
err = ENXIO;
- goto error_exit;
+ goto error_pin;
}
- err = bus_setup_intr(intr_conf->pin->dev, intr_conf->intr_res,
+ err = bus_setup_intr(sc->sc_dev, intr_conf->intr_res,
INTR_TYPE_MISC | INTR_MPSAFE, NULL, gpioc_interrupt_handler,
intr_conf, &intr_conf->intr_cookie);
- if (err != 0)
- goto error_exit;
+ if (err != 0) {
+ bus_release_resource(sc->sc_dev, intr_conf->intr_res);
+ intr_conf->intr_res = NULL;
+ goto error_pin;
+ }
- intr_conf->pin->flags = flags;
+ intr_conf->intr_mode = flags;
error_exit:
mtx_lock(&intr_conf->mtx);
@@ -219,10 +230,15 @@ error_exit:
wakeup(&intr_conf->config_locked);
return (err);
+
+error_pin:
+ gpio_pin_release(intr_conf->pin);
+ intr_conf->pin = NULL;
+ goto error_exit;
}
static int
-gpioc_release_pin_intr(struct gpioc_pin_intr *intr_conf)
+gpioc_release_pin_intr(struct gpioc_softc *sc, struct gpioc_pin_intr *intr_conf)
{
int err;
@@ -230,8 +246,8 @@ gpioc_release_pin_intr(struct gpioc_pin_intr *intr_conf)
mtx_unlock(&intr_conf->mtx);
if (intr_conf->intr_cookie != NULL) {
- err = bus_teardown_intr(intr_conf->pin->dev,
- intr_conf->intr_res, intr_conf->intr_cookie);
+ err = bus_teardown_intr(sc->sc_dev, intr_conf->intr_res,
+ intr_conf->intr_cookie);
if (err != 0)
goto error_exit;
else
@@ -239,7 +255,7 @@ gpioc_release_pin_intr(struct gpioc_pin_intr *intr_conf)
}
if (intr_conf->intr_res != NULL) {
- err = bus_release_resource(intr_conf->pin->dev, SYS_RES_IRQ,
+ err = bus_release_resource(sc->sc_dev, SYS_RES_IRQ,
intr_conf->intr_rid, intr_conf->intr_res);
if (err != 0)
goto error_exit;
@@ -249,7 +265,10 @@ gpioc_release_pin_intr(struct gpioc_pin_intr *intr_conf)
}
}
- intr_conf->pin->flags = 0;
+ gpio_pin_release(intr_conf->pin);
+ intr_conf->pin = NULL;
+
+ intr_conf->intr_mode = 0;
err = 0;
error_exit:
@@ -386,7 +405,7 @@ gpioc_get_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv,
struct gpioc_privs *priv_link;
uint32_t flags;
- flags = intr_conf->pin->flags;
+ flags = intr_conf->intr_mode;
if (flags == 0)
return (0);
@@ -411,7 +430,7 @@ gpioc_set_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv,
int res;
res = 0;
- if (intr_conf->pin->flags == 0 && flags == 0) {
+ if (intr_conf->intr_mode == 0 && flags == 0) {
/* No interrupt configured and none requested: Do nothing. */
return (0);
}
@@ -419,17 +438,17 @@ gpioc_set_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv,
while (intr_conf->config_locked == true)
mtx_sleep(&intr_conf->config_locked, &intr_conf->mtx, 0,
"gpicfg", 0);
- if (intr_conf->pin->flags == 0 && flags != 0) {
+ if (intr_conf->intr_mode == 0 && flags != 0) {
/*
* No interrupt is configured, but one is requested: Allocate
* and setup interrupt on the according pin.
*/
- res = gpioc_allocate_pin_intr(intr_conf, flags);
+ res = gpioc_allocate_pin_intr(sc, intr_conf, pin, flags);
if (res == 0)
res = gpioc_attach_priv_pin(priv, intr_conf);
if (res == EEXIST)
res = 0;
- } else if (intr_conf->pin->flags == flags) {
+ } else if (intr_conf->intr_mode == flags) {
/*
* Same interrupt requested as already configured: Attach the
* cdevpriv to the corresponding pin.
@@ -437,14 +456,14 @@ gpioc_set_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv,
res = gpioc_attach_priv_pin(priv, intr_conf);
if (res == EEXIST)
res = 0;
- } else if (intr_conf->pin->flags != 0 && flags == 0) {
+ } else if (intr_conf->intr_mode != 0 && flags == 0) {
/*
* Interrupt configured, but none requested: Teardown and
* release the pin when no other cdevpriv is attached. Otherwise
* just detach pin and cdevpriv from each other.
*/
if (gpioc_intr_reconfig_allowed(priv, intr_conf)) {
- res = gpioc_release_pin_intr(intr_conf);
+ res = gpioc_release_pin_intr(sc, intr_conf);
}
if (res == 0)
res = gpioc_detach_priv_pin(priv, intr_conf);
@@ -456,9 +475,10 @@ gpioc_set_intr_config(struct gpioc_softc *sc, struct gpioc_cdevpriv *priv,
if (!gpioc_intr_reconfig_allowed(priv, intr_conf))
res = EBUSY;
else {
- res = gpioc_release_pin_intr(intr_conf);
+ res = gpioc_release_pin_intr(sc, intr_conf);
if (res == 0)
- res = gpioc_allocate_pin_intr(intr_conf, flags);
+ res = gpioc_allocate_pin_intr(sc, intr_conf,
+ pin, flags);
if (res == 0)
res = gpioc_attach_priv_pin(priv, intr_conf);
if (res == EEXIST)
@@ -475,18 +495,16 @@ gpioc_interrupt_handler(void *arg)
{
struct gpioc_pin_intr *intr_conf;
struct gpioc_privs *privs;
- struct gpioc_softc *sc;
sbintime_t evtime;
- uint32_t pin_state;
+ bool pin_state;
intr_conf = arg;
- sc = intr_conf->sc;
/* Capture time and pin state first. */
evtime = sbinuptime();
- if (intr_conf->pin->flags & GPIO_INTR_EDGE_BOTH)
- GPIO_PIN_GET(sc->sc_pdev, intr_conf->pin->pin, &pin_state);
- else if (intr_conf->pin->flags & GPIO_INTR_EDGE_RISING)
+ if (intr_conf->intr_mode & GPIO_INTR_EDGE_BOTH)
+ gpio_pin_is_active(intr_conf->pin, &pin_state);
+ else if (intr_conf->intr_mode & GPIO_INTR_EDGE_RISING)
pin_state = true;
else
pin_state = false;
@@ -575,18 +593,11 @@ gpioc_attach(device_t dev)
sc->sc_pdev = device_get_parent(dev);
sc->sc_unit = device_get_unit(dev);
- err = GPIO_PIN_MAX(sc->sc_pdev, &sc->sc_npins);
- sc->sc_npins++; /* Number of pins is one more than max pin number. */
- if (err != 0)
- return (err);
+ sc->sc_npins = gpiobus_get_npins(dev);
sc->sc_pin_intr = malloc(sizeof(struct gpioc_pin_intr) * sc->sc_npins,
M_GPIOC, M_WAITOK | M_ZERO);
for (int i = 0; i < sc->sc_npins; i++) {
- sc->sc_pin_intr[i].pin = malloc(sizeof(struct gpiobus_pin),
- M_GPIOC, M_WAITOK | M_ZERO);
sc->sc_pin_intr[i].sc = sc;
- sc->sc_pin_intr[i].pin->pin = i;
- sc->sc_pin_intr[i].pin->dev = sc->sc_pdev;
mtx_init(&sc->sc_pin_intr[i].mtx, "gpioc pin", NULL, MTX_DEF);
SLIST_INIT(&sc->sc_pin_intr[i].privs);
}
@@ -610,20 +621,16 @@ static int
gpioc_detach(device_t dev)
{
struct gpioc_softc *sc = device_get_softc(dev);
- int err;
if (sc->sc_ctl_dev)
destroy_dev(sc->sc_ctl_dev);
for (int i = 0; i < sc->sc_npins; i++) {
mtx_destroy(&sc->sc_pin_intr[i].mtx);
- free(sc->sc_pin_intr[i].pin, M_GPIOC);
+ MPASS(sc->sc_pin_intr[i].pin == NULL);
}
free(sc->sc_pin_intr, M_GPIOC);
- if ((err = bus_generic_detach(dev)) != 0)
- return (err);
-
return (0);
}
@@ -655,7 +662,7 @@ gpioc_cdevpriv_dtor(void *data)
KASSERT(consistency == 1,
("inconsistent links between pin config and cdevpriv"));
if (gpioc_intr_reconfig_allowed(priv, pin_link->pin)) {
- gpioc_release_pin_intr(pin_link->pin);
+ gpioc_release_pin_intr(priv->sc, pin_link->pin);
}
mtx_unlock(&pin_link->pin->mtx);
SLIST_REMOVE(&priv->pins, pin_link, gpioc_pins, next);
@@ -778,7 +785,6 @@ static int
gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag,
struct thread *td)
{
- device_t bus;
int max_pin, res;
struct gpioc_softc *sc = cdev->si_drv1;
struct gpioc_cdevpriv *priv;
@@ -789,30 +795,32 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag,
struct gpio_event_config *evcfg;
uint32_t caps, intrflags;
- bus = GPIO_GET_BUS(sc->sc_pdev);
- if (bus == NULL)
- return (EINVAL);
switch (cmd) {
case GPIOMAXPIN:
- max_pin = -1;
- res = GPIO_PIN_MAX(sc->sc_pdev, &max_pin);
+ res = 0;
+ max_pin = sc->sc_npins - 1;
bcopy(&max_pin, arg, sizeof(max_pin));
break;
case GPIOGETCONFIG:
bcopy(arg, &pin, sizeof(pin));
dprintf("get config pin %d\n", pin.gp_pin);
- res = GPIO_PIN_GETFLAGS(sc->sc_pdev, pin.gp_pin,
+ res = GPIOBUS_PIN_GETFLAGS(sc->sc_pdev, sc->sc_dev, pin.gp_pin,
&pin.gp_flags);
/* Fail early */
- if (res)
+ if (res != 0)
break;
res = devfs_get_cdevpriv((void **)&priv);
- if (res)
+ if (res != 0)
break;
pin.gp_flags |= gpioc_get_intr_config(sc, priv,
pin.gp_pin);
- GPIO_PIN_GETCAPS(sc->sc_pdev, pin.gp_pin, &pin.gp_caps);
- GPIOBUS_PIN_GETNAME(bus, pin.gp_pin, pin.gp_name);
+ res = GPIOBUS_PIN_GETCAPS(sc->sc_pdev, sc->sc_dev, pin.gp_pin,
+ &pin.gp_caps);
+ if (res != 0)
+ break;
+ res = GPIOBUS_PIN_GETNAME(sc->sc_pdev, pin.gp_pin, pin.gp_name);
+ if (res != 0)
+ break;
bcopy(&pin, arg, sizeof(pin));
break;
case GPIOSETCONFIG:
@@ -821,7 +829,8 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag,
res = devfs_get_cdevpriv((void **)&priv);
if (res != 0)
break;
- res = GPIO_PIN_GETCAPS(sc->sc_pdev, pin.gp_pin, &caps);
+ res = GPIOBUS_PIN_GETCAPS(sc->sc_pdev, sc->sc_dev,
+ pin.gp_pin, &caps);
if (res != 0)
break;
res = gpio_check_flags(caps, pin.gp_flags);
@@ -847,8 +856,8 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag,
}
if (res != 0)
break;
- res = GPIO_PIN_SETFLAGS(sc->sc_pdev, pin.gp_pin,
- (pin.gp_flags & ~GPIO_INTR_MASK));
+ res = GPIOBUS_PIN_SETFLAGS(sc->sc_pdev, sc->sc_dev, pin.gp_pin,
+ pin.gp_flags & ~GPIO_INTR_MASK);
if (res != 0)
break;
res = gpioc_set_intr_config(sc, priv, pin.gp_pin,
@@ -856,40 +865,43 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag,
break;
case GPIOGET:
bcopy(arg, &req, sizeof(req));
- res = GPIO_PIN_GET(sc->sc_pdev, req.gp_pin,
+ res = GPIOBUS_PIN_GET(sc->sc_pdev, sc->sc_dev, req.gp_pin,
&req.gp_value);
- dprintf("read pin %d -> %d\n",
+ if (res != 0)
+ break;
+ dprintf("read pin %d -> %d\n",
req.gp_pin, req.gp_value);
bcopy(&req, arg, sizeof(req));
break;
case GPIOSET:
bcopy(arg, &req, sizeof(req));
- res = GPIO_PIN_SET(sc->sc_pdev, req.gp_pin,
+ res = GPIOBUS_PIN_SET(sc->sc_pdev, sc->sc_dev, req.gp_pin,
req.gp_value);
- dprintf("write pin %d -> %d\n",
+ dprintf("write pin %d -> %d\n",
req.gp_pin, req.gp_value);
break;
case GPIOTOGGLE:
bcopy(arg, &req, sizeof(req));
- dprintf("toggle pin %d\n",
+ dprintf("toggle pin %d\n",
req.gp_pin);
- res = GPIO_PIN_TOGGLE(sc->sc_pdev, req.gp_pin);
+ res = GPIOBUS_PIN_TOGGLE(sc->sc_pdev, sc->sc_dev, req.gp_pin);
break;
case GPIOSETNAME:
bcopy(arg, &pin, sizeof(pin));
dprintf("set name on pin %d\n", pin.gp_pin);
- res = GPIOBUS_PIN_SETNAME(bus, pin.gp_pin,
+ res = GPIOBUS_PIN_SETNAME(sc->sc_pdev, pin.gp_pin,
pin.gp_name);
break;
case GPIOACCESS32:
a32 = (struct gpio_access_32 *)arg;
- res = GPIO_PIN_ACCESS_32(sc->sc_pdev, a32->first_pin,
- a32->clear_pins, a32->change_pins, &a32->orig_pins);
+ res = GPIOBUS_PIN_ACCESS_32(sc->sc_pdev, sc->sc_dev,
+ a32->first_pin, a32->clear_pins, a32->change_pins,
+ &a32->orig_pins);
break;
case GPIOCONFIG32:
c32 = (struct gpio_config_32 *)arg;
- res = GPIO_PIN_CONFIG_32(sc->sc_pdev, c32->first_pin,
- c32->num_pins, c32->pin_flags);
+ res = GPIOBUS_PIN_CONFIG_32(sc->sc_pdev, sc->sc_dev,
+ c32->first_pin, c32->num_pins, c32->pin_flags);
break;
case GPIOCONFIGEVENTS:
evcfg = (struct gpio_event_config *)arg;
@@ -1050,9 +1062,6 @@ static device_method_t gpioc_methods[] = {
DEVMETHOD(device_probe, gpioc_probe),
DEVMETHOD(device_attach, gpioc_attach),
DEVMETHOD(device_detach, gpioc_detach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
DEVMETHOD_END
};
@@ -1063,5 +1072,5 @@ driver_t gpioc_driver = {
sizeof(struct gpioc_softc)
};
-DRIVER_MODULE(gpioc, gpio, gpioc_driver, 0, 0);
+DRIVER_MODULE(gpioc, gpiobus, gpioc_driver, 0, 0);
MODULE_VERSION(gpioc, 1);
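From userland the same path is reachable through /dev/gpiocN; a minimal sketch of the GPIOACCESS32 ioctl handled above (error handling omitted, masks illustrative):

	#include <sys/types.h>
	#include <sys/gpio.h>
	#include <sys/ioctl.h>
	#include <fcntl.h>
	#include <unistd.h>

	uint32_t
	access_low_byte(void)
	{
		struct gpio_access_32 a32 = { 0 };
		int fd = open("/dev/gpioc0", O_RDWR);

		a32.first_pin = 0;
		a32.clear_pins = 0x000000ff;	/* see sys/gpio.h for semantics */
		a32.change_pins = 0;
		ioctl(fd, GPIOACCESS32, &a32);	/* previous state in orig_pins */
		close(fd);
		return (a32.orig_pins);
	}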
diff --git a/sys/dev/gpio/gpioled.c b/sys/dev/gpio/gpioled.c
index ba53cb733971..71af5741b2fe 100644
--- a/sys/dev/gpio/gpioled.c
+++ b/sys/dev/gpio/gpioled.c
@@ -55,13 +55,13 @@
device_get_nameunit((_sc)->sc_dev), "gpioled", MTX_DEF)
#define GPIOLED_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_mtx)
-struct gpioled_softc
+struct gpioled_softc
{
device_t sc_dev;
device_t sc_busdev;
struct mtx sc_mtx;
struct cdev *sc_leddev;
- int sc_invert;
+ int sc_softinvert;
};
static void gpioled_control(void *, int);
@@ -69,20 +69,19 @@ static int gpioled_probe(device_t);
static int gpioled_attach(device_t);
static int gpioled_detach(device_t);
-static void
+static void
gpioled_control(void *priv, int onoff)
{
struct gpioled_softc *sc;
sc = (struct gpioled_softc *)priv;
+ if (onoff == -1) /* Keep the current state. */
+ return;
+ if (sc->sc_softinvert)
+ onoff = !onoff;
GPIOLED_LOCK(sc);
- if (GPIOBUS_PIN_SETFLAGS(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN,
- GPIO_PIN_OUTPUT) == 0) {
- if (sc->sc_invert)
- onoff = !onoff;
- GPIOBUS_PIN_SET(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN,
- onoff ? GPIO_PIN_HIGH : GPIO_PIN_LOW);
- }
+ GPIOBUS_PIN_SET(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN,
+ onoff ? GPIO_PIN_HIGH : GPIO_PIN_LOW);
GPIOLED_UNLOCK(sc);
}
@@ -95,26 +94,101 @@ gpioled_probe(device_t dev)
}
static int
+gpioled_inv(device_t dev, uint32_t *pin_flags)
+{
+ struct gpioled_softc *sc;
+ int invert;
+ uint32_t pin_caps;
+
+ sc = device_get_softc(dev);
+
+ if (resource_int_value(device_get_name(dev),
+ device_get_unit(dev), "invert", &invert))
+ invert = 0;
+
+ if (GPIOBUS_PIN_GETCAPS(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN,
+ &pin_caps) != 0) {
+ if (bootverbose)
+ device_printf(sc->sc_dev, "unable to get pin caps\n");
+ return (-1);
+ }
+ if (pin_caps & GPIO_PIN_INVOUT)
+ *pin_flags &= ~GPIO_PIN_INVOUT;
+ sc->sc_softinvert = 0;
+ if (invert) {
+ const char *invmode;
+
+ if (resource_string_value(device_get_name(dev),
+ device_get_unit(dev), "invmode", &invmode))
+ invmode = NULL;
+
+ if (invmode) {
+ if (!strcmp(invmode, "sw"))
+ sc->sc_softinvert = 1;
+ else if (!strcmp(invmode, "hw")) {
+ if (pin_caps & GPIO_PIN_INVOUT)
+ *pin_flags |= GPIO_PIN_INVOUT;
+ else {
+ device_printf(sc->sc_dev, "hardware pin inversion not supported\n");
+ return (-1);
+ }
+ } else {
+ if (strcmp(invmode, "auto") != 0)
+ device_printf(sc->sc_dev, "invalid pin inversion mode\n");
+ invmode = NULL;
+ }
+ }
+ /*
+ * Auto inversion mode: use hardware support if available, else fall
+ * back to software emulation.
+ */
+ if (invmode == NULL) {
+ if (pin_caps & GPIO_PIN_INVOUT)
+ *pin_flags |= GPIO_PIN_INVOUT;
+ else
+ sc->sc_softinvert = 1;
+ }
+ }
+ MPASS(!invert ||
+ (((*pin_flags & GPIO_PIN_INVOUT) != 0) && !sc->sc_softinvert) ||
+ (((*pin_flags & GPIO_PIN_INVOUT) == 0) && sc->sc_softinvert));
+ return (invert);
+}
+
+static int
gpioled_attach(device_t dev)
{
struct gpioled_softc *sc;
int state;
const char *name;
+ uint32_t pin_flags;
+ int invert;
sc = device_get_softc(dev);
sc->sc_dev = dev;
sc->sc_busdev = device_get_parent(dev);
GPIOLED_LOCK_INIT(sc);
- state = 0;
-
- if (resource_string_value(device_get_name(dev),
+ if (resource_string_value(device_get_name(dev),
device_get_unit(dev), "name", &name))
name = NULL;
- resource_int_value(device_get_name(dev),
- device_get_unit(dev), "invert", &sc->sc_invert);
- resource_int_value(device_get_name(dev),
- device_get_unit(dev), "state", &state);
+
+ if (resource_int_value(device_get_name(dev),
+ device_get_unit(dev), "state", &state))
+ state = 0;
+
+ pin_flags = GPIO_PIN_OUTPUT;
+ invert = gpioled_inv(dev, &pin_flags);
+ if (invert < 0)
+ return (ENXIO);
+ device_printf(sc->sc_dev, "state %d invert %s\n",
+ state, (invert ? (sc->sc_softinvert ? "sw" : "hw") : "no"));
+ if (GPIOBUS_PIN_SETFLAGS(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN,
+ pin_flags) != 0) {
+ if (bootverbose)
+ device_printf(sc->sc_dev, "unable to set pin flags, %#x\n", pin_flags);
+ return (ENXIO);
+ }
sc->sc_leddev = led_create_state(gpioled_control, sc, name ? name :
device_get_nameunit(dev), state);
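The inversion handling above is driven entirely by hints; a sketch of the knobs read in gpioled_inv() and gpioled_attach() for a hinted LED (the at/pins lines are the usual gpiobus glue hints and are shown only for context):

	hint.gpioled.0.at="gpiobus0"
	hint.gpioled.0.pins="0x0001"
	hint.gpioled.0.name="power"
	hint.gpioled.0.state="1"
	hint.gpioled.0.invert="1"
	hint.gpioled.0.invmode="auto"	# "hw", "sw", or "auto"

With invmode set to auto (or omitted), the driver prefers GPIO_PIN_INVOUT when the pin advertises it and otherwise falls back to inverting in software.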
diff --git a/sys/dev/gpio/ofw_gpiobus.c b/sys/dev/gpio/ofw_gpiobus.c
index b12b78fac18c..da1bfbc268b8 100644
--- a/sys/dev/gpio/ofw_gpiobus.c
+++ b/sys/dev/gpio/ofw_gpiobus.c
@@ -426,6 +426,9 @@ ofw_gpiobus_attach(device_t dev)
err = gpiobus_init_softc(dev);
if (err != 0)
return (err);
+ err = gpiobus_add_gpioc(dev);
+ if (err != 0)
+ return (err);
bus_identify_children(dev);
bus_enumerate_hinted_children(dev);
/*
diff --git a/sys/dev/hid/hkbd.c b/sys/dev/hid/hkbd.c
index 5eff7557bc42..6255c42d3b62 100644
--- a/sys/dev/hid/hkbd.c
+++ b/sys/dev/hid/hkbd.c
@@ -95,14 +95,16 @@
#ifdef HID_DEBUG
static int hkbd_debug = 0;
+#endif
static int hkbd_no_leds = 0;
static SYSCTL_NODE(_hw_hid, OID_AUTO, hkbd, CTLFLAG_RW, 0, "USB keyboard");
+#ifdef HID_DEBUG
SYSCTL_INT(_hw_hid_hkbd, OID_AUTO, debug, CTLFLAG_RWTUN,
&hkbd_debug, 0, "Debug level");
+#endif
SYSCTL_INT(_hw_hid_hkbd, OID_AUTO, no_leds, CTLFLAG_RWTUN,
&hkbd_no_leds, 0, "Disables setting of keyboard leds");
-#endif
#define INPUT_EPOCH global_epoch_preempt
@@ -1596,8 +1598,16 @@ hkbd_ioctl_locked(keyboard_t *kbd, u_long cmd, caddr_t arg)
sc->sc_state &= ~LOCK_MASK;
sc->sc_state |= *(int *)arg;
- /* set LEDs and quit */
- return (hkbd_ioctl_locked(kbd, KDSETLED, arg));
+ /*
+ * Attempt to set the keyboard LEDs; ignore the return value
+ * intentionally. Note: some hypervisors/emulators (e.g., QEMU and
+ * Parallels, at least at the time of writing) may fail when
+ * setting LEDs. This can prevent kbdmux from attaching the
+ * keyboard, which in turn may block the console from accessing
+ * it.
+ */
+ (void)hkbd_ioctl_locked(kbd, KDSETLED, arg);
+ return (0);
case KDSETREPEAT: /* set keyboard repeat rate (new
* interface) */
@@ -1766,10 +1776,8 @@ hkbd_set_leds(struct hkbd_softc *sc, uint8_t leds)
SYSCONS_LOCK_ASSERT();
DPRINTF("leds=0x%02x\n", leds);
-#ifdef HID_DEBUG
if (hkbd_no_leds)
return (0);
-#endif
memset(sc->sc_buffer, 0, HKBD_BUFFER_SIZE);
@@ -1820,6 +1828,7 @@ hkbd_set_leds(struct hkbd_softc *sc, uint8_t leds)
SYSCONS_UNLOCK();
error = hid_write(sc->sc_dev, buf, len);
SYSCONS_LOCK();
+ DPRINTF("error %d", error);
return (error);
}
diff --git a/sys/dev/hpt27xx/hptintf.h b/sys/dev/hpt27xx/hptintf.h
index 558b479ec2ee..eb8105ec5666 100644
--- a/sys/dev/hpt27xx/hptintf.h
+++ b/sys/dev/hpt27xx/hptintf.h
@@ -155,8 +155,8 @@ typedef HPT_U32 DEVICEID;
#define ARRAY_FLAG_NEED_AUTOREBUILD 0x00000080 /* auto-rebuild should start */
#define ARRAY_FLAG_VERIFYING 0x00000100 /* is being verified */
#define ARRAY_FLAG_INITIALIZING 0x00000200 /* is being initialized */
-#define ARRAY_FLAG_TRANSFORMING 0x00000400 /* tranform in progress */
-#define ARRAY_FLAG_NEEDTRANSFORM 0x00000800 /* array need tranform */
+#define ARRAY_FLAG_TRANSFORMING 0x00000400 /* transform in progress */
+#define ARRAY_FLAG_NEEDTRANSFORM 0x00000800 /* array need transform */
#define ARRAY_FLAG_NEEDINITIALIZING 0x00001000 /* the array's initialization hasn't finished*/
#define ARRAY_FLAG_BROKEN_REDUNDANT 0x00002000 /* broken but redundant (raid6) */
#define ARRAY_FLAG_RAID15PLUS 0x80000000 /* display this RAID 1 as RAID 1.5 */
@@ -2018,7 +2018,7 @@ DEVICEID hpt_create_transform_v2(DEVICEID idArray, PCREATE_ARRAY_PARAMS_V3 destI
#endif
/* hpt_step_transform
- * move a block in a tranform progress.
+ * move a block in a transform progress.
* This function is called by mid-layer, not GUI (which uses set_array_state instead).
* Version compatibility: v2.0.0.0 or later
* Parameters:
diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c
index 9b85c989dc96..a6a6ae68996c 100644
--- a/sys/dev/hwpmc/hwpmc_mod.c
+++ b/sys/dev/hwpmc/hwpmc_mod.c
@@ -210,7 +210,7 @@ static int pmc_attach_one_process(struct proc *p, struct pmc *pm);
static bool pmc_can_allocate_row(int ri, enum pmc_mode mode);
static bool pmc_can_allocate_rowindex(struct proc *p, unsigned int ri,
int cpu);
-static int pmc_can_attach(struct pmc *pm, struct proc *p);
+static bool pmc_can_attach(struct pmc *pm, struct proc *p);
static void pmc_capture_user_callchain(int cpu, int soft,
struct trapframe *tf);
static void pmc_cleanup(void);
@@ -1029,19 +1029,19 @@ pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
* Check if PMC 'pm' may be attached to target process 't'.
*/
-static int
+static bool
pmc_can_attach(struct pmc *pm, struct proc *t)
{
struct proc *o; /* pmc owner */
struct ucred *oc, *tc; /* owner, target credentials */
- int decline_attach, i;
+ bool decline_attach;
/*
* A PMC's owner can always attach that PMC to itself.
*/
if ((o = pm->pm_owner->po_owner) == t)
- return 0;
+ return (true);
PROC_LOCK(o);
oc = o->p_ucred;
@@ -1066,18 +1066,17 @@ pmc_can_attach(struct pmc *pm, struct proc *t)
* Every one of the target's group ids, must be in the owner's
* group list.
*/
- for (i = 0; !decline_attach && i < tc->cr_ngroups; i++)
+ for (int i = 0; !decline_attach && i < tc->cr_ngroups; i++)
decline_attach = !groupmember(tc->cr_groups[i], oc);
-
- /* check the read and saved gids too */
- if (decline_attach == 0)
- decline_attach = !groupmember(tc->cr_rgid, oc) ||
+ if (!decline_attach)
+ decline_attach = !groupmember(tc->cr_gid, oc) ||
+ !groupmember(tc->cr_rgid, oc) ||
!groupmember(tc->cr_svgid, oc);
crfree(tc);
crfree(oc);
- return !decline_attach;
+ return (!decline_attach);
}
/*
@@ -1412,7 +1411,7 @@ pmc_process_exec(struct thread *td, struct pmckern_procexec *pk)
*/
for (ri = 0; ri < md->pmd_npmc; ri++) {
if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
- if (pmc_can_attach(pm, td->td_proc) != 0) {
+ if (pmc_can_attach(pm, td->td_proc)) {
pmc_detach_one_process(td->td_proc, pm,
PMC_FLAG_NONE);
}
diff --git a/sys/dev/hwt/hwt_ioctl.c b/sys/dev/hwt/hwt_ioctl.c
index 592db4931bb4..184c7e72f986 100644
--- a/sys/dev/hwt/hwt_ioctl.c
+++ b/sys/dev/hwt/hwt_ioctl.c
@@ -112,12 +112,11 @@ hwt_priv_check(struct proc *o, struct proc *t)
error = EPERM;
goto done;
}
-
- /* Check the read and saved GIDs too. */
- if (!groupmember(tc->cr_rgid, oc) ||
+ if (!groupmember(tc->cr_gid, oc) ||
+ !groupmember(tc->cr_rgid, oc) ||
!groupmember(tc->cr_svgid, oc)) {
- error = EPERM;
- goto done;
+ error = EPERM;
+ goto done;
}
done:
diff --git a/sys/dev/ice/ice_fw_logging.c b/sys/dev/ice/ice_fw_logging.c
index 0025a65d73fc..16a9ab6823bf 100644
--- a/sys/dev/ice/ice_fw_logging.c
+++ b/sys/dev/ice/ice_fw_logging.c
@@ -48,7 +48,7 @@ SDT_PROVIDER_DEFINE(ice_fwlog);
/*
* SDT DTrace probe fired when a firmware log message is received over the
- * AdminQ. It passes the buffer of the firwmare log message along with its
+ * AdminQ. It passes the buffer of the firmware log message along with its
* length in bytes to the DTrace framework.
*/
SDT_PROBE_DEFINE2(ice_fwlog, , , message, "uint8_t *", "int");
diff --git a/sys/dev/ice/ice_lib.c b/sys/dev/ice/ice_lib.c
index 442111e5ffaf..8b6349f686eb 100644
--- a/sys/dev/ice/ice_lib.c
+++ b/sys/dev/ice/ice_lib.c
@@ -7818,7 +7818,8 @@ ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter)
case IFCOUNTER_OPACKETS:
return (es->tx_unicast + es->tx_multicast + es->tx_broadcast);
case IFCOUNTER_OERRORS:
- return (es->tx_errors);
+ return (if_get_counter_default(vsi->sc->ifp, counter) +
+ es->tx_errors);
case IFCOUNTER_COLLISIONS:
return (0);
case IFCOUNTER_IBYTES:
@@ -7832,7 +7833,8 @@ ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter)
case IFCOUNTER_IQDROPS:
return (es->rx_discards);
case IFCOUNTER_OQDROPS:
- return (hs->tx_dropped_link_down);
+ return (if_get_counter_default(vsi->sc->ifp, counter) +
+ hs->tx_dropped_link_down);
case IFCOUNTER_NOPROTO:
return (es->rx_unknown_protocol);
default:
diff --git a/sys/dev/ichsmb/ichsmb_pci.c b/sys/dev/ichsmb/ichsmb_pci.c
index 728bb942d503..e4d87fe1fed2 100644
--- a/sys/dev/ichsmb/ichsmb_pci.c
+++ b/sys/dev/ichsmb/ichsmb_pci.c
@@ -107,6 +107,7 @@
#define ID_COMETLAKE2 0x06a3
#define ID_TIGERLAKE 0xa0a3
#define ID_TIGERLAKE2 0x43a3
+#define ID_ELKHARTLAKE 0x4b23
#define ID_GEMINILAKE 0x31d4
#define ID_CEDARFORK 0x18df
#define ID_ICELAKE 0x34a3
@@ -206,6 +207,8 @@ static const struct pci_device_table ichsmb_devices[] = {
PCI_DESCR("Intel Tiger Lake SMBus controller") },
{ PCI_DEV(PCI_VENDOR_INTEL, ID_TIGERLAKE2),
PCI_DESCR("Intel Tiger Lake SMBus controller") },
+ { PCI_DEV(PCI_VENDOR_INTEL, ID_ELKHARTLAKE),
+ PCI_DESCR("Intel Elkhart Lake SMBus controller") },
{ PCI_DEV(PCI_VENDOR_INTEL, ID_GEMINILAKE),
PCI_DESCR("Intel Gemini Lake SMBus controller") },
{ PCI_DEV(PCI_VENDOR_INTEL, ID_CEDARFORK),
diff --git a/sys/dev/ichwd/i6300esbwd.c b/sys/dev/ichwd/i6300esbwd.c
new file mode 100644
index 000000000000..03d504a350aa
--- /dev/null
+++ b/sys/dev/ichwd/i6300esbwd.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+/*
+ * Reference: Intel 6300ESB Controller Hub Datasheet Section 16
+ */
+
+#include <sys/param.h>
+#include <sys/eventhandler.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <machine/resource.h>
+#include <sys/watchdog.h>
+
+#include <dev/pci/pcireg.h>
+
+#include <dev/ichwd/ichwd.h>
+#include <dev/ichwd/i6300esbwd.h>
+
+#include <x86/pci_cfgreg.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pci_private.h>
+
+struct i6300esbwd_softc {
+ device_t dev;
+ int res_id;
+ struct resource *res;
+ eventhandler_tag ev_tag;
+ bool locked;
+};
+
+static const struct i6300esbwd_pci_id {
+ uint16_t id;
+ const char *name;
+} i6300esbwd_pci_devices[] = {
+ { DEVICEID_6300ESB_2, "6300ESB Watchdog Timer" },
+};
+
+static uint16_t __unused
+i6300esbwd_cfg_read(struct i6300esbwd_softc *sc)
+{
+ return (pci_read_config(sc->dev, WDT_CONFIG_REG, 2));
+}
+
+static void
+i6300esbwd_cfg_write(struct i6300esbwd_softc *sc, uint16_t val)
+{
+ pci_write_config(sc->dev, WDT_CONFIG_REG, val, 2);
+}
+
+static uint8_t
+i6300esbwd_lock_read(struct i6300esbwd_softc *sc)
+{
+ return (pci_read_config(sc->dev, WDT_LOCK_REG, 1));
+}
+
+static void
+i6300esbwd_lock_write(struct i6300esbwd_softc *sc, uint8_t val)
+{
+ pci_write_config(sc->dev, WDT_LOCK_REG, val, 1);
+}
+
+/*
+ * According to the Intel 6300ESB I/O Controller Hub Datasheet, Section 16.5.2,
+ * the resource should be unlocked before modifying any registers.
+ * The way to unlock is to write 0x80, then 0x86, to the reload register.
+ */
+static void
+i6300esbwd_unlock_res(struct i6300esbwd_softc *sc)
+{
+ bus_write_2(sc->res, WDT_RELOAD_REG, WDT_UNLOCK_SEQ_1_VAL);
+ bus_write_2(sc->res, WDT_RELOAD_REG, WDT_UNLOCK_SEQ_2_VAL);
+}
+
+static int
+i6300esbwd_sysctl_locked(SYSCTL_HANDLER_ARGS)
+{
+ struct i6300esbwd_softc *sc = (struct i6300esbwd_softc *)arg1;
+ int error;
+ int result;
+
+ result = sc->locked;
+ error = sysctl_handle_int(oidp, &result, 0, req);
+
+ if (error || !req->newptr)
+ return (error);
+
+ if (result == 1 && !sc->locked) {
+ i6300esbwd_lock_write(sc, i6300esbwd_lock_read(sc) | WDT_LOCK);
+ sc->locked = true;
+ }
+
+ return (0);
+}
+
+static void
+i6300esbwd_event(void *arg, unsigned int cmd, int *error)
+{
+ struct i6300esbwd_softc *sc = arg;
+ uint32_t timeout;
+ uint16_t regval;
+
+ cmd &= WD_INTERVAL;
+ if (cmd != 0 &&
+ (cmd < WD_TO_1MS || (cmd - WD_TO_1MS) >= WDT_PRELOAD_BIT)) {
+ *error = EINVAL;
+ return;
+ }
+ timeout = 1 << (cmd - WD_TO_1MS);
+
+ /* Reset the timer to prevent a timeout that may be about to occur. */
+ i6300esbwd_unlock_res(sc);
+ bus_write_2(sc->res, WDT_RELOAD_REG, WDT_RELOAD);
+
+ if (!cmd) {
+ /*
+ * When the lock is enabled, we are unable to overwrite the LOCK
+ * register.
+ */
+ if (sc->locked)
+ *error = EPERM;
+ else
+ i6300esbwd_lock_write(sc,
+ i6300esbwd_lock_read(sc) & ~WDT_ENABLE);
+ return;
+ }
+
+ i6300esbwd_unlock_res(sc);
+ bus_write_4(sc->res, WDT_PRELOAD_1_REG, timeout);
+
+ i6300esbwd_unlock_res(sc);
+ bus_write_4(sc->res, WDT_PRELOAD_2_REG, timeout);
+
+ i6300esbwd_unlock_res(sc);
+ bus_write_2(sc->res, WDT_RELOAD_REG, WDT_RELOAD);
+
+ if (!sc->locked) {
+ i6300esbwd_lock_write(sc, WDT_ENABLE);
+ regval = i6300esbwd_lock_read(sc);
+ sc->locked = regval & WDT_LOCK;
+ }
+}
+
+static int
+i6300esbwd_probe(device_t dev)
+{
+ const struct i6300esbwd_pci_id *pci_id;
+ uint16_t pci_dev_id;
+ int err = ENXIO;
+
+ if (pci_get_vendor(dev) != VENDORID_INTEL)
+ goto end;
+
+ pci_dev_id = pci_get_device(dev);
+ for (pci_id = i6300esbwd_pci_devices;
+ pci_id < i6300esbwd_pci_devices + nitems(i6300esbwd_pci_devices);
+ ++pci_id) {
+ if (pci_id->id == pci_dev_id) {
+ device_set_desc(dev, pci_id->name);
+ err = BUS_PROBE_DEFAULT;
+ break;
+ }
+ }
+
+end:
+ return (err);
+}
+
+static int
+i6300esbwd_attach(device_t dev)
+{
+ struct i6300esbwd_softc *sc = device_get_softc(dev);
+ uint16_t regval;
+
+ sc->dev = dev;
+ sc->res_id = PCIR_BAR(0);
+ sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->res_id,
+ RF_ACTIVE);
+ if (sc->res == NULL) {
+ device_printf(dev, "unable to map memory region\n");
+ return (ENXIO);
+ }
+
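+	/* Do not raise an interrupt or SMI on the first stage timeout. */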
+ i6300esbwd_cfg_write(sc, WDT_INT_TYPE_DISABLED_VAL);
+ regval = i6300esbwd_lock_read(sc);
+ if (regval & WDT_LOCK)
+ sc->locked = true;
+ else {
+ sc->locked = false;
+ i6300esbwd_lock_write(sc, WDT_TOUT_CNF_WT_MODE);
+ }
+
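+	/* Clear any latched timeout status and restart from a full count. */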
+ i6300esbwd_unlock_res(sc);
+ bus_write_2(sc->res, WDT_RELOAD_REG, WDT_RELOAD | WDT_TIMEOUT);
+
+ sc->ev_tag = EVENTHANDLER_REGISTER(watchdog_list, i6300esbwd_event, sc,
+ 0);
+
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
+ SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "locked",
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ i6300esbwd_sysctl_locked, "I",
+	    "Lock the timer so that it cannot be disabled");
+
+ return (0);
+}
+
+static int
+i6300esbwd_detach(device_t dev)
+{
+ struct i6300esbwd_softc *sc = device_get_softc(dev);
+
+ if (sc->ev_tag)
+ EVENTHANDLER_DEREGISTER(watchdog_list, sc->ev_tag);
+
+ if (sc->res)
+ bus_release_resource(dev, SYS_RES_MEMORY, sc->res_id, sc->res);
+
+ return (0);
+}
+
+static device_method_t i6300esbwd_methods[] = {
+ DEVMETHOD(device_probe, i6300esbwd_probe),
+ DEVMETHOD(device_attach, i6300esbwd_attach),
+ DEVMETHOD(device_detach, i6300esbwd_detach),
+ DEVMETHOD(device_shutdown, i6300esbwd_detach),
+ DEVMETHOD_END
+};
+
+static driver_t i6300esbwd_driver = {
+ "i6300esbwd",
+ i6300esbwd_methods,
+ sizeof(struct i6300esbwd_softc),
+};
+
+DRIVER_MODULE(i6300esbwd, pci, i6300esbwd_driver, NULL, NULL);
diff --git a/sys/dev/ichwd/i6300esbwd.h b/sys/dev/ichwd/i6300esbwd.h
new file mode 100644
index 000000000000..39ed5d5a84f6
--- /dev/null
+++ b/sys/dev/ichwd/i6300esbwd.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#ifndef _I6300ESBWD_H_
+#define _I6300ESBWD_H_
+
+#define WDT_CONFIG_REG 0x60
+#define WDT_LOCK_REG 0x68
+
+#define WDT_PRELOAD_1_REG 0x00
+#define WDT_PRELOAD_2_REG 0x04
+#define WDT_INTR_REG 0x08
+#define WDT_RELOAD_REG 0x0C
+
+/* For config register */
+#define WDT_OUTPUT_EN (0x1 << 5)
+#define WDT_PRE_SEL (0x1 << 2)
+#define WDT_INT_TYPE_BITS (0x3)
+#define WDT_INT_TYPE_IRQ_VAL (0x0)
+#define WDT_INT_TYPE_RES_VAL (0x1)
+#define WDT_INT_TYPE_SMI_VAL (0x2)
+#define WDT_INT_TYPE_DISABLED_VAL (0x3)
+
+/* For lock register */
+#define WDT_TOUT_CNF_WT_MODE (0x0 << 2)
+#define WDT_TOUT_CNF_FR_MODE (0x1 << 2)
+#define WDT_ENABLE (0x02)
+#define WDT_LOCK (0x01)
+
+/* For preload 1/2 registers */
+#define WDT_PRELOAD_BIT 20
+#define WDT_PRELOAD_BITS ((0x1 << WDT_PRELOAD_BIT) - 1)
+
+/* For interrupt register */
+#define WDT_INTR_ACT (0x01 << 0)
+
+/* For reload register */
+#define WDT_TIMEOUT (0x01 << 9)
+#define WDT_RELOAD (0x01 << 8)
+#define WDT_UNLOCK_SEQ_1_VAL 0x80
+#define WDT_UNLOCK_SEQ_2_VAL 0x86
+
+#endif /* _I6300ESBWD_H_ */
diff --git a/sys/dev/ichwd/ichwd.c b/sys/dev/ichwd/ichwd.c
index cade2cc4fb45..5481553cc175 100644
--- a/sys/dev/ichwd/ichwd.c
+++ b/sys/dev/ichwd/ichwd.c
@@ -90,7 +90,7 @@ static struct ichwd_device ichwd_devices[] = {
{ DEVICEID_82801E, "Intel 82801E watchdog timer", 5, 1 },
{ DEVICEID_82801EB, "Intel 82801EB watchdog timer", 5, 1 },
{ DEVICEID_82801EBR, "Intel 82801EB/ER watchdog timer", 5, 1 },
- { DEVICEID_6300ESB, "Intel 6300ESB watchdog timer", 5, 1 },
+ { DEVICEID_6300ESB_1, "Intel 6300ESB watchdog timer", 5, 1 },
{ DEVICEID_82801FBR, "Intel 82801FB/FR watchdog timer", 6, 2 },
{ DEVICEID_ICH6M, "Intel ICH6M watchdog timer", 6, 2 },
{ DEVICEID_ICH6W, "Intel ICH6W watchdog timer", 6, 2 },
diff --git a/sys/dev/ichwd/ichwd.h b/sys/dev/ichwd/ichwd.h
index 90fda08b74c1..72d0ca1cd6aa 100644
--- a/sys/dev/ichwd/ichwd.h
+++ b/sys/dev/ichwd/ichwd.h
@@ -151,7 +151,8 @@ struct ichwd_softc {
#define DEVICEID_82801E 0x2450
#define DEVICEID_82801EB 0x24dc
#define DEVICEID_82801EBR 0x24d0
-#define DEVICEID_6300ESB 0x25a1
+#define DEVICEID_6300ESB_1 0x25a1
+#define DEVICEID_6300ESB_2 0x25ab
#define DEVICEID_82801FBR 0x2640
#define DEVICEID_ICH6M 0x2641
#define DEVICEID_ICH6W 0x2642
diff --git a/sys/dev/igc/if_igc.c b/sys/dev/igc/if_igc.c
index a1ae35c7aa43..f199a128c783 100644
--- a/sys/dev/igc/if_igc.c
+++ b/sys/dev/igc/if_igc.c
@@ -2599,8 +2599,8 @@ igc_if_get_counter(if_ctx_t ctx, ift_counter cnt)
sc->stats.ruc + sc->stats.roc +
sc->stats.mpc + sc->stats.htdpmc);
case IFCOUNTER_OERRORS:
- return (sc->stats.ecol + sc->stats.latecol +
- sc->watchdog_events);
+ return (if_get_counter_default(ifp, cnt) +
+ sc->stats.ecol + sc->stats.latecol + sc->watchdog_events);
default:
return (if_get_counter_default(ifp, cnt));
}
diff --git a/sys/dev/iicbus/iicbb.c b/sys/dev/iicbus/iicbb.c
index c344bda930b0..5f6423135f46 100644
--- a/sys/dev/iicbus/iicbb.c
+++ b/sys/dev/iicbus/iicbb.c
@@ -331,7 +331,7 @@ iicbb_getack(device_t dev)
{
struct iicbb_softc *sc = device_get_softc(dev);
int noack, err;
- int t;
+ int t = 0;
/* Release SDA so that the slave can drive it. */
err = iicbb_clockin(dev, 1);
@@ -341,12 +341,13 @@ iicbb_getack(device_t dev)
}
/* Sample SDA until ACK (low) or udelay runs out. */
- for (t = 0; t < sc->udelay; t++) {
+ do {
noack = I2C_GETSDA(dev);
if (!noack)
break;
DELAY(1);
- }
+ t++;
+ } while(t < sc->udelay);
DELAY(sc->udelay - t);
iicbb_clockout(dev);
diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c
index 6856b0551dde..668ccf056463 100644
--- a/sys/dev/iommu/busdma_iommu.c
+++ b/sys/dev/iommu/busdma_iommu.c
@@ -114,8 +114,8 @@ iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
* domain, and must collectively be assigned to use either IOMMU or
* bounce mapping.
*/
-device_t
-iommu_get_requester(device_t dev, uint16_t *rid)
+int
+iommu_get_requester(device_t dev, device_t *requesterp, uint16_t *rid)
{
devclass_t pci_class;
device_t l, pci, pcib, pcip, pcibp, requester;
@@ -129,7 +129,8 @@ iommu_get_requester(device_t dev, uint16_t *rid)
pci = device_get_parent(dev);
if (pci == NULL || device_get_devclass(pci) != pci_class) {
*rid = 0; /* XXXKIB: Could be ACPI HID */
- return (requester);
+ *requesterp = NULL;
+ return (ENOTTY);
}
*rid = pci_get_rid(dev);
@@ -141,16 +142,39 @@ iommu_get_requester(device_t dev, uint16_t *rid)
*/
for (;;) {
pci = device_get_parent(l);
- KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent "
- "for %s", device_get_name(dev), device_get_name(l)));
- KASSERT(device_get_devclass(pci) == pci_class,
- ("iommu_get_requester(%s): non-pci parent %s for %s",
- device_get_name(dev), device_get_name(pci),
- device_get_name(l)));
+ if (pci == NULL) {
+ if (bootverbose) {
+ printf(
+ "iommu_get_requester(%s): NULL parent for %s\n",
+ device_get_name(dev), device_get_name(l));
+ }
+ *rid = 0;
+ *requesterp = NULL;
+ return (ENXIO);
+ }
+ if (device_get_devclass(pci) != pci_class) {
+ if (bootverbose) {
+ printf(
+ "iommu_get_requester(%s): non-pci parent %s for %s\n",
+ device_get_name(dev), device_get_name(pci),
+ device_get_name(l));
+ }
+ *rid = 0;
+ *requesterp = NULL;
+ return (ENXIO);
+ }
pcib = device_get_parent(pci);
- KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge "
- "for %s", device_get_name(dev), device_get_name(pci)));
+ if (pcib == NULL) {
+ if (bootverbose) {
+ printf(
+ "iommu_get_requester(%s): NULL bridge for %s\n",
+ device_get_name(dev), device_get_name(pci));
+ }
+ *rid = 0;
+ *requesterp = NULL;
+ return (ENXIO);
+ }
/*
* The parent of our "bridge" isn't another PCI bus,
@@ -229,7 +253,8 @@ iommu_get_requester(device_t dev, uint16_t *rid)
}
}
}
- return (requester);
+ *requesterp = requester;
+ return (0);
}
struct iommu_ctx *
@@ -237,10 +262,13 @@ iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
{
device_t requester;
struct iommu_ctx *ctx;
+ int error;
bool disabled;
uint16_t rid;
- requester = iommu_get_requester(dev, &rid);
+ error = iommu_get_requester(dev, &requester, &rid);
+ if (error != 0)
+ return (NULL);
/*
* If the user requested the IOMMU disabled for the device, we
diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h
index b1858f0df9f7..55044042c5d2 100644
--- a/sys/dev/iommu/iommu.h
+++ b/sys/dev/iommu/iommu.h
@@ -170,7 +170,7 @@ void iommu_domain_unload(struct iommu_domain *domain,
void iommu_unit_pre_instantiate_ctx(struct iommu_unit *iommu);
struct iommu_ctx *iommu_instantiate_ctx(struct iommu_unit *iommu,
device_t dev, bool rmrr);
-device_t iommu_get_requester(device_t dev, uint16_t *rid);
+int iommu_get_requester(device_t dev, device_t *requester, uint16_t *rid);
int iommu_init_busdma(struct iommu_unit *unit);
void iommu_fini_busdma(struct iommu_unit *unit);
diff --git a/sys/dev/irdma/irdma_cm.c b/sys/dev/irdma/irdma_cm.c
index 450fae662dd8..d4d4f328fb43 100644
--- a/sys/dev/irdma/irdma_cm.c
+++ b/sys/dev/irdma/irdma_cm.c
@@ -1316,7 +1316,7 @@ irdma_cm_timer_tick(struct timer_list *t)
struct irdma_timer_entry *send_entry, *close_entry;
struct list_head *list_core_temp;
struct list_head *list_node;
- struct irdma_cm_core *cm_core = from_timer(cm_core, t, tcp_timer);
+ struct irdma_cm_core *cm_core = timer_container_of(cm_core, t, tcp_timer);
struct irdma_sc_vsi *vsi;
u32 settimer = 0;
unsigned long timetosend;
diff --git a/sys/dev/irdma/irdma_utils.c b/sys/dev/irdma/irdma_utils.c
index 5fc37022981f..038f1980082b 100644
--- a/sys/dev/irdma/irdma_utils.c
+++ b/sys/dev/irdma/irdma_utils.c
@@ -876,7 +876,7 @@ irdma_terminate_done(struct irdma_sc_qp *qp, int timeout_occurred)
static void
irdma_terminate_timeout(struct timer_list *t)
{
- struct irdma_qp *iwqp = from_timer(iwqp, t, terminate_timer);
+ struct irdma_qp *iwqp = timer_container_of(iwqp, t, terminate_timer);
struct irdma_sc_qp *qp = &iwqp->sc_qp;
irdma_terminate_done(qp, 1);
@@ -1528,7 +1528,7 @@ static void
irdma_hw_stats_timeout(struct timer_list *t)
{
struct irdma_vsi_pestat *pf_devstat =
- from_timer(pf_devstat, t, stats_timer);
+ timer_container_of(pf_devstat, t, stats_timer);
struct irdma_sc_vsi *sc_vsi = pf_devstat->vsi;
if (sc_vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
diff --git a/sys/dev/iwx/if_iwx.c b/sys/dev/iwx/if_iwx.c
index d60ef1874a6c..1fe531d69933 100644
--- a/sys/dev/iwx/if_iwx.c
+++ b/sys/dev/iwx/if_iwx.c
@@ -5673,6 +5673,10 @@ iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ieee80211_node *ni)
if (rinfo == NULL)
return EINVAL;
+	/*
+	 * Sequence number assignment is offloaded; it should be handled by
+	 * the firmware on all supported devices.
+	 */
+
+ /* Radiotap */
if (ieee80211_radiotap_active_vap(vap)) {
struct iwx_tx_radiotap_header *tap = &sc->sc_txtap;
@@ -5685,6 +5689,7 @@ iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ieee80211_node *ni)
ieee80211_radiotap_tx(vap, m);
}
+	/*
+	 * Encryption: CCMP uses the direct hardware path; TKIP/WEP still go
+	 * through the indirect, OpenBSD-style path for now.
+	 */
if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) {
k = ieee80211_crypto_get_txkey(ni, m);
if (k == NULL) {
@@ -10467,6 +10472,8 @@ iwx_attach(device_t dev)
IEEE80211_C_BGSCAN /* capable of bg scanning */
;
ic->ic_flags_ext = IEEE80211_FEXT_SCAN_OFFLOAD;
+ /* Enable seqno offload */
+ ic->ic_flags_ext |= IEEE80211_FEXT_SEQNO_OFFLOAD;
ic->ic_txstream = 2;
ic->ic_rxstream = 2;
diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c
index 73c0fd1ab16f..6d08bd49bc04 100644
--- a/sys/dev/ixgbe/if_ix.c
+++ b/sys/dev/ixgbe/if_ix.c
@@ -184,6 +184,7 @@ static int ixgbe_if_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int,
int);
static void ixgbe_if_queues_free(if_ctx_t);
static void ixgbe_if_timer(if_ctx_t, uint16_t);
+static const char *ixgbe_link_speed_to_str(u32 link_speed);
static void ixgbe_if_update_admin_status(if_ctx_t);
static void ixgbe_if_vlan_register(if_ctx_t, u16);
static void ixgbe_if_vlan_unregister(if_ctx_t, u16);
@@ -1349,8 +1350,6 @@ ixgbe_if_get_counter(if_ctx_t ctx, ift_counter cnt)
return (0);
case IFCOUNTER_IQDROPS:
return (sc->iqdrops);
- case IFCOUNTER_OQDROPS:
- return (0);
case IFCOUNTER_IERRORS:
return (sc->ierrors);
default:
@@ -4027,6 +4026,33 @@ ixgbe_if_stop(if_ctx_t ctx)
} /* ixgbe_if_stop */
/************************************************************************
+ * ixgbe_link_speed_to_str - Convert link speed to string
+ *
+ * Helper function to convert link speed constants to human-readable
+ * string representations in conventional Gbps or Mbps.
+ ************************************************************************/
+static const char *
+ixgbe_link_speed_to_str(u32 link_speed)
+{
+ switch (link_speed) {
+ case IXGBE_LINK_SPEED_10GB_FULL:
+ return "10 Gbps";
+ case IXGBE_LINK_SPEED_5GB_FULL:
+ return "5 Gbps";
+ case IXGBE_LINK_SPEED_2_5GB_FULL:
+ return "2.5 Gbps";
+ case IXGBE_LINK_SPEED_1GB_FULL:
+ return "1 Gbps";
+ case IXGBE_LINK_SPEED_100_FULL:
+ return "100 Mbps";
+ case IXGBE_LINK_SPEED_10_FULL:
+ return "10 Mbps";
+ default:
+ return "Unknown";
+ }
+} /* ixgbe_link_speed_to_str */
+
+/************************************************************************
* ixgbe_update_link_status - Update OS on link state
*
* Note: Only updates the OS on the cached link state.
@@ -4042,9 +4068,9 @@ ixgbe_if_update_admin_status(if_ctx_t ctx)
if (sc->link_up) {
if (sc->link_active == false) {
if (bootverbose)
- device_printf(dev, "Link is up %d Gbps %s \n",
- ((sc->link_speed == 128) ? 10 : 1),
- "Full Duplex");
+ device_printf(dev,
+ "Link is up %s Full Duplex\n",
+ ixgbe_link_speed_to_str(sc->link_speed));
sc->link_active = true;
/* Update any Flow Control changes */
ixgbe_fc_enable(&sc->hw);
diff --git a/sys/dev/ixgbe/ixgbe_e610.c b/sys/dev/ixgbe/ixgbe_e610.c
index 95c6dca416c6..18c4612446e0 100644
--- a/sys/dev/ixgbe/ixgbe_e610.c
+++ b/sys/dev/ixgbe/ixgbe_e610.c
@@ -1400,40 +1400,6 @@ s32 ixgbe_aci_set_link_restart_an(struct ixgbe_hw *hw, bool ena_link)
}
/**
- * ixgbe_is_media_cage_present - check if media cage is present
- * @hw: pointer to the HW struct
- *
- * Identify presence of media cage using the ACI command (0x06E0).
- *
- * Return: true if media cage is present, else false. If no cage, then
- * media type is backplane or BASE-T.
- */
-static bool ixgbe_is_media_cage_present(struct ixgbe_hw *hw)
-{
- struct ixgbe_aci_cmd_get_link_topo *cmd;
- struct ixgbe_aci_desc desc;
-
- cmd = &desc.params.get_link_topo;
-
- ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_link_topo);
-
- cmd->addr.topo_params.node_type_ctx =
- (IXGBE_ACI_LINK_TOPO_NODE_CTX_PORT <<
- IXGBE_ACI_LINK_TOPO_NODE_CTX_S);
-
- /* set node type */
- cmd->addr.topo_params.node_type_ctx |=
- (IXGBE_ACI_LINK_TOPO_NODE_TYPE_M &
- IXGBE_ACI_LINK_TOPO_NODE_TYPE_CAGE);
-
- /* Node type cage can be used to determine if cage is present. If AQC
- * returns error (ENOENT), then no cage present. If no cage present then
- * connection type is backplane or BASE-T.
- */
- return ixgbe_aci_get_netlist_node(hw, cmd, NULL, NULL);
-}
-
-/**
* ixgbe_get_media_type_from_phy_type - Gets media type based on phy type
* @hw: pointer to the HW struct
*
diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c
index 43c3af056b67..261f76055901 100644
--- a/sys/dev/ixl/if_ixl.c
+++ b/sys/dev/ixl/if_ixl.c
@@ -1785,7 +1785,7 @@ ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt)
case IFCOUNTER_OPACKETS:
return (vsi->opackets);
case IFCOUNTER_OERRORS:
- return (vsi->oerrors);
+ return (if_get_counter_default(ifp, cnt) + vsi->oerrors);
case IFCOUNTER_COLLISIONS:
/* Collisions are by standard impossible in 40G/10G Ethernet */
return (0);
@@ -1800,7 +1800,7 @@ ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt)
case IFCOUNTER_IQDROPS:
return (vsi->iqdrops);
case IFCOUNTER_OQDROPS:
- return (vsi->oqdrops);
+ return (if_get_counter_default(ifp, cnt) + vsi->oqdrops);
case IFCOUNTER_NOPROTO:
return (vsi->noproto);
default:
diff --git a/sys/dev/mpi3mr/mpi3mr.c b/sys/dev/mpi3mr/mpi3mr.c
index 99edd3542619..bcf8f46ddf5d 100644
--- a/sys/dev/mpi3mr/mpi3mr.c
+++ b/sys/dev/mpi3mr/mpi3mr.c
@@ -2799,10 +2799,11 @@ retry_init:
U32 fault = mpi3mr_regread(sc, MPI3_SYSIF_FAULT_OFFSET) &
MPI3_SYSIF_FAULT_CODE_MASK;
- if (fault == MPI3_SYSIF_FAULT_CODE_INSUFFICIENT_PCI_SLOT_POWER)
+ if (fault == MPI3_SYSIF_FAULT_CODE_INSUFFICIENT_PCI_SLOT_POWER) {
mpi3mr_dprint(sc, MPI3MR_INFO,
"controller faulted due to insufficient power, try by connecting it in a different slot\n");
goto err;
+ }
U32 host_diagnostic;
timeout = MPI3_SYSIF_DIAG_SAVE_TIMEOUT * 10;
@@ -4486,7 +4487,7 @@ static void mpi3mr_process_admin_reply_desc(struct mpi3mr_softc *sc,
Mpi3SuccessReplyDescriptor_t *success_desc;
Mpi3DefaultReply_t *def_reply = NULL;
struct mpi3mr_drvr_cmd *cmdptr = NULL;
- Mpi3SCSIIOReply_t *scsi_reply;
+ Mpi3SCSIIOReply_t *scsi_reply = NULL;
U8 *sense_buf = NULL;
*reply_dma = 0;
@@ -4589,7 +4590,7 @@ static void mpi3mr_process_admin_reply_desc(struct mpi3mr_softc *sc,
}
}
out:
- if (sense_buf != NULL)
+ if (scsi_reply != NULL && sense_buf != NULL)
mpi3mr_repost_sense_buf(sc,
scsi_reply->SenseDataBufferAddress);
return;
@@ -6161,7 +6162,7 @@ static int mpi3mr_issue_reset(struct mpi3mr_softc *sc, U16 reset_type,
{
int retval = -1;
U8 unlock_retry_count = 0;
- U32 host_diagnostic, ioc_status, ioc_config, scratch_pad0;
+ U32 host_diagnostic = 0, ioc_status, ioc_config, scratch_pad0;
U32 timeout = MPI3MR_RESET_ACK_TIMEOUT * 10;
if ((reset_type != MPI3_SYSIF_HOST_DIAG_RESET_ACTION_SOFT_RESET) &&
diff --git a/sys/dev/mpi3mr/mpi3mr_cam.c b/sys/dev/mpi3mr/mpi3mr_cam.c
index 77e25339a1a9..a5120e2788db 100644
--- a/sys/dev/mpi3mr/mpi3mr_cam.c
+++ b/sys/dev/mpi3mr/mpi3mr_cam.c
@@ -1856,10 +1856,11 @@ int mpi3mr_remove_device_from_os(struct mpi3mr_softc *sc, U16 handle)
"Poll reply queue once\n", target_outstanding, target->per_id);
mpi3mr_poll_pend_io_completions(sc);
target_outstanding = mpi3mr_atomic_read(&target->outstanding);
- if (target_outstanding)
+ if (target_outstanding) {
target_outstanding = mpi3mr_atomic_read(&target->outstanding);
mpi3mr_dprint(sc, MPI3MR_ERROR, "[%2d] outstanding IOs present on target: %d "
- "despite poll\n", target_outstanding, target->per_id);
+ "despite poll\n", target_outstanding, target->per_id);
+ }
}
if (target->exposed_to_os && !sc->reset_in_progress) {
diff --git a/sys/dev/mpr/mpr.c b/sys/dev/mpr/mpr.c
index d1c572e40669..262d6b58b705 100644
--- a/sys/dev/mpr/mpr.c
+++ b/sys/dev/mpr/mpr.c
@@ -1729,6 +1729,7 @@ mpr_get_tunables(struct mpr_softc *sc)
sc->enable_ssu = MPR_SSU_ENABLE_SSD_DISABLE_HDD;
sc->spinup_wait_time = DEFAULT_SPINUP_WAIT;
sc->use_phynum = 1;
+ sc->encl_min_slots = 0;
sc->max_reqframes = MPR_REQ_FRAMES;
sc->max_prireqframes = MPR_PRI_REQ_FRAMES;
sc->max_replyframes = MPR_REPLY_FRAMES;
@@ -1748,6 +1749,7 @@ mpr_get_tunables(struct mpr_softc *sc)
TUNABLE_INT_FETCH("hw.mpr.enable_ssu", &sc->enable_ssu);
TUNABLE_INT_FETCH("hw.mpr.spinup_wait_time", &sc->spinup_wait_time);
TUNABLE_INT_FETCH("hw.mpr.use_phy_num", &sc->use_phynum);
+ TUNABLE_INT_FETCH("hw.mpr.encl_min_slots", &sc->encl_min_slots);
TUNABLE_INT_FETCH("hw.mpr.max_reqframes", &sc->max_reqframes);
TUNABLE_INT_FETCH("hw.mpr.max_prireqframes", &sc->max_prireqframes);
TUNABLE_INT_FETCH("hw.mpr.max_replyframes", &sc->max_replyframes);
@@ -1797,6 +1799,10 @@ mpr_get_tunables(struct mpr_softc *sc)
device_get_unit(sc->mpr_dev));
TUNABLE_INT_FETCH(tmpstr, &sc->use_phynum);
+ snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.encl_min_slots",
+ device_get_unit(sc->mpr_dev));
+ TUNABLE_INT_FETCH(tmpstr, &sc->encl_min_slots);
+
snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_reqframes",
device_get_unit(sc->mpr_dev));
TUNABLE_INT_FETCH(tmpstr, &sc->max_reqframes);
@@ -1951,6 +1957,10 @@ mpr_setup_sysctl(struct mpr_softc *sc)
SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
OID_AUTO, "prp_page_alloc_fail", CTLFLAG_RD,
&sc->prp_page_alloc_fail, "PRP page allocation failures");
+
+ SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
+ OID_AUTO, "encl_min_slots", CTLFLAG_RW, &sc->encl_min_slots, 0,
+ "force enclosure minimum slots");
}
static struct mpr_debug_string {
diff --git a/sys/dev/mpr/mpr_mapping.c b/sys/dev/mpr/mpr_mapping.c
index f9a9ac1c53d0..38aa4dfc7ef2 100644
--- a/sys/dev/mpr/mpr_mapping.c
+++ b/sys/dev/mpr/mpr_mapping.c
@@ -2785,6 +2785,8 @@ mpr_mapping_enclosure_dev_status_change_event(struct mpr_softc *sc,
* DPM, if it's being used.
*/
if (enc_idx != MPR_ENCTABLE_BAD_IDX) {
+ u16 new_num_slots;
+
et_entry = &sc->enclosure_table[enc_idx];
if (et_entry->init_complete &&
!et_entry->missing_count) {
@@ -2796,6 +2798,17 @@ mpr_mapping_enclosure_dev_status_change_event(struct mpr_softc *sc,
et_entry->enc_handle = le16toh(event_data->
EnclosureHandle);
et_entry->start_slot = le16toh(event_data->StartSlot);
+ new_num_slots = le16toh(event_data->NumSlots);
+ if (new_num_slots < sc->encl_min_slots) {
+ mpr_dprint(sc, MPR_MAPPING, "%s: Enclosure %d num_slots %d, overriding with %d.\n",
+ __func__, enc_idx, new_num_slots, sc->encl_min_slots);
+ new_num_slots = sc->encl_min_slots;
+ }
+ if (et_entry->num_slots != new_num_slots) {
+ mpr_dprint(sc, MPR_MAPPING, "%s: Enclosure %d old num_slots %d, new %d.\n",
+				    __func__, enc_idx, et_entry->num_slots, new_num_slots);
+ et_entry->num_slots = new_num_slots;
+ }
saved_phy_bits = et_entry->phy_bits;
et_entry->phy_bits |= le32toh(event_data->PhyBits);
if (saved_phy_bits != et_entry->phy_bits)
@@ -2858,6 +2871,11 @@ mpr_mapping_enclosure_dev_status_change_event(struct mpr_softc *sc,
et_entry->start_index = MPR_MAPTABLE_BAD_IDX;
et_entry->dpm_entry_num = MPR_DPM_BAD_IDX;
et_entry->num_slots = le16toh(event_data->NumSlots);
+ if (et_entry->num_slots < sc->encl_min_slots) {
+ mpr_dprint(sc, MPR_ERROR | MPR_MAPPING, "%s: Enclosure %d num_slots is %d, overriding with %d.\n",
+ __func__, enc_idx, et_entry->num_slots, sc->encl_min_slots);
+ et_entry->num_slots = sc->encl_min_slots;
+ }
et_entry->start_slot = le16toh(event_data->StartSlot);
et_entry->phy_bits = le32toh(event_data->PhyBits);
}
diff --git a/sys/dev/mpr/mprvar.h b/sys/dev/mpr/mprvar.h
index 0f1743f4266e..93f3fbffe079 100644
--- a/sys/dev/mpr/mprvar.h
+++ b/sys/dev/mpr/mprvar.h
@@ -366,6 +366,7 @@ struct mpr_softc {
int spinup_wait_time;
int use_phynum;
int dump_reqs_alltypes;
+ int encl_min_slots;
uint64_t chain_alloc_fail;
uint64_t prp_page_alloc_fail;
struct sysctl_ctx_list sysctl_ctx;
diff --git a/sys/dev/mwl/if_mwl.c b/sys/dev/mwl/if_mwl.c
index 2570cbce525b..c885968dfe15 100644
--- a/sys/dev/mwl/if_mwl.c
+++ b/sys/dev/mwl/if_mwl.c
@@ -1797,7 +1797,7 @@ mwl_updateslot(struct ieee80211com *ic)
return;
/*
- * Calculate the ERP flags. The firwmare will use
+ * Calculate the ERP flags. The firmware will use
* this to carry out the appropriate measures.
*/
prot = 0;
@@ -4017,7 +4017,7 @@ mkpeerinfo(MWL_HAL_PEERINFO *pi, const struct ieee80211_node *ni)
pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_SHORTGI40;
if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) == 0)
pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_SHORTGI20;
- if (ni->ni_chw != IEEE80211_STA_RX_BW_40)
+ if (ni->ni_chw != NET80211_STA_RX_BW_40)
pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_CHWIDTH40;
}
return pi;
diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c
index 84f365024f13..ead91f0d01fe 100644
--- a/sys/dev/nvme/nvme.c
+++ b/sys/dev/nvme/nvme.c
@@ -295,7 +295,6 @@ nvme_register_consumer(nvme_cons_ns_fn_t ns_fn, nvme_cons_ctrlr_fn_t ctrlr_fn,
void
nvme_unregister_consumer(struct nvme_consumer *consumer)
{
-
consumer->id = INVALID_CONSUMER_ID;
}
diff --git a/sys/dev/nvme/nvme_ahci.c b/sys/dev/nvme/nvme_ahci.c
index 888207a454f7..b06661226d34 100644
--- a/sys/dev/nvme/nvme_ahci.c
+++ b/sys/dev/nvme/nvme_ahci.c
@@ -124,6 +124,5 @@ bad:
static int
nvme_ahci_detach(device_t dev)
{
-
return (nvme_detach(dev));
}
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index fd7f00ced14b..3a1894bf754d 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -41,6 +41,9 @@
#include <sys/endian.h>
#include <sys/stdarg.h>
#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
#include "nvme_private.h"
#include "nvme_linux.h"
@@ -597,7 +600,6 @@ nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr)
static bool
is_log_page_id_valid(uint8_t page_id)
{
-
switch (page_id) {
case NVME_LOG_ERROR:
case NVME_LOG_HEALTH_INFORMATION:
@@ -653,7 +655,6 @@ static void
nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr,
uint8_t state)
{
-
if (state & NVME_CRIT_WARN_ST_AVAILABLE_SPARE)
nvme_printf(ctrlr, "SMART WARNING: available spare space below threshold\n");
@@ -781,7 +782,6 @@ nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
static void
nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr)
{
-
ctrlr->int_coal_time = 0;
TUNABLE_INT_FETCH("hw.nvme.int_coal_time",
&ctrlr->int_coal_time);
@@ -1268,6 +1268,34 @@ nvme_ctrlr_shared_handler(void *arg)
nvme_mmio_write_4(ctrlr, intmc, 1);
}
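+/* Cap the on-stack array of wired pages at 1 KiB worth of vm_page_t pointers. */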
+#define NVME_MAX_PAGES (int)(1024 / sizeof(vm_page_t))
+
+static int
+nvme_user_ioctl_req(vm_offset_t addr, size_t len, bool is_read,
+ vm_page_t *upages, int max_pages, int *npagesp, struct nvme_request **req,
+ nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+ vm_prot_t prot = VM_PROT_READ;
+ int err;
+
+ if (is_read)
+ prot |= VM_PROT_WRITE; /* Device will write to host memory */
+ err = vm_fault_hold_pages(&curproc->p_vmspace->vm_map,
+ addr, len, prot, upages, max_pages, npagesp);
+ if (err != 0)
+ return (err);
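+	/*
+	 * Hand the wired user pages to the request as a page-based memory
+	 * descriptor; the caller releases them once the command completes.
+	 */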
+ *req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg);
+ (*req)->payload = memdesc_vmpages(upages, len, addr & PAGE_MASK);
+ (*req)->payload_valid = true;
+ return (0);
+}
+
+static void
+nvme_user_ioctl_free(vm_page_t *pages, int npage)
+{
+ vm_page_unhold_pages(pages, npage);
+}
+
static void
nvme_pt_done(void *arg, const struct nvme_completion *cpl)
{
@@ -1290,30 +1318,28 @@ nvme_pt_done(void *arg, const struct nvme_completion *cpl)
int
nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
- struct nvme_pt_command *pt, uint32_t nsid, int is_user_buffer,
+ struct nvme_pt_command *pt, uint32_t nsid, int is_user,
int is_admin_cmd)
{
- struct nvme_request *req;
- struct mtx *mtx;
- struct buf *buf = NULL;
- int ret = 0;
+ struct nvme_request *req;
+ struct mtx *mtx;
+ int ret = 0;
+ int npages = 0;
+ vm_page_t upages[NVME_MAX_PAGES];
if (pt->len > 0) {
if (pt->len > ctrlr->max_xfer_size) {
- nvme_printf(ctrlr, "pt->len (%d) "
- "exceeds max_xfer_size (%d)\n", pt->len,
- ctrlr->max_xfer_size);
- return EIO;
+ nvme_printf(ctrlr,
+ "len (%d) exceeds max_xfer_size (%d)\n",
+ pt->len, ctrlr->max_xfer_size);
+ return (EIO);
}
- if (is_user_buffer) {
- buf = uma_zalloc(pbuf_zone, M_WAITOK);
- buf->b_iocmd = pt->is_read ? BIO_READ : BIO_WRITE;
- if (vmapbuf(buf, pt->buf, pt->len, 1) < 0) {
- ret = EFAULT;
- goto err;
- }
- req = nvme_allocate_request_vaddr(buf->b_data, pt->len,
- M_WAITOK, nvme_pt_done, pt);
+ if (is_user) {
+ ret = nvme_user_ioctl_req((vm_offset_t)pt->buf, pt->len,
+ pt->is_read, upages, nitems(upages), &npages, &req,
+ nvme_pt_done, pt);
+ if (ret != 0)
+ return (ret);
} else
req = nvme_allocate_request_vaddr(pt->buf, pt->len,
M_WAITOK, nvme_pt_done, pt);
@@ -1347,11 +1373,8 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
mtx_sleep(pt, mtx, PRIBIO, "nvme_pt", 0);
mtx_unlock(mtx);
- if (buf != NULL) {
- vunmapbuf(buf);
-err:
- uma_zfree(pbuf_zone, buf);
- }
+ if (npages > 0)
+ nvme_user_ioctl_free(upages, npages);
return (ret);
}
@@ -1377,8 +1400,9 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
{
struct nvme_request *req;
struct mtx *mtx;
- struct buf *buf = NULL;
int ret = 0;
+ int npages = 0;
+ vm_page_t upages[NVME_MAX_PAGES];
/*
* We don't support metadata.
@@ -1389,28 +1413,16 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
if (npc->data_len > 0 && npc->addr != 0) {
if (npc->data_len > ctrlr->max_xfer_size) {
nvme_printf(ctrlr,
- "npc->data_len (%d) exceeds max_xfer_size (%d)\n",
+ "data_len (%d) exceeds max_xfer_size (%d)\n",
npc->data_len, ctrlr->max_xfer_size);
return (EIO);
}
- /*
- * We only support data out or data in commands, but not both at
- * once. However, there's some comands with lower bit cleared
- * that are really read commands, so we should filter & 3 == 0,
- * but don't.
- */
- if ((npc->opcode & 0x3) == 3)
- return (EINVAL);
if (is_user) {
- buf = uma_zalloc(pbuf_zone, M_WAITOK);
- buf->b_iocmd = npc->opcode & 1 ? BIO_WRITE : BIO_READ;
- if (vmapbuf(buf, (void *)(uintptr_t)npc->addr,
- npc->data_len, 1) < 0) {
- ret = EFAULT;
- goto err;
- }
- req = nvme_allocate_request_vaddr(buf->b_data,
- npc->data_len, M_WAITOK, nvme_npc_done, npc);
+ ret = nvme_user_ioctl_req(npc->addr, npc->data_len,
+ npc->opcode & 0x1, upages, nitems(upages), &npages,
+ &req, nvme_npc_done, npc);
+ if (ret != 0)
+ return (ret);
} else
req = nvme_allocate_request_vaddr(
(void *)(uintptr_t)npc->addr, npc->data_len,
@@ -1420,8 +1432,8 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
req->cmd.opc = npc->opcode;
req->cmd.fuse = npc->flags;
- req->cmd.rsvd2 = htole16(npc->cdw2);
- req->cmd.rsvd3 = htole16(npc->cdw3);
+ req->cmd.rsvd2 = htole32(npc->cdw2);
+ req->cmd.rsvd3 = htole32(npc->cdw3);
req->cmd.cdw10 = htole32(npc->cdw10);
req->cmd.cdw11 = htole32(npc->cdw11);
req->cmd.cdw12 = htole32(npc->cdw12);
@@ -1445,11 +1457,8 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
mtx_sleep(npc, mtx, PRIBIO, "nvme_npc", 0);
mtx_unlock(mtx);
- if (buf != NULL) {
- vunmapbuf(buf);
-err:
- uma_zfree(pbuf_zone, buf);
- }
+ if (npages > 0)
+ nvme_user_ioctl_free(upages, npages);
return (ret);
}
@@ -1776,7 +1785,6 @@ void
nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
struct nvme_request *req)
{
-
nvme_qpair_submit_request(&ctrlr->adminq, req);
}
@@ -1793,14 +1801,12 @@ nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
device_t
nvme_ctrlr_get_device(struct nvme_controller *ctrlr)
{
-
return (ctrlr->dev);
}
const struct nvme_controller_data *
nvme_ctrlr_get_data(struct nvme_controller *ctrlr)
{
-
return (&ctrlr->cdata);
}
@@ -1853,7 +1859,6 @@ nvme_ctrlr_suspend(struct nvme_controller *ctrlr)
int
nvme_ctrlr_resume(struct nvme_controller *ctrlr)
{
-
/*
* Can't touch failed controllers, so nothing to do to resume.
*/
diff --git a/sys/dev/nvme/nvme_ctrlr_cmd.c b/sys/dev/nvme/nvme_ctrlr_cmd.c
index 993a7718356d..5a44ed425acb 100644
--- a/sys/dev/nvme/nvme_ctrlr_cmd.c
+++ b/sys/dev/nvme/nvme_ctrlr_cmd.c
@@ -281,7 +281,6 @@ nvme_ctrlr_cmd_get_error_page(struct nvme_controller *ctrlr,
struct nvme_error_information_entry *payload, uint32_t num_entries,
nvme_cb_fn_t cb_fn, void *cb_arg)
{
-
KASSERT(num_entries > 0, ("%s called with num_entries==0\n", __func__));
/* Controller's error log page entries is 0-based. */
@@ -302,7 +301,6 @@ nvme_ctrlr_cmd_get_health_information_page(struct nvme_controller *ctrlr,
uint32_t nsid, struct nvme_health_information_page *payload,
nvme_cb_fn_t cb_fn, void *cb_arg)
{
-
nvme_ctrlr_cmd_get_log_page(ctrlr, NVME_LOG_HEALTH_INFORMATION,
nsid, payload, sizeof(*payload), cb_fn, cb_arg);
}
@@ -311,7 +309,6 @@ void
nvme_ctrlr_cmd_get_firmware_page(struct nvme_controller *ctrlr,
struct nvme_firmware_page *payload, nvme_cb_fn_t cb_fn, void *cb_arg)
{
-
nvme_ctrlr_cmd_get_log_page(ctrlr, NVME_LOG_FIRMWARE_SLOT,
NVME_GLOBAL_NAMESPACE_TAG, payload, sizeof(*payload), cb_fn,
cb_arg);
diff --git a/sys/dev/nvme/nvme_ns.c b/sys/dev/nvme/nvme_ns.c
index 3f29382fe42f..e84d2066930e 100644
--- a/sys/dev/nvme/nvme_ns.c
+++ b/sys/dev/nvme/nvme_ns.c
@@ -129,7 +129,6 @@ static int
nvme_ns_close(struct cdev *dev __unused, int flags, int fmt __unused,
struct thread *td)
{
-
return (0);
}
@@ -231,7 +230,6 @@ nvme_ns_get_model_number(struct nvme_namespace *ns)
const struct nvme_namespace_data *
nvme_ns_get_data(struct nvme_namespace *ns)
{
-
return (&ns->data);
}
@@ -631,7 +629,6 @@ nvme_ns_construct(struct nvme_namespace *ns, uint32_t id,
void
nvme_ns_destruct(struct nvme_namespace *ns)
{
-
if (ns->cdev != NULL) {
if (ns->cdev->si_drv2 != NULL)
destroy_dev(ns->cdev->si_drv2);
diff --git a/sys/dev/nvme/nvme_pci.c b/sys/dev/nvme/nvme_pci.c
index 29b49b7df403..c07a68d2f0dc 100644
--- a/sys/dev/nvme/nvme_pci.c
+++ b/sys/dev/nvme/nvme_pci.c
@@ -151,7 +151,6 @@ nvme_pci_probe (device_t device)
static int
nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
{
-
ctrlr->resource_id = PCIR_BAR(0);
ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 36f00fedc48e..52f9e12f8f9a 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -459,8 +459,7 @@ int nvme_detach(device_t dev);
* vast majority of these without waiting for a tick plus scheduling delays. Since
* these are on startup, this drastically reduces startup time.
*/
-static __inline
-void
+static __inline void
nvme_completion_poll(struct nvme_completion_poll_status *status)
{
int timeout = ticks + 10 * hz;
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
index bd8626e32209..4f2c44da3b4f 100644
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -793,7 +793,6 @@ nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
void
nvme_io_qpair_destroy(struct nvme_qpair *qpair)
{
-
nvme_qpair_destroy(qpair);
}
@@ -1202,7 +1201,6 @@ _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
void
nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
{
-
mtx_lock(&qpair->lock);
_nvme_qpair_submit_request(qpair, req);
mtx_unlock(&qpair->lock);
@@ -1226,7 +1224,6 @@ nvme_qpair_enable(struct nvme_qpair *qpair)
void
nvme_qpair_reset(struct nvme_qpair *qpair)
{
-
qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;
/*
diff --git a/sys/dev/nvme/nvme_sim.c b/sys/dev/nvme/nvme_sim.c
index 4974bb718222..a06774a64761 100644
--- a/sys/dev/nvme/nvme_sim.c
+++ b/sys/dev/nvme/nvme_sim.c
@@ -301,7 +301,6 @@ nvme_sim_action(struct cam_sim *sim, union ccb *ccb)
static void
nvme_sim_poll(struct cam_sim *sim)
{
-
nvme_ctrlr_poll(sim2ctrlr(sim));
}
diff --git a/sys/dev/nvme/nvme_sysctl.c b/sys/dev/nvme/nvme_sysctl.c
index a5a44721f9f9..50d19e730a16 100644
--- a/sys/dev/nvme/nvme_sysctl.c
+++ b/sys/dev/nvme/nvme_sysctl.c
@@ -153,7 +153,6 @@ nvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS)
static void
nvme_qpair_reset_stats(struct nvme_qpair *qpair)
{
-
/*
* Reset the values. Due to sanity checks in
* nvme_qpair_process_completions, we reset the number of interrupt
diff --git a/sys/dev/nvme/nvme_util.c b/sys/dev/nvme/nvme_util.c
index 0a07653a7378..cb0ba729ac96 100644
--- a/sys/dev/nvme/nvme_util.c
+++ b/sys/dev/nvme/nvme_util.c
@@ -208,31 +208,33 @@ nvme_opcode_sbuf(bool admin, uint8_t opc, struct sbuf *sb)
if (s == NULL)
sbuf_printf(sb, "%s (%02x)", type, opc);
else
- sbuf_printf(sb, "%s", s);
+ sbuf_printf(sb, "%s (%02x)", s, opc);
}
void
nvme_sc_sbuf(const struct nvme_completion *cpl, struct sbuf *sb)
{
const char *s, *type;
- uint16_t status;
+ uint16_t status, sc, sct;
status = le16toh(cpl->status);
- switch (NVME_STATUS_GET_SCT(status)) {
+ sc = NVME_STATUS_GET_SC(status);
+ sct = NVME_STATUS_GET_SCT(status);
+ switch (sct) {
case NVME_SCT_GENERIC:
- s = generic_status[NVME_STATUS_GET_SC(status)];
+ s = generic_status[sc];
type = "GENERIC";
break;
case NVME_SCT_COMMAND_SPECIFIC:
- s = command_specific_status[NVME_STATUS_GET_SC(status)];
+ s = command_specific_status[sc];
type = "COMMAND SPECIFIC";
break;
case NVME_SCT_MEDIA_ERROR:
- s = media_error_status[NVME_STATUS_GET_SC(status)];
+ s = media_error_status[sc];
type = "MEDIA ERROR";
break;
case NVME_SCT_PATH_RELATED:
- s = path_related_status[NVME_STATUS_GET_SC(status)];
+ s = path_related_status[sc];
type = "PATH RELATED";
break;
case NVME_SCT_VENDOR_SPECIFIC:
@@ -246,12 +248,11 @@ nvme_sc_sbuf(const struct nvme_completion *cpl, struct sbuf *sb)
}
if (type == NULL)
- sbuf_printf(sb, "RESERVED (%02x/%02x)",
- NVME_STATUS_GET_SCT(status), NVME_STATUS_GET_SC(status));
+ sbuf_printf(sb, "RESERVED (%02x/%02x)", sct, sc);
else if (s == NULL)
- sbuf_printf(sb, "%s (%02x)", type, NVME_STATUS_GET_SC(status));
+ sbuf_printf(sb, "%s (%02x/%02x)", type, sct, sc);
else
- sbuf_printf(sb, "%s", s);
+ sbuf_printf(sb, "%s (%02x/%02x)", s, sct, sc);
}
void
diff --git a/sys/dev/nvmf/nvmf_tcp.c b/sys/dev/nvmf/nvmf_tcp.c
index 6ad5229f6043..e50d7ff48d2b 100644
--- a/sys/dev/nvmf/nvmf_tcp.c
+++ b/sys/dev/nvmf/nvmf_tcp.c
@@ -970,7 +970,7 @@ nvmf_tcp_handle_r2t(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
}
/*
- * XXX: The spec does not specify how to handle R2T tranfers
+ * XXX: The spec does not specify how to handle R2T transfers
* out of range of the original command.
*/
data_len = le32toh(r2t->r2tl);
diff --git a/sys/dev/pci/pci_user.c b/sys/dev/pci/pci_user.c
index f68b5b7e71ff..9768030995e7 100644
--- a/sys/dev/pci/pci_user.c
+++ b/sys/dev/pci/pci_user.c
@@ -79,6 +79,9 @@ struct pci_conf32 {
u_int8_t pc_revid; /* chip revision ID */
char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
u_int32_t pd_unit; /* device unit number */
+ int pd_numa_domain; /* device NUMA domain */
+ u_int32_t pc_reported_len;/* length of PCI data reported */
+ char pc_spare[64]; /* space for future fields */
};
struct pci_match_conf32 {
@@ -502,11 +505,58 @@ pci_conf_match_freebsd6_32(struct pci_match_conf_freebsd6_32 *matches, int num_m
#endif /* COMPAT_FREEBSD32 */
#endif /* !PRE7_COMPAT */
+#ifdef COMPAT_FREEBSD14
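+/*
+ * struct pci_conf as it existed before the pd_numa_domain, pc_reported_len
+ * and pc_spare fields were added, kept for FreeBSD 14 binary compatibility.
+ */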
+struct pci_conf_freebsd14 {
+ struct pcisel pc_sel; /* domain+bus+slot+function */
+ u_int8_t pc_hdr; /* PCI header type */
+ u_int16_t pc_subvendor; /* card vendor ID */
+ u_int16_t pc_subdevice; /* card device ID, assigned by
+ card vendor */
+ u_int16_t pc_vendor; /* chip vendor ID */
+ u_int16_t pc_device; /* chip device ID, assigned by
+ chip vendor */
+ u_int8_t pc_class; /* chip PCI class */
+ u_int8_t pc_subclass; /* chip PCI subclass */
+ u_int8_t pc_progif; /* chip PCI programming interface */
+ u_int8_t pc_revid; /* chip revision ID */
+ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
+ u_long pd_unit; /* device unit number */
+};
+#define PCIOCGETCONF_FREEBSD14 _IOWR('p', 5, struct pci_conf_io)
+
+#ifdef COMPAT_FREEBSD32
+struct pci_conf_freebsd14_32 {
+ struct pcisel pc_sel; /* domain+bus+slot+function */
+ u_int8_t pc_hdr; /* PCI header type */
+ u_int16_t pc_subvendor; /* card vendor ID */
+ u_int16_t pc_subdevice; /* card device ID, assigned by
+ card vendor */
+ u_int16_t pc_vendor; /* chip vendor ID */
+ u_int16_t pc_device; /* chip device ID, assigned by
+ chip vendor */
+ u_int8_t pc_class; /* chip PCI class */
+ u_int8_t pc_subclass; /* chip PCI subclass */
+ u_int8_t pc_progif; /* chip PCI programming interface */
+ u_int8_t pc_revid; /* chip revision ID */
+ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
+ u_int32_t pd_unit; /* device unit number */
+};
+#define PCIOCGETCONF_FREEBSD14_32 \
+ _IOC_NEWTYPE(PCIOCGETCONF_FREEBSD14, struct pci_conf_io32)
+#endif /* COMPAT_FREEBSD32 */
+#endif /* COMPAT_FREEBSD14 */
+
union pci_conf_union {
struct pci_conf pc;
#ifdef COMPAT_FREEBSD32
struct pci_conf32 pc32;
#endif
+#ifdef COMPAT_FREEBSD14
+ struct pci_conf_freebsd14 pc14;
+#ifdef COMPAT_FREEBSD32
+ struct pci_conf_freebsd14_32 pc14_32;
+#endif
+#endif
#ifdef PRE7_COMPAT
struct pci_conf_freebsd6 pco;
#ifdef COMPAT_FREEBSD32
@@ -522,10 +572,16 @@ pci_conf_match(u_long cmd, struct pci_match_conf *matches, int num_matches,
switch (cmd) {
case PCIOCGETCONF:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+#endif
return (pci_conf_match_native(
(struct pci_match_conf *)matches, num_matches, match_buf));
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF32:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14_32:
+#endif
return (pci_conf_match32((struct pci_match_conf32 *)matches,
num_matches, match_buf));
#endif
@@ -645,9 +701,15 @@ pci_match_conf_size(u_long cmd)
switch (cmd) {
case PCIOCGETCONF:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+#endif
return (sizeof(struct pci_match_conf));
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF32:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14_32:
+#endif
return (sizeof(struct pci_match_conf32));
#endif
#ifdef PRE7_COMPAT
@@ -675,6 +737,14 @@ pci_conf_size(u_long cmd)
case PCIOCGETCONF32:
return (sizeof(struct pci_conf32));
#endif
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+ return (sizeof(struct pci_conf_freebsd14));
+#ifdef COMPAT_FREEBSD32
+ case PCIOCGETCONF_FREEBSD14_32:
+ return (sizeof(struct pci_conf_freebsd14_32));
+#endif
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_FREEBSD6:
return (sizeof(struct pci_conf_freebsd6));
@@ -698,6 +768,9 @@ pci_conf_io_init(struct pci_conf_io *cio, caddr_t data, u_long cmd)
switch (cmd) {
case PCIOCGETCONF:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_FREEBSD6:
#endif
@@ -706,6 +779,9 @@ pci_conf_io_init(struct pci_conf_io *cio, caddr_t data, u_long cmd)
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF32:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14_32:
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_FREEBSD6_32:
#endif
@@ -739,6 +815,9 @@ pci_conf_io_update_data(const struct pci_conf_io *cio, caddr_t data,
switch (cmd) {
case PCIOCGETCONF:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_FREEBSD6:
#endif
@@ -751,6 +830,9 @@ pci_conf_io_update_data(const struct pci_conf_io *cio, caddr_t data,
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF32:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14_32:
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_FREEBSD6_32:
#endif
@@ -781,8 +863,17 @@ pci_conf_for_copyout(const struct pci_conf *pcp, union pci_conf_union *pcup,
pcup->pc = *pcp;
return;
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
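+		/*
+		 * The FreeBSD 14 layout is a prefix of the current struct
+		 * pci_conf, so copying the leading bytes is sufficient here.
+		 */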
+ memcpy(&pcup->pc14, pcp, sizeof(pcup->pc14));
+ return;
+#endif
+
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF32:
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14_32:
+#endif
pcup->pc32.pc_sel = pcp->pc_sel;
pcup->pc32.pc_hdr = pcp->pc_hdr;
pcup->pc32.pc_subvendor = pcp->pc_subvendor;
@@ -796,8 +887,13 @@ pci_conf_for_copyout(const struct pci_conf *pcp, union pci_conf_union *pcup,
strlcpy(pcup->pc32.pd_name, pcp->pd_name,
sizeof(pcup->pc32.pd_name));
pcup->pc32.pd_unit = (uint32_t)pcp->pd_unit;
+ if (cmd == PCIOCGETCONF32) {
+ pcup->pc32.pd_numa_domain = pcp->pd_numa_domain;
+ pcup->pc32.pc_reported_len =
+ (uint32_t)offsetof(struct pci_conf32, pc_spare);
+ }
return;
-#endif
+#endif /* COMPAT_FREEBSD32 */
#ifdef PRE7_COMPAT
#ifdef COMPAT_FREEBSD32
@@ -1024,7 +1120,7 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
struct pci_map *pm;
struct pci_bar_mmap *pbm;
size_t confsz, iolen;
- int error, ionum, i, num_patterns;
+ int domain, error, ionum, i, num_patterns;
union pci_conf_union pcu;
#ifdef PRE7_COMPAT
struct pci_io iodata;
@@ -1044,6 +1140,12 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF32:
#endif
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+#ifdef COMPAT_FREEBSD32
+ case PCIOCGETCONF_FREEBSD14_32:
+#endif
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_FREEBSD6:
#ifdef COMPAT_FREEBSD32
@@ -1069,6 +1171,12 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF32:
#endif
+#ifdef COMPAT_FREEBSD14
+ case PCIOCGETCONF_FREEBSD14:
+#ifdef COMPAT_FREEBSD32
+ case PCIOCGETCONF_FREEBSD14_32:
+#endif
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_FREEBSD6:
#ifdef COMPAT_FREEBSD32
@@ -1201,6 +1309,12 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
dinfo->conf.pd_unit = 0;
}
+ if (dinfo->cfg.dev != NULL &&
+ bus_get_domain(dinfo->cfg.dev, &domain) == 0)
+ dinfo->conf.pd_numa_domain = domain;
+ else
+ dinfo->conf.pd_numa_domain = 0;
+
if (pattern_buf == NULL ||
pci_conf_match(cmd, pattern_buf, num_patterns,
&dinfo->conf) == 0) {
@@ -1217,6 +1331,9 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
break;
}
+ dinfo->conf.pc_reported_len =
+ offsetof(struct pci_conf, pc_spare);
+
pci_conf_for_copyout(&dinfo->conf, &pcu, cmd);
error = copyout(&pcu,
(caddr_t)cio->matches +
diff --git a/sys/dev/puc/pucdata.c b/sys/dev/puc/pucdata.c
index e911a407cca9..436af76001da 100644
--- a/sys/dev/puc/pucdata.c
+++ b/sys/dev/puc/pucdata.c
@@ -64,6 +64,7 @@ static puc_config_f puc_config_quatech;
static puc_config_f puc_config_syba;
static puc_config_f puc_config_siig;
static puc_config_f puc_config_sunix;
+static puc_config_f puc_config_systembase;
static puc_config_f puc_config_timedia;
static puc_config_f puc_config_titan;
@@ -1705,6 +1706,23 @@ const struct puc_cfg puc_pci_devices[] = {
PUC_PORT_4S, 0x10, 0, 8,
.config_function = puc_config_icbook
},
+
+ /*
+ * Systembase cards using SB16C1050 UARTs:
+ */
+ { 0x14a1, 0x0008, 0x14a1, 0x0008,
+ "Systembase SB16C1058",
+ DEFAULT_RCLK * 8,
+ PUC_PORT_8S, 0x10, 0, 8,
+ .config_function = puc_config_systembase,
+ },
+ { 0x14a1, 0x0004, 0x14a1, 0x0004,
+ "Systembase SB16C1054",
+ DEFAULT_RCLK * 8,
+ PUC_PORT_4S, 0x10, 0, 8,
+ .config_function = puc_config_systembase,
+ },
+
{ 0xffff, 0, 0xffff, 0, NULL, 0 }
};
@@ -2294,3 +2312,28 @@ puc_config_titan(struct puc_softc *sc __unused, enum puc_cfg_cmd cmd,
}
return (ENXIO);
}
+
+static int
+puc_config_systembase(struct puc_softc *sc __unused,
+ enum puc_cfg_cmd cmd, int port, intptr_t *res)
+{
+ struct puc_bar *bar;
+
+ switch (cmd) {
+ case PUC_CFG_SETUP:
+ bar = puc_get_bar(sc, 0x14);
+ if (bar == NULL)
+ return (ENXIO);
+
+ /*
+ * The Systembase SB16C1058 (and probably other devices
+	 * based on the SB16C1050 UART core) requires poking a
+ * register in the *other* RID to turn on interrupts.
+ */
+ bus_write_1(bar->b_res, /* OPT_IMRREG0 */ 0xc, 0xff);
+ return (0);
+ default:
+ break;
+ }
+ return (ENXIO);
+}
diff --git a/sys/dev/qat/include/common/adf_accel_devices.h b/sys/dev/qat/include/common/adf_accel_devices.h
index c09aee8ea4bd..eeffc6a9132c 100644
--- a/sys/dev/qat/include/common/adf_accel_devices.h
+++ b/sys/dev/qat/include/common/adf_accel_devices.h
@@ -39,12 +39,16 @@
#define ADF_4XXXIOV_PCI_DEVICE_ID 0x4941
#define ADF_401XX_PCI_DEVICE_ID 0x4942
#define ADF_401XXIOV_PCI_DEVICE_ID 0x4943
+#define ADF_402XX_PCI_DEVICE_ID 0x4944
+#define ADF_402XXIOV_PCI_DEVICE_ID 0x4945
#define IS_QAT_GEN3(ID) ({ (ID == ADF_C4XXX_PCI_DEVICE_ID); })
static inline bool
IS_QAT_GEN4(const unsigned int id)
{
return (id == ADF_4XXX_PCI_DEVICE_ID || id == ADF_401XX_PCI_DEVICE_ID ||
+ id == ADF_402XX_PCI_DEVICE_ID ||
+ id == ADF_402XXIOV_PCI_DEVICE_ID ||
id == ADF_4XXXIOV_PCI_DEVICE_ID ||
id == ADF_401XXIOV_PCI_DEVICE_ID);
}
diff --git a/sys/dev/qat/qat_api/include/icp_sal_versions.h b/sys/dev/qat/qat_api/include/icp_sal_versions.h
index 03bcef4fcbbb..0eb227ade09c 100644
--- a/sys/dev/qat/qat_api/include/icp_sal_versions.h
+++ b/sys/dev/qat/qat_api/include/icp_sal_versions.h
@@ -26,7 +26,7 @@
/* Part name and number of the accelerator device */
#define SAL_INFO2_DRIVER_SW_VERSION_MAJ_NUMBER 3
-#define SAL_INFO2_DRIVER_SW_VERSION_MIN_NUMBER 15
+#define SAL_INFO2_DRIVER_SW_VERSION_MIN_NUMBER 16
#define SAL_INFO2_DRIVER_SW_VERSION_PATCH_NUMBER 0
/**
diff --git a/sys/dev/qat/qat_common/adf_gen4_timer.c b/sys/dev/qat/qat_common/adf_gen4_timer.c
index 96b65cdff181..2c74d09418e5 100644
--- a/sys/dev/qat/qat_common/adf_gen4_timer.c
+++ b/sys/dev/qat/qat_common/adf_gen4_timer.c
@@ -57,7 +57,7 @@ end:
static void
timer_handler(struct timer_list *tl)
{
- struct adf_int_timer *int_timer = from_timer(int_timer, tl, timer);
+ struct adf_int_timer *int_timer = timer_container_of(int_timer, tl, timer);
struct adf_accel_dev *accel_dev = int_timer->accel_dev;
struct adf_hb_timer_data *hb_timer_data = NULL;
u64 timeout_val = adf_get_next_timeout(int_timer->timeout_val);
diff --git a/sys/dev/qat/qat_common/qat_uclo.c b/sys/dev/qat/qat_common/qat_uclo.c
index 54e8e8eb7421..b17020286d24 100644
--- a/sys/dev/qat/qat_common/qat_uclo.c
+++ b/sys/dev/qat/qat_common/qat_uclo.c
@@ -892,6 +892,7 @@ qat_uclo_get_dev_type(struct icp_qat_fw_loader_handle *handle)
return ICP_QAT_AC_C4XXX_DEV_TYPE;
case ADF_4XXX_PCI_DEVICE_ID:
case ADF_401XX_PCI_DEVICE_ID:
+ case ADF_402XX_PCI_DEVICE_ID:
return ICP_QAT_AC_4XXX_A_DEV_TYPE;
default:
pr_err("QAT: unsupported device 0x%x\n",
diff --git a/sys/dev/qat/qat_hw/qat_4xxx/adf_4xxx_hw_data.c b/sys/dev/qat/qat_hw/qat_4xxx/adf_4xxx_hw_data.c
index d730efd5952b..49e1e1859e78 100644
--- a/sys/dev/qat/qat_hw/qat_4xxx/adf_4xxx_hw_data.c
+++ b/sys/dev/qat/qat_hw/qat_4xxx/adf_4xxx_hw_data.c
@@ -536,8 +536,8 @@ adf_exit_accel_units(struct adf_accel_dev *accel_dev)
}
static const char *
-get_obj_name(struct adf_accel_dev *accel_dev,
- enum adf_accel_unit_services service)
+get_obj_name_4xxx(struct adf_accel_dev *accel_dev,
+ enum adf_accel_unit_services service)
{
switch (service) {
case ADF_ACCEL_ASYM:
@@ -553,6 +553,24 @@ get_obj_name(struct adf_accel_dev *accel_dev,
}
}
+static const char *
+get_obj_name_402xx(struct adf_accel_dev *accel_dev,
+ enum adf_accel_unit_services service)
+{
+ switch (service) {
+ case ADF_ACCEL_ASYM:
+ return ADF_402XX_ASYM_OBJ;
+ case ADF_ACCEL_CRYPTO:
+ return ADF_402XX_SYM_OBJ;
+ case ADF_ACCEL_COMPRESSION:
+ return ADF_402XX_DC_OBJ;
+ case ADF_ACCEL_ADMIN:
+ return ADF_402XX_ADMIN_OBJ;
+ default:
+ return NULL;
+ }
+}
+
static uint32_t
get_objs_num(struct adf_accel_dev *accel_dev)
{
@@ -982,8 +1000,23 @@ adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 id)
hw_data->clock_frequency = ADF_4XXX_AE_FREQ;
hw_data->get_sku = get_sku;
hw_data->heartbeat_ctr_num = ADF_NUM_HB_CNT_PER_AE;
- hw_data->fw_name = ADF_4XXX_FW;
- hw_data->fw_mmp_name = ADF_4XXX_MMP;
+ switch (id) {
+ case ADF_402XX_PCI_DEVICE_ID:
+ hw_data->fw_name = ADF_402XX_FW;
+ hw_data->fw_mmp_name = ADF_402XX_MMP;
+ hw_data->asym_ae_active_thd_mask = DEFAULT_4XXX_ASYM_AE_MASK;
+ break;
+ case ADF_401XX_PCI_DEVICE_ID:
+ hw_data->fw_name = ADF_4XXX_FW;
+ hw_data->fw_mmp_name = ADF_4XXX_MMP;
+ hw_data->asym_ae_active_thd_mask = DEFAULT_401XX_ASYM_AE_MASK;
+ break;
+
+ default:
+ hw_data->fw_name = ADF_4XXX_FW;
+ hw_data->fw_mmp_name = ADF_4XXX_MMP;
+ hw_data->asym_ae_active_thd_mask = DEFAULT_4XXX_ASYM_AE_MASK;
+ }
hw_data->init_admin_comms = adf_init_admin_comms;
hw_data->exit_admin_comms = adf_exit_admin_comms;
hw_data->send_admin_init = adf_4xxx_send_admin_init;
@@ -1002,7 +1035,13 @@ adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 id)
hw_data->get_ring_svc_map_data = get_ring_svc_map_data;
hw_data->admin_ae_mask = ADF_4XXX_ADMIN_AE_MASK;
hw_data->get_objs_num = get_objs_num;
- hw_data->get_obj_name = get_obj_name;
+ switch (id) {
+ case ADF_402XX_PCI_DEVICE_ID:
+ hw_data->get_obj_name = get_obj_name_402xx;
+ break;
+ default:
+ hw_data->get_obj_name = get_obj_name_4xxx;
+ }
hw_data->get_obj_cfg_ae_mask = get_obj_cfg_ae_mask;
hw_data->get_service_type = adf_4xxx_get_service_type;
hw_data->set_msix_rttable = set_msix_default_rttable;
@@ -1022,15 +1061,6 @@ adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 id)
hw_data->query_storage_cap = 1;
hw_data->ring_pair_reset = adf_gen4_ring_pair_reset;
- switch (id) {
- case ADF_401XX_PCI_DEVICE_ID:
- hw_data->asym_ae_active_thd_mask = DEFAULT_401XX_ASYM_AE_MASK;
- break;
- case ADF_4XXX_PCI_DEVICE_ID:
- default:
- hw_data->asym_ae_active_thd_mask = DEFAULT_4XXX_ASYM_AE_MASK;
- }
-
adf_gen4_init_hw_csr_info(&hw_data->csr_info);
adf_gen4_init_pf_pfvf_ops(&hw_data->csr_info.pfvf_ops);
}
diff --git a/sys/dev/qat/qat_hw/qat_4xxx/adf_4xxx_hw_data.h b/sys/dev/qat/qat_hw/qat_4xxx/adf_4xxx_hw_data.h
index c35ebbcadcd7..fa7249dca596 100644
--- a/sys/dev/qat/qat_hw/qat_4xxx/adf_4xxx_hw_data.h
+++ b/sys/dev/qat/qat_hw/qat_4xxx/adf_4xxx_hw_data.h
@@ -87,6 +87,12 @@
#define ADF_4XXX_SYM_OBJ "qat_4xxx_sym.bin"
#define ADF_4XXX_ASYM_OBJ "qat_4xxx_asym.bin"
#define ADF_4XXX_ADMIN_OBJ "qat_4xxx_admin.bin"
+#define ADF_402XX_FW "qat_402xx_fw"
+#define ADF_402XX_MMP "qat_402xx_mmp_fw"
+#define ADF_402XX_DC_OBJ "qat_402xx_dc.bin"
+#define ADF_402XX_SYM_OBJ "qat_402xx_sym.bin"
+#define ADF_402XX_ASYM_OBJ "qat_402xx_asym.bin"
+#define ADF_402XX_ADMIN_OBJ "qat_402xx_admin.bin"
/* Only 3 types of images can be loaded including the admin image */
#define ADF_4XXX_MAX_OBJ 3
diff --git a/sys/dev/qat/qat_hw/qat_4xxx/adf_drv.c b/sys/dev/qat/qat_hw/qat_4xxx/adf_drv.c
index cb534dd03b86..f9ad39fa45f0 100644
--- a/sys/dev/qat/qat_hw/qat_4xxx/adf_drv.c
+++ b/sys/dev/qat/qat_hw/qat_4xxx/adf_drv.c
@@ -22,12 +22,14 @@ static MALLOC_DEFINE(M_QAT_4XXX, "qat_4xxx", "qat_4xxx");
PCI_VENDOR_ID_INTEL, device_id \
}
-static const struct pci_device_id adf_pci_tbl[] =
- { ADF_SYSTEM_DEVICE(ADF_4XXX_PCI_DEVICE_ID),
- ADF_SYSTEM_DEVICE(ADF_401XX_PCI_DEVICE_ID),
- {
- 0,
- } };
+static const struct pci_device_id adf_pci_tbl[] = {
+ ADF_SYSTEM_DEVICE(ADF_4XXX_PCI_DEVICE_ID),
+ ADF_SYSTEM_DEVICE(ADF_401XX_PCI_DEVICE_ID),
+ ADF_SYSTEM_DEVICE(ADF_402XX_PCI_DEVICE_ID),
+ {
+ 0,
+ }
+};
static int
adf_probe(device_t dev)
@@ -135,6 +137,7 @@ adf_cleanup_accel(struct adf_accel_dev *accel_dev)
switch (pci_get_device(accel_pci_dev->pci_dev)) {
case ADF_4XXX_PCI_DEVICE_ID:
case ADF_401XX_PCI_DEVICE_ID:
+ case ADF_402XX_PCI_DEVICE_ID:
adf_clean_hw_data_4xxx(accel_dev->hw_device);
break;
default:
diff --git a/sys/dev/qat/qat_hw/qat_4xxxvf/adf_drv.c b/sys/dev/qat/qat_hw/qat_4xxxvf/adf_drv.c
index 2bbccb4d6b17..dbe40835ccbf 100644
--- a/sys/dev/qat/qat_hw/qat_4xxxvf/adf_drv.c
+++ b/sys/dev/qat/qat_hw/qat_4xxxvf/adf_drv.c
@@ -22,12 +22,14 @@ static MALLOC_DEFINE(M_QAT_4XXXVF, "qat_4xxxvf", "qat_4xxxvf");
PCI_VENDOR_ID_INTEL, device_id \
}
-static const struct pci_device_id adf_pci_tbl[] =
- { ADF_SYSTEM_DEVICE(ADF_4XXXIOV_PCI_DEVICE_ID),
- ADF_SYSTEM_DEVICE(ADF_401XXIOV_PCI_DEVICE_ID),
- {
- 0,
- } };
+static const struct pci_device_id adf_pci_tbl[] = {
+ ADF_SYSTEM_DEVICE(ADF_4XXXIOV_PCI_DEVICE_ID),
+ ADF_SYSTEM_DEVICE(ADF_401XXIOV_PCI_DEVICE_ID),
+ ADF_SYSTEM_DEVICE(ADF_402XXIOV_PCI_DEVICE_ID),
+ {
+ 0,
+ }
+};
static int
adf_probe(device_t dev)
@@ -76,6 +78,7 @@ adf_cleanup_accel(struct adf_accel_dev *accel_dev)
switch (pci_get_device(accel_pci_dev->pci_dev)) {
case ADF_4XXXIOV_PCI_DEVICE_ID:
case ADF_401XXIOV_PCI_DEVICE_ID:
+ case ADF_402XXIOV_PCI_DEVICE_ID:
adf_clean_hw_data_4xxxiov(accel_dev->hw_device);
break;
default:
diff --git a/sys/dev/qlnx/qlnxe/ecore_dev.c b/sys/dev/qlnx/qlnxe/ecore_dev.c
index 6187ecdbc446..389a95a4164c 100644
--- a/sys/dev/qlnx/qlnxe/ecore_dev.c
+++ b/sys/dev/qlnx/qlnxe/ecore_dev.c
@@ -5268,7 +5268,7 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
}
DP_VERBOSE(p_hwfn, ECORE_MSG_LINK,
- "Read default link: Speed 0x%08x, Adv. Speed 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x EEE: %02x [%08x usec]\n",
+ "Read default link: Speed %u Mb/sec, Adv. Speeds 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x EEE: %02x [%u usec]\n",
link->speed.forced_speed, link->speed.advertised_speeds,
link->speed.autoneg, link->pause.autoneg,
p_caps->default_eee, p_caps->eee_lpi_timer);
@@ -6860,7 +6860,7 @@ int __ecore_configure_pf_max_bandwidth(struct ecore_hwfn *p_hwfn,
p_hwfn->qm_info.pf_rl);
DP_VERBOSE(p_hwfn, ECORE_MSG_LINK,
- "Configured MAX bandwidth to be %08x Mb/sec\n",
+ "Configured MAX bandwidth to be %u Mb/sec\n",
p_link->speed);
return rc;
@@ -6918,7 +6918,7 @@ int __ecore_configure_pf_min_bandwidth(struct ecore_hwfn *p_hwfn,
rc = ecore_init_pf_wfq(p_hwfn, p_ptt, p_hwfn->rel_pf_id, min_bw);
DP_VERBOSE(p_hwfn, ECORE_MSG_LINK,
- "Configured MIN bandwidth to be %d Mb/sec\n",
+ "Configured MIN bandwidth to be %u Mb/sec\n",
p_link->min_pf_rate);
return rc;
diff --git a/sys/dev/qlnx/qlnxe/ecore_mcp.c b/sys/dev/qlnx/qlnxe/ecore_mcp.c
index ab14b1eb5186..6d1e5fe24d06 100644
--- a/sys/dev/qlnx/qlnxe/ecore_mcp.c
+++ b/sys/dev/qlnx/qlnxe/ecore_mcp.c
@@ -1638,7 +1638,7 @@ enum _ecore_status_t ecore_mcp_set_link(struct ecore_hwfn *p_hwfn,
if (b_up)
DP_VERBOSE(p_hwfn, ECORE_MSG_LINK,
- "Configuring Link: Speed 0x%08x, Pause 0x%08x, adv_speed 0x%08x, loopback 0x%08x\n",
+ "Configuring Link: Speed %u Mb/sec, Pause 0x%08x, adv_speed 0x%08x, loopback 0x%08x\n",
phy_cfg.speed, phy_cfg.pause, phy_cfg.adv_speed,
phy_cfg.loopback_mode);
else
diff --git a/sys/dev/qlnx/qlnxe/qlnx_def.h b/sys/dev/qlnx/qlnxe/qlnx_def.h
index 4342bba89587..796845f3f8c6 100644
--- a/sys/dev/qlnx/qlnxe/qlnx_def.h
+++ b/sys/dev/qlnx/qlnxe/qlnx_def.h
@@ -696,22 +696,6 @@ extern int qlnx_alloc_mem_sb(qlnx_host_t *ha, struct ecore_sb_info *sb_info,
* Some OS specific stuff
*/
-#if (defined IFM_100G_SR4)
-#define QLNX_IFM_100G_SR4 IFM_100G_SR4
-#define QLNX_IFM_100G_LR4 IFM_100G_LR4
-#define QLNX_IFM_100G_CR4 IFM_100G_CR4
-#else
-#define QLNX_IFM_100G_SR4 IFM_UNKNOWN
-#define QLNX_IFM_100G_LR4 IFM_UNKNOWN
-#endif /* #if (defined IFM_100G_SR4) */
-
-#if (defined IFM_25G_SR)
-#define QLNX_IFM_25G_SR IFM_25G_SR
-#define QLNX_IFM_25G_CR IFM_25G_CR
-#else
-#define QLNX_IFM_25G_SR IFM_UNKNOWN
-#define QLNX_IFM_25G_CR IFM_UNKNOWN
-#endif /* #if (defined IFM_25G_SR) */
#define QLNX_INC_IERRORS(ifp) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1)
#define QLNX_INC_IQDROPS(ifp) if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1)
diff --git a/sys/dev/qlnx/qlnxe/qlnx_os.c b/sys/dev/qlnx/qlnxe/qlnx_os.c
index 4ad190374f87..9963f472c615 100644
--- a/sys/dev/qlnx/qlnxe/qlnx_os.c
+++ b/sys/dev/qlnx/qlnxe/qlnx_os.c
@@ -2375,18 +2375,15 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
ifmedia_add(&ha->media, (IFM_ETHER | IFM_40G_CR4), 0, NULL);
} else if ((device_id == QLOGIC_PCI_DEVICE_ID_1656) ||
(device_id == QLOGIC_PCI_DEVICE_ID_8070)) {
- ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_25G_SR), 0, NULL);
- ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_25G_CR), 0, NULL);
+ ifmedia_add(&ha->media, (IFM_ETHER | IFM_25G_SR), 0, NULL);
+ ifmedia_add(&ha->media, (IFM_ETHER | IFM_25G_CR), 0, NULL);
} else if (device_id == QLOGIC_PCI_DEVICE_ID_1654) {
ifmedia_add(&ha->media, (IFM_ETHER | IFM_50G_KR2), 0, NULL);
ifmedia_add(&ha->media, (IFM_ETHER | IFM_50G_CR2), 0, NULL);
} else if (device_id == QLOGIC_PCI_DEVICE_ID_1644) {
- ifmedia_add(&ha->media,
- (IFM_ETHER | QLNX_IFM_100G_LR4), 0, NULL);
- ifmedia_add(&ha->media,
- (IFM_ETHER | QLNX_IFM_100G_SR4), 0, NULL);
- ifmedia_add(&ha->media,
- (IFM_ETHER | QLNX_IFM_100G_CR4), 0, NULL);
+ ifmedia_add(&ha->media, (IFM_ETHER | IFM_100G_LR4), 0, NULL);
+ ifmedia_add(&ha->media, (IFM_ETHER | IFM_100G_SR4), 0, NULL);
+ ifmedia_add(&ha->media, (IFM_ETHER | IFM_100G_CR4), 0, NULL);
}
ifmedia_add(&ha->media, (IFM_ETHER | IFM_FDX), 0, NULL);
@@ -2724,7 +2721,9 @@ qlnx_ioctl(if_t ifp, u_long cmd, caddr_t data)
case SIOCSIFMEDIA:
case SIOCGIFMEDIA:
- QL_DPRINT4(ha, "SIOCSIFMEDIA/SIOCGIFMEDIA (0x%lx)\n", cmd);
+ case SIOCGIFXMEDIA:
+ QL_DPRINT4(ha,
+ "SIOCSIFMEDIA/SIOCGIFMEDIA/SIOCGIFXMEDIA (0x%lx)\n", cmd);
ret = ifmedia_ioctl(ifp, ifr, &ha->media, cmd);
break;
@@ -3808,11 +3807,11 @@ qlnx_get_optics(qlnx_host_t *ha, struct qlnx_link_output *if_link)
case MEDIA_MODULE_FIBER:
case MEDIA_UNSPECIFIED:
if (if_link->speed == (100 * 1000))
- ifm_type = QLNX_IFM_100G_SR4;
+ ifm_type = IFM_100G_SR4;
else if (if_link->speed == (40 * 1000))
ifm_type = IFM_40G_SR4;
else if (if_link->speed == (25 * 1000))
- ifm_type = QLNX_IFM_25G_SR;
+ ifm_type = IFM_25G_SR;
else if (if_link->speed == (10 * 1000))
ifm_type = (IFM_10G_LR | IFM_10G_SR);
else if (if_link->speed == (1 * 1000))
@@ -3822,11 +3821,11 @@ qlnx_get_optics(qlnx_host_t *ha, struct qlnx_link_output *if_link)
case MEDIA_DA_TWINAX:
if (if_link->speed == (100 * 1000))
- ifm_type = QLNX_IFM_100G_CR4;
+ ifm_type = IFM_100G_CR4;
else if (if_link->speed == (40 * 1000))
ifm_type = IFM_40G_CR4;
else if (if_link->speed == (25 * 1000))
- ifm_type = QLNX_IFM_25G_CR;
+ ifm_type = IFM_25G_CR;
else if (if_link->speed == (10 * 1000))
ifm_type = IFM_10G_TWINAX;
diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c
index 84ec174bd08e..2d7af254c52c 100644
--- a/sys/dev/random/random_harvestq.c
+++ b/sys/dev/random/random_harvestq.c
@@ -103,8 +103,10 @@ static const char *random_source_descr[ENTROPYSOURCE];
volatile int random_kthread_control;
-/* Allow the sysadmin to select the broad category of
- * entropy types to harvest.
+/*
+ * Allow the sysadmin to select the broad category of entropy types to harvest.
+ *
+ * Updates are synchronized by the harvest mutex.
*/
__read_frequently u_int hc_source_mask;
@@ -278,8 +280,15 @@ random_sources_feed(void)
epoch_enter_preempt(rs_epoch, &et);
CK_LIST_FOREACH(rrs, &source_list, rrs_entries) {
for (i = 0; i < npools; i++) {
+ if (rrs->rrs_source->rs_read == NULL) {
+ /* Source pushes entropy asynchronously. */
+ continue;
+ }
n = rrs->rrs_source->rs_read(entropy, sizeof(entropy));
- KASSERT((n <= sizeof(entropy)), ("%s: rs_read returned too much data (%u > %zu)", __func__, n, sizeof(entropy)));
+ KASSERT((n <= sizeof(entropy)),
+ ("%s: rs_read returned too much data (%u > %zu)",
+ __func__, n, sizeof(entropy)));
+
/*
* Sometimes the HW entropy source doesn't have anything
* ready for us. This isn't necessarily untrustworthy.
@@ -334,7 +343,17 @@ copy_event(uint32_t dst[static HARVESTSIZE + 1],
{
memset(dst, 0, sizeof(uint32_t) * (HARVESTSIZE + 1));
memcpy(dst, event->he_entropy, event->he_size);
- dst[HARVESTSIZE] = event->he_somecounter;
+ if (event->he_source <= RANDOM_ENVIRONMENTAL_END) {
+ /*
+ * For pure entropy sources the timestamp counter is generally
+		 * quite deterministic since samples are taken at regular
+		 * intervals, so it does not contribute much to the entropy. To
+ * make health tests more effective, exclude it from the sample,
+ * since it might otherwise defeat the health tests in a
+ * scenario where the source is stuck.
+ */
+ dst[HARVESTSIZE] = event->he_somecounter;
+ }
}
static void
@@ -464,11 +483,12 @@ SYSCTL_BOOL(_kern_random, OID_AUTO, nist_healthtest_enabled,
"Enable NIST SP 800-90B health tests for noise sources");
static void
-random_healthtest_init(enum random_entropy_source source)
+random_healthtest_init(enum random_entropy_source source, int min_entropy)
{
struct health_test_softc *ht;
ht = &healthtest[source];
+ memset(ht, 0, sizeof(*ht));
KASSERT(ht->ht_state == INIT,
("%s: health test state is %d for source %d",
__func__, ht->ht_state, source));
@@ -485,20 +505,62 @@ random_healthtest_init(enum random_entropy_source source)
}
/*
- * Set cutoff values for the two tests, assuming that each sample has
- * min-entropy of 1 bit and allowing for an error rate of 1 in 2^{34}.
- * With a sample rate of RANDOM_KTHREAD_HZ, we expect to see an false
- * positive once in ~54.5 years.
+ * Set cutoff values for the two tests, given a min-entropy estimate for
+ * the source and allowing for an error rate of 1 in 2^{34}. With a
+ * min-entropy estimate of 1 bit and a sample rate of RANDOM_KTHREAD_HZ,
+	 * we expect to see a false positive once in ~54.5 years.
*
* The RCT limit comes from the formula in section 4.4.1.
*
- * The APT cutoff is calculated using the formula in section 4.4.2
+ * The APT cutoffs are calculated using the formula in section 4.4.2
* footnote 10 with the number of Bernoulli trials changed from W to
* W-1, since the test as written counts the number of samples equal to
- * the first sample in the window, and thus tests W-1 samples.
+ * the first sample in the window, and thus tests W-1 samples. We
+ * provide cutoffs for estimates up to sizeof(uint32_t)*HARVESTSIZE*8
+ * bits.
*/
- ht->ht_rct_limit = 35;
- ht->ht_apt_cutoff = 330;
+ const int apt_cutoffs[] = {
+ [1] = 329,
+ [2] = 195,
+ [3] = 118,
+ [4] = 73,
+ [5] = 48,
+ [6] = 33,
+ [7] = 23,
+ [8] = 17,
+ [9] = 13,
+ [10] = 11,
+ [11] = 9,
+ [12] = 8,
+ [13] = 7,
+ [14] = 6,
+ [15] = 5,
+ [16] = 5,
+ [17 ... 19] = 4,
+ [20 ... 25] = 3,
+ [26 ... 42] = 2,
+ [43 ... 64] = 1,
+ };
+ const int error_rate = 34;
+
+ if (min_entropy == 0) {
+ /*
+ * For environmental sources, the main source of entropy is the
+ * associated timecounter value. Since these sources can be
+ * influenced by unprivileged users, we conservatively use a
+ * min-entropy estimate of 1 bit per sample. For "pure"
+ * sources, we assume 8 bits per sample, as such sources provide
+ * a variable amount of data per read and in particular might
+ * only provide a single byte at a time.
+ */
+ min_entropy = source >= RANDOM_PURE_START ? 8 : 1;
+ } else if (min_entropy < 0 || min_entropy >= nitems(apt_cutoffs)) {
+ panic("invalid min_entropy %d for %s", min_entropy,
+ random_source_descr[source]);
+ }
+
+ ht->ht_rct_limit = 1 + howmany(error_rate, min_entropy);
+ ht->ht_apt_cutoff = apt_cutoffs[min_entropy];
}
static int
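For the default environmental estimate of 1 bit per sample this reproduces essentially the values that were hard-coded before: ht_rct_limit = 1 + howmany(34, 1) = 35 and ht_apt_cutoff = apt_cutoffs[1] = 329 (previously 330). For a pure source assumed to provide 8 bits of min-entropy per sample, the tests tighten to ht_rct_limit = 1 + howmany(34, 8) = 6 and ht_apt_cutoff = apt_cutoffs[8] = 17.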
@@ -533,9 +595,9 @@ random_check_uint_harvestmask(SYSCTL_HANDLER_ARGS)
_RANDOM_HARVEST_ETHER_OFF | _RANDOM_HARVEST_UMA_OFF;
int error;
- u_int value, orig_value;
+ u_int value;
- orig_value = value = hc_source_mask;
+ value = atomic_load_int(&hc_source_mask);
error = sysctl_handle_int(oidp, &value, 0, req);
if (error != 0 || req->newptr == NULL)
return (error);
@@ -546,12 +608,14 @@ random_check_uint_harvestmask(SYSCTL_HANDLER_ARGS)
/*
* Disallow userspace modification of pure entropy sources.
*/
+ RANDOM_HARVEST_LOCK();
hc_source_mask = (value & ~user_immutable_mask) |
- (orig_value & user_immutable_mask);
+ (hc_source_mask & user_immutable_mask);
+ RANDOM_HARVEST_UNLOCK();
return (0);
}
SYSCTL_PROC(_kern_random_harvest, OID_AUTO, mask,
- CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, NULL, 0,
+ CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
random_check_uint_harvestmask, "IU",
"Entropy harvesting mask");
@@ -563,9 +627,16 @@ random_print_harvestmask(SYSCTL_HANDLER_ARGS)
error = sysctl_wire_old_buffer(req, 0);
if (error == 0) {
+ u_int mask;
+
sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
- for (i = ENTROPYSOURCE - 1; i >= 0; i--)
- sbuf_cat(&sbuf, (hc_source_mask & (1 << i)) ? "1" : "0");
+ mask = atomic_load_int(&hc_source_mask);
+ for (i = ENTROPYSOURCE - 1; i >= 0; i--) {
+ bool present;
+
+ present = (mask & (1u << i)) != 0;
+ sbuf_cat(&sbuf, present ? "1" : "0");
+ }
error = sbuf_finish(&sbuf);
sbuf_delete(&sbuf);
}
@@ -619,16 +690,21 @@ random_print_harvestmask_symbolic(SYSCTL_HANDLER_ARGS)
first = true;
error = sysctl_wire_old_buffer(req, 0);
if (error == 0) {
+ u_int mask;
+
sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
+ mask = atomic_load_int(&hc_source_mask);
for (i = ENTROPYSOURCE - 1; i >= 0; i--) {
- if (i >= RANDOM_PURE_START &&
- (hc_source_mask & (1 << i)) == 0)
+ bool present;
+
+ present = (mask & (1u << i)) != 0;
+ if (i >= RANDOM_PURE_START && !present)
continue;
if (!first)
sbuf_cat(&sbuf, ",");
- sbuf_cat(&sbuf, !(hc_source_mask & (1 << i)) ? "[" : "");
+ sbuf_cat(&sbuf, !present ? "[" : "");
sbuf_cat(&sbuf, random_source_descr[i]);
- sbuf_cat(&sbuf, !(hc_source_mask & (1 << i)) ? "]" : "");
+ sbuf_cat(&sbuf, !present ? "]" : "");
first = false;
}
error = sbuf_finish(&sbuf);
@@ -652,8 +728,8 @@ random_harvestq_init(void *unused __unused)
RANDOM_HARVEST_INIT_LOCK();
harvest_context.hc_active_buf = 0;
- for (int i = 0; i < ENTROPYSOURCE; i++)
- random_healthtest_init(i);
+ for (int i = RANDOM_START; i <= RANDOM_ENVIRONMENTAL_END; i++)
+ random_healthtest_init(i, 0);
}
SYSINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, random_harvestq_init, NULL);
@@ -835,20 +911,6 @@ random_harvest_direct_(const void *entropy, u_int size, enum random_entropy_sour
}
void
-random_harvest_register_source(enum random_entropy_source source)
-{
-
- hc_source_mask |= (1 << source);
-}
-
-void
-random_harvest_deregister_source(enum random_entropy_source source)
-{
-
- hc_source_mask &= ~(1 << source);
-}
-
-void
random_source_register(const struct random_source *rsource)
{
struct random_sources *rrs;
@@ -858,11 +920,12 @@ random_source_register(const struct random_source *rsource)
rrs = malloc(sizeof(*rrs), M_ENTROPY, M_WAITOK);
rrs->rrs_source = rsource;
- random_harvest_register_source(rsource->rs_source);
-
printf("random: registering fast source %s\n", rsource->rs_ident);
+ random_healthtest_init(rsource->rs_source, rsource->rs_min_entropy);
+
RANDOM_HARVEST_LOCK();
+ hc_source_mask |= (1 << rsource->rs_source);
CK_LIST_INSERT_HEAD(&source_list, rrs, rrs_entries);
RANDOM_HARVEST_UNLOCK();
}
@@ -874,9 +937,8 @@ random_source_deregister(const struct random_source *rsource)
KASSERT(rsource != NULL, ("invalid input to %s", __func__));
- random_harvest_deregister_source(rsource->rs_source);
-
RANDOM_HARVEST_LOCK();
+ hc_source_mask &= ~(1 << rsource->rs_source);
CK_LIST_FOREACH(rrs, &source_list, rrs_entries)
if (rrs->rrs_source == rsource) {
CK_LIST_REMOVE(rrs, rrs_entries);
diff --git a/sys/dev/random/randomdev.h b/sys/dev/random/randomdev.h
index 6d742447ea8b..a6ca66c7d92e 100644
--- a/sys/dev/random/randomdev.h
+++ b/sys/dev/random/randomdev.h
@@ -52,7 +52,9 @@ random_check_uint_##name(SYSCTL_HANDLER_ARGS) \
}
#endif /* SYSCTL_DECL */
+#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_ENTROPY);
+#endif
extern bool random_bypass_before_seeding;
extern bool read_random_bypassed_before_seeding;
@@ -101,6 +103,7 @@ struct random_source {
const char *rs_ident;
enum random_entropy_source rs_source;
random_source_read_t *rs_read;
+ int rs_min_entropy;
};
void random_source_register(const struct random_source *);
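With rs_min_entropy added, a driver for a dedicated hardware RNG can advertise its per-sample entropy estimate at registration time. A minimal sketch, where the "example" identifiers are hypothetical but the structure fields and random_source_register() are as declared in this header:

	static u_int
	example_rng_read(void *buf, u_int size)
	{
		/* Fill buf with up to size bytes of raw generator output. */
		return (example_rng_fill(buf, size));	/* hypothetical helper */
	}

	static const struct random_source random_example = {
		.rs_ident = "Example RNG",
		.rs_source = RANDOM_PURE_RDRAND,	/* any RANDOM_PURE_* slot */
		.rs_read = example_rng_read,
		.rs_min_entropy = 8,			/* bits per sample */
	};

	random_source_register(&random_example);

A source that pushes entropy itself rather than being polled leaves rs_read NULL; random_sources_feed() above skips such sources.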
diff --git a/sys/dev/re/if_re.c b/sys/dev/re/if_re.c
index 091ab2db72ec..67864c2de388 100644
--- a/sys/dev/re/if_re.c
+++ b/sys/dev/re/if_re.c
@@ -3558,6 +3558,7 @@ re_ioctl(if_t ifp, u_long command, caddr_t data)
static void
re_watchdog(struct rl_softc *sc)
{
+ struct epoch_tracker et;
if_t ifp;
RL_LOCK_ASSERT(sc);
@@ -3578,7 +3579,9 @@ re_watchdog(struct rl_softc *sc)
if_printf(ifp, "watchdog timeout\n");
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ NET_EPOCH_ENTER(et);
re_rxeof(sc, NULL);
+ NET_EPOCH_EXIT(et);
if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
re_init_locked(sc);
if (!if_sendq_empty(ifp))
diff --git a/sys/dev/rtwn/if_rtwn.c b/sys/dev/rtwn/if_rtwn.c
index 7a547e13cafa..25287f222270 100644
--- a/sys/dev/rtwn/if_rtwn.c
+++ b/sys/dev/rtwn/if_rtwn.c
@@ -268,6 +268,9 @@ rtwn_attach(struct rtwn_softc *sc)
ic->ic_flags_ext |= IEEE80211_FEXT_WATCHDOG;
#endif
+ /* Enable seqno offload */
+ ic->ic_flags_ext |= IEEE80211_FEXT_SEQNO_OFFLOAD;
+
/* Adjust capabilities. */
rtwn_adj_devcaps(sc);
diff --git a/sys/dev/rtwn/if_rtwn_tx.c b/sys/dev/rtwn/if_rtwn_tx.c
index 2c9c246dfbb4..fa7f35f2de83 100644
--- a/sys/dev/rtwn/if_rtwn_tx.c
+++ b/sys/dev/rtwn/if_rtwn_tx.c
@@ -183,6 +183,10 @@ rtwn_tx_data(struct rtwn_softc *sc, struct ieee80211_node *ni,
}
}
+ /* seqno allocate, only if AMPDU isn't running */
+ if ((m->m_flags & M_AMPDU_MPDU) == 0)
+ ieee80211_output_seqno_assign(ni, -1, m);
+
cipher = IEEE80211_CIPHER_NONE;
if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) {
k = ieee80211_crypto_encap(ni, m);
@@ -229,6 +233,10 @@ rtwn_tx_raw(struct rtwn_softc *sc, struct ieee80211_node *ni,
uint8_t type;
u_int cipher;
+ /* seqno allocate, only if AMPDU isn't running */
+ if ((m->m_flags & M_AMPDU_MPDU) == 0)
+ ieee80211_output_seqno_assign(ni, -1, m);
+
/* Encrypt the frame if need be. */
cipher = IEEE80211_CIPHER_NONE;
if (params->ibp_flags & IEEE80211_BPF_CRYPTO) {
diff --git a/sys/dev/rtwn/rtl8192c/r92c_tx.c b/sys/dev/rtwn/rtl8192c/r92c_tx.c
index 6b013de0c536..ba2f60bd9295 100644
--- a/sys/dev/rtwn/rtl8192c/r92c_tx.c
+++ b/sys/dev/rtwn/rtl8192c/r92c_tx.c
@@ -452,11 +452,10 @@ r92c_fill_tx_desc(struct rtwn_softc *sc, struct ieee80211_node *ni,
} else {
uint16_t seqno;
- if (m->m_flags & M_AMPDU_MPDU) {
- seqno = ni->ni_txseqs[tid] % IEEE80211_SEQ_RANGE;
- ni->ni_txseqs[tid]++;
- } else
- seqno = M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE;
+ if (m->m_flags & M_AMPDU_MPDU)
+ ieee80211_output_seqno_assign(ni, -1, m);
+
+ seqno = M_SEQNO_GET(m);
/* Set sequence number. */
txd->txdseq = htole16(seqno);
@@ -511,7 +510,7 @@ r92c_fill_tx_desc_raw(struct rtwn_softc *sc, struct ieee80211_node *ni,
rtwn_r92c_tx_setup_hwseq(sc, txd);
} else {
/* Set sequence number. */
- txd->txdseq |= htole16(M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE);
+ txd->txdseq |= htole16(M_SEQNO_GET(m));
}
}
diff --git a/sys/dev/rtwn/rtl8812a/r12a_tx.c b/sys/dev/rtwn/rtl8812a/r12a_tx.c
index acb238316559..6a7af0a9b674 100644
--- a/sys/dev/rtwn/rtl8812a/r12a_tx.c
+++ b/sys/dev/rtwn/rtl8812a/r12a_tx.c
@@ -101,12 +101,12 @@ r12a_tx_set_vht_bw(struct rtwn_softc *sc, void *buf, struct ieee80211_node *ni)
prim_chan = r12a_get_primary_channel(sc, ni->ni_chan);
- if (ieee80211_vht_check_tx_bw(ni, IEEE80211_STA_RX_BW_80)) {
+ if (ieee80211_vht_check_tx_bw(ni, NET80211_STA_RX_BW_80)) {
txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_BW,
R12A_TXDW5_DATA_BW80));
txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_PRIM_CHAN,
prim_chan));
- } else if (ieee80211_vht_check_tx_bw(ni, IEEE80211_STA_RX_BW_40)) {
+ } else if (ieee80211_vht_check_tx_bw(ni, NET80211_STA_RX_BW_40)) {
txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_BW,
R12A_TXDW5_DATA_BW40));
txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_PRIM_CHAN,
@@ -433,12 +433,9 @@ r12a_fill_tx_desc(struct rtwn_softc *sc, struct ieee80211_node *ni,
} else {
uint16_t seqno;
- if (m->m_flags & M_AMPDU_MPDU) {
- seqno = ni->ni_txseqs[tid];
- ni->ni_txseqs[tid]++;
- } else
- seqno = M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE;
-
+ if (m->m_flags & M_AMPDU_MPDU)
+ ieee80211_output_seqno_assign(ni, -1, m);
+ seqno = M_SEQNO_GET(m);
/* Set sequence number. */
txd->txdw9 |= htole32(SM(R12A_TXDW9_SEQ, seqno));
}
@@ -493,8 +490,7 @@ r12a_fill_tx_desc_raw(struct rtwn_softc *sc, struct ieee80211_node *ni,
txd->txdw3 |= htole32(SM(R12A_TXDW3_SEQ_SEL, uvp->id));
} else {
/* Set sequence number. */
- txd->txdw9 |= htole32(SM(R12A_TXDW9_SEQ,
- M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE));
+ txd->txdw9 |= htole32(SM(R12A_TXDW9_SEQ, M_SEQNO_GET(m)));
}
}
diff --git a/sys/dev/sound/pci/hda/hdaa_patches.c b/sys/dev/sound/pci/hda/hdaa_patches.c
index 8967cb49125c..91bb244578c7 100644
--- a/sys/dev/sound/pci/hda/hdaa_patches.c
+++ b/sys/dev/sound/pci/hda/hdaa_patches.c
@@ -362,8 +362,10 @@ hdac_pin_patch(struct hdaa_widget *w)
patch_str = "as=3 seq=15 color=Black loc=Left";
break;
}
- } else if (id == HDA_CODEC_ALC295 &&
- subid == FRAMEWORK_LAPTOP_0005_SUBVENDOR) {
+ } else if ((id == HDA_CODEC_ALC295 &&
+ subid == FRAMEWORK_LAPTOP_0005_SUBVENDOR) ||
+ (id == HDA_CODEC_ALC285 &&
+ subid == FRAMEWORK_LAPTOP_000D_SUBVENDOR)) {
switch (nid) {
case 20:
/*
diff --git a/sys/dev/sound/pci/hda/hdac.c b/sys/dev/sound/pci/hda/hdac.c
index 900578b73de4..80028063bb0d 100644
--- a/sys/dev/sound/pci/hda/hdac.c
+++ b/sys/dev/sound/pci/hda/hdac.c
@@ -133,6 +133,7 @@ static const struct {
{ HDA_INTEL_PCH, "Intel Ibex Peak", 0, 0 },
{ HDA_INTEL_PCH2, "Intel Ibex Peak", 0, 0 },
{ HDA_INTEL_ELLK, "Intel Elkhart Lake", 0, 0 },
+ { HDA_INTEL_ELLK2, "Intel Elkhart Lake", 0, 0 },
{ HDA_INTEL_JLK2, "Intel Jasper Lake", 0, 0 },
{ HDA_INTEL_BXTNP, "Intel Broxton-P", 0, 0 },
{ HDA_INTEL_SCH, "Intel SCH", 0, 0 },
@@ -1773,17 +1774,17 @@ hdac_detach(device_t dev)
struct hdac_softc *sc = device_get_softc(dev);
int i, error;
+ callout_drain(&sc->poll_callout);
+ hdac_irq_free(sc);
+ taskqueue_drain(taskqueue_thread, &sc->unsolq_task);
+
error = bus_generic_detach(dev);
if (error != 0)
return (error);
hdac_lock(sc);
- callout_stop(&sc->poll_callout);
hdac_reset(sc, false);
hdac_unlock(sc);
- callout_drain(&sc->poll_callout);
- taskqueue_drain(taskqueue_thread, &sc->unsolq_task);
- hdac_irq_free(sc);
for (i = 0; i < sc->num_ss; i++)
hdac_dma_free(sc, &sc->streams[i].bdl);
@@ -2206,4 +2207,4 @@ static driver_t hdac_driver = {
sizeof(struct hdac_softc),
};
-DRIVER_MODULE(snd_hda, pci, hdac_driver, NULL, NULL);
+DRIVER_MODULE_ORDERED(snd_hda, pci, hdac_driver, NULL, NULL, SI_ORDER_ANY);
diff --git a/sys/dev/sound/pci/hda/hdac.h b/sys/dev/sound/pci/hda/hdac.h
index 223434a214b1..c11e6b2d6810 100644
--- a/sys/dev/sound/pci/hda/hdac.h
+++ b/sys/dev/sound/pci/hda/hdac.h
@@ -66,6 +66,7 @@
#define HDA_INTEL_PCH HDA_MODEL_CONSTRUCT(INTEL, 0x3b56)
#define HDA_INTEL_PCH2 HDA_MODEL_CONSTRUCT(INTEL, 0x3b57)
#define HDA_INTEL_ELLK HDA_MODEL_CONSTRUCT(INTEL, 0x4b55)
+#define HDA_INTEL_ELLK2 HDA_MODEL_CONSTRUCT(INTEL, 0x4b58)
#define HDA_INTEL_JLK2 HDA_MODEL_CONSTRUCT(INTEL, 0x4dc8)
#define HDA_INTEL_BXTNP HDA_MODEL_CONSTRUCT(INTEL, 0x5a98)
#define HDA_INTEL_MACBOOKPRO92 HDA_MODEL_CONSTRUCT(INTEL, 0x7270)
@@ -535,6 +536,7 @@
#define FRAMEWORK_LAPTOP_0003_SUBVENDOR HDA_MODEL_CONSTRUCT(FRAMEWORK, 0x0003)
#define FRAMEWORK_LAPTOP_0005_SUBVENDOR HDA_MODEL_CONSTRUCT(FRAMEWORK, 0x0005)
#define FRAMEWORK_LAPTOP_0006_SUBVENDOR HDA_MODEL_CONSTRUCT(FRAMEWORK, 0x0006)
+#define FRAMEWORK_LAPTOP_000D_SUBVENDOR HDA_MODEL_CONSTRUCT(FRAMEWORK, 0x000d)
/* All codecs you can eat... */
#define HDA_CODEC_CONSTRUCT(vendor, id) \
diff --git a/sys/dev/tpm/tpm20.c b/sys/dev/tpm/tpm20.c
index 876dd0bcc40d..067e7ccae8f9 100644
--- a/sys/dev/tpm/tpm20.c
+++ b/sys/dev/tpm/tpm20.c
@@ -25,8 +25,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <sys/cdefs.h>
#include <sys/random.h>
+#include <dev/random/randomdev.h>
#include "tpm20.h"
@@ -184,6 +184,13 @@ tpm20_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
return (ENOTTY);
}
+#ifdef TPM_HARVEST
+static const struct random_source random_tpm = {
+ .rs_ident = "TPM",
+ .rs_source = RANDOM_PURE_TPM,
+};
+#endif
+
int
tpm20_init(struct tpm_sc *sc)
{
@@ -206,7 +213,7 @@ tpm20_init(struct tpm_sc *sc)
tpm20_release(sc);
#ifdef TPM_HARVEST
- random_harvest_register_source(RANDOM_PURE_TPM);
+ random_source_register(&random_tpm);
TIMEOUT_TASK_INIT(taskqueue_thread, &sc->harvest_task, 0,
tpm20_harvest, sc);
taskqueue_enqueue_timeout(taskqueue_thread, &sc->harvest_task, 0);
@@ -223,7 +230,7 @@ tpm20_release(struct tpm_sc *sc)
#ifdef TPM_HARVEST
if (device_is_attached(sc->dev))
taskqueue_drain_timeout(taskqueue_thread, &sc->harvest_task);
- random_harvest_deregister_source(RANDOM_PURE_TPM);
+ random_source_deregister(&random_tpm);
#endif
if (sc->buf != NULL)
diff --git a/sys/dev/tpm/tpm_tis_core.c b/sys/dev/tpm/tpm_tis_core.c
index d8421f8156c9..4159de4daf3b 100644
--- a/sys/dev/tpm/tpm_tis_core.c
+++ b/sys/dev/tpm/tpm_tis_core.c
@@ -97,6 +97,7 @@ tpmtis_attach(device_t dev)
{
struct tpm_sc *sc;
int result;
+ int poll = 0;
sc = device_get_softc(dev);
sc->dev = dev;
@@ -105,6 +106,12 @@ tpmtis_attach(device_t dev)
sx_init(&sc->dev_lock, "TPM driver lock");
sc->buf = malloc(TPM_BUFSIZE, M_TPM20, M_WAITOK);
+ resource_int_value("tpm", device_get_unit(dev), "use_polling", &poll);
+ if (poll != 0) {
+		device_printf(dev, "Using poll method to get TPM operation status\n");
+ goto skip_irq;
+ }
+
sc->irq_rid = 0;
sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irq_rid,
RF_ACTIVE | RF_SHAREABLE);
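The hint is read with resource_int_value(), so polling mode can be selected per unit from device.hints or loader.conf, for example:

	hint.tpm.0.use_polling="1"

which makes the driver skip interrupt allocation and rely on the polled status path instead.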
diff --git a/sys/dev/uart/uart_bus_pci.c b/sys/dev/uart/uart_bus_pci.c
index 14ac213066b8..22af8ee8663c 100644
--- a/sys/dev/uart/uart_bus_pci.c
+++ b/sys/dev/uart/uart_bus_pci.c
@@ -141,6 +141,8 @@ static const struct pci_id pci_ns8250_ids[] = {
0x10, 16384000 },
{ 0x1415, 0xc120, 0xffff, 0, "Oxford Semiconductor OXPCIe952 PCIe 16950 UART",
0x10 },
+{ 0x14a1, 0x0008, 0x14a1, 0x0008, "Systembase SB16C1058",
+ 0x10, 8 * DEFAULT_RCLK, },
{ 0x14e4, 0x160a, 0xffff, 0, "Broadcom TruManage UART", 0x10,
128 * DEFAULT_RCLK, 2},
{ 0x14e4, 0x4344, 0xffff, 0, "Sony Ericsson GC89 PC Card", 0x10},
diff --git a/sys/dev/ufshci/ufshci.h b/sys/dev/ufshci/ufshci.h
index b96d82ff836e..b055d2d2d769 100644
--- a/sys/dev/ufshci/ufshci.h
+++ b/sys/dev/ufshci/ufshci.h
@@ -716,6 +716,42 @@ struct ufshci_device_descriptor {
_Static_assert(sizeof(struct ufshci_device_descriptor) == 89,
"bad size for ufshci_device_descriptor");
+/* Defines the bit field of dExtendedUfsFeaturesSupport. */
+enum ufshci_desc_wb_ext_ufs_feature {
+ UFSHCI_DESC_EXT_UFS_FEATURE_FFU = (1 << 0),
+ UFSHCI_DESC_EXT_UFS_FEATURE_PSA = (1 << 1),
+ UFSHCI_DESC_EXT_UFS_FEATURE_DEV_LIFE_SPAN = (1 << 2),
+ UFSHCI_DESC_EXT_UFS_FEATURE_REFRESH_OP = (1 << 3),
+ UFSHCI_DESC_EXT_UFS_FEATURE_TOO_HIGH_TEMP = (1 << 4),
+ UFSHCI_DESC_EXT_UFS_FEATURE_TOO_LOW_TEMP = (1 << 5),
+ UFSHCI_DESC_EXT_UFS_FEATURE_EXT_TEMP = (1 << 6),
+ UFSHCI_DESC_EXT_UFS_FEATURE_HPB_SUPPORT = (1 << 7),
+ UFSHCI_DESC_EXT_UFS_FEATURE_WRITE_BOOSTER = (1 << 8),
+ UFSHCI_DESC_EXT_UFS_FEATURE_PERF_THROTTLING = (1 << 9),
+ UFSHCI_DESC_EXT_UFS_FEATURE_ADVANCED_RPMB = (1 << 10),
+ UFSHCI_DESC_EXT_UFS_FEATURE_ZONED_UFS_EXTENSION = (1 << 11),
+ UFSHCI_DESC_EXT_UFS_FEATURE_DEV_LEVEL_EXCEPTION = (1 << 12),
+ UFSHCI_DESC_EXT_UFS_FEATURE_HID = (1 << 13),
+ UFSHCI_DESC_EXT_UFS_FEATURE_BARRIER = (1 << 14),
+ UFSHCI_DESC_EXT_UFS_FEATURE_CLEAR_ERROR_HISTORY = (1 << 15),
+ UFSHCI_DESC_EXT_UFS_FEATURE_EXT_IID = (1 << 16),
+ UFSHCI_DESC_EXT_UFS_FEATURE_FBO = (1 << 17),
+ UFSHCI_DESC_EXT_UFS_FEATURE_FAST_RECOVERY_MODE = (1 << 18),
+ UFSHCI_DESC_EXT_UFS_FEATURE_RPMB_VENDOR_CMD = (1 << 19),
+};
+
+/* Defines the bit field of bWriteBoosterBufferType. */
+enum ufshci_desc_wb_buffer_type {
+ UFSHCI_DESC_WB_BUF_TYPE_LU_DEDICATED = 0x00,
+ UFSHCI_DESC_WB_BUF_TYPE_SINGLE_SHARED = 0x01,
+};
+
+/* Defines the bit field of bWriteBoosterBufferPreserveUserSpaceEn. */
+enum ufshci_desc_user_space_config {
+ UFSHCI_DESC_WB_BUF_USER_SPACE_REDUCTION = 0x00,
+ UFSHCI_DESC_WB_BUF_PRESERVE_USER_SPACE = 0x01,
+};
+
/*
* UFS Spec 4.1, section 14.1.5.3 "Configuration Descriptor"
* ConfigurationDescriptor use big-endian byte ordering.
@@ -1014,4 +1050,37 @@ enum ufshci_attributes {
UFSHCI_ATTR_B_REFRESH_METHOD = 0x2f,
};
+/* bAvailableWriteBoosterBufferSize codes (UFS WriteBooster available buffer
+ * left %) */
+enum ufshci_wb_available_buffer_Size {
+ UFSHCI_ATTR_WB_AVAILABLE_0 = 0x00, /* 0% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_10 = 0x01, /* 10% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_20 = 0x02, /* 20% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_30 = 0x03, /* 30% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_40 = 0x04, /* 40% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_50 = 0x05, /* 50% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_60 = 0x06, /* 60% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_70 = 0x07, /* 70% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_80 = 0x08, /* 80% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_90 = 0x09, /* 90% buffer remains */
+ UFSHCI_ATTR_WB_AVAILABLE_100 = 0x0A, /* 100% buffer remains */
+};
+
+/* bWriteBoosterBufferLifeTimeEst codes (UFS WriteBooster buffer life %) */
+enum ufshci_wb_lifetime {
+ UFSHCI_ATTR_WB_LIFE_DISABLED = 0x00, /* Info not available */
+ UFSHCI_ATTR_WB_LIFE_0_10 = 0x01, /* 0%–10% used */
+ UFSHCI_ATTR_WB_LIFE_10_20 = 0x02, /* 10%–20% used */
+ UFSHCI_ATTR_WB_LIFE_20_30 = 0x03, /* 20%–30% used */
+ UFSHCI_ATTR_WB_LIFE_30_40 = 0x04, /* 30%–40% used */
+ UFSHCI_ATTR_WB_LIFE_40_50 = 0x05, /* 40%–50% used */
+ UFSHCI_ATTR_WB_LIFE_50_60 = 0x06, /* 50%–60% used */
+ UFSHCI_ATTR_WB_LIFE_60_70 = 0x07, /* 60%–70% used */
+ UFSHCI_ATTR_WB_LIFE_70_80 = 0x08, /* 70%–80% used */
+ UFSHCI_ATTR_WB_LIFE_80_90 = 0x09, /* 80%–90% used */
+ UFSHCI_ATTR_WB_LIFE_90_100 = 0x0A, /* 90%–100% used */
+ UFSHCI_ATTR_WB_LIFE_EXCEEDED =
+ 0x0B, /* Exceeded estimated life (treat as WB disabled) */
+};
+
#endif /* __UFSHCI_H__ */
diff --git a/sys/dev/ufshci/ufshci_ctrlr.c b/sys/dev/ufshci/ufshci_ctrlr.c
index 37bd32665b2b..36be94b8b8b7 100644
--- a/sys/dev/ufshci/ufshci_ctrlr.c
+++ b/sys/dev/ufshci/ufshci_ctrlr.c
@@ -61,7 +61,7 @@ ufshci_ctrlr_enable_host_ctrlr(struct ufshci_controller *ctrlr)
int
ufshci_ctrlr_construct(struct ufshci_controller *ctrlr, device_t dev)
{
- uint32_t ver, cap, hcs, ie;
+ uint32_t ver, cap, hcs, ie, ahit;
uint32_t timeout_period, retry_count;
int error;
@@ -127,6 +127,13 @@ ufshci_ctrlr_construct(struct ufshci_controller *ctrlr, device_t dev)
if (error)
return (error);
+ /* Read the UECPA register to clear */
+ ufshci_mmio_read_4(ctrlr, uecpa);
+
+ /* Diable Auto-hibernate */
+ ahit = 0;
+ ufshci_mmio_write_4(ctrlr, ahit, ahit);
+
/*
* The device_present(UFSHCI_HCS_REG_DP) bit becomes true if the host
* controller has successfully received a Link Startup UIC command
@@ -139,6 +146,16 @@ ufshci_ctrlr_construct(struct ufshci_controller *ctrlr, device_t dev)
return (ENXIO);
}
+ /* Allocate and initialize UTP Task Management Request List. */
+ error = ufshci_utmr_req_queue_construct(ctrlr);
+ if (error)
+ return (error);
+
+ /* Allocate and initialize UTP Transfer Request List or SQ/CQ. */
+ error = ufshci_utr_req_queue_construct(ctrlr);
+ if (error)
+ return (error);
+
/* Enable additional interrupts by programming the IE register. */
ie = ufshci_mmio_read_4(ctrlr, ie);
ie |= UFSHCIM(UFSHCI_IE_REG_UTRCE); /* UTR Completion */
@@ -153,19 +170,12 @@ ufshci_ctrlr_construct(struct ufshci_controller *ctrlr, device_t dev)
/* TODO: Initialize interrupt Aggregation Control Register (UTRIACR) */
- /* Allocate and initialize UTP Task Management Request List. */
- error = ufshci_utmr_req_queue_construct(ctrlr);
- if (error)
- return (error);
-
- /* Allocate and initialize UTP Transfer Request List or SQ/CQ. */
- error = ufshci_utr_req_queue_construct(ctrlr);
- if (error)
- return (error);
-
/* TODO: Separate IO and Admin slot */
- /* max_hw_pend_io is the number of slots in the transfer_req_queue */
- ctrlr->max_hw_pend_io = ctrlr->transfer_req_queue.num_entries;
+ /*
+ * max_hw_pend_io is the number of slots in the transfer_req_queue.
+ * Reduce num_entries by one to reserve an admin slot.
+ */
+ ctrlr->max_hw_pend_io = ctrlr->transfer_req_queue.num_entries - 1;
return (0);
}
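With the default UFSHCI_UTR_ENTRIES of 32, max_hw_pend_io therefore becomes 31: one transfer slot is held back for admin (query/descriptor) requests until dedicated admin slots are implemented, per the TODO above.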
@@ -342,18 +352,19 @@ ufshci_ctrlr_start(struct ufshci_controller *ctrlr)
return;
}
- /* Read Controller Descriptor (Device, Geometry)*/
+ /* Read Controller Descriptor (Device, Geometry) */
if (ufshci_dev_get_descriptor(ctrlr) != 0) {
ufshci_ctrlr_fail(ctrlr, false);
return;
}
- /* TODO: Configure Write Protect */
+ if (ufshci_dev_config_write_booster(ctrlr)) {
+ ufshci_ctrlr_fail(ctrlr, false);
+ return;
+ }
/* TODO: Configure Background Operations */
- /* TODO: Configure Write Booster */
-
if (ufshci_sim_attach(ctrlr) != 0) {
ufshci_ctrlr_fail(ctrlr, false);
return;
diff --git a/sys/dev/ufshci/ufshci_dev.c b/sys/dev/ufshci/ufshci_dev.c
index a0e32914e2aa..dd196b1d638b 100644
--- a/sys/dev/ufshci/ufshci_dev.c
+++ b/sys/dev/ufshci/ufshci_dev.c
@@ -60,6 +60,14 @@ ufshci_dev_read_geometry_descriptor(struct ufshci_controller *ctrlr,
}
static int
+ufshci_dev_read_unit_descriptor(struct ufshci_controller *ctrlr, uint8_t lun,
+ struct ufshci_unit_descriptor *desc)
+{
+ return (ufshci_dev_read_descriptor(ctrlr, UFSHCI_DESC_TYPE_UNIT, lun, 0,
+ desc, sizeof(struct ufshci_unit_descriptor)));
+}
+
+static int
ufshci_dev_read_flag(struct ufshci_controller *ctrlr,
enum ufshci_flags flag_type, uint8_t *flag)
{
@@ -114,6 +122,61 @@ ufshci_dev_set_flag(struct ufshci_controller *ctrlr,
}
static int
+ufshci_dev_clear_flag(struct ufshci_controller *ctrlr,
+ enum ufshci_flags flag_type)
+{
+ struct ufshci_completion_poll_status status;
+ struct ufshci_query_param param;
+
+ param.function = UFSHCI_QUERY_FUNC_STANDARD_WRITE_REQUEST;
+ param.opcode = UFSHCI_QUERY_OPCODE_CLEAR_FLAG;
+ param.type = flag_type;
+ param.index = 0;
+ param.selector = 0;
+ param.value = 0;
+
+ status.done = 0;
+ ufshci_ctrlr_cmd_send_query_request(ctrlr, ufshci_completion_poll_cb,
+ &status, param);
+ ufshci_completion_poll(&status);
+ if (status.error) {
+ ufshci_printf(ctrlr, "ufshci_dev_clear_flag failed!\n");
+ return (ENXIO);
+ }
+
+ return (0);
+}
+
+static int
+ufshci_dev_read_attribute(struct ufshci_controller *ctrlr,
+ enum ufshci_attributes attr_type, uint8_t index, uint8_t selector,
+ uint64_t *value)
+{
+ struct ufshci_completion_poll_status status;
+ struct ufshci_query_param param;
+
+ param.function = UFSHCI_QUERY_FUNC_STANDARD_READ_REQUEST;
+ param.opcode = UFSHCI_QUERY_OPCODE_READ_ATTRIBUTE;
+ param.type = attr_type;
+ param.index = index;
+ param.selector = selector;
+ param.value = 0;
+
+ status.done = 0;
+ ufshci_ctrlr_cmd_send_query_request(ctrlr, ufshci_completion_poll_cb,
+ &status, param);
+ ufshci_completion_poll(&status);
+ if (status.error) {
+ ufshci_printf(ctrlr, "ufshci_dev_read_attribute failed!\n");
+ return (ENXIO);
+ }
+
+ *value = status.cpl.response_upiu.query_response_upiu.value_64;
+
+ return (0);
+}
+
+static int
ufshci_dev_write_attribute(struct ufshci_controller *ctrlr,
enum ufshci_attributes attr_type, uint8_t index, uint8_t selector,
uint64_t value)
@@ -270,7 +333,7 @@ ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr)
*/
const uint32_t fast_mode = 1;
const uint32_t rx_bit_shift = 4;
- const uint32_t power_mode = (fast_mode << rx_bit_shift) | fast_mode;
+ uint32_t power_mode, peer_granularity;
/* Update lanes with available TX/RX lanes */
if (ufshci_uic_send_dme_get(ctrlr, PA_AvailTxDataLanes,
@@ -295,6 +358,20 @@ ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr)
ctrlr->rx_lanes))
return (ENXIO);
+ if (ctrlr->quirks & UFSHCI_QUIRK_CHANGE_LANE_AND_GEAR_SEPARATELY) {
+ /* Before changing gears, first change the number of lanes. */
+ if (ufshci_uic_send_dme_get(ctrlr, PA_PWRMode, &power_mode))
+ return (ENXIO);
+ if (ufshci_uic_send_dme_set(ctrlr, PA_PWRMode, power_mode))
+ return (ENXIO);
+
+ /* Wait for power mode changed. */
+ if (ufshci_uic_power_mode_ready(ctrlr)) {
+ ufshci_reg_dump(ctrlr);
+ return (ENXIO);
+ }
+ }
+
/* Set HS-GEAR to max gear */
ctrlr->hs_gear = ctrlr->max_rx_hs_gear;
if (ufshci_uic_send_dme_set(ctrlr, PA_TxGear, ctrlr->hs_gear))
@@ -346,6 +423,7 @@ ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr)
return (ENXIO);
/* Set TX/RX PWRMode */
+ power_mode = (fast_mode << rx_bit_shift) | fast_mode;
if (ufshci_uic_send_dme_set(ctrlr, PA_PWRMode, power_mode))
return (ENXIO);
@@ -366,7 +444,8 @@ ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr)
pause_sbt("ufshci", ustosbt(1250), 0, C_PREL(1));
/* Test with dme_peer_get to make sure there are no errors. */
- if (ufshci_uic_send_dme_peer_get(ctrlr, PA_Granularity, NULL))
+ if (ufshci_uic_send_dme_peer_get(ctrlr, PA_Granularity,
+ &peer_granularity))
return (ENXIO);
}
@@ -398,7 +477,7 @@ ufshci_dev_get_descriptor(struct ufshci_controller *ctrlr)
return (error);
ver = be16toh(device->dev_desc.wSpecVersion);
- ufshci_printf(ctrlr, "UFS device spec version %u.%u%u\n",
+ ufshci_printf(ctrlr, "UFS device spec version %u.%u.%u\n",
UFSHCIV(UFSHCI_VER_REG_MJR, ver), UFSHCIV(UFSHCI_VER_REG_MNR, ver),
UFSHCIV(UFSHCI_VER_REG_VS, ver));
ufshci_printf(ctrlr, "%u enabled LUNs found\n",
@@ -426,3 +505,273 @@ ufshci_dev_get_descriptor(struct ufshci_controller *ctrlr)
return (0);
}
+
+static int
+ufshci_dev_enable_write_booster(struct ufshci_controller *ctrlr)
+{
+ struct ufshci_device *dev = &ctrlr->ufs_dev;
+ int error;
+
+ /* Enable WriteBooster */
+ error = ufshci_dev_set_flag(ctrlr, UFSHCI_FLAG_F_WRITE_BOOSTER_EN);
+ if (error) {
+ ufshci_printf(ctrlr, "Failed to enable WriteBooster\n");
+ return (error);
+ }
+ dev->is_wb_enabled = true;
+
+ /* Enable WriteBooster buffer flush during hibernate */
+ error = ufshci_dev_set_flag(ctrlr,
+ UFSHCI_FLAG_F_WB_BUFFER_FLUSH_DURING_HIBERNATE);
+ if (error) {
+ ufshci_printf(ctrlr,
+ "Failed to enable WriteBooster buffer flush during hibernate\n");
+ return (error);
+ }
+
+ /* Enable WriteBooster buffer flush */
+ error = ufshci_dev_set_flag(ctrlr, UFSHCI_FLAG_F_WB_BUFFER_FLUSH_EN);
+ if (error) {
+ ufshci_printf(ctrlr,
+ "Failed to enable WriteBooster buffer flush\n");
+ return (error);
+ }
+ dev->is_wb_flush_enabled = true;
+
+ return (0);
+}
+
+static int
+ufshci_dev_disable_write_booster(struct ufshci_controller *ctrlr)
+{
+ struct ufshci_device *dev = &ctrlr->ufs_dev;
+ int error;
+
+ /* Disable WriteBooster buffer flush */
+ error = ufshci_dev_clear_flag(ctrlr, UFSHCI_FLAG_F_WB_BUFFER_FLUSH_EN);
+ if (error) {
+ ufshci_printf(ctrlr,
+ "Failed to disable WriteBooster buffer flush\n");
+ return (error);
+ }
+ dev->is_wb_flush_enabled = false;
+
+ /* Disable WriteBooster buffer flush during hibernate */
+ error = ufshci_dev_clear_flag(ctrlr,
+ UFSHCI_FLAG_F_WB_BUFFER_FLUSH_DURING_HIBERNATE);
+ if (error) {
+ ufshci_printf(ctrlr,
+ "Failed to disable WriteBooster buffer flush during hibernate\n");
+ return (error);
+ }
+
+ /* Disable WriteBooster */
+ error = ufshci_dev_clear_flag(ctrlr, UFSHCI_FLAG_F_WRITE_BOOSTER_EN);
+ if (error) {
+ ufshci_printf(ctrlr, "Failed to disable WriteBooster\n");
+ return (error);
+ }
+ dev->is_wb_enabled = false;
+
+ return (0);
+}
+
+static int
+ufshci_dev_is_write_booster_buffer_life_time_left(
+ struct ufshci_controller *ctrlr, bool *is_life_time_left)
+{
+ struct ufshci_device *dev = &ctrlr->ufs_dev;
+ uint8_t buffer_lun;
+ uint64_t life_time;
+ uint32_t error;
+
+ if (dev->wb_buffer_type == UFSHCI_DESC_WB_BUF_TYPE_LU_DEDICATED)
+ buffer_lun = dev->wb_dedicated_lu;
+ else
+ buffer_lun = 0;
+
+ error = ufshci_dev_read_attribute(ctrlr,
+ UFSHCI_ATTR_B_WB_BUFFER_LIFE_TIME_EST, buffer_lun, 0, &life_time);
+ if (error)
+ return (error);
+
+ *is_life_time_left = (life_time != UFSHCI_ATTR_WB_LIFE_EXCEEDED);
+
+ return (0);
+}
+
+/*
+ * This function is not yet in use. It will be used when suspend/resume is
+ * implemented.
+ */
+static __unused int
+ufshci_dev_need_write_booster_buffer_flush(struct ufshci_controller *ctrlr,
+ bool *need_flush)
+{
+ struct ufshci_device *dev = &ctrlr->ufs_dev;
+ bool is_life_time_left = false;
+ uint64_t available_buffer_size, current_buffer_size;
+ uint8_t buffer_lun;
+ uint32_t error;
+
+ *need_flush = false;
+
+ if (!dev->is_wb_enabled)
+ return (0);
+
+ error = ufshci_dev_is_write_booster_buffer_life_time_left(ctrlr,
+ &is_life_time_left);
+ if (error)
+ return (error);
+
+ if (!is_life_time_left)
+ return (ufshci_dev_disable_write_booster(ctrlr));
+
+ if (dev->wb_buffer_type == UFSHCI_DESC_WB_BUF_TYPE_LU_DEDICATED)
+ buffer_lun = dev->wb_dedicated_lu;
+ else
+ buffer_lun = 0;
+
+ error = ufshci_dev_read_attribute(ctrlr,
+ UFSHCI_ATTR_B_AVAILABLE_WB_BUFFER_SIZE, buffer_lun, 0,
+ &available_buffer_size);
+ if (error)
+ return (error);
+
+ switch (dev->wb_user_space_config_option) {
+ case UFSHCI_DESC_WB_BUF_USER_SPACE_REDUCTION:
+ *need_flush = (available_buffer_size <=
+ UFSHCI_ATTR_WB_AVAILABLE_10);
+ break;
+ case UFSHCI_DESC_WB_BUF_PRESERVE_USER_SPACE:
+ /*
+ * In PRESERVE USER SPACE mode, flush should be performed when
+ * the current buffer is greater than 0 and the available buffer
+ * below write_booster_flush_threshold is left.
+ */
+ error = ufshci_dev_read_attribute(ctrlr,
+ UFSHCI_ATTR_D_CURRENT_WB_BUFFER_SIZE, buffer_lun, 0,
+ &current_buffer_size);
+ if (error)
+ return (error);
+
+ if (current_buffer_size == 0)
+ return (0);
+
+ *need_flush = (available_buffer_size <
+ dev->write_booster_flush_threshold);
+ break;
+ default:
+ ufshci_printf(ctrlr,
+ "Invalid bWriteBoosterBufferPreserveUserSpaceEn value");
+ return (EINVAL);
+ }
+
+ /*
+ * TODO: Need to handle WRITEBOOSTER_FLUSH_NEEDED exception case from
+ * wExceptionEventStatus attribute.
+ */
+
+ return (0);
+}
+
+int
+ufshci_dev_config_write_booster(struct ufshci_controller *ctrlr)
+{
+ struct ufshci_device *dev = &ctrlr->ufs_dev;
+ uint32_t extended_ufs_feature_support;
+ uint32_t alloc_units;
+ struct ufshci_unit_descriptor unit_desc;
+ uint8_t lun;
+ bool is_life_time_left;
+ uint32_t mega_byte = 1024 * 1024;
+ uint32_t error = 0;
+
+ extended_ufs_feature_support = be32toh(
+ dev->dev_desc.dExtendedUfsFeaturesSupport);
+ if (!(extended_ufs_feature_support &
+ UFSHCI_DESC_EXT_UFS_FEATURE_WRITE_BOOSTER)) {
+ /* This device does not support Write Booster */
+ return (0);
+ }
+
+ if (ufshci_dev_enable_write_booster(ctrlr))
+ return (0);
+
+ /* Get WriteBooster buffer parameters */
+ dev->wb_buffer_type = dev->dev_desc.bWriteBoosterBufferType;
+ dev->wb_user_space_config_option =
+ dev->dev_desc.bWriteBoosterBufferPreserveUserSpaceEn;
+
+ /*
+ * Find the size of the write buffer.
+ * With LU-dedicated (00h), the WriteBooster buffer is assigned
+ * exclusively to one chosen LU (not one-per-LU), whereas Shared (01h)
+ * uses a single device-wide buffer shared by multiple LUs.
+ */
+ if (dev->wb_buffer_type == UFSHCI_DESC_WB_BUF_TYPE_SINGLE_SHARED) {
+ alloc_units = be32toh(
+ dev->dev_desc.dNumSharedWriteBoosterBufferAllocUnits);
+ ufshci_printf(ctrlr,
+ "WriteBooster buffer type = Shared, alloc_units=%d\n",
+ alloc_units);
+ } else if (dev->wb_buffer_type ==
+ UFSHCI_DESC_WB_BUF_TYPE_LU_DEDICATED) {
+ ufshci_printf(ctrlr, "WriteBooster buffer type = Dedicated\n");
+ for (lun = 0; lun < ctrlr->max_lun_count; lun++) {
+ /* Find a dedicated buffer using a unit descriptor */
+ if (ufshci_dev_read_unit_descriptor(ctrlr, lun,
+ &unit_desc))
+ continue;
+
+ alloc_units = be32toh(
+ unit_desc.dLUNumWriteBoosterBufferAllocUnits);
+ if (alloc_units) {
+ dev->wb_dedicated_lu = lun;
+ break;
+ }
+ }
+ } else {
+ ufshci_printf(ctrlr,
+ "Not supported WriteBooster buffer type: 0x%x\n",
+ dev->wb_buffer_type);
+ goto out;
+ }
+
+ if (alloc_units == 0) {
+ ufshci_printf(ctrlr, "The WriteBooster buffer size is zero\n");
+ goto out;
+ }
+
+ dev->wb_buffer_size_mb = alloc_units *
+ dev->geo_desc.bAllocationUnitSize *
+ (be32toh(dev->geo_desc.dSegmentSize)) /
+ (mega_byte / UFSHCI_SECTOR_SIZE);
+
+ /* Set to flush when 40% of the available buffer size remains */
+ dev->write_booster_flush_threshold = UFSHCI_ATTR_WB_AVAILABLE_40;
+
+ /*
+ * Check if WriteBooster Buffer lifetime is left.
+ * WriteBooster Buffer lifetime — percent of life used based on P/E
+ * cycles. If "preserve user space" is enabled, writes to normal user
+ * space also consume WB life since the area is shared.
+ */
+ error = ufshci_dev_is_write_booster_buffer_life_time_left(ctrlr,
+ &is_life_time_left);
+ if (error)
+ goto out;
+
+ if (!is_life_time_left) {
+ ufshci_printf(ctrlr,
+ "There is no WriteBooster buffer life time left.\n");
+ goto out;
+ }
+
+ ufshci_printf(ctrlr, "WriteBooster Enabled\n");
+ return (0);
+out:
+ ufshci_dev_disable_write_booster(ctrlr);
+ return (error);
+}
+
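A rough worked example of the size computation above, using hypothetical descriptor values: with bAllocationUnitSize = 1 segment, dSegmentSize = 8192 (segment size is counted in 512-byte units, so 4 MiB per segment) and alloc_units = 8, wb_buffer_size_mb = 8 * 1 * 8192 / (1048576 / 512) = 65536 / 2048 = 32 MB.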
diff --git a/sys/dev/ufshci/ufshci_pci.c b/sys/dev/ufshci/ufshci_pci.c
index 65a69ee0b518..d64b7526f713 100644
--- a/sys/dev/ufshci/ufshci_pci.c
+++ b/sys/dev/ufshci/ufshci_pci.c
@@ -53,7 +53,8 @@ static struct _pcsid {
{ 0x98fa8086, "Intel Lakefield UFS Host Controller",
UFSHCI_REF_CLK_19_2MHz,
UFSHCI_QUIRK_LONG_PEER_PA_TACTIVATE |
- UFSHCI_QUIRK_WAIT_AFTER_POWER_MODE_CHANGE },
+ UFSHCI_QUIRK_WAIT_AFTER_POWER_MODE_CHANGE |
+ UFSHCI_QUIRK_CHANGE_LANE_AND_GEAR_SEPARATELY },
{ 0x54ff8086, "Intel UFS Host Controller", UFSHCI_REF_CLK_19_2MHz },
{ 0x00000000, NULL } };
diff --git a/sys/dev/ufshci/ufshci_private.h b/sys/dev/ufshci/ufshci_private.h
index 1a2742ae2e80..2e033f84c373 100644
--- a/sys/dev/ufshci/ufshci_private.h
+++ b/sys/dev/ufshci/ufshci_private.h
@@ -46,6 +46,8 @@ MALLOC_DECLARE(M_UFSHCI);
#define UFSHCI_UTR_ENTRIES (32)
#define UFSHCI_UTRM_ENTRIES (8)
+#define UFSHCI_SECTOR_SIZE (512)
+
struct ufshci_controller;
struct ufshci_completion_poll_status {
@@ -214,6 +216,15 @@ struct ufshci_device {
struct ufshci_geometry_descriptor geo_desc;
uint32_t unipro_version;
+
+ /* WriteBooster */
+ bool is_wb_enabled;
+ bool is_wb_flush_enabled;
+ uint32_t wb_buffer_type;
+ uint32_t wb_buffer_size_mb;
+ uint32_t wb_user_space_config_option;
+ uint8_t wb_dedicated_lu;
+ uint32_t write_booster_flush_threshold;
};
/*
@@ -229,7 +240,8 @@ struct ufshci_controller {
2 /* Need an additional 200 ms of PA_TActivate */
#define UFSHCI_QUIRK_WAIT_AFTER_POWER_MODE_CHANGE \
4 /* Need to wait 1250us after power mode change */
-
+#define UFSHCI_QUIRK_CHANGE_LANE_AND_GEAR_SEPARATELY \
+ 8 /* Need to change the number of lanes before changing HS-GEAR. */
uint32_t ref_clk;
struct cam_sim *ufshci_sim;
@@ -356,6 +368,7 @@ int ufshci_dev_init_unipro(struct ufshci_controller *ctrlr);
int ufshci_dev_init_uic_power_mode(struct ufshci_controller *ctrlr);
int ufshci_dev_init_ufs_power_mode(struct ufshci_controller *ctrlr);
int ufshci_dev_get_descriptor(struct ufshci_controller *ctrlr);
+int ufshci_dev_config_write_booster(struct ufshci_controller *ctrlr);
/* Controller Command */
void ufshci_ctrlr_cmd_send_task_mgmt_request(struct ufshci_controller *ctrlr,
diff --git a/sys/dev/ufshci/ufshci_reg.h b/sys/dev/ufshci/ufshci_reg.h
index 6c9b3e2c8c04..6d5768505102 100644
--- a/sys/dev/ufshci/ufshci_reg.h
+++ b/sys/dev/ufshci/ufshci_reg.h
@@ -274,7 +274,7 @@ struct ufshci_registers {
#define UFSHCI_HCS_REG_UTMRLRDY_MASK (0x1)
#define UFSHCI_HCS_REG_UCRDY_SHIFT (3)
#define UFSHCI_HCS_REG_UCRDY_MASK (0x1)
-#define UFSHCI_HCS_REG_UPMCRS_SHIFT (7)
+#define UFSHCI_HCS_REG_UPMCRS_SHIFT (8)
#define UFSHCI_HCS_REG_UPMCRS_MASK (0x7)
#define UFSHCI_HCS_REG_UTPEC_SHIFT (12)
#define UFSHCI_HCS_REG_UTPEC_MASK (0xF)
diff --git a/sys/dev/ufshci/ufshci_sysctl.c b/sys/dev/ufshci/ufshci_sysctl.c
index 5e5069f12e5f..56bc06b13f3c 100644
--- a/sys/dev/ufshci/ufshci_sysctl.c
+++ b/sys/dev/ufshci/ufshci_sysctl.c
@@ -152,6 +152,7 @@ ufshci_sysctl_initialize_ctrlr(struct ufshci_controller *ctrlr)
struct sysctl_ctx_list *ctrlr_ctx;
struct sysctl_oid *ctrlr_tree, *que_tree, *ioq_tree;
struct sysctl_oid_list *ctrlr_list, *ioq_list;
+ struct ufshci_device *dev = &ctrlr->ufs_dev;
#define QUEUE_NAME_LENGTH 16
char queue_name[QUEUE_NAME_LENGTH];
int i;
@@ -177,6 +178,25 @@ ufshci_sysctl_initialize_ctrlr(struct ufshci_controller *ctrlr)
SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "cap", CTLFLAG_RD,
&ctrlr->cap, 0, "Number of I/O queue pairs");
+ SYSCTL_ADD_BOOL(ctrlr_ctx, ctrlr_list, OID_AUTO, "wb_enabled",
+ CTLFLAG_RD, &dev->is_wb_enabled, 0, "WriteBooster enable/disable");
+
+ SYSCTL_ADD_BOOL(ctrlr_ctx, ctrlr_list, OID_AUTO, "wb_flush_enabled",
+ CTLFLAG_RD, &dev->is_wb_flush_enabled, 0,
+ "WriteBooster flush enable/disable");
+
+ SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "wb_buffer_type",
+ CTLFLAG_RD, &dev->wb_buffer_type, 0, "WriteBooster type");
+
+ SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "wb_buffer_size_mb",
+ CTLFLAG_RD, &dev->wb_buffer_size_mb, 0,
+ "WriteBooster buffer size in MB");
+
+ SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO,
+ "wb_user_space_config_option", CTLFLAG_RD,
+ &dev->wb_user_space_config_option, 0,
+ "WriteBooster preserve user space mode");
+
SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, "timeout_period",
CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, &ctrlr->timeout_period,
0, ufshci_sysctl_timeout_period, "IU",
diff --git a/sys/dev/ufshci/ufshci_uic_cmd.c b/sys/dev/ufshci/ufshci_uic_cmd.c
index 2c5f635dc11e..b9c867ff7065 100644
--- a/sys/dev/ufshci/ufshci_uic_cmd.c
+++ b/sys/dev/ufshci/ufshci_uic_cmd.c
@@ -14,7 +14,7 @@
int
ufshci_uic_power_mode_ready(struct ufshci_controller *ctrlr)
{
- uint32_t is;
+ uint32_t is, hcs;
int timeout;
/* Wait for the IS flag to change */
@@ -40,6 +40,15 @@ ufshci_uic_power_mode_ready(struct ufshci_controller *ctrlr)
DELAY(10);
}
+ /* Check HCS power mode change request status */
+ hcs = ufshci_mmio_read_4(ctrlr, hcs);
+ if (UFSHCIV(UFSHCI_HCS_REG_UPMCRS, hcs) != 0x01) {
+ ufshci_printf(ctrlr,
+ "Power mode change request status error: 0x%x\n",
+ UFSHCIV(UFSHCI_HCS_REG_UPMCRS, hcs));
+ return (ENXIO);
+ }
+
return (0);
}
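Per the HCS register layout, a UPMCRS value of 0x01 is PWR_LOCAL, meaning the locally requested power mode change completed successfully; other encodings (PWR_BUSY, PWR_ERROR_CAP, PWR_FATAL_ERROR, ...) are now reported and returned as ENXIO instead of being silently ignored.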
@@ -112,6 +121,7 @@ ufshci_uic_send_cmd(struct ufshci_controller *ctrlr,
struct ufshci_uic_cmd *uic_cmd, uint32_t *return_value)
{
int error;
+ uint32_t config_result_code;
mtx_lock(&ctrlr->uic_cmd_lock);
@@ -134,6 +144,13 @@ ufshci_uic_send_cmd(struct ufshci_controller *ctrlr,
if (error)
return (ENXIO);
+ config_result_code = ufshci_mmio_read_4(ctrlr, ucmdarg2);
+ if (config_result_code) {
+ ufshci_printf(ctrlr,
+ "Failed to send UIC command. (config result code = 0x%x)\n",
+ config_result_code);
+ }
+
if (return_value != NULL)
*return_value = ufshci_mmio_read_4(ctrlr, ucmdarg3);
diff --git a/sys/dev/usb/controller/xhci.c b/sys/dev/usb/controller/xhci.c
index 5be592512196..788b2b718062 100644
--- a/sys/dev/usb/controller/xhci.c
+++ b/sys/dev/usb/controller/xhci.c
@@ -156,6 +156,7 @@ struct xhci_std_temp {
static void xhci_do_poll(struct usb_bus *);
static void xhci_device_done(struct usb_xfer *, usb_error_t);
+static void xhci_get_xecp(struct xhci_softc *);
static void xhci_root_intr(struct xhci_softc *);
static void xhci_free_device_ext(struct usb_device *);
static struct xhci_endpoint_ext *xhci_get_endpoint_ext(struct usb_device *,
@@ -566,6 +567,8 @@ xhci_init(struct xhci_softc *sc, device_t self, uint8_t dma32)
device_printf(self, "%d bytes context size, %d-bit DMA\n",
sc->sc_ctx_is_64_byte ? 64 : 32, (int)sc->sc_bus.dma_bits);
+ xhci_get_xecp(sc);
+
/* enable 64Kbyte control endpoint quirk */
sc->sc_bus.control_ep_quirk = (xhcictlquirk ? 1 : 0);
@@ -654,6 +657,88 @@ xhci_uninit(struct xhci_softc *sc)
}
static void
+xhci_get_xecp(struct xhci_softc *sc)
+{
+
+ uint32_t hccp1;
+ uint32_t eec;
+ uint32_t eecp;
+ bool first = true;
+
+ hccp1 = XREAD4(sc, capa, XHCI_HCSPARAMS0);
+
+ if (XHCI_HCS0_XECP(hccp1) == 0) {
+ device_printf(sc->sc_bus.parent,
+ "xECP: no capabilities found\n");
+ return;
+ }
+
+ /*
+ * Parse the xECP Capabilities table and print known caps.
+ * Implemented, vendor and reserved xECP Capabilities values are
+ * documented in Table 7.2 of eXtensible Host Controller Interface for
+ * Universal Serial Bus (xHCI) Rev 1.2b 2023.
+ */
+ device_printf(sc->sc_bus.parent, "xECP capabilities <");
+
+ eec = -1;
+ for (eecp = XHCI_HCS0_XECP(hccp1) << 2;
+ eecp != 0 && XHCI_XECP_NEXT(eec) != 0;
+ eecp += XHCI_XECP_NEXT(eec) << 2) {
+ eec = XREAD4(sc, capa, eecp);
+
+ uint8_t xecpid = XHCI_XECP_ID(eec);
+
+ if ((xecpid >= 11 && xecpid <= 16) ||
+ (xecpid >= 19 && xecpid <= 191)) {
+ if (!first)
+ printf(",");
+ printf("RES(%x)", xecpid);
+ } else if (xecpid > 191) {
+ if (!first)
+ printf(",");
+ printf("VEND(%x)", xecpid);
+ } else {
+ if (!first)
+ printf(",");
+ switch (xecpid)
+ {
+ case XHCI_ID_USB_LEGACY:
+ printf("LEGACY");
+ break;
+ case XHCI_ID_PROTOCOLS:
+ printf("PROTO");
+ break;
+ case XHCI_ID_POWER_MGMT:
+ printf("POWER");
+ break;
+ case XHCI_ID_VIRTUALIZATION:
+ printf("VIRT");
+ break;
+ case XHCI_ID_MSG_IRQ:
+ printf("MSG IRQ");
+ break;
+ case XHCI_ID_USB_LOCAL_MEM:
+ printf("LOCAL MEM");
+ break;
+ case XHCI_ID_USB_DEBUG:
+ printf("DEBUG");
+ break;
+ case XHCI_ID_EXT_MSI:
+ printf("EXT MSI");
+ break;
+ case XHCI_ID_USB3_TUN:
+ printf("TUN");
+ break;
+
+ }
+ }
+ first = false;
+ }
+ printf(">\n");
+}
+
+static void
xhci_set_hw_power_sleep(struct usb_bus *bus, uint32_t state)
{
struct xhci_softc *sc = XHCI_BUS2SC(bus);
diff --git a/sys/dev/usb/controller/xhci_pci.c b/sys/dev/usb/controller/xhci_pci.c
index d5cfd228a429..820fb2f738a1 100644
--- a/sys/dev/usb/controller/xhci_pci.c
+++ b/sys/dev/usb/controller/xhci_pci.c
@@ -178,6 +178,8 @@ xhci_pci_match(device_t self)
return ("Intel Tiger Lake-H USB 3.2 controller");
case 0x461e8086:
return ("Intel Alder Lake-P Thunderbolt 4 USB controller");
+ case 0x4b7d8086:
+ return ("Intel Elkhart Lake USB 3.1 controller");
case 0x51ed8086:
return ("Intel Alder Lake USB 3.2 controller");
case 0x5aa88086:
diff --git a/sys/dev/usb/controller/xhcireg.h b/sys/dev/usb/controller/xhcireg.h
index 9d0b6e2f4b4b..821897155544 100644
--- a/sys/dev/usb/controller/xhcireg.h
+++ b/sys/dev/usb/controller/xhcireg.h
@@ -205,6 +205,11 @@
#define XHCI_ID_VIRTUALIZATION 0x0004
#define XHCI_ID_MSG_IRQ 0x0005
#define XHCI_ID_USB_LOCAL_MEM 0x0006
+/* values 7-9 are reserved */
+#define XHCI_ID_USB_DEBUG 0x000a
+/* values 11-16 are reserved */
+#define XHCI_ID_EXT_MSI 0x0011
+#define XHCI_ID_USB3_TUN 0x0012
/* XHCI register R/W wrappers */
#define XREAD1(sc, what, a) \
diff --git a/sys/dev/usb/net/if_umb.c b/sys/dev/usb/net/if_umb.c
index 5703bc03dd39..b1082b117259 100644
--- a/sys/dev/usb/net/if_umb.c
+++ b/sys/dev/usb/net/if_umb.c
@@ -177,9 +177,7 @@ static void umb_ncm_setup(struct umb_softc *, struct usb_config *);
static void umb_close_bulkpipes(struct umb_softc *);
static int umb_ioctl(if_t , u_long, caddr_t);
static void umb_init(void *);
-#ifdef DEV_NETMAP
static void umb_input(if_t , struct mbuf *);
-#endif
static int umb_output(if_t , struct mbuf *,
const struct sockaddr *, struct route *);
static void umb_start(if_t );
@@ -585,9 +583,7 @@ umb_attach_task(struct usb_proc_msg *msg)
if_setsoftc(ifp, sc);
if_setflags(ifp, IFF_SIMPLEX | IFF_MULTICAST | IFF_POINTOPOINT);
if_setioctlfn(ifp, umb_ioctl);
-#ifdef DEV_NETMAP
if_setinputfn(ifp, umb_input);
-#endif
if_setoutputfn(ifp, umb_output);
if_setstartfn(ifp, umb_start);
if_setinitfn(ifp, umb_init);
@@ -666,7 +662,7 @@ umb_ncm_setup(struct umb_softc *sc, struct usb_config * config)
struct ncm_ntb_parameters np;
usb_error_t error;
- /* Query NTB tranfers sizes */
+ /* Query NTB transfers sizes */
req.bmRequestType = UT_READ_CLASS_INTERFACE;
req.bRequest = NCM_GET_NTB_PARAMETERS;
USETW(req.wValue, 0);
diff --git a/sys/dev/usb/serial/udbc.c b/sys/dev/usb/serial/udbc.c
new file mode 100644
index 000000000000..d7ca6b25bf32
--- /dev/null
+++ b/sys/dev/usb/serial/udbc.c
@@ -0,0 +1,404 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-NetBSD
+ *
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * Copyright (c) 2016-2024 Hiroki Sato <hrs@FreeBSD.org>
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Lennart Augustsson (lennart@augustsson.net).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/callout.h>
+#include <sys/condvar.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/queue.h>
+#include <sys/stddef.h>
+#include <sys/stdint.h>
+#include <sys/sx.h>
+#include <sys/sysctl.h>
+#include <sys/unistd.h>
+
+#include <dev/usb/usb.h>
+#include <dev/usb/usb_ioctl.h>
+#include <dev/usb/usbdi.h>
+#include <dev/usb/usbdi_util.h>
+#include <dev/usb/usb_core.h>
+
+#include "usbdevs.h"
+
+#define USB_DEBUG_VAR udbc_debug
+#include <dev/usb/usb_process.h>
+#include <dev/usb/serial/usb_serial.h>
+#include <dev/usb/usb_debug.h>
+
+static SYSCTL_NODE(_hw_usb, OID_AUTO, udbc, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "USB DbC Client");
+
+#ifdef USB_DEBUG
+static int udbc_debug = 0;
+SYSCTL_INT(_hw_usb_udbc, OID_AUTO, debug, CTLFLAG_RWTUN, &udbc_debug, 0,
+ "Debug level");
+#endif
+
+#define UDBC_CONFIG_INDEX 0
+
+#define UDBC_IBUFSIZE 1024
+#define UDBC_OBUFSIZE 1024
+
+enum {
+ UDBC_BULK_DT_WR,
+ UDBC_BULK_DT_RD,
+	UDBC_N_TRANSFER,	/* number of endpoints */
+};
+
+struct udbc_softc {
+ struct ucom_super_softc sc_super_ucom;
+ struct ucom_softc sc_ucom;
+
+ struct usb_device *sc_udev;
+ struct usb_xfer *sc_xfer[UDBC_N_TRANSFER];
+ device_t sc_dev;
+ struct mtx sc_mtx;
+
+ uint32_t sc_unit;
+};
+
+/* prototypes */
+
+static device_probe_t udbc_probe;
+static device_attach_t udbc_attach;
+static device_detach_t udbc_detach;
+static void udbc_free_softc(struct udbc_softc *);
+
+static usb_callback_t udbc_write_callback;
+static usb_callback_t udbc_read_callback;
+
+static void udbc_free(struct ucom_softc *);
+static void udbc_cfg_open(struct ucom_softc *);
+static void udbc_cfg_close(struct ucom_softc *);
+static int udbc_pre_param(struct ucom_softc *, struct termios *);
+static int udbc_ioctl(struct ucom_softc *, uint32_t, caddr_t, int,
+ struct thread *);
+static void udbc_start_read(struct ucom_softc *);
+static void udbc_stop_read(struct ucom_softc *);
+static void udbc_start_write(struct ucom_softc *);
+static void udbc_stop_write(struct ucom_softc *);
+static void udbc_poll(struct ucom_softc *ucom);
+
+static const struct usb_config udbc_config[UDBC_N_TRANSFER] = {
+ [UDBC_BULK_DT_WR] = {
+ .type = UE_BULK,
+ .endpoint = UE_ADDR_ANY,
+ .direction = UE_DIR_OUT,
+ .bufsize = UDBC_OBUFSIZE,
+ .flags = {.pipe_bof = 1,},
+ .callback = &udbc_write_callback,
+ },
+
+ [UDBC_BULK_DT_RD] = {
+ .type = UE_BULK,
+ .endpoint = UE_ADDR_ANY,
+ .direction = UE_DIR_IN,
+ .bufsize = UDBC_IBUFSIZE,
+ .flags = {.pipe_bof = 1,.short_xfer_ok = 1,},
+ .callback = &udbc_read_callback,
+ },
+};
+
+static const struct ucom_callback udbc_callback = {
+ .ucom_cfg_open = &udbc_cfg_open,
+ .ucom_cfg_close = &udbc_cfg_close,
+ .ucom_pre_param = &udbc_pre_param,
+ .ucom_ioctl = &udbc_ioctl,
+ .ucom_start_read = &udbc_start_read,
+ .ucom_stop_read = &udbc_stop_read,
+ .ucom_start_write = &udbc_start_write,
+ .ucom_stop_write = &udbc_stop_write,
+ .ucom_poll = &udbc_poll,
+ .ucom_free = &udbc_free,
+};
+
+static device_method_t udbc_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, udbc_probe),
+ DEVMETHOD(device_attach, udbc_attach),
+ DEVMETHOD(device_detach, udbc_detach),
+ DEVMETHOD_END
+};
+
+static int
+udbc_probe(device_t dev)
+{
+ struct usb_attach_arg *uaa = device_get_ivars(dev);
+
+ if (uaa->usb_mode != USB_MODE_HOST)
+ return (ENXIO);
+ if (uaa->info.bConfigIndex != UDBC_CONFIG_INDEX)
+ return (ENXIO);
+ if (uaa->info.bInterfaceClass != UICLASS_DIAGNOSTIC)
+ return (ENXIO);
+ if (uaa->info.bDeviceProtocol != 0x00) /* GNU GDB == 1 */
+ return (ENXIO);
+
+ return (BUS_PROBE_SPECIFIC);
+}
+
+static int
+udbc_attach(device_t dev)
+{
+ struct usb_attach_arg *uaa = device_get_ivars(dev);
+ struct udbc_softc *sc = device_get_softc(dev);
+ int error;
+
+ DPRINTF("\n");
+
+ sc->sc_udev = uaa->device;
+ sc->sc_dev = dev;
+ sc->sc_unit = device_get_unit(dev);
+
+ device_set_usb_desc(dev);
+ mtx_init(&sc->sc_mtx, "udbc", NULL, MTX_DEF);
+ ucom_ref(&sc->sc_super_ucom);
+
+ sc->sc_ucom.sc_portno = 0;
+
+ error = usbd_transfer_setup(uaa->device, &uaa->info.bIfaceIndex,
+ sc->sc_xfer, udbc_config, UDBC_N_TRANSFER, sc, &sc->sc_mtx);
+
+ if (error) {
+ device_printf(dev,
+ "allocating USB transfers failed\n");
+ goto detach;
+ }
+ /* clear stall at first run */
+ mtx_lock(&sc->sc_mtx);
+ usbd_xfer_set_stall(sc->sc_xfer[UDBC_BULK_DT_WR]);
+ usbd_xfer_set_stall(sc->sc_xfer[UDBC_BULK_DT_RD]);
+ mtx_unlock(&sc->sc_mtx);
+
+ error = ucom_attach(&sc->sc_super_ucom, &sc->sc_ucom, 1, sc,
+ &udbc_callback, &sc->sc_mtx);
+ if (error)
+ goto detach;
+ ucom_set_pnpinfo_usb(&sc->sc_super_ucom, dev);
+
+ return (0); /* success */
+
+detach:
+ udbc_detach(dev);
+ return (ENXIO);
+}
+
+static int
+udbc_detach(device_t dev)
+{
+ struct udbc_softc *sc = device_get_softc(dev);
+
+ ucom_detach(&sc->sc_super_ucom, &sc->sc_ucom);
+ usbd_transfer_unsetup(sc->sc_xfer, UDBC_N_TRANSFER);
+
+ device_claim_softc(dev);
+
+ udbc_free_softc(sc);
+
+ return (0);
+}
+
+UCOM_UNLOAD_DRAIN(udbc);
+
+static void
+udbc_free_softc(struct udbc_softc *sc)
+{
+ if (ucom_unref(&sc->sc_super_ucom)) {
+ mtx_destroy(&sc->sc_mtx);
+ device_free_softc(sc);
+ }
+}
+
+static void
+udbc_free(struct ucom_softc *ucom)
+{
+ udbc_free_softc(ucom->sc_parent);
+}
+
+static void
+udbc_cfg_open(struct ucom_softc *ucom)
+{
+ /*
+ * This do-nothing open routine exists for the sole purpose of this
+ * DPRINTF() so that you can see the point at which open gets called
+ * when debugging is enabled.
+ */
+ DPRINTF("\n");
+}
+
+static void
+udbc_cfg_close(struct ucom_softc *ucom)
+{
+ /*
+ * This do-nothing close routine exists for the sole purpose of this
+ * DPRINTF() so that you can see the point at which close gets called
+ * when debugging is enabled.
+ */
+ DPRINTF("\n");
+}
+
+static void
+udbc_write_callback(struct usb_xfer *xfer, usb_error_t error)
+{
+ struct udbc_softc *sc = usbd_xfer_softc(xfer);
+ struct usb_page_cache *pc;
+ uint32_t buflen;
+
+ DPRINTFN(3, "\n");
+
+ switch (USB_GET_STATE(xfer)) {
+ default: /* Error */
+ if (error != USB_ERR_CANCELLED) {
+ /* try to clear stall first */
+ usbd_xfer_set_stall(xfer);
+ }
+ /* FALLTHROUGH */
+ case USB_ST_SETUP:
+ case USB_ST_TRANSFERRED:
+ pc = usbd_xfer_get_frame(xfer, 0);
+ if (ucom_get_data(&sc->sc_ucom, pc, 0, UDBC_OBUFSIZE,
+ &buflen) == 0)
+ break;
+ if (buflen != 0) {
+ usbd_xfer_set_frame_len(xfer, 0, buflen);
+ usbd_transfer_submit(xfer);
+ }
+ break;
+ }
+}
+
+static void
+udbc_read_callback(struct usb_xfer *xfer, usb_error_t error)
+{
+ struct udbc_softc *sc = usbd_xfer_softc(xfer);
+ struct usb_page_cache *pc;
+ int buflen;
+
+ DPRINTFN(3, "\n");
+
+ usbd_xfer_status(xfer, &buflen, NULL, NULL, NULL);
+
+ switch (USB_GET_STATE(xfer)) {
+ case USB_ST_TRANSFERRED:
+ pc = usbd_xfer_get_frame(xfer, 0);
+ ucom_put_data(&sc->sc_ucom, pc, 0, buflen);
+ /* FALLTHROUGH */
+ case USB_ST_SETUP:
+tr_setup:
+ usbd_xfer_set_frame_len(xfer, 0, usbd_xfer_max_len(xfer));
+ usbd_transfer_submit(xfer);
+ return;
+
+ default: /* Error */
+ if (error != USB_ERR_CANCELLED) {
+ /* try to clear stall first */
+ usbd_xfer_set_stall(xfer);
+ goto tr_setup;
+ }
+ return;
+ }
+}
+
+static int
+udbc_pre_param(struct ucom_softc *ucom, struct termios *t)
+{
+ DPRINTF("\n");
+
+ return (0);
+}
+
+static int
+udbc_ioctl(struct ucom_softc *ucom, uint32_t cmd, caddr_t data, int flag,
+ struct thread *td)
+{
+ return (ENOIOCTL);
+}
+
+static void
+udbc_start_read(struct ucom_softc *ucom)
+{
+ struct udbc_softc *sc = ucom->sc_parent;
+
+ usbd_transfer_start(sc->sc_xfer[UDBC_BULK_DT_RD]);
+}
+
+static void
+udbc_stop_read(struct ucom_softc *ucom)
+{
+ struct udbc_softc *sc = ucom->sc_parent;
+
+ usbd_transfer_stop(sc->sc_xfer[UDBC_BULK_DT_RD]);
+}
+
+static void
+udbc_start_write(struct ucom_softc *ucom)
+{
+ struct udbc_softc *sc = ucom->sc_parent;
+
+ usbd_transfer_start(sc->sc_xfer[UDBC_BULK_DT_WR]);
+}
+
+static void
+udbc_stop_write(struct ucom_softc *ucom)
+{
+ struct udbc_softc *sc = ucom->sc_parent;
+
+ usbd_transfer_stop(sc->sc_xfer[UDBC_BULK_DT_WR]);
+}
+
+static void
+udbc_poll(struct ucom_softc *ucom)
+{
+ struct udbc_softc *sc = ucom->sc_parent;
+
+ usbd_transfer_poll(sc->sc_xfer, UDBC_N_TRANSFER);
+}
+
+static driver_t udbc_driver = {
+ .name = "udbc",
+ .methods = udbc_methods,
+ .size = sizeof(struct udbc_softc),
+};
+
+DRIVER_MODULE(udbc, uhub, udbc_driver, NULL, NULL);
+MODULE_DEPEND(udbc, ucom, 1, 1, 1);
+MODULE_DEPEND(udbc, usb, 1, 1, 1);
+MODULE_VERSION(udbc, 1);
diff --git a/sys/dev/usb/usb_hub.c b/sys/dev/usb/usb_hub.c
index e3509862ef54..ee9d8ab0c9bb 100644
--- a/sys/dev/usb/usb_hub.c
+++ b/sys/dev/usb/usb_hub.c
@@ -954,7 +954,8 @@ done:
* packet. This function is called having the "bus_mtx" locked.
*------------------------------------------------------------------------*/
void
-uhub_root_intr(struct usb_bus *bus, const uint8_t *ptr, uint8_t len)
+uhub_root_intr(struct usb_bus *bus,
+ const uint8_t *ptr __unused, uint8_t len __unused)
{
USB_BUS_LOCK_ASSERT(bus, MA_OWNED);
diff --git a/sys/dev/usb/wlan/if_rsu.c b/sys/dev/usb/wlan/if_rsu.c
index 07f7b6f3a708..e976948f6849 100644
--- a/sys/dev/usb/wlan/if_rsu.c
+++ b/sys/dev/usb/wlan/if_rsu.c
@@ -371,18 +371,16 @@ rsu_update_chw(struct ieee80211com *ic)
/*
* notification from net80211 that it'd like to do A-MPDU on the given TID.
- *
- * Note: this actually hangs traffic at the present moment, so don't use it.
- * The firmware debug does indiciate it's sending and establishing a TX AMPDU
- * session, but then no traffic flows.
*/
static int
rsu_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap)
{
-#if 0
struct rsu_softc *sc = ni->ni_ic->ic_softc;
struct r92s_add_ba_req req;
+ RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: called, tid=%d\n",
+ __func__, tap->txa_tid);
+
/* Don't enable if it's requested or running */
if (IEEE80211_AMPDU_REQUESTED(tap))
return (0);
@@ -397,23 +395,30 @@ rsu_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap)
return (0);
/* Send the firmware command */
- RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: establishing AMPDU TX for TID %d\n",
+ RSU_DPRINTF(sc, RSU_DEBUG_AMPDU,
+ "%s: establishing AMPDU TX for TID %d\n",
__func__,
tap->txa_tid);
RSU_LOCK(sc);
- if (rsu_fw_cmd(sc, R92S_CMD_ADDBA_REQ, &req, sizeof(req)) != 1) {
+ if (rsu_fw_cmd(sc, R92S_CMD_ADDBA_REQ, &req, sizeof(req)) != 0) {
RSU_UNLOCK(sc);
+ RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: AMPDU TX cmd failure\n",
+ __func__);
/* Mark failure */
- (void) ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 0);
+ ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 0);
+ /* Return 0, we've been driving this ourselves */
return (0);
}
RSU_UNLOCK(sc);
+ RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: AMPDU TX cmd success\n",
+ __func__);
+
/* Mark success; we don't get any further notifications */
- (void) ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 1);
-#endif
- /* Return 0, we're driving this ourselves */
+ ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 1);
+
+ /* Return 0, we've been driving this ourselves */
return (0);
}
@@ -563,9 +568,7 @@ rsu_attach(device_t self)
/* Enable basic HT */
ic->ic_htcaps = IEEE80211_HTC_HT |
-#if 0
IEEE80211_HTC_AMPDU |
-#endif
IEEE80211_HTC_AMSDU |
IEEE80211_HTCAP_MAXAMSDU_3839 |
IEEE80211_HTCAP_SMPS_OFF;
@@ -576,6 +579,7 @@ rsu_attach(device_t self)
ic->ic_rxstream = sc->sc_nrxstream;
}
ic->ic_flags_ext |= IEEE80211_FEXT_SCAN_OFFLOAD;
+ ic->ic_flags_ext |= IEEE80211_FEXT_SEQNO_OFFLOAD;
rsu_getradiocaps(ic, IEEE80211_CHAN_MAX, &ic->ic_nchans,
ic->ic_channels);
@@ -1537,6 +1541,10 @@ rsu_key_alloc(struct ieee80211vap *vap, struct ieee80211_key *k,
is_checked = 1;
k->wk_flags |= IEEE80211_KEY_SWCRYPT;
} else
+ /*
+ * TODO: should allocate these from the CAM space;
+ * skipping over the fixed slots and _BC / _BSS.
+ */
*keyix = R92S_MACID_BSS;
}
@@ -2166,7 +2174,7 @@ rsu_event_addba_req_report(struct rsu_softc *sc, uint8_t *buf, int len)
__func__,
ether_sprintf(ba->mac_addr),
(int) ba->tid,
- (int) le16toh(ba->ssn));
+ (int) le16toh(ba->ssn) >> 4);
/* XXX do node lookup; this is STA specific */
@@ -2212,6 +2220,11 @@ rsu_rx_event(struct rsu_softc *sc, uint8_t code, uint8_t *buf, int len)
if (vap->iv_state == IEEE80211_S_AUTH)
rsu_event_join_bss(sc, buf, len);
break;
+
+ /* TODO: what about R92S_EVT_ADD_STA? and decoding macid? */
+ /* It likely is required for IBSS/AP mode */
+
+ /* TODO: should I be doing this transition in AP mode? */
case R92S_EVT_DEL_STA:
RSU_DPRINTF(sc, RSU_DEBUG_FWCMD | RSU_DEBUG_STATE,
"%s: disassociated from %s\n", __func__,
@@ -2229,6 +2242,7 @@ rsu_rx_event(struct rsu_softc *sc, uint8_t code, uint8_t *buf, int len)
break;
case R92S_EVT_FWDBG:
buf[60] = '\0';
+ /* TODO: some are \n terminated, some aren't, sigh */
RSU_DPRINTF(sc, RSU_DEBUG_FWDBG, "FWDBG: %s\n", (char *)buf);
break;
case R92S_EVT_ADDBA_REQ_REPORT:
@@ -2782,6 +2796,9 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni,
if (rate != 0)
ridx = rate2ridx(rate);
+ /* Assign sequence number, A-MPDU or otherwise */
+ ieee80211_output_seqno_assign(ni, -1, m0);
+
if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) {
k = ieee80211_crypto_encap(ni, m0);
if (k == NULL) {
@@ -2838,8 +2855,10 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni,
SM(R92S_TXDW0_OFFSET, sizeof(*txd)) |
R92S_TXDW0_OWN | R92S_TXDW0_FSG | R92S_TXDW0_LSG);
+ /* TODO: correct macid here? It should be in the node */
txd->txdw1 |= htole32(
SM(R92S_TXDW1_MACID, R92S_MACID_BSS) | SM(R92S_TXDW1_QSEL, qid));
+
if (!hasqos)
txd->txdw1 |= htole32(R92S_TXDW1_NONQOS);
if (k != NULL && !(k->wk_flags & IEEE80211_KEY_SWENCRYPT)) {
@@ -2860,8 +2879,13 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni,
SM(R92S_TXDW1_CIPHER, cipher) |
SM(R92S_TXDW1_KEYIDX, k->wk_keyix));
}
- /* XXX todo: set AGGEN bit if appropriate? */
- txd->txdw2 |= htole32(R92S_TXDW2_BK);
+
+ /*
+ * Note: no need to set TXDW2_AGGEN/TXDW2_BK to mark
+ * A-MPDU and non-AMPDU candidates; the firmware will
+ * handle this for us.
+ */
+
if (ismcast)
txd->txdw2 |= htole32(R92S_TXDW2_BMCAST);
@@ -2880,8 +2904,11 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni,
}
/*
- * Firmware will use and increment the sequence number for the
- * specified priority.
+ * Pass in prio here, NOT the sequence number.
+ *
+ * The hardware is in theory incrementing sequence numbers
+ * for us, but I haven't yet figured out exactly when/how
+ * it's supposed to work.
*/
txd->txdw3 |= htole32(SM(R92S_TXDW3_SEQ, prio));
@@ -3481,7 +3508,8 @@ rsu_load_firmware(struct rsu_softc *sc)
dmem.vcs_mode = R92S_VCS_MODE_RTS_CTS;
dmem.turbo_mode = 0;
dmem.bw40_en = !! (ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40);
- dmem.amsdu2ampdu_en = !! (sc->sc_ht);
+ /* net80211 handles AMSDUs just fine */
+ dmem.amsdu2ampdu_en = 0;
dmem.ampdu_en = !! (sc->sc_ht);
dmem.agg_offload = !! (sc->sc_ht);
dmem.qos_en = 1;
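
The A-MPDU TX hunks above are spread across several fragments; pieced together, the driver-managed enable path has roughly the shape below. This is a condensed reconstruction for readability, not a verbatim excerpt: the "requested or running" check and the firmware request setup come from the surrounding driver code and are abbreviated here.

static int
rsu_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap)
{
	struct rsu_softc *sc = ni->ni_ic->ic_softc;
	struct r92s_add_ba_req req;

	/* Skip TIDs that net80211 already requested or has running. */
	if (IEEE80211_AMPDU_REQUESTED(tap) || IEEE80211_AMPDU_RUNNING(tap))
		return (0);

	/* ... populate req for tap->txa_tid (elided) ... */

	/* Ask the firmware to establish the TX BA session itself. */
	RSU_LOCK(sc);
	if (rsu_fw_cmd(sc, R92S_CMD_ADDBA_REQ, &req, sizeof(req)) != 0) {
		RSU_UNLOCK(sc);
		/* Tell net80211 the session is not active. */
		ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 0);
		return (0);
	}
	RSU_UNLOCK(sc);

	/* Mark the session active; no ADDBA handshake from the stack. */
	ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 1);
	return (0);
}
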
diff --git a/sys/dev/usb/wlan/if_rsureg.h b/sys/dev/usb/wlan/if_rsureg.h
index fb706a4d9b1a..e2074e1dd2ad 100644
--- a/sys/dev/usb/wlan/if_rsureg.h
+++ b/sys/dev/usb/wlan/if_rsureg.h
@@ -593,7 +593,14 @@ struct r92s_event_join_bss {
struct ndis_wlan_bssid_ex bss;
} __packed;
-#define R92S_MACID_BSS 5 /* XXX hardcoded somewhere */
+/*
+ * This is hard-coded in the firmware for a STA mode
+ * BSS join. If you turn on FWDEBUG, you'll see this
+ * in the logs:
+ *
+ * rsu0: FWDBG: mac id #5: 0000005b, 000fffff, 00000000
+ */
+#define R92S_MACID_BSS 5
/* Rx MAC descriptor. */
struct r92s_rx_stat {
diff --git a/sys/dev/usb/wlan/if_run.c b/sys/dev/usb/wlan/if_run.c
index 97c790dd5b81..147aa4044057 100644
--- a/sys/dev/usb/wlan/if_run.c
+++ b/sys/dev/usb/wlan/if_run.c
@@ -882,6 +882,7 @@ run_attach(device_t self)
ic->ic_flags |= IEEE80211_F_DATAPAD;
ic->ic_flags_ext |= IEEE80211_FEXT_SWBMISS;
+ ic->ic_flags_ext |= IEEE80211_FEXT_SEQNO_OFFLOAD;
run_getradiocaps(ic, IEEE80211_CHAN_MAX, &ic->ic_nchans,
ic->ic_channels);
@@ -3522,6 +3523,9 @@ run_tx(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni)
data->ni = ni;
data->ridx = ridx;
+	/* Assign the sequence number now, A-MPDU TX or not (for now). */
+ ieee80211_output_seqno_assign(ni, -1, m);
+
run_set_tx_desc(sc, data);
/*
@@ -3627,6 +3631,9 @@ run_tx_mgt(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni)
data->ni = ni;
data->ridx = ridx;
+	/* Assign the sequence number now, A-MPDU TX or not (for now). */
+ ieee80211_output_seqno_assign(ni, -1, m);
+
run_set_tx_desc(sc, data);
RUN_DPRINTF(sc, RUN_DEBUG_XMIT, "sending mgt frame len=%d rate=%d\n",
@@ -3771,6 +3778,9 @@ run_tx_param(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni,
break;
data->ridx = ridx;
+	/* Assign the sequence number now, A-MPDU TX or not (for now). */
+ ieee80211_output_seqno_assign(ni, -1, m);
+
run_set_tx_desc(sc, data);
RUN_DPRINTF(sc, RUN_DEBUG_XMIT, "sending raw frame len=%u rate=%u\n",
@@ -6416,6 +6426,10 @@ run_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap)
{
/* For now, no A-MPDU TX support in the driver */
+ /*
+	 * TODO: maybe we need to enable seqno generation too?
+ * What other TX desc bits are missing/needed?
+ */
return (0);
}
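
Both if_rsu and if_run adopt the same two-part convention for host-assigned sequence numbers; a minimal sketch follows. The comment about the -1 argument is my reading of how both drivers use it here, not something the diff itself states.

/* At attach: advertise that sequence numbers are assigned by the host side. */
ic->ic_flags_ext |= IEEE80211_FEXT_SEQNO_OFFLOAD;

/*
 * In every TX path (data, management, raw), before crypto encap and
 * descriptor setup: assign the sequence number for this mbuf.  The -1
 * appears to mean "derive the TID from the frame itself".
 */
ieee80211_output_seqno_assign(ni, -1, m);
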
diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index ecb3dbb370e5..528ff3372097 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -133,12 +133,14 @@ static int vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
static int vtnet_rxq_new_buf(struct vtnet_rxq *);
+#if defined(INET) || defined(INET6)
static int vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
- uint16_t, int, struct virtio_net_hdr *);
-static int vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
- uint16_t, int, struct virtio_net_hdr *);
+ bool, int, struct virtio_net_hdr *);
+static void vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
+ int);
static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
struct virtio_net_hdr *);
+#endif
static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
@@ -1178,6 +1180,7 @@ vtnet_setup_interface(struct vtnet_softc *sc)
if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO)
if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
+ if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0);
/*
* Capabilities after here are not enabled by default.
@@ -1760,164 +1763,165 @@ vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
return (error);
}
+#if defined(INET) || defined(INET6)
static int
-vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype,
- int hoff, struct virtio_net_hdr *hdr)
+vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, bool isipv6,
+ int protocol, struct virtio_net_hdr *hdr)
{
struct vtnet_softc *sc;
- int error;
- sc = rxq->vtnrx_sc;
+ /*
+ * The packet is likely from another VM on the same host or from the
+	 * The packet is likely from another VM on the same host, or from the
+	 * host itself, which performed checksum offloading, so Tx/Rx is
+	 * basically a memcpy and the checksum computed so far has little value.
+
+ KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP,
+ ("%s: unsupported IP protocol %d", __func__, protocol));
/*
- * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does
- * not have an analogous CSUM flag. The checksum has been validated,
- * but is incomplete (TCP/UDP pseudo header).
- *
- * The packet is likely from another VM on the same host that itself
- * performed checksum offloading so Tx/Rx is basically a memcpy and
- * the checksum has little value.
- *
- * Default to receiving the packet as-is for performance reasons, but
- * this can cause issues if the packet is to be forwarded because it
- * does not contain a valid checksum. This patch may be helpful:
- * https://reviews.freebsd.org/D6611. In the meantime, have the driver
- * compute the checksum if requested.
- *
- * BMV: Need to add an CSUM_PARTIAL flag?
+	 * If the user doesn't want us to fix it up here by computing the
+	 * checksum, just pass the request on to the stack by setting the
+	 * corresponding mbuf flag (e.g., CSUM_TCP).
*/
+ sc = rxq->vtnrx_sc;
if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
- error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
- return (error);
+ switch (protocol) {
+ case IPPROTO_TCP:
+ m->m_pkthdr.csum_flags |=
+ (isipv6 ? CSUM_TCP_IPV6 : CSUM_TCP);
+ break;
+ case IPPROTO_UDP:
+ m->m_pkthdr.csum_flags |=
+ (isipv6 ? CSUM_UDP_IPV6 : CSUM_UDP);
+ break;
+ }
+ m->m_pkthdr.csum_data = hdr->csum_offset;
+ return (0);
}
/*
* Compute the checksum in the driver so the packet will contain a
* valid checksum. The checksum is at csum_offset from csum_start.
*/
- switch (etype) {
-#if defined(INET) || defined(INET6)
- case ETHERTYPE_IP:
- case ETHERTYPE_IPV6: {
- int csum_off, csum_end;
- uint16_t csum;
-
- csum_off = hdr->csum_start + hdr->csum_offset;
- csum_end = csum_off + sizeof(uint16_t);
+ int csum_off, csum_end;
+ uint16_t csum;
- /* Assume checksum will be in the first mbuf. */
- if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
- return (1);
+ csum_off = hdr->csum_start + hdr->csum_offset;
+ csum_end = csum_off + sizeof(uint16_t);
- /*
- * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
- * checksum and write it at the specified offset. We could
- * try to verify the packet: csum_start should probably
- * correspond to the start of the TCP/UDP header.
- *
- * BMV: Need to properly handle UDP with zero checksum. Is
- * the IPv4 header checksum implicitly validated?
- */
- csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
- *(uint16_t *)(mtodo(m, csum_off)) = csum;
- m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
- m->m_pkthdr.csum_data = 0xFFFF;
- break;
- }
-#endif
- default:
- sc->vtnet_stats.rx_csum_bad_ethtype++;
+ /* Assume checksum will be in the first mbuf. */
+ if (m->m_len < csum_end || m->m_pkthdr.len < csum_end) {
+ sc->vtnet_stats.rx_csum_bad_offset++;
return (1);
}
+ /*
+ * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
+ * checksum and write it at the specified offset. We could
+ * try to verify the packet: csum_start should probably
+ * correspond to the start of the TCP/UDP header.
+ *
+ * BMV: Need to properly handle UDP with zero checksum. Is
+ * the IPv4 header checksum implicitly validated?
+ */
+ csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
+ *(uint16_t *)(mtodo(m, csum_off)) = csum;
+ m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xFFFF;
+
return (0);
}
+static void
+vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m, int protocol)
+{
+ KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP,
+ ("%s: unsupported IP protocol %d", __func__, protocol));
+
+ m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xFFFF;
+}
+
static int
-vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m,
- uint16_t etype, int hoff, struct virtio_net_hdr *hdr __unused)
+vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
+ struct virtio_net_hdr *hdr)
{
-#if 0
+ const struct ether_header *eh;
struct vtnet_softc *sc;
-#endif
- int protocol;
+ int hoff, protocol;
+ uint16_t etype;
+ bool isipv6;
+
+ KASSERT(hdr->flags &
+ (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID),
+ ("%s: missing checksum offloading flag %x", __func__, hdr->flags));
+
+ eh = mtod(m, const struct ether_header *);
+ etype = ntohs(eh->ether_type);
+ if (etype == ETHERTYPE_VLAN) {
+ /* TODO BMV: Handle QinQ. */
+ const struct ether_vlan_header *evh =
+ mtod(m, const struct ether_vlan_header *);
+ etype = ntohs(evh->evl_proto);
+ hoff = sizeof(struct ether_vlan_header);
+ } else
+ hoff = sizeof(struct ether_header);
-#if 0
sc = rxq->vtnrx_sc;
-#endif
+ /* Check whether ethernet type is IP or IPv6, and get protocol. */
switch (etype) {
#if defined(INET)
case ETHERTYPE_IP:
- if (__predict_false(m->m_len < hoff + sizeof(struct ip)))
- protocol = IPPROTO_DONE;
- else {
+ if (__predict_false(m->m_len < hoff + sizeof(struct ip))) {
+ sc->vtnet_stats.rx_csum_inaccessible_ipproto++;
+ return (1);
+ } else {
struct ip *ip = (struct ip *)(m->m_data + hoff);
protocol = ip->ip_p;
}
+ isipv6 = false;
break;
#endif
#if defined(INET6)
case ETHERTYPE_IPV6:
if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
- || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0)
- protocol = IPPROTO_DONE;
+ || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0) {
+ sc->vtnet_stats.rx_csum_inaccessible_ipproto++;
+ return (1);
+ }
+ isipv6 = true;
break;
#endif
default:
- protocol = IPPROTO_DONE;
- break;
+ sc->vtnet_stats.rx_csum_bad_ethtype++;
+ return (1);
}
+ /* Check whether protocol is TCP or UDP. */
switch (protocol) {
case IPPROTO_TCP:
case IPPROTO_UDP:
- m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
- m->m_pkthdr.csum_data = 0xFFFF;
break;
default:
/*
* FreeBSD does not support checksum offloading of this
- * protocol. Let the stack re-verify the checksum later
- * if the protocol is supported.
+ * protocol here.
*/
-#if 0
- if_printf(sc->vtnet_ifp,
- "%s: checksum offload of unsupported protocol "
- "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n",
- __func__, etype, protocol, hdr->csum_start,
- hdr->csum_offset);
-#endif
- break;
+ sc->vtnet_stats.rx_csum_bad_ipproto++;
+ return (1);
}
- return (0);
-}
-
-static int
-vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
- struct virtio_net_hdr *hdr)
-{
- const struct ether_header *eh;
- int hoff;
- uint16_t etype;
-
- eh = mtod(m, const struct ether_header *);
- etype = ntohs(eh->ether_type);
- if (etype == ETHERTYPE_VLAN) {
- /* TODO BMV: Handle QinQ. */
- const struct ether_vlan_header *evh =
- mtod(m, const struct ether_vlan_header *);
- etype = ntohs(evh->evl_proto);
- hoff = sizeof(struct ether_vlan_header);
- } else
- hoff = sizeof(struct ether_header);
-
if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
- return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr));
+ return (vtnet_rxq_csum_needs_csum(rxq, m, isipv6, protocol,
+ hdr));
else /* VIRTIO_NET_HDR_F_DATA_VALID */
- return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr));
+ vtnet_rxq_csum_data_valid(rxq, m, protocol);
+
+ return (0);
}
+#endif
static void
vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
@@ -2040,10 +2044,15 @@ vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
if (hdr->flags &
(VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) {
+#if defined(INET) || defined(INET6)
if (vtnet_rxq_csum(rxq, m, hdr) == 0)
rxq->vtnrx_stats.vrxs_csum++;
else
rxq->vtnrx_stats.vrxs_csum_failed++;
+#else
+ sc->vtnet_stats.rx_csum_bad_ethtype++;
+ rxq->vtnrx_stats.vrxs_csum_failed++;
+#endif
}
if (hdr->gso_size != 0) {
@@ -2497,6 +2506,10 @@ vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
hdr->csum_start = vtnet_gtoh16(sc, csum_start);
hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
txq->vtntx_stats.vtxs_csum++;
+ } else if ((flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) &&
+ (proto == IPPROTO_TCP || proto == IPPROTO_UDP) &&
+ (m->m_pkthdr.csum_data == 0xFFFF)) {
+ hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
}
if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) {
@@ -2551,8 +2564,10 @@ vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
error = sglist_append_mbuf(sg, m);
if (error) {
m = m_defrag(m, M_NOWAIT);
- if (m == NULL)
+ if (m == NULL) {
+ sc->vtnet_stats.tx_defrag_failed++;
goto fail;
+ }
*m_head = m;
sc->vtnet_stats.tx_defragged++;
@@ -2568,7 +2583,6 @@ vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
return (error);
fail:
- sc->vtnet_stats.tx_defrag_failed++;
m_freem(*m_head);
*m_head = NULL;
@@ -2609,7 +2623,8 @@ vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
m->m_flags &= ~M_VLANTAG;
}
- if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
+ if (m->m_pkthdr.csum_flags &
+ (VTNET_CSUM_ALL_OFFLOAD | CSUM_DATA_VALID)) {
m = vtnet_txq_offload(txq, m, hdr);
if ((*m_head = m) == NULL) {
error = ENOBUFS;
@@ -3031,16 +3046,14 @@ vtnet_get_counter(if_t ifp, ift_counter cnt)
return (rxaccum.vrxs_iqdrops);
case IFCOUNTER_IERRORS:
return (rxaccum.vrxs_ierrors);
+ case IFCOUNTER_IBYTES:
+ return (rxaccum.vrxs_ibytes);
case IFCOUNTER_OPACKETS:
return (txaccum.vtxs_opackets);
case IFCOUNTER_OBYTES:
- if (!VTNET_ALTQ_ENABLED)
- return (txaccum.vtxs_obytes);
- /* FALLTHROUGH */
+ return (txaccum.vtxs_obytes);
case IFCOUNTER_OMCASTS:
- if (!VTNET_ALTQ_ENABLED)
- return (txaccum.vtxs_omcasts);
- /* FALLTHROUGH */
+ return (txaccum.vtxs_omcasts);
default:
return (if_get_counter_default(ifp, cnt));
}
@@ -3813,9 +3826,9 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
if_printf(ifp, "error setting host MAC filter table\n");
out:
- if (promisc != 0 && vtnet_set_promisc(sc, true) != 0)
+ if (promisc && vtnet_set_promisc(sc, true) != 0)
if_printf(ifp, "cannot enable promiscuous mode\n");
- if (allmulti != 0 && vtnet_set_allmulti(sc, true) != 0)
+ if (allmulti && vtnet_set_allmulti(sc, true) != 0)
if_printf(ifp, "cannot enable all-multicast mode\n");
}
@@ -4100,21 +4113,29 @@ vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
stats = &rxq->vtnrx_stats;
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_ipackets, "Receive packets");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_ibytes, "Receive bytes");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_iqdrops, "Receive drops");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_ierrors, "Receive errors");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_csum, "Receive checksum offloaded");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_csum_failed, "Receive checksum offload failed");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_host_lro, "Receive host segmentation offloaded");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vrxs_rescheduled,
"Receive interrupt handler rescheduled");
}
@@ -4135,17 +4156,23 @@ vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
stats = &txq->vtntx_stats;
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vtxs_opackets, "Transmit packets");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vtxs_obytes, "Transmit bytes");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vtxs_omcasts, "Transmit multicasts");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vtxs_csum, "Transmit checksum offloaded");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vtxs_tso, "Transmit TCP segmentation offloaded");
- SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled",
+ CTLFLAG_RD | CTLFLAG_STATS,
&stats->vtxs_rescheduled,
"Transmit interrupt handler rescheduled");
}
@@ -4170,6 +4197,102 @@ vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
}
}
+static int
+vtnet_sysctl_rx_csum_failed(SYSCTL_HANDLER_ARGS)
+{
+ struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
+ struct vtnet_statistics *stats = &sc->vtnet_stats;
+ struct vtnet_rxq_stats *rxst;
+ int i;
+
+ stats->rx_csum_failed = 0;
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
+ stats->rx_csum_failed += rxst->vrxs_csum_failed;
+ }
+ return (sysctl_handle_64(oidp, NULL, stats->rx_csum_failed, req));
+}
+
+static int
+vtnet_sysctl_rx_csum_offloaded(SYSCTL_HANDLER_ARGS)
+{
+ struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
+ struct vtnet_statistics *stats = &sc->vtnet_stats;
+ struct vtnet_rxq_stats *rxst;
+ int i;
+
+ stats->rx_csum_offloaded = 0;
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
+ stats->rx_csum_offloaded += rxst->vrxs_csum;
+ }
+ return (sysctl_handle_64(oidp, NULL, stats->rx_csum_offloaded, req));
+}
+
+static int
+vtnet_sysctl_rx_task_rescheduled(SYSCTL_HANDLER_ARGS)
+{
+ struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
+ struct vtnet_statistics *stats = &sc->vtnet_stats;
+ struct vtnet_rxq_stats *rxst;
+ int i;
+
+ stats->rx_task_rescheduled = 0;
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
+ stats->rx_task_rescheduled += rxst->vrxs_rescheduled;
+ }
+ return (sysctl_handle_64(oidp, NULL, stats->rx_task_rescheduled, req));
+}
+
+static int
+vtnet_sysctl_tx_csum_offloaded(SYSCTL_HANDLER_ARGS)
+{
+ struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
+ struct vtnet_statistics *stats = &sc->vtnet_stats;
+ struct vtnet_txq_stats *txst;
+ int i;
+
+ stats->tx_csum_offloaded = 0;
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ txst = &sc->vtnet_txqs[i].vtntx_stats;
+ stats->tx_csum_offloaded += txst->vtxs_csum;
+ }
+ return (sysctl_handle_64(oidp, NULL, stats->tx_csum_offloaded, req));
+}
+
+static int
+vtnet_sysctl_tx_tso_offloaded(SYSCTL_HANDLER_ARGS)
+{
+ struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
+ struct vtnet_statistics *stats = &sc->vtnet_stats;
+ struct vtnet_txq_stats *txst;
+ int i;
+
+ stats->tx_tso_offloaded = 0;
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ txst = &sc->vtnet_txqs[i].vtntx_stats;
+ stats->tx_tso_offloaded += txst->vtxs_tso;
+ }
+ return (sysctl_handle_64(oidp, NULL, stats->tx_tso_offloaded, req));
+}
+
+static int
+vtnet_sysctl_tx_task_rescheduled(SYSCTL_HANDLER_ARGS)
+{
+ struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
+ struct vtnet_statistics *stats = &sc->vtnet_stats;
+ struct vtnet_txq_stats *txst;
+ int i;
+
+ stats->tx_task_rescheduled = 0;
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ txst = &sc->vtnet_txqs[i].vtntx_stats;
+ stats->tx_task_rescheduled += txst->vtxs_rescheduled;
+ }
+ return (sysctl_handle_64(oidp, NULL, stats->tx_task_rescheduled, req));
+}
+
static void
vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
struct sysctl_oid_list *child, struct vtnet_softc *sc)
@@ -4189,69 +4312,75 @@ vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
- CTLFLAG_RD, &stats->mbuf_alloc_failed,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->mbuf_alloc_failed,
"Mbuf cluster allocation failures");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
- CTLFLAG_RD, &stats->rx_frame_too_large,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_frame_too_large,
"Received frame larger than the mbuf chain");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
- CTLFLAG_RD, &stats->rx_enq_replacement_failed,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_enq_replacement_failed,
"Enqueuing the replacement receive mbuf failed");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
- CTLFLAG_RD, &stats->rx_mergeable_failed,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_mergeable_failed,
"Mergeable buffers receive failures");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
- CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_ethtype,
"Received checksum offloaded buffer with unsupported "
"Ethernet type");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
- CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_ipproto,
"Received checksum offloaded buffer with incorrect IP protocol");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
- CTLFLAG_RD, &stats->rx_csum_bad_offset,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_offset,
"Received checksum offloaded buffer with incorrect offset");
- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
- CTLFLAG_RD, &stats->rx_csum_bad_proto,
- "Received checksum offloaded buffer with incorrect protocol");
- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
- CTLFLAG_RD, &stats->rx_csum_failed,
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_inaccessible_ipproto",
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_inaccessible_ipproto,
+ "Received checksum offloaded buffer with inaccessible IP protocol");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_failed",
+ CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
+ sc, 0, vtnet_sysctl_rx_csum_failed, "QU",
"Received buffer checksum offload failed");
- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
- CTLFLAG_RD, &stats->rx_csum_offloaded,
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_offloaded",
+ CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
+ sc, 0, vtnet_sysctl_rx_csum_offloaded, "QU",
"Received buffer checksum offload succeeded");
- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
- CTLFLAG_RD, &stats->rx_task_rescheduled,
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_task_rescheduled",
+ CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
+ sc, 0, vtnet_sysctl_rx_task_rescheduled, "QU",
"Times the receive interrupt task rescheduled itself");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype",
- CTLFLAG_RD, &stats->tx_csum_unknown_ethtype,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_csum_unknown_ethtype,
"Aborted transmit of checksum offloaded buffer with unknown "
"Ethernet type");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch",
- CTLFLAG_RD, &stats->tx_csum_proto_mismatch,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_csum_proto_mismatch,
"Aborted transmit of checksum offloaded buffer because mismatched "
"protocols");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
- CTLFLAG_RD, &stats->tx_tso_not_tcp,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_tso_not_tcp,
"Aborted transmit of TSO buffer with non TCP protocol");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum",
- CTLFLAG_RD, &stats->tx_tso_without_csum,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_tso_without_csum,
"Aborted transmit of TSO buffer without TCP checksum offload");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
- CTLFLAG_RD, &stats->tx_defragged,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_defragged,
"Transmit mbufs defragged");
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
- CTLFLAG_RD, &stats->tx_defrag_failed,
+ CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_defrag_failed,
"Aborted transmit of buffer because defrag failed");
- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
- CTLFLAG_RD, &stats->tx_csum_offloaded,
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_csum_offloaded",
+ CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
+ sc, 0, vtnet_sysctl_tx_csum_offloaded, "QU",
"Offloaded checksum of transmitted buffer");
- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
- CTLFLAG_RD, &stats->tx_tso_offloaded,
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_tso_offloaded",
+ CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
+ sc, 0, vtnet_sysctl_tx_tso_offloaded, "QU",
"Segmentation offload of transmitted buffer");
- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
- CTLFLAG_RD, &stats->tx_task_rescheduled,
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_task_rescheduled",
+ CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
+ sc, 0, vtnet_sysctl_tx_task_rescheduled, "QU",
"Times the transmit interrupt task rescheduled itself");
}
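
The reworked receive checksum path reduces to two ways of annotating the mbuf, chosen by the virtio header flag. A condensed sketch of the convention, using the TCP-over-IPv4 case for brevity:

if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
	/*
	 * Only a partial (pseudo-header) checksum is present.  Either
	 * finish it in the driver with in_cksum_skip(), or hand the work
	 * to the stack by requesting the offload on the mbuf.
	 */
	m->m_pkthdr.csum_flags |= CSUM_TCP;
	m->m_pkthdr.csum_data = hdr->csum_offset;
} else {
	/* VIRTIO_NET_HDR_F_DATA_VALID: the host already verified it. */
	m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	m->m_pkthdr.csum_data = 0xFFFF;
}
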
diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h
index 0144b0f3232d..cab7ced639a7 100644
--- a/sys/dev/virtio/network/if_vtnetvar.h
+++ b/sys/dev/virtio/network/if_vtnetvar.h
@@ -46,7 +46,7 @@ struct vtnet_statistics {
uint64_t rx_csum_bad_ethtype;
uint64_t rx_csum_bad_ipproto;
uint64_t rx_csum_bad_offset;
- uint64_t rx_csum_bad_proto;
+ uint64_t rx_csum_inaccessible_ipproto;
uint64_t tx_csum_unknown_ethtype;
uint64_t tx_csum_proto_mismatch;
uint64_t tx_tso_not_tcp;
diff --git a/sys/dev/vmgenc/vmgenc_acpi.c b/sys/dev/vmgenc/vmgenc_acpi.c
index 2ad8929dfd34..18519a8e4f22 100644
--- a/sys/dev/vmgenc/vmgenc_acpi.c
+++ b/sys/dev/vmgenc/vmgenc_acpi.c
@@ -56,6 +56,7 @@
#include <contrib/dev/acpica/include/acpi.h>
#include <dev/acpica/acpivar.h>
+#include <dev/random/randomdev.h>
#include <dev/random/random_harvestq.h>
#include <dev/vmgenc/vmgenc_acpi.h>
@@ -210,6 +211,11 @@ acpi_GetPackedUINT64(device_t dev, ACPI_HANDLE handle, char *path,
}
+static const struct random_source random_vmgenid = {
+ .rs_ident = "VM Generation ID",
+ .rs_source = RANDOM_PURE_VMGENID,
+};
+
static int
vmgenc_attach(device_t dev)
{
@@ -234,7 +240,7 @@ vmgenc_attach(device_t dev)
memcpy(sc->vmg_cache_guid, __DEVOLATILE(void *, sc->vmg_pguid),
sizeof(sc->vmg_cache_guid));
- random_harvest_register_source(RANDOM_PURE_VMGENID);
+ random_source_register(&random_vmgenid);
vmgenc_harvest_all(sc->vmg_cache_guid, sizeof(sc->vmg_cache_guid));
AcpiInstallNotifyHandler(h, ACPI_DEVICE_NOTIFY, vmgenc_notify, dev);
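
The hunk above is a complete example of the newer random(4) source registration idiom: a named struct random_source replaces the bare enum registration. A sketch of the general pattern; the identifiers random_mydev/"My device" are placeholders, and the deregistration call on detach is my assumed counterpart, not shown in this diff.

static const struct random_source random_mydev = {
	.rs_ident = "My device",		/* human-readable name */
	.rs_source = RANDOM_PURE_VMGENID,	/* the source's own enum value */
};

/* attach */
random_source_register(&random_mydev);

/* detach (assumed counterpart, not part of this diff) */
random_source_deregister(&random_mydev);
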
diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
index 9f2b009d02ec..460a508a60dc 100644
--- a/sys/dev/vmm/vmm_dev.c
+++ b/sys/dev/vmm/vmm_dev.c
@@ -901,6 +901,7 @@ vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
sc->cdev = NULL;
sx_xunlock(&vmmdev_mtx);
+ vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
destroy_dev(cdev);
vmmdev_destroy(sc);
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index de60a4717dd4..8712679375c6 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -953,7 +953,7 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
*ap->a_outoffp += fwo->size;
fuse_internal_clear_suid_on_write(outvp, outcred, td);
if (*ap->a_outoffp > outfvdat->cached_attrs.va_size) {
- fuse_vnode_setsize(outvp, *ap->a_outoffp, false);
+ fuse_vnode_setsize(outvp, *ap->a_outoffp, false);
getnanouptime(&outfvdat->last_local_modify);
}
fuse_vnode_update(invp, FN_ATIMECHANGE);
diff --git a/sys/fs/msdosfs/bootsect.h b/sys/fs/msdosfs/bootsect.h
index 170d94cb9512..94b1137a153e 100644
--- a/sys/fs/msdosfs/bootsect.h
+++ b/sys/fs/msdosfs/bootsect.h
@@ -20,7 +20,7 @@
/*
* Format of a boot sector. This is the first sector on a DOS floppy disk
- * or the fist sector of a partition on a hard disk. But, it is not the
+ * or the first sector of a partition on a hard disk. But, it is not the
* first sector of a partitioned hard disk.
*/
struct bootsector33 {
diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c
index e5fdb395c9f7..862780741ee7 100644
--- a/sys/fs/nfs/nfs_commonport.c
+++ b/sys/fs/nfs/nfs_commonport.c
@@ -371,8 +371,6 @@ nfsrv_atroot(struct vnode *vp, uint64_t *retp)
/*
* Set the credentials to refer to root.
- * If only the various BSDen could agree on whether cr_gid is a separate
- * field or cr_groups[0]...
*/
void
newnfs_setroot(struct ucred *cred)
diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c
index e9f1dc23ddbe..77e71d4153c9 100644
--- a/sys/fs/nfsclient/nfs_clport.c
+++ b/sys/fs/nfsclient/nfs_clport.c
@@ -1098,9 +1098,10 @@ newnfs_copyincred(struct ucred *cr, struct nfscred *nfscr)
KASSERT(cr->cr_ngroups >= 0,
("newnfs_copyincred: negative cr_ngroups"));
nfscr->nfsc_uid = cr->cr_uid;
- nfscr->nfsc_ngroups = MIN(cr->cr_ngroups, NFS_MAXGRPS + 1);
- for (i = 0; i < nfscr->nfsc_ngroups; i++)
- nfscr->nfsc_groups[i] = cr->cr_groups[i];
+ nfscr->nfsc_ngroups = MIN(cr->cr_ngroups + 1, NFS_MAXGRPS + 1);
+ nfscr->nfsc_groups[0] = cr->cr_gid;
+ for (i = 1; i < nfscr->nfsc_ngroups; i++)
+ nfscr->nfsc_groups[i] = cr->cr_groups[i - 1];
}
/*
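
The newnfs_copyincred() change is easiest to see with made-up numbers; the effective GID now travels explicitly in slot 0 rather than being assumed to already sit in cr_groups[0]:

/*
 * Example (made-up values):
 *	cr_gid = 20, cr_groups = { 5, 10 }, cr_ngroups = 2
 * After newnfs_copyincred():
 *	nfsc_ngroups = MIN(2 + 1, NFS_MAXGRPS + 1) = 3
 *	nfsc_groups  = { 20, 5, 10 }
 */
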
diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c
index a8b06fdb261b..eee571a04821 100644
--- a/sys/fs/nfsclient/nfs_clvnops.c
+++ b/sys/fs/nfsclient/nfs_clvnops.c
@@ -3474,7 +3474,7 @@ nfs_advlock(struct vop_advlock_args *ap)
u_quad_t size;
struct nfsmount *nmp;
- error = NFSVOPLOCK(vp, LK_SHARED);
+ error = NFSVOPLOCK(vp, LK_EXCLUSIVE);
if (error != 0)
return (EBADF);
nmp = VFSTONFS(vp->v_mount);
@@ -3511,11 +3511,6 @@ nfs_advlock(struct vop_advlock_args *ap)
cred = p->p_ucred;
else
cred = td->td_ucred;
- NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
- if (VN_IS_DOOMED(vp)) {
- error = EBADF;
- goto out;
- }
/*
* If this is unlocking a write locked region, flush and
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index b2966934f9b7..7040c4afb797 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -2607,6 +2607,7 @@ again:
* rpc reply
*/
if (siz == 0) {
+ateof:
vput(vp);
if (nd->nd_flag & ND_NFSV3)
nfsrv_postopattr(nd, getret, &at);
@@ -2648,6 +2649,8 @@ again:
ncookies--;
}
if (cpos >= cend || ncookies == 0) {
+ if (eofflag != 0)
+ goto ateof;
siz = fullsiz;
toff = off;
goto again;
diff --git a/sys/fs/procfs/procfs.c b/sys/fs/procfs/procfs.c
index ab60ba47f322..cd66dd6f8b3b 100644
--- a/sys/fs/procfs/procfs.c
+++ b/sys/fs/procfs/procfs.c
@@ -156,42 +156,42 @@ procfs_init(PFS_INIT_ARGS)
root = pi->pi_root;
- pfs_create_link(root, "curproc", procfs_docurproc,
- NULL, NULL, NULL, 0);
- pfs_create_link(root, "self", procfs_docurproc,
- NULL, NULL, NULL, 0);
-
- dir = pfs_create_dir(root, "pid",
- procfs_attr_all_rx, NULL, NULL, PFS_PROCDEP);
- pfs_create_file(dir, "cmdline", procfs_doproccmdline,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "dbregs", procfs_doprocdbregs,
+ pfs_create_link(root, NULL, "curproc", procfs_docurproc, NULL, NULL,
+ NULL, 0);
+ pfs_create_link(root, NULL, "self", procfs_docurproc, NULL, NULL, NULL,
+ 0);
+
+ pfs_create_dir(root, &dir, "pid", procfs_attr_all_rx, NULL, NULL,
+ PFS_PROCDEP);
+ pfs_create_file(dir, NULL, "cmdline", procfs_doproccmdline, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "dbregs", procfs_doprocdbregs,
procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
- pfs_create_file(dir, "etype", procfs_doproctype,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "fpregs", procfs_doprocfpregs,
+ pfs_create_file(dir, NULL, "etype", procfs_doproctype, NULL, NULL, NULL,
+ PFS_RD);
+ pfs_create_file(dir, NULL, "fpregs", procfs_doprocfpregs,
procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
- pfs_create_file(dir, "map", procfs_doprocmap,
- NULL, procfs_notsystem, NULL, PFS_RD);
- pfs_create_file(dir, "mem", procfs_doprocmem,
- procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
- pfs_create_file(dir, "note", procfs_doprocnote,
- procfs_attr_w, procfs_candebug, NULL, PFS_WR);
- pfs_create_file(dir, "notepg", procfs_doprocnote,
- procfs_attr_w, procfs_candebug, NULL, PFS_WR);
- pfs_create_file(dir, "regs", procfs_doprocregs,
- procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
- pfs_create_file(dir, "rlimit", procfs_doprocrlimit,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "status", procfs_doprocstatus,
- NULL, NULL, NULL, PFS_RD);
- pfs_create_file(dir, "osrel", procfs_doosrel,
- procfs_attr_rw, procfs_candebug, NULL, PFS_RDWR);
-
- pfs_create_link(dir, "file", procfs_doprocfile,
- NULL, procfs_notsystem, NULL, 0);
- pfs_create_link(dir, "exe", procfs_doprocfile,
- NULL, procfs_notsystem, NULL, 0);
+ pfs_create_file(dir, NULL, "map", procfs_doprocmap, NULL,
+ procfs_notsystem, NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "mem", procfs_doprocmem, procfs_attr_rw,
+ procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
+ pfs_create_file(dir, NULL, "note", procfs_doprocnote, procfs_attr_w,
+ procfs_candebug, NULL, PFS_WR);
+ pfs_create_file(dir, NULL, "notepg", procfs_doprocnote, procfs_attr_w,
+ procfs_candebug, NULL, PFS_WR);
+ pfs_create_file(dir, NULL, "regs", procfs_doprocregs, procfs_attr_rw,
+ procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
+ pfs_create_file(dir, NULL, "rlimit", procfs_doprocrlimit, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "status", procfs_doprocstatus, NULL, NULL,
+ NULL, PFS_RD);
+ pfs_create_file(dir, NULL, "osrel", procfs_doosrel, procfs_attr_rw,
+ procfs_candebug, NULL, PFS_RDWR);
+
+ pfs_create_link(dir, NULL, "file", procfs_doprocfile, NULL,
+ procfs_notsystem, NULL, 0);
+ pfs_create_link(dir, NULL, "exe", procfs_doprocfile, NULL,
+ procfs_notsystem, NULL, 0);
return (0);
}
diff --git a/sys/fs/procfs/procfs_status.c b/sys/fs/procfs/procfs_status.c
index 38070e0946bb..49c084d02ff8 100644
--- a/sys/fs/procfs/procfs_status.c
+++ b/sys/fs/procfs/procfs_status.c
@@ -141,13 +141,9 @@ procfs_doprocstatus(PFS_FILL_ARGS)
(u_long)cr->cr_uid,
(u_long)cr->cr_ruid,
(u_long)cr->cr_rgid);
-
- /* egid (cr->cr_svgid) is equal to cr_ngroups[0]
- see also getegid(2) in /sys/kern/kern_prot.c */
-
- for (i = 0; i < cr->cr_ngroups; i++) {
+ sbuf_printf(sb, ",%lu", (u_long)cr->cr_gid);
+ for (i = 0; i < cr->cr_ngroups; i++)
sbuf_printf(sb, ",%lu", (u_long)cr->cr_groups[i]);
- }
if (jailed(cr)) {
mtx_lock(&cr->cr_prison->pr_mtx);
diff --git a/sys/fs/pseudofs/pseudofs.c b/sys/fs/pseudofs/pseudofs.c
index ef45f96a6192..7a4e67455214 100644
--- a/sys/fs/pseudofs/pseudofs.c
+++ b/sys/fs/pseudofs/pseudofs.c
@@ -133,7 +133,7 @@ pfs_add_node(struct pfs_node *parent, struct pfs_node *pn)
for (iter = parent->pn_nodes; iter != NULL; iter = iter->pn_next) {
if (strcmp(pn->pn_name, iter->pn_name) != 0)
continue;
- printf("pfs_add_node: homonymous siblings: '%s/%s' type %d",
+ printf("pfs_add_node: homonymous siblings: '%s/%s' type %d\n",
parent->pn_name, pn->pn_name, pn->pn_type);
/* Do not detach, because we are not yet attached. */
pn->pn_parent = NULL;
@@ -234,81 +234,101 @@ pfs_fixup_dir(struct pfs_node *parent)
/*
* Create a directory
*/
-struct pfs_node *
-pfs_create_dir(struct pfs_node *parent, const char *name,
- pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
- int flags)
+int
+pfs_create_dir(struct pfs_node *parent, struct pfs_node **opn,
+ const char *name, pfs_attr_t attr, pfs_vis_t vis,
+ pfs_destroy_t destroy, int flags)
{
- struct pfs_node *pn;
+ struct pfs_node *pdir, *pn;
int rc;
- pn = pfs_alloc_node_flags(parent->pn_info, name,
+	/* Save parent first: the caller may pass the same pointer for both. */
+ pdir = parent;
+ if (opn != NULL)
+ *opn = NULL;
+ pn = pfs_alloc_node_flags(pdir->pn_info, name,
(flags & PFS_PROCDEP) ? pfstype_procdir : pfstype_dir, flags);
if (pn == NULL)
- return (NULL);
+ return (ENOMEM);
pn->pn_attr = attr;
pn->pn_vis = vis;
pn->pn_destroy = destroy;
pn->pn_flags = flags;
- rc = pfs_add_node(parent, pn);
+ rc = pfs_add_node(pdir, pn);
if (rc == 0)
rc = pfs_fixup_dir_flags(pn, flags);
if (rc != 0) {
pfs_destroy(pn);
pn = NULL;
+ } else if (opn != NULL) {
+ *opn = pn;
}
- return (pn);
+
+ return (rc);
}
/*
* Create a file
*/
-struct pfs_node *
-pfs_create_file(struct pfs_node *parent, const char *name, pfs_fill_t fill,
- pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
- int flags)
+int
+pfs_create_file(struct pfs_node *parent, struct pfs_node **opn,
+ const char *name, pfs_fill_t fill, pfs_attr_t attr,
+ pfs_vis_t vis, pfs_destroy_t destroy, int flags)
{
struct pfs_node *pn;
+ int rc;
+ if (opn != NULL)
+ *opn = NULL;
pn = pfs_alloc_node_flags(parent->pn_info, name, pfstype_file, flags);
if (pn == NULL)
- return (NULL);
+ return (ENOMEM);
+
pn->pn_fill = fill;
pn->pn_attr = attr;
pn->pn_vis = vis;
pn->pn_destroy = destroy;
pn->pn_flags = flags;
- if (pfs_add_node(parent, pn) != 0) {
+ if ((rc = pfs_add_node(parent, pn)) != 0) {
pfs_destroy(pn);
pn = NULL;
+ } else if (opn != NULL) {
+ *opn = pn;
}
- return (pn);
+
+ return (rc);
}
/*
* Create a symlink
*/
-struct pfs_node *
-pfs_create_link(struct pfs_node *parent, const char *name, pfs_fill_t fill,
- pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
- int flags)
+int
+pfs_create_link(struct pfs_node *parent, struct pfs_node **opn,
+ const char *name, pfs_fill_t fill, pfs_attr_t attr,
+ pfs_vis_t vis, pfs_destroy_t destroy, int flags)
{
struct pfs_node *pn;
+ int rc;
+ if (opn != NULL)
+ *opn = NULL;
pn = pfs_alloc_node_flags(parent->pn_info, name, pfstype_symlink, flags);
if (pn == NULL)
- return (NULL);
+ return (ENOMEM);
+
pn->pn_fill = fill;
pn->pn_attr = attr;
pn->pn_vis = vis;
pn->pn_destroy = destroy;
pn->pn_flags = flags;
- if (pfs_add_node(parent, pn) != 0) {
+ if ((rc = pfs_add_node(parent, pn)) != 0) {
pfs_destroy(pn);
pn = NULL;
+ } else if (opn != NULL) {
+ *opn = pn;
}
- return (pn);
+ return (rc);
}
/*
@@ -475,6 +495,7 @@ pfs_init(struct pfs_info *pi, struct vfsconf *vfc)
if (error) {
pfs_destroy(root);
pi->pi_root = NULL;
+ pfs_fileno_uninit(pi);
return (error);
}
diff --git a/sys/fs/pseudofs/pseudofs.h b/sys/fs/pseudofs/pseudofs.h
index c60dd7b339d1..2b08dcad978d 100644
--- a/sys/fs/pseudofs/pseudofs.h
+++ b/sys/fs/pseudofs/pseudofs.h
@@ -255,17 +255,18 @@ int pfs_uninit (struct pfs_info *pi, struct vfsconf *vfc);
/*
* Directory structure construction and manipulation
*/
-struct pfs_node *pfs_create_dir (struct pfs_node *parent, const char *name,
- pfs_attr_t attr, pfs_vis_t vis,
- pfs_destroy_t destroy, int flags);
-struct pfs_node *pfs_create_file(struct pfs_node *parent, const char *name,
- pfs_fill_t fill, pfs_attr_t attr,
- pfs_vis_t vis, pfs_destroy_t destroy,
- int flags);
-struct pfs_node *pfs_create_link(struct pfs_node *parent, const char *name,
- pfs_fill_t fill, pfs_attr_t attr,
+int pfs_create_dir (struct pfs_node *parent, struct pfs_node **opn,
+ const char *name, pfs_attr_t attr,
pfs_vis_t vis, pfs_destroy_t destroy,
int flags);
+int pfs_create_file (struct pfs_node *parent, struct pfs_node **opn,
+ const char *name, pfs_fill_t fill,
+ pfs_attr_t attr, pfs_vis_t vis,
+ pfs_destroy_t destroy, int flags);
+int pfs_create_link (struct pfs_node *parent, struct pfs_node **opn,
+ const char *name, pfs_fill_t fill,
+ pfs_attr_t attr, pfs_vis_t vis,
+ pfs_destroy_t destroy, int flags);
struct pfs_node *pfs_find_node (struct pfs_node *parent, const char *name);
void pfs_purge (struct pfs_node *pn);
int pfs_destroy (struct pfs_node *pn);
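
With these prototypes, callers test an errno-style return instead of a pointer, and take the created node through the optional second argument only when they need it later. A minimal sketch of the new calling convention; myfs_init, myfs_docurproc and myfs_dostatus are placeholder names, not part of the patch.

static int
myfs_init(PFS_INIT_ARGS)
{
	struct pfs_node *root, *dir;
	int error;

	root = pi->pi_root;

	/* Pass NULL when the new node is not needed afterwards. */
	error = pfs_create_link(root, NULL, "self", myfs_docurproc, NULL,
	    NULL, NULL, 0);
	if (error != 0)
		return (error);

	/* Otherwise the out-pointer receives the node on success. */
	error = pfs_create_dir(root, &dir, "pid", NULL, NULL, NULL,
	    PFS_PROCDEP);
	if (error != 0)
		return (error);

	return (pfs_create_file(dir, NULL, "status", myfs_dostatus, NULL,
	    NULL, NULL, PFS_RD));
}
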
diff --git a/sys/fs/tarfs/tarfs_vnops.c b/sys/fs/tarfs/tarfs_vnops.c
index acf18de5ab51..c110107bb210 100644
--- a/sys/fs/tarfs/tarfs_vnops.c
+++ b/sys/fs/tarfs/tarfs_vnops.c
@@ -334,6 +334,10 @@ tarfs_readdir(struct vop_readdir_args *ap)
tnp, tnp->name, uio->uio_offset, uio->uio_resid);
if (uio->uio_offset == TARFS_COOKIE_EOF) {
+ if (eofflag != NULL) {
+ TARFS_DPF(VNODE, "%s: Setting EOF flag\n", __func__);
+ *eofflag = 1;
+ }
TARFS_DPF(VNODE, "%s: EOF\n", __func__);
return (0);
}
diff --git a/sys/geom/cache/g_cache.c b/sys/geom/cache/g_cache.c
index 9d0b10f4192e..c6b80786ade5 100644
--- a/sys/geom/cache/g_cache.c
+++ b/sys/geom/cache/g_cache.c
@@ -504,7 +504,7 @@ g_cache_create(struct g_class *mp, struct g_provider *pp,
return (NULL);
}
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
sc->sc_type = type;
sc->sc_bshift = bshift;
@@ -665,7 +665,7 @@ g_cache_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
G_CACHE_DEBUG(3, "Tasting %s.", pp->name);
- gp = g_new_geomf(mp, "cache:taste");
+ gp = g_new_geom(mp, "cache:taste");
gp->start = g_cache_start;
gp->orphan = g_cache_orphan;
gp->access = g_cache_access;
diff --git a/sys/geom/concat/g_concat.c b/sys/geom/concat/g_concat.c
index 2173a84c7acf..5461c6dd73d3 100644
--- a/sys/geom/concat/g_concat.c
+++ b/sys/geom/concat/g_concat.c
@@ -646,7 +646,7 @@ g_concat_create(struct g_class *mp, const struct g_concat_metadata *md,
return (NULL);
}
}
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
sc = malloc(sizeof(*sc), M_CONCAT, M_WAITOK | M_ZERO);
gp->start = g_concat_start;
gp->spoiled = g_concat_orphan;
@@ -753,7 +753,7 @@ g_concat_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
G_CONCAT_DEBUG(3, "Tasting %s.", pp->name);
- gp = g_new_geomf(mp, "concat:taste");
+ gp = g_new_geom(mp, "concat:taste");
gp->start = g_concat_start;
gp->access = g_concat_access;
gp->orphan = g_concat_orphan;
@@ -1107,8 +1107,6 @@ g_concat_ctl_append(struct gctl_req *req, struct g_class *mp)
gctl_error(req, "No 'arg%u' argument.", 1);
goto fail;
}
- if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
- name += strlen("/dev/");
pp = g_provider_by_name(name);
if (pp == NULL) {
G_CONCAT_DEBUG(1, "Disk %s is invalid.", name);
diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c
index 5bd2d465183e..7fca50e7635c 100644
--- a/sys/geom/eli/g_eli.c
+++ b/sys/geom/eli/g_eli.c
@@ -769,7 +769,7 @@ g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp,
g_topology_assert();
- gp = g_new_geomf(mp, "eli:taste");
+ gp = g_new_geom(mp, "eli:taste");
gp->start = g_eli_start;
gp->access = g_std_access;
/*
diff --git a/sys/geom/gate/g_gate.c b/sys/geom/gate/g_gate.c
index ecdcacff6707..76a4328227dd 100644
--- a/sys/geom/gate/g_gate.c
+++ b/sys/geom/gate/g_gate.c
@@ -571,7 +571,7 @@ g_gate_create(struct g_gate_ctl_create *ggio)
}
}
- gp = g_new_geomf(&g_gate_class, "%s", name);
+ gp = g_new_geom(&g_gate_class, name);
gp->start = g_gate_start;
gp->access = g_gate_access;
gp->orphan = g_gate_orphan;
diff --git a/sys/geom/geom.h b/sys/geom/geom.h
index 908ce86f03a6..50e6627b0157 100644
--- a/sys/geom/geom.h
+++ b/sys/geom/geom.h
@@ -289,8 +289,9 @@ int g_handleattr_int(struct bio *bp, const char *attribute, int val);
int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val);
int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val);
int g_handleattr_str(struct bio *bp, const char *attribute, const char *str);
-struct g_consumer * g_new_consumer(struct g_geom *gp);
-struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...)
+struct g_consumer *g_new_consumer(struct g_geom *gp);
+struct g_geom *g_new_geom(struct g_class *mp, const char *name);
+struct g_geom *g_new_geomf(struct g_class *mp, const char *fmt, ...)
__printflike(2, 3);
struct g_provider * g_new_providerf(struct g_geom *gp, const char *fmt, ...)
__printflike(2, 3);
diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c
index 4a2a850c2eab..27c65f15d5e3 100644
--- a/sys/geom/geom_dev.c
+++ b/sys/geom/geom_dev.c
@@ -355,7 +355,7 @@ g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused)
g_trace(G_T_TOPOLOGY, "dev_taste(%s,%s)", mp->name, pp->name);
g_topology_assert();
- gp = g_new_geomf(mp, "%s", pp->name);
+ gp = g_new_geom(mp, pp->name);
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
mtx_init(&sc->sc_mtx, "g_dev", NULL, MTX_DEF);
cp = g_new_consumer(gp);
diff --git a/sys/geom/geom_event.c b/sys/geom/geom_event.c
index 341233a6ef47..ffd46db55416 100644
--- a/sys/geom/geom_event.c
+++ b/sys/geom/geom_event.c
@@ -347,6 +347,7 @@ static void
g_post_event_ep_va(g_event_t *func, void *arg, int wuflag,
struct g_event *ep, va_list ap)
{
+ struct thread *td;
void *p;
u_int n;
@@ -366,8 +367,12 @@ g_post_event_ep_va(g_event_t *func, void *arg, int wuflag,
TAILQ_INSERT_TAIL(&g_events, ep, events);
mtx_unlock(&g_eventlock);
wakeup(&g_wait_event);
- curthread->td_pflags |= TDP_GEOM;
- ast_sched(curthread, TDA_GEOM);
+
+ td = curthread;
+ if ((td->td_pflags & TDP_KTHREAD) == 0) {
+ td->td_pflags |= TDP_GEOM;
+ ast_sched(td, TDA_GEOM);
+ }
}
void
diff --git a/sys/geom/geom_slice.c b/sys/geom/geom_slice.c
index 0491b0069be4..935293950c37 100644
--- a/sys/geom/geom_slice.c
+++ b/sys/geom/geom_slice.c
@@ -529,7 +529,7 @@ g_slice_new(struct g_class *mp, u_int slices, struct g_provider *pp, struct g_co
g_topology_assert();
vp = (void **)extrap;
- gp = g_new_geomf(mp, "%s", pp->name);
+ gp = g_new_geom(mp, pp->name);
gsp = g_slice_alloc(slices, extra);
gsp->start = start;
gp->softc = gsp;
diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c
index 1429c84942ed..2a6ce1ab6486 100644
--- a/sys/geom/geom_subr.c
+++ b/sys/geom/geom_subr.c
@@ -368,20 +368,15 @@ g_retaste(struct g_class *mp)
}
struct g_geom *
-g_new_geomf(struct g_class *mp, const char *fmt, ...)
+g_new_geom(struct g_class *mp, const char *name)
{
+ int len;
struct g_geom *gp;
- va_list ap;
- struct sbuf *sb;
g_topology_assert();
G_VALID_CLASS(mp);
- sb = sbuf_new_auto();
- va_start(ap, fmt);
- sbuf_vprintf(sb, fmt, ap);
- va_end(ap);
- sbuf_finish(sb);
- gp = g_malloc(sizeof(*gp) + sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
+ len = strlen(name);
+ gp = g_malloc(sizeof(*gp) + len + 1, M_WAITOK | M_ZERO);
gp->name = (char *)(gp + 1);
gp->class = mp;
gp->rank = 1;
@@ -389,8 +384,7 @@ g_new_geomf(struct g_class *mp, const char *fmt, ...)
LIST_INIT(&gp->provider);
LIST_INSERT_HEAD(&mp->geom, gp, geom);
TAILQ_INSERT_HEAD(&geoms, gp, geoms);
- strcpy(gp->name, sbuf_data(sb));
- sbuf_delete(sb);
+ memcpy(gp->name, name, len);
/* Fill in defaults from class */
gp->start = mp->start;
gp->spoiled = mp->spoiled;
@@ -404,6 +398,23 @@ g_new_geomf(struct g_class *mp, const char *fmt, ...)
return (gp);
}
+struct g_geom *
+g_new_geomf(struct g_class *mp, const char *fmt, ...)
+{
+ struct g_geom *gp;
+ va_list ap;
+ struct sbuf *sb;
+
+ sb = sbuf_new_auto();
+ va_start(ap, fmt);
+ sbuf_vprintf(sb, fmt, ap);
+ va_end(ap);
+ sbuf_finish(sb);
+ gp = g_new_geom(mp, sbuf_data(sb));
+ sbuf_delete(sb);
+ return (gp);
+}
+
void
g_destroy_geom(struct g_geom *gp)
{
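
The new g_new_geom() takes a literal name and skips the sbuf formatting path, which g_new_geomf() still provides as a thin wrapper. A minimal sketch of how a hypothetical class's taste routine would use it follows; g_example_taste, g_example_orphan, and the class itself are placeholders, not part of this change.

#include <sys/param.h>
#include <geom/geom.h>

static void
g_example_orphan(struct g_consumer *cp __unused)
{
	/* Placeholder; a real class would tear down its geom here. */
}

static struct g_geom *
g_example_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	gp = g_new_geom(mp, "example:taste");	/* literal name, no format string */
	gp->orphan = g_example_orphan;
	cp = g_new_consumer(gp);
	if (g_attach(cp, pp) != 0) {
		g_destroy_consumer(cp);
		g_destroy_geom(gp);
		return (NULL);
	}
	/* ... g_access(cp, 1, 0, 0), read metadata, then release ... */
	g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	return (NULL);
}
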
diff --git a/sys/geom/journal/g_journal.c b/sys/geom/journal/g_journal.c
index 6d9f6239e632..b520194b7d7c 100644
--- a/sys/geom/journal/g_journal.c
+++ b/sys/geom/journal/g_journal.c
@@ -2477,7 +2477,7 @@ g_journal_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
if (pp->geom->class == mp)
return (NULL);
- gp = g_new_geomf(mp, "journal:taste");
+ gp = g_new_geom(mp, "journal:taste");
/* This orphan function should be never called. */
gp->orphan = g_journal_taste_orphan;
cp = g_new_consumer(gp);
diff --git a/sys/geom/label/g_label.c b/sys/geom/label/g_label.c
index acb17d40914e..faefbd7c2ef6 100644
--- a/sys/geom/label/g_label.c
+++ b/sys/geom/label/g_label.c
@@ -399,7 +399,7 @@ g_label_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
if (strcmp(pp->geom->class->name, mp->name) == 0)
return (NULL);
- gp = g_new_geomf(mp, "label:taste");
+ gp = g_new_geom(mp, "label:taste");
gp->start = g_label_start_taste;
gp->access = g_label_access_taste;
gp->orphan = g_label_orphan_taste;
diff --git a/sys/geom/linux_lvm/g_linux_lvm.c b/sys/geom/linux_lvm/g_linux_lvm.c
index c63318fed729..f333c08f45d9 100644
--- a/sys/geom/linux_lvm/g_linux_lvm.c
+++ b/sys/geom/linux_lvm/g_linux_lvm.c
@@ -537,7 +537,7 @@ g_llvm_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
g_topology_assert();
g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
- gp = g_new_geomf(mp, "linux_lvm:taste");
+ gp = g_new_geom(mp, "linux_lvm:taste");
/* This orphan function should be never called. */
gp->orphan = g_llvm_taste_orphan;
cp = g_new_consumer(gp);
@@ -557,7 +557,7 @@ g_llvm_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
vg = md.md_vg;
if (vg->vg_geom == NULL) {
/* new volume group */
- gp = g_new_geomf(mp, "%s", vg->vg_name);
+ gp = g_new_geom(mp, vg->vg_name);
gp->start = g_llvm_start;
gp->spoiled = g_llvm_orphan;
gp->orphan = g_llvm_orphan;
diff --git a/sys/geom/mirror/g_mirror.c b/sys/geom/mirror/g_mirror.c
index 25c0490938ef..03902a2f2491 100644
--- a/sys/geom/mirror/g_mirror.c
+++ b/sys/geom/mirror/g_mirror.c
@@ -3149,7 +3149,7 @@ g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
/*
* Action geom.
*/
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
gp->start = g_mirror_start;
gp->orphan = g_mirror_orphan;
@@ -3290,7 +3290,7 @@ g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
- gp = g_new_geomf(mp, "mirror:taste");
+ gp = g_new_geom(mp, "mirror:taste");
/*
* This orphan function should be never called.
*/
diff --git a/sys/geom/mirror/g_mirror_ctl.c b/sys/geom/mirror/g_mirror_ctl.c
index 82bc05a142c0..b31bf098ac4b 100644
--- a/sys/geom/mirror/g_mirror_ctl.c
+++ b/sys/geom/mirror/g_mirror_ctl.c
@@ -433,7 +433,7 @@ g_mirror_ctl_create(struct gctl_req *req, struct g_class *mp)
g_topology_lock();
mediasize = OFF_MAX;
sectorsize = 0;
- gp = g_new_geomf(mp, "%s", md.md_name);
+ gp = g_new_geom(mp, md.md_name);
gp->orphan = g_mirror_create_orphan;
cp = g_new_consumer(gp);
for (no = 1; no < *nargs; no++) {
diff --git a/sys/geom/mountver/g_mountver.c b/sys/geom/mountver/g_mountver.c
index de3a298735d4..c7d55c4734a2 100644
--- a/sys/geom/mountver/g_mountver.c
+++ b/sys/geom/mountver/g_mountver.c
@@ -291,7 +291,7 @@ g_mountver_create(struct gctl_req *req, struct g_class *mp, struct g_provider *p
return (EEXIST);
}
}
- gp = g_new_geomf(mp, "%s", name);
+ gp = g_new_geom(mp, name);
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
mtx_init(&sc->sc_mtx, "gmountver", NULL, MTX_DEF | MTX_RECURSE);
TAILQ_INIT(&sc->sc_queue);
diff --git a/sys/geom/multipath/g_multipath.c b/sys/geom/multipath/g_multipath.c
index a4935df7eaa1..4459bd9f03f5 100644
--- a/sys/geom/multipath/g_multipath.c
+++ b/sys/geom/multipath/g_multipath.c
@@ -549,7 +549,7 @@ g_multipath_create(struct g_class *mp, struct g_multipath_metadata *md)
}
}
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
mtx_init(&sc->sc_mtx, "multipath", NULL, MTX_DEF);
memcpy(sc->sc_uuid, md->md_uuid, sizeof(sc->sc_uuid));
@@ -821,7 +821,7 @@ g_multipath_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
g_topology_assert();
- gp = g_new_geomf(mp, "multipath:taste");
+ gp = g_new_geom(mp, "multipath:taste");
gp->start = g_multipath_start;
gp->access = g_multipath_access;
gp->orphan = g_multipath_orphan;
@@ -949,7 +949,6 @@ g_multipath_ctl_add_name(struct gctl_req *req, struct g_class *mp,
struct g_consumer *cp;
struct g_provider *pp;
const char *mpname;
- static const char devpf[6] = _PATH_DEV;
int error;
g_topology_assert();
@@ -966,8 +965,6 @@ g_multipath_ctl_add_name(struct gctl_req *req, struct g_class *mp,
}
sc = gp->softc;
- if (strncmp(name, devpf, 5) == 0)
- name += 5;
pp = g_provider_by_name(name);
if (pp == NULL) {
gctl_error(req, "Provider %s is invalid", name);
diff --git a/sys/geom/nop/g_nop.c b/sys/geom/nop/g_nop.c
index a32111e3a29a..1fb99f4a0a5b 100644
--- a/sys/geom/nop/g_nop.c
+++ b/sys/geom/nop/g_nop.c
@@ -416,7 +416,7 @@ g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp,
return (EEXIST);
}
}
- gp = g_new_geomf(mp, "%s", name);
+ gp = g_new_geom(mp, name);
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
sc->sc_offset = offset;
sc->sc_explicitsize = explicitsize;
diff --git a/sys/geom/part/g_part.c b/sys/geom/part/g_part.c
index 88e44b335b29..4c0d0c3aa902 100644
--- a/sys/geom/part/g_part.c
+++ b/sys/geom/part/g_part.c
@@ -552,8 +552,6 @@ g_part_parm_provider(struct gctl_req *req, const char *name,
pname = gctl_get_asciiparam(req, name);
if (pname == NULL)
return (ENOATTR);
- if (strncmp(pname, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
- pname += sizeof(_PATH_DEV) - 1;
pp = g_provider_by_name(pname);
if (pp == NULL) {
gctl_error(req, "%d %s '%s'", EINVAL, name, pname);
@@ -998,7 +996,7 @@ g_part_ctl_create(struct gctl_req *req, struct g_part_parms *gpp)
}
if (null == NULL)
- gp = g_new_geomf(&g_part_class, "%s", pp->name);
+ gp = g_new_geom(&g_part_class, pp->name);
gp->softc = kobj_create((kobj_class_t)gpp->gpp_scheme, M_GEOM,
M_WAITOK);
table = gp->softc;
@@ -1979,7 +1977,7 @@ g_part_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
* With that we become part of the topology. Obtain read access
* to the provider.
*/
- gp = g_new_geomf(mp, "%s", pp->name);
+ gp = g_new_geom(mp, pp->name);
cp = g_new_consumer(gp);
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
error = g_attach(cp, pp);
diff --git a/sys/geom/raid/g_raid.c b/sys/geom/raid/g_raid.c
index a483622d14a5..d35695482eea 100644
--- a/sys/geom/raid/g_raid.c
+++ b/sys/geom/raid/g_raid.c
@@ -775,8 +775,6 @@ g_raid_open_consumer(struct g_raid_softc *sc, const char *name)
g_topology_assert();
- if (strncmp(name, _PATH_DEV, 5) == 0)
- name += 5;
pp = g_provider_by_name(name);
if (pp == NULL)
return (NULL);
@@ -1876,7 +1874,7 @@ g_raid_create_node(struct g_class *mp,
g_topology_assert();
G_RAID_DEBUG(1, "Creating array %s.", name);
- gp = g_new_geomf(mp, "%s", name);
+ gp = g_new_geom(mp, name);
sc = malloc(sizeof(*sc), M_RAID, M_WAITOK | M_ZERO);
gp->start = g_raid_start;
gp->orphan = g_raid_orphan;
@@ -2217,7 +2215,7 @@ g_raid_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
geom = NULL;
status = G_RAID_MD_TASTE_FAIL;
- gp = g_new_geomf(mp, "raid:taste");
+ gp = g_new_geom(mp, "raid:taste");
/*
* This orphan function should be never called.
*/
diff --git a/sys/geom/raid3/g_raid3.c b/sys/geom/raid3/g_raid3.c
index c2d05b48d80d..64951bd01deb 100644
--- a/sys/geom/raid3/g_raid3.c
+++ b/sys/geom/raid3/g_raid3.c
@@ -3164,7 +3164,7 @@ g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md)
/*
* Action geom.
*/
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
sc = malloc(sizeof(*sc), M_RAID3, M_WAITOK | M_ZERO);
sc->sc_disks = malloc(sizeof(struct g_raid3_disk) * md->md_all, M_RAID3,
M_WAITOK | M_ZERO);
@@ -3338,7 +3338,7 @@ g_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
G_RAID3_DEBUG(2, "Tasting %s.", pp->name);
- gp = g_new_geomf(mp, "raid3:taste");
+ gp = g_new_geom(mp, "raid3:taste");
/* This orphan function should be never called. */
gp->orphan = g_raid3_taste_orphan;
cp = g_new_consumer(gp);
diff --git a/sys/geom/raid3/g_raid3_ctl.c b/sys/geom/raid3/g_raid3_ctl.c
index 824de07e4836..5eafcce917cf 100644
--- a/sys/geom/raid3/g_raid3_ctl.c
+++ b/sys/geom/raid3/g_raid3_ctl.c
@@ -425,7 +425,7 @@ g_raid3_ctl_insert(struct gctl_req *req, struct g_class *mp)
no = gctl_get_paraml(req, "number", sizeof(*no));
else
no = NULL;
- gp = g_new_geomf(mp, "raid3:insert");
+ gp = g_new_geom(mp, "raid3:insert");
gp->orphan = g_raid3_ctl_insert_orphan;
cp = g_new_consumer(gp);
error = g_attach(cp, pp);
diff --git a/sys/geom/shsec/g_shsec.c b/sys/geom/shsec/g_shsec.c
index 3ccc23e7eb8b..9da814e5eb34 100644
--- a/sys/geom/shsec/g_shsec.c
+++ b/sys/geom/shsec/g_shsec.c
@@ -545,7 +545,7 @@ g_shsec_create(struct g_class *mp, const struct g_shsec_metadata *md)
return (NULL);
}
}
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
sc = malloc(sizeof(*sc), M_SHSEC, M_WAITOK | M_ZERO);
gp->start = g_shsec_start;
gp->spoiled = g_shsec_orphan;
@@ -643,7 +643,7 @@ g_shsec_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
G_SHSEC_DEBUG(3, "Tasting %s.", pp->name);
- gp = g_new_geomf(mp, "shsec:taste");
+ gp = g_new_geom(mp, "shsec:taste");
gp->start = g_shsec_start;
gp->access = g_shsec_access;
gp->orphan = g_shsec_orphan;
diff --git a/sys/geom/stripe/g_stripe.c b/sys/geom/stripe/g_stripe.c
index 6f336c18c8e6..ba1953f036d3 100644
--- a/sys/geom/stripe/g_stripe.c
+++ b/sys/geom/stripe/g_stripe.c
@@ -454,11 +454,9 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
cbp->bio_done = g_stripe_done;
cbp->bio_offset = offset;
cbp->bio_length = length;
- if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
- bp->bio_ma_n = round_page(bp->bio_ma_offset +
- bp->bio_length) / PAGE_SIZE;
+ if ((bp->bio_flags & BIO_UNMAPPED) != 0)
addr = NULL;
- } else
+ else
addr = bp->bio_data;
cbp->bio_caller2 = sc->sc_disks[no];
@@ -864,7 +862,7 @@ g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md,
return (NULL);
}
}
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO);
gp->start = g_stripe_start;
gp->spoiled = g_stripe_orphan;
@@ -965,7 +963,7 @@ g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
G_STRIPE_DEBUG(3, "Tasting %s.", pp->name);
- gp = g_new_geomf(mp, "stripe:taste");
+ gp = g_new_geom(mp, "stripe:taste");
gp->start = g_stripe_start;
gp->access = g_stripe_access;
gp->orphan = g_stripe_orphan;
diff --git a/sys/geom/union/g_union.c b/sys/geom/union/g_union.c
index 9734fc1bcfe3..43c16c86e5a8 100644
--- a/sys/geom/union/g_union.c
+++ b/sys/geom/union/g_union.c
@@ -246,7 +246,7 @@ g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool verbose)
return;
}
}
- gp = g_new_geomf(mp, "%s", name);
+ gp = g_new_geom(mp, name);
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
rw_init(&sc->sc_rwlock, "gunion");
TAILQ_INIT(&sc->sc_wiplist);
@@ -358,6 +358,8 @@ fail2:
fail1:
g_destroy_consumer(lowercp);
g_destroy_provider(newpp);
+ rw_destroy(&sc->sc_rwlock);
+ g_free(sc);
g_destroy_geom(gp);
}
diff --git a/sys/geom/virstor/g_virstor.c b/sys/geom/virstor/g_virstor.c
index c7d737493f11..1490ed103329 100644
--- a/sys/geom/virstor/g_virstor.c
+++ b/sys/geom/virstor/g_virstor.c
@@ -771,7 +771,7 @@ g_virstor_taste(struct g_class *mp, struct g_provider *pp, int flags)
LOG_MSG(LVL_DEBUG, "Tasting %s", pp->name);
/* We need a dummy geom to attach a consumer to the given provider */
- gp = g_new_geomf(mp, "virstor:taste.helper");
+ gp = g_new_geom(mp, "virstor:taste.helper");
gp->start = (void *)invalid_call; /* XXX: hacked up so the */
gp->access = (void *)invalid_call; /* compiler doesn't complain. */
gp->orphan = (void *)invalid_call; /* I really want these to fail. */
@@ -1085,7 +1085,7 @@ create_virstor_geom(struct g_class *mp, struct g_virstor_metadata *md)
return (NULL);
}
}
- gp = g_new_geomf(mp, "%s", md->md_name);
+ gp = g_new_geom(mp, md->md_name);
gp->softc = NULL; /* to circumevent races that test softc */
gp->start = g_virstor_start;
diff --git a/sys/geom/zero/g_zero.c b/sys/geom/zero/g_zero.c
index 91ef0fb1ef95..e9934ba6c784 100644
--- a/sys/geom/zero/g_zero.c
+++ b/sys/geom/zero/g_zero.c
@@ -102,7 +102,7 @@ g_zero_init(struct g_class *mp)
struct g_provider *pp;
g_topology_assert();
- gp = g_new_geomf(mp, "gzero");
+ gp = g_new_geom(mp, "gzero");
gp->start = g_zero_start;
gp->access = g_std_access;
gpp = pp = g_new_providerf(gp, "%s", gp->name);
diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC
index 88b8967cd693..ac0cc4ba74e7 100644
--- a/sys/i386/conf/GENERIC
+++ b/sys/i386/conf/GENERIC
@@ -249,9 +249,9 @@ device wlan # 802.11 support
options IEEE80211_DEBUG # enable debug msgs
options IEEE80211_SUPPORT_MESH # enable 802.11s draft support
device wlan_wep # 802.11 WEP support
+device wlan_tkip # 802.11 TKIP support
device wlan_ccmp # 802.11 CCMP support
device wlan_gcmp # 802.11 GCMP support
-device wlan_tkip # 802.11 TKIP support
device wlan_amrr # AMRR transmit rate control algorithm
device ath # Atheros CardBus/PCI NICs
device ath_hal # Atheros CardBus/PCI chip support
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 5a53fac50f2c..1bc2491a1a12 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -2610,11 +2610,13 @@ note_procstat_groups(void *arg, struct sbuf *sb, size_t *sizep)
int structsize;
p = arg;
- size = sizeof(structsize) + p->p_ucred->cr_ngroups * sizeof(gid_t);
+ size = sizeof(structsize) +
+ (1 + p->p_ucred->cr_ngroups) * sizeof(gid_t);
if (sb != NULL) {
KASSERT(*sizep == size, ("invalid size"));
structsize = sizeof(gid_t);
sbuf_bcat(sb, &structsize, sizeof(structsize));
+ sbuf_bcat(sb, &p->p_ucred->cr_gid, sizeof(gid_t));
sbuf_bcat(sb, p->p_ucred->cr_groups, p->p_ucred->cr_ngroups *
sizeof(gid_t));
}
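
The note now carries the effective gid ahead of the supplementary groups, which is why the size gains one gid_t. A minimal userland sketch of parsing that payload (element size, then effective gid, then supplementary groups); the function name is made up for the example.

#include <sys/types.h>
#include <stdio.h>
#include <string.h>

static void
parse_groups_note(const void *buf, size_t len)
{
	const gid_t *gids;
	size_t i, ngids;
	int structsize;

	if (len < sizeof(structsize))
		return;
	memcpy(&structsize, buf, sizeof(structsize));
	if (structsize != (int)sizeof(gid_t))
		return;
	gids = (const gid_t *)((const char *)buf + sizeof(structsize));
	ngids = (len - sizeof(structsize)) / sizeof(gid_t);
	if (ngids == 0)
		return;
	printf("effective gid: %u\n", (unsigned)gids[0]);
	for (i = 1; i < ngids; i++)
		printf("supplementary gid: %u\n", (unsigned)gids[i]);
}
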
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 36ce44b988be..87ffdb8dbf07 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -145,13 +145,6 @@ FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance");
#endif
/*
- * This ensures that there is at least one entry so that the sysinit_set
- * symbol is not undefined. A sybsystem ID of SI_SUB_DUMMY is never
- * executed.
- */
-SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL);
-
-/*
* The sysinit linker set compiled into the kernel. These are placed onto the
* sysinit list by mi_startup; sysinit_add can add (e.g., from klds) additional
* sysinits to the linked list but the linker set here does not change.
@@ -296,7 +289,7 @@ mi_startup(void)
BOOTTRACE_INIT("sysinit 0x%7x", sip->subsystem);
#if defined(VERBOSE_SYSINIT)
- if (sip->subsystem > last && verbose_sysinit != 0) {
+ if (sip->subsystem != last && verbose_sysinit != 0) {
verbose = 1;
printf("subsystem %x\n", sip->subsystem);
}
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index fcd232cde21e..e42e7dcf8b44 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -663,4 +663,6 @@ struct sysent sysent[] = {
{ .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
{ .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */
{ .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */
+ { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */
+ { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */
};
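
Per the argument structures shown further down (jail_attach_jd_args and jail_remove_jd_args each carry a single descriptor), the corresponding userland wrappers would plausibly take one jail descriptor. The prototypes below are assumptions made for illustration; only the sysent rows are part of this hunk.

#include <err.h>

int jail_attach_jd(int fd);	/* assumed wrapper for syscall 597 */
int jail_remove_jd(int fd);	/* assumed wrapper for syscall 598 */

static void
attach_to_jail(int jd)
{
	if (jail_attach_jd(jd) == -1)
		err(1, "jail_attach_jd");
}

static void
remove_jail(int jd)
{
	if (jail_remove_jd(jd) == -1)
		err(1, "jail_remove_jd");
}
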
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index a27ab33b34da..2a833d2eafbe 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -665,20 +665,26 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
} while (atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
got_set = tmp & ~flg;
got_cleared = flg & ~tmp;
- tmp = fp->f_flag & FNONBLOCK;
- error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
- if (error != 0)
- goto revert_f_setfl;
- tmp = fp->f_flag & FASYNC;
- error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
- if (error == 0) {
- fdrop(fp, td);
- break;
+ if (((got_set | got_cleared) & FNONBLOCK) != 0) {
+ tmp = fp->f_flag & FNONBLOCK;
+ error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
+ if (error != 0)
+ goto revert_flags;
+ }
+ if (((got_set | got_cleared) & FASYNC) != 0) {
+ tmp = fp->f_flag & FASYNC;
+ error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
+ if (error != 0)
+ goto revert_nonblock;
+ }
+ fdrop(fp, td);
+ break;
+revert_nonblock:
+ if (((got_set | got_cleared) & FNONBLOCK) != 0) {
+ tmp = ~fp->f_flag & FNONBLOCK;
+ (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
}
- atomic_clear_int(&fp->f_flag, FNONBLOCK);
- tmp = 0;
- (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
-revert_f_setfl:
+revert_flags:
do {
tmp = flg = fp->f_flag;
tmp &= ~FCNTLFLAGS;
@@ -5250,6 +5256,8 @@ file_type_to_name(short type)
return ("eventfd");
case DTYPE_TIMERFD:
return ("timerfd");
+ case DTYPE_JAILDESC:
+ return ("jail");
default:
return ("unkn");
}
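
From userland this is the ordinary F_SETFL path; with the change above, the FIONBIO/FIOASYNC ioctls are only issued when the corresponding flag actually toggles, and are rolled back individually on failure. Standard usage, shown for context:

#include <fcntl.h>
#include <err.h>

static void
set_nonblocking(int fd, int on)
{
	int flags;

	flags = fcntl(fd, F_GETFL);
	if (flags == -1)
		err(1, "F_GETFL");
	if (on)
		flags |= O_NONBLOCK;
	else
		flags &= ~O_NONBLOCK;
	if (fcntl(fd, F_SETFL, flags) == -1)
		err(1, "F_SETFL");
}
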
diff --git a/sys/kern/kern_environment.c b/sys/kern/kern_environment.c
index 0cb0f566a839..7c0654769581 100644
--- a/sys/kern/kern_environment.c
+++ b/sys/kern/kern_environment.c
@@ -1098,65 +1098,65 @@ kernenv_next(char *cp)
}
void
-tunable_int_init(void *data)
+tunable_int_init(const void *data)
{
- struct tunable_int *d = (struct tunable_int *)data;
+ const struct tunable_int *d = data;
TUNABLE_INT_FETCH(d->path, d->var);
}
void
-tunable_long_init(void *data)
+tunable_long_init(const void *data)
{
- struct tunable_long *d = (struct tunable_long *)data;
+ const struct tunable_long *d = data;
TUNABLE_LONG_FETCH(d->path, d->var);
}
void
-tunable_ulong_init(void *data)
+tunable_ulong_init(const void *data)
{
- struct tunable_ulong *d = (struct tunable_ulong *)data;
+ const struct tunable_ulong *d = data;
TUNABLE_ULONG_FETCH(d->path, d->var);
}
void
-tunable_int64_init(void *data)
+tunable_int64_init(const void *data)
{
- struct tunable_int64 *d = (struct tunable_int64 *)data;
+ const struct tunable_int64 *d = data;
TUNABLE_INT64_FETCH(d->path, d->var);
}
void
-tunable_uint64_init(void *data)
+tunable_uint64_init(const void *data)
{
- struct tunable_uint64 *d = (struct tunable_uint64 *)data;
+ const struct tunable_uint64 *d = data;
TUNABLE_UINT64_FETCH(d->path, d->var);
}
void
-tunable_quad_init(void *data)
+tunable_quad_init(const void *data)
{
- struct tunable_quad *d = (struct tunable_quad *)data;
+ const struct tunable_quad *d = data;
TUNABLE_QUAD_FETCH(d->path, d->var);
}
void
-tunable_bool_init(void *data)
+tunable_bool_init(const void *data)
{
- struct tunable_bool *d = (struct tunable_bool *)data;
+ const struct tunable_bool *d = data;
TUNABLE_BOOL_FETCH(d->path, d->var);
}
void
-tunable_str_init(void *data)
+tunable_str_init(const void *data)
{
- struct tunable_str *d = (struct tunable_str *)data;
+ const struct tunable_str *d = data;
TUNABLE_STR_FETCH(d->path, d->var, d->size);
}
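
The constification reaches these functions through the TUNABLE_* registration macros, whose argument structures can now sit in read-only memory. A minimal consumer-side sketch; the tunable name is made up for the example.

#include <sys/param.h>
#include <sys/kernel.h>

static int example_threshold = 16;
TUNABLE_INT("kern.example.threshold", &example_threshold);
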
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index eb77a5064113..23d8dc9cf54a 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -50,6 +50,8 @@
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
+#include <sys/jail.h>
+#include <sys/jaildesc.h>
#include <sys/kthread.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
@@ -163,6 +165,9 @@ static int filt_kqueue(struct knote *kn, long hint);
static int filt_procattach(struct knote *kn);
static void filt_procdetach(struct knote *kn);
static int filt_proc(struct knote *kn, long hint);
+static int filt_jailattach(struct knote *kn);
+static void filt_jaildetach(struct knote *kn);
+static int filt_jail(struct knote *kn, long hint);
static int filt_fileattach(struct knote *kn);
static void filt_timerexpire(void *knx);
static void filt_timerexpire_l(struct knote *kn, bool proc_locked);
@@ -195,6 +200,12 @@ static const struct filterops proc_filtops = {
.f_detach = filt_procdetach,
.f_event = filt_proc,
};
+static const struct filterops jail_filtops = {
+ .f_isfd = 0,
+ .f_attach = filt_jailattach,
+ .f_detach = filt_jaildetach,
+ .f_event = filt_jail,
+};
static const struct filterops timer_filtops = {
.f_isfd = 0,
.f_attach = filt_timerattach,
@@ -365,6 +376,8 @@ static struct {
[~EVFILT_USER] = { &user_filtops, 1 },
[~EVFILT_SENDFILE] = { &null_filtops },
[~EVFILT_EMPTY] = { &file_filtops, 1 },
+ [~EVFILT_JAIL] = { &jail_filtops, 1 },
+ [~EVFILT_JAILDESC] = { &file_filtops, 1 },
};
/*
@@ -614,6 +627,86 @@ knote_fork(struct knlist *list, int pid)
}
}
+int
+filt_jailattach(struct knote *kn)
+{
+ struct prison *pr;
+
+ if (kn->kn_id == 0) {
+ /* Let jid=0 watch the current prison (including prison0). */
+ pr = curthread->td_ucred->cr_prison;
+ mtx_lock(&pr->pr_mtx);
+ } else {
+ sx_slock(&allprison_lock);
+ pr = prison_find_child(curthread->td_ucred->cr_prison,
+ kn->kn_id);
+ sx_sunlock(&allprison_lock);
+ if (pr == NULL)
+ return (ENOENT);
+ if (!prison_isalive(pr)) {
+ mtx_unlock(&pr->pr_mtx);
+ return (ENOENT);
+ }
+ }
+ kn->kn_ptr.p_prison = pr;
+ kn->kn_flags |= EV_CLEAR;
+ knlist_add(pr->pr_klist, kn, 1);
+ mtx_unlock(&pr->pr_mtx);
+ return (0);
+}
+
+void
+filt_jaildetach(struct knote *kn)
+{
+ if (kn->kn_ptr.p_prison != NULL) {
+ knlist_remove(kn->kn_knlist, kn, 0);
+ kn->kn_ptr.p_prison = NULL;
+ } else
+ kn->kn_status |= KN_DETACHED;
+}
+
+int
+filt_jail(struct knote *kn, long hint)
+{
+ struct prison *pr;
+ u_int event;
+
+ pr = kn->kn_ptr.p_prison;
+ if (pr == NULL) /* already activated, from attach filter */
+ return (0);
+
+ /*
+ * Mask off extra data. In the NOTE_JAIL_CHILD case, that's
+ * everything except the NOTE_JAIL_CHILD bit itself, since a
+ * JID is any positive integer.
+ */
+ event = ((u_int)hint & NOTE_JAIL_CHILD) ? NOTE_JAIL_CHILD :
+ (u_int)hint & NOTE_JAIL_CTRLMASK;
+
+ /* If the user is interested in this event, record it. */
+ if (kn->kn_sfflags & event) {
+ kn->kn_fflags |= event;
+ /* Report the created jail id or attached process id. */
+ if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) {
+ if (kn->kn_data != 0)
+ kn->kn_fflags |= NOTE_JAIL_MULTI;
+ kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U :
+ (u_int)hint & ~event;
+ }
+ }
+
+ /* Prison is gone, so flag the event as finished. */
+ if (event == NOTE_JAIL_REMOVE) {
+ kn->kn_flags |= EV_EOF | EV_ONESHOT;
+ kn->kn_ptr.p_prison = NULL;
+ if (kn->kn_fflags == 0)
+ kn->kn_flags |= EV_DROP;
+ return (1);
+ }
+
+ return (kn->kn_fflags != 0);
+}
+
/*
* XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the
* interval timer support code.
@@ -1771,7 +1864,7 @@ kqueue_acquire(struct file *fp, struct kqueue **kqp)
kq = fp->f_data;
if (fp->f_type != DTYPE_KQUEUE || kq == NULL)
- return (EBADF);
+ return (EINVAL);
*kqp = kq;
KQ_LOCK(kq);
if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {
@@ -2800,6 +2893,7 @@ knote_init(void)
knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, 0);
ast_register(TDA_KQUEUE, ASTR_ASTF_REQUIRED, 0, ast_kqueue);
+ prison0.pr_klist = knlist_alloc(&prison0.pr_mtx);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL);
@@ -3033,7 +3127,7 @@ sysctl_kern_proc_kqueue(SYSCTL_HANDLER_ARGS)
return (error);
td = curthread;
-#ifdef FREEBSD_COMPAT32
+#ifdef COMPAT_FREEBSD32
compat32 = SV_CURPROC_FLAG(SV_ILP32);
#else
compat32 = false;
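
A hedged userland sketch of the new EVFILT_JAIL filter, using the NOTE_JAIL_* flags introduced in this series (the kevent ident is a JID, with 0 meaning the caller's own prison); error handling is kept minimal.

#include <sys/types.h>
#include <sys/event.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>

static void
watch_jail(int jid)
{
	struct kevent ev;
	int kq;

	kq = kqueue();
	if (kq == -1)
		err(1, "kqueue");
	EV_SET(&ev, jid, EVFILT_JAIL, EV_ADD,
	    NOTE_JAIL_CHILD | NOTE_JAIL_ATTACH | NOTE_JAIL_REMOVE, 0, NULL);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent register");
	for (;;) {
		if (kevent(kq, NULL, 0, &ev, 1, NULL) == -1)
			err(1, "kevent wait");
		if (ev.fflags & NOTE_JAIL_CHILD)
			printf("child jail created, jid %jd\n",
			    (intmax_t)ev.data);
		if (ev.fflags & NOTE_JAIL_ATTACH)
			printf("process attached, pid %jd\n",
			    (intmax_t)ev.data);
		if (ev.fflags & NOTE_JAIL_REMOVE) {
			printf("jail removed\n");
			break;
		}
	}
}
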
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 7c9a15ae18f3..a75ba89d2a7e 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -39,15 +39,18 @@
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
+#include <sys/file.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/osd.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/epoch.h>
+#include <sys/event.h>
#include <sys/taskqueue.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
+#include <sys/jaildesc.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/mman.h>
@@ -154,7 +157,8 @@ static void prison_complete(void *context, int pending);
static void prison_deref(struct prison *pr, int flags);
static void prison_deref_kill(struct prison *pr, struct prisonlist *freeprison);
static int prison_lock_xlock(struct prison *pr, int flags);
-static void prison_cleanup(struct prison *pr);
+static void prison_cleanup_locked(struct prison *pr);
+static void prison_cleanup_unlocked(struct prison *pr);
static void prison_free_not_last(struct prison *pr);
static void prison_proc_free_not_last(struct prison *pr);
static void prison_proc_relink(struct prison *opr, struct prison *npr,
@@ -167,6 +171,7 @@ static void prison_racct_attach(struct prison *pr);
static void prison_racct_modify(struct prison *pr);
static void prison_racct_detach(struct prison *pr);
#endif
+static void prison_knote(struct prison *pr, long hint);
/* Flags for prison_deref */
#define PD_DEREF 0x01 /* Decrement pr_ref */
@@ -985,6 +990,7 @@ prison_ip_cnt(const struct prison *pr, const pr_family_t af)
int
kern_jail_set(struct thread *td, struct uio *optuio, int flags)
{
+ struct file *jfp_out;
struct nameidata nd;
#ifdef INET
struct prison_ip *ip4;
@@ -995,6 +1001,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
struct vfsopt *opt;
struct vfsoptlist *opts;
struct prison *pr, *deadpr, *dinspr, *inspr, *mypr, *ppr, *tpr;
+ struct ucred *jdcred;
struct vnode *root;
char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid;
char *g_path, *osrelstr;
@@ -1008,7 +1015,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
int created, cuflags, descend, drflags, enforce;
int error, errmsg_len, errmsg_pos;
int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
- int deadid, jid, jsys, len, level;
+ int deadid, jfd_in, jfd_out, jfd_pos, jid, jsys, len, level;
int childmax, osreldt, rsnum, slevel;
#ifdef INET
int ip4s;
@@ -1018,22 +1025,32 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
int ip6s;
bool redo_ip6;
#endif
+ bool maybe_changed;
uint64_t pr_allow, ch_allow, pr_flags, ch_flags;
uint64_t pr_allow_diff;
unsigned tallow;
char numbuf[12];
- error = priv_check(td, PRIV_JAIL_SET);
- if (!error && (flags & JAIL_ATTACH))
- error = priv_check(td, PRIV_JAIL_ATTACH);
- if (error)
- return (error);
mypr = td->td_ucred->cr_prison;
- if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
+ if (((flags & (JAIL_CREATE | JAIL_AT_DESC)) == JAIL_CREATE) &&
+ mypr->pr_childmax == 0)
return (EPERM);
if (flags & ~JAIL_SET_MASK)
return (EINVAL);
+ if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC)) ==
+ (JAIL_USE_DESC | JAIL_AT_DESC))
+ return (EINVAL);
+ prison_hold(mypr);
+#ifdef INET
+ ip4 = NULL;
+#endif
+#ifdef INET6
+ ip6 = NULL;
+#endif
+ g_path = NULL;
+ jfp_out = NULL;
+ jfd_out = -1;
/*
* Check all the parameters before committing to anything. Not all
* errors can be caught early, but we may as well try. Also, this
@@ -1046,14 +1063,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
*/
error = vfs_buildopts(optuio, &opts);
if (error)
- return (error);
-#ifdef INET
- ip4 = NULL;
-#endif
-#ifdef INET6
- ip6 = NULL;
-#endif
- g_path = NULL;
+ goto done_free;
cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
if (!cuflags) {
@@ -1062,6 +1072,61 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_errmsg;
}
+ error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in));
+ if (error == ENOENT) {
+ if (flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+ JAIL_OWN_DESC)) {
+ vfs_opterror(opts, "missing desc");
+ goto done_errmsg;
+ }
+ jfd_in = -1;
+ } else if (error != 0)
+ goto done_free;
+ else {
+ if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+ JAIL_OWN_DESC))) {
+ vfs_opterror(opts, "unexpected desc");
+ goto done_errmsg;
+ }
+ if (flags & JAIL_AT_DESC) {
+ /*
+ * Look up and create jails based on the
+ * descriptor's prison.
+ */
+ prison_free(mypr);
+ error = jaildesc_find(td, jfd_in, &mypr, NULL);
+ if (error != 0) {
+ vfs_opterror(opts, error == ENOENT ?
+ "descriptor to dead jail" :
+ "not a jail descriptor");
+ goto done_errmsg;
+ }
+ if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) {
+ error = EPERM;
+ goto done_free;
+ }
+ }
+ if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) {
+ /* Allocate a jail descriptor to return later. */
+ error = jaildesc_alloc(td, &jfp_out, &jfd_out,
+ flags & JAIL_OWN_DESC);
+ if (error)
+ goto done_free;
+ }
+ }
+
+ /*
+ * Delay the permission check if using a jail descriptor,
+ * until we get the descriptor's credentials.
+ */
+ if (!(flags & JAIL_USE_DESC)) {
+ error = priv_check(td, PRIV_JAIL_SET);
+ if (error == 0 && (flags & JAIL_ATTACH))
+ error = priv_check(td, PRIV_JAIL_ATTACH);
+ if (error)
+ goto done_free;
+ }
+
error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
if (error == ENOENT)
jid = 0;
@@ -1422,6 +1487,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
pr = NULL;
inspr = NULL;
deadpr = NULL;
+ maybe_changed = false;
if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
namelc = strrchr(name, '.');
jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
@@ -1436,7 +1502,45 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
error = EAGAIN;
goto done_deref;
}
- if (jid != 0) {
+ if (flags & JAIL_USE_DESC) {
+ /* Get the jail from its descriptor. */
+ error = jaildesc_find(td, jfd_in, &pr, &jdcred);
+ if (error) {
+ vfs_opterror(opts, error == ENOENT ?
+ "descriptor to dead jail" :
+ "not a jail descriptor");
+ goto done_deref;
+ }
+ drflags |= PD_DEREF;
+ error = priv_check_cred(jdcred, PRIV_JAIL_SET);
+ if (error == 0 && (flags & JAIL_ATTACH))
+ error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH);
+ crfree(jdcred);
+ if (error)
+ goto done_deref;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ if (cuflags == JAIL_CREATE) {
+ error = EEXIST;
+ vfs_opterror(opts, "jail %d already exists",
+ pr->pr_id);
+ goto done_deref;
+ }
+ if (!prison_isalive(pr)) {
+ /* While a jid can be resurrected, the prison
+ * itself cannot.
+ */
+ error = ENOENT;
+ vfs_opterror(opts, "jail %d is dying", pr->pr_id);
+ goto done_deref;
+ }
+ if (jid != 0 && jid != pr->pr_id) {
+ error = EINVAL;
+ vfs_opterror(opts, "cannot change jid");
+ goto done_deref;
+ }
+ jid = pr->pr_id;
+ } else if (jid != 0) {
if (jid < 0) {
error = EINVAL;
vfs_opterror(opts, "negative jid");
@@ -1570,7 +1674,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
}
}
- /* Update: must provide a jid or name. */
+ /* Update: must provide a desc, jid, or name. */
else if (cuflags == JAIL_UPDATE && pr == NULL) {
error = ENOENT;
vfs_opterror(opts, "update specified no jail");
@@ -1643,6 +1747,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling);
for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
tpr->pr_childcount++;
+ pr->pr_klist = knlist_alloc(&pr->pr_mtx);
/* Set some default values, and inherit some from the parent. */
if (namelc == NULL)
@@ -1722,8 +1827,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* Grab a reference for existing prisons, to ensure they
* continue to exist for the duration of the call.
*/
- prison_hold(pr);
- drflags |= PD_DEREF;
+ if (!(drflags & PD_DEREF)) {
+ prison_hold(pr);
+ drflags |= PD_DEREF;
+ }
#if defined(VIMAGE) && (defined(INET) || defined(INET6))
if ((pr->pr_flags & PR_VNET) &&
(ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
@@ -1880,6 +1987,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_deref;
}
}
+ maybe_changed = true;
/* Set the parameters of the prison. */
#ifdef INET
@@ -2112,7 +2220,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* reference via persistence, or is about to gain one via attachment.
*/
if (created) {
- drflags = prison_lock_xlock(pr, drflags);
+ sx_assert(&allprison_lock, SX_XLOCKED);
+ prison_knote(ppr, NOTE_JAIL_CHILD | pr->pr_id);
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
pr->pr_state = PRISON_STATE_ALIVE;
}
@@ -2146,10 +2257,37 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
printf("Warning jail jid=%d: mountd/nfsd requires a separate"
" file system\n", pr->pr_id);
+ /*
+ * Now that the prison is fully created without error, set the
+ * jail descriptor if one was requested. This is the only
+ * parameter that is returned to the caller (except the error
+ * message).
+ */
+ if (jfd_out >= 0) {
+ if (!(drflags & PD_LOCKED)) {
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ }
+ jfd_pos = 2 * vfs_getopt_pos(opts, "desc") + 1;
+ if (optuio->uio_segflg == UIO_SYSSPACE)
+ *(int*)optuio->uio_iov[jfd_pos].iov_base = jfd_out;
+ else
+ (void)copyout(&jfd_out,
+ optuio->uio_iov[jfd_pos].iov_base, sizeof(jfd_out));
+ jaildesc_set_prison(jfp_out, pr);
+ }
+
drflags &= ~PD_KILL;
td->td_retval[0] = pr->pr_id;
done_deref:
+ /*
+ * Report changes to kevent. This can happen even if the
+ * system call fails, as changes might have been made before
+ * the failure.
+ */
+ if (maybe_changed && !created)
+ prison_knote(pr, NOTE_JAIL_SET);
/* Release any temporary prison holds and/or locks. */
if (pr != NULL)
prison_deref(pr, drflags);
@@ -2176,15 +2314,21 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
}
done_free:
+ /* Clean up other resources. */
#ifdef INET
prison_ip_free(ip4);
#endif
#ifdef INET6
prison_ip_free(ip6);
#endif
+ if (jfp_out != NULL)
+ fdrop(jfp_out, td);
+ if (error && jfd_out >= 0)
+ (void)kern_close(td, jfd_out);
if (g_path != NULL)
free(g_path, M_TEMP);
vfs_freeopts(opts);
+ prison_free(mypr);
return (error);
}
@@ -2329,16 +2473,21 @@ int
kern_jail_get(struct thread *td, struct uio *optuio, int flags)
{
struct bool_flags *bf;
+ struct file *jfp_out;
struct jailsys_flags *jsf;
struct prison *pr, *mypr;
struct vfsopt *opt;
struct vfsoptlist *opts;
char *errmsg, *name;
int drflags, error, errmsg_len, errmsg_pos, i, jid, len, pos;
+ int jfd_in, jfd_out;
unsigned f;
if (flags & ~JAIL_GET_MASK)
return (EINVAL);
+ if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC)) ==
+ (JAIL_USE_DESC | JAIL_AT_DESC))
+ return (EINVAL);
/* Get the parameter list. */
error = vfs_buildopts(optuio, &opts);
@@ -2346,13 +2495,70 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
return (error);
errmsg_pos = vfs_getopt_pos(opts, "errmsg");
mypr = td->td_ucred->cr_prison;
+ prison_hold(mypr);
pr = NULL;
+ jfp_out = NULL;
+ jfd_out = -1;
/*
- * Find the prison specified by one of: lastjid, jid, name.
+ * Find the prison specified by one of: desc, lastjid, jid, name.
*/
sx_slock(&allprison_lock);
drflags = PD_LIST_SLOCKED;
+
+ error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in));
+ if (error == ENOENT) {
+ if (flags & (JAIL_AT_DESC | JAIL_GET_DESC | JAIL_OWN_DESC)) {
+ vfs_opterror(opts, "missing desc");
+ goto done;
+ }
+ } else if (error == 0) {
+ if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+ JAIL_OWN_DESC))) {
+ vfs_opterror(opts, "unexpected desc");
+ goto done;
+ }
+ if (flags & JAIL_USE_DESC) {
+ /* Get the jail from its descriptor. */
+ error = jaildesc_find(td, jfd_in, &pr, NULL);
+ if (error) {
+ vfs_opterror(opts, error == ENOENT ?
+ "descriptor to dead jail" :
+ "not a jail descriptor");
+ goto done;
+ }
+ drflags |= PD_DEREF;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ if (!(prison_isalive(pr) || (flags & JAIL_DYING))) {
+ error = ENOENT;
+ vfs_opterror(opts, "jail %d is dying",
+ pr->pr_id);
+ goto done;
+ }
+ goto found_prison;
+ }
+ if (flags & JAIL_AT_DESC) {
+ /* Look up jails based on the descriptor's prison. */
+ prison_free(mypr);
+ error = jaildesc_find(td, jfd_in, &mypr, NULL);
+ if (error != 0) {
+ vfs_opterror(opts, error == ENOENT ?
+ "descriptor to dead jail" :
+ "not a jail descriptor");
+ goto done;
+ }
+ }
+ if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) {
+ /* Allocate a jail descriptor to return later. */
+ error = jaildesc_alloc(td, &jfp_out, &jfd_out,
+ flags & JAIL_OWN_DESC);
+ if (error)
+ goto done;
+ }
+ } else
+ goto done;
+
error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
if (error == 0) {
TAILQ_FOREACH(pr, &allprison, pr_list) {
@@ -2421,9 +2627,17 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
found_prison:
/* Get the parameters of the prison. */
- prison_hold(pr);
- drflags |= PD_DEREF;
+ if (!(drflags & PD_DEREF)) {
+ prison_hold(pr);
+ drflags |= PD_DEREF;
+ }
td->td_retval[0] = pr->pr_id;
+ if (jfd_out >= 0) {
+ error = vfs_setopt(opts, "desc", &jfd_out, sizeof(jfd_out));
+ if (error != 0 && error != ENOENT)
+ goto done;
+ jaildesc_set_prison(jfp_out, pr);
+ }
error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
if (error != 0 && error != ENOENT)
goto done;
@@ -2603,6 +2817,13 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
prison_deref(pr, drflags);
else if (drflags & PD_LIST_SLOCKED)
sx_sunlock(&allprison_lock);
+ else if (drflags & PD_LIST_XLOCKED)
+ sx_xunlock(&allprison_lock);
+ /* Clean up other resources. */
+ if (jfp_out != NULL)
+ (void)fdrop(jfp_out, td);
+ if (error && jfd_out >= 0)
+ (void)kern_close(td, jfd_out);
if (error && errmsg_pos >= 0) {
/* Write the error message back to userspace. */
vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
@@ -2619,6 +2840,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
}
}
vfs_freeopts(opts);
+ prison_free(mypr);
return (error);
}
@@ -2643,14 +2865,54 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
sx_xunlock(&allprison_lock);
return (EINVAL);
}
+ prison_hold(pr);
+ prison_remove(pr);
+ return (0);
+}
+
+/*
+ * struct jail_remove_jd_args {
+ * int fd;
+ * };
+ */
+int
+sys_jail_remove_jd(struct thread *td, struct jail_remove_jd_args *uap)
+{
+ struct prison *pr;
+ struct ucred *jdcred;
+ int error;
+
+ error = jaildesc_find(td, uap->fd, &pr, &jdcred);
+ if (error)
+ return (error);
+ error = priv_check_cred(jdcred, PRIV_JAIL_REMOVE);
+ crfree(jdcred);
+ if (error) {
+ prison_free(pr);
+ return (error);
+ }
+ sx_xlock(&allprison_lock);
+ mtx_lock(&pr->pr_mtx);
+ prison_remove(pr);
+ return (0);
+}
+
+/*
+ * Begin the removal process for a prison. The allprison lock should
+ * be held exclusively, and the prison should be both locked and held.
+ */
+void
+prison_remove(struct prison *pr)
+{
+ sx_assert(&allprison_lock, SA_XLOCKED);
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
if (!prison_isalive(pr)) {
/* Silently ignore already-dying prisons. */
mtx_unlock(&pr->pr_mtx);
sx_xunlock(&allprison_lock);
- return (0);
+ return;
}
- prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED);
- return (0);
+ prison_deref(pr, PD_KILL | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
}
/*
@@ -2685,6 +2947,44 @@ sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED));
}
+/*
+ * struct jail_attach_jd_args {
+ * int fd;
+ * };
+ */
+int
+sys_jail_attach_jd(struct thread *td, struct jail_attach_jd_args *uap)
+{
+ struct prison *pr;
+ struct ucred *jdcred;
+ int drflags, error;
+
+ sx_slock(&allprison_lock);
+ drflags = PD_LIST_SLOCKED;
+ error = jaildesc_find(td, uap->fd, &pr, &jdcred);
+ if (error)
+ goto fail;
+ drflags |= PD_DEREF;
+ error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH);
+ crfree(jdcred);
+ if (error)
+ goto fail;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+
+ /* Do not allow a process to attach to a prison that is not alive. */
+ if (!prison_isalive(pr)) {
+ error = EINVAL;
+ goto fail;
+ }
+
+ return (do_jail_attach(td, pr, drflags));
+
+ fail:
+ prison_deref(pr, drflags);
+ return (error);
+}
+
static int
do_jail_attach(struct thread *td, struct prison *pr, int drflags)
{
@@ -2703,9 +3003,12 @@ do_jail_attach(struct thread *td, struct prison *pr, int drflags)
* a process root from one prison, but attached to the jail
* of another.
*/
- prison_hold(pr);
+ if (!(drflags & PD_DEREF)) {
+ prison_hold(pr);
+ drflags |= PD_DEREF;
+ }
refcount_acquire(&pr->pr_uref);
- drflags |= PD_DEREF | PD_DEUREF;
+ drflags |= PD_DEUREF;
mtx_unlock(&pr->pr_mtx);
drflags &= ~PD_LOCKED;
@@ -2755,6 +3058,7 @@ do_jail_attach(struct thread *td, struct prison *pr, int drflags)
prison_proc_relink(oldcred->cr_prison, pr, p);
prison_deref(oldcred->cr_prison, drflags);
crfree(oldcred);
+ prison_knote(pr, NOTE_JAIL_ATTACH | td->td_proc->p_pid);
/*
* If the prison was killed while changing credentials, die along
@@ -3182,9 +3486,10 @@ prison_deref(struct prison *pr, int flags)
refcount_load(&prison0.pr_uref) > 0,
("prison0 pr_uref=0"));
pr->pr_state = PRISON_STATE_DYING;
+ prison_cleanup_locked(pr);
mtx_unlock(&pr->pr_mtx);
flags &= ~PD_LOCKED;
- prison_cleanup(pr);
+ prison_cleanup_unlocked(pr);
}
}
}
@@ -3327,8 +3632,9 @@ prison_deref_kill(struct prison *pr, struct prisonlist *freeprison)
}
if (!(cpr->pr_flags & PR_REMOVE))
continue;
- prison_cleanup(cpr);
+ prison_cleanup_unlocked(cpr);
mtx_lock(&cpr->pr_mtx);
+ prison_cleanup_locked(cpr);
cpr->pr_flags &= ~PR_REMOVE;
if (cpr->pr_flags & PR_PERSIST) {
cpr->pr_flags &= ~PR_PERSIST;
@@ -3363,8 +3669,9 @@ prison_deref_kill(struct prison *pr, struct prisonlist *freeprison)
if (rpr != NULL)
LIST_REMOVE(rpr, pr_sibling);
- prison_cleanup(pr);
+ prison_cleanup_unlocked(pr);
mtx_lock(&pr->pr_mtx);
+ prison_cleanup_locked(pr);
if (pr->pr_flags & PR_PERSIST) {
pr->pr_flags &= ~PR_PERSIST;
prison_proc_free_not_last(pr);
@@ -3411,10 +3718,22 @@ prison_lock_xlock(struct prison *pr, int flags)
/*
* Release a prison's resources when it starts dying (when the last user
- * reference is dropped, or when it is killed).
+ * reference is dropped, or when it is killed). Two functions are called,
+ * for work that requires a locked prison or an unlocked one.
*/
static void
-prison_cleanup(struct prison *pr)
+prison_cleanup_locked(struct prison *pr)
+{
+ sx_assert(&allprison_lock, SA_XLOCKED);
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
+ prison_knote(pr, NOTE_JAIL_REMOVE);
+ knlist_detach(pr->pr_klist);
+ jaildesc_prison_cleanup(pr);
+ pr->pr_klist = NULL;
+}
+
+static void
+prison_cleanup_unlocked(struct prison *pr)
{
sx_assert(&allprison_lock, SA_XLOCKED);
mtx_assert(&pr->pr_mtx, MA_NOTOWNED);
@@ -5039,6 +5358,23 @@ prison_racct_detach(struct prison *pr)
}
#endif /* RACCT */
+/*
+ * Submit a knote for a prison, locking if necessary.
+ */
+static void
+prison_knote(struct prison *pr, long hint)
+{
+ int locked;
+
+ locked = mtx_owned(&pr->pr_mtx);
+ if (!locked)
+ mtx_lock(&pr->pr_mtx);
+ KNOTE_LOCKED(pr->pr_klist, hint);
+ jaildesc_knote(pr, hint);
+ if (!locked)
+ mtx_unlock(&pr->pr_mtx);
+}
+
#ifdef DDB
static void
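
A hedged userland sketch of the descriptor round trip that the "desc" parameter enables: pass an int-sized "desc" option together with the JAIL_GET_DESC flag (both from this series), and jail_set(2) fills it in with a jail descriptor for the newly created jail. The jail name and path are made up for the example.

#include <sys/param.h>
#include <sys/uio.h>
#include <sys/jail.h>
#include <err.h>

static int
create_jail_with_desc(void)
{
	struct iovec iov[8];
	int desc = -1, jid;

	iov[0].iov_base = "name";
	iov[0].iov_len = sizeof("name");
	iov[1].iov_base = "example";
	iov[1].iov_len = sizeof("example");
	iov[2].iov_base = "path";
	iov[2].iov_len = sizeof("path");
	iov[3].iov_base = "/";
	iov[3].iov_len = sizeof("/");
	iov[4].iov_base = "persist";		/* boolean parameter */
	iov[4].iov_len = sizeof("persist");
	iov[5].iov_base = NULL;
	iov[5].iov_len = 0;
	iov[6].iov_base = "desc";
	iov[6].iov_len = sizeof("desc");
	iov[7].iov_base = &desc;		/* filled in by the kernel */
	iov[7].iov_len = sizeof(desc);
	jid = jail_set(iov, 8, JAIL_CREATE | JAIL_GET_DESC);
	if (jid == -1)
		err(1, "jail_set");
	return (desc);
}
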
diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c
new file mode 100644
index 000000000000..3f322b271400
--- /dev/null
+++ b/sys/kern/kern_jaildesc.c
@@ -0,0 +1,412 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 James Gritton.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/jaildesc.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/poll.h>
+#include <sys/priv.h>
+#include <sys/stat.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/ucred.h>
+#include <sys/user.h>
+#include <sys/vnode.h>
+
+MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors");
+
+static fo_poll_t jaildesc_poll;
+static fo_kqfilter_t jaildesc_kqfilter;
+static fo_stat_t jaildesc_stat;
+static fo_close_t jaildesc_close;
+static fo_fill_kinfo_t jaildesc_fill_kinfo;
+static fo_cmp_t jaildesc_cmp;
+
+static struct fileops jaildesc_ops = {
+ .fo_read = invfo_rdwr,
+ .fo_write = invfo_rdwr,
+ .fo_truncate = invfo_truncate,
+ .fo_ioctl = invfo_ioctl,
+ .fo_poll = jaildesc_poll,
+ .fo_kqfilter = jaildesc_kqfilter,
+ .fo_stat = jaildesc_stat,
+ .fo_close = jaildesc_close,
+ .fo_chmod = invfo_chmod,
+ .fo_chown = invfo_chown,
+ .fo_sendfile = invfo_sendfile,
+ .fo_fill_kinfo = jaildesc_fill_kinfo,
+ .fo_cmp = jaildesc_cmp,
+ .fo_flags = DFLAG_PASSABLE,
+};
+
+/*
+ * Given a jail descriptor number, return its prison and/or its
+ * credential. They are returned held, and will need to be released
+ * by the caller.
+ */
+int
+jaildesc_find(struct thread *td, int fd, struct prison **prp,
+ struct ucred **ucredp)
+{
+ struct file *fp;
+ struct jaildesc *jd;
+ struct prison *pr;
+ int error;
+
+ error = fget(td, fd, &cap_no_rights, &fp);
+ if (error != 0)
+ return (error);
+ if (fp->f_type != DTYPE_JAILDESC) {
+ error = EINVAL;
+ goto out;
+ }
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ pr = jd->jd_prison;
+ if (pr == NULL || !prison_isvalid(pr)) {
+ error = ENOENT;
+ JAILDESC_UNLOCK(jd);
+ goto out;
+ }
+ if (prp != NULL) {
+ prison_hold(pr);
+ *prp = pr;
+ }
+ JAILDESC_UNLOCK(jd);
+ if (ucredp != NULL)
+ *ucredp = crhold(fp->f_cred);
+ out:
+ fdrop(fp, td);
+ return (error);
+}
+
+/*
+ * Allocate a new jail descriptor, not yet associated with a prison.
+ * Return the file pointer (with a reference held) and the descriptor
+ * number.
+ */
+int
+jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning)
+{
+ struct file *fp;
+ struct jaildesc *jd;
+ int error;
+
+ if (owning) {
+ error = priv_check(td, PRIV_JAIL_REMOVE);
+ if (error != 0)
+ return (error);
+ }
+ jd = malloc(sizeof(*jd), M_JAILDESC, M_WAITOK | M_ZERO);
+ error = falloc_caps(td, &fp, fdp, 0, NULL);
+ if (error != 0) {
+ free(jd, M_JAILDESC);
+ return (error);
+ }
+ finit(fp, priv_check_cred(fp->f_cred, PRIV_JAIL_SET) == 0 ?
+ FREAD | FWRITE : FREAD, DTYPE_JAILDESC, jd, &jaildesc_ops);
+ JAILDESC_LOCK_INIT(jd);
+ knlist_init_mtx(&jd->jd_selinfo.si_note, &jd->jd_lock);
+ if (owning)
+ jd->jd_flags |= JDF_OWNING;
+ *fpp = fp;
+ return (0);
+}
+
+/*
+ * Associate a jail descriptor with its prison.
+ */
+void
+jaildesc_set_prison(struct file *fp, struct prison *pr)
+{
+ struct jaildesc *jd;
+
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ jd->jd_prison = pr;
+ LIST_INSERT_HEAD(&pr->pr_descs, jd, jd_list);
+ prison_hold(pr);
+ JAILDESC_UNLOCK(jd);
+}
+
+/*
+ * Detach all the jail descriptors from a prison.
+ */
+void
+jaildesc_prison_cleanup(struct prison *pr)
+{
+ struct jaildesc *jd;
+
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
+ while ((jd = LIST_FIRST(&pr->pr_descs))) {
+ JAILDESC_LOCK(jd);
+ LIST_REMOVE(jd, jd_list);
+ jd->jd_prison = NULL;
+ JAILDESC_UNLOCK(jd);
+ prison_free(pr);
+ }
+}
+
+/*
+ * Pass a note to all listening kqueues.
+ */
+void
+jaildesc_knote(struct prison *pr, long hint)
+{
+ struct jaildesc *jd;
+ int prison_locked;
+
+ if (!LIST_EMPTY(&pr->pr_descs)) {
+ prison_locked = mtx_owned(&pr->pr_mtx);
+ if (!prison_locked)
+ prison_lock(pr);
+ LIST_FOREACH(jd, &pr->pr_descs, jd_list) {
+ JAILDESC_LOCK(jd);
+ if (hint == NOTE_JAIL_REMOVE) {
+ jd->jd_flags |= JDF_REMOVED;
+ if (jd->jd_flags & JDF_SELECTED) {
+ jd->jd_flags &= ~JDF_SELECTED;
+ selwakeup(&jd->jd_selinfo);
+ }
+ }
+ KNOTE_LOCKED(&jd->jd_selinfo.si_note, hint);
+ JAILDESC_UNLOCK(jd);
+ }
+ if (!prison_locked)
+ prison_unlock(pr);
+ }
+}
+
+static int
+jaildesc_close(struct file *fp, struct thread *td)
+{
+ struct jaildesc *jd;
+ struct prison *pr;
+
+ jd = fp->f_data;
+ fp->f_data = NULL;
+ if (jd != NULL) {
+ JAILDESC_LOCK(jd);
+ pr = jd->jd_prison;
+ if (pr != NULL) {
+ /*
+ * Free or remove the associated prison.
+ * This requires a second check after re-
+ * ordering locks. This jaildesc can remain
+ * unlocked once we have a prison reference,
+ * because that prison is the only place that
+ * still points back to it.
+ */
+ prison_hold(pr);
+ JAILDESC_UNLOCK(jd);
+ if (jd->jd_flags & JDF_OWNING) {
+ sx_xlock(&allprison_lock);
+ prison_lock(pr);
+ if (jd->jd_prison != NULL) {
+ /*
+ * Unlink the prison, but don't free
+ * it; that will be done as part of
+ * prison_remove.
+ */
+ LIST_REMOVE(jd, jd_list);
+ prison_remove(pr);
+ } else {
+ prison_unlock(pr);
+ sx_xunlock(&allprison_lock);
+ }
+ } else {
+ prison_lock(pr);
+ if (jd->jd_prison != NULL) {
+ LIST_REMOVE(jd, jd_list);
+ prison_free(pr);
+ }
+ prison_unlock(pr);
+ }
+ prison_free(pr);
+ }
+ knlist_destroy(&jd->jd_selinfo.si_note);
+ JAILDESC_LOCK_DESTROY(jd);
+ free(jd, M_JAILDESC);
+ }
+ return (0);
+}
+
+static int
+jaildesc_poll(struct file *fp, int events, struct ucred *active_cred,
+ struct thread *td)
+{
+ struct jaildesc *jd;
+ int revents;
+
+ revents = 0;
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ if (jd->jd_flags & JDF_REMOVED)
+ revents |= POLLHUP;
+ if (revents == 0) {
+ selrecord(td, &jd->jd_selinfo);
+ jd->jd_flags |= JDF_SELECTED;
+ }
+ JAILDESC_UNLOCK(jd);
+ return (revents);
+}
+
+static void
+jaildesc_kqops_detach(struct knote *kn)
+{
+ struct jaildesc *jd;
+
+ jd = kn->kn_fp->f_data;
+ knlist_remove(&jd->jd_selinfo.si_note, kn, 0);
+}
+
+static int
+jaildesc_kqops_event(struct knote *kn, long hint)
+{
+ struct jaildesc *jd;
+ u_int event;
+
+ jd = kn->kn_fp->f_data;
+ if (hint == 0) {
+ /*
+ * Initial test after registration. Generate a
+ * NOTE_JAIL_REMOVE in case the prison already died
+ * before registration.
+ */
+ event = jd->jd_flags & JDF_REMOVED ? NOTE_JAIL_REMOVE : 0;
+ } else {
+ /*
+ * Mask off extra data. In the NOTE_JAIL_CHILD case,
+ * that's everything except the NOTE_JAIL_CHILD bit
+ * itself, since a JID is any positive integer.
+ */
+ event = ((u_int)hint & NOTE_JAIL_CHILD) ? NOTE_JAIL_CHILD :
+ (u_int)hint & NOTE_JAIL_CTRLMASK;
+ }
+
+ /* If the user is interested in this event, record it. */
+ if (kn->kn_sfflags & event) {
+ kn->kn_fflags |= event;
+ /* Report the created jail id or attached process id. */
+ if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) {
+ if (kn->kn_data != 0)
+ kn->kn_fflags |= NOTE_JAIL_MULTI;
+ kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U :
+ (u_int)hint & ~event;
+ }
+ }
+
+ /* Prison is gone, so flag the event as finished. */
+ if (event == NOTE_JAIL_REMOVE) {
+ kn->kn_flags |= EV_EOF | EV_ONESHOT;
+ if (kn->kn_fflags == 0)
+ kn->kn_flags |= EV_DROP;
+ return (1);
+ }
+
+ return (kn->kn_fflags != 0);
+}
+
+static const struct filterops jaildesc_kqops = {
+ .f_isfd = 1,
+ .f_detach = jaildesc_kqops_detach,
+ .f_event = jaildesc_kqops_event,
+};
+
+static int
+jaildesc_kqfilter(struct file *fp, struct knote *kn)
+{
+ struct jaildesc *jd;
+
+ jd = fp->f_data;
+ switch (kn->kn_filter) {
+ case EVFILT_JAILDESC:
+ kn->kn_fop = &jaildesc_kqops;
+ kn->kn_flags |= EV_CLEAR;
+ knlist_add(&jd->jd_selinfo.si_note, kn, 0);
+ return (0);
+ default:
+ return (EINVAL);
+ }
+}
+
+static int
+jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
+{
+ struct jaildesc *jd;
+
+ bzero(sb, sizeof(struct stat));
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ if (jd->jd_prison != NULL) {
+ sb->st_ino = jd->jd_prison->pr_id;
+ sb->st_mode = S_IFREG | S_IRWXU;
+ } else
+ sb->st_mode = S_IFREG;
+ JAILDESC_UNLOCK(jd);
+ return (0);
+}
+
+static int
+jaildesc_fill_kinfo(struct file *fp, struct kinfo_file *kif,
+ struct filedesc *fdp)
+{
+ struct jaildesc *jd;
+
+ jd = fp->f_data;
+ kif->kf_type = KF_TYPE_JAILDESC;
+ kif->kf_un.kf_jail.kf_jid = jd->jd_prison ? jd->jd_prison->pr_id : 0;
+ return (0);
+}
+
+static int
+jaildesc_cmp(struct file *fp1, struct file *fp2, struct thread *td)
+{
+ struct jaildesc *jd1, *jd2;
+ int jid1, jid2;
+
+ if (fp2->f_type != DTYPE_JAILDESC)
+ return (3);
+ jd1 = fp1->f_data;
+ JAILDESC_LOCK(jd1);
+ jid1 = jd1->jd_prison ? (uintptr_t)jd1->jd_prison->pr_id : 0;
+ JAILDESC_UNLOCK(jd1);
+ jd2 = fp2->f_data;
+ JAILDESC_LOCK(jd2);
+ jid2 = jd2->jd_prison ? (uintptr_t)jd2->jd_prison->pr_id : 0;
+ JAILDESC_UNLOCK(jd2);
+ return (kcmp_cmp(jid1, jid2));
+}
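
Since jaildesc_poll() above reports POLLHUP once the prison behind the descriptor has been removed, a plain poll() is enough to wait for removal from userland. A minimal sketch:

#include <poll.h>
#include <err.h>
#include <stdio.h>

static void
wait_for_removal(int jd)
{
	struct pollfd pfd;

	pfd.fd = jd;
	pfd.events = POLLHUP;
	if (poll(&pfd, 1, INFTIM) == -1)
		err(1, "poll");
	if (pfd.revents & POLLHUP)
		printf("jail behind descriptor %d was removed\n", jd);
}
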
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index 879220be050b..653ce1ee556b 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -751,11 +751,14 @@ malloc_domainset(size_t size, struct malloc_type *mtp, struct domainset *ds,
return (malloc_large(size, mtp, DOMAINSET_RR(), flags
DEBUG_REDZONE_ARG));
- vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
- do {
- va = malloc_domain(&size, &indx, mtp, domain, flags);
- } while (va == NULL && vm_domainset_iter_policy(&di, &domain) == 0);
+ indx = -1;
+ va = NULL;
+ if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags) == 0)
+ do {
+ va = malloc_domain(&size, &indx, mtp, domain, flags);
+ } while (va == NULL && vm_domainset_iter_policy(&di, &domain) == 0);
malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx);
+
if (__predict_false(va == NULL)) {
KASSERT((flags & M_WAITOK) == 0,
("malloc(M_WAITOK) returned NULL"));
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index f952b3fc8805..8b5908f5219a 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -1136,9 +1136,9 @@ __mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line)
* General init routine used by the MTX_SYSINIT() macro.
*/
void
-mtx_sysinit(void *arg)
+mtx_sysinit(const void *arg)
{
- struct mtx_args *margs = arg;
+ const struct mtx_args *margs = arg;
mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL,
margs->ma_opts);
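The consumer side is unchanged by the constification: MTX_SYSINIT() still queues a struct mtx_args that the SYSINIT machinery now passes to mtx_sysinit() as a const pointer, and the rm/rw/sx sysinit hooks below follow the same pattern. Standard usage, for reference:

    /* A mutex set up at boot through the (now const-correct) sysinit shim. */
    static struct mtx example_lock;
    MTX_SYSINIT(example_lock, &example_lock, "example lock", MTX_DEF);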
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 379fbda619c0..6e56664d12ce 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -1112,13 +1112,14 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
if (cred->cr_flags & CRED_FLAG_CAPMODE)
kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE;
/* XXX bde doesn't like KI_NGROUPS */
- if (cred->cr_ngroups > KI_NGROUPS) {
+ if (1 + cred->cr_ngroups > KI_NGROUPS) {
kp->ki_ngroups = KI_NGROUPS;
kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW;
} else
- kp->ki_ngroups = cred->cr_ngroups;
- bcopy(cred->cr_groups, kp->ki_groups,
- kp->ki_ngroups * sizeof(gid_t));
+ kp->ki_ngroups = 1 + cred->cr_ngroups;
+ kp->ki_groups[0] = cred->cr_gid;
+ bcopy(cred->cr_groups, kp->ki_groups + 1,
+ (kp->ki_ngroups - 1) * sizeof(gid_t));
kp->ki_rgid = cred->cr_rgid;
kp->ki_svgid = cred->cr_svgid;
/* If jailed(cred), emulate the old P_JAILED flag. */
@@ -2943,8 +2944,11 @@ sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS)
cred = crhold(p->p_ucred);
PROC_UNLOCK(p);
- error = SYSCTL_OUT(req, cred->cr_groups,
- cred->cr_ngroups * sizeof(gid_t));
+ error = SYSCTL_OUT(req, &cred->cr_gid, sizeof(gid_t));
+ if (error == 0)
+ error = SYSCTL_OUT(req, cred->cr_groups,
+ cred->cr_ngroups * sizeof(gid_t));
+
crfree(cred);
return (error);
}
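With the effective GID carried separately from cr_groups, both ki_groups and the kern.proc.groups sysctl now export the effective GID as element zero, followed by the supplementary groups. A hypothetical userland sketch of reading the new layout:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <err.h>
    #include <limits.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main(void)
    {
            gid_t groups[NGROUPS_MAX + 1];
            size_t len = sizeof(groups);
            int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_GROUPS, (int)getpid() };

            if (sysctl(mib, 4, groups, &len, NULL, 0) == -1)
                    err(1, "sysctl");
            /* First entry is the effective GID, the rest are supplementary. */
            printf("egid %u, %zu supplementary groups\n", (unsigned)groups[0],
                len / sizeof(gid_t) - 1);
            return (0);
    }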
diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c
index c1633dd19de2..7206572ffc02 100644
--- a/sys/kern/kern_rmlock.c
+++ b/sys/kern/kern_rmlock.c
@@ -337,9 +337,9 @@ rm_wowned(const struct rmlock *rm)
}
void
-rm_sysinit(void *arg)
+rm_sysinit(const void *arg)
{
- struct rm_args *args;
+ const struct rm_args *args;
args = arg;
rm_init_flags(args->ra_rm, args->ra_desc, args->ra_flags);
diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c
index e182d1fe9baf..84a3a890be63 100644
--- a/sys/kern/kern_rwlock.c
+++ b/sys/kern/kern_rwlock.c
@@ -266,9 +266,9 @@ _rw_destroy(volatile uintptr_t *c)
}
void
-rw_sysinit(void *arg)
+rw_sysinit(const void *arg)
{
- struct rw_args *args;
+ const struct rw_args *args;
args = arg;
rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index accea5d288eb..c005e112d3b9 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -222,9 +222,9 @@ owner_sx(const struct lock_object *lock, struct thread **owner)
#endif
void
-sx_sysinit(void *arg)
+sx_sysinit(const void *arg)
{
- struct sx_args *sargs = arg;
+ const struct sx_args *sargs = arg;
sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags);
}
diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c
index 0e8c2b9f362e..4329959a2ef4 100644
--- a/sys/kern/kern_thr.c
+++ b/sys/kern/kern_thr.c
@@ -347,6 +347,17 @@ kern_thr_exit(struct thread *td)
p = td->td_proc;
/*
+ * Clear kernel ASTs in advance of selecting the last exiting
+ * thread and acquiring schedulers locks. It is fine to
+ * clear the ASTs here even if we are not going to exit after
+ * all. On the other hand, leaving them pending could trigger
+ * execution in subsystems in a context where they are not
+ * prepared to handle top kernel actions, even in execution of
+ * an unrelated thread.
+ */
+ ast_kclear(td);
+
+ /*
* If all of the threads in a process call this routine to
* exit (e.g. all threads call pthread_exit()), exactly one
* thread should return to the caller to terminate the process
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 50b040132396..3180c66cb42b 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -1694,8 +1694,10 @@ thread_single_end(struct proc *p, int mode)
thread_unlock(td);
}
}
- KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0,
- ("inconsistent boundary count %d", p->p_boundary_count));
+ KASSERT(mode != SINGLE_BOUNDARY || P_SHOULDSTOP(p) ||
+ p->p_boundary_count == 0,
+ ("pid %d proc %p flags %#x inconsistent boundary count %d",
+ p->p_pid, p, p->p_flag, p->p_boundary_count));
PROC_SUNLOCK(p);
wakeup(&p->p_flag);
}
diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c
index fbf81d423b95..09070eea284f 100644
--- a/sys/kern/kern_tslog.c
+++ b/sys/kern/kern_tslog.c
@@ -220,3 +220,13 @@ SYSCTL_PROC(_debug, OID_AUTO, tslog_user,
CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_SKIP,
0, 0, sysctl_debug_tslog_user,
"", "Dump recorded userland event timestamps");
+
+void
+sysinit_tslog_shim(const void *data)
+{
+ const struct sysinit_tslog *x = data;
+
+ tslog(curthread, TS_ENTER, "SYSINIT", x->name);
+ (x->func)(x->data);
+ tslog(curthread, TS_EXIT, "SYSINIT", x->name);
+}
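The shim assumes a small descriptor bundling the real SYSINIT function, its argument, and a label. The actual definition lives in sys/kernel.h changes not shown in this excerpt; the layout implied by the accesses above would be roughly:

    /* Inferred sketch; field names match the accesses in the shim above. */
    struct sysinit_tslog {
            sysinit_cfunc_t  func;  /* the wrapped SYSINIT function */
            const void      *data;  /* argument passed through to it */
            const char      *name;  /* label for the TS_ENTER/TS_EXIT timestamps */
    };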
diff --git a/sys/kern/subr_bus.c b/sys/kern/subr_bus.c
index 62a3da964c37..bf5bda7e058d 100644
--- a/sys/kern/subr_bus.c
+++ b/sys/kern/subr_bus.c
@@ -280,6 +280,9 @@ device_sysctl_handler(SYSCTL_HANDLER_ARGS)
struct sbuf sb;
device_t dev = (device_t)arg1;
device_t iommu;
+#ifdef IOMMU
+ device_t requester;
+#endif
int error;
uint16_t rid;
const char *c;
@@ -314,9 +317,15 @@ device_sysctl_handler(SYSCTL_HANDLER_ARGS)
}
rid = 0;
#ifdef IOMMU
- iommu_get_requester(dev, &rid);
+ error = iommu_get_requester(dev, &requester, &rid);
+ /*
+	 * Do not return the requester error from this sysctl; the
+	 * iommu unit might be assigned by other means.

+ */
+#else
+ error = ENXIO;
#endif
- if (rid != 0)
+ if (error == 0)
sbuf_printf(&sb, "%srid=%#x", c, rid);
break;
default:
diff --git a/sys/kern/subr_power.c b/sys/kern/subr_power.c
index db0e7bf5b0e3..44ad82860649 100644
--- a/sys/kern/subr_power.c
+++ b/sys/kern/subr_power.c
@@ -3,6 +3,10 @@
*
* Copyright (c) 2001 Mitsuru IWASAKI
* All rights reserved.
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Aymeric Wibo
+ * <obiwac@freebsd.org> under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -30,24 +34,113 @@
#include <sys/eventhandler.h>
#include <sys/power.h>
#include <sys/proc.h>
+#include <sys/sbuf.h>
+#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
+enum power_stype power_standby_stype = POWER_STYPE_UNKNOWN;
+enum power_stype power_suspend_stype = POWER_STYPE_UNKNOWN;
+enum power_stype power_hibernate_stype = POWER_STYPE_UNKNOWN;
+
static u_int power_pm_type = POWER_PM_TYPE_NONE;
static power_pm_fn_t power_pm_fn = NULL;
static void *power_pm_arg = NULL;
+static bool power_pm_supported[POWER_STYPE_COUNT] = {0};
static struct task power_pm_task;
+enum power_stype
+power_name_to_stype(const char *name)
+{
+ enum power_stype stype;
+
+ for (stype = 0; stype < POWER_STYPE_COUNT; stype++) {
+ if (strcasecmp(name, power_stype_names[stype]) == 0)
+ return (stype);
+ }
+ return (POWER_STYPE_UNKNOWN);
+}
+
+const char *
+power_stype_to_name(enum power_stype stype)
+{
+ if (stype == POWER_STYPE_UNKNOWN)
+ return ("NONE");
+ if (stype < POWER_STYPE_AWAKE || stype >= POWER_STYPE_COUNT)
+ return (NULL);
+ return (power_stype_names[stype]);
+}
+
+static int
+sysctl_supported_stypes(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ struct sbuf sb;
+ enum power_stype stype;
+
+ sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
+ for (stype = 0; stype < POWER_STYPE_COUNT; stype++) {
+ if (power_pm_supported[stype])
+ sbuf_printf(&sb, "%s ", power_stype_to_name(stype));
+ }
+ sbuf_trim(&sb);
+ sbuf_finish(&sb);
+ error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
+ sbuf_delete(&sb);
+
+ return (error);
+}
+
+static int
+power_sysctl_stype(SYSCTL_HANDLER_ARGS)
+{
+ char name[10];
+ int err;
+ enum power_stype new_stype, old_stype;
+
+ old_stype = *(enum power_stype *)oidp->oid_arg1;
+ strlcpy(name, power_stype_to_name(old_stype), sizeof(name));
+ err = sysctl_handle_string(oidp, name, sizeof(name), req);
+ if (err != 0 || req->newptr == NULL)
+ return (err);
+
+ new_stype = power_name_to_stype(name);
+ if (new_stype == POWER_STYPE_UNKNOWN)
+ return (EINVAL);
+ if (!power_pm_supported[new_stype])
+ return (EOPNOTSUPP);
+ if (new_stype != old_stype)
+ *(enum power_stype *)oidp->oid_arg1 = new_stype;
+ return (0);
+}
+
+static SYSCTL_NODE(_kern, OID_AUTO, power, CTLFLAG_RW, 0,
+ "Generic power management related sysctls");
+
+SYSCTL_PROC(_kern_power, OID_AUTO, supported_stype,
+ CTLTYPE_STRING | CTLFLAG_RD, 0, 0, sysctl_supported_stypes, "A",
+ "List supported sleep types");
+SYSCTL_PROC(_kern_power, OID_AUTO, standby, CTLTYPE_STRING | CTLFLAG_RW,
+ &power_standby_stype, 0, power_sysctl_stype, "A",
+ "Sleep type to enter on standby");
+SYSCTL_PROC(_kern_power, OID_AUTO, suspend, CTLTYPE_STRING | CTLFLAG_RW,
+ &power_suspend_stype, 0, power_sysctl_stype, "A",
+ "Sleep type to enter on suspend");
+SYSCTL_PROC(_kern_power, OID_AUTO, hibernate, CTLTYPE_STRING | CTLFLAG_RW,
+ &power_hibernate_stype, 0, power_sysctl_stype, "A",
+ "Sleep type to enter on hibernate");
+
static void
power_pm_deferred_fn(void *arg, int pending)
{
- int state = (intptr_t)arg;
+ enum power_stype stype = (intptr_t)arg;
- power_pm_fn(POWER_CMD_SUSPEND, power_pm_arg, state);
+ power_pm_fn(POWER_CMD_SUSPEND, power_pm_arg, stype);
}
int
-power_pm_register(u_int pm_type, power_pm_fn_t pm_fn, void *pm_arg)
+power_pm_register(u_int pm_type, power_pm_fn_t pm_fn, void *pm_arg,
+ bool pm_supported[static POWER_STYPE_COUNT])
{
int error;
@@ -56,6 +149,16 @@ power_pm_register(u_int pm_type, power_pm_fn_t pm_fn, void *pm_arg)
power_pm_type = pm_type;
power_pm_fn = pm_fn;
power_pm_arg = pm_arg;
+ memcpy(power_pm_supported, pm_supported,
+ sizeof(power_pm_supported));
+ if (power_pm_supported[POWER_STYPE_STANDBY])
+ power_standby_stype = POWER_STYPE_STANDBY;
+ if (power_pm_supported[POWER_STYPE_SUSPEND_TO_IDLE])
+ power_suspend_stype = POWER_STYPE_SUSPEND_TO_IDLE;
+ else if (power_pm_supported[POWER_STYPE_SUSPEND_TO_MEM])
+ power_suspend_stype = POWER_STYPE_SUSPEND_TO_MEM;
+ if (power_pm_supported[POWER_STYPE_HIBERNATE])
+ power_hibernate_stype = POWER_STYPE_HIBERNATE;
error = 0;
TASK_INIT(&power_pm_task, 0, power_pm_deferred_fn, NULL);
} else {
@@ -75,14 +178,27 @@ power_pm_get_type(void)
void
power_pm_suspend(int state)
{
+ enum power_stype stype;
+
if (power_pm_fn == NULL)
return;
- if (state != POWER_SLEEP_STATE_STANDBY &&
- state != POWER_SLEEP_STATE_SUSPEND &&
- state != POWER_SLEEP_STATE_HIBERNATE)
+ switch (state) {
+ case POWER_SLEEP_STATE_STANDBY:
+ stype = power_standby_stype;
+ break;
+ case POWER_SLEEP_STATE_SUSPEND:
+ stype = power_suspend_stype;
+ break;
+ case POWER_SLEEP_STATE_HIBERNATE:
+ stype = power_hibernate_stype;
+ break;
+ default:
+ printf("%s: unknown sleep state %d\n", __func__, state);
return;
- power_pm_task.ta_context = (void *)(intptr_t)state;
+ }
+
+ power_pm_task.ta_context = (void *)(intptr_t)stype;
taskqueue_enqueue(taskqueue_thread, &power_pm_task);
}
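Under the new interface a platform power-management driver declares which sleep types it actually implements, and the standby/suspend/hibernate defaults above are derived from that table. A hypothetical registration sketch (acpi_pm_func and sc are placeholders, not code from this diff):

    /* Sleep types this (hypothetical) driver can enter. */
    static bool acpi_sleep_supported[POWER_STYPE_COUNT] = {
            [POWER_STYPE_STANDBY]        = true,
            [POWER_STYPE_SUSPEND_TO_MEM] = true,
            [POWER_STYPE_HIBERNATE]      = true,
    };

    /* ... in the driver attach path ... */
    error = power_pm_register(POWER_PM_TYPE_ACPI, acpi_pm_func, sc,
        acpi_sleep_supported);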
diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index ab47b6ad29a3..a65c3ca128d9 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -57,7 +57,7 @@
* b : public affirmation by word or example of usually
* religious faith or conviction <the heroic witness to divine
* life -- Pilot>
- * 6 capitalized : a member of the Jehovah's Witnesses
+ * 6 capitalized : a member of the Jehovah's Witnesses
*/
/*
@@ -131,7 +131,7 @@
#define LI_SLEEPABLE 0x00040000 /* Lock may be held while sleeping. */
#ifndef WITNESS_COUNT
-#define WITNESS_COUNT 1536
+#define WITNESS_COUNT 1536
#endif
#define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */
#define WITNESS_PENDLIST (512 + (MAXCPU * 4))
@@ -158,20 +158,18 @@
* These flags go in the witness relationship matrix and describe the
* relationship between any two struct witness objects.
*/
-#define WITNESS_UNRELATED 0x00 /* No lock order relation. */
-#define WITNESS_PARENT 0x01 /* Parent, aka direct ancestor. */
-#define WITNESS_ANCESTOR 0x02 /* Direct or indirect ancestor. */
-#define WITNESS_CHILD 0x04 /* Child, aka direct descendant. */
-#define WITNESS_DESCENDANT 0x08 /* Direct or indirect descendant. */
-#define WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR)
-#define WITNESS_DESCENDANT_MASK (WITNESS_CHILD | WITNESS_DESCENDANT)
-#define WITNESS_RELATED_MASK \
- (WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK)
-#define WITNESS_REVERSAL 0x10 /* A lock order reversal has been
- * observed. */
-#define WITNESS_RESERVED1 0x20 /* Unused flag, reserved. */
-#define WITNESS_RESERVED2 0x40 /* Unused flag, reserved. */
-#define WITNESS_LOCK_ORDER_KNOWN 0x80 /* This lock order is known. */
+#define WITNESS_UNRELATED 0x00 /* No lock order relation. */
+#define WITNESS_PARENT 0x01 /* Parent, aka direct ancestor. */
+#define WITNESS_ANCESTOR 0x02 /* Direct or indirect ancestor. */
+#define WITNESS_CHILD 0x04 /* Child, aka direct descendant. */
+#define WITNESS_DESCENDANT 0x08 /* Direct or indirect descendant. */
+#define WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR)
+#define WITNESS_DESCENDANT_MASK (WITNESS_CHILD | WITNESS_DESCENDANT)
+#define WITNESS_RELATED_MASK (WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK)
+#define WITNESS_REVERSAL 0x10 /* A lock order reversal has been observed. */
+#define WITNESS_RESERVED1 0x20 /* Unused flag, reserved. */
+#define WITNESS_RESERVED2 0x40 /* Unused flag, reserved. */
+#define WITNESS_LOCK_ORDER_KNOWN 0x80 /* This lock order is known. */
/* Descendant to ancestor flags */
#define WITNESS_DTOA(x) (((x) & WITNESS_RELATED_MASK) >> 2)
@@ -218,20 +216,18 @@ struct lock_list_entry {
* (for example, "vnode interlock").
*/
struct witness {
- char w_name[MAX_W_NAME];
- uint32_t w_index; /* Index in the relationship matrix */
+ char w_name[MAX_W_NAME];
+ uint32_t w_index; /* Index in the relationship matrix */
struct lock_class *w_class;
- STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */
- STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */
- struct witness *w_hash_next; /* Linked list in hash buckets. */
- const char *w_file; /* File where last acquired */
- uint32_t w_line; /* Line where last acquired */
- uint32_t w_refcount;
- uint16_t w_num_ancestors; /* direct/indirect
- * ancestor count */
- uint16_t w_num_descendants; /* direct/indirect
- * descendant count */
- int16_t w_ddb_level;
+ STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */
+ STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */
+ struct witness *w_hash_next; /* Linked list in hash buckets. */
+ const char *w_file; /* File where last acquired */
+ uint32_t w_line; /* Line where last acquired */
+ uint32_t w_refcount;
+ uint16_t w_num_ancestors; /* direct/indirect ancestor count */
+ uint16_t w_num_descendants; /* direct/indirect descendant count */
+ int16_t w_ddb_level;
unsigned w_displayed:1;
unsigned w_reversed:1;
};
@@ -265,7 +261,7 @@ struct witness_lock_order_data {
/*
* The witness lock order data hash table. Keys are witness index tuples
* (struct witness_lock_order_key), elements are lock order data objects
- * (struct witness_lock_order_data).
+ * (struct witness_lock_order_data).
*/
struct witness_lock_order_hash {
struct witness_lock_order_data *wloh_array[WITNESS_LO_HASH_SIZE];
@@ -295,7 +291,6 @@ struct witness_order_list_entry {
static __inline int
witness_lock_type_equal(struct witness *w1, struct witness *w2)
{
-
return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) ==
(w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)));
}
@@ -304,7 +299,6 @@ static __inline int
witness_lock_order_key_equal(const struct witness_lock_order_key *a,
const struct witness_lock_order_key *b)
{
-
return (a->from == b->from && a->to == b->to);
}
@@ -415,7 +409,7 @@ SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN, &witness_skipspin,
int badstack_sbuf_size;
int witness_count = WITNESS_COUNT;
-SYSCTL_INT(_debug_witness, OID_AUTO, witness_count, CTLFLAG_RDTUN,
+SYSCTL_INT(_debug_witness, OID_AUTO, witness_count, CTLFLAG_RDTUN,
&witness_count, 0, "");
/*
@@ -760,7 +754,6 @@ static int witness_spin_warn = 0;
static const char *
fixup_filename(const char *file)
{
-
if (file == NULL)
return (NULL);
while (strncmp(file, "../", 3) == 0)
@@ -835,7 +828,7 @@ witness_startup(void *mem)
w_free_cnt--;
for (i = 0; i < witness_count; i++) {
- memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) *
+ memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) *
(witness_count + 1));
}
@@ -989,16 +982,16 @@ witness_ddb_display_descendants(int(*prnt)(const char *fmt, ...),
{
int i;
- for (i = 0; i < indent; i++)
- prnt(" ");
+ for (i = 0; i < indent; i++)
+ prnt(" ");
prnt("%s (type: %s, depth: %d, active refs: %d)",
w->w_name, w->w_class->lc_name,
w->w_ddb_level, w->w_refcount);
- if (w->w_displayed) {
- prnt(" -- (already displayed)\n");
- return;
- }
- w->w_displayed = 1;
+ if (w->w_displayed) {
+ prnt(" -- (already displayed)\n");
+ return;
+ }
+ w->w_displayed = 1;
if (w->w_file != NULL && w->w_line != 0)
prnt(" -- last acquired @ %s:%d\n", fixup_filename(w->w_file),
w->w_line);
@@ -1079,7 +1072,6 @@ witness_ddb_display(int(*prnt)(const char *fmt, ...))
int
witness_defineorder(struct lock_object *lock1, struct lock_object *lock2)
{
-
if (witness_watch == -1 || KERNEL_PANICKED())
return (0);
@@ -1257,7 +1249,7 @@ witness_checkorder(struct lock_object *lock, int flags, const char *file,
w->w_reversed = 1;
mtx_unlock_spin(&w_mtx);
witness_output(
- "acquiring duplicate lock of same type: \"%s\"\n",
+ "acquiring duplicate lock of same type: \"%s\"\n",
w->w_name);
witness_output(" 1st %s @ %s:%d\n", plock->li_lock->lo_name,
fixup_filename(plock->li_file), plock->li_line);
@@ -1743,7 +1735,7 @@ found:
/*
* In order to reduce contention on w_mtx, we want to keep always an
- * head object into lists so that frequent allocation from the
+ * head object into lists so that frequent allocation from the
* free witness pool (and subsequent locking) is avoided.
* In order to maintain the current code simple, when the head
* object is totally unloaded it means also that we do not have
@@ -1781,7 +1773,7 @@ witness_thread_exit(struct thread *td)
n++;
witness_list_lock(&lle->ll_children[i],
witness_output);
-
+
}
kassert_panic(
"Thread %p cannot exit while holding sleeplocks\n", td);
@@ -1948,7 +1940,6 @@ found:
static void
depart(struct witness *w)
{
-
MPASS(w->w_refcount == 0);
if (w->w_class->lc_flags & LC_SLEEPLOCK) {
w_sleep_cnt--;
@@ -1999,18 +1990,18 @@ adopt(struct witness *parent, struct witness *child)
child->w_num_ancestors++;
}
- /*
- * Find each ancestor of 'pi'. Note that 'pi' itself is counted as
+ /*
+ * Find each ancestor of 'pi'. Note that 'pi' itself is counted as
* an ancestor of 'pi' during this loop.
*/
for (i = 1; i <= w_max_used_index; i++) {
- if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 &&
+ if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 &&
(i != pi))
continue;
/* Find each descendant of 'i' and mark it as a descendant. */
for (j = 1; j <= w_max_used_index; j++) {
- /*
+ /*
* Skip children that are already marked as
* descendants of 'i'.
*/
@@ -2021,7 +2012,7 @@ adopt(struct witness *parent, struct witness *child)
* We are only interested in descendants of 'ci'. Note
* that 'ci' itself is counted as a descendant of 'ci'.
*/
- if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 &&
+ if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 &&
(j != ci))
continue;
w_rmatrix[i][j] |= WITNESS_ANCESTOR;
@@ -2029,16 +2020,16 @@ adopt(struct witness *parent, struct witness *child)
w_data[i].w_num_descendants++;
w_data[j].w_num_ancestors++;
- /*
+ /*
* Make sure we aren't marking a node as both an
- * ancestor and descendant. We should have caught
+ * ancestor and descendant. We should have caught
* this as a lock order reversal earlier.
*/
if ((w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) &&
(w_rmatrix[i][j] & WITNESS_DESCENDANT_MASK)) {
printf("witness rmatrix paradox! [%d][%d]=%d "
"both ancestor and descendant\n",
- i, j, w_rmatrix[i][j]);
+ i, j, w_rmatrix[i][j]);
kdb_backtrace();
printf("Witness disabled.\n");
witness_watch = -1;
@@ -2047,7 +2038,7 @@ adopt(struct witness *parent, struct witness *child)
(w_rmatrix[j][i] & WITNESS_DESCENDANT_MASK)) {
printf("witness rmatrix paradox! [%d][%d]=%d "
"both ancestor and descendant\n",
- j, i, w_rmatrix[j][i]);
+ j, i, w_rmatrix[j][i]);
kdb_backtrace();
printf("Witness disabled.\n");
witness_watch = -1;
@@ -2124,7 +2115,6 @@ _isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname)
static int
isitmychild(struct witness *parent, struct witness *child)
{
-
return (_isitmyx(parent, child, WITNESS_PARENT, __func__));
}
@@ -2134,7 +2124,6 @@ isitmychild(struct witness *parent, struct witness *child)
static int
isitmydescendant(struct witness *ancestor, struct witness *descendant)
{
-
return (_isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK,
__func__));
}
@@ -2182,7 +2171,7 @@ witness_get(void)
STAILQ_REMOVE_HEAD(&w_free, w_list);
w_free_cnt--;
index = w->w_index;
- MPASS(index > 0 && index == w_max_used_index+1 &&
+ MPASS(index > 0 && index == w_max_used_index + 1 &&
index < witness_count);
bzero(w, sizeof(*w));
w->w_index = index;
@@ -2194,7 +2183,6 @@ witness_get(void)
static void
witness_free(struct witness *w)
{
-
STAILQ_INSERT_HEAD(&w_free, w, w_list);
w_free_cnt++;
}
@@ -2219,11 +2207,10 @@ witness_lock_list_get(void)
bzero(lle, sizeof(*lle));
return (lle);
}
-
+
static void
witness_lock_list_free(struct lock_list_entry *lle)
{
-
mtx_lock_spin(&w_mtx);
lle->ll_next = w_lock_list_free;
w_lock_list_free = lle;
@@ -2297,7 +2284,6 @@ witness_voutput(const char *fmt, va_list ap)
static int
witness_thread_has_locks(struct thread *td)
{
-
if (td->td_sleeplocks == NULL)
return (0);
return (td->td_sleeplocks->ll_count != 0);
@@ -2573,14 +2559,12 @@ witness_setflag(struct lock_object *lock, int flag, int set)
void
witness_norelease(struct lock_object *lock)
{
-
witness_setflag(lock, LI_NORELEASE, 1);
}
void
witness_releaseok(struct lock_object *lock)
{
-
witness_setflag(lock, LI_NORELEASE, 0);
}
@@ -2588,7 +2572,6 @@ witness_releaseok(struct lock_object *lock)
static void
witness_ddb_list(struct thread *td)
{
-
KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
KASSERT(kdb_active, ("%s: not in the debugger", __func__));
@@ -2653,7 +2636,6 @@ DB_SHOW_ALIAS_FLAGS(alllocks, db_witness_list_all, DB_CMD_MEMSAFE);
DB_SHOW_COMMAND_FLAGS(witness, db_witness_display, DB_CMD_MEMSAFE)
{
-
witness_ddb_display(db_printf);
}
#endif
@@ -2673,9 +2655,9 @@ sbuf_print_witness_badstacks(struct sbuf *sb, size_t *oldidx)
/* Allocate and init temporary storage space. */
tmp_w1 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO);
tmp_w2 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO);
- tmp_data1 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
+ tmp_data1 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
M_WAITOK | M_ZERO);
- tmp_data2 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
+ tmp_data2 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
M_WAITOK | M_ZERO);
stack_zero(&tmp_data1->wlod_stack);
stack_zero(&tmp_data2->wlod_stack);
@@ -2750,12 +2732,12 @@ restart:
sbuf_printf(sb,
"\nLock order reversal between \"%s\"(%s) and \"%s\"(%s)!\n",
- tmp_w1->w_name, tmp_w1->w_class->lc_name,
+ tmp_w1->w_name, tmp_w1->w_class->lc_name,
tmp_w2->w_name, tmp_w2->w_class->lc_name);
if (data1) {
sbuf_printf(sb,
"Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n",
- tmp_w1->w_name, tmp_w1->w_class->lc_name,
+ tmp_w1->w_name, tmp_w1->w_class->lc_name,
tmp_w2->w_name, tmp_w2->w_class->lc_name);
stack_sbuf_print(sb, &tmp_data1->wlod_stack);
sbuf_putc(sb, '\n');
@@ -2763,7 +2745,7 @@ restart:
if (data2 && data2 != data1) {
sbuf_printf(sb,
"Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n",
- tmp_w2->w_name, tmp_w2->w_class->lc_name,
+ tmp_w2->w_name, tmp_w2->w_class->lc_name,
tmp_w1->w_name, tmp_w1->w_class->lc_name);
stack_sbuf_print(sb, &tmp_data2->wlod_stack);
sbuf_putc(sb, '\n');
@@ -2823,7 +2805,6 @@ sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS)
static int
sbuf_db_printf_drain(void *arg __unused, const char *data, int len)
{
-
return (db_printf("%.*s", len, data));
}
@@ -3068,7 +3049,7 @@ witness_lock_order_get(struct witness *parent, struct witness *child)
& WITNESS_LOCK_ORDER_KNOWN) == 0)
goto out;
- hash = witness_hash_djb2((const char*)&key,
+ hash = witness_hash_djb2((const char *)&key,
sizeof(key)) % w_lohash.wloh_size;
data = w_lohash.wloh_array[hash];
while (data != NULL) {
@@ -3089,7 +3070,6 @@ out:
static int
witness_lock_order_check(struct witness *parent, struct witness *child)
{
-
if (parent != child &&
w_rmatrix[parent->w_index][child->w_index]
& WITNESS_LOCK_ORDER_KNOWN &&
@@ -3115,7 +3095,7 @@ witness_lock_order_add(struct witness *parent, struct witness *child)
& WITNESS_LOCK_ORDER_KNOWN)
return (1);
- hash = witness_hash_djb2((const char*)&key,
+ hash = witness_hash_djb2((const char *)&key,
sizeof(key)) % w_lohash.wloh_size;
w_rmatrix[parent->w_index][child->w_index] |= WITNESS_LOCK_ORDER_KNOWN;
data = w_lofree;
@@ -3134,7 +3114,6 @@ witness_lock_order_add(struct witness *parent, struct witness *child)
static void
witness_increment_graph_generation(void)
{
-
if (witness_cold == 0)
mtx_assert(&w_mtx, MA_OWNED);
w_generation++;
@@ -3143,7 +3122,6 @@ witness_increment_graph_generation(void)
static int
witness_output_drain(void *arg __unused, const char *data, int len)
{
-
witness_output("%.*s", len, data);
return (len);
}
diff --git a/sys/kern/sys_procdesc.c b/sys/kern/sys_procdesc.c
index 11bd1b6f30e1..acaf1241cb2e 100644
--- a/sys/kern/sys_procdesc.c
+++ b/sys/kern/sys_procdesc.c
@@ -129,7 +129,7 @@ procdesc_find(struct thread *td, int fd, const cap_rights_t *rightsp,
if (error)
return (error);
if (fp->f_type != DTYPE_PROCDESC) {
- error = EBADF;
+ error = EINVAL;
goto out;
}
pd = fp->f_data;
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
index 4122f9261871..4cef89cd5219 100644
--- a/sys/kern/syscalls.c
+++ b/sys/kern/syscalls.c
@@ -602,4 +602,6 @@ const char *syscallnames[] = {
"inotify_rm_watch", /* 594 = inotify_rm_watch */
"getgroups", /* 595 = getgroups */
"setgroups", /* 596 = setgroups */
+ "jail_attach_jd", /* 597 = jail_attach_jd */
+ "jail_remove_jd", /* 598 = jail_remove_jd */
};
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index fa64597d14a5..911f9093824b 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -3383,5 +3383,15 @@
_In_reads_(gidsetsize) const gid_t *gidset
);
}
+597 AUE_JAIL_ATTACH STD {
+ int jail_attach_jd(
+ int fd
+ );
+ }
+598 AUE_JAIL_REMOVE STD {
+ int jail_remove_jd(
+ int fd
+ );
+ }
; vim: syntax=off
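Both new syscalls take a jail descriptor rather than a JID, which removes the window between looking a jail up and acting on it. A hypothetical userland sketch, calling them by number until libc wrappers and regenerated headers are available:

    #include <err.h>
    #include <unistd.h>

    /* Enter the jail behind descriptor 'jd' (syscall 597 in the table above). */
    static void
    attach_to_jail(int jd)
    {
            if (syscall(597, jd) == -1)
                    err(1, "jail_attach_jd");
    }

    /* Destroy the jail behind descriptor 'jd' (syscall 598 in the table above). */
    static void
    remove_jail(int jd)
    {
            if (syscall(598, jd) == -1)
                    err(1, "jail_remove_jd");
    }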
diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c
index 2b1ea9eed8d4..e28fef931ea8 100644
--- a/sys/kern/systrace_args.c
+++ b/sys/kern/systrace_args.c
@@ -3500,6 +3500,20 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 2;
break;
}
+ /* jail_attach_jd */
+ case 597: {
+ struct jail_attach_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* jail_remove_jd */
+ case 598: {
+ struct jail_remove_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -9367,6 +9381,26 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
+ /* jail_attach_jd */
+ case 597:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* jail_remove_jd */
+ case 598:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -11365,6 +11399,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* jail_attach_jd */
+ case 597:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* jail_remove_jd */
+ case 598:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 19870e989437..6138e543fae7 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1807,9 +1807,7 @@ uipc_filt_sowrite(struct knote *kn, long hint)
kn->kn_data = uipc_stream_sbspace(&so2->so_rcv);
if (so2->so_rcv.sb_state & SBS_CANTRCVMORE) {
- /*
- * XXXGL: maybe kn->kn_flags |= EV_EOF ?
- */
+ kn->kn_flags |= EV_EOF;
return (1);
} else if (kn->kn_sfflags & NOTE_LOWAT)
return (kn->kn_data >= kn->kn_sdata);
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 89c1d779f04c..13abb9171234 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -86,7 +86,7 @@
*
* This fundamental choice needs to be revisited. In the meantime, the current
* state is described below. Significance of all notable routines is explained
- * in comments placed above their implementation. Scattered thoroughout the
+ * in comments placed above their implementation. Scattered throughout the
* file are TODO comments indicating shortcomings which can be fixed without
* reworking everything (most of the fixes will likely be reusable). Various
* details are omitted from this explanation to not clutter the overview, they
@@ -109,18 +109,19 @@
* The (directory vnode; name) tuple reliably determines the target entry if
* it exists.
*
- * Since there are no small locks at this time (all are 32 bytes in size on
- * LP64), the code works around the problem by introducing lock arrays to
- * protect hash buckets and vnode lists.
+ * Since there were no small locks at the time of writing this comment (all are
+ * 32 bytes in size on LP64), the code works around the problem by introducing
+ * lock arrays to protect hash buckets and vnode lists.
*
* II. Filesystem integration
*
* Filesystems participating in name caching do the following:
* - set vop_lookup routine to vfs_cache_lookup
- * - set vop_cachedlookup to whatever can perform the lookup if the above fails
- * - if they support lockless lookup (see below), vop_fplookup_vexec and
- * vop_fplookup_symlink are set along with the MNTK_FPLOOKUP flag on the
- * mount point
+ * - set vop_cachedlookup to a routine which can perform the lookup if the
+ * above fails
+ * - if they support lockless lookup (see below), they set vop_fplookup_vexec
+ * and vop_fplookup_symlink along with the MNTK_FPLOOKUP flag on the mount
+ * point
* - call cache_purge or cache_vop_* routines to eliminate stale entries as
* applicable
* - call cache_enter to add entries depending on the MAKEENTRY flag
@@ -134,11 +135,15 @@
* ... -> namei -> cache_fplookup -> cache_fplookup_noentry -> VOP_LOOKUP ->
* vfs_cache_lookup -> VOP_CACHEDLOOKUP -> ufs_lookup_ino -> cache_enter
*
+ * You may notice a degree of CPU waste in this callchain.
+ *
* III. Performance considerations
*
* For lockless case forward lookup avoids any writes to shared areas apart
* from the terminal path component. In other words non-modifying lookups of
- * different files don't suffer any scalability problems in the namecache.
+ * different files don't suffer any scalability problems in the namecache
+ * itself.
+ *
* Looking up the same file is limited by VFS and goes beyond the scope of this
* file.
*
@@ -158,8 +163,10 @@
*
* IV. Observability
*
- * Note not everything has an explicit dtrace probe nor it should have, thus
- * some of the one-liners below depend on implementation details.
+ * Several statistics are collected in the vfs.cache sysctl tree.
+ *
+ * Some of the state can be checked for with explicit dtrace probes, most of it
+ * depends on implementation details.
*
* Examples:
*
@@ -167,7 +174,7 @@
* # line number, column 2 is status code (see cache_fpl_status)
* dtrace -n 'vfs:fplookup:lookup:done { @[arg1, arg2] = count(); }'
*
- * # Lengths of names added by binary name
+ * # Histogram of lengths of names added, aggregated by which programs are doing it
* dtrace -n 'fbt::cache_enter_time:entry { @[execname] = quantize(args[2]->cn_namelen); }'
*
* # Same as above but only those which exceed 64 characters
@@ -195,6 +202,11 @@
* - vnodes are subject to being recycled even if target inode is left in memory,
* which loses the name cache entries when it perhaps should not. in case of tmpfs
* names get duplicated -- kept by filesystem itself and namecache separately
+ * - vnode reclamation (see vnlru in kern/vfs_subr.c) defaults to skipping
+ * directories for this very reason, which arguably further reduces the quality
+ * of vnode LRU. Per the above this is done to avoid breaking vnode -> path
+ * resolution (it becomes expensive for directories and impossible for the rest).
+ * This would not be a factor if namecache entries could persist without vnodes.
* - struct namecache has a fixed size and comes in 2 variants, often wasting
* space. now hard to replace with malloc due to dependence on SMR, which
* requires UMA zones to opt in
@@ -207,7 +219,8 @@
* performance left on the table, most notably from single-threaded standpoint.
* Below is a woefully incomplete list of changes which can help. Ideas are
* mostly sketched out, no claim is made all kinks or prerequisites are laid
- * out.
+ * out. The name of the game is eliding branches altogether and hopefully some
+ * of the memory accesses.
*
* Note there is performance lost all over VFS.
*
@@ -223,13 +236,6 @@
* the vnode to hang around for the long haul, but would work for aforementioned
* stat(2) but also access(2), readlink(2), realpathat(2) and probably more.
*
- * === hotpatching for sdt probes
- *
- * They result in *tons* of branches all over with rather regrettable codegen
- * at times. Removing sdt probes altogether gives over 2% boost in lookup rate.
- * Reworking the code to patch itself at runtime with asm goto would solve it.
- * asm goto is fully supported by gcc and clang.
- *
* === copyinstr
*
* On all architectures it operates one byte at a time, while it could be
@@ -251,10 +257,12 @@
* things worked out locklessly. Instead the lockless lookup could be the
* actual entry point which calls what is currently namei as a fallback.
*
+ * It could be hotpatched if lockless lookup is disabled.
+ *
* === avoidable branches in cache_can_fplookup
*
* The cache_fast_lookup_enabled flag check could be hotpatchable (in fact if
- * this is off, none of fplookup code should execute).
+ * this is off, none of fplookup code should execute, see above).
*
* Both audit and capsicum branches can be combined into one, but it requires
* paying off a lot of tech debt first.
@@ -277,8 +285,18 @@
*
* === inactive on v_usecount reaching 0
*
- * VOP_NEED_INACTIVE should not exist. Filesystems would indicate need for such
- * processing with a bit in usecount.
+ * VOP_NEED_INACTIVE should not exist. Filesystems can indicate the need for such
+ * processing with a bit in usecount and by adding a hold count. Then the vput fast path
+ * would become as simple as (ACHTUNG: locking ignored):
+ *
+ * ref = atomic_fetchadd_int(&vp->v_count, -1) - 1;
+ * if ((ref & MAGIC_BIT) == 0) // common case
+ * return;
+ * if (ref != 0) // the bit is set but this was not the last user
+ * return;
+ * // do inactive here
+ *
+ * Also see below.
*
* === v_holdcnt
*
@@ -287,7 +305,8 @@
* vnlru et al would consider the vnode not-freeable if has either hold or
* usecount on it.
*
- * This would eliminate 2 atomics.
+ * This would eliminate 2 atomics in the common case of securing a vnode and
+ * undoing it.
*/
static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
@@ -4632,7 +4651,7 @@ cache_fplookup_negative_promote(struct cache_fpl *fpl, struct namecache *oncp,
}
/*
- * The target vnode is not supported, prepare for the slow path to take over.
+ * Prepare fallback to the locked lookup while trying to retain the progress.
*/
static int __noinline
cache_fplookup_partial_setup(struct cache_fpl *fpl)
@@ -6289,53 +6308,90 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl)
* Note: all VOP_FPLOOKUP_VEXEC routines have a comment referencing this one.
*
* Filesystems can opt in by setting the MNTK_FPLOOKUP flag and meeting criteria
- * outlined below.
- *
- * Traditional vnode lookup conceptually looks like this:
+ * outlined at the end.
*
- * vn_lock(current);
- * for (;;) {
- * next = find();
- * vn_lock(next);
- * vn_unlock(current);
- * current = next;
- * if (last)
- * break;
- * }
- * return (current);
+ * Traversing from one vnode to another requires atomicity with regard to
+ * permissions, mount points and of course their relative placement (if you are
+ * looking up "bar" in "foo" and you found it, it better be in that directory
+ * at the time).
*
- * Each jump to the next vnode is safe memory-wise and atomic with respect to
- * any modifications thanks to holding respective locks.
+ * Normally this is accomplished with locking, but it comes with a significant
+ * performance hit and is untenable as a fast path even in a moderate core
+ * count environment (at the time of writing this comment this would be a
+ * little south of 100).
*
* The same guarantee can be provided with a combination of safe memory
* reclamation and sequence counters instead. If all operations which affect
* the relationship between the current vnode and the one we are looking for
* also modify the counter, we can verify whether all the conditions held as
- * we made the jump. This includes things like permissions, mount points etc.
- * Counter modification is provided by enclosing relevant places in
- * vn_seqc_write_begin()/end() calls.
+ * we made the jump.
*
- * Thus this translates to:
+ * See places which issue vn_seqc_write_begin()/vn_seqc_write_end() for
+ * operations affected.
+ *
+ * Suppose the variable "cnp" contains lookup metadata (the path etc.), then
+ * locked lookup conceptually looks like this:
+ *
+ * // lock the current directory
+ * vn_lock(dvp);
+ * for (;;) {
+ * // permission check
+ * if (!canlookup(dvp, cnp))
+ * abort();
+ * // look for the target name inside dvp
+ * tvp = findnext(dvp, cnp);
+ * vn_lock(tvp);
+ * // tvp is still guaranteed to be inside of dvp because of the lock on dvp
+ * vn_unlock(dvp);
+ * // dvp is unlocked. its state is now arbitrary, but that's fine as we
+ * // made the jump while everything relevant was correct, continue with tvp
+ * // as the directory to look up names in
+ * dvp = tvp;
+ * if (last)
+ * break;
+ * // if not last loop back and continue until done
+ * }
+ * vget(tvp);
+ * return (tvp);
+ *
+ * Lockless lookup replaces locking with sequence counter checks:
*
* vfs_smr_enter();
* dvp_seqc = seqc_read_any(dvp);
- * if (seqc_in_modify(dvp_seqc)) // someone is altering the vnode
+ * // fail if someone is altering the directory vnode
+ * if (seqc_in_modify(dvp_seqc))
* abort();
* for (;;) {
- * tvp = find();
+ * // permission check. note it can race, but we will validate the outcome
+ * // with a seqc
+ * if (!canlookup_smr(dvp, cnp)) {
+ * // has dvp changed from under us? if so, the denial may be invalid
+ * if (!seqc_consistent(dvp, dvp_seqc))
+ * fallback_to_locked();
+ * // nothing changed, lookup denial is valid
+ * fail();
+ * }
+ * // look for the target name inside dvp
+ * tvp = findnext(dvp, cnp);
* tvp_seqc = seqc_read_any(tvp);
- * if (seqc_in_modify(tvp_seqc)) // someone is altering the target vnode
- * abort();
- * if (!seqc_consistent(dvp, dvp_seqc) // someone is altering the vnode
- * abort();
- * dvp = tvp; // we know nothing of importance has changed
- * dvp_seqc = tvp_seqc; // store the counter for the tvp iteration
+ * // bail if someone is altering the target vnode
+ * if (seqc_in_modify(tvp_seqc))
+ * fallback_to_locked();
+ * // bail if someone is altering the directory vnode
+ * if (!seqc_consistent(dvp, dvp_seqc))
+ * fallback_to_locked();
+ * // we confirmed neither dvp nor tvp changed while we were making the
+ * // jump to the next component, thus the result is the same as if we
+ * // held the lock on dvp and tvp the entire time, continue with tvp
+ * // as the directory to look up names in
+ * dvp = tvp;
+ * dvp_seqc = tvp_seqc;
* if (last)
* break;
* }
* vget(); // secure the vnode
* if (!seqc_consistent(tvp, tvp_seqc) // final check
- * abort();
+ * fallback_to_locked();
* // at this point we know nothing has changed for any parent<->child pair
* // as they were crossed during the lookup, meaning we matched the guarantee
* // of the locked variant
diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c
index cd30d5cfae47..ceda770cb714 100644
--- a/sys/kern/vfs_init.c
+++ b/sys/kern/vfs_init.c
@@ -103,6 +103,16 @@ struct vattr va_null;
* Routines having to do with the management of the vnode table.
*/
+void
+vfs_unref_vfsconf(struct vfsconf *vfsp)
+{
+ vfsconf_lock();
+ KASSERT(vfsp->vfc_refcount > 0,
+ ("vfs %p refcount underflow %d", vfsp, vfsp->vfc_refcount));
+ vfsp->vfc_refcount--;
+ vfsconf_unlock();
+}
+
static struct vfsconf *
vfs_byname_locked(const char *name)
{
@@ -123,9 +133,11 @@ vfs_byname(const char *name)
{
struct vfsconf *vfsp;
- vfsconf_slock();
+ vfsconf_lock();
vfsp = vfs_byname_locked(name);
- vfsconf_sunlock();
+ if (vfsp != NULL)
+ vfsp->vfc_refcount++;
+ vfsconf_unlock();
return (vfsp);
}
@@ -387,7 +399,7 @@ vfs_register(struct vfsconf *vfc)
static int once;
struct vfsconf *tvfc;
uint32_t hashval;
- int secondpass;
+ int error, prevmaxconf, secondpass;
if (!once) {
vattr_null(&va_null);
@@ -405,6 +417,7 @@ vfs_register(struct vfsconf *vfc)
return (EEXIST);
}
+ prevmaxconf = maxvfsconf;
if (vfs_typenumhash != 0) {
/*
* Calculate a hash on vfc_name to use for vfc_typenum. Unless
@@ -497,16 +510,24 @@ vfs_register(struct vfsconf *vfc)
vfc->vfc_vfsops = &vfsops_sigdefer;
}
- if (vfc->vfc_flags & VFCF_JAIL)
- prison_add_vfs(vfc);
-
/*
* Call init function for this VFS...
*/
if ((vfc->vfc_flags & VFCF_SBDRY) != 0)
- vfc->vfc_vfsops_sd->vfs_init(vfc);
+ error = vfc->vfc_vfsops_sd->vfs_init(vfc);
else
- vfc->vfc_vfsops->vfs_init(vfc);
+ error = vfc->vfc_vfsops->vfs_init(vfc);
+
+ if (error != 0) {
+ maxvfsconf = prevmaxconf;
+ TAILQ_REMOVE(&vfsconf, vfc, vfc_list);
+ vfsconf_unlock();
+ return (error);
+ }
+
+ if ((vfc->vfc_flags & VFCF_JAIL) != 0)
+ prison_add_vfs(vfc);
+
vfsconf_unlock();
/*
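vfs_byname() now returns the vfsconf with its reference count bumped under the vfsconf lock. Callers either hand that reference over to the mount they go on to create (vfs_mount_destroy() drops it, per the vfs_mount.c hunk below) or release it themselves on failure. A sketch of the error-path convention; the filesystem name and the helper are placeholders:

    struct vfsconf *vfsp;
    int error;

    vfsp = vfs_byname("tmpfs");              /* reference acquired here */
    if (vfsp == NULL)
            return (ENODEV);
    error = prepare_mount(vfsp);             /* placeholder for real work */
    if (error != 0) {
            vfs_unref_vfsconf(vfsp);         /* not mounting, drop it */
            return (error);
    }
    /* on success the reference is carried by the new mount */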
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
index 8e64a7fe966b..13403acacc08 100644
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -683,7 +683,6 @@ vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
MPASSERT(mp->mnt_vfs_ops == 1, mp,
("vfs_ops should be 1 but %d found", mp->mnt_vfs_ops));
(void) vfs_busy(mp, MBF_NOWAIT);
- atomic_add_acq_int(&vfsp->vfc_refcount, 1);
mp->mnt_op = vfsp->vfc_vfsops;
mp->mnt_vfc = vfsp;
mp->mnt_stat.f_type = vfsp->vfc_typenum;
@@ -731,7 +730,6 @@ vfs_mount_destroy(struct mount *mp)
__FILE__, __LINE__));
MPPASS(mp->mnt_writeopcount == 0, mp);
MPPASS(mp->mnt_secondary_writes == 0, mp);
- atomic_subtract_rel_int(&mp->mnt_vfc->vfc_refcount, 1);
if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
struct vnode *vp;
@@ -769,6 +767,9 @@ vfs_mount_destroy(struct mount *mp)
vfs_free_addrlist(mp->mnt_export);
free(mp->mnt_export, M_MOUNT);
}
+ vfsconf_lock();
+ mp->mnt_vfc->vfc_refcount--;
+ vfsconf_unlock();
crfree(mp->mnt_cred);
uma_zfree(mount_zone, mp);
}
@@ -1133,6 +1134,7 @@ vfs_domount_first(
if (jailed(td->td_ucred) && (!prison_allow(td->td_ucred,
vfsp->vfc_prison_flag) || vp == td->td_ucred->cr_prison->pr_root)) {
vput(vp);
+ vfs_unref_vfsconf(vfsp);
return (EPERM);
}
@@ -1169,6 +1171,7 @@ vfs_domount_first(
}
if (error != 0) {
vput(vp);
+ vfs_unref_vfsconf(vfsp);
return (error);
}
vn_seqc_write_begin(vp);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index a6e38be89291..f86bda2aa6f0 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -2186,6 +2186,8 @@ freevnode(struct vnode *vp)
{
struct bufobj *bo;
+ ASSERT_VOP_UNLOCKED(vp, __func__);
+
/*
* The vnode has been marked for destruction, so free it.
*
@@ -2222,12 +2224,16 @@ freevnode(struct vnode *vp)
mac_vnode_destroy(vp);
#endif
if (vp->v_pollinfo != NULL) {
+ int error __diagused;
+
/*
* Use LK_NOWAIT to shut up witness about the lock. We may get
* here while having another vnode locked when trying to
* satisfy a lookup and needing to recycle.
*/
- VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT);
+ error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT);
+ VNASSERT(error == 0, vp,
+ ("freevnode: cannot lock vp %p for pollinfo destroy", vp));
destroy_vpollinfo(vp->v_pollinfo);
VOP_UNLOCK(vp);
vp->v_pollinfo = NULL;
@@ -4501,6 +4507,17 @@ vgonel(struct vnode *vp)
/*
* Done with purge, reset to the standard lock and invalidate
* the vnode.
+ *
+ * FIXME: this is buggy for vnode ops with custom locking primitives.
+ *
+ * vget used to be gated with a special flag serializing it against vgone,
+ * which got lost in the process of SMP-ifying the VFS layer.
+ *
+ * Suppose a custom locking routine references ->v_data.
+ *
+ * Since now it is possible to start executing it as vgone is
+ * progressing, this very well may crash as ->v_data gets invalidated
+ * and memory used to back it is freed.
*/
vp->v_vnlock = &vp->v_lock;
vp->v_op = &dead_vnodeops;
diff --git a/sys/libkern/arm64/crc32c_armv8.S b/sys/libkern/arm64/crc32c_armv8.S
index 649afff4b711..430b24f7615a 100644
--- a/sys/libkern/arm64/crc32c_armv8.S
+++ b/sys/libkern/arm64/crc32c_armv8.S
@@ -39,14 +39,14 @@ ENTRY(armv8_crc32c)
cbz w2, end
tbz x1, #0x0, half_word_aligned
sub w2, w2, 0x1
- ldr w10, [x1], #0x1
+ ldrb w10, [x1], #0x1
crc32cb w0, w0, w10
half_word_aligned:
cmp w2, #0x2
b.lo last_byte
tbz x1, #0x1, word_aligned
sub w2, w2, 0x2
- ldr w10, [x1], #0x2
+ ldrh w10, [x1], #0x2
crc32ch w0, w0, w10
word_aligned:
cmp w2, #0x4
@@ -69,11 +69,11 @@ last_word:
crc32cw w0, w0, w10
last_half_word:
tbz w2, #0x1, last_byte
- ldr w10, [x1], #0x2
+ ldrh w10, [x1], #0x2
crc32ch w0, w0, w10
last_byte:
tbz w2, #0x0, end
- ldr w10, [x1], #0x1
+ ldrb w10, [x1], #0x1
crc32cb w0, w0, w10
end:
ret
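The bug here is that the byte and halfword alignment steps used full 32-bit ldr loads while only one or two bytes were consumed, which can read past the end of the buffer; the checksum itself was unaffected because crc32cb/crc32ch only look at the low bits. A C rendering of the corrected head-alignment logic using the ARMv8 CRC32 intrinsics (sketch only; requires a compiler targeting armv8-a+crc):

    #include <arm_acle.h>
    #include <stddef.h>
    #include <stdint.h>

    /*
     * Consume up to one byte and one halfword so *p becomes 4-byte
     * aligned, using loads no wider than what is consumed.
     */
    static uint32_t
    crc32c_align_head(uint32_t crc, const uint8_t **p, size_t *len)
    {
            if (*len >= 1 && ((uintptr_t)*p & 1) != 0) {
                    crc = __crc32cb(crc, **p);
                    *p += 1;
                    *len -= 1;
            }
            if (*len >= 2 && ((uintptr_t)*p & 2) != 0) {
                    uint16_t v;

                    __builtin_memcpy(&v, *p, sizeof(v));    /* halfword load */
                    crc = __crc32ch(crc, v);
                    *p += 2;
                    *len -= 2;
            }
            return (crc);
    }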
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
index 5315d518afd8..f9fdbca78869 100644
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -576,7 +576,10 @@ _mlx5ib= mlx5ib
${MACHINE_CPUARCH} == "i386"
_ena= ena
_gve= gve
+# gcc13 and earlier lack __builtin_bitcountg used by linux emulation
+.if !(${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} < 140000)
_iwlwifi= iwlwifi
+.endif
_rtw88= rtw88
_rtw89= rtw89
_vmware= vmware
diff --git a/sys/modules/dtb/rockchip/Makefile b/sys/modules/dtb/rockchip/Makefile
index 33c2048cbb15..9c8ca1acc837 100644
--- a/sys/modules/dtb/rockchip/Makefile
+++ b/sys/modules/dtb/rockchip/Makefile
@@ -21,7 +21,8 @@ DTS= \
rockchip/rk3566-quartz64-a.dts \
rockchip/rk3568-nanopi-r5s.dts \
rockchip/rk3566-radxa-zero-3e.dts \
- rockchip/rk3566-radxa-zero-3w.dts
+ rockchip/rk3566-radxa-zero-3w.dts \
+ rockchip/rk3568-bpi-r2-pro.dts
DTSO= rk3328-analog-sound.dtso \
rk3328-i2c0.dtso \
diff --git a/sys/modules/e6000sw/Makefile b/sys/modules/e6000sw/Makefile
index da08f80b0a29..73cbaea801f0 100644
--- a/sys/modules/e6000sw/Makefile
+++ b/sys/modules/e6000sw/Makefile
@@ -3,6 +3,6 @@
KMOD= e6000sw
SRCS= e6000sw.c
-SRCS+= bus_if.h etherswitch_if.h mdio_if.h miibus_if.h ofw_bus_if.h opt_platform.h
+SRCS+= bus_if.h device_if.h etherswitch_if.h mdio_if.h miibus_if.h ofw_bus_if.h opt_platform.h
.include <bsd.kmod.mk>
diff --git a/sys/modules/etherswitch/Makefile b/sys/modules/etherswitch/Makefile
index 087231545cd4..0b16a19e5117 100644
--- a/sys/modules/etherswitch/Makefile
+++ b/sys/modules/etherswitch/Makefile
@@ -3,7 +3,7 @@
KMOD = etherswitch
SRCS= etherswitch.c
-SRCS+= mdio_if.h miibus_if.h etherswitch_if.h etherswitch_if.c
+SRCS+= bus_if.h device_if.h mdio_if.h miibus_if.h etherswitch_if.h etherswitch_if.c
CFLAGS+= -I${SRCTOP}/sys/dev/etherswitch
.include <bsd.kmod.mk>
diff --git a/sys/modules/evdev/Makefile b/sys/modules/evdev/Makefile
index bd66013885db..20813b73f6dd 100644
--- a/sys/modules/evdev/Makefile
+++ b/sys/modules/evdev/Makefile
@@ -2,7 +2,7 @@
KMOD= evdev
SRCS= cdev.c evdev.c evdev_mt.c evdev_utils.c
-SRCS+= opt_evdev.h bus_if.h device_if.h
+SRCS+= opt_evdev.h opt_kbd.h bus_if.h device_if.h
EXPORT_SYMS= YES
diff --git a/sys/modules/gpio/gpioaei/Makefile b/sys/modules/gpio/gpioaei/Makefile
index 8f856af48eb7..1f0f1d0e53a6 100644
--- a/sys/modules/gpio/gpioaei/Makefile
+++ b/sys/modules/gpio/gpioaei/Makefile
@@ -10,6 +10,8 @@ SRCS+= \
gpio_if.h \
gpiobus_if.h
+SRCS+= opt_acpi.h opt_platform.h
+
CFLAGS+= -I. -I${SRCTOP}/sys/dev/gpio/
.include <bsd.kmod.mk>
diff --git a/sys/modules/gve/Makefile b/sys/modules/gve/Makefile
index 08b26a994e36..ece275485df7 100644
--- a/sys/modules/gve/Makefile
+++ b/sys/modules/gve/Makefile
@@ -40,5 +40,5 @@ SRCS= gve_main.c \
gve_tx_dqo.c \
gve_sysctl.c
SRCS+= device_if.h bus_if.h pci_if.h
-
+SRCS+= opt_inet6.h
.include <bsd.kmod.mk>
diff --git a/sys/modules/ichwd/Makefile b/sys/modules/ichwd/Makefile
index 3c3bbc37eff5..27b4c38437ff 100644
--- a/sys/modules/ichwd/Makefile
+++ b/sys/modules/ichwd/Makefile
@@ -1,6 +1,6 @@
.PATH: ${SRCTOP}/sys/dev/ichwd
KMOD= ichwd
-SRCS= ichwd.c device_if.h bus_if.h pci_if.h isa_if.h
+SRCS= i6300esbwd.c ichwd.c device_if.h bus_if.h pci_if.h isa_if.h
.include <bsd.kmod.mk>
diff --git a/sys/modules/if_infiniband/Makefile b/sys/modules/if_infiniband/Makefile
index 01e3164b1271..7ec343999da1 100644
--- a/sys/modules/if_infiniband/Makefile
+++ b/sys/modules/if_infiniband/Makefile
@@ -3,7 +3,8 @@
KMOD= if_infiniband
SRCS= if_infiniband.c \
opt_inet.h \
- opt_inet6.h
+ opt_inet6.h \
+ opt_kbd.h
EXPORT_SYMS= YES
diff --git a/sys/modules/if_vlan/Makefile b/sys/modules/if_vlan/Makefile
index 3077f4289d5a..0cdab3f7653a 100644
--- a/sys/modules/if_vlan/Makefile
+++ b/sys/modules/if_vlan/Makefile
@@ -2,6 +2,6 @@
KMOD= if_vlan
SRCS= if_vlan.c
-SRCS+= opt_inet.h opt_inet6.h opt_kern_tls.h opt_vlan.h opt_ratelimit.h
+SRCS+= opt_inet.h opt_inet6.h opt_ipsec.h opt_kern_tls.h opt_vlan.h opt_ratelimit.h
.include <bsd.kmod.mk>
diff --git a/sys/modules/irdma/Makefile b/sys/modules/irdma/Makefile
index b2ffb67ca66f..a9ef6e63d3f2 100644
--- a/sys/modules/irdma/Makefile
+++ b/sys/modules/irdma/Makefile
@@ -1,8 +1,8 @@
.include <bsd.own.mk>
-OFED_INC_DIR = ${.CURDIR}/../../ofed/include
-ICE_DIR = ${.CURDIR}/../../dev/ice
-.PATH: ${.CURDIR}/../../dev/irdma
+OFED_INC_DIR = ${SRCTOP}/sys/ofed/include
+ICE_DIR = ${SRCTOP}/sys/dev/ice
+.PATH: ${SRCTOP}/sys/dev/irdma
KMOD= irdma
SRCS= icrdma.c
diff --git a/sys/modules/linux64/Makefile b/sys/modules/linux64/Makefile
index b23891a65a4f..327da11afdaf 100644
--- a/sys/modules/linux64/Makefile
+++ b/sys/modules/linux64/Makefile
@@ -31,6 +31,7 @@ SRCS= linux_dummy_machdep.c \
opt_ktrace.h \
opt_inet6.h \
opt_posix.h \
+ opt_usb.h \
bus_if.h \
device_if.h \
vnode_if.h \
diff --git a/sys/modules/md/Makefile b/sys/modules/md/Makefile
index 2b0586c44717..3f16e04860a1 100644
--- a/sys/modules/md/Makefile
+++ b/sys/modules/md/Makefile
@@ -1,6 +1,6 @@
.PATH: ${SRCTOP}/sys/dev/md
KMOD= geom_md
-SRCS= md.c opt_md.h opt_geom.h opt_rootdevname.h vnode_if.h
+SRCS= bus_if.h device_if.h md.c opt_md.h opt_geom.h opt_rootdevname.h vnode_if.h
.include <bsd.kmod.mk>
diff --git a/sys/modules/miiproxy/Makefile b/sys/modules/miiproxy/Makefile
index 5173358989da..730bef4220cd 100644
--- a/sys/modules/miiproxy/Makefile
+++ b/sys/modules/miiproxy/Makefile
@@ -3,7 +3,7 @@
KMOD = miiproxy
SRCS= miiproxy.c
-SRCS+= mdio_if.h miibus_if.h
+SRCS+= bus_if.h mdio_if.h miibus_if.h opt_platform.h
CFLAGS+= -I${SRCTOP}/sys/dev/etherswitch
.include <bsd.kmod.mk>
diff --git a/sys/modules/mlx5/Makefile b/sys/modules/mlx5/Makefile
index 506c045ab0ce..65341fdfb8aa 100644
--- a/sys/modules/mlx5/Makefile
+++ b/sys/modules/mlx5/Makefile
@@ -46,7 +46,7 @@ mlx5_ipsec_offload.c \
mlx5_ipsec.c \
mlx5_ipsec_rxtx.c
SRCS+= ${LINUXKPI_GENSRCS}
-SRCS+= opt_inet.h opt_inet6.h opt_rss.h opt_ratelimit.h
+SRCS+= opt_inet.h opt_inet6.h opt_ipsec.h opt_rss.h opt_ratelimit.h
CFLAGS+= -I${SRCTOP}/sys/ofed/include
CFLAGS+= -I${SRCTOP}/sys/ofed/include/uapi
diff --git a/sys/modules/mlx5en/Makefile b/sys/modules/mlx5en/Makefile
index 03bf174e33b0..3697fa65dc83 100644
--- a/sys/modules/mlx5en/Makefile
+++ b/sys/modules/mlx5en/Makefile
@@ -15,7 +15,7 @@ mlx5_en_rl.c \
mlx5_en_txrx.c \
mlx5_en_port_buffer.c
SRCS+= ${LINUXKPI_GENSRCS}
-SRCS+= opt_inet.h opt_inet6.h opt_rss.h opt_ratelimit.h opt_kern_tls.h
+SRCS+= opt_inet.h opt_inet6.h opt_ipsec.h opt_rss.h opt_ratelimit.h opt_kern_tls.h
.if defined(HAVE_PER_CQ_EVENT_PACKET)
CFLAGS+= -DHAVE_PER_CQ_EVENT_PACKET
diff --git a/sys/modules/netgraph/ksocket/Makefile b/sys/modules/netgraph/ksocket/Makefile
index 395fdbd7b3e3..7099648f6219 100644
--- a/sys/modules/netgraph/ksocket/Makefile
+++ b/sys/modules/netgraph/ksocket/Makefile
@@ -1,4 +1,6 @@
KMOD= ng_ksocket
SRCS= ng_ksocket.c
+SRCS+= opt_inet6.h
+
.include <bsd.kmod.mk>
diff --git a/sys/modules/nvmf/nvmf/Makefile b/sys/modules/nvmf/nvmf/Makefile
index 7ebe614998bd..21d73d363d2f 100644
--- a/sys/modules/nvmf/nvmf/Makefile
+++ b/sys/modules/nvmf/nvmf/Makefile
@@ -10,4 +10,7 @@ SRCS= nvmf.c \
nvmf_qpair.c \
nvmf_sim.c
+SRCS+= bus_if.h device_if.h
+SRCS+= opt_cam.h
+
.include <bsd.kmod.mk>
diff --git a/sys/modules/ossl/Makefile b/sys/modules/ossl/Makefile
index ac2c752e922e..c516fe0c158d 100644
--- a/sys/modules/ossl/Makefile
+++ b/sys/modules/ossl/Makefile
@@ -25,7 +25,7 @@ SRCS.arm= \
sha256-armv4.S \
sha512-armv4.S \
ossl_arm.c \
- ossl_aes_gcm.c
+ ossl_aes_gcm_neon.c
SRCS.aarch64= \
chacha-armv8.S \
@@ -48,6 +48,7 @@ SRCS.amd64= \
sha256-x86_64.S \
sha512-x86_64.S \
ossl_aes_gcm.c \
+ ossl_aes_gcm_avx512.c \
ossl_x86.c
SRCS.i386= \
@@ -60,6 +61,8 @@ SRCS.i386= \
ossl_x86.c
SRCS.powerpc64le= \
+ aes-gcm-ppc.S \
+ ossl_aes_gcm.c \
ossl_ppccap.c \
aes-ppc.S \
aesp8-ppc.S \
@@ -81,6 +84,8 @@ SRCS.powerpc64le= \
x25519-ppc64.S
SRCS.powerpc64= \
+ aes-gcm-ppc.S \
+ ossl_aes_gcm.c \
ossl_ppccap.c \
aes-ppc.S \
aesp8-ppc.S \
diff --git a/sys/modules/qatfw/qat_4xxx/Makefile b/sys/modules/qatfw/qat_4xxx/Makefile
index fb7171bcaf45..f6f19d6cbe32 100644
--- a/sys/modules/qatfw/qat_4xxx/Makefile
+++ b/sys/modules/qatfw/qat_4xxx/Makefile
@@ -4,6 +4,9 @@
KMOD= qat_4xxx_fw
-FIRMWS= qat_4xxx.bin:qat_4xxx_fw:111 qat_4xxx_mmp.bin:qat_4xxx_mmp_fw:111
+FIRMWS= qat_4xxx.bin:qat_4xxx_fw:111 \
+ qat_4xxx_mmp.bin:qat_4xxx_mmp_fw:111 \
+ qat_402xx.bin:qat_402xx_fw:111 \
+ qat_402xx_mmp.bin:qat_402xx_mmp_fw:111
.include <bsd.kmod.mk>
diff --git a/sys/modules/qlnx/qlnxev/Makefile b/sys/modules/qlnx/qlnxev/Makefile
index ed62f1f1dd40..766a5a950032 100644
--- a/sys/modules/qlnx/qlnxev/Makefile
+++ b/sys/modules/qlnx/qlnxev/Makefile
@@ -49,6 +49,7 @@ SRCS+=ecore_vf.c
SRCS+=qlnx_ioctl.c
SRCS+=qlnx_os.c
+SRCS+=opt_inet.h
SRCS+= ${LINUXKPI_GENSRCS}
diff --git a/sys/modules/sound/driver/hda/Makefile b/sys/modules/sound/driver/hda/Makefile
index 0eec98fc53e1..1e137dc5671c 100644
--- a/sys/modules/sound/driver/hda/Makefile
+++ b/sys/modules/sound/driver/hda/Makefile
@@ -2,7 +2,7 @@
KMOD= snd_hda
SRCS= device_if.h bus_if.h pci_if.h channel_if.h mixer_if.h hdac_if.h
-SRCS+= hdaa.c hdaa.h hdaa_patches.c hdac.c hdac_if.h hdac_if.c
-SRCS+= hdacc.c hdac_private.h hdac_reg.h hda_reg.h hdac.h
+SRCS+= hdaa.c hdaa.h hdaa_patches.c hdacc.c hdac.c hdac_if.c
+SRCS+= hdac_private.h hdac_reg.h hda_reg.h hdac.h
.include <bsd.kmod.mk>
diff --git a/sys/modules/uinput/Makefile b/sys/modules/uinput/Makefile
index 66ade2a5bb33..a9e2ec867b91 100644
--- a/sys/modules/uinput/Makefile
+++ b/sys/modules/uinput/Makefile
@@ -2,6 +2,6 @@
KMOD= uinput
SRCS= uinput.c
-SRCS+= opt_evdev.h
+SRCS+= opt_evdev.h opt_kbd.h
.include <bsd.kmod.mk>
diff --git a/sys/modules/usb/Makefile b/sys/modules/usb/Makefile
index 1290b878fa37..d9b1c8635b30 100644
--- a/sys/modules/usb/Makefile
+++ b/sys/modules/usb/Makefile
@@ -46,10 +46,9 @@ SUBDIR = usb
SUBDIR += ${_dwc_otg} ehci ${_musb} ohci uhci xhci ${_uss820dci} \
${_atmegadci} ${_avr32dci} ${_rsu} ${_rsufw} ${_bcm2838_xhci}
SUBDIR += mtw ${_rum} ${_run} ${_runfw} ${_uath} upgt usie ural ${_zyd} ${_urtw}
-SUBDIR += atp cfumass uhid uhid_snes ukbd ums udbp uep wmt wsp ugold uled \
- usbhid
-SUBDIR += ucom u3g uark ubsa ubser uchcom ucycom ufoma uftdi ugensa uipaq ulpt \
- umb umct umcs umodem umoscom uplcom uslcom uvisor uvscom
+SUBDIR += atp cfumass uhid uhid_snes ukbd ums udbp uep wmt wsp ugold uled usbhid
+SUBDIR += ucom u3g uark ubsa ubser uchcom ucycom udbc ufoma uftdi ugensa uipaq
+SUBDIR += ulpt umb umct umcs umodem umoscom uplcom uslcom uvisor uvscom
SUBDIR += i2ctinyusb
SUBDIR += cp2112
SUBDIR += udl
diff --git a/sys/modules/usb/udbc/Makefile b/sys/modules/usb/udbc/Makefile
new file mode 100644
index 000000000000..9996b2e391fb
--- /dev/null
+++ b/sys/modules/usb/udbc/Makefile
@@ -0,0 +1,9 @@
+S= ${SRCTOP}/sys
+
+.PATH: $S/dev/usb/serial
+
+KMOD= udbc
+SRCS= opt_bus.h opt_usb.h device_if.h bus_if.h usb_if.h usbdevs.h \
+ udbc.c
+
+.include <bsd.kmod.mk>
diff --git a/sys/modules/usb/usie/Makefile b/sys/modules/usb/usie/Makefile
index 6a5f79248ff8..9edeed082f8d 100644
--- a/sys/modules/usb/usie/Makefile
+++ b/sys/modules/usb/usie/Makefile
@@ -29,6 +29,6 @@
KMOD = usie
SRCS = if_usie.c
SRCS += opt_bus.h opt_usb.h device_if.h bus_if.h \
- usb_if.h usbdevs.h opt_inet.h
+ usb_if.h usbdevs.h opt_inet.h opt_inet6.h
.include <bsd.kmod.mk>
diff --git a/sys/modules/usb/wmt/Makefile b/sys/modules/usb/wmt/Makefile
index 72cf1d814908..8cb5abd7383e 100644
--- a/sys/modules/usb/wmt/Makefile
+++ b/sys/modules/usb/wmt/Makefile
@@ -3,6 +3,6 @@ S= ${SRCTOP}/sys
.PATH: $S/dev/usb/input
KMOD= wmt
-SRCS= opt_bus.h opt_usb.h device_if.h bus_if.h usb_if.h usbdevs.h wmt.c
+SRCS= opt_bus.h opt_kbd.h opt_usb.h device_if.h bus_if.h usb_if.h usbdevs.h wmt.c
.include <bsd.kmod.mk>
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index 12274bcceea1..77d08862dcd5 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -258,6 +258,9 @@
/* dops->d_revalidate() takes 4 args */
/* #undef HAVE_D_REVALIDATE_4ARGS */
+/* Define if d_set_d_op() is available */
+/* #undef HAVE_D_SET_D_OP */
+
/* Define to 1 if you have the 'execvpe' function. */
#define HAVE_EXECVPE 1
@@ -483,9 +486,6 @@
/* building against unsupported kernel version */
/* #undef HAVE_LINUX_EXPERIMENTAL */
-/* Define to 1 if you have the <linux/stat.h> header file. */
-/* #undef HAVE_LINUX_STAT_H */
-
/* makedev() is declared in sys/mkdev.h */
/* #undef HAVE_MAKEDEV_IN_MKDEV */
@@ -840,7 +840,7 @@
/* #undef ZFS_DEVICE_MINOR */
/* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.3.99-571-FreeBSD_ga9410ccbd"
+#define ZFS_META_ALIAS "zfs-2.4.99-52-FreeBSD_g3f4312a0a"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -870,10 +870,10 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
-#define ZFS_META_RELEASE "571-FreeBSD_ga9410ccbd"
+#define ZFS_META_RELEASE "52-FreeBSD_g3f4312a0a"
/* Define the project version. */
-#define ZFS_META_VERSION "2.3.99"
+#define ZFS_META_VERSION "2.4.99"
/* count is located in percpu_ref.data */
/* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index 5c265cf5b08e..585e308e72d5 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.3.99-571-ga9410ccbd"
+#define ZFS_META_GITREV "zfs-2.4.99-52-g3f4312a0a"
diff --git a/sys/net/if.c b/sys/net/if.c
index 79c883fd4a0a..b6a798aa0fab 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -74,7 +74,6 @@
#include <vm/uma.h>
#include <net/bpf.h>
-#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_clone.h>
@@ -1102,6 +1101,7 @@ if_detach_internal(struct ifnet *ifp, bool vmove)
struct ifaddr *ifa;
int i;
struct domain *dp;
+ void *if_afdata[AF_MAX];
#ifdef VIMAGE
bool shutdown;
@@ -1225,15 +1225,30 @@ finish_vnet_shutdown:
IF_AFDATA_LOCK(ifp);
i = ifp->if_afdata_initialized;
ifp->if_afdata_initialized = 0;
+ if (i != 0) {
+ /*
+ * Defer the dom_ifdetach call.
+ */
+ _Static_assert(sizeof(if_afdata) == sizeof(ifp->if_afdata),
+ "array size mismatch");
+ memcpy(if_afdata, ifp->if_afdata, sizeof(if_afdata));
+ memset(ifp->if_afdata, 0, sizeof(ifp->if_afdata));
+ }
IF_AFDATA_UNLOCK(ifp);
if (i == 0)
return;
+ /*
+	 * XXXZL: This net epoch wait is not necessary if everything above was
+	 * done correctly.  But if it was not, it at least guarantees that
+	 * threads entering the net epoch will see NULL address family
+	 * dependent data, e.g. if_afdata[AF_INET6].  A clean NULL pointer
+	 * dereference is much better than writing to freed memory.
+ */
+ NET_EPOCH_WAIT();
SLIST_FOREACH(dp, &domains, dom_next) {
- if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
- (*dp->dom_ifdetach)(ifp,
- ifp->if_afdata[dp->dom_family]);
- ifp->if_afdata[dp->dom_family] = NULL;
- }
+ if (dp->dom_ifdetach != NULL &&
+ if_afdata[dp->dom_family] != NULL)
+ (*dp->dom_ifdetach)(ifp, if_afdata[dp->dom_family]);
}
}
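
For illustration, a minimal sketch (not part of this change; the helper name is hypothetical) of the reader side that the NET_EPOCH_WAIT() above protects: any thread that enters the net epoch after the detach path has cleared ifp->if_afdata[] observes NULL and can bail out instead of touching memory already freed by dom_ifdetach().

static void
example_af_reader(struct ifnet *ifp)
{
	struct epoch_tracker et;
	void *afd;

	NET_EPOCH_ENTER(et);
	afd = ifp->if_afdata[AF_INET6];
	if (afd != NULL) {
		/* Safe to use the per-interface AF_INET6 data here. */
	}
	NET_EPOCH_EXIT(et);
}
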
@@ -2589,16 +2604,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
* flip. They require special handling because in-kernel
	 * consumers may independently toggle them.
*/
- if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
- if (new_flags & IFF_PPROMISC)
- ifp->if_flags |= IFF_PROMISC;
- else if (ifp->if_pcount == 0)
- ifp->if_flags &= ~IFF_PROMISC;
- if (log_promisc_mode_change)
- if_printf(ifp, "permanently promiscuous mode %s\n",
- ((new_flags & IFF_PPROMISC) ?
- "enabled" : "disabled"));
- }
+ if_setppromisc(ifp, new_flags & IFF_PPROMISC);
if ((ifp->if_flags ^ new_flags) & IFF_PALLMULTI) {
if (new_flags & IFF_PALLMULTI)
ifp->if_flags |= IFF_ALLMULTI;
@@ -4456,6 +4462,32 @@ if_getmtu_family(const if_t ifp, int family)
return (ifp->if_mtu);
}
+void
+if_setppromisc(if_t ifp, bool ppromisc)
+{
+ int new_flags;
+
+ if (ppromisc)
+ new_flags = ifp->if_flags | IFF_PPROMISC;
+ else
+ new_flags = ifp->if_flags & ~IFF_PPROMISC;
+ if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
+ if (new_flags & IFF_PPROMISC)
+ new_flags |= IFF_PROMISC;
+ /*
+ * Only unset IFF_PROMISC if there are no more consumers of
+ * promiscuity, i.e. the ifp->if_pcount refcount is 0.
+ */
+ else if (ifp->if_pcount == 0)
+ new_flags &= ~IFF_PROMISC;
+ if (log_promisc_mode_change)
+ if_printf(ifp, "permanently promiscuous mode %s\n",
+ ((new_flags & IFF_PPROMISC) ?
+ "enabled" : "disabled"));
+ }
+ ifp->if_flags = new_flags;
+}
+
/*
* Methods for drivers to access interface unicast and multicast
* link level addresses. Driver shall not know 'struct ifaddr' neither
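
A minimal usage sketch for the new accessor (hypothetical caller, not from this change): in-kernel consumers toggle permanent promiscuous mode without poking if_flags directly, and the accessor handles IFF_PROMISC and the log message.

static void
example_enable_ppromisc(if_t ifp)
{
	/* Sets IFF_PPROMISC and, as a side effect, IFF_PROMISC. */
	if_setppromisc(ifp, true);
}
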
diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c
index 66555fd1feb5..d7911a348d87 100644
--- a/sys/net/if_bridge.c
+++ b/sys/net/if_bridge.c
@@ -522,11 +522,11 @@ SYSCTL_BOOL(_net_link_bridge, OID_AUTO, log_mac_flap,
"Log MAC address port flapping");
/* allow IP addresses on bridge members */
-VNET_DEFINE_STATIC(bool, member_ifaddrs) = false;
+VNET_DEFINE_STATIC(bool, member_ifaddrs) = true;
#define V_member_ifaddrs VNET(member_ifaddrs)
SYSCTL_BOOL(_net_link_bridge, OID_AUTO, member_ifaddrs,
CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(member_ifaddrs), false,
- "Allow layer 3 addresses on bridge members");
+ "Allow layer 3 addresses on bridge members (deprecated)");
static bool
bridge_member_ifaddrs(void)
@@ -1448,24 +1448,30 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
/*
* If member_ifaddrs is disabled, do not allow an interface with
- * assigned IP addresses to be added to a bridge.
+ * assigned IP addresses to be added to a bridge. Skip this check
+ * for gif interfaces, because the IP address assigned to a gif
+ * interface is separate from the bridge's Ethernet segment.
*/
- if (!V_member_ifaddrs) {
+ if (ifs->if_type != IFT_GIF) {
struct ifaddr *ifa;
CK_STAILQ_FOREACH(ifa, &ifs->if_addrhead, ifa_link) {
-#ifdef INET
- if (ifa->ifa_addr->sa_family == AF_INET)
- return (EXTERROR(EINVAL,
- "Member interface may not have "
- "an IPv4 address configured"));
-#endif
-#ifdef INET6
- if (ifa->ifa_addr->sa_family == AF_INET6)
+ if (ifa->ifa_addr->sa_family != AF_INET &&
+ ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+
+ if (V_member_ifaddrs) {
+ if_printf(sc->sc_ifp,
+ "WARNING: Adding member interface %s which "
+ "has an IP address assigned is deprecated "
+ "and will be unsupported in a future "
+ "release.\n", ifs->if_xname);
+ break;
+ } else {
return (EXTERROR(EINVAL,
"Member interface may not have "
- "an IPv6 address configured"));
-#endif
+ "an IP address assigned"));
+ }
}
}
@@ -2398,6 +2404,12 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m,
return (EINVAL);
}
+ /* Do VLAN filtering. */
+ if (!bridge_vfilter_out(bif, m)) {
+ m_freem(m);
+ return (0);
+ }
+
/* We may be sending a fragment so traverse the mbuf */
for (; m; m = m0) {
m0 = m->m_nextpkt;
@@ -2817,10 +2829,6 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)
goto drop;
- /* Do VLAN filtering. */
- if (!bridge_vfilter_out(dbif, m))
- goto drop;
-
if ((dbif->bif_flags & IFBIF_STP) &&
dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
goto drop;
@@ -3189,10 +3197,6 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE))
continue;
- /* Do VLAN filtering. */
- if (!bridge_vfilter_out(dbif, m))
- continue;
-
if ((dbif->bif_flags & IFBIF_STP) &&
dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
continue;
@@ -3358,6 +3362,14 @@ bridge_vfilter_out(const struct bridge_iflist *dbif, const struct mbuf *m)
NET_EPOCH_ASSERT();
+ /*
+ * If the interface is in span mode, then bif_sc will be NULL.
+ * Since the purpose of span interfaces is to receive all frames,
+ * pass everything.
+ */
+ if (dbif->bif_sc == NULL)
+ return (true);
+
/* If VLAN filtering isn't enabled, pass everything. */
if ((dbif->bif_sc->sc_flags & IFBRF_VLANFILTER) == 0)
return (true);
diff --git a/sys/net/if_bridgevar.h b/sys/net/if_bridgevar.h
index b0f579f688ac..5ed8c19f3128 100644
--- a/sys/net/if_bridgevar.h
+++ b/sys/net/if_bridgevar.h
@@ -159,7 +159,7 @@ struct ifbreq {
uint32_t ifbr_addrexceeded; /* member if addr violations */
ether_vlanid_t ifbr_pvid; /* member if PVID */
uint16_t ifbr_vlanproto; /* member if VLAN protocol */
- uint8_t pad[32];
+ uint8_t pad[28];
};
/* BRDGGIFFLAGS, BRDGSIFFLAGS */
diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c
index a213a84e17db..fbffa8f359a0 100644
--- a/sys/net/if_epair.c
+++ b/sys/net/if_epair.c
@@ -67,9 +67,9 @@
#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_media.h>
-#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_types.h>
+#include <net/if_vlan_var.h>
#include <net/netisr.h>
#ifdef RSS
#include <net/rss_config.h>
@@ -435,6 +435,21 @@ epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr)
imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX;
}
+/*
+ * Update ifp->if_hwassist according to the current value of ifp->if_capenable.
+ */
+static void
+epair_caps_changed(struct ifnet *ifp)
+{
+ uint64_t hwassist = 0;
+
+ if (ifp->if_capenable & IFCAP_TXCSUM)
+ hwassist |= CSUM_IP_TCP | CSUM_IP_UDP;
+ if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
+ hwassist |= CSUM_IP6_TCP | CSUM_IP6_UDP;
+ ifp->if_hwassist = hwassist;
+}
+
static int
epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
@@ -462,6 +477,44 @@ epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = 0;
break;
+ case SIOCGIFCAP:
+ ifr->ifr_reqcap = ifp->if_capabilities;
+ ifr->ifr_curcap = ifp->if_capenable;
+ error = 0;
+ break;
+ case SIOCSIFCAP:
+ /*
+		 * Enable/disable capabilities as requested, except for
+		 * IFCAP_RXCSUM(_IPV6), which always remains enabled.
+ * Incoming packets may have the mbuf flag CSUM_DATA_VALID set.
+ * Without IFCAP_RXCSUM(_IPV6), this flag would have to be
+ * removed, which does not seem helpful.
+ */
+ ifp->if_capenable = ifr->ifr_reqcap | IFCAP_RXCSUM |
+ IFCAP_RXCSUM_IPV6;
+ epair_caps_changed(ifp);
+ /*
+ * If IFCAP_TXCSUM(_IPV6) has been changed, change it on the
+ * other epair interface as well.
+ * A bridge disables IFCAP_TXCSUM(_IPV6) when adding one epair
+ * interface if another interface in the bridge has it disabled.
+ * In that case this capability needs to be disabled on the
+ * other epair interface to avoid sending packets in the bridge
+ * that rely on this capability.
+ */
+ sc = ifp->if_softc;
+ if ((ifp->if_capenable ^ sc->oifp->if_capenable) &
+ (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6)) {
+ sc->oifp->if_capenable &=
+ ~(IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6);
+ sc->oifp->if_capenable |= ifp->if_capenable &
+ (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6);
+ epair_caps_changed(sc->oifp);
+ }
+ VLAN_CAPABILITIES(ifp);
+ error = 0;
+ break;
+
default:
/* Let the common ethernet handler process this. */
error = ether_ioctl(ifp, cmd, data);
@@ -573,8 +626,11 @@ epair_setup_ifp(struct epair_softc *sc, char *name, int unit)
ifp->if_dname = epairname;
ifp->if_dunit = unit;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_capabilities = IFCAP_VLAN_MTU;
- ifp->if_capenable = IFCAP_VLAN_MTU;
+ ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_TXCSUM |
+ IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
+ ifp->if_capenable = IFCAP_VLAN_MTU | IFCAP_TXCSUM |
+ IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
+ epair_caps_changed(ifp);
ifp->if_transmit = epair_transmit;
ifp->if_qflush = epair_qflush;
ifp->if_start = epair_start;
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index 3ae0c01c0efc..9c157bf3d3c2 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -695,7 +695,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
* seen by upper protocol layers.
*/
if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
- bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
+ memcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
m->m_flags |= M_PROMISC;
}
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
index 08435e7bd5f6..f2df612b19c1 100644
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -622,6 +622,7 @@ int if_setmtu(if_t ifp, int mtu);
int if_getmtu(const if_t ifp);
int if_getmtu_family(const if_t ifp, int family);
void if_notifymtu(if_t ifp);
+void if_setppromisc(const if_t ifp, bool ppromisc);
int if_setflagbits(if_t ifp, int set, int clear);
int if_setflags(if_t ifp, int flags);
int if_getflags(const if_t ifp);
diff --git a/sys/net/iflib.c b/sys/net/iflib.c
index 98c59e5de988..1e6d98291c04 100644
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -712,7 +712,7 @@ static uint32_t iflib_txq_can_drain(struct ifmp_ring *);
static void iflib_altq_if_start(if_t ifp);
static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m);
#endif
-static int iflib_register(if_ctx_t);
+static void iflib_register(if_ctx_t);
static void iflib_deregister(if_ctx_t);
static void iflib_unregister_vlan_handlers(if_ctx_t ctx);
static uint16_t iflib_get_mbuf_size_for(unsigned int size);
@@ -3646,6 +3646,12 @@ defrag:
bus_dmamap_unload(buf_tag, map);
DBG_COUNTER_INC(encap_txq_avail_fail);
DBG_COUNTER_INC(encap_txd_encap_fail);
+ if (ctx->ifc_sysctl_simple_tx) {
+ *m_headp = m_head = iflib_remove_mbuf(txq);
+ m_freem(*m_headp);
+ DBG_COUNTER_INC(tx_frees);
+ *m_headp = NULL;
+ }
if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0)
GROUPTASK_ENQUEUE(&txq->ift_task);
return (ENOBUFS);
@@ -4298,6 +4304,10 @@ iflib_if_transmit(if_t ifp, struct mbuf *m)
ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
m_freem(m);
DBG_COUNTER_INC(tx_frees);
+ if (err == ENOBUFS)
+ if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
+ else
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
return (err);
@@ -5136,10 +5146,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
ctx->ifc_dev = dev;
ctx->ifc_softc = sc;
- if ((err = iflib_register(ctx)) != 0) {
- device_printf(dev, "iflib_register failed %d\n", err);
- goto fail_ctx_free;
- }
+ iflib_register(ctx);
iflib_add_device_sysctl_pre(ctx);
scctx = &ctx->ifc_softc_ctx;
@@ -5363,7 +5370,6 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
DEBUGNET_SET(ctx->ifc_ifp, iflib);
- if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter);
iflib_add_device_sysctl_post(ctx);
iflib_add_pfil(ctx);
ctx->ifc_flags |= IFC_INIT_DONE;
@@ -5387,7 +5393,6 @@ fail_unlock:
CTX_UNLOCK(ctx);
IFNET_WUNLOCK();
iflib_deregister(ctx);
-fail_ctx_free:
device_set_softc(ctx->ifc_dev, NULL);
if (ctx->ifc_flags & IFC_SC_ALLOCATED)
free(ctx->ifc_softc, M_IFLIB);
@@ -5685,7 +5690,7 @@ _iflib_pre_assert(if_softc_ctx_t scctx)
MPASS(scctx->isc_txrx->ift_rxd_flush);
}
-static int
+static void
iflib_register(if_ctx_t ctx)
{
if_shared_ctx_t sctx = ctx->ifc_sctx;
@@ -5718,6 +5723,7 @@ iflib_register(if_ctx_t ctx)
if_settransmitfn(ifp, iflib_if_transmit);
#endif
if_setqflushfn(ifp, iflib_if_qflush);
+ if_setgetcounterfn(ifp, iflib_if_get_counter);
if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
ctx->ifc_vlan_attach_event =
EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx,
@@ -5731,7 +5737,6 @@ iflib_register(if_ctx_t ctx)
ifmedia_init(ctx->ifc_mediap, IFM_IMASK,
iflib_media_change, iflib_media_status);
}
- return (0);
}
static void
@@ -7146,6 +7151,11 @@ iflib_simple_transmit(if_t ifp, struct mbuf *m)
bytes_sent += m->m_pkthdr.len;
mcast_sent += !!(m->m_flags & M_MCAST);
(void)iflib_txd_db_check(txq, true);
+ } else {
+ if (error == ENOBUFS)
+ if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
+ else
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
(void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx));
mtx_unlock(&txq->ift_mtx);
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index d6c13470f2eb..e0fda082fefe 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -326,6 +326,7 @@ pf_counter_u64_zero(struct pf_counter_u64 *pfcu64)
_Static_assert(sizeof(time_t) == 4 || sizeof(time_t) == 8, "unexpected time_t size");
SYSCTL_DECL(_net_pf);
+MALLOC_DECLARE(M_PF);
MALLOC_DECLARE(M_PFHASH);
MALLOC_DECLARE(M_PF_RULE_ITEM);
@@ -645,6 +646,7 @@ struct pf_kpool {
int tblidx;
u_int16_t proxy_port[2];
u_int8_t opts;
+ sa_family_t ipv6_nexthop_af;
};
struct pf_rule_actions {
@@ -859,8 +861,8 @@ struct pf_krule {
u_int8_t keep_state;
sa_family_t af;
u_int8_t proto;
- u_int8_t type;
- u_int8_t code;
+ uint16_t type;
+ uint16_t code;
u_int8_t flags;
u_int8_t flagset;
u_int8_t min_ttl;
@@ -1749,6 +1751,7 @@ struct pf_kstatus {
counter_u64_t lcounters[KLCNT_MAX]; /* limit counters */
struct pf_counter_u64 fcounters[FCNT_MAX]; /* state operation counters */
counter_u64_t scounters[SCNT_MAX]; /* src_node operation counters */
+ counter_u64_t ncounters[NCNT_MAX];
uint32_t states;
uint32_t src_nodes;
uint32_t running;
@@ -2389,8 +2392,6 @@ extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
extern u_int16_t pf_proto_cksum_fixup(struct mbuf *, u_int16_t,
u_int16_t, u_int16_t, u_int8_t);
-VNET_DECLARE(struct ifnet *, sync_ifp);
-#define V_sync_ifp VNET(sync_ifp);
VNET_DECLARE(struct pf_krule, pf_default_rule);
#define V_pf_default_rule VNET(pf_default_rule)
extern void pf_addrcpy(struct pf_addr *, const struct pf_addr *,
@@ -2421,7 +2422,7 @@ int pf_multihome_scan_init(int, int, struct pf_pdesc *);
int pf_multihome_scan_asconf(int, int, struct pf_pdesc *);
u_int32_t pf_new_isn(struct pf_kstate *);
-void *pf_pull_hdr(const struct mbuf *, int, void *, int, u_short *, u_short *,
+void *pf_pull_hdr(const struct mbuf *, int, void *, int, u_short *,
sa_family_t);
void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t);
void pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t,
@@ -2438,6 +2439,7 @@ int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t);
void pf_normalize_init(void);
void pf_normalize_cleanup(void);
+uint64_t pf_normalize_get_frag_count(void);
int pf_normalize_tcp(struct pf_pdesc *);
void pf_normalize_tcp_cleanup(struct pf_kstate *);
int pf_normalize_tcp_init(struct pf_pdesc *,
@@ -2612,6 +2614,7 @@ struct pf_kruleset *pf_find_kruleset(const char *);
struct pf_kruleset *pf_get_leaf_kruleset(char *, char **);
struct pf_kruleset *pf_find_or_create_kruleset(const char *);
void pf_rs_initialize(void);
+void pf_rule_tree_free(struct pf_krule_global *);
struct pf_krule *pf_krule_alloc(void);
@@ -2680,7 +2683,7 @@ u_short pf_map_addr(sa_family_t, struct pf_krule *,
struct pf_addr *, struct pf_kpool *);
u_short pf_map_addr_sn(u_int8_t, struct pf_krule *,
struct pf_addr *, struct pf_addr *,
- sa_family_t *, struct pfi_kkif **nkif,
+ sa_family_t *, struct pfi_kkif **,
struct pf_addr *, struct pf_kpool *,
pf_sn_types_t);
int pf_get_transaddr_af(struct pf_krule *,
diff --git a/sys/net80211/ieee80211_ddb.c b/sys/net80211/ieee80211_ddb.c
index d96d7988a864..1dd8e38b9896 100644
--- a/sys/net80211/ieee80211_ddb.c
+++ b/sys/net80211/ieee80211_ddb.c
@@ -296,7 +296,7 @@ _db_show_sta(const struct ieee80211_node *ni)
ni->ni_htparam, ni->ni_htctlchan, ni->ni_ht2ndchan);
db_printf("\thtopmode 0x%x htstbc 0x%x chw %d (%s)\n",
ni->ni_htopmode, ni->ni_htstbc,
- ni->ni_chw, ieee80211_ni_chw_to_str(ni->ni_chw));
+ ni->ni_chw, net80211_ni_chw_to_str(ni->ni_chw));
/* XXX ampdu state */
for (i = 0; i < WME_NUM_TID; i++)
diff --git a/sys/net80211/ieee80211_freebsd.h b/sys/net80211/ieee80211_freebsd.h
index 141b13f9f740..954801d95787 100644
--- a/sys/net80211/ieee80211_freebsd.h
+++ b/sys/net80211/ieee80211_freebsd.h
@@ -93,12 +93,22 @@ typedef struct {
} while (0)
#define IEEE80211_TX_LOCK_OBJ(_ic) (&(_ic)->ic_txlock.mtx)
#define IEEE80211_TX_LOCK_DESTROY(_ic) mtx_destroy(IEEE80211_TX_LOCK_OBJ(_ic))
-#define IEEE80211_TX_LOCK(_ic) mtx_lock(IEEE80211_TX_LOCK_OBJ(_ic))
-#define IEEE80211_TX_UNLOCK(_ic) mtx_unlock(IEEE80211_TX_LOCK_OBJ(_ic))
-#define IEEE80211_TX_LOCK_ASSERT(_ic) \
- mtx_assert(IEEE80211_TX_LOCK_OBJ(_ic), MA_OWNED)
-#define IEEE80211_TX_UNLOCK_ASSERT(_ic) \
- mtx_assert(IEEE80211_TX_LOCK_OBJ(_ic), MA_NOTOWNED)
+#define IEEE80211_TX_LOCK(_ic) do { \
+ if (!IEEE80211_CONF_SEQNO_OFFLOAD(_ic)) \
+ mtx_lock(IEEE80211_TX_LOCK_OBJ(_ic)); \
+	} while (0)
+#define IEEE80211_TX_UNLOCK(_ic) do { \
+ if (!IEEE80211_CONF_SEQNO_OFFLOAD(_ic)) \
+ mtx_unlock(IEEE80211_TX_LOCK_OBJ(_ic)); \
+	} while (0)
+#define IEEE80211_TX_LOCK_ASSERT(_ic) do { \
+ if (!IEEE80211_CONF_SEQNO_OFFLOAD(_ic)) \
+ mtx_assert(IEEE80211_TX_LOCK_OBJ(_ic), MA_OWNED); \
+ } while (0)
+#define	IEEE80211_TX_UNLOCK_ASSERT(_ic)	do { \
+ if (!IEEE80211_CONF_SEQNO_OFFLOAD(_ic)) \
+ mtx_assert(IEEE80211_TX_LOCK_OBJ(_ic), MA_NOTOWNED); \
+ } while (0)
/*
* Stageq / ni_tx_superg lock
@@ -331,11 +341,16 @@ struct mbuf *ieee80211_getmgtframe(uint8_t **frm, int headroom, int pktlen);
#define M_AGE_SUB(m,adj) (m->m_pkthdr.csum_data -= adj)
/*
- * Store the sequence number.
+ * Store / retrieve the sequence number in an mbuf.
+ *
+ * The sequence number being stored/retrieved is the 12 bit
+ * base sequence number, not the 16 bit sequence number field.
+ * I.e., it's from 0..4095 inclusive, with no 4 bit padding for
+ * fragment numbers.
*/
#define M_SEQNO_SET(m, seqno) \
- ((m)->m_pkthdr.tso_segsz = (seqno))
-#define M_SEQNO_GET(m) ((m)->m_pkthdr.tso_segsz)
+ ((m)->m_pkthdr.tso_segsz = ((seqno) % IEEE80211_SEQ_RANGE))
+#define M_SEQNO_GET(m) (((m)->m_pkthdr.tso_segsz) % IEEE80211_SEQ_RANGE)
#define MTAG_ABI_NET80211 1132948340 /* net80211 ABI */
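
An illustration (assuming 'm' is a struct mbuf * and IEEE80211_SEQ_RANGE is 4096): values are stored and read back modulo the 12 bit range, so a caller never sees a value above 4095.

static void
example_seqno(struct mbuf *m)
{
	uint16_t seqno;

	M_SEQNO_SET(m, 4097);		/* stored as 4097 % 4096 == 1 */
	seqno = M_SEQNO_GET(m);		/* reads back 1, never > 4095 */
	(void)seqno;
}
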
diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c
index c28f124648a1..3af56a228295 100644
--- a/sys/net80211/ieee80211_ht.c
+++ b/sys/net80211/ieee80211_ht.c
@@ -1476,7 +1476,7 @@ ieee80211_ht_wds_init(struct ieee80211_node *ni)
ni->ni_htcap |= IEEE80211_HTCAP_SHORTGI20;
if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) {
ni->ni_htcap |= IEEE80211_HTCAP_CHWIDTH40;
- ni->ni_chw = IEEE80211_STA_RX_BW_40;
+ ni->ni_chw = NET80211_STA_RX_BW_40;
if (IEEE80211_IS_CHAN_HT40U(ni->ni_chan))
ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_ABOVE;
else if (IEEE80211_IS_CHAN_HT40D(ni->ni_chan))
@@ -1484,7 +1484,7 @@ ieee80211_ht_wds_init(struct ieee80211_node *ni)
if (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40)
ni->ni_htcap |= IEEE80211_HTCAP_SHORTGI40;
} else {
- ni->ni_chw = IEEE80211_STA_RX_BW_20;
+ ni->ni_chw = NET80211_STA_RX_BW_20;
ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_NONE;
}
ni->ni_htctlchan = ni->ni_chan->ic_ieee;
@@ -1580,7 +1580,7 @@ ieee80211_ht_node_join(struct ieee80211_node *ni)
if (ni->ni_flags & IEEE80211_NODE_HT) {
vap->iv_ht_sta_assoc++;
- if (ni->ni_chw == IEEE80211_STA_RX_BW_40)
+ if (ni->ni_chw == NET80211_STA_RX_BW_40)
vap->iv_ht40_sta_assoc++;
}
htinfo_update(vap);
@@ -1598,7 +1598,7 @@ ieee80211_ht_node_leave(struct ieee80211_node *ni)
if (ni->ni_flags & IEEE80211_NODE_HT) {
vap->iv_ht_sta_assoc--;
- if (ni->ni_chw == IEEE80211_STA_RX_BW_40)
+ if (ni->ni_chw == NET80211_STA_RX_BW_40)
vap->iv_ht40_sta_assoc--;
}
htinfo_update(vap);
@@ -1827,7 +1827,7 @@ htinfo_update_chw(struct ieee80211_node *ni, int htflags, int vhtflags)
done:
/* update node's (11n) tx channel width */
ni->ni_chw = IEEE80211_IS_CHAN_HT40(ni->ni_chan) ?
- IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
+ NET80211_STA_RX_BW_40 : NET80211_STA_RX_BW_20;
return (ret);
}
@@ -1933,7 +1933,7 @@ ieee80211_vht_get_vhtflags(struct ieee80211_node *ni, uint32_t htflags)
{
#define _RETURN_CHAN_BITS(_cb) \
do { \
- IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, \
+ if (0) IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, \
"%s:%d: selected %b", __func__, __LINE__, \
(_cb), IEEE80211_CHAN_BITS); \
return (_cb); \
@@ -2689,11 +2689,11 @@ ht_recv_action_ht_txchwidth(struct ieee80211_node *ni,
* here.
*/
chw = (frm[2] == IEEE80211_A_HT_TXCHWIDTH_2040) ?
- IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
+ NET80211_STA_RX_BW_40 : NET80211_STA_RX_BW_20;
IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni,
"%s: HT txchwidth, width %d%s (%s)", __func__,
- chw, ni->ni_chw != chw ? "*" : "", ieee80211_ni_chw_to_str(chw));
+ chw, ni->ni_chw != chw ? "*" : "", net80211_ni_chw_to_str(chw));
if (chw != ni->ni_chw) {
/* XXX does this need to change the ht40 station count? */
ni->ni_chw = chw;
@@ -3832,5 +3832,5 @@ ieee80211_ht_check_tx_ht40(const struct ieee80211_node *ni)
return (IEEE80211_IS_CHAN_HT40(bss_chan) &&
IEEE80211_IS_CHAN_HT40(ni->ni_chan) &&
- (ni->ni_chw == IEEE80211_STA_RX_BW_40));
+ (ni->ni_chw == NET80211_STA_RX_BW_40));
}
diff --git a/sys/net80211/ieee80211_node.c b/sys/net80211/ieee80211_node.c
index a201d1b278f0..49ba00299fee 100644
--- a/sys/net80211/ieee80211_node.c
+++ b/sys/net80211/ieee80211_node.c
@@ -2673,7 +2673,7 @@ ieee80211_dump_node(struct ieee80211_node_table *nt __unused,
ni->ni_htctlchan, ni->ni_ht2ndchan);
net80211_printf("\thtopmode %x htstbc %x htchw %d (%s)\n",
ni->ni_htopmode, ni->ni_htstbc,
- ni->ni_chw, ieee80211_ni_chw_to_str(ni->ni_chw));
+ ni->ni_chw, net80211_ni_chw_to_str(ni->ni_chw));
net80211_printf("\tvhtcap %x freq1 %d freq2 %d vhtbasicmcs %x\n",
ni->ni_vhtcap, (int) ni->ni_vht_chan1, (int) ni->ni_vht_chan2,
(int) ni->ni_vht_basicmcs);
@@ -2831,7 +2831,7 @@ ieee80211_node_join(struct ieee80211_node *ni, int resp)
ni->ni_flags & IEEE80211_NODE_QOS ? ", QoS" : "",
/* XXX update for VHT string */
ni->ni_flags & IEEE80211_NODE_HT ?
- (ni->ni_chw == IEEE80211_STA_RX_BW_40 ? ", HT40" : ", HT20") : "",
+ (ni->ni_chw == NET80211_STA_RX_BW_40 ? ", HT40" : ", HT20") : "",
ni->ni_flags & IEEE80211_NODE_AMPDU ? " (+AMPDU)" : "",
ni->ni_flags & IEEE80211_NODE_AMSDU ? " (+AMSDU)" : "",
ni->ni_flags & IEEE80211_NODE_MIMO_RTS ? " (+SMPS-DYN)" :
diff --git a/sys/net80211/ieee80211_node.h b/sys/net80211/ieee80211_node.h
index ef25fa0d7fdd..f1246dd12419 100644
--- a/sys/net80211/ieee80211_node.h
+++ b/sys/net80211/ieee80211_node.h
@@ -109,33 +109,33 @@ enum ieee80211_mesh_mlstate {
"\20\1IDLE\2OPENSNT\2OPENRCV\3CONFIRMRCV\4ESTABLISHED\5HOLDING"
/*
- * This structure is shared with LinuxKPI 802.11 code describing up-to
- * which channel width the station can receive.
+ * This enum was shared with the LinuxKPI enum ieee80211_sta_rx_bandwidth
+ * describing up to which channel width the station can receive.
* Rather than using hardcoded MHz values for the channel width use an enum with
* flags. This allows us to keep the uint8_t slot for ni_chw in
- * struct ieee80211_node and means we do not have to sync to the value for
- * LinuxKPI.
+ * struct ieee80211_node; it also means we do not have to keep the values
+ * in sync with LinuxKPI (just the names).
*
 * NB: BW_20 needs to be 0 and values need to be sorted! Cannot make it
* bitfield-alike for use with %b.
*/
-enum ieee80211_sta_rx_bw {
- IEEE80211_STA_RX_BW_20 = 0x00,
- IEEE80211_STA_RX_BW_40,
- IEEE80211_STA_RX_BW_80,
- IEEE80211_STA_RX_BW_160,
- IEEE80211_STA_RX_BW_320,
+enum net80211_sta_rx_bw {
+ NET80211_STA_RX_BW_20 = 0x00,
+ NET80211_STA_RX_BW_40,
+ NET80211_STA_RX_BW_80,
+ NET80211_STA_RX_BW_160,
+ NET80211_STA_RX_BW_320,
} __packed;
static inline const char *
-ieee80211_ni_chw_to_str(enum ieee80211_sta_rx_bw bw)
+net80211_ni_chw_to_str(enum net80211_sta_rx_bw bw)
{
switch (bw) {
- case IEEE80211_STA_RX_BW_20: return ("BW_20");
- case IEEE80211_STA_RX_BW_40: return ("BW_40");
- case IEEE80211_STA_RX_BW_80: return ("BW_80");
- case IEEE80211_STA_RX_BW_160: return ("BW_160");
- case IEEE80211_STA_RX_BW_320: return ("BW_320");
+ case NET80211_STA_RX_BW_20: return ("BW_20");
+ case NET80211_STA_RX_BW_40: return ("BW_40");
+ case NET80211_STA_RX_BW_80: return ("BW_80");
+ case NET80211_STA_RX_BW_160: return ("BW_160");
+ case NET80211_STA_RX_BW_320: return ("BW_320");
}
}
@@ -285,7 +285,7 @@ struct ieee80211_node {
uint8_t ni_ht2ndchan; /* HT 2nd channel */
uint8_t ni_htopmode; /* HT operating mode */
uint8_t ni_htstbc; /* HT */
- enum ieee80211_sta_rx_bw ni_chw; /* negotiated channel width */
+ enum net80211_sta_rx_bw ni_chw; /* negotiated channel width */
struct ieee80211_htrateset ni_htrates; /* negotiated ht rate set */
struct ieee80211_tx_ampdu ni_tx_ampdu[WME_NUM_TID];
struct ieee80211_rx_ampdu ni_rx_ampdu[WME_NUM_TID];
diff --git a/sys/net80211/ieee80211_output.c b/sys/net80211/ieee80211_output.c
index afe83ea0805c..116fc76a9ce1 100644
--- a/sys/net80211/ieee80211_output.c
+++ b/sys/net80211/ieee80211_output.c
@@ -974,7 +974,7 @@ ieee80211_send_setup(
/* NB: zero out i_seq field (for s/w encryption etc) */
*(uint16_t *)&wh->i_seq[0] = 0;
- } else
+ } else if (!IEEE80211_CONF_SEQNO_OFFLOAD(ni->ni_ic))
ieee80211_output_seqno_assign(ni, tid, m);
if (IEEE80211_IS_MULTICAST(wh->i_addr1))
@@ -1082,6 +1082,12 @@ ieee80211_send_nulldata(struct ieee80211_node *ni)
uint8_t *frm;
int ret;
+ /* Don't send NULL frames if we've been configured not to do so. */
+ if ((ic->ic_flags_ext & IEEE80211_FEXT_NO_NULLDATA) != 0) {
+ ieee80211_node_decref(ni);
+ return (0);
+ }
+
if (vap->iv_state == IEEE80211_S_CAC) {
IEEE80211_NOTE(vap, IEEE80211_MSG_OUTPUT | IEEE80211_MSG_DOTH,
ni, "block %s frame in CAC state", "null data");
@@ -1810,7 +1816,8 @@ ieee80211_encap(struct ieee80211vap *vap, struct ieee80211_node *ni,
* and we don't need the TX lock held.
*/
if ((m->m_flags & M_AMPDU_MPDU) == 0) {
- ieee80211_output_seqno_assign(ni, tid, m);
+ if (!IEEE80211_CONF_SEQNO_OFFLOAD(ic))
+ ieee80211_output_seqno_assign(ni, tid, m);
} else {
/*
* NB: don't assign a sequence # to potential
@@ -1828,7 +1835,9 @@ ieee80211_encap(struct ieee80211vap *vap, struct ieee80211_node *ni,
*(uint16_t *)wh->i_seq = 0;
}
} else {
- ieee80211_output_seqno_assign(ni, IEEE80211_NONQOS_TID, m);
+ if (!IEEE80211_CONF_SEQNO_OFFLOAD(ic))
+ ieee80211_output_seqno_assign(ni, IEEE80211_NONQOS_TID,
+ m);
/*
* XXX TODO: we shouldn't allow EAPOL, etc that would
* be forced to be non-QoS traffic to be A-MSDU encapsulated.
@@ -3856,6 +3865,8 @@ ieee80211_beacon_update(struct ieee80211_node *ni, struct mbuf *m, int mcast)
* If the driver identifies it does its own TX seqno management then
* we can skip this (and still not do the TX seqno.)
*/
+
+ /* TODO: IEEE80211_CONF_SEQNO_OFFLOAD() */
ieee80211_output_beacon_seqno_assign(ni, m);
/* XXX faster to recalculate entirely or just changes? */
diff --git a/sys/net80211/ieee80211_phy.c b/sys/net80211/ieee80211_phy.c
index eb96d74a2bd9..7f53c717152b 100644
--- a/sys/net80211/ieee80211_phy.c
+++ b/sys/net80211/ieee80211_phy.c
@@ -658,26 +658,26 @@ static uint16_t ieee80211_vht_mcs_allowed_list_160[] = {
*
* See 802.11-2020 21.5 (Parameters for VHT-MCSs) for more details.
*
- * @param bw channel bandwidth, via enum ieee80211_sta_rx_bw
+ * @param bw channel bandwidth, via enum net80211_sta_rx_bw
* @param nss number of spatial streams, 1..8
* @returns bitmask of valid MCS rates from 0..9
*/
uint16_t
-ieee80211_phy_vht_get_mcs_mask(enum ieee80211_sta_rx_bw bw, uint8_t nss)
+ieee80211_phy_vht_get_mcs_mask(enum net80211_sta_rx_bw bw, uint8_t nss)
{
if (nss == 0 || nss > 8)
return (0);
switch (bw) {
- case IEEE80211_STA_RX_BW_20:
+ case NET80211_STA_RX_BW_20:
return (ieee80211_vht_mcs_allowed_list_20[nss - 1]);
- case IEEE80211_STA_RX_BW_40:
+ case NET80211_STA_RX_BW_40:
return (ieee80211_vht_mcs_allowed_list_40[nss - 1]);
- case IEEE80211_STA_RX_BW_80:
+ case NET80211_STA_RX_BW_80:
return (ieee80211_vht_mcs_allowed_list_80[nss - 1]);
- case IEEE80211_STA_RX_BW_160:
+ case NET80211_STA_RX_BW_160:
return (ieee80211_vht_mcs_allowed_list_160[nss - 1]);
- case IEEE80211_STA_RX_BW_320:
+ case NET80211_STA_RX_BW_320:
/* invalid for VHT */
return (0);
}
@@ -689,14 +689,14 @@ ieee80211_phy_vht_get_mcs_mask(enum ieee80211_sta_rx_bw bw, uint8_t nss)
*
* See 802.11-2020 21.5 (Parameters for VHT-MCSs) for more details.
*
- * @param bw channel bandwidth, via enum ieee80211_sta_rx_bw
+ * @param bw channel bandwidth, via enum net80211_sta_rx_bw
* @param nss number of spatial streams, 1..8
* @param mcs MCS rate, 0..9
* @retval true if the NSS / MCS / bandwidth combination is valid
* @retval false if the NSS / MCS / bandwidth combination is not valid
*/
bool
-ieee80211_phy_vht_validate_mcs(enum ieee80211_sta_rx_bw bw, uint8_t nss,
+ieee80211_phy_vht_validate_mcs(enum net80211_sta_rx_bw bw, uint8_t nss,
uint8_t mcs)
{
uint16_t mask;
@@ -737,7 +737,7 @@ static struct mcs_entry mcs_entries[] = {
/**
* @brief Calculate the bitrate of the given VHT MCS rate.
*
- * @param bw Channel bandwidth (enum ieee80211_sta_rx_bw)
+ * @param bw Channel bandwidth (enum net80211_sta_rx_bw)
* @param nss Number of spatial streams, 1..8
* @param mcs MCS, 0..9
* @param is_shortgi True if short guard-interval (400nS)
@@ -746,7 +746,7 @@ static struct mcs_entry mcs_entries[] = {
* @returns The bitrate in kbit/sec.
*/
uint32_t
-ieee80211_phy_vht_get_mcs_kbit(enum ieee80211_sta_rx_bw bw,
+ieee80211_phy_vht_get_mcs_kbit(enum net80211_sta_rx_bw bw,
uint8_t nss, uint8_t mcs, bool is_shortgi)
{
uint32_t sym_len, n_carriers;
@@ -773,16 +773,16 @@ ieee80211_phy_vht_get_mcs_kbit(enum ieee80211_sta_rx_bw bw,
* See 802.11-2020 Table 21-5 (Timing-related constraints.)
*/
switch (bw) {
- case IEEE80211_STA_RX_BW_20:
+ case NET80211_STA_RX_BW_20:
n_carriers = 52;
break;
- case IEEE80211_STA_RX_BW_40:
+ case NET80211_STA_RX_BW_40:
n_carriers = 108;
break;
- case IEEE80211_STA_RX_BW_80:
+ case NET80211_STA_RX_BW_80:
n_carriers = 234;
break;
- case IEEE80211_STA_RX_BW_160:
+ case NET80211_STA_RX_BW_160:
n_carriers = 468;
break;
default:
diff --git a/sys/net80211/ieee80211_phy.h b/sys/net80211/ieee80211_phy.h
index 749b082e34e9..391c8bfc5010 100644
--- a/sys/net80211/ieee80211_phy.h
+++ b/sys/net80211/ieee80211_phy.h
@@ -221,13 +221,13 @@ uint32_t ieee80211_compute_duration_ht(uint32_t frameLen,
uint16_t rate, int streams, int isht40,
int isShortGI);
-enum ieee80211_sta_rx_bw;
+enum net80211_sta_rx_bw;
-uint16_t ieee80211_phy_vht_get_mcs_mask(enum ieee80211_sta_rx_bw,
+uint16_t ieee80211_phy_vht_get_mcs_mask(enum net80211_sta_rx_bw,
uint8_t);
-bool ieee80211_phy_vht_validate_mcs(enum ieee80211_sta_rx_bw,
+bool ieee80211_phy_vht_validate_mcs(enum net80211_sta_rx_bw,
uint8_t, uint8_t);
-uint32_t ieee80211_phy_vht_get_mcs_kbit(enum ieee80211_sta_rx_bw,
+uint32_t ieee80211_phy_vht_get_mcs_kbit(enum net80211_sta_rx_bw,
uint8_t, uint8_t, bool);
#endif /* _KERNEL */
diff --git a/sys/net80211/ieee80211_sta.c b/sys/net80211/ieee80211_sta.c
index 463a8b16773b..19e5ffe9a367 100644
--- a/sys/net80211/ieee80211_sta.c
+++ b/sys/net80211/ieee80211_sta.c
@@ -1934,7 +1934,7 @@ sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, int subtype,
vap->iv_flags&IEEE80211_F_USEPROT ? ", protection" : "",
ni->ni_flags & IEEE80211_NODE_QOS ? ", QoS" : "",
ni->ni_flags & IEEE80211_NODE_HT ?
- (ni->ni_chw == IEEE80211_STA_RX_BW_40 ? ", HT40" : ", HT20") : "",
+ (ni->ni_chw == NET80211_STA_RX_BW_40 ? ", HT40" : ", HT20") : "",
ni->ni_flags & IEEE80211_NODE_AMPDU ? " (+AMPDU)" : "",
ni->ni_flags & IEEE80211_NODE_AMSDU ? " (+AMSDU)" : "",
ni->ni_flags & IEEE80211_NODE_MIMO_RTS ? " (+SMPS-DYN)" :
diff --git a/sys/net80211/ieee80211_var.h b/sys/net80211/ieee80211_var.h
index a0293f814899..b9bc2357428d 100644
--- a/sys/net80211/ieee80211_var.h
+++ b/sys/net80211/ieee80211_var.h
@@ -700,13 +700,14 @@ MALLOC_DECLARE(M_80211_VAP);
#define IEEE80211_FEXT_QUIET_IE 0x00800000 /* STATUS: quiet IE in a beacon has been added */
#define IEEE80211_FEXT_UAPSD 0x01000000 /* CONF: enable U-APSD */
#define IEEE80211_FEXT_AMPDU_OFFLOAD 0x02000000 /* CONF: driver/fw handles AMPDU[-TX] itself */
+#define IEEE80211_FEXT_NO_NULLDATA 0x04000000 /* CONF: don't originate NULL data frames from net80211 */
#define IEEE80211_FEXT_BITS \
"\20\2INACT\3SCANWAIT\4BGSCAN\5WPS\6TSN\7SCANREQ\10RESUME" \
"\0114ADDR\12NONEPR_PR\13SWBMISS\14DFS\15DOTD\16STATEWAIT\17REINIT" \
"\20BPF\21WDSLEGACY\22PROBECHAN\23UNIQMAC\24SCAN_OFFLOAD\25SEQNO_OFFLOAD" \
"\26FRAG_OFFLOAD\27VHT" \
- "\30QUIET_IE\31UAPSD\32AMPDU_OFFLOAD"
+ "\30QUIET_IE\31UAPSD\32AMPDU_OFFLOAD\33NO_NULLDATA"
/* ic_flags_ht/iv_flags_ht */
#define IEEE80211_FHT_NONHT_PR 0x00000001 /* STATUS: non-HT sta present */
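
A minimal sketch (hypothetical driver code, not from this change) of how a driver would opt out of net80211-originated NULL data frames using the new flag, e.g. because its firmware generates its own keep-alive frames:

static void
example_driver_attach(struct ieee80211com *ic)
{
	/* Tell net80211 not to originate NULL data frames for this device. */
	ic->ic_flags_ext |= IEEE80211_FEXT_NO_NULLDATA;
}
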
diff --git a/sys/net80211/ieee80211_vht.c b/sys/net80211/ieee80211_vht.c
index de0b691d4d2a..10a5fc7f08ab 100644
--- a/sys/net80211/ieee80211_vht.c
+++ b/sys/net80211/ieee80211_vht.c
@@ -974,7 +974,7 @@ ieee80211_vht_check_tx_vht40(const struct ieee80211_node *ni)
return (IEEE80211_IS_CHAN_VHT40(bss_chan) &&
IEEE80211_IS_CHAN_VHT40(ni->ni_chan) &&
- (ni->ni_chw == IEEE80211_STA_RX_BW_40));
+ (ni->ni_chw == NET80211_STA_RX_BW_40));
}
/*
@@ -1003,7 +1003,7 @@ ieee80211_vht_check_tx_vht80(const struct ieee80211_node *ni)
*/
return (IEEE80211_IS_CHAN_VHT80(bss_chan) &&
IEEE80211_IS_CHAN_VHT80(ni->ni_chan) &&
- (ni->ni_chw != IEEE80211_STA_RX_BW_20));
+ (ni->ni_chw != NET80211_STA_RX_BW_20));
}
/*
@@ -1030,7 +1030,7 @@ ieee80211_vht_check_tx_vht160(const struct ieee80211_node *ni)
* If a HT TX width action frame sets it to 20MHz
* then reject doing 160MHz.
*/
- if (ni->ni_chw == IEEE80211_STA_RX_BW_20)
+ if (ni->ni_chw == NET80211_STA_RX_BW_20)
return (false);
if (IEEE80211_IS_CHAN_VHT160(bss_chan) &&
@@ -1062,19 +1062,19 @@ ieee80211_vht_check_tx_vht160(const struct ieee80211_node *ni)
*/
bool
ieee80211_vht_check_tx_bw(const struct ieee80211_node *ni,
- enum ieee80211_sta_rx_bw bw)
+ enum net80211_sta_rx_bw bw)
{
switch (bw) {
- case IEEE80211_STA_RX_BW_20:
+ case NET80211_STA_RX_BW_20:
return (ieee80211_vht_check_tx_vht(ni));
- case IEEE80211_STA_RX_BW_40:
+ case NET80211_STA_RX_BW_40:
return (ieee80211_vht_check_tx_vht40(ni));
- case IEEE80211_STA_RX_BW_80:
+ case NET80211_STA_RX_BW_80:
return (ieee80211_vht_check_tx_vht80(ni));
- case IEEE80211_STA_RX_BW_160:
+ case NET80211_STA_RX_BW_160:
return (ieee80211_vht_check_tx_vht160(ni));
- case IEEE80211_STA_RX_BW_320:
+ case NET80211_STA_RX_BW_320:
return (false);
default:
return (false);
@@ -1096,7 +1096,7 @@ ieee80211_vht_check_tx_bw(const struct ieee80211_node *ni,
*/
bool
ieee80211_vht_node_check_tx_valid_mcs(const struct ieee80211_node *ni,
- enum ieee80211_sta_rx_bw bw, uint8_t nss, uint8_t mcs)
+ enum net80211_sta_rx_bw bw, uint8_t nss, uint8_t mcs)
{
uint8_t mc;
diff --git a/sys/net80211/ieee80211_vht.h b/sys/net80211/ieee80211_vht.h
index a1529df4a85b..b9b19fbc6008 100644
--- a/sys/net80211/ieee80211_vht.h
+++ b/sys/net80211/ieee80211_vht.h
@@ -65,8 +65,8 @@ void ieee80211_vht_get_vhtinfo_ie(struct ieee80211_node *ni,
bool ieee80211_vht_check_tx_vht(const struct ieee80211_node *);
bool ieee80211_vht_check_tx_bw(const struct ieee80211_node *,
- enum ieee80211_sta_rx_bw);
+ enum net80211_sta_rx_bw);
bool ieee80211_vht_node_check_tx_valid_mcs(const struct ieee80211_node *,
- enum ieee80211_sta_rx_bw bw, uint8_t, uint8_t);
+ enum net80211_sta_rx_bw bw, uint8_t, uint8_t);
#endif /* _NET80211_IEEE80211_VHT_H_ */
diff --git a/sys/netgraph/bluetooth/include/ng_hci.h b/sys/netgraph/bluetooth/include/ng_hci.h
index 44a14e62f4ed..ce3291770740 100644
--- a/sys/netgraph/bluetooth/include/ng_hci.h
+++ b/sys/netgraph/bluetooth/include/ng_hci.h
@@ -448,7 +448,7 @@ typedef struct {
typedef bdaddr_t * bdaddr_p;
/* Any BD_ADDR. Note: This is actually 7 bytes (count '\0' terminator) */
-#define NG_HCI_BDADDR_ANY ((bdaddr_p) "\000\000\000\000\000\000")
+#define NG_HCI_BDADDR_ANY (&(const bdaddr_t){"\000\000\000\000\000\000"})
/* HCI status return parameter */
typedef struct {
diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
index 6c0a6fda1fb1..73a0897857b2 100644
--- a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
+++ b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
@@ -113,7 +113,7 @@ static void ng_btsocket_rfcomm_connect_cfm
static int ng_btsocket_rfcomm_session_create
(ng_btsocket_rfcomm_session_p *sp, struct socket *l2so,
- bdaddr_p src, bdaddr_p dst, struct thread *td);
+ const bdaddr_t *src, const bdaddr_t *dst, struct thread *td);
static int ng_btsocket_rfcomm_session_accept
(ng_btsocket_rfcomm_session_p s0);
static int ng_btsocket_rfcomm_session_connect
@@ -1250,7 +1250,7 @@ ng_btsocket_rfcomm_connect_cfm(ng_btsocket_rfcomm_session_p s)
static int
ng_btsocket_rfcomm_session_create(ng_btsocket_rfcomm_session_p *sp,
- struct socket *l2so, bdaddr_p src, bdaddr_p dst,
+ struct socket *l2so, const bdaddr_t *src, const bdaddr_t *dst,
struct thread *td)
{
ng_btsocket_rfcomm_session_p s = NULL;
diff --git a/sys/netgraph/ng_device.c b/sys/netgraph/ng_device.c
index e4fcdfc635cb..582f877ff3ed 100644
--- a/sys/netgraph/ng_device.c
+++ b/sys/netgraph/ng_device.c
@@ -32,26 +32,27 @@
*/
#if 0
-#define DBG do { printf("ng_device: %s\n", __func__ ); } while (0)
+#define DBG do { printf("ng_device: %s\n", __func__); } while (0)
#else
#define DBG do {} while (0)
#endif
#include <sys/param.h>
+#include <sys/systm.h>
#include <sys/conf.h>
+#include <sys/epoch.h>
+#include <sys/fcntl.h>
+#include <sys/filio.h>
#include <sys/ioccom.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/poll.h>
#include <sys/proc.h>
-#include <sys/epoch.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/syslog.h>
-#include <sys/systm.h>
#include <sys/uio.h>
-#include <sys/vnode.h>
#include <net/ethernet.h>
#include <net/if.h>
@@ -135,9 +136,7 @@ static d_close_t ngdclose;
static d_open_t ngdopen;
static d_read_t ngdread;
static d_write_t ngdwrite;
-#if 0
static d_ioctl_t ngdioctl;
-#endif
static d_poll_t ngdpoll;
static struct cdevsw ngd_cdevsw = {
@@ -146,16 +145,16 @@ static struct cdevsw ngd_cdevsw = {
.d_close = ngdclose,
.d_read = ngdread,
.d_write = ngdwrite,
-#if 0
.d_ioctl = ngdioctl,
-#endif
.d_poll = ngdpoll,
.d_name = NG_DEVICE_DEVNAME,
};
-/******************************************************************************
+/*
+ *****************************************************************************
* Netgraph methods
- ******************************************************************************/
+ *****************************************************************************
+ */
/*
* Handle loading and unloading for this node type.
@@ -205,13 +204,13 @@ ng_device_constructor(node_p node)
priv->ngddev = make_dev(&ngd_cdevsw, priv->unit, UID_ROOT,
GID_WHEEL, 0600, NG_DEVICE_DEVNAME "%d", priv->unit);
- if(priv->ngddev == NULL) {
- printf("%s(): make_dev() failed\n",__func__);
+ if (priv->ngddev == NULL) {
+ printf("%s(): make_dev() failed\n", __func__);
mtx_destroy(&priv->ngd_mtx);
mtx_destroy(&priv->readq.ifq_mtx);
free_unr(ngd_unit, priv->unit);
free(priv, M_NETGRAPH);
- return(EINVAL);
+ return (EINVAL);
}
/* XXX: race here? */
priv->ngddev->si_drv1 = priv;
@@ -221,7 +220,7 @@ ng_device_constructor(node_p node)
log(LOG_WARNING, "%s: can't acquire netgraph name\n",
devtoname(priv->ngddev));
- return(0);
+ return (0);
}
/*
@@ -289,7 +288,7 @@ ng_device_newhook(node_p node, hook_p hook, const char *name)
priv->hook = hook;
- return(0);
+ return (0);
}
/*
@@ -322,7 +321,7 @@ ng_device_rcvdata(hook_p hook, item_p item)
}
mtx_unlock(&priv->ngd_mtx);
- return(0);
+ return (0);
}
/*
@@ -347,7 +346,7 @@ ng_device_disconnect(hook_p hook)
ng_rmnode_self(NG_HOOK_NODE(hook));
- return(0);
+ return (0);
}
/*
@@ -360,9 +359,11 @@ ng_device_shutdown(node_p node)
return (0);
}
-/******************************************************************************
+/*
+ *****************************************************************************
* Device methods
- ******************************************************************************/
+ *****************************************************************************
+ */
/*
* the device is opened
@@ -370,7 +371,7 @@ ng_device_shutdown(node_p node)
static int
ngdopen(struct cdev *dev, int flag, int mode, struct thread *td)
{
- priv_p priv = (priv_p )dev->si_drv1;
+ priv_p priv = (priv_p)dev->si_drv1;
DBG;
@@ -378,7 +379,7 @@ ngdopen(struct cdev *dev, int flag, int mode, struct thread *td)
priv->flags |= NGDF_OPEN;
mtx_unlock(&priv->ngd_mtx);
- return(0);
+ return (0);
}
/*
@@ -387,14 +388,44 @@ ngdopen(struct cdev *dev, int flag, int mode, struct thread *td)
static int
ngdclose(struct cdev *dev, int flag, int mode, struct thread *td)
{
- priv_p priv = (priv_p )dev->si_drv1;
+ priv_p priv = (priv_p)dev->si_drv1;
DBG;
mtx_lock(&priv->ngd_mtx);
priv->flags &= ~NGDF_OPEN;
mtx_unlock(&priv->ngd_mtx);
- return(0);
+ return (0);
+}
+
+/*
+ * Process IOCTLs
+ *
+ * At this stage we only accept FIONBIO, to allow marking the device
+ * non-blocking, and FIOASYNC when async I/O is being disabled.
+ */
+static int
+ngdioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
+ struct thread *td)
+{
+ int error;
+
+ switch (cmd) {
+ case FIONBIO:
+ error = 0;
+ break;
+ case FIOASYNC:
+ if (*(int *)data != 0)
+ error = EINVAL;
+ else
+ error = 0;
+ break;
+ default:
+ error = ENOTTY;
+ }
+
+ return (error);
}
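
A userland sketch (hypothetical program, not from this change) of why accepting FIONBIO matters: marking the ngd device non-blocking now succeeds instead of failing with ENOTTY.

#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>

int
example_open_nonblocking(const char *path)	/* e.g. "/dev/ngd0" */
{
	int fd, on = 1;

	if ((fd = open(path, O_RDWR)) == -1)
		return (-1);
	if (ioctl(fd, FIONBIO, &on) == -1) {	/* handled by ngdioctl() above */
		close(fd);
		return (-1);
	}
	return (fd);
}
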
#if 0 /*
@@ -408,21 +439,22 @@ ngdclose(struct cdev *dev, int flag, int mode, struct thread *td)
*
*/
static int
-ngdioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
+ngdioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
+ struct thread *td)
{
struct ngd_softc *sc = &ngd_softc;
- struct ngd_connection * connection = NULL;
- struct ngd_connection * tmp;
+ struct ngd_connection *connection = NULL;
+ struct ngd_connection *tmp;
int error = 0;
struct ng_mesg *msg;
- struct ngd_param_s * datap;
+ struct ngd_param_s *datap;
DBG;
NG_MKMESSAGE(msg, NGM_DEVICE_COOKIE, cmd, sizeof(struct ngd_param_s),
M_NOWAIT);
if (msg == NULL) {
- printf("%s(): msg == NULL\n",__func__);
+ printf("%s(): msg == NULL\n", __func__);
goto nomsg;
}
@@ -431,12 +463,12 @@ ngdioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td
datap->p = addr;
NG_SEND_MSG_HOOK(error, sc->node, msg, connection->active_hook, 0);
- if(error)
- printf("%s(): NG_SEND_MSG_HOOK error: %d\n",__func__,error);
+ if (error)
+ printf("%s(): NG_SEND_MSG_HOOK error: %d\n", __func__, error);
nomsg:
- return(0);
+ return (0);
}
#endif /* if 0 */
@@ -447,7 +479,7 @@ nomsg:
static int
ngdread(struct cdev *dev, struct uio *uio, int flag)
{
- priv_p priv = (priv_p )dev->si_drv1;
+ priv_p priv = (priv_p)dev->si_drv1;
struct mbuf *m;
int len, error = 0;
@@ -457,7 +489,7 @@ ngdread(struct cdev *dev, struct uio *uio, int flag)
do {
IF_DEQUEUE(&priv->readq, m);
if (m == NULL) {
- if (flag & IO_NDELAY)
+ if (flag & O_NONBLOCK)
return (EWOULDBLOCK);
mtx_lock(&priv->ngd_mtx);
priv->flags |= NGDF_RWAIT;
@@ -483,14 +515,14 @@ ngdread(struct cdev *dev, struct uio *uio, int flag)
/*
* This function is called when our device is written to.
- * We read the data from userland into mbuf chain and pass it to the remote hook.
- *
+ * We read the data from userland into an mbuf chain and pass it to the
+ * remote hook.
*/
static int
ngdwrite(struct cdev *dev, struct uio *uio, int flag)
{
struct epoch_tracker et;
- priv_p priv = (priv_p )dev->si_drv1;
+ priv_p priv = (priv_p)dev->si_drv1;
struct mbuf *m;
int error = 0;
@@ -520,7 +552,7 @@ ngdwrite(struct cdev *dev, struct uio *uio, int flag)
static int
ngdpoll(struct cdev *dev, int events, struct thread *td)
{
- priv_p priv = (priv_p )dev->si_drv1;
+ priv_p priv = (priv_p)dev->si_drv1;
int revents = 0;
if (events & (POLLIN | POLLRDNORM) &&
diff --git a/sys/netgraph/ng_nat.c b/sys/netgraph/ng_nat.c
index defbe817becd..8b82d777caeb 100644
--- a/sys/netgraph/ng_nat.c
+++ b/sys/netgraph/ng_nat.c
@@ -818,7 +818,8 @@ ng_nat_rcvdata(hook_p hook, item_p item )
if (ip->ip_v != IPVERSION)
goto send; /* other IP version, let it pass */
- if (m->m_pkthdr.len < ipofs + ntohs(ip->ip_len))
+ uint16_t ip_len = ntohs(ip->ip_len);
+ if (m->m_pkthdr.len < (ipofs + ip_len))
goto send; /* packet too short (i.e. fragmented or broken) */
/*
@@ -852,50 +853,68 @@ ng_nat_rcvdata(hook_p hook, item_p item )
if (rval == PKT_ALIAS_RESPOND)
m->m_flags |= M_SKIP_FIREWALL;
- m->m_pkthdr.len = m->m_len = ntohs(ip->ip_len) + ipofs;
- if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
- ip->ip_p == IPPROTO_TCP) {
- struct tcphdr *th = (struct tcphdr *)((caddr_t)ip +
- (ip->ip_hl << 2));
+ /* Re-read just in case it has been updated */
+ ip_len = ntohs(ip->ip_len);
+ int new_m_len = ip_len + ipofs;
+ if (new_m_len > (m->m_len + M_TRAILINGSPACE(m))) {
/*
- * Here is our terrible HACK.
- *
- * Sometimes LibAlias edits contents of TCP packet.
- * In this case it needs to recompute full TCP
- * checksum. However, the problem is that LibAlias
- * doesn't have any idea about checksum offloading
- * in kernel. To workaround this, we do not do
- * checksumming in LibAlias, but only mark the
- * packets with TH_RES1 in the th_x2 field. If we
- * receive a marked packet, we calculate correct
- * checksum for it aware of offloading.
- *
- * Why do I do such a terrible hack instead of
- * recalculating checksum for each packet?
- * Because the previous checksum was not checked!
- * Recalculating checksums for EVERY packet will
- * hide ALL transmission errors. Yes, marked packets
- * still suffer from this problem. But, sigh, natd(8)
- * has this problem, too.
+ * This is just a safety railguard to make sure LibAlias has not
+ * screwed the IP packet up somehow, should probably be KASSERT()
+ * at some point. Calling in_delayed_cksum() will parse IP packet
+ * again and reliably panic if there is less data than the IP
+ * header declares, there might be some other places too.
*/
+ log(LOG_ERR, "ng_nat_rcvdata: outgoing packet corrupted, "
+ "not enough data: expected %d, available (%d - %d)\n",
+ ip_len, m->m_len + (int)M_TRAILINGSPACE(m), ipofs);
+ NG_FREE_ITEM(item);
+ return (ENXIO);
+ }
+
+ m->m_pkthdr.len = m->m_len = new_m_len;
- if (tcp_get_flags(th) & TH_RES1) {
- uint16_t ip_len = ntohs(ip->ip_len);
+ if ((ip->ip_off & htons(IP_OFFMASK)) != 0 || ip->ip_p != IPPROTO_TCP)
+ goto send;
- tcp_set_flags(th, tcp_get_flags(th) & ~TH_RES1);
- th->th_sum = in_pseudo(ip->ip_src.s_addr,
- ip->ip_dst.s_addr, htons(IPPROTO_TCP +
- ip_len - (ip->ip_hl << 2)));
+ uint16_t pl_offset = ip->ip_hl << 2;
+ struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + pl_offset);
- if ((m->m_pkthdr.csum_flags & CSUM_TCP) == 0) {
- m->m_pkthdr.csum_data = offsetof(struct tcphdr,
- th_sum);
- in_delayed_cksum(m);
- }
- }
- }
+ /*
+ * Here is our terrible HACK.
+ *
+ * Sometimes LibAlias edits contents of TCP packet.
+ * In this case it needs to recompute full TCP
+ * checksum. However, the problem is that LibAlias
+ * doesn't have any idea about checksum offloading
+ * in kernel. To workaround this, we do not do
+ * checksumming in LibAlias, but only mark the
+ * packets with TH_RES1 in the th_x2 field. If we
+ * receive a marked packet, we calculate correct
+ * checksum for it aware of offloading.
+ *
+ * Why do I do such a terrible hack instead of
+ * recalculating checksum for each packet?
+ * Because the previous checksum was not checked!
+ * Recalculating checksums for EVERY packet will
+ * hide ALL transmission errors. Yes, marked packets
+ * still suffer from this problem. But, sigh, natd(8)
+ * has this problem, too.
+ */
+
+ if (!(tcp_get_flags(th) & TH_RES1))
+ goto send;
+
+ tcp_set_flags(th, tcp_get_flags(th) & ~TH_RES1);
+ th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(IPPROTO_TCP + ip_len - pl_offset));
+
+ if ((m->m_pkthdr.csum_flags & CSUM_TCP) != 0)
+ goto send;
+
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ in_delayed_cksum_o(m, ipofs);
send:
if (hook == priv->in)
diff --git a/sys/netgraph/ng_parse.c b/sys/netgraph/ng_parse.c
index 448ecc92f075..5e1a1bb47ac0 100644
--- a/sys/netgraph/ng_parse.c
+++ b/sys/netgraph/ng_parse.c
@@ -1199,14 +1199,14 @@ ng_parse_composite(const struct ng_parse_type *type, const char *s,
int *off, const u_char *const start, u_char *const buf, int *buflen,
const enum comptype ctype)
{
- const int num = ng_get_composite_len(type, start, buf, ctype);
int nextIndex = 0; /* next implicit array index */
u_int index; /* field or element index */
int *foff; /* field value offsets in string */
int align, len, blen, error = 0;
/* Initialize */
- if (num < 0)
+ const int num = ng_get_composite_len(type, start, buf, ctype);
+ if (num < 0 || num > INT_MAX / sizeof(*foff))
return (EINVAL);
foff = malloc(num * sizeof(*foff), M_NETGRAPH_PARSE, M_NOWAIT | M_ZERO);
if (foff == NULL) {
diff --git a/sys/netinet/cc/cc.c b/sys/netinet/cc/cc.c
index d85ad4e9f4fd..c20a20cd983d 100644
--- a/sys/netinet/cc/cc.c
+++ b/sys/netinet/cc/cc.c
@@ -659,7 +659,7 @@ cc_modevent(module_t mod, int event_type, void *data)
case MOD_SHUTDOWN:
break;
case MOD_QUIESCE:
- /* Stop any new assigments */
+ /* Stop any new assignments */
err = cc_stop_new_assignments(algo);
break;
case MOD_UNLOAD:
diff --git a/sys/netinet/in.c b/sys/netinet/in.c
index 963449d4b4b1..70a61dbf93a3 100644
--- a/sys/netinet/in.c
+++ b/sys/netinet/in.c
@@ -522,9 +522,16 @@ in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct ucred *cred
/*
* Check if bridge wants to allow adding addrs to member interfaces.
*/
- if (ifp->if_bridge && bridge_member_ifaddrs_p &&
- !bridge_member_ifaddrs_p())
- return (EINVAL);
+ if (ifp->if_bridge != NULL && ifp->if_type != IFT_GIF &&
+ bridge_member_ifaddrs_p != NULL) {
+ if (bridge_member_ifaddrs_p())
+ if_printf(ifp, "WARNING: Assigning an IP address to "
+ "an interface which is also a bridge member is "
+ "deprecated and will be unsupported in a future "
+ "release.\n");
+ else
+ return (EINVAL);
+ }
/*
* See whether address already exist.
@@ -1882,6 +1889,8 @@ in_domifdetach(struct ifnet *ifp, void *aux)
{
struct in_ifinfo *ii = (struct in_ifinfo *)aux;
+ MPASS(ifp->if_afdata[AF_INET] == NULL);
+
igmp_domifdetach(ifp);
lltable_free(ii->ii_llt);
free(ii, M_IFADDR);
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index ec6ba8d92015..ef08b9cfd3d6 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -1044,14 +1044,14 @@ done:
}
void
-in_delayed_cksum(struct mbuf *m)
+in_delayed_cksum_o(struct mbuf *m, uint16_t iph_offset)
{
struct ip *ip;
struct udphdr *uh;
uint16_t cklen, csum, offset;
- ip = mtod(m, struct ip *);
- offset = ip->ip_hl << 2 ;
+ ip = (struct ip *)mtodo(m, iph_offset);
+ offset = iph_offset + (ip->ip_hl << 2);
if (m->m_pkthdr.csum_flags & CSUM_UDP) {
/* if udp header is not in the first mbuf copy udplen */
@@ -1078,6 +1078,13 @@ in_delayed_cksum(struct mbuf *m)
*(u_short *)mtodo(m, offset) = csum;
}
+void
+in_delayed_cksum(struct mbuf *m)
+{
+
+ in_delayed_cksum_o(m, 0);
+}
+
/*
* IP socket option processing.
*/
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
index f782ebc53eb0..c113484079a3 100644
--- a/sys/netinet/ip_var.h
+++ b/sys/netinet/ip_var.h
@@ -271,6 +271,7 @@ VNET_DECLARE(struct pfil_head *, inet_local_pfil_head);
#define PFIL_INET_LOCAL_NAME "inet-local"
void in_delayed_cksum(struct mbuf *m);
+void in_delayed_cksum_o(struct mbuf *m, uint16_t o);
/* Hooks for ipfw, dummynet, divert etc. Most are declared in raw_ip.c */
/*
diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c
index b77ebc928809..63bbe4bba11b 100644
--- a/sys/netinet/tcp_hpts.c
+++ b/sys/netinet/tcp_hpts.c
@@ -137,8 +137,6 @@
#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
-#include <netinet/ip_icmp.h> /* required for icmp_var.h */
-#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 6492495dc583..d5dc516c28aa 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -2562,299 +2562,270 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
hhook_run_tcp_est_in(tp, th, &to);
#endif
- if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
- maxseg = tcp_maxseg(tp);
- if (no_data &&
- (tiwin == tp->snd_wnd ||
- (tp->t_flags & TF_SACK_PERMIT))) {
+ if (SEQ_LT(th->th_ack, tp->snd_una)) {
+ /* This is old ACK information, don't process it. */
+ break;
+ }
+ if (th->th_ack == tp->snd_una) {
+ /* Check if this is a duplicate ACK. */
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ V_tcp_do_newsack) {
/*
- * If this is the first time we've seen a
- * FIN from the remote, this is not a
- * duplicate and it needs to be processed
- * normally. This happens during a
- * simultaneous close.
+ * If SEG.ACK == SND.UNA, RFC 6675 requires a
+ * duplicate ACK to selectively acknowledge
+ * at least one byte, which was not selectively
+ * acknowledged before.
*/
- if ((thflags & TH_FIN) &&
- (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
- tp->t_dupacks = 0;
+ if (sack_changed == SACK_NOCHANGE) {
break;
}
- TCPSTAT_INC(tcps_rcvdupack);
- /*
- * If we have outstanding data (other than
- * a window probe), this is a completely
- * duplicate ack (ie, window info didn't
- * change and FIN isn't set),
- * the ack is the biggest we've
- * seen and we've seen exactly our rexmt
- * threshold of them, assume a packet
- * has been dropped and retransmit it.
- * Kludge snd_nxt & the congestion
- * window so we send only this one
- * packet.
- *
- * We know we're losing at the current
- * window size so do congestion avoidance
- * (set ssthresh to half the current window
- * and pull our congestion window back to
- * the new ssthresh).
- *
- * Dup acks mean that packets have left the
- * network (they're now cached at the receiver)
- * so bump cwnd by the amount in the receiver
- * to keep a constant cwnd packets in the
- * network.
- *
- * When using TCP ECN, notify the peer that
- * we reduced the cwnd.
- */
+ } else {
/*
- * Following 2 kinds of acks should not affect
- * dupack counting:
- * 1) Old acks
- * 2) Acks with SACK but without any new SACK
- * information in them. These could result from
- * any anomaly in the network like a switch
- * duplicating packets or a possible DoS attack.
+ * If SEG.ACK == SND.UNA, RFC 5681 requires a
+ * duplicate ACK to have no data on it and to
+ * not be a window update.
*/
- if (th->th_ack != tp->snd_una ||
- (tcp_is_sack_recovery(tp, &to) &&
- (sack_changed == SACK_NOCHANGE))) {
+ if (!no_data || tiwin != tp->snd_wnd) {
break;
- } else if (!tcp_timer_active(tp, TT_REXMT)) {
- tp->t_dupacks = 0;
- } else if (++tp->t_dupacks > tcprexmtthresh ||
- IN_FASTRECOVERY(tp->t_flags)) {
- cc_ack_received(tp, th, nsegs,
- CC_DUPACK);
- if (V_tcp_do_prr &&
+ }
+ }
+ /*
+ * If this is the first time we've seen a
+ * FIN from the remote, this is not a
+ * duplicate ACK and it needs to be processed
+ * normally.
+ * This happens during a simultaneous close.
+ */
+ if ((thflags & TH_FIN) &&
+ (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
+ tp->t_dupacks = 0;
+ break;
+ }
+ /* Perform duplicate ACK processing. */
+ TCPSTAT_INC(tcps_rcvdupack);
+ maxseg = tcp_maxseg(tp);
+ if (!tcp_timer_active(tp, TT_REXMT)) {
+ tp->t_dupacks = 0;
+ } else if (++tp->t_dupacks > tcprexmtthresh ||
+ IN_FASTRECOVERY(tp->t_flags)) {
+ cc_ack_received(tp, th, nsegs, CC_DUPACK);
+ if (V_tcp_do_prr &&
+ IN_FASTRECOVERY(tp->t_flags) &&
+ (tp->t_flags & TF_SACK_PERMIT)) {
+ tcp_do_prr_ack(tp, th, &to,
+ sack_changed, &maxseg);
+ } else if (tcp_is_sack_recovery(tp, &to) &&
IN_FASTRECOVERY(tp->t_flags) &&
- (tp->t_flags & TF_SACK_PERMIT)) {
- tcp_do_prr_ack(tp, th, &to,
- sack_changed, &maxseg);
- } else if (tcp_is_sack_recovery(tp, &to) &&
- IN_FASTRECOVERY(tp->t_flags) &&
- (tp->snd_nxt == tp->snd_max)) {
- int awnd;
+ (tp->snd_nxt == tp->snd_max)) {
+ int awnd;
- /*
- * Compute the amount of data in flight first.
- * We can inject new data into the pipe iff
- * we have less than ssthresh
- * worth of data in flight.
- */
- awnd = tcp_compute_pipe(tp);
- if (awnd < tp->snd_ssthresh) {
- tp->snd_cwnd += imax(maxseg,
- imin(2 * maxseg,
- tp->sackhint.delivered_data));
- if (tp->snd_cwnd > tp->snd_ssthresh)
- tp->snd_cwnd = tp->snd_ssthresh;
- }
- } else if (tcp_is_sack_recovery(tp, &to) &&
- IN_FASTRECOVERY(tp->t_flags) &&
- SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+ /*
+ * Compute the amount of data in flight first.
+ * We can inject new data into the pipe iff
+ * we have less than ssthresh
+ * worth of data in flight.
+ */
+ awnd = tcp_compute_pipe(tp);
+ if (awnd < tp->snd_ssthresh) {
tp->snd_cwnd += imax(maxseg,
imin(2 * maxseg,
tp->sackhint.delivered_data));
- } else {
- tp->snd_cwnd += maxseg;
+ if (tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd = tp->snd_ssthresh;
}
- (void) tcp_output(tp);
- goto drop;
- } else if (tp->t_dupacks == tcprexmtthresh ||
- (tp->t_flags & TF_SACK_PERMIT &&
- V_tcp_do_newsack &&
- tp->sackhint.sacked_bytes >
- (tcprexmtthresh - 1) * maxseg)) {
+ } else if (tcp_is_sack_recovery(tp, &to) &&
+ IN_FASTRECOVERY(tp->t_flags) &&
+ SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+ tp->snd_cwnd += imax(maxseg,
+ imin(2 * maxseg,
+ tp->sackhint.delivered_data));
+ } else {
+ tp->snd_cwnd += maxseg;
+ }
+ (void) tcp_output(tp);
+ goto drop;
+ } else if (tp->t_dupacks == tcprexmtthresh ||
+ (tp->t_flags & TF_SACK_PERMIT &&
+ V_tcp_do_newsack &&
+ tp->sackhint.sacked_bytes >
+ (tcprexmtthresh - 1) * maxseg)) {
enter_recovery:
- /*
- * Above is the RFC6675 trigger condition of
- * more than (dupthresh-1)*maxseg sacked data.
- * If the count of holes in the
- * scoreboard is >= dupthresh, we could
- * also enter loss recovery, but don't
- * have that value readily available.
- */
- tp->t_dupacks = tcprexmtthresh;
- tcp_seq onxt = tp->snd_nxt;
+ /*
+ * Above is the RFC6675 trigger condition of
+ * more than (dupthresh-1)*maxseg sacked data.
+ * If the count of holes in the
+ * scoreboard is >= dupthresh, we could
+ * also enter loss recovery, but don't
+ * have that value readily available.
+ */
+ tp->t_dupacks = tcprexmtthresh;
+ tcp_seq onxt = tp->snd_nxt;
- /*
- * If we're doing sack, check to
- * see if we're already in sack
- * recovery. If we're not doing sack,
- * check to see if we're in newreno
- * recovery.
- */
- if (tcp_is_sack_recovery(tp, &to)) {
- if (IN_FASTRECOVERY(tp->t_flags)) {
- tp->t_dupacks = 0;
- break;
- }
- } else {
- if (SEQ_LEQ(th->th_ack,
- tp->snd_recover)) {
- tp->t_dupacks = 0;
- break;
- }
+ /*
+ * If we're doing sack, check to
+ * see if we're already in sack
+ * recovery. If we're not doing sack,
+ * check to see if we're in newreno
+ * recovery.
+ */
+ if (tcp_is_sack_recovery(tp, &to)) {
+ if (IN_FASTRECOVERY(tp->t_flags)) {
+ tp->t_dupacks = 0;
+ break;
}
- /* Congestion signal before ack. */
- cc_cong_signal(tp, th, CC_NDUPACK);
- cc_ack_received(tp, th, nsegs,
- CC_DUPACK);
- tcp_timer_activate(tp, TT_REXMT, 0);
- tp->t_rtttime = 0;
- if (V_tcp_do_prr) {
- /*
- * snd_ssthresh and snd_recover are
- * already updated by cc_cong_signal.
- */
- if (tcp_is_sack_recovery(tp, &to)) {
- /*
- * Include Limited Transmit
- * segments here
- */
- tp->sackhint.prr_delivered =
- imin(tp->snd_max - th->th_ack,
- (tp->snd_limited + 1) * maxseg);
- } else {
- tp->sackhint.prr_delivered =
- maxseg;
- }
- tp->sackhint.recover_fs = max(1,
- tp->snd_nxt - tp->snd_una);
+ } else {
+ if (SEQ_LEQ(th->th_ack,
+ tp->snd_recover)) {
+ tp->t_dupacks = 0;
+ break;
}
- tp->snd_limited = 0;
+ }
+ /* Congestion signal before ack. */
+ cc_cong_signal(tp, th, CC_NDUPACK);
+ cc_ack_received(tp, th, nsegs, CC_DUPACK);
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ tp->t_rtttime = 0;
+ if (V_tcp_do_prr) {
+ /*
+ * snd_ssthresh and snd_recover are
+ * already updated by cc_cong_signal.
+ */
if (tcp_is_sack_recovery(tp, &to)) {
- TCPSTAT_INC(tcps_sack_recovery_episode);
/*
- * When entering LR after RTO due to
- * Duplicate ACKs, retransmit existing
- * holes from the scoreboard.
+ * Include Limited Transmit
+ * segments here
*/
- tcp_resend_sackholes(tp);
- /* Avoid inflating cwnd in tcp_output */
- tp->snd_nxt = tp->snd_max;
- tp->snd_cwnd = tcp_compute_pipe(tp) +
+ tp->sackhint.prr_delivered =
+ imin(tp->snd_max - th->th_ack,
+ (tp->snd_limited + 1) * maxseg);
+ } else {
+ tp->sackhint.prr_delivered =
maxseg;
- (void) tcp_output(tp);
- /* Set cwnd to the expected flightsize */
- tp->snd_cwnd = tp->snd_ssthresh;
- if (SEQ_GT(th->th_ack, tp->snd_una)) {
- goto resume_partialack;
- }
- goto drop;
}
- tp->snd_nxt = th->th_ack;
- tp->snd_cwnd = maxseg;
- (void) tcp_output(tp);
- KASSERT(tp->snd_limited <= 2,
- ("%s: tp->snd_limited too big",
- __func__));
- tp->snd_cwnd = tp->snd_ssthresh +
- maxseg *
- (tp->t_dupacks - tp->snd_limited);
- if (SEQ_GT(onxt, tp->snd_nxt))
- tp->snd_nxt = onxt;
- goto drop;
- } else if (V_tcp_do_rfc3042) {
- /*
- * Process first and second duplicate
- * ACKs. Each indicates a segment
- * leaving the network, creating room
- * for more. Make sure we can send a
- * packet on reception of each duplicate
- * ACK by increasing snd_cwnd by one
- * segment. Restore the original
- * snd_cwnd after packet transmission.
- */
- cc_ack_received(tp, th, nsegs, CC_DUPACK);
- uint32_t oldcwnd = tp->snd_cwnd;
- tcp_seq oldsndmax = tp->snd_max;
- u_int sent;
- int avail;
-
- KASSERT(tp->t_dupacks == 1 ||
- tp->t_dupacks == 2,
- ("%s: dupacks not 1 or 2",
- __func__));
- if (tp->t_dupacks == 1)
- tp->snd_limited = 0;
- if ((tp->snd_nxt == tp->snd_max) &&
- (tp->t_rxtshift == 0))
- tp->snd_cwnd =
- SEQ_SUB(tp->snd_nxt,
- tp->snd_una) -
- tcp_sack_adjust(tp);
- tp->snd_cwnd +=
- (tp->t_dupacks - tp->snd_limited) *
- maxseg - tcp_sack_adjust(tp);
+ tp->sackhint.recover_fs = max(1,
+ tp->snd_nxt - tp->snd_una);
+ }
+ tp->snd_limited = 0;
+ if (tcp_is_sack_recovery(tp, &to)) {
+ TCPSTAT_INC(tcps_sack_recovery_episode);
/*
- * Only call tcp_output when there
- * is new data available to be sent
- * or we need to send an ACK.
+ * When entering LR after RTO due to
+ * Duplicate ACKs, retransmit existing
+ * holes from the scoreboard.
*/
- SOCK_SENDBUF_LOCK(so);
- avail = sbavail(&so->so_snd);
- SOCK_SENDBUF_UNLOCK(so);
- if (tp->t_flags & TF_ACKNOW ||
- (avail >=
- SEQ_SUB(tp->snd_nxt, tp->snd_una))) {
- (void) tcp_output(tp);
- }
- sent = SEQ_SUB(tp->snd_max, oldsndmax);
- if (sent > maxseg) {
- KASSERT((tp->t_dupacks == 2 &&
- tp->snd_limited == 0) ||
- (sent == maxseg + 1 &&
- tp->t_flags & TF_SENTFIN) ||
- (sent < 2 * maxseg &&
- tp->t_flags & TF_NODELAY),
- ("%s: sent too much: %u>%u",
- __func__, sent, maxseg));
- tp->snd_limited = 2;
- } else if (sent > 0) {
- ++tp->snd_limited;
- }
- tp->snd_cwnd = oldcwnd;
+ tcp_resend_sackholes(tp);
+ /* Avoid inflating cwnd in tcp_output */
+ tp->snd_nxt = tp->snd_max;
+ tp->snd_cwnd = tcp_compute_pipe(tp) +
+ maxseg;
+ (void) tcp_output(tp);
+ /* Set cwnd to the expected flightsize */
+ tp->snd_cwnd = tp->snd_ssthresh;
goto drop;
}
- }
- break;
- } else {
- /*
- * This ack is advancing the left edge, reset the
- * counter.
- */
- tp->t_dupacks = 0;
- /*
- * If this ack also has new SACK info, increment the
- * counter as per rfc6675. The variable
- * sack_changed tracks all changes to the SACK
- * scoreboard, including when partial ACKs without
- * SACK options are received, and clear the scoreboard
- * from the left side. Such partial ACKs should not be
- * counted as dupacks here.
- */
- if (tcp_is_sack_recovery(tp, &to) &&
- (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) ||
- ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) &&
- (tp->snd_nxt == tp->snd_max)) {
- tp->t_dupacks++;
- /* limit overhead by setting maxseg last */
- if (!IN_FASTRECOVERY(tp->t_flags) &&
- (tp->sackhint.sacked_bytes >
- ((tcprexmtthresh - 1) *
- (maxseg = tcp_maxseg(tp))))) {
- goto enter_recovery;
+ tp->snd_nxt = th->th_ack;
+ tp->snd_cwnd = maxseg;
+ (void) tcp_output(tp);
+ KASSERT(tp->snd_limited <= 2,
+ ("%s: tp->snd_limited too big",
+ __func__));
+ tp->snd_cwnd = tp->snd_ssthresh +
+ maxseg *
+ (tp->t_dupacks - tp->snd_limited);
+ if (SEQ_GT(onxt, tp->snd_nxt))
+ tp->snd_nxt = onxt;
+ goto drop;
+ } else if (V_tcp_do_rfc3042) {
+ /*
+ * Process first and second duplicate
+ * ACKs. Each indicates a segment
+ * leaving the network, creating room
+ * for more. Make sure we can send a
+ * packet on reception of each duplicate
+ * ACK by increasing snd_cwnd by one
+ * segment. Restore the original
+ * snd_cwnd after packet transmission.
+ */
+ cc_ack_received(tp, th, nsegs, CC_DUPACK);
+ uint32_t oldcwnd = tp->snd_cwnd;
+ tcp_seq oldsndmax = tp->snd_max;
+ u_int sent;
+ int avail;
+
+ KASSERT(tp->t_dupacks == 1 ||
+ tp->t_dupacks == 2,
+ ("%s: dupacks not 1 or 2",
+ __func__));
+ if (tp->t_dupacks == 1)
+ tp->snd_limited = 0;
+ if ((tp->snd_nxt == tp->snd_max) &&
+ (tp->t_rxtshift == 0))
+ tp->snd_cwnd =
+ SEQ_SUB(tp->snd_nxt, tp->snd_una);
+ tp->snd_cwnd +=
+ (tp->t_dupacks - tp->snd_limited) * maxseg;
+ tp->snd_cwnd -= tcp_sack_adjust(tp);
+ /*
+ * Only call tcp_output when there
+ * is new data available to be sent
+ * or we need to send an ACK.
+ */
+ SOCK_SENDBUF_LOCK(so);
+ avail = sbavail(&so->so_snd);
+ SOCK_SENDBUF_UNLOCK(so);
+ if (tp->t_flags & TF_ACKNOW ||
+ (avail >=
+ SEQ_SUB(tp->snd_nxt, tp->snd_una))) {
+ (void) tcp_output(tp);
+ }
+ sent = SEQ_SUB(tp->snd_max, oldsndmax);
+ if (sent > maxseg) {
+ KASSERT((tp->t_dupacks == 2 &&
+ tp->snd_limited == 0) ||
+ (sent == maxseg + 1 &&
+ tp->t_flags & TF_SENTFIN) ||
+ (sent < 2 * maxseg &&
+ tp->t_flags & TF_NODELAY),
+ ("%s: sent too much: %u>%u",
+ __func__, sent, maxseg));
+ tp->snd_limited = 2;
+ } else if (sent > 0) {
+ ++tp->snd_limited;
}
+ tp->snd_cwnd = oldcwnd;
+ goto drop;
}
+ break;
}
-
-resume_partialack:
KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
- ("%s: th_ack <= snd_una", __func__));
-
+ ("%s: SEQ_LEQ(th_ack, snd_una)", __func__));
+ /*
+ * This ack is advancing the left edge, reset the
+ * counter.
+ */
+ tp->t_dupacks = 0;
+ /*
+ * If this ack also has new SACK info, increment the
+ * t_dupacks as per RFC 6675. The variable
+ * sack_changed tracks all changes to the SACK
+ * scoreboard, including when partial ACKs without
+ * SACK options are received, and clear the scoreboard
+ * from the left side. Such partial ACKs should not be
+ * counted as dupacks here.
+ */
+ if (V_tcp_do_newsack &&
+ tcp_is_sack_recovery(tp, &to) &&
+ (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) ||
+ ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) &&
+ (tp->snd_nxt == tp->snd_max)) {
+ tp->t_dupacks++;
+ /* limit overhead by setting maxseg last */
+ if (!IN_FASTRECOVERY(tp->t_flags) &&
+ (tp->sackhint.sacked_bytes >
+ (tcprexmtthresh - 1) * (maxseg = tcp_maxseg(tp)))) {
+ goto enter_recovery;
+ }
+ }
/*
* If the congestion window was inflated to account
* for the other side's cached packets, retract it.
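
The rework above splits the ACK handling in tcp_do_segment() into explicit cases: ACKs strictly below snd_una are dropped as old information, ACKs equal to snd_una are screened as duplicates (under RFC 6675 a duplicate must newly SACK at least one byte; under RFC 5681 it must carry no data and not be a window update, with a first FIN exempted for simultaneous close), and only then does duplicate-ACK processing or the advancing-ACK path run. A condensed, self-contained sketch of that classification, with hypothetical helper and parameter names:

#include <stdbool.h>
#include <stdint.h>

typedef uint32_t tcp_seq;
#define SEQ_LT(a, b)	((int32_t)((a) - (b)) < 0)
#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)

enum ack_class { ACK_OLD, ACK_NOT_DUP, ACK_DUP, ACK_ADVANCES };

/*
 * Hypothetical condensation of the classification implemented above;
 * the real code reads snd_una, snd_wnd, t_flags, etc. from the tcpcb.
 */
static enum ack_class
classify_ack(tcp_seq th_ack, tcp_seq snd_una, bool sack_permitted,
    bool do_newsack, bool sack_changed, bool no_data, bool win_update,
    bool first_fin)
{
	if (SEQ_LT(th_ack, snd_una))
		return (ACK_OLD);		/* old ACK information */
	if (SEQ_GT(th_ack, snd_una))
		return (ACK_ADVANCES);		/* advances the left edge */
	if (sack_permitted && do_newsack) {
		/* RFC 6675: must newly SACK at least one byte. */
		if (!sack_changed)
			return (ACK_NOT_DUP);
	} else {
		/* RFC 5681: no data and not a window update. */
		if (!no_data || win_update)
			return (ACK_NOT_DUP);
	}
	if (first_fin)
		return (ACK_NOT_DUP);		/* simultaneous close */
	return (ACK_DUP);
}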
diff --git a/sys/netinet/tcp_log_buf.c b/sys/netinet/tcp_log_buf.c
index e24790ece43d..473c534ef83d 100644
--- a/sys/netinet/tcp_log_buf.c
+++ b/sys/netinet/tcp_log_buf.c
@@ -61,6 +61,9 @@
#include <net/vnet.h>
#include <netinet/in.h>
+#ifdef DDB
+#include <netinet/in_kdtrace.h>
+#endif
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/tcp_var.h>
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
index b6c55fac50b3..6e08ad2796a8 100644
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -128,8 +128,25 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW,
"Enable/Disable TCP SACK support");
VNET_DEFINE(int, tcp_do_newsack) = 1;
-SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, revised, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(tcp_do_newsack), 0,
+
+static int
+sysctl_net_inet_tcp_sack_revised(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ int new;
+
+ new = V_tcp_do_newsack;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ V_tcp_do_newsack = new;
+ gone_in(16, "net.inet.tcp.sack.revised will be deprecated."
+ " net.inet.tcp.sack.enable will always follow RFC6675 SACK.\n");
+ }
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp_sack, OID_AUTO, revised, CTLFLAG_VNET | CTLFLAG_RW | CTLTYPE_INT,
+ &VNET_NAME(tcp_do_newsack), 0, sysctl_net_inet_tcp_sack_revised, "CU",
"Use revised SACK loss recovery per RFC 6675");
VNET_DEFINE(int, tcp_do_lrd) = 1;
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index fed259f4d8e1..f2d7867df9b4 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -78,8 +78,6 @@
#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
-#include <netinet/ip_icmp.h> /* required for icmp_var.h */
-#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 71dd4de6baf9..11ef5ba706c5 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -77,8 +77,6 @@
#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
-#include <netinet/ip_icmp.h> /* required for icmp_var.h */
-#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c
index fc12672a45f7..4a0a5fc118f6 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.c
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.c
@@ -76,8 +76,6 @@
#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
-#include <netinet/ip_icmp.h> /* required for icmp_var.h */
-#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_stacks/rack_pcm.c b/sys/netinet/tcp_stacks/rack_pcm.c
index 759bfda98357..1a51097f627c 100644
--- a/sys/netinet/tcp_stacks/rack_pcm.c
+++ b/sys/netinet/tcp_stacks/rack_pcm.c
@@ -78,8 +78,6 @@
#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
-#include <netinet/ip_icmp.h> /* required for icmp_var.h */
-#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_stacks/tailq_hash.c b/sys/netinet/tcp_stacks/tailq_hash.c
index 5ba3e7cd36c0..ff01640524b6 100644
--- a/sys/netinet/tcp_stacks/tailq_hash.c
+++ b/sys/netinet/tcp_stacks/tailq_hash.c
@@ -51,8 +51,6 @@
#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
-#include <netinet/ip_icmp.h> /* required for icmp_var.h */
-#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 2e039ebbfdd2..cc83a21773a8 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -645,14 +645,14 @@ out:
static int
sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
{
- int error = ENOENT;
struct tcp_function_set fs;
struct tcp_function_block *blk;
+ int error;
- memset(&fs, 0, sizeof(fs));
+ memset(&fs, 0, sizeof(struct tcp_function_set));
rw_rlock(&tcp_function_lock);
blk = find_tcp_fb_locked(V_tcp_func_set_ptr, NULL);
- if (blk) {
+ if (blk != NULL) {
/* Found him */
strcpy(fs.function_set_name, blk->tfb_tcp_block_name);
fs.pcbcnt = blk->tfb_refcnt;
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 80e6b53d10df..1ee6c6e31f33 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -102,15 +102,15 @@
#include <security/mac/mac_framework.h>
-VNET_DEFINE_STATIC(int, tcp_syncookies) = 1;
+VNET_DEFINE_STATIC(bool, tcp_syncookies) = true;
#define V_tcp_syncookies VNET(tcp_syncookies)
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW,
+SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_syncookies), 0,
"Use TCP SYN cookies if the syncache overflows");
-VNET_DEFINE_STATIC(int, tcp_syncookiesonly) = 0;
+VNET_DEFINE_STATIC(bool, tcp_syncookiesonly) = false;
#define V_tcp_syncookiesonly VNET(tcp_syncookiesonly)
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW,
+SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_syncookiesonly), 0,
"Use only TCP SYN cookies");
@@ -553,9 +553,8 @@ syncache_timer(void *xsch)
static inline bool
syncache_cookiesonly(void)
{
-
- return (V_tcp_syncookies && (V_tcp_syncache.paused ||
- V_tcp_syncookiesonly));
+ return ((V_tcp_syncookies && V_tcp_syncache.paused) ||
+ V_tcp_syncookiesonly);
}
/*
@@ -1083,40 +1082,48 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
#endif
if (sc == NULL) {
- /*
- * There is no syncache entry, so see if this ACK is
- * a returning syncookie. To do this, first:
- * A. Check if syncookies are used in case of syncache
- * overflows
- * B. See if this socket has had a syncache entry dropped in
- * the recent past. We don't want to accept a bogus
- * syncookie if we've never received a SYN or accept it
- * twice.
- * C. check that the syncookie is valid. If it is, then
- * cobble up a fake syncache entry, and return.
- */
- if (locked && !V_tcp_syncookies) {
- SCH_UNLOCK(sch);
- TCPSTAT_INC(tcps_sc_spurcookie);
- if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
- log(LOG_DEBUG, "%s; %s: Spurious ACK, "
- "segment rejected (syncookies disabled)\n",
- s, __func__);
- goto failed;
- }
- if (locked && !V_tcp_syncookiesonly &&
- sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) {
+ if (locked) {
+ /*
+ * The syncache is currently in use (neither disabled,
+ * nor paused), but no entry was found.
+ */
+ if (!V_tcp_syncookies) {
+ /*
+ * Since no syncookies are used in case of
+ * a bucket overflow, don't even check for
+ * a valid syncookie.
+ */
+ SCH_UNLOCK(sch);
+ TCPSTAT_INC(tcps_sc_spurcookie);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious ACK, "
+ "segment rejected "
+ "(syncookies disabled)\n",
+ s, __func__);
+ goto failed;
+ }
+ if (sch->sch_last_overflow <
+ time_uptime - SYNCOOKIE_LIFETIME) {
+ /*
+ * Since the bucket did not overflow recently,
+ * don't even check for a valid syncookie.
+ */
+ SCH_UNLOCK(sch);
+ TCPSTAT_INC(tcps_sc_spurcookie);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious ACK, "
+ "segment rejected "
+ "(no syncache entry)\n",
+ s, __func__);
+ goto failed;
+ }
SCH_UNLOCK(sch);
- TCPSTAT_INC(tcps_sc_spurcookie);
- if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
- log(LOG_DEBUG, "%s; %s: Spurious ACK, "
- "segment rejected (no syncache entry)\n",
- s, __func__);
- goto failed;
}
- if (locked)
- SCH_UNLOCK(sch);
bzero(&scs, sizeof(scs));
+ /*
+ * Now check if the syncookie is valid. If it is, create an
+ * on-stack syncache entry.
+ */
if (syncookie_expand(inc, sch, &scs, th, to, *lsop, port)) {
sc = &scs;
TCPSTAT_INC(tcps_sc_recvcookie);
@@ -1291,10 +1298,9 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
if (__predict_false(*lsop == NULL)) {
TCPSTAT_INC(tcps_sc_aborted);
TCPSTATES_DEC(TCPS_SYN_RECEIVED);
- } else
+ } else if (sc != &scs)
TCPSTAT_INC(tcps_sc_completed);
-/* how do we find the inp for the new socket? */
if (sc != &scs)
syncache_free(sc);
return (1);
@@ -1669,7 +1675,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sc->sc_tsoff = tcp_new_ts_offset(inc);
}
if ((to->to_flags & TOF_SCALE) && (V_tcp_do_rfc1323 != 3)) {
- int wscale = 0;
+ u_int wscale = 0;
/*
* Pick the smallest possible scaling factor that
@@ -1719,13 +1725,13 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
if (V_tcp_do_ecn && (tp->t_flags2 & TF2_CANNOT_DO_ECN) == 0)
sc->sc_flags |= tcp_ecn_syncache_add(tcp_get_flags(th), iptos);
- if (V_tcp_syncookies)
+ if (V_tcp_syncookies || V_tcp_syncookiesonly)
sc->sc_iss = syncookie_generate(sch, sc);
else
sc->sc_iss = arc4random();
#ifdef INET6
if (autoflowlabel) {
- if (V_tcp_syncookies)
+ if (V_tcp_syncookies || V_tcp_syncookiesonly)
sc->sc_flowlabel = sc->sc_iss;
else
sc->sc_flowlabel = ip6_randomflowlabel();
@@ -2265,7 +2271,7 @@ syncookie_expand(struct in_conninfo *inc, const struct syncache_head *sch,
uint32_t hash;
uint8_t *secbits;
tcp_seq ack, seq;
- int wnd, wscale = 0;
+ int wnd;
union syncookie cookie;
/*
@@ -2316,12 +2322,14 @@ syncookie_expand(struct in_conninfo *inc, const struct syncache_head *sch,
sc->sc_peer_mss = tcp_sc_msstab[cookie.flags.mss_idx];
- /* We can simply recompute receive window scale we sent earlier. */
- while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < sb_max)
- wscale++;
-
/* Only use wscale if it was enabled in the original SYN. */
if (cookie.flags.wscale_idx > 0) {
+ u_int wscale = 0;
+
+ /* Recompute the receive window scale that was sent earlier. */
+ while (wscale < TCP_MAX_WINSHIFT &&
+ (TCP_MAXWIN << wscale) < sb_max)
+ wscale++;
sc->sc_requested_r_scale = wscale;
sc->sc_requested_s_scale = tcp_sc_wstab[cookie.flags.wscale_idx];
sc->sc_flags |= SCF_WINSCALE;
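
With both knobs converted to booleans, the reworked syncache_cookiesonly() above also changes how they interact: syncookies_only now forces cookie-only operation unconditionally, while syncookies by itself only takes effect while the syncache is paused due to overflow. A small sketch of the resulting decision, assuming the two vnet booleans and the pause flag as plain parameters:

#include <stdbool.h>

/*
 * Sketch of the decision now made by syncache_cookiesonly():
 *
 *   syncookies  syncookies_only  paused  -> cookie-only?
 *        0             0            x         no
 *        1             0            0         no
 *        1             0            1         yes
 *        x             1            x         yes
 */
static bool
cookies_only(bool syncookies, bool syncookies_only, bool paused)
{
	return ((syncookies && paused) || syncookies_only);
}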
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index 3e6519118a40..cea8a916679b 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -223,16 +223,18 @@ VNET_SYSUNINIT(udp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udp_destroy, NULL);
* udp_append() will convert to a sockaddr_in6 before passing the address
* into the socket code.
*
- * In the normal case udp_append() will return 0, indicating that you
- * must unlock the inp. However if a tunneling protocol is in place we increment
- * the inpcb refcnt and unlock the inp, on return from the tunneling protocol we
- * then decrement the reference count. If the inp_rele returns 1, indicating the
- * inp is gone, we return that to the caller to tell them *not* to unlock
- * the inp. In the case of multi-cast this will cause the distribution
- * to stop (though most tunneling protocols known currently do *not* use
- * multicast).
+ * In the normal case udp_append() will return 'false', indicating that you
+ * must unlock the inpcb. However if a tunneling protocol is in place we
+ * increment the inpcb refcnt and unlock the inpcb, on return from the tunneling
+ * protocol we then decrement the reference count. If in_pcbrele_rlocked()
+ * returns 'true', indicating the inpcb is gone, we return that to the caller
+ * to tell them *not* to unlock the inpcb. In the case of multicast this will
+ * cause the distribution to stop (though most tunneling protocols known
+ * currently do *not* use multicast).
+ *
+ * The mbuf is always consumed.
*/
-static int
+static bool
udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
struct sockaddr_in *udp_in)
{
@@ -255,15 +257,16 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
in_pcbref(inp);
INP_RUNLOCK(inp);
- filtered = (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&udp_in[0],
- up->u_tun_ctx);
+ filtered = (*up->u_tun_func)(n, off, inp,
+ (struct sockaddr *)&udp_in[0], up->u_tun_ctx);
INP_RLOCK(inp);
- if (in_pcbrele_rlocked(inp))
- return (1);
- if (filtered) {
- INP_RUNLOCK(inp);
- return (1);
+ if (in_pcbrele_rlocked(inp)) {
+ if (!filtered)
+ m_freem(n);
+ return (true);
}
+ if (filtered)
+ return (false);
}
off += sizeof(struct udphdr);
@@ -273,18 +276,18 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
if (IPSEC_ENABLED(ipv4) &&
IPSEC_CHECK_POLICY(ipv4, n, inp) != 0) {
m_freem(n);
- return (0);
+ return (false);
}
if (up->u_flags & UF_ESPINUDP) {/* IPSec UDP encaps. */
if (IPSEC_ENABLED(ipv4) &&
UDPENCAP_INPUT(ipv4, n, off, AF_INET) != 0)
- return (0); /* Consumed. */
+ return (false);
}
#endif /* IPSEC */
#ifdef MAC
if (mac_inpcb_check_deliver(inp, n) != 0) {
m_freem(n);
- return (0);
+ return (false);
}
#endif /* MAC */
if (inp->inp_flags & INP_CONTROLOPTS ||
@@ -330,7 +333,7 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
UDPSTAT_INC(udps_fullsock);
} else
sorwakeup_locked(so);
- return (0);
+ return (false);
}
static bool
@@ -699,7 +702,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
else
UDP_PROBE(receive, NULL, inp, ip, inp, uh);
- if (udp_append(inp, ip, m, iphlen, udp_in) == 0)
+ if (!udp_append(inp, ip, m, iphlen, udp_in))
INP_RUNLOCK(inp);
return (IPPROTO_DONE);
diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index ce0655408a28..4f756a75fac7 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -1235,11 +1235,20 @@ in6_addifaddr(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *i
int carp_attached = 0;
int error;
- /* Check if this interface is a bridge member */
- if (ifp->if_bridge && bridge_member_ifaddrs_p &&
- !bridge_member_ifaddrs_p()) {
- error = EINVAL;
- goto out;
+ /*
+ * Check if bridge wants to allow adding addrs to member interfaces.
+ */
+ if (ifp->if_bridge != NULL && ifp->if_type != IFT_GIF &&
+ bridge_member_ifaddrs_p != NULL) {
+ if (bridge_member_ifaddrs_p()) {
+ if_printf(ifp, "WARNING: Assigning an IP address to "
+ "an interface which is also a bridge member is "
+ "deprecated and will be unsupported in a future "
+ "release.\n");
+ } else {
+ error = EINVAL;
+ goto out;
+ }
}
/*
@@ -2618,6 +2627,8 @@ in6_domifdetach(struct ifnet *ifp, void *aux)
{
struct in6_ifextra *ext = (struct in6_ifextra *)aux;
+ MPASS(ifp->if_afdata[AF_INET6] == NULL);
+
mld_domifdetach(ifp);
scope6_ifdetach(ext->scope6_id);
nd6_ifdetach(ifp, ext->nd_ifinfo);
diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c
index f284f7fa5ffc..cc149616006e 100644
--- a/sys/netinet6/in6_ifattach.c
+++ b/sys/netinet6/in6_ifattach.c
@@ -83,7 +83,6 @@ VNET_DECLARE(struct inpcbinfo, ripcbinfo);
#define V_ripcbinfo VNET(ripcbinfo)
static int get_rand_ifid(struct ifnet *, struct in6_addr *);
-static int get_ifid(struct ifnet *, struct ifnet *, struct in6_addr *);
static int in6_ifattach_linklocal(struct ifnet *, struct ifnet *);
static int in6_ifattach_loopback(struct ifnet *);
static void in6_purgemaddrs(struct ifnet *);
@@ -271,8 +270,8 @@ found:
*
* altifp - secondary EUI64 source
*/
-static int
-get_ifid(struct ifnet *ifp0, struct ifnet *altifp,
+int
+in6_get_ifid(struct ifnet *ifp0, struct ifnet *altifp,
struct in6_addr *in6)
{
struct ifnet *ifp;
@@ -356,7 +355,7 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp)
ifra.ifra_addr.sin6_addr.s6_addr32[3] = htonl(1);
} else {
NET_EPOCH_ENTER(et);
- error = get_ifid(ifp, altifp, &ifra.ifra_addr.sin6_addr);
+ error = in6_get_ifid(ifp, altifp, &ifra.ifra_addr.sin6_addr);
NET_EPOCH_EXIT(et);
if (error != 0) {
nd6log((LOG_ERR,
diff --git a/sys/netinet6/in6_ifattach.h b/sys/netinet6/in6_ifattach.h
index 897926e90078..fd52422b10be 100644
--- a/sys/netinet6/in6_ifattach.h
+++ b/sys/netinet6/in6_ifattach.h
@@ -41,6 +41,7 @@ void in6_ifdetach(struct ifnet *);
void in6_ifdetach_destroy(struct ifnet *);
void in6_tmpaddrtimer(void *);
int in6_get_hw_ifid(struct ifnet *, struct in6_addr *);
+int in6_get_ifid(struct ifnet *, struct ifnet *, struct in6_addr *);
int in6_nigroup(struct ifnet *, const char *, int, struct in6_addr *);
int in6_nigroup_oldmcprefix(struct ifnet *, const char *, int, struct in6_addr *);
#endif /* _KERNEL */
diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c
index b9af0a78a584..6fe78083df23 100644
--- a/sys/netinet6/nd6_rtr.c
+++ b/sys/netinet6/nd6_rtr.c
@@ -1182,9 +1182,9 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast)
struct ifnet *ifp = pr->ndpr_ifp;
struct ifaddr *ifa;
struct in6_aliasreq ifra;
- struct in6_ifaddr *ia, *ib;
+ struct in6_ifaddr *ia = NULL, *ib = NULL;
int error, plen0;
- struct in6_addr mask;
+ struct in6_addr *ifid_addr = NULL, mask;
int prefixlen = pr->ndpr_plen;
int updateflags;
char ip6buf[INET6_ADDRSTRLEN];
@@ -1212,18 +1212,42 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast)
* with different interface identifiers.
*/
ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
- if (ifa)
+ if (ifa) {
ib = (struct in6_ifaddr *)ifa;
- else
- return NULL;
+ ifid_addr = &ib->ia_addr.sin6_addr;
+
+ /* prefixlen + ifidlen must be equal to 128 */
+ plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
+ if (prefixlen != plen0) {
+ ifa_free(ifa);
+ ifid_addr = NULL;
+ nd6log((LOG_DEBUG,
+ "%s: wrong prefixlen for %s (prefix=%d ifid=%d)\n",
+ __func__, if_name(ifp), prefixlen, 128 - plen0));
+ }
+ }
- /* prefixlen + ifidlen must be equal to 128 */
- plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
- if (prefixlen != plen0) {
- ifa_free(ifa);
+ /* No suitable LL address, get the ifid directly */
+ if (ifid_addr == NULL) {
+ struct in6_addr taddr;
+ ifa = ifa_alloc(sizeof(taddr), M_WAITOK);
+ if (ifa) {
+ ib = (struct in6_ifaddr *)ifa;
+ ifid_addr = &ib->ia_addr.sin6_addr;
+ if (in6_get_ifid(ifp, NULL, ifid_addr) != 0) {
+ nd6log((LOG_DEBUG,
+ "%s: failed to get ifid for %s\n",
+ __func__, if_name(ifp)));
+ ifa_free(ifa);
+ ifid_addr = NULL;
+ }
+ }
+ }
+
+ if (ifid_addr == NULL) {
nd6log((LOG_INFO,
- "%s: wrong prefixlen for %s (prefix=%d ifid=%d)\n",
- __func__, if_name(ifp), prefixlen, 128 - plen0));
+ "%s: could not determine ifid for %s\n",
+ __func__, if_name(ifp)));
return NULL;
}
@@ -1233,13 +1257,13 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast)
IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr, &mask);
/* interface ID */
ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
- (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
+ (ifid_addr->s6_addr32[0] & ~mask.s6_addr32[0]);
ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
- (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
+ (ifid_addr->s6_addr32[1] & ~mask.s6_addr32[1]);
ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
- (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
+ (ifid_addr->s6_addr32[2] & ~mask.s6_addr32[2]);
ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
- (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
+ (ifid_addr->s6_addr32[3] & ~mask.s6_addr32[3]);
ifa_free(ifa);
/* lifetimes. */
diff --git a/sys/netlink/route/iface.c b/sys/netlink/route/iface.c
index 8b871576d0b2..9beb80792af4 100644
--- a/sys/netlink/route/iface.c
+++ b/sys/netlink/route/iface.c
@@ -403,6 +403,7 @@ static const struct nlattr_parser nla_p_linfo[] = {
NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo);
static const struct nlattr_parser nla_p_if[] = {
+ { .type = IFLA_ADDRESS, .off = _OUT(ifla_address), .cb = nlattr_get_nla },
{ .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
{ .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 },
{ .type = IFLA_LINK, .off = _OUT(ifla_link), .cb = nlattr_get_uint32 },
diff --git a/sys/netlink/route/iface_drivers.c b/sys/netlink/route/iface_drivers.c
index 4bf913d9c978..4f1540740ead 100644
--- a/sys/netlink/route/iface_drivers.c
+++ b/sys/netlink/route/iface_drivers.c
@@ -82,26 +82,55 @@ _nl_modify_ifp_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs,
}
}
- if ((lattrs->ifi_change & IFF_UP) && (lattrs->ifi_flags & IFF_UP) == 0) {
- /* Request to down the interface */
- if_down(ifp);
+ if ((lattrs->ifi_change & IFF_UP) != 0 || lattrs->ifi_change == 0) {
+ /* Request to up or down the interface */
+ if (lattrs->ifi_flags & IFF_UP)
+ if_up(ifp);
+ else
+ if_down(ifp);
}
if (lattrs->ifla_mtu > 0) {
if (nlp_has_priv(npt->nlp, PRIV_NET_SETIFMTU)) {
struct ifreq ifr = { .ifr_mtu = lattrs->ifla_mtu };
- error = ifhwioctl(SIOCSIFMTU, ifp, (char *)&ifr, curthread);
+ error = ifhwioctl(SIOCSIFMTU, ifp, (char *)&ifr,
+ curthread);
+ if (error != 0) {
+ nlmsg_report_err_msg(npt, "Failed to set mtu");
+ return (error);
+ }
} else {
nlmsg_report_err_msg(npt, "Not enough privileges to set mtu");
return (EPERM);
}
}
- if (lattrs->ifi_change & IFF_PROMISC) {
- error = ifpromisc(ifp, lattrs->ifi_flags & IFF_PROMISC);
- if (error != 0) {
- nlmsg_report_err_msg(npt, "unable to set promisc");
- return (error);
+ if ((lattrs->ifi_change & IFF_PROMISC) != 0 ||
+ lattrs->ifi_change == 0)
+ /*
+ * When asking for IFF_PROMISC, set permanent flag instead
+ * (IFF_PPROMISC) as we have no way of doing promiscuity
+ * reference counting through ifpromisc(). Every call to this
+ * function either sets or unsets IFF_PROMISC, and ifi_change
+ * is usually set to 0xFFFFFFFF.
+ */
+ if_setppromisc(ifp, (lattrs->ifi_flags & IFF_PROMISC) != 0);
+
+ if (lattrs->ifla_address != NULL) {
+ if (nlp_has_priv(npt->nlp, PRIV_NET_SETIFMAC)) {
+ error = if_setlladdr(ifp,
+ NLA_DATA(lattrs->ifla_address),
+ NLA_DATA_LEN(lattrs->ifla_address));
+ if (error != 0) {
+ nlmsg_report_err_msg(npt,
+ "setting IFLA_ADDRESS failed with error code: %d",
+ error);
+ return (error);
+ }
+ } else {
+ nlmsg_report_err_msg(npt,
+ "Not enough privileges to set IFLA_ADDRESS");
+ return (EPERM);
}
}
diff --git a/sys/netlink/route/route_var.h b/sys/netlink/route/route_var.h
index b84b34461e35..41f110038b54 100644
--- a/sys/netlink/route/route_var.h
+++ b/sys/netlink/route/route_var.h
@@ -69,6 +69,7 @@ struct nl_parsed_link {
char *ifla_cloner;
char *ifla_ifalias;
struct nlattr *ifla_idata;
+ struct nlattr *ifla_address;
unsigned short ifi_type;
int ifi_index;
uint32_t ifla_link;
diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c
index 585c196391c0..7b9405ee1f8d 100644
--- a/sys/netpfil/pf/if_pfsync.c
+++ b/sys/netpfil/pf/if_pfsync.c
@@ -605,7 +605,8 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version)
rt_kif = rpool_first->kif;
/*
* Guess the AF of the route address, FreeBSD 13 does
- * not support af-to so it should be safe.
+ * not support af-to nor prefer-ipv6-nexthop
+ * so it should be safe.
*/
rt_af = r->af;
} else if (!PF_AZERO(&sp->pfs_1301.rt_addr, sp->pfs_1301.af)) {
@@ -634,8 +635,9 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version)
}
rt = sp->pfs_1400.rt;
/*
- * Guess the AF of the route address, FreeBSD 13 does
- * not support af-to so it should be safe.
+ * Guess the AF of the route address, FreeBSD 14 does
+ * not support af-to nor prefer-ipv6-nexthop
+ * so it should be safe.
*/
rt_af = sp->pfs_1400.af;
}
@@ -1741,16 +1743,16 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
return (EINVAL);
- data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK);
+ data = malloc(ifr->ifr_cap_nv.length, M_PF, M_WAITOK);
if ((error = copyin(ifr->ifr_cap_nv.buffer, data,
ifr->ifr_cap_nv.length)) != 0) {
- free(data, M_TEMP);
+ free(data, M_PF);
return (error);
}
if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) {
- free(data, M_TEMP);
+ free(data, M_PF);
return (EINVAL);
}
@@ -1758,7 +1760,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
pfsync_nvstatus_to_kstatus(nvl, &status);
nvlist_destroy(nvl);
- free(data, M_TEMP);
+ free(data, M_PF);
error = pfsync_kstatus_to_softc(&status, sc);
return (error);
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 8cd4fff95b15..5889bb9d68e6 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -1667,7 +1667,6 @@ pf_state_key_addr_setup(struct pf_pdesc *pd,
#ifdef INET6
struct nd_neighbor_solicit nd;
struct pf_addr *target;
- u_short action, reason;
if (pd->af == AF_INET || pd->proto != IPPROTO_ICMPV6)
goto copy;
@@ -1676,7 +1675,8 @@ pf_state_key_addr_setup(struct pf_pdesc *pd,
case ND_NEIGHBOR_SOLICIT:
if (multi)
return (-1);
- if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), &action, &reason, pd->af))
+ if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL,
+ pd->af))
return (-1);
target = (struct pf_addr *)&nd.nd_ns_target;
daddr = target;
@@ -1684,7 +1684,8 @@ pf_state_key_addr_setup(struct pf_pdesc *pd,
case ND_NEIGHBOR_ADVERT:
if (multi)
return (-1);
- if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), &action, &reason, pd->af))
+ if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL,
+ pd->af))
return (-1);
target = (struct pf_addr *)&nd.nd_ns_target;
saddr = target;
@@ -3632,6 +3633,18 @@ pf_translate_af(struct pf_pdesc *pd)
pd->src = (struct pf_addr *)&ip4->ip_src;
pd->dst = (struct pf_addr *)&ip4->ip_dst;
pd->off = sizeof(struct ip);
+ if (pd->m->m_pkthdr.csum_flags & CSUM_TCP_IPV6) {
+ pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP_IPV6;
+ pd->m->m_pkthdr.csum_flags |= CSUM_TCP;
+ }
+ if (pd->m->m_pkthdr.csum_flags & CSUM_UDP_IPV6) {
+ pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP_IPV6;
+ pd->m->m_pkthdr.csum_flags |= CSUM_UDP;
+ }
+ if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
+ pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
+ pd->m->m_pkthdr.csum_flags |= CSUM_SCTP;
+ }
break;
case AF_INET6:
ip6 = mtod(pd->m, struct ip6_hdr *);
@@ -3649,6 +3662,18 @@ pf_translate_af(struct pf_pdesc *pd)
pd->src = (struct pf_addr *)&ip6->ip6_src;
pd->dst = (struct pf_addr *)&ip6->ip6_dst;
pd->off = sizeof(struct ip6_hdr);
+ if (pd->m->m_pkthdr.csum_flags & CSUM_TCP) {
+ pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP;
+ pd->m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
+ }
+ if (pd->m->m_pkthdr.csum_flags & CSUM_UDP) {
+ pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP;
+ pd->m->m_pkthdr.csum_flags |= CSUM_UDP_IPV6;
+ }
+ if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP) {
+ pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
+ pd->m->m_pkthdr.csum_flags |= CSUM_SCTP_IPV6;
+ }
/*
* If we're dealing with a reassembled packet we need to adjust
@@ -4019,7 +4044,7 @@ pf_modulate_sack(struct pf_pdesc *pd, struct tcphdr *th,
optsoff = pd->off + sizeof(struct tcphdr);
#define TCPOLEN_MINSACK (TCPOLEN_SACK + 2)
if (olen < TCPOLEN_MINSACK ||
- !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af))
+ !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af))
return (0);
eoh = opts + olen;
@@ -5082,7 +5107,7 @@ pf_get_wscale(struct pf_pdesc *pd)
olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m,
- pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af))
+ pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af))
return (0);
opt = opts;
@@ -5107,7 +5132,7 @@ pf_get_mss(struct pf_pdesc *pd)
olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m,
- pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af))
+ pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af))
return (0);
opt = opts;
@@ -5960,7 +5985,9 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
if (r->rt) {
/*
* Set act.rt here instead of in pf_rule_to_actions() because
- * it is applied only from the last pass rule.
+ * it is applied only from the last pass rule. For rules
+ * with the prefer-ipv6-nexthop option act.rt_af is a hint
+ * about AF of the forwarded packet and might be changed.
*/
pd->act.rt = r->rt;
if (r->rt == PF_REPLYTO)
@@ -7633,7 +7660,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op)
while (off < len) {
struct sctp_paramhdr h;
- if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL, NULL,
+ if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL,
pd->af))
return (PF_DROP);
@@ -7653,7 +7680,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op)
return (PF_DROP);
if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t),
- NULL, NULL, pd->af))
+ NULL, pd->af))
return (PF_DROP);
if (in_nullhost(t))
@@ -7697,7 +7724,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op)
return (PF_DROP);
if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t),
- NULL, NULL, pd->af))
+ NULL, pd->af))
return (PF_DROP);
if (memcmp(&t, &pd->src->v6, sizeof(t)) == 0)
break;
@@ -7727,7 +7754,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op)
struct sctp_asconf_paramhdr ah;
if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah),
- NULL, NULL, pd->af))
+ NULL, pd->af))
return (PF_DROP);
ret = pf_multihome_scan(start + off + sizeof(ah),
@@ -7742,7 +7769,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op)
struct sctp_asconf_paramhdr ah;
if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah),
- NULL, NULL, pd->af))
+ NULL, pd->af))
return (PF_DROP);
ret = pf_multihome_scan(start + off + sizeof(ah),
ntohs(ah.ph.param_length) - sizeof(ah), pd,
@@ -8024,7 +8051,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
ipoff2 = pd->off + ICMP_MINLEN;
if (!pf_pull_hdr(pd->m, ipoff2, &h2, sizeof(h2),
- NULL, reason, pd2.af)) {
+ reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
"(ip)");
@@ -8045,6 +8072,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
return (PF_DROP);
pd2.tot_len = ntohs(h2.ip_len);
+ pd2.ttl = h2.ip_ttl;
pd2.src = (struct pf_addr *)&h2.ip_src;
pd2.dst = (struct pf_addr *)&h2.ip_dst;
pd2.ip_sum = &h2.ip_sum;
@@ -8055,7 +8083,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
ipoff2 = pd->off + sizeof(struct icmp6_hdr);
if (!pf_pull_hdr(pd->m, ipoff2, &h2_6, sizeof(h2_6),
- NULL, reason, pd2.af)) {
+ reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
"(ip6)");
@@ -8067,6 +8095,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
pd2.tot_len = ntohs(h2_6.ip6_plen) +
sizeof(struct ip6_hdr);
+ pd2.ttl = h2_6.ip6_hlim;
pd2.src = (struct pf_addr *)&h2_6.ip6_src;
pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
pd2.ip_sum = NULL;
@@ -8107,7 +8136,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
* expected. Don't access any TCP header fields after
* th_seq, an ackskew test is not possible.
*/
- if (!pf_pull_hdr(pd->m, pd2.off, th, 8, NULL, reason,
+ if (!pf_pull_hdr(pd->m, pd2.off, th, 8, reason,
pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
@@ -8303,7 +8332,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
int action;
if (!pf_pull_hdr(pd->m, pd2.off, uh, sizeof(*uh),
- NULL, reason, pd2.af)) {
+ reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
"(udp)");
@@ -8434,7 +8463,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
int copyback = 0;
int action;
- if (! pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), NULL, reason,
+ if (! pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), reason,
pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
@@ -8590,7 +8619,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
}
if (!pf_pull_hdr(pd->m, pd2.off, iih, ICMP_MINLEN,
- NULL, reason, pd2.af)) {
+ reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short i"
"(icmp)");
@@ -8710,7 +8739,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
}
if (!pf_pull_hdr(pd->m, pd2.off, iih,
- sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
+ sizeof(struct icmp6_hdr), reason, pd2.af)) {
DPFPRINTF(PF_DEBUG_MISC,
"pf: ICMP error message too short "
"(icmp6)");
@@ -8825,6 +8854,11 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
default: {
int action;
+ /*
+ * Placeholder value, so future calls to pf_change_ap()
+ * don't try to update a NULL checksum pointer.
+ */
+ pd->pcksum = &pd->sctp_dummy_sum;
key.af = pd2.af;
key.proto = pd2.proto;
pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
@@ -8887,7 +8921,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
*/
void *
pf_pull_hdr(const struct mbuf *m, int off, void *p, int len,
- u_short *actionp, u_short *reasonp, sa_family_t af)
+ u_short *reasonp, sa_family_t af)
{
int iplen = 0;
switch (af) {
@@ -8897,12 +8931,7 @@ pf_pull_hdr(const struct mbuf *m, int off, void *p, int len,
u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
if (fragoff) {
- if (fragoff >= len)
- ACTION_SET(actionp, PF_PASS);
- else {
- ACTION_SET(actionp, PF_DROP);
- REASON_SET(reasonp, PFRES_FRAG);
- }
+ REASON_SET(reasonp, PFRES_FRAG);
return (NULL);
}
iplen = ntohs(h->ip_len);
@@ -8919,7 +8948,6 @@ pf_pull_hdr(const struct mbuf *m, int off, void *p, int len,
#endif /* INET6 */
}
if (m->m_pkthdr.len < off + len || iplen < off + len) {
- ACTION_SET(actionp, PF_DROP);
REASON_SET(reasonp, PFRES_SHORT);
return (NULL);
}
@@ -8974,9 +9002,10 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
{
struct mbuf *m0, *m1, *md;
- struct route ro;
- const struct sockaddr *gw = &ro.ro_dst;
- struct sockaddr_in *dst;
+ struct route_in6 ro;
+ union sockaddr_union rt_gw;
+ const union sockaddr_union *gw = (const union sockaddr_union *)&ro.ro_dst;
+ union sockaddr_union *dst;
struct ip *ip;
struct ifnet *ifp = NULL;
int error = 0;
@@ -9071,10 +9100,35 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
ip = mtod(m0, struct ip *);
bzero(&ro, sizeof(ro));
- dst = (struct sockaddr_in *)&ro.ro_dst;
- dst->sin_family = AF_INET;
- dst->sin_len = sizeof(struct sockaddr_in);
- dst->sin_addr.s_addr = pd->act.rt_addr.v4.s_addr;
+ dst = (union sockaddr_union *)&ro.ro_dst;
+ dst->sin.sin_family = AF_INET;
+ dst->sin.sin_len = sizeof(struct sockaddr_in);
+ dst->sin.sin_addr = ip->ip_dst;
+ if (ifp) { /* Only needed in forward direction and route-to */
+ bzero(&rt_gw, sizeof(rt_gw));
+ ro.ro_flags |= RT_HAS_GW;
+ gw = &rt_gw;
+ switch (pd->act.rt_af) {
+#ifdef INET
+ case AF_INET:
+ rt_gw.sin.sin_family = AF_INET;
+ rt_gw.sin.sin_len = sizeof(struct sockaddr_in);
+ rt_gw.sin.sin_addr.s_addr = pd->act.rt_addr.v4.s_addr;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ rt_gw.sin6.sin6_family = AF_INET6;
+ rt_gw.sin6.sin6_len = sizeof(struct sockaddr_in6);
+ pf_addrcpy((struct pf_addr *)&rt_gw.sin6.sin6_addr,
+ &pd->act.rt_addr, AF_INET6);
+ break;
+#endif /* INET6 */
+ default:
+ /* Normal af-to without route-to */
+ break;
+ }
+ }
if (pd->dir == PF_IN) {
if (ip->ip_ttl <= IPTTLDEC) {
@@ -9098,10 +9152,10 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
/* Use the gateway if needed. */
if (nh->nh_flags & NHF_GATEWAY) {
- gw = &nh->gw_sa;
+ gw = (const union sockaddr_union *)&nh->gw_sa;
ro.ro_flags |= RT_HAS_GW;
} else {
- dst->sin_addr = ip->ip_dst;
+ dst->sin.sin_addr = ip->ip_dst;
}
/*
@@ -9126,6 +9180,9 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
PF_STATE_UNLOCK(s);
}
+ /* It must have been either set from rt_af or from fib4_lookup */
+ KASSERT(gw->sin.sin_family != 0, ("%s: gw address family undetermined", __func__));
+
if (ifp == NULL) {
m0 = pd->m;
pd->m = NULL;
@@ -9210,9 +9267,11 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
m_clrprotoflags(m0); /* Avoid confusing lower layers. */
md = m0;
- error = pf_dummynet_route(pd, s, r, ifp, gw, &md);
+ error = pf_dummynet_route(pd, s, r, ifp,
+ (const struct sockaddr *)gw, &md);
if (md != NULL) {
- error = (*ifp->if_output)(ifp, md, gw, &ro);
+ error = (*ifp->if_output)(ifp, md,
+ (const struct sockaddr *)gw, (struct route *)&ro);
SDT_PROBE2(pf, ip, route_to, output, ifp, error);
}
goto done;
@@ -9253,9 +9312,11 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
md = m0;
pd->pf_mtag = pf_find_mtag(md);
error = pf_dummynet_route(pd, s, r, ifp,
- gw, &md);
+ (const struct sockaddr *)gw, &md);
if (md != NULL) {
- error = (*ifp->if_output)(ifp, md, gw, &ro);
+ error = (*ifp->if_output)(ifp, md,
+ (const struct sockaddr *)gw,
+ (struct route *)&ro);
SDT_PROBE2(pf, ip, route_to, output, ifp, error);
}
} else
@@ -9962,9 +10023,12 @@ pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason)
pd->proto = h->ip_p;
/* IGMP packets have router alert options, allow them */
if (pd->proto == IPPROTO_IGMP) {
- /* According to RFC 1112 ttl must be set to 1. */
- if ((h->ip_ttl != 1) ||
- !IN_MULTICAST(ntohl(h->ip_dst.s_addr))) {
+ /*
+ * According to RFC 1112, ttl must be set to 1 in all IGMP
+ * packets sent to 224.0.0.1.
+ */
+ if ((h->ip_ttl != 1) &&
+ (h->ip_dst.s_addr == INADDR_ALLHOSTS_GROUP)) {
DPFPRINTF(PF_DEBUG_MISC, "Invalid IGMP");
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
@@ -9982,7 +10046,7 @@ pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason)
end < pd->off + sizeof(ext))
return (PF_PASS);
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
- NULL, reason, AF_INET)) {
+ reason, AF_INET)) {
DPFPRINTF(PF_DEBUG_MISC, "IP short exthdr");
return (PF_DROP);
}
@@ -10008,7 +10072,7 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
while (off < end) {
if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type,
- sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) {
+ sizeof(opt.ip6o_type), reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt type");
return (PF_DROP);
}
@@ -10016,7 +10080,7 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
off++;
continue;
}
- if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), NULL,
+ if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt),
reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt");
return (PF_DROP);
@@ -10041,7 +10105,7 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
REASON_SET(reason, PFRES_IPOPTIONS);
return (PF_DROP);
}
- if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), NULL,
+ if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo),
reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbo");
return (PF_DROP);
@@ -10090,7 +10154,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
break;
case IPPROTO_HOPOPTS:
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
- NULL, reason, AF_INET6)) {
+ reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr");
return (PF_DROP);
}
@@ -10117,7 +10181,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
return (PF_DROP);
}
if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag),
- NULL, reason, AF_INET6)) {
+ reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short fragment");
return (PF_DROP);
}
@@ -10145,7 +10209,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
return (PF_PASS);
}
if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr),
- NULL, reason, AF_INET6)) {
+ reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short rthdr");
return (PF_DROP);
}
@@ -10166,7 +10230,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
case IPPROTO_AH:
case IPPROTO_DSTOPTS:
if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
- NULL, reason, AF_INET6)) {
+ reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr");
return (PF_DROP);
}
@@ -10199,7 +10263,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
return (PF_PASS);
}
if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6),
- NULL, reason, AF_INET6)) {
+ reason, AF_INET6)) {
DPFPRINTF(PF_DEBUG_MISC,
"IPv6 short icmp6hdr");
return (PF_DROP);
@@ -10432,7 +10496,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
case IPPROTO_TCP: {
struct tcphdr *th = &pd->hdr.tcp;
- if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), action,
+ if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th),
reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
@@ -10448,7 +10512,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
case IPPROTO_UDP: {
struct udphdr *uh = &pd->hdr.udp;
- if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), action,
+ if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh),
reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
@@ -10469,7 +10533,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
}
case IPPROTO_SCTP: {
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.sctp, sizeof(pd->hdr.sctp),
- action, reason, af)) {
+ reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (-1);
@@ -10499,7 +10563,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
}
case IPPROTO_ICMP: {
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN,
- action, reason, af)) {
+ reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (-1);
@@ -10513,7 +10577,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
size_t icmp_hlen = sizeof(struct icmp6_hdr);
if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
- action, reason, af)) {
+ reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (-1);
@@ -10539,7 +10603,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
}
if (icmp_hlen > sizeof(struct icmp6_hdr) &&
!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
- action, reason, af)) {
+ reason, af)) {
*action = PF_DROP;
REASON_SET(reason, PFRES_SHORT);
return (-1);
@@ -10549,6 +10613,13 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
break;
}
#endif /* INET6 */
+ default:
+ /*
+ * Placeholder value, so future calls to pf_change_ap() don't
+ * try to update a NULL checksum pointer.
+ */
+ pd->pcksum = &pd->sctp_dummy_sum;
+ break;
}
if (pd->sport)
@@ -10556,6 +10627,8 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
if (pd->dport)
pd->odport = pd->ndport = *pd->dport;
+ MPASS(pd->pcksum != NULL);
+
return (0);
}
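
Among the pf.c changes above, pf_translate_af() now carries pending (delayed) transport checksums across an af-to translation by swapping the IPv4 and IPv6 flavours of the offload flags. The same three-line pattern is repeated for TCP, UDP and SCTP; a hedged helper-style sketch of it, where csum_remap() is a hypothetical name:

/*
 * Sketch of the csum_flags swap open-coded per protocol in
 * pf_translate_af(): move a pending offloaded checksum request from
 * one address family's flag to the other's.
 */
static void
csum_remap(struct mbuf *m, int from, int to)
{
	if (m->m_pkthdr.csum_flags & from) {
		m->m_pkthdr.csum_flags &= ~from;
		m->m_pkthdr.csum_flags |= to;
	}
}

When translating to AF_INET this would be called as csum_remap(pd->m, CSUM_TCP_IPV6, CSUM_TCP) (and likewise for UDP and SCTP); the AF_INET6 direction swaps the argument order.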
diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h
index 51b3fd6390e1..54ffdbed3de5 100644
--- a/sys/netpfil/pf/pf.h
+++ b/sys/netpfil/pf/pf.h
@@ -131,6 +131,7 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL,
#define PF_POOL_TYPEMASK 0x0f
#define PF_POOL_STICKYADDR 0x20
#define PF_POOL_ENDPI 0x40
+#define PF_POOL_IPV6NH 0x80
#define PF_WSCALE_FLAG 0x80
#define PF_WSCALE_MASK 0x0f
@@ -246,6 +247,12 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL,
#define SCNT_SRC_NODE_REMOVALS 2
#define SCNT_MAX 3
+/* fragment counters */
+#define NCNT_FRAG_SEARCH 0
+#define NCNT_FRAG_INSERT 1
+#define NCNT_FRAG_REMOVALS 2
+#define NCNT_MAX 3
+
#define PF_TABLE_NAME_SIZE 32
#define PF_QNAME_SIZE 64
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index e5da05a958f6..5bfbb2c83f0e 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -187,6 +187,7 @@ VNET_DEFINE(uma_zone_t, pf_tag_z);
#define V_pf_tag_z VNET(pf_tag_z)
static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db");
static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules");
+MALLOC_DEFINE(M_PF, "pf", "pf(4)");
#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
@@ -420,6 +421,8 @@ pfattach_vnet(void)
pf_counter_u64_init(&V_pf_status.fcounters[i], M_WAITOK);
for (int i = 0; i < SCNT_MAX; i++)
V_pf_status.scounters[i] = counter_u64_alloc(M_WAITOK);
+ for (int i = 0; i < NCNT_MAX; i++)
+ V_pf_status.ncounters[i] = counter_u64_alloc(M_WAITOK);
if (swi_add(&V_pf_swi_ie, "pf send", pf_intr, curvnet, SWI_NET,
INTR_MPSAFE, &V_pf_swi_cookie) != 0)
@@ -1181,18 +1184,18 @@ pf_rule_tree_alloc(int flags)
{
struct pf_krule_global *tree;
- tree = malloc(sizeof(struct pf_krule_global), M_TEMP, flags);
+ tree = malloc(sizeof(struct pf_krule_global), M_PF, flags);
if (tree == NULL)
return (NULL);
RB_INIT(tree);
return (tree);
}
-static void
+void
pf_rule_tree_free(struct pf_krule_global *tree)
{
- free(tree, M_TEMP);
+ free(tree, M_PF);
}
static int
@@ -1211,7 +1214,7 @@ pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor)
return (ENOMEM);
rs = pf_find_or_create_kruleset(anchor);
if (rs == NULL) {
- free(tree, M_TEMP);
+ pf_rule_tree_free(tree);
return (EINVAL);
}
pf_rule_tree_free(rs->rules[rs_num].inactive.tree);
@@ -1432,7 +1435,7 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
rs->rules[rs_num].inactive.rcount = 0;
rs->rules[rs_num].inactive.open = 0;
pf_remove_if_empty_kruleset(rs);
- free(old_tree, M_TEMP);
+ pf_rule_tree_free(old_tree);
return (0);
}
@@ -2276,6 +2279,7 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket,
rule->nat.cur = TAILQ_FIRST(&rule->nat.list);
rule->rdr.cur = TAILQ_FIRST(&rule->rdr.list);
rule->route.cur = TAILQ_FIRST(&rule->route.list);
+ rule->route.ipv6_nexthop_af = AF_INET6;
TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr,
rule, entries);
ruleset->rules[rs_num].inactive.rcount++;
@@ -2506,6 +2510,8 @@ pf_ioctl_clear_status(void)
pf_counter_u64_zero(&V_pf_status.fcounters[i]);
for (int i = 0; i < SCNT_MAX; i++)
counter_u64_zero(V_pf_status.scounters[i]);
+ for (int i = 0; i < NCNT_MAX; i++)
+ counter_u64_zero(V_pf_status.ncounters[i]);
for (int i = 0; i < KLCNT_MAX; i++)
counter_u64_zero(V_pf_status.lcounters[i]);
V_pf_status.since = time_uptime;
@@ -4076,7 +4082,7 @@ DIOCCHANGERULE_error:
out = ps->ps_states;
pstore = mallocarray(slice_count,
- sizeof(struct pfsync_state_1301), M_TEMP, M_WAITOK | M_ZERO);
+ sizeof(struct pfsync_state_1301), M_PF, M_WAITOK | M_ZERO);
nr = 0;
for (i = 0; i <= V_pf_hashmask; i++) {
@@ -4098,10 +4104,10 @@ DIOCGETSTATES_retry:
if (count > slice_count) {
PF_HASHROW_UNLOCK(ih);
- free(pstore, M_TEMP);
+ free(pstore, M_PF);
slice_count = count * 2;
pstore = mallocarray(slice_count,
- sizeof(struct pfsync_state_1301), M_TEMP,
+ sizeof(struct pfsync_state_1301), M_PF,
M_WAITOK | M_ZERO);
goto DIOCGETSTATES_retry;
}
@@ -4123,13 +4129,15 @@ DIOCGETSTATES_retry:
PF_HASHROW_UNLOCK(ih);
error = copyout(pstore, out,
sizeof(struct pfsync_state_1301) * count);
- if (error)
+ if (error) {
+ free(pstore, M_PF);
goto fail;
+ }
out = ps->ps_states + nr;
}
DIOCGETSTATES_full:
ps->ps_len = sizeof(struct pfsync_state_1301) * nr;
- free(pstore, M_TEMP);
+ free(pstore, M_PF);
break;
}
@@ -4155,7 +4163,7 @@ DIOCGETSTATES_full:
out = ps->ps_states;
pstore = mallocarray(slice_count,
- sizeof(struct pf_state_export), M_TEMP, M_WAITOK | M_ZERO);
+ sizeof(struct pf_state_export), M_PF, M_WAITOK | M_ZERO);
nr = 0;
for (i = 0; i <= V_pf_hashmask; i++) {
@@ -4177,10 +4185,10 @@ DIOCGETSTATESV2_retry:
if (count > slice_count) {
PF_HASHROW_UNLOCK(ih);
- free(pstore, M_TEMP);
+ free(pstore, M_PF);
slice_count = count * 2;
pstore = mallocarray(slice_count,
- sizeof(struct pf_state_export), M_TEMP,
+ sizeof(struct pf_state_export), M_PF,
M_WAITOK | M_ZERO);
goto DIOCGETSTATESV2_retry;
}
@@ -4201,13 +4209,15 @@ DIOCGETSTATESV2_retry:
PF_HASHROW_UNLOCK(ih);
error = copyout(pstore, out,
sizeof(struct pf_state_export) * count);
- if (error)
+ if (error) {
+ free(pstore, M_PF);
goto fail;
+ }
out = ps->ps_states + nr;
}
DIOCGETSTATESV2_full:
ps->ps_len = nr * sizeof(struct pf_state_export);
- free(pstore, M_TEMP);
+ free(pstore, M_PF);
break;
}
@@ -4737,17 +4747,17 @@ DIOCCHANGEADDR_error:
totlen = io->pfrio_size * sizeof(struct pfr_table);
pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfrts, totlen);
if (error) {
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
goto fail;
}
PF_RULES_WLOCK();
error = pfr_add_tables(pfrts, io->pfrio_size,
&io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
PF_RULES_WUNLOCK();
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
break;
}
@@ -4769,17 +4779,17 @@ DIOCCHANGEADDR_error:
totlen = io->pfrio_size * sizeof(struct pfr_table);
pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfrts, totlen);
if (error) {
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
goto fail;
}
PF_RULES_WLOCK();
error = pfr_del_tables(pfrts, io->pfrio_size,
&io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
PF_RULES_WUNLOCK();
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
break;
}
@@ -4805,7 +4815,7 @@ DIOCCHANGEADDR_error:
totlen = io->pfrio_size * sizeof(struct pfr_table);
pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
- M_TEMP, M_NOWAIT | M_ZERO);
+ M_PF, M_NOWAIT | M_ZERO);
if (pfrts == NULL) {
error = ENOMEM;
PF_RULES_RUNLOCK();
@@ -4816,7 +4826,7 @@ DIOCCHANGEADDR_error:
PF_RULES_RUNLOCK();
if (error == 0)
error = copyout(pfrts, io->pfrio_buffer, totlen);
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
break;
}
@@ -4843,7 +4853,7 @@ DIOCCHANGEADDR_error:
totlen = io->pfrio_size * sizeof(struct pfr_tstats);
pfrtstats = mallocarray(io->pfrio_size,
- sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT | M_ZERO);
+ sizeof(struct pfr_tstats), M_PF, M_NOWAIT | M_ZERO);
if (pfrtstats == NULL) {
error = ENOMEM;
PF_RULES_RUNLOCK();
@@ -4856,7 +4866,7 @@ DIOCCHANGEADDR_error:
PF_TABLE_STATS_UNLOCK();
if (error == 0)
error = copyout(pfrtstats, io->pfrio_buffer, totlen);
- free(pfrtstats, M_TEMP);
+ free(pfrtstats, M_PF);
break;
}
@@ -4881,10 +4891,10 @@ DIOCCHANGEADDR_error:
totlen = io->pfrio_size * sizeof(struct pfr_table);
pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfrts, totlen);
if (error) {
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
goto fail;
}
@@ -4894,7 +4904,7 @@ DIOCCHANGEADDR_error:
&io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL);
PF_RULES_RUNLOCK();
PF_TABLE_STATS_UNLOCK();
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
break;
}
@@ -4922,10 +4932,10 @@ DIOCCHANGEADDR_error:
totlen = io->pfrio_size * sizeof(struct pfr_table);
pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfrts, totlen);
if (error) {
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -4933,7 +4943,7 @@ DIOCCHANGEADDR_error:
io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange,
&io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
PF_RULES_WUNLOCK();
- free(pfrts, M_TEMP);
+ free(pfrts, M_PF);
break;
}
@@ -4968,10 +4978,10 @@ DIOCCHANGEADDR_error:
}
totlen = io->pfrio_size * sizeof(struct pfr_addr);
pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfras, totlen);
if (error) {
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -4982,7 +4992,7 @@ DIOCCHANGEADDR_error:
PF_RULES_WUNLOCK();
if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
error = copyout(pfras, io->pfrio_buffer, totlen);
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
break;
}
@@ -5003,10 +5013,10 @@ DIOCCHANGEADDR_error:
}
totlen = io->pfrio_size * sizeof(struct pfr_addr);
pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfras, totlen);
if (error) {
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -5016,7 +5026,7 @@ DIOCCHANGEADDR_error:
PF_RULES_WUNLOCK();
if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
error = copyout(pfras, io->pfrio_buffer, totlen);
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
break;
}
@@ -5040,11 +5050,11 @@ DIOCCHANGEADDR_error:
goto fail;
}
totlen = count * sizeof(struct pfr_addr);
- pfras = mallocarray(count, sizeof(struct pfr_addr), M_TEMP,
+ pfras = mallocarray(count, sizeof(struct pfr_addr), M_PF,
M_WAITOK);
error = copyin(io->pfrio_buffer, pfras, totlen);
if (error) {
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -5055,7 +5065,7 @@ DIOCCHANGEADDR_error:
PF_RULES_WUNLOCK();
if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
error = copyout(pfras, io->pfrio_buffer, totlen);
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
break;
}
@@ -5076,14 +5086,14 @@ DIOCCHANGEADDR_error:
}
totlen = io->pfrio_size * sizeof(struct pfr_addr);
pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
- M_TEMP, M_WAITOK | M_ZERO);
+ M_PF, M_WAITOK | M_ZERO);
PF_RULES_RLOCK();
error = pfr_get_addrs(&io->pfrio_table, pfras,
&io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
PF_RULES_RUNLOCK();
if (error == 0)
error = copyout(pfras, io->pfrio_buffer, totlen);
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
break;
}
@@ -5104,14 +5114,14 @@ DIOCCHANGEADDR_error:
}
totlen = io->pfrio_size * sizeof(struct pfr_astats);
pfrastats = mallocarray(io->pfrio_size,
- sizeof(struct pfr_astats), M_TEMP, M_WAITOK | M_ZERO);
+ sizeof(struct pfr_astats), M_PF, M_WAITOK | M_ZERO);
PF_RULES_RLOCK();
error = pfr_get_astats(&io->pfrio_table, pfrastats,
&io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
PF_RULES_RUNLOCK();
if (error == 0)
error = copyout(pfrastats, io->pfrio_buffer, totlen);
- free(pfrastats, M_TEMP);
+ free(pfrastats, M_PF);
break;
}
@@ -5132,10 +5142,10 @@ DIOCCHANGEADDR_error:
}
totlen = io->pfrio_size * sizeof(struct pfr_addr);
pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfras, totlen);
if (error) {
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -5145,7 +5155,7 @@ DIOCCHANGEADDR_error:
PF_RULES_WUNLOCK();
if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
error = copyout(pfras, io->pfrio_buffer, totlen);
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
break;
}
@@ -5166,10 +5176,10 @@ DIOCCHANGEADDR_error:
}
totlen = io->pfrio_size * sizeof(struct pfr_addr);
pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfras, totlen);
if (error) {
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
goto fail;
}
PF_RULES_RLOCK();
@@ -5179,7 +5189,7 @@ DIOCCHANGEADDR_error:
PF_RULES_RUNLOCK();
if (error == 0)
error = copyout(pfras, io->pfrio_buffer, totlen);
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
break;
}
@@ -5200,10 +5210,10 @@ DIOCCHANGEADDR_error:
}
totlen = io->pfrio_size * sizeof(struct pfr_addr);
pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->pfrio_buffer, pfras, totlen);
if (error) {
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -5211,7 +5221,7 @@ DIOCCHANGEADDR_error:
io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr,
io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL);
PF_RULES_WUNLOCK();
- free(pfras, M_TEMP);
+ free(pfras, M_PF);
break;
}
@@ -5249,10 +5259,10 @@ DIOCCHANGEADDR_error:
}
totlen = sizeof(struct pfioc_trans_e) * io->size;
ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->array, ioes, totlen);
if (error) {
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -5262,7 +5272,7 @@ DIOCCHANGEADDR_error:
case PF_RULESET_ETH:
if ((error = pf_begin_eth(&ioe->ticket, ioe->anchor))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail;
}
break;
@@ -5270,13 +5280,13 @@ DIOCCHANGEADDR_error:
case PF_RULESET_ALTQ:
if (ioe->anchor[0]) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EINVAL;
goto fail;
}
if ((error = pf_begin_altq(&ioe->ticket))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail;
}
break;
@@ -5291,7 +5301,7 @@ DIOCCHANGEADDR_error:
if ((error = pfr_ina_begin(&table,
&ioe->ticket, NULL, 0))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail;
}
break;
@@ -5300,7 +5310,7 @@ DIOCCHANGEADDR_error:
if ((error = pf_begin_rules(&ioe->ticket,
ioe->rs_num, ioe->anchor))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail;
}
break;
@@ -5308,7 +5318,7 @@ DIOCCHANGEADDR_error:
}
PF_RULES_WUNLOCK();
error = copyout(ioes, io->array, totlen);
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
break;
}
@@ -5330,10 +5340,10 @@ DIOCCHANGEADDR_error:
}
totlen = sizeof(struct pfioc_trans_e) * io->size;
ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->array, ioes, totlen);
if (error) {
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -5344,7 +5354,7 @@ DIOCCHANGEADDR_error:
if ((error = pf_rollback_eth(ioe->ticket,
ioe->anchor))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
@@ -5352,13 +5362,13 @@ DIOCCHANGEADDR_error:
case PF_RULESET_ALTQ:
if (ioe->anchor[0]) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EINVAL;
goto fail;
}
if ((error = pf_rollback_altq(ioe->ticket))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
@@ -5373,7 +5383,7 @@ DIOCCHANGEADDR_error:
if ((error = pfr_ina_rollback(&table,
ioe->ticket, NULL, 0))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
@@ -5382,14 +5392,14 @@ DIOCCHANGEADDR_error:
if ((error = pf_rollback_rules(ioe->ticket,
ioe->rs_num, ioe->anchor))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
}
}
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
break;
}
@@ -5415,10 +5425,10 @@ DIOCCHANGEADDR_error:
totlen = sizeof(struct pfioc_trans_e) * io->size;
ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
- M_TEMP, M_WAITOK);
+ M_PF, M_WAITOK);
error = copyin(io->array, ioes, totlen);
if (error) {
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail;
}
PF_RULES_WLOCK();
@@ -5431,7 +5441,7 @@ DIOCCHANGEADDR_error:
if (ers == NULL || ioe->ticket == 0 ||
ioe->ticket != ers->inactive.ticket) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EINVAL;
goto fail;
}
@@ -5440,14 +5450,14 @@ DIOCCHANGEADDR_error:
case PF_RULESET_ALTQ:
if (ioe->anchor[0]) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EINVAL;
goto fail;
}
if (!V_altqs_inactive_open || ioe->ticket !=
V_ticket_altqs_inactive) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EBUSY;
goto fail;
}
@@ -5458,7 +5468,7 @@ DIOCCHANGEADDR_error:
if (rs == NULL || !rs->topen || ioe->ticket !=
rs->tticket) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EBUSY;
goto fail;
}
@@ -5467,7 +5477,7 @@ DIOCCHANGEADDR_error:
if (ioe->rs_num < 0 || ioe->rs_num >=
PF_RULESET_MAX) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EINVAL;
goto fail;
}
@@ -5477,7 +5487,7 @@ DIOCCHANGEADDR_error:
rs->rules[ioe->rs_num].inactive.ticket !=
ioe->ticket) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
error = EBUSY;
goto fail;
}
@@ -5490,7 +5500,7 @@ DIOCCHANGEADDR_error:
case PF_RULESET_ETH:
if ((error = pf_commit_eth(ioe->ticket, ioe->anchor))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
@@ -5498,7 +5508,7 @@ DIOCCHANGEADDR_error:
case PF_RULESET_ALTQ:
if ((error = pf_commit_altq(ioe->ticket))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
@@ -5513,7 +5523,7 @@ DIOCCHANGEADDR_error:
if ((error = pfr_ina_commit(&table,
ioe->ticket, NULL, NULL, 0))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
@@ -5522,7 +5532,7 @@ DIOCCHANGEADDR_error:
if ((error = pf_commit_rules(ioe->ticket,
ioe->rs_num, ioe->anchor))) {
PF_RULES_WUNLOCK();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
goto fail; /* really bad */
}
break;
@@ -5536,7 +5546,7 @@ DIOCCHANGEADDR_error:
else
dehook_pf_eth();
- free(ioes, M_TEMP);
+ free(ioes, M_PF);
break;
}
@@ -5565,7 +5575,7 @@ DIOCCHANGEADDR_error:
nr = 0;
- p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK | M_ZERO);
+ p = pstore = malloc(psn->psn_len, M_PF, M_WAITOK | M_ZERO);
for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask;
i++, sh++) {
PF_HASHROW_LOCK(sh);
@@ -5584,11 +5594,11 @@ DIOCCHANGEADDR_error:
error = copyout(pstore, psn->psn_src_nodes,
sizeof(struct pf_src_node) * nr);
if (error) {
- free(pstore, M_TEMP);
+ free(pstore, M_PF);
goto fail;
}
psn->psn_len = sizeof(struct pf_src_node) * nr;
- free(pstore, M_TEMP);
+ free(pstore, M_PF);
break;
}
@@ -5655,13 +5665,13 @@ DIOCCHANGEADDR_error:
bufsiz = io->pfiio_size * sizeof(struct pfi_kif);
ifstore = mallocarray(io->pfiio_size, sizeof(struct pfi_kif),
- M_TEMP, M_WAITOK | M_ZERO);
+ M_PF, M_WAITOK | M_ZERO);
PF_RULES_RLOCK();
pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size);
PF_RULES_RUNLOCK();
error = copyout(ifstore, io->pfiio_buffer, bufsiz);
- free(ifstore, M_TEMP);
+ free(ifstore, M_PF);
break;
}
@@ -6943,6 +6953,8 @@ pf_unload_vnet(void)
pf_counter_u64_deinit(&V_pf_status.fcounters[i]);
for (int i = 0; i < SCNT_MAX; i++)
counter_u64_free(V_pf_status.scounters[i]);
+ for (int i = 0; i < NCNT_MAX; i++)
+ counter_u64_free(V_pf_status.ncounters[i]);
rm_destroy(&V_pf_rules_lock);
sx_destroy(&V_pf_ioctl_lock);
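
The allocations in the ioctl paths above move from the generic M_TEMP bucket to the dedicated M_PF malloc type defined near the top of pf_ioctl.c, which presumably lets pf(4) allocations show up under their own name in vmstat -m instead of being lumped in with "temp". A minimal sketch of that malloc(9) type pattern, assuming an illustrative type name:

/* Sketch of a dedicated malloc(9) type; M_EXAMPLE is an illustrative name. */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/systm.h>

MALLOC_DEFINE(M_EXAMPLE, "example", "example(4) allocations");

static void *
example_zalloc(size_t len)
{
	/* Accounted against "example" rather than the shared "temp" pool. */
	return (malloc(len, M_EXAMPLE, M_WAITOK | M_ZERO));
}

static void
example_free(void *p)
{
	free(p, M_EXAMPLE);
}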
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
index bc9e1dc72902..b8b5157c9b15 100644
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -545,11 +545,18 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
uint64_t hashidx;
int cnt;
sa_family_t wanted_af;
+ u_int8_t pool_type;
+ bool prefer_ipv6_nexthop = rpool->opts & PF_POOL_IPV6NH;
KASSERT(saf != 0, ("%s: saf == 0", __func__));
KASSERT(naf != NULL, ("%s: naf = NULL", __func__));
KASSERT((*naf) != 0, ("%s: *naf = 0", __func__));
+ /*
+	 * The given (*naf) is a hint about the AF of the forwarded packet.
+ * It might be changed if prefer_ipv6_nexthop is enabled and
+ * the combination of nexthop AF and packet AF allows for it.
+ */
wanted_af = (*naf);
mtx_lock(&rpool->mtx);
@@ -594,19 +601,38 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
} else {
raddr = &rpool->cur->addr.v.a.addr;
rmask = &rpool->cur->addr.v.a.mask;
- /*
- * For single addresses check their address family. Unless they
- * have none, which happens when addresses are added with
- * the old ioctl mechanism. In such case trust that the address
- * has the proper AF.
- */
- if (rpool->cur->af && rpool->cur->af != wanted_af) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
+ }
+
+ /*
+	 * For pools with a single host with the prefer-ipv6-nexthop option
+	 * we can return a pool address of any AF, unless the forwarded
+	 * packet is IPv6, in which case only an IPv6 pool address may be
+	 * returned. Without prefer-ipv6-nexthop we can only return a pool
+	 * address of the wanted AF, unless the pool address's AF is unknown,
+	 * which happens when old ioctls have been used to set up the pool.
+ *
+ * Round-robin pools have their own logic for retrying next addresses.
+ */
+ pool_type = rpool->opts & PF_POOL_TYPEMASK;
+ if (pool_type == PF_POOL_NONE || pool_type == PF_POOL_BITMASK ||
+ ((pool_type == PF_POOL_RANDOM || pool_type == PF_POOL_SRCHASH) &&
+ rpool->cur->addr.type != PF_ADDR_TABLE &&
+ rpool->cur->addr.type != PF_ADDR_DYNIFTL)) {
+ if (prefer_ipv6_nexthop) {
+ if (rpool->cur->af == AF_INET && (*naf) == AF_INET6) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
+ wanted_af = rpool->cur->af;
+ } else {
+ if (rpool->cur->af != 0 && rpool->cur->af != (*naf)) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
}
}
- switch (rpool->opts & PF_POOL_TYPEMASK) {
+ switch (pool_type) {
case PF_POOL_NONE:
pf_addrcpy(naddr, raddr, wanted_af);
break;
@@ -631,10 +657,22 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
else
rpool->tblidx = (int)arc4random_uniform(cnt);
memset(&rpool->counter, 0, sizeof(rpool->counter));
+ if (prefer_ipv6_nexthop)
+ wanted_af = AF_INET6;
+ retry_other_af_random:
if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
wanted_af, pf_islinklocal, false)) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx; /* unsupported */
+ /* Retry with IPv4 nexthop for IPv4 traffic */
+ if (prefer_ipv6_nexthop &&
+ wanted_af == AF_INET6 &&
+ (*naf) == AF_INET) {
+ wanted_af = AF_INET;
+ goto retry_other_af_random;
+ } else {
+ /* no hosts in wanted AF */
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
}
pf_addrcpy(naddr, &rpool->counter, wanted_af);
} else if (init_addr != NULL && PF_AZERO(init_addr,
@@ -702,10 +740,22 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
else
rpool->tblidx = (int)(hashidx % cnt);
memset(&rpool->counter, 0, sizeof(rpool->counter));
+ if (prefer_ipv6_nexthop)
+ wanted_af = AF_INET6;
+ retry_other_af_srchash:
if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
wanted_af, pf_islinklocal, false)) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx; /* unsupported */
+ /* Retry with IPv4 nexthop for IPv4 traffic */
+ if (prefer_ipv6_nexthop &&
+ wanted_af == AF_INET6 &&
+ (*naf) == AF_INET) {
+ wanted_af = AF_INET;
+ goto retry_other_af_srchash;
+ } else {
+ /* no hosts in wanted AF */
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
}
pf_addrcpy(naddr, &rpool->counter, wanted_af);
} else {
@@ -718,6 +768,9 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
{
struct pf_kpooladdr *acur = rpool->cur;
+ retry_other_af_rr:
+ if (prefer_ipv6_nexthop)
+ wanted_af = rpool->ipv6_nexthop_af;
if (rpool->cur->addr.type == PF_ADDR_TABLE) {
if (!pfr_pool_get(rpool->cur->addr.p.tbl,
&rpool->tblidx, &rpool->counter, wanted_af,
@@ -728,46 +781,55 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
&rpool->tblidx, &rpool->counter, wanted_af,
pf_islinklocal, true))
goto get_addr;
- } else if (pf_match_addr(0, raddr, rmask, &rpool->counter,
- wanted_af))
+ } else if (rpool->cur->af == wanted_af &&
+ pf_match_addr(0, raddr, rmask, &rpool->counter, wanted_af))
goto get_addr;
-
+ if (prefer_ipv6_nexthop &&
+ (*naf) == AF_INET && wanted_af == AF_INET6) {
+ /* Reset table index when changing wanted AF. */
+ rpool->tblidx = -1;
+ rpool->ipv6_nexthop_af = AF_INET;
+ goto retry_other_af_rr;
+ }
try_next:
+ /* Reset prefer-ipv6-nexthop search to IPv6 when iterating pools. */
+ rpool->ipv6_nexthop_af = AF_INET6;
if (TAILQ_NEXT(rpool->cur, entries) == NULL)
rpool->cur = TAILQ_FIRST(&rpool->list);
else
rpool->cur = TAILQ_NEXT(rpool->cur, entries);
+ try_next_ipv6_nexthop_rr:
+ /* Reset table index when iterating pools or changing wanted AF. */
rpool->tblidx = -1;
+ if (prefer_ipv6_nexthop)
+ wanted_af = rpool->ipv6_nexthop_af;
if (rpool->cur->addr.type == PF_ADDR_TABLE) {
- if (pfr_pool_get(rpool->cur->addr.p.tbl,
+ if (!pfr_pool_get(rpool->cur->addr.p.tbl,
&rpool->tblidx, &rpool->counter, wanted_af, NULL,
- true)) {
- /* table contains no address of type 'wanted_af' */
- if (rpool->cur != acur)
- goto try_next;
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
- }
+ true))
+ goto get_addr;
} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
- if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
- &rpool->tblidx, &rpool->counter, wanted_af,
- pf_islinklocal, true)) {
- /* interface has no address of type 'wanted_af' */
- if (rpool->cur != acur)
- goto try_next;
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
- }
+ if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+ &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal,
+ true))
+ goto get_addr;
} else {
- raddr = &rpool->cur->addr.v.a.addr;
- rmask = &rpool->cur->addr.v.a.mask;
- if (rpool->cur->af && rpool->cur->af != wanted_af) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
+ if (rpool->cur->af == wanted_af) {
+ raddr = &rpool->cur->addr.v.a.addr;
+ rmask = &rpool->cur->addr.v.a.mask;
+ pf_addrcpy(&rpool->counter, raddr, wanted_af);
+ goto get_addr;
}
- pf_addrcpy(&rpool->counter, raddr, wanted_af);
}
-
+ if (prefer_ipv6_nexthop &&
+ (*naf) == AF_INET && wanted_af == AF_INET6) {
+ rpool->ipv6_nexthop_af = AF_INET;
+ goto try_next_ipv6_nexthop_rr;
+ }
+ if (rpool->cur != acur)
+ goto try_next;
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
get_addr:
pf_addrcpy(naddr, &rpool->counter, wanted_af);
if (init_addr != NULL && PF_AZERO(init_addr, wanted_af))
@@ -777,9 +839,16 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
}
}
+ if (wanted_af == 0) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
+
if (nkif)
*nkif = rpool->cur->kif;
+ (*naf) = wanted_af;
+
done_pool_mtx:
mtx_unlock(&rpool->mtx);
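
With PF_POOL_IPV6NH set, pf_map_addr() above now looks for an IPv6 next hop first and falls back to an IPv4 one only when the forwarded packet itself is IPv4; IPv6 packets must get an IPv6 next hop or the mapping fails. A simplified sketch of that preference order, standing in for the pfr_pool_get() retries rather than reproducing the pf(4) code:

/* Simplified illustration of the next-hop AF preference; not the pf(4) code. */
#include <sys/socket.h>
#include <stdbool.h>

static int
pick_nexthop_af(int packet_af, bool prefer_ipv6_nexthop,
    bool (*pool_has_af)(int af))
{
	int wanted_af = prefer_ipv6_nexthop ? AF_INET6 : packet_af;

	if (pool_has_af(wanted_af))
		return (wanted_af);
	/* Only IPv4 traffic may fall back to an IPv4 next hop. */
	if (prefer_ipv6_nexthop && wanted_af == AF_INET6 &&
	    packet_af == AF_INET && pool_has_af(AF_INET))
		return (AF_INET);
	return (0);	/* no usable next hop: the mapping fails */
}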
diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c
index 45b5b8dd5fef..5c8f56ea4567 100644
--- a/sys/netpfil/pf/pf_nl.c
+++ b/sys/netpfil/pf/pf_nl.c
@@ -763,6 +763,8 @@ static const struct nlattr_parser nla_p_rule[] = {
{ .type = PF_RT_RCV_IFNOT, .off = _OUT(rcvifnot), .cb = nlattr_get_bool },
{ .type = PF_RT_PKTRATE, .off = _OUT(pktrate), .arg = &threshold_parser, .cb = nlattr_get_nested },
{ .type = PF_RT_MAX_PKT_SIZE, .off = _OUT(max_pkt_size), .cb = nlattr_get_uint16 },
+ { .type = PF_RT_TYPE_2, .off = _OUT(type), .cb = nlattr_get_uint16 },
+ { .type = PF_RT_CODE_2, .off = _OUT(code), .cb = nlattr_get_uint16 },
};
NL_DECLARE_ATTR_PARSER(rule_parser, nla_p_rule);
#undef _OUT
@@ -984,8 +986,12 @@ pf_handle_getrule(struct nlmsghdr *hdr, struct nl_pstate *npt)
nlattr_add_u8(nw, PF_RT_AF, rule->af);
nlattr_add_u8(nw, PF_RT_NAF, rule->naf);
nlattr_add_u8(nw, PF_RT_PROTO, rule->proto);
+
nlattr_add_u8(nw, PF_RT_TYPE, rule->type);
nlattr_add_u8(nw, PF_RT_CODE, rule->code);
+ nlattr_add_u16(nw, PF_RT_TYPE_2, rule->type);
+ nlattr_add_u16(nw, PF_RT_CODE_2, rule->code);
+
nlattr_add_u8(nw, PF_RT_FLAGS, rule->flags);
nlattr_add_u8(nw, PF_RT_FLAGSET, rule->flagset);
nlattr_add_u8(nw, PF_RT_MIN_TTL, rule->min_ttl);
@@ -1228,6 +1234,9 @@ pf_handle_get_status(struct nlmsghdr *hdr, struct nl_pstate *npt)
V_pf_status.fcounters);
nlattr_add_counters(nw, PF_GS_SCOUNTERS, SCNT_MAX, pf_fcounter,
V_pf_status.scounters);
+ nlattr_add_counters(nw, PF_GS_NCOUNTERS, NCNT_MAX, pf_fcounter,
+ V_pf_status.ncounters);
+ nlattr_add_u64(nw, PF_GS_FRAGMENTS, pf_normalize_get_frag_count());
pfi_update_status(V_pf_status.ifname, &s);
nlattr_add_u64_array(nw, PF_GS_BCOUNTERS, 2 * 2, (uint64_t *)s.bcounters);
@@ -1945,7 +1954,7 @@ pf_handle_get_tstats(struct nlmsghdr *hdr, struct nl_pstate *npt)
n = pfr_table_count(&attrs.pfrio_table, attrs.pfrio_flags);
pfrtstats = mallocarray(n,
- sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT | M_ZERO);
+ sizeof(struct pfr_tstats), M_PF, M_NOWAIT | M_ZERO);
error = pfr_get_tstats(&attrs.pfrio_table, pfrtstats,
&n, attrs.pfrio_flags | PFR_FLAG_USERIOCTL);
@@ -1997,7 +2006,7 @@ pf_handle_get_tstats(struct nlmsghdr *hdr, struct nl_pstate *npt)
}
}
}
- free(pfrtstats, M_TEMP);
+ free(pfrtstats, M_PF);
if (!nlmsg_end_dump(npt->nw, error, hdr)) {
NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
diff --git a/sys/netpfil/pf/pf_nl.h b/sys/netpfil/pf/pf_nl.h
index 87daac393821..b769421bbfcc 100644
--- a/sys/netpfil/pf/pf_nl.h
+++ b/sys/netpfil/pf/pf_nl.h
@@ -283,6 +283,8 @@ enum pf_rule_type_t {
PF_RT_SRC_NODES_ROUTE = 81, /* u64 */
PF_RT_PKTRATE = 82, /* nested, pf_threshold_type_t */
PF_RT_MAX_PKT_SIZE = 83, /* u16 */
+ PF_RT_TYPE_2 = 84, /* u16 */
+ PF_RT_CODE_2 = 85, /* u16 */
};
enum pf_addrule_type_t {
@@ -350,6 +352,8 @@ enum pf_get_status_types_t {
PF_GS_CHKSUM = 14, /* byte array */
PF_GS_PCOUNTERS = 15, /* u64 array */
PF_GS_BCOUNTERS = 16, /* u64 array */
+ PF_GS_NCOUNTERS = 17, /* nested, */
+ PF_GS_FRAGMENTS = 18, /* u64, */
};
enum pf_natlook_types_t {
diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c
index a684d778ab42..53010222dd07 100644
--- a/sys/netpfil/pf/pf_norm.c
+++ b/sys/netpfil/pf/pf_norm.c
@@ -211,6 +211,12 @@ pf_normalize_cleanup(void)
mtx_destroy(&V_pf_frag_mtx);
}
+uint64_t
+pf_normalize_get_frag_count(void)
+{
+ return (uma_zone_get_cur(V_pf_frent_z));
+}
+
static int
pf_frnode_compare(struct pf_frnode *a, struct pf_frnode *b)
{
@@ -314,6 +320,7 @@ pf_free_fragment(struct pf_fragment *frag)
/* Free all fragment entries */
while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) {
TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
+ counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1);
m_freem(frent->fe_m);
uma_zfree(V_pf_frent_z, frent);
@@ -331,6 +338,7 @@ pf_find_fragment(struct pf_frnode *key, uint32_t id)
PF_FRAG_ASSERT();
frnode = RB_FIND(pf_frnode_tree, &V_pf_frnode_tree, key);
+ counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_SEARCH], 1);
if (frnode == NULL)
return (NULL);
MPASS(frnode->fn_fragments >= 1);
@@ -438,6 +446,7 @@ pf_frent_insert(struct pf_fragment *frag, struct pf_frent *frent,
("overlapping fragment"));
TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
}
+ counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_INSERT], 1);
if (frag->fr_firstoff[index] == NULL) {
KASSERT(prev == NULL || pf_frent_index(prev) < index,
@@ -496,6 +505,7 @@ pf_frent_remove(struct pf_fragment *frag, struct pf_frent *frent)
}
TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
+ counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1);
KASSERT(frag->fr_entries[index] > 0, ("No fragments remaining"));
frag->fr_entries[index]--;
@@ -768,6 +778,7 @@ pf_join_fragment(struct pf_fragment *frag)
frent = TAILQ_FIRST(&frag->fr_queue);
TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
+ counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1);
m = frent->fe_m;
if ((frent->fe_hdrlen + frent->fe_len) < m->m_pkthdr.len)
@@ -775,6 +786,7 @@ pf_join_fragment(struct pf_fragment *frag)
uma_zfree(V_pf_frent_z, frent);
while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) {
TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
+ counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1);
m2 = frent->fe_m;
/* Strip off ip header. */
@@ -1354,7 +1366,7 @@ pf_normalize_ip6(int off, u_short *reason,
pf_rule_to_actions(r, &pd->act);
}
- if (!pf_pull_hdr(pd->m, off, &frag, sizeof(frag), NULL, reason, AF_INET6))
+ if (!pf_pull_hdr(pd->m, off, &frag, sizeof(frag), reason, AF_INET6))
return (PF_DROP);
/* Offset now points to data portion. */
@@ -1542,7 +1554,7 @@ pf_normalize_tcp_init(struct pf_pdesc *pd, struct tcphdr *th,
olen = (th->th_off << 2) - sizeof(*th);
if (olen < TCPOLEN_TIMESTAMP || !pf_pull_hdr(pd->m,
- pd->off + sizeof(*th), opts, olen, NULL, NULL, pd->af))
+ pd->off + sizeof(*th), opts, olen, NULL, pd->af))
return (0);
opt = opts;
@@ -1645,7 +1657,7 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd,
if (olen >= TCPOLEN_TIMESTAMP &&
((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
(dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
- pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, olen, NULL, NULL, pd->af)) {
+ pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, olen, NULL, pd->af)) {
/* Modulate the timestamps. Can be used for NAT detection, OS
* uptime determination or reboot detection.
*/
@@ -1975,7 +1987,7 @@ pf_normalize_mss(struct pf_pdesc *pd)
olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
optsoff = pd->off + sizeof(struct tcphdr);
if (olen < TCPOLEN_MAXSEG ||
- !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af))
+ !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af))
return (0);
opt = opts;
@@ -2009,7 +2021,7 @@ pf_scan_sctp(struct pf_pdesc *pd)
int ret;
while (pd->off + chunk_off < pd->tot_len) {
- if (!pf_pull_hdr(pd->m, pd->off + chunk_off, &ch, sizeof(ch), NULL,
+ if (!pf_pull_hdr(pd->m, pd->off + chunk_off, &ch, sizeof(ch),
NULL, pd->af))
return (PF_DROP);
@@ -2026,7 +2038,7 @@ pf_scan_sctp(struct pf_pdesc *pd)
struct sctp_init_chunk init;
if (!pf_pull_hdr(pd->m, pd->off + chunk_start, &init,
- sizeof(init), NULL, NULL, pd->af))
+ sizeof(init), NULL, pd->af))
return (PF_DROP);
/*
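
The hunks above add per-event fragment counters (NCNT_FRAG_SEARCH, NCNT_FRAG_INSERT, NCNT_FRAG_REMOVALS) and export the number of live fragment entries straight from the UMA zone via uma_zone_get_cur(). A minimal sketch of the counter(9) pattern those statistics rely on, with an illustrative counter name:

/* Sketch of the counter(9) pattern; the counter name is illustrative. */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/malloc.h>

static counter_u64_t frag_inserts;

static void
frag_counter_init(void)
{
	frag_inserts = counter_u64_alloc(M_WAITOK);
}

static void
frag_counter_bump(void)
{
	counter_u64_add(frag_inserts, 1);	/* cheap per-CPU increment */
}

static uint64_t
frag_counter_read(void)
{
	return (counter_u64_fetch(frag_inserts));	/* sums per-CPU parts */
}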
diff --git a/sys/netpfil/pf/pf_nv.c b/sys/netpfil/pf/pf_nv.c
index 89486928e6e1..2f484e2dabc6 100644
--- a/sys/netpfil/pf/pf_nv.c
+++ b/sys/netpfil/pf/pf_nv.c
@@ -505,6 +505,7 @@ int
pf_nvrule_to_krule(const nvlist_t *nvl, struct pf_krule *rule)
{
int error = 0;
+ uint8_t tmp;
#define ERROUT(x) ERROUT_FUNCTION(errout, x)
@@ -610,8 +611,10 @@ pf_nvrule_to_krule(const nvlist_t *nvl, struct pf_krule *rule)
PFNV_CHK(pf_nvuint8(nvl, "keep_state", &rule->keep_state));
PFNV_CHK(pf_nvuint8(nvl, "af", &rule->af));
PFNV_CHK(pf_nvuint8(nvl, "proto", &rule->proto));
- PFNV_CHK(pf_nvuint8(nvl, "type", &rule->type));
- PFNV_CHK(pf_nvuint8(nvl, "code", &rule->code));
+ PFNV_CHK(pf_nvuint8(nvl, "type", &tmp));
+ rule->type = tmp;
+ PFNV_CHK(pf_nvuint8(nvl, "code", &tmp));
+ rule->code = tmp;
PFNV_CHK(pf_nvuint8(nvl, "flags", &rule->flags));
PFNV_CHK(pf_nvuint8(nvl, "flagset", &rule->flagset));
PFNV_CHK(pf_nvuint8(nvl, "min_ttl", &rule->min_ttl));
diff --git a/sys/netpfil/pf/pf_osfp.c b/sys/netpfil/pf/pf_osfp.c
index 150626c5f3fb..8c041d45eae8 100644
--- a/sys/netpfil/pf/pf_osfp.c
+++ b/sys/netpfil/pf/pf_osfp.c
@@ -82,7 +82,7 @@ pf_osfp_fingerprint(struct pf_pdesc *pd, const struct tcphdr *tcp)
ip6 = mtod(pd->m, struct ip6_hdr *);
break;
}
- if (!pf_pull_hdr(pd->m, pd->off, hdr, tcp->th_off << 2, NULL, NULL,
+ if (!pf_pull_hdr(pd->m, pd->off, hdr, tcp->th_off << 2, NULL,
pd->af)) return (NULL);
return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr));
diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c
index 039908a53126..1711e690f6bb 100644
--- a/sys/netpfil/pf/pf_ruleset.c
+++ b/sys/netpfil/pf/pf_ruleset.c
@@ -59,8 +59,8 @@
#error "Kernel only file. Please use sbin/pfctl/pf_ruleset.c instead."
#endif
-#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT|M_ZERO)
-#define rs_free(x) free(x, M_TEMP)
+#define rs_malloc(x) malloc(x, M_PF, M_NOWAIT|M_ZERO)
+#define rs_free(x) free(x, M_PF)
VNET_DEFINE(struct pf_kanchor_global, pf_anchors);
VNET_DEFINE(struct pf_kanchor, pf_main_anchor);
@@ -336,6 +336,12 @@ pf_remove_if_empty_kruleset(struct pf_kruleset *ruleset)
int i;
while (ruleset != NULL) {
+ for (int i = 0; i < PF_RULESET_MAX; i++) {
+ pf_rule_tree_free(ruleset->rules[i].active.tree);
+ ruleset->rules[i].active.tree = NULL;
+ pf_rule_tree_free(ruleset->rules[i].inactive.tree);
+ ruleset->rules[i].inactive.tree = NULL;
+ }
if (ruleset == &pf_main_ruleset ||
!RB_EMPTY(&ruleset->anchor->children) ||
ruleset->anchor->refcnt > 0 || ruleset->tables > 0 ||
diff --git a/sys/powerpc/conf/GENERIC64 b/sys/powerpc/conf/GENERIC64
index 630c88b97dd7..48f9df5b7e38 100644
--- a/sys/powerpc/conf/GENERIC64
+++ b/sys/powerpc/conf/GENERIC64
@@ -234,9 +234,9 @@ device wlan # 802.11 support
options IEEE80211_SUPPORT_MESH # enable 802.11s draft support
options IEEE80211_DEBUG # enable debug msgs
device wlan_wep # 802.11 WEP support
+device wlan_tkip # 802.11 TKIP support
device wlan_ccmp # 802.11 CCMP support
device wlan_gcmp # 802.11 GCMP support
-device wlan_tkip # 802.11 TKIP support
device wlan_amrr # AMRR transmit rate control algorithm
device ath # Atheros CardBus/PCI NICs
device ath_hal # Atheros CardBus/PCI chip support
diff --git a/sys/powerpc/conf/GENERIC64LE b/sys/powerpc/conf/GENERIC64LE
index eb9a9441425d..9af71f30626d 100644
--- a/sys/powerpc/conf/GENERIC64LE
+++ b/sys/powerpc/conf/GENERIC64LE
@@ -230,9 +230,9 @@ device wlan # 802.11 support
options IEEE80211_SUPPORT_MESH # enable 802.11s draft support
options IEEE80211_DEBUG # enable debug msgs
device wlan_wep # 802.11 WEP support
+device wlan_tkip # 802.11 TKIP support
device wlan_ccmp # 802.11 CCMP support
device wlan_gcmp # 802.11 GCMP support
-device wlan_tkip # 802.11 TKIP support
device wlan_amrr # AMRR transmit rate control algorithm
device ath # Atheros CardBus/PCI NICs
device ath_hal # Atheros CardBus/PCI chip support
diff --git a/sys/riscv/include/vmm.h b/sys/riscv/include/vmm.h
index 1221521be368..de7119dd534a 100644
--- a/sys/riscv/include/vmm.h
+++ b/sys/riscv/include/vmm.h
@@ -49,6 +49,7 @@ enum vm_suspend_how {
VM_SUSPEND_RESET,
VM_SUSPEND_POWEROFF,
VM_SUSPEND_HALT,
+ VM_SUSPEND_DESTROY,
VM_SUSPEND_LAST
};
diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c
index 7528ef6e4698..ec4514f70fa6 100644
--- a/sys/riscv/vmm/vmm.c
+++ b/sys/riscv/vmm/vmm.c
@@ -1036,10 +1036,14 @@ vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
+ struct vm *vm;
+ vm = vcpu->vm;
vcpu_lock(vcpu);
-
while (1) {
+ if (vm->suspend)
+ break;
+
if (aplic_check_pending(vcpu->cookie))
break;
diff --git a/sys/sys/bus.h b/sys/sys/bus.h
index dda27f4737b2..e7ce152160f8 100644
--- a/sys/sys/bus.h
+++ b/sys/sys/bus.h
@@ -78,7 +78,7 @@ typedef enum device_property_type {
* The strings are placed one after the other, separated by NUL characters.
* Fields should be added after the last one and order maintained for compatibility
*/
-#define BUS_USER_BUFFER (3*1024)
+#define BUS_USER_BUFFER (3 * 1024)
struct u_device {
uintptr_t dv_handle;
uintptr_t dv_parent;
@@ -247,8 +247,8 @@ typedef struct devclass *devclass_t;
* and may use regular mutexes. However, it is prohibited from
* sleeping on a sleep queue.
*/
-typedef int driver_filter_t(void*);
-typedef void driver_intr_t(void*);
+typedef int driver_filter_t(void *);
+typedef void driver_intr_t(void *);
/**
* @brief Interrupt type bits.
@@ -476,18 +476,18 @@ int bus_generic_resume(device_t dev);
int bus_generic_resume_child(device_t dev, device_t child);
int bus_generic_setup_intr(device_t dev, device_t child,
struct resource *irq, int flags,
- driver_filter_t *filter, driver_intr_t *intr,
+ driver_filter_t *filter, driver_intr_t *intr,
void *arg, void **cookiep);
struct resource *
- bus_generic_rl_alloc_resource (device_t, device_t, int, int *,
- rman_res_t, rman_res_t, rman_res_t, u_int);
-void bus_generic_rl_delete_resource (device_t, device_t, int, int);
-int bus_generic_rl_get_resource (device_t, device_t, int, int, rman_res_t *,
- rman_res_t *);
-int bus_generic_rl_set_resource (device_t, device_t, int, int, rman_res_t,
- rman_res_t);
-int bus_generic_rl_release_resource (device_t, device_t, struct resource *);
+ bus_generic_rl_alloc_resource(device_t, device_t, int, int *,
+ rman_res_t, rman_res_t, rman_res_t, u_int);
+void bus_generic_rl_delete_resource(device_t, device_t, int, int);
+int bus_generic_rl_get_resource(device_t, device_t, int, int, rman_res_t *,
+ rman_res_t *);
+int bus_generic_rl_set_resource(device_t, device_t, int, int, rman_res_t,
+ rman_res_t);
+int bus_generic_rl_release_resource(device_t, device_t, struct resource *);
struct resource *
bus_generic_rman_alloc_resource(device_t dev, device_t child, int type,
int *rid, rman_res_t start,
@@ -562,7 +562,7 @@ int bus_get_domain(device_t dev, int *domain);
int bus_release_resource(device_t dev, struct resource *r);
int bus_free_resource(device_t dev, int type, struct resource *r);
int bus_setup_intr(device_t dev, struct resource *r, int flags,
- driver_filter_t filter, driver_intr_t handler,
+ driver_filter_t filter, driver_intr_t handler,
void *arg, void **cookiep);
int bus_teardown_intr(device_t dev, struct resource *r, void *cookie);
int bus_suspend_intr(device_t dev, struct resource *r);
@@ -687,9 +687,9 @@ int device_probe_child(device_t bus, device_t dev);
int device_quiesce(device_t dev);
void device_quiet(device_t dev);
void device_quiet_children(device_t dev);
-void device_set_desc(device_t dev, const char* desc);
-void device_set_descf(device_t dev, const char* fmt, ...) __printflike(2, 3);
-void device_set_desc_copy(device_t dev, const char* desc);
+void device_set_desc(device_t dev, const char *desc);
+void device_set_descf(device_t dev, const char *fmt, ...) __printflike(2, 3);
+void device_set_desc_copy(device_t dev, const char *desc);
int device_set_devclass(device_t dev, const char *classname);
int device_set_devclass_fixed(device_t dev, const char *classname);
bool device_is_devclass_fixed(device_t dev);
@@ -887,7 +887,8 @@ DECLARE_MODULE(_name##_##busname, _name##_##busname##_mod, \
*/
#define __BUS_ACCESSOR(varp, var, ivarp, ivar, type) \
\
-static __inline type varp ## _get_ ## var(device_t dev) \
+static __inline type \
+varp ## _get_ ## var(device_t dev) \
{ \
uintptr_t v; \
int e __diagused; \
@@ -899,7 +900,8 @@ static __inline type varp ## _get_ ## var(device_t dev) \
return ((type) v); \
} \
\
-static __inline void varp ## _set_ ## var(device_t dev, type t) \
+static __inline void \
+varp ## _set_ ## var(device_t dev, type t) \
{ \
uintptr_t v = (uintptr_t) t; \
int e __diagused; \
diff --git a/sys/sys/cpu.h b/sys/sys/cpu.h
index b6a0094f0c51..5bb55679a05b 100644
--- a/sys/sys/cpu.h
+++ b/sys/sys/cpu.h
@@ -40,25 +40,31 @@
#define CPU_IVAR_CPUID_SIZE 3
#define CPU_IVAR_CPUID 4
-static __inline struct pcpu *cpu_get_pcpu(device_t dev)
+static __inline struct pcpu *
+cpu_get_pcpu(device_t dev)
{
uintptr_t v = 0;
+
BUS_READ_IVAR(device_get_parent(dev), dev, CPU_IVAR_PCPU, &v);
return ((struct pcpu *)v);
}
-static __inline int32_t cpu_get_nominal_mhz(device_t dev)
+static __inline int32_t
+cpu_get_nominal_mhz(device_t dev)
{
uintptr_t v = 0;
+
if (BUS_READ_IVAR(device_get_parent(dev), dev,
CPU_IVAR_NOMINAL_MHZ, &v) != 0)
return (-1);
return ((int32_t)v);
}
-static __inline const uint32_t *cpu_get_cpuid(device_t dev, size_t *count)
+static __inline const uint32_t *
+cpu_get_cpuid(device_t dev, size_t *count)
{
uintptr_t v = 0;
+
if (BUS_READ_IVAR(device_get_parent(dev), dev,
CPU_IVAR_CPUID_SIZE, &v) != 0)
return (NULL);
@@ -124,10 +130,10 @@ TAILQ_HEAD(cf_level_lst, cf_level);
* state. It is probably a bug to not combine this with "info only"
*/
#define CPUFREQ_TYPE_MASK 0xffff
-#define CPUFREQ_TYPE_RELATIVE (1<<0)
-#define CPUFREQ_TYPE_ABSOLUTE (1<<1)
-#define CPUFREQ_FLAG_INFO_ONLY (1<<16)
-#define CPUFREQ_FLAG_UNCACHED (1<<17)
+#define CPUFREQ_TYPE_RELATIVE (1 << 0)
+#define CPUFREQ_TYPE_ABSOLUTE (1 << 1)
+#define CPUFREQ_FLAG_INFO_ONLY (1 << 16)
+#define CPUFREQ_FLAG_UNCACHED (1 << 17)
/*
* When setting a level, the caller indicates the priority of this request.
@@ -162,7 +168,7 @@ int cpufreq_settings_changed(device_t dev);
* The new level and the result of the change (0 is success) is passed in.
* If the driver wishes to revoke the change from cpufreq_pre_change, it
* stores a non-zero error code in the result parameter and the change will
- * not be made. If the post-change eventhandler gets a non-zero result,
+ * not be made. If the post-change eventhandler gets a non-zero result,
* no change was made and the previous level remains in effect. If a change
* is revoked, the post-change eventhandler is still called with the error
* value supplied by the revoking driver. This gives listeners who cached
diff --git a/sys/sys/efi.h b/sys/sys/efi.h
index 89c8b15519de..f82c733898b4 100644
--- a/sys/sys/efi.h
+++ b/sys/sys/efi.h
@@ -40,9 +40,9 @@
{0xf2fd1544,0x9794,0x4a2c,{0x99,0x2e,0xe5,0xbb,0xcf,0x20,0xe3,0x94}}
#define EFI_TABLE_ESRT \
{0xb122a263,0x3661,0x4f68,{0x99,0x29,0x78,0xf8,0xb0,0xd6,0x21,0x80}}
-#define EFI_PROPERTIES_TABLE \
+#define EFI_PROPERTIES_TABLE \
{0x880aaca3,0x4adc,0x4a04,{0x90,0x79,0xb7,0x47,0x34,0x08,0x25,0xe5}}
-#define EFI_MEMORY_ATTRIBUTES_TABLE \
+#define EFI_MEMORY_ATTRIBUTES_TABLE \
{0xdcfa911d,0x26eb,0x469f,{0xa2,0x20,0x38,0xb7,0xdc,0x46,0x12,0x20}}
#define LINUX_EFI_MEMRESERVE_TABLE \
{0x888eb0c6,0x8ede,0x4ff5,{0xa8,0xf0,0x9a,0xee,0x5c,0xb9,0x77,0xc2}}
@@ -54,7 +54,7 @@ enum efi_reset {
};
typedef uint16_t efi_char;
-typedef unsigned long efi_status;
+typedef unsigned long efi_status;
/*
* This type-puns to a struct uuid, but all the EDK2 headers use this variation,
@@ -62,10 +62,10 @@ typedef unsigned long efi_status;
* can use EDK2 definitions both places.
*/
typedef struct efi_guid {
- uint32_t Data1;
- uint16_t Data2;
- uint16_t Data3;
- uint8_t Data4[8];
+ uint32_t Data1;
+ uint16_t Data2;
+ uint16_t Data3;
+ uint8_t Data4[8];
} efi_guid_t; /* Type puns with GUID and EFI_GUID */
struct efi_cfgtbl {
@@ -293,100 +293,99 @@ struct efi_ops {
extern const struct efi_ops *active_efi_ops;
/* Public MI EFI functions */
-static inline int efi_rt_ok(void)
+static inline int
+efi_rt_ok(void)
{
-
if (active_efi_ops->rt_ok == NULL)
return (ENXIO);
return (active_efi_ops->rt_ok());
}
-static inline int efi_get_table(efi_guid_t *guid, void **ptr)
+static inline int
+efi_get_table(efi_guid_t *guid, void **ptr)
{
-
if (active_efi_ops->get_table == NULL)
return (ENXIO);
return (active_efi_ops->get_table(guid, ptr));
}
-static inline int efi_copy_table(efi_guid_t *guid, void **buf,
- size_t buf_len, size_t *table_len)
+static inline int
+efi_copy_table(efi_guid_t *guid, void **buf, size_t buf_len, size_t *table_len)
{
-
if (active_efi_ops->copy_table == NULL)
return (ENXIO);
return (active_efi_ops->copy_table(guid, buf, buf_len, table_len));
}
-static inline int efi_get_time(struct efi_tm *tm)
+static inline int
+efi_get_time(struct efi_tm *tm)
{
-
if (active_efi_ops->get_time == NULL)
return (ENXIO);
return (active_efi_ops->get_time(tm));
}
-static inline int efi_get_time_capabilities(struct efi_tmcap *tmcap)
+static inline int
+efi_get_time_capabilities(struct efi_tmcap *tmcap)
{
-
if (active_efi_ops->get_time_capabilities == NULL)
return (ENXIO);
return (active_efi_ops->get_time_capabilities(tmcap));
}
-static inline int efi_reset_system(enum efi_reset type)
+static inline int
+efi_reset_system(enum efi_reset type)
{
-
if (active_efi_ops->reset_system == NULL)
return (ENXIO);
return (active_efi_ops->reset_system(type));
}
-static inline int efi_set_time(struct efi_tm *tm)
+static inline int
+efi_set_time(struct efi_tm *tm)
{
-
if (active_efi_ops->set_time == NULL)
return (ENXIO);
return (active_efi_ops->set_time(tm));
}
-static inline int efi_get_waketime(uint8_t *enabled, uint8_t *pending,
- struct efi_tm *tm)
+static inline int
+efi_get_waketime(uint8_t *enabled, uint8_t *pending, struct efi_tm *tm)
{
if (active_efi_ops->get_waketime == NULL)
return (ENXIO);
return (active_efi_ops->get_waketime(enabled, pending, tm));
}
-static inline int efi_set_waketime(uint8_t enable, struct efi_tm *tm)
+static inline int
+efi_set_waketime(uint8_t enable, struct efi_tm *tm)
{
if (active_efi_ops->set_waketime == NULL)
return (ENXIO);
return (active_efi_ops->set_waketime(enable, tm));
}
-static inline int efi_var_get(uint16_t *name, efi_guid_t *vendor,
- uint32_t *attrib, size_t *datasize, void *data)
+static inline int
+efi_var_get(uint16_t *name, efi_guid_t *vendor, uint32_t *attrib,
+ size_t *datasize, void *data)
{
-
if (active_efi_ops->var_get == NULL)
return (ENXIO);
return (active_efi_ops->var_get(name, vendor, attrib, datasize, data));
}
-static inline int efi_var_nextname(size_t *namesize, uint16_t *name,
- efi_guid_t *vendor)
+static inline int
+efi_var_nextname(size_t *namesize, uint16_t *name, efi_guid_t *vendor)
{
-
if (active_efi_ops->var_nextname == NULL)
return (ENXIO);
return (active_efi_ops->var_nextname(namesize, name, vendor));
}
-static inline int efi_var_set(uint16_t *name, efi_guid_t *vendor,
- uint32_t attrib, size_t datasize, void *data)
+static inline int
+efi_var_set(uint16_t *name, efi_guid_t *vendor, uint32_t attrib,
+ size_t datasize, void *data)
{
-
if (active_efi_ops->var_set == NULL)
return (ENXIO);
return (active_efi_ops->var_set(name, vendor, attrib, datasize, data));
diff --git a/sys/sys/event.h b/sys/sys/event.h
index 1b30e4292de8..084eaafcbdc0 100644
--- a/sys/sys/event.h
+++ b/sys/sys/event.h
@@ -45,7 +45,9 @@
#define EVFILT_USER (-11) /* User events */
#define EVFILT_SENDFILE (-12) /* attached to sendfile requests */
#define EVFILT_EMPTY (-13) /* empty send socket buf */
-#define EVFILT_SYSCOUNT 13
+#define EVFILT_JAIL (-14) /* attached to struct prison */
+#define EVFILT_JAILDESC (-15) /* attached to jail descriptors */
+#define EVFILT_SYSCOUNT 15
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define EV_SET(kevp_, a, b, c, d, e, f) do { \
@@ -205,10 +207,18 @@ struct freebsd11_kevent32 {
#define NOTE_PDATAMASK 0x000fffff /* mask for pid */
/* additional flags for EVFILT_PROC */
-#define NOTE_TRACK 0x00000001 /* follow across forks */
+#define NOTE_TRACK 0x00000001 /* follow across fork/create */
#define NOTE_TRACKERR 0x00000002 /* could not track child */
#define NOTE_CHILD 0x00000004 /* am a child process */
+/* data/hint flags for EVFILT_JAIL and EVFILT_JAILDESC */
+#define NOTE_JAIL_CHILD 0x80000000 /* child jail was created */
+#define NOTE_JAIL_SET 0x40000000 /* jail was modified */
+#define NOTE_JAIL_ATTACH 0x20000000 /* jail was attached to */
+#define NOTE_JAIL_REMOVE 0x10000000 /* jail was removed */
+#define NOTE_JAIL_MULTI 0x08000000 /* multiple child or attach */
+#define NOTE_JAIL_CTRLMASK 0xf0000000 /* mask for hint bits */
+
/* additional flags for EVFILT_TIMER */
#define NOTE_SECONDS 0x00000001 /* data is seconds */
#define NOTE_MSECONDS 0x00000002 /* data is milliseconds */
@@ -309,6 +319,7 @@ struct knote {
struct proc *p_proc; /* proc pointer */
struct kaiocb *p_aio; /* AIO job pointer */
struct aioliojob *p_lio; /* LIO job pointer */
+ struct prison *p_prison; /* prison pointer */
void *p_v; /* generic other pointer */
} kn_ptr;
const struct filterops *kn_fop;
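
EVFILT_JAIL and EVFILT_JAILDESC above let kqueue consumers watch jails and jail descriptors for the NOTE_JAIL_* events. Assuming the new filters follow the usual convention of selecting events through fflags, and assuming a jail descriptor fd obtained via the new JAIL_GET_DESC flag, a userland sketch might look like:

/* Userland sketch; how the jail descriptor is obtained is an assumption. */
#include <sys/types.h>
#include <sys/event.h>
#include <err.h>

static void
wait_for_jail_removal(int jd)
{
	struct kevent ev;
	int kq;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");
	EV_SET(&ev, jd, EVFILT_JAILDESC, EV_ADD, NOTE_JAIL_REMOVE, 0, NULL);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent register");
	if (kevent(kq, NULL, 0, &ev, 1, NULL) == -1)
		err(1, "kevent wait");
	/* ev.fflags is expected to include NOTE_JAIL_REMOVE here. */
}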
diff --git a/sys/sys/exterrvar.h b/sys/sys/exterrvar.h
index 7bf1d264ff5e..6783a0d2d84f 100644
--- a/sys/sys/exterrvar.h
+++ b/sys/sys/exterrvar.h
@@ -31,7 +31,7 @@
#error "Specify error category before including sys/exterrvar.h"
#endif
-#ifdef BLOAT_KERNEL_WITH_EXTERR
+#ifdef EXTERR_STRINGS
#define SET_ERROR_MSG(mmsg) (mmsg)
#else
#define SET_ERROR_MSG(mmsg) NULL
diff --git a/sys/sys/file.h b/sys/sys/file.h
index 63313926c4f0..cc3c733580fd 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -72,6 +72,7 @@ struct nameidata;
#define DTYPE_EVENTFD 13 /* eventfd */
#define DTYPE_TIMERFD 14 /* timerfd */
#define DTYPE_INOTIFY 15 /* inotify descriptor */
+#define DTYPE_JAILDESC 16 /* jail descriptor */
#ifdef _KERNEL
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index d2655c52e832..e12e8c3178c9 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -99,8 +99,12 @@ enum prison_state {
#define JAIL_UPDATE 0x02 /* Update parameters of existing jail */
#define JAIL_ATTACH 0x04 /* Attach to jail upon creation */
#define JAIL_DYING 0x08 /* Allow getting a dying jail */
-#define JAIL_SET_MASK 0x0f /* JAIL_DYING is deprecated/ignored here */
-#define JAIL_GET_MASK 0x08
+#define JAIL_USE_DESC 0x10 /* Get/set jail in descriptor */
+#define JAIL_AT_DESC 0x20 /* Find/add jail under descriptor */
+#define JAIL_GET_DESC 0x40 /* Return a new jail descriptor */
+#define JAIL_OWN_DESC 0x80 /* Return a new owning jail descriptor */
+#define JAIL_SET_MASK 0xff /* JAIL_DYING is deprecated/ignored here */
+#define JAIL_GET_MASK 0xf8
#define JAIL_SYS_DISABLE 0
#define JAIL_SYS_NEW 1
@@ -115,7 +119,9 @@ int jail(struct jail *);
int jail_set(struct iovec *, unsigned int, int);
int jail_get(struct iovec *, unsigned int, int);
int jail_attach(int);
+int jail_attach_jd(int);
int jail_remove(int);
+int jail_remove_jd(int);
__END_DECLS
#else /* _KERNEL */
@@ -144,6 +150,8 @@ MALLOC_DECLARE(M_PRISON);
#define JAIL_META_PRIVATE "meta"
#define JAIL_META_SHARED "env"
+struct jaildesc;
+struct knlist;
struct racct;
struct prison_racct;
@@ -189,7 +197,9 @@ struct prison {
struct vnode *pr_root; /* (c) vnode to rdir */
struct prison_ip *pr_addrs[PR_FAMILY_MAX]; /* (p,n) IPs of jail */
struct prison_racct *pr_prison_racct; /* (c) racct jail proxy */
- void *pr_sparep[3];
+ struct knlist *pr_klist; /* (m) attached knotes */
+ LIST_HEAD(, jaildesc) pr_descs; /* (a) attached descriptors */
+ void *pr_sparep;
int pr_childcount; /* (a) number of child jails */
int pr_childmax; /* (p) maximum child jails */
unsigned pr_allow; /* (p) PR_ALLOW_* flags */
@@ -425,10 +435,11 @@ SYSCTL_DECL(_security_jail_param);
/*
* Kernel support functions for jail().
*/
-struct ucred;
+struct knote;
struct mount;
struct sockaddr;
struct statfs;
+struct ucred;
struct vfsconf;
/*
@@ -463,6 +474,7 @@ void prison_proc_free(struct prison *);
void prison_proc_link(struct prison *, struct proc *);
void prison_proc_unlink(struct prison *, struct proc *);
void prison_proc_iterate(struct prison *, void (*)(struct proc *, void *), void *);
+void prison_remove(struct prison *);
void prison_set_allow(struct ucred *cred, unsigned flag, int enable);
bool prison_ischild(struct prison *, struct prison *);
bool prison_isalive(const struct prison *);
diff --git a/sys/sys/jaildesc.h b/sys/sys/jaildesc.h
new file mode 100644
index 000000000000..fda270d62e70
--- /dev/null
+++ b/sys/sys/jaildesc.h
@@ -0,0 +1,87 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 James Gritton.
+ * All rights reserved.
+ *
+ * This software was developed at the University of Cambridge Computer
+ * Laboratory with support from a grant from Google, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _SYS_JAILDESC_H_
+#define _SYS_JAILDESC_H_
+
+#ifdef _KERNEL
+
+#include <sys/queue.h>
+#include <sys/selinfo.h>
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
+#include <sys/_types.h>
+
+struct prison;
+
+/*-
+ * struct jaildesc describes a jail descriptor, which points to a struct
+ * prison. struct prison in turn has a linked list of struct jaildesc.
+ *
+ * Locking key:
+ * (c) set on creation, remains unchanged
+ * (d) jd_lock
+ * (p) jd_prison->pr_mtx
+ */
+struct jaildesc {
+ LIST_ENTRY(jaildesc) jd_list; /* (d,p) this prison's descs */
+ struct prison *jd_prison; /* (d) the prison */
+ struct mtx jd_lock;
+ struct selinfo jd_selinfo; /* (d) event notification */
+ unsigned jd_flags; /* (d) JDF_* flags */
+};
+
+/*
+ * Locking macros for the jaildesc.
+ */
+#define JAILDESC_LOCK_DESTROY(jd) mtx_destroy(&(jd)->jd_lock)
+#define JAILDESC_LOCK_INIT(jd) mtx_init(&(jd)->jd_lock, "jaildesc", \
+ NULL, MTX_DEF)
+#define JAILDESC_LOCK(jd) mtx_lock(&(jd)->jd_lock)
+#define JAILDESC_UNLOCK(jd) mtx_unlock(&(jd)->jd_lock)
+
+/*
+ * Flags for the jd_flags field
+ */
+#define JDF_SELECTED 0x00000001 /* issue selwakeup() */
+#define JDF_REMOVED 0x00000002 /* jail was removed */
+#define JDF_OWNING 0x00000004 /* closing descriptor removes jail */
+
+int jaildesc_find(struct thread *td, int fd, struct prison **prp,
+ struct ucred **ucredp);
+int jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning);
+void jaildesc_set_prison(struct file *jd, struct prison *pr);
+void jaildesc_prison_cleanup(struct prison *pr);
+void jaildesc_knote(struct prison *pr, long hint);
+
+#endif /* _KERNEL */
+
+#endif /* !_SYS_JAILDESC_H_ */
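
As an illustration only (not part of this change), a minimal sketch of how the JDF_* flags, the locking macros and the embedded selinfo above fit together when a jail goes away; the helper name jaildesc_mark_removed() is hypothetical and the real code paths live elsewhere in this commit.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/selinfo.h>
#include <sys/jaildesc.h>

/* Hypothetical sketch: flag a descriptor's jail as gone and wake pollers. */
static void
jaildesc_mark_removed(struct jaildesc *jd)
{
	JAILDESC_LOCK(jd);
	jd->jd_flags |= JDF_REMOVED;
	if (jd->jd_flags & JDF_SELECTED) {
		jd->jd_flags &= ~JDF_SELECTED;
		selwakeup(&jd->jd_selinfo);
	}
	JAILDESC_UNLOCK(jd);
}
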
diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h
index 380099092107..417afd4dbbe4 100644
--- a/sys/sys/kernel.h
+++ b/sys/sys/kernel.h
@@ -249,15 +249,8 @@ struct sysinit_tslog {
const void *data;
const char *name;
};
-static inline void
-sysinit_tslog_shim(const void *data)
-{
- const struct sysinit_tslog *x = data;
-
- TSRAW(curthread, TS_ENTER, "SYSINIT", x->name);
- (x->func)(x->data);
- TSRAW(curthread, TS_EXIT, "SYSINIT", x->name);
-}
+void sysinit_tslog_shim(const void *);
+
#define C_SYSINIT(uniquifier, subsystem, order, func, ident) \
static struct sysinit_tslog uniquifier ## _sys_init_tslog = { \
func, \
@@ -322,7 +315,7 @@ void sysinit_add(struct sysinit **set, struct sysinit **set_end);
* int
* please avoid using for new tunables!
*/
-extern void tunable_int_init(void *);
+extern void tunable_int_init(const void *);
struct tunable_int {
const char *path;
int *var;
@@ -341,7 +334,7 @@ struct tunable_int {
/*
* long
*/
-extern void tunable_long_init(void *);
+extern void tunable_long_init(const void *);
struct tunable_long {
const char *path;
long *var;
@@ -360,7 +353,7 @@ struct tunable_long {
/*
* unsigned long
*/
-extern void tunable_ulong_init(void *);
+extern void tunable_ulong_init(const void *);
struct tunable_ulong {
const char *path;
unsigned long *var;
@@ -379,7 +372,7 @@ struct tunable_ulong {
/*
* int64_t
*/
-extern void tunable_int64_init(void *);
+extern void tunable_int64_init(const void *);
struct tunable_int64 {
const char *path;
int64_t *var;
@@ -398,7 +391,7 @@ struct tunable_int64 {
/*
* uint64_t
*/
-extern void tunable_uint64_init(void *);
+extern void tunable_uint64_init(const void *);
struct tunable_uint64 {
const char *path;
uint64_t *var;
@@ -417,7 +410,7 @@ struct tunable_uint64 {
/*
* quad
*/
-extern void tunable_quad_init(void *);
+extern void tunable_quad_init(const void *);
struct tunable_quad {
const char *path;
quad_t *var;
@@ -436,7 +429,7 @@ struct tunable_quad {
/*
* bool
*/
-extern void tunable_bool_init(void *);
+extern void tunable_bool_init(const void *);
struct tunable_bool {
const char *path;
bool *var;
@@ -452,7 +445,7 @@ struct tunable_bool {
#define TUNABLE_BOOL_FETCH(path, var) getenv_bool((path), (var))
-extern void tunable_str_init(void *);
+extern void tunable_str_init(const void *);
struct tunable_str {
const char *path;
char *var;
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index f6480b173a5c..18f85192f6c3 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -1007,6 +1007,7 @@ struct mntarg *mount_argsu(struct mntarg *ma, const char *name, const void *val,
void statfs_scale_blocks(struct statfs *sf, long max_size);
struct vfsconf *vfs_byname(const char *);
struct vfsconf *vfs_byname_kld(const char *, struct thread *td, int *);
+void vfs_unref_vfsconf(struct vfsconf *vfsp);
void vfs_mount_destroy(struct mount *);
void vfs_event_signal(fsid_t *, u_int32_t, intptr_t);
void vfs_freeopts(struct vfsoptlist *opts);
diff --git a/sys/sys/mutex.h b/sys/sys/mutex.h
index 56c03a1b0be9..08d4e2d28b33 100644
--- a/sys/sys/mutex.h
+++ b/sys/sys/mutex.h
@@ -91,7 +91,7 @@
void _mtx_init(volatile uintptr_t *c, const char *name, const char *type,
int opts);
void _mtx_destroy(volatile uintptr_t *c);
-void mtx_sysinit(void *arg);
+void mtx_sysinit(const void *arg);
int _mtx_trylock_flags_int(struct mtx *m, int opts LOCK_FILE_LINE_ARG_DEF);
int _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file,
int line);
diff --git a/sys/sys/param.h b/sys/sys/param.h
index fc2a78883f1e..ce91430909ce 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -74,7 +74,7 @@
* cannot include sys/param.h and should only be updated here.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1500063
+#define __FreeBSD_version 1600000
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/sys/pciio.h b/sys/sys/pciio.h
index 6467e82b1b3d..64c0b32cb8e2 100644
--- a/sys/sys/pciio.h
+++ b/sys/sys/pciio.h
@@ -77,6 +77,9 @@ struct pci_conf {
u_int8_t pc_revid; /* chip revision ID */
char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
u_long pd_unit; /* device unit number */
+ int pd_numa_domain; /* device NUMA domain */
+ size_t pc_reported_len;/* length of PCI data reported */
+ char pc_spare[64]; /* space for future fields */
};
struct pci_match_conf {
@@ -165,7 +168,6 @@ struct pci_bar_ioreq {
#define PCIIO_BAR_MMAP_RW 0x04
#define PCIIO_BAR_MMAP_ACTIVATE 0x08
-#define PCIOCGETCONF _IOWR('p', 5, struct pci_conf_io)
#define PCIOCREAD _IOWR('p', 2, struct pci_io)
#define PCIOCWRITE _IOWR('p', 3, struct pci_io)
#define PCIOCATTACHED _IOWR('p', 4, struct pci_io)
@@ -173,5 +175,6 @@ struct pci_bar_ioreq {
#define PCIOCLISTVPD _IOWR('p', 7, struct pci_list_vpd_io)
#define PCIOCBARMMAP _IOWR('p', 8, struct pci_bar_mmap)
#define PCIOCBARIO _IOWR('p', 9, struct pci_bar_ioreq)
+#define PCIOCGETCONF _IOWR('p', 10, struct pci_conf_io)
#endif /* !_SYS_PCIIO_H_ */
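
PCIOCGETCONF is renumbered here alongside the growth of struct pci_conf, and the new pc_reported_len field tells a caller how much of each entry the running kernel actually filled in. A minimal userland sketch (error, status and generation handling abridged; the pd_numa_domain use assumes the updated header):

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/pciio.h>
#include <fcntl.h>
#include <stdio.h>

int
main(void)
{
	struct pci_conf matches[32];
	struct pci_conf_io pc = {
		.match_buf_len = sizeof(matches),
		.matches = matches,
	};
	int fd;

	fd = open("/dev/pci", O_RDONLY);
	if (fd < 0 || ioctl(fd, PCIOCGETCONF, &pc) < 0)
		return (1);
	for (uint32_t i = 0; i < pc.num_matches; i++)
		printf("%s%lu: NUMA domain %d\n", matches[i].pd_name,
		    matches[i].pd_unit, matches[i].pd_numa_domain);
	return (0);
}
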
diff --git a/sys/sys/power.h b/sys/sys/power.h
index 9afa55dd403a..33ace400bfd2 100644
--- a/sys/sys/power.h
+++ b/sys/sys/power.h
@@ -3,6 +3,10 @@
*
* Copyright (c) 2001 Mitsuru IWASAKI
* All rights reserved.
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Aymeric Wibo
+ * <obiwac@freebsd.org> under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -28,8 +32,10 @@
#ifndef _SYS_POWER_H_
#define _SYS_POWER_H_
+#ifdef _KERNEL
#include <sys/_eventhandler.h>
+#include <sys/types.h>
/* Power management system type */
#define POWER_PM_TYPE_ACPI 0x01
@@ -38,13 +44,55 @@
/* Commands for Power management function */
#define POWER_CMD_SUSPEND 0x00
-/* Sleep state */
+/*
+ * Sleep state.
+ *
+ * These are high-level sleep states that the system can enter. They map to
+ * a specific generic sleep type (enum power_stype).
+ */
#define POWER_SLEEP_STATE_STANDBY 0x00
#define POWER_SLEEP_STATE_SUSPEND 0x01
#define POWER_SLEEP_STATE_HIBERNATE 0x02
-typedef int (*power_pm_fn_t)(u_long, void*, ...);
-extern int power_pm_register(u_int, power_pm_fn_t, void *);
+/*
+ * Sleep type.
+ *
+ * These are the specific generic methods of entering a sleep state. E.g.
+ * POWER_SLEEP_STATE_SUSPEND could be set to enter either suspend-to-RAM (which
+ * is S3 on ACPI systems), or suspend-to-idle (S0ix on ACPI systems). This
+ * would be done through the kern.power.suspend sysctl.
+ */
+enum power_stype {
+ POWER_STYPE_AWAKE,
+ POWER_STYPE_STANDBY,
+ POWER_STYPE_SUSPEND_TO_MEM,
+ POWER_STYPE_SUSPEND_TO_IDLE,
+ POWER_STYPE_HIBERNATE,
+ POWER_STYPE_POWEROFF,
+ POWER_STYPE_COUNT,
+ POWER_STYPE_UNKNOWN,
+};
+
+static const char * const power_stype_names[POWER_STYPE_COUNT] = {
+ [POWER_STYPE_AWAKE] = "awake",
+ [POWER_STYPE_STANDBY] = "standby",
+ [POWER_STYPE_SUSPEND_TO_MEM] = "s2mem",
+ [POWER_STYPE_SUSPEND_TO_IDLE] = "s2idle",
+ [POWER_STYPE_HIBERNATE] = "hibernate",
+ [POWER_STYPE_POWEROFF] = "poweroff",
+};
+
+extern enum power_stype power_standby_stype;
+extern enum power_stype power_suspend_stype;
+extern enum power_stype power_hibernate_stype;
+
+extern enum power_stype power_name_to_stype(const char *_name);
+extern const char *power_stype_to_name(enum power_stype _stype);
+
+typedef int (*power_pm_fn_t)(u_long _cmd, void* _arg, enum power_stype _stype);
+extern int power_pm_register(u_int _pm_type, power_pm_fn_t _pm_fn,
+ void *_pm_arg,
+ bool _pm_supported[static POWER_STYPE_COUNT]);
extern u_int power_pm_get_type(void);
extern void power_pm_suspend(int);
@@ -60,4 +108,5 @@ extern void power_profile_set_state(int);
typedef void (*power_profile_change_hook)(void *, int);
EVENTHANDLER_DECLARE(power_profile_change, power_profile_change_hook);
+#endif /* _KERNEL */
#endif /* !_SYS_POWER_H_ */
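
The name table above is what the kern.power.suspend sysctl mentioned in the comment is expected to consume; as a sketch, power_name_to_stype() can be little more than a scan of power_stype_names (the actual implementation in the power code may differ):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/power.h>

enum power_stype
power_name_to_stype(const char *name)
{
	enum power_stype stype;

	for (stype = 0; stype < POWER_STYPE_COUNT; stype++)
		if (power_stype_names[stype] != NULL &&
		    strcasecmp(name, power_stype_names[stype]) == 0)
			return (stype);
	return (POWER_STYPE_UNKNOWN);
}
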
diff --git a/sys/sys/random.h b/sys/sys/random.h
index 5abf762cd200..2a68f0c99b6d 100644
--- a/sys/sys/random.h
+++ b/sys/sys/random.h
@@ -142,9 +142,6 @@ random_harvest_direct(const void *entropy, u_int size, enum random_entropy_sourc
random_harvest_direct_(entropy, size, origin);
}
-void random_harvest_register_source(enum random_entropy_source);
-void random_harvest_deregister_source(enum random_entropy_source);
-
#if defined(RANDOM_ENABLE_UMA)
#define random_harvest_fast_uma(a, b, c) random_harvest_fast(a, b, c)
#else /* !defined(RANDOM_ENABLE_UMA) */
diff --git a/sys/sys/rmlock.h b/sys/sys/rmlock.h
index 664356998438..eae7342527e3 100644
--- a/sys/sys/rmlock.h
+++ b/sys/sys/rmlock.h
@@ -52,7 +52,7 @@ void rm_init(struct rmlock *rm, const char *name);
void rm_init_flags(struct rmlock *rm, const char *name, int opts);
void rm_destroy(struct rmlock *rm);
int rm_wowned(const struct rmlock *rm);
-void rm_sysinit(void *arg);
+void rm_sysinit(const void *arg);
void _rm_wlock_debug(struct rmlock *rm, const char *file, int line);
void _rm_wunlock_debug(struct rmlock *rm, const char *file, int line);
diff --git a/sys/sys/rwlock.h b/sys/sys/rwlock.h
index 0ebe90e09bed..929f78c1d204 100644
--- a/sys/sys/rwlock.h
+++ b/sys/sys/rwlock.h
@@ -128,7 +128,7 @@
*/
void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts);
void _rw_destroy(volatile uintptr_t *c);
-void rw_sysinit(void *arg);
+void rw_sysinit(const void *arg);
int _rw_wowned(const volatile uintptr_t *c);
void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line);
int __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF);
diff --git a/sys/sys/sx.h b/sys/sys/sx.h
index deb277decc75..d28cae9d01e5 100644
--- a/sys/sys/sx.h
+++ b/sys/sys/sx.h
@@ -99,7 +99,7 @@
* Function prototipes. Routines that start with an underscore are not part
* of the public interface and are wrappered with a macro.
*/
-void sx_sysinit(void *arg);
+void sx_sysinit(const void *arg);
#define sx_init(sx, desc) sx_init_flags((sx), (desc), 0)
void sx_init_flags(struct sx *sx, const char *description, int opts);
void sx_destroy(struct sx *sx);
diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h
index 2d6903967e15..cff27b8be316 100644
--- a/sys/sys/syscall.h
+++ b/sys/sys/syscall.h
@@ -535,4 +535,6 @@
#define SYS_inotify_rm_watch 594
#define SYS_getgroups 595
#define SYS_setgroups 596
-#define SYS_MAXSYSCALL 597
+#define SYS_jail_attach_jd 597
+#define SYS_jail_remove_jd 598
+#define SYS_MAXSYSCALL 599
diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk
index d1172c2dc7bf..443dbadcfbff 100644
--- a/sys/sys/syscall.mk
+++ b/sys/sys/syscall.mk
@@ -438,4 +438,6 @@ MIASM = \
inotify_add_watch_at.o \
inotify_rm_watch.o \
getgroups.o \
- setgroups.o
+ setgroups.o \
+ jail_attach_jd.o \
+ jail_remove_jd.o
diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h
index 98311a6dbf94..8dda4b4533ea 100644
--- a/sys/sys/sysproto.h
+++ b/sys/sys/sysproto.h
@@ -1901,6 +1901,12 @@ struct setgroups_args {
char gidsetsize_l_[PADL_(int)]; int gidsetsize; char gidsetsize_r_[PADR_(int)];
char gidset_l_[PADL_(const gid_t *)]; const gid_t * gidset; char gidset_r_[PADR_(const gid_t *)];
};
+struct jail_attach_jd_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+};
+struct jail_remove_jd_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+};
int sys__exit(struct thread *, struct _exit_args *);
int sys_fork(struct thread *, struct fork_args *);
int sys_read(struct thread *, struct read_args *);
@@ -2305,6 +2311,8 @@ int sys_inotify_add_watch_at(struct thread *, struct inotify_add_watch_at_args *
int sys_inotify_rm_watch(struct thread *, struct inotify_rm_watch_args *);
int sys_getgroups(struct thread *, struct getgroups_args *);
int sys_setgroups(struct thread *, struct setgroups_args *);
+int sys_jail_attach_jd(struct thread *, struct jail_attach_jd_args *);
+int sys_jail_remove_jd(struct thread *, struct jail_remove_jd_args *);
#ifdef COMPAT_43
@@ -3301,6 +3309,8 @@ int freebsd14_setgroups(struct thread *, struct freebsd14_setgroups_args *);
#define SYS_AUE_inotify_rm_watch AUE_INOTIFY
#define SYS_AUE_getgroups AUE_GETGROUPS
#define SYS_AUE_setgroups AUE_SETGROUPS
+#define SYS_AUE_jail_attach_jd AUE_JAIL_ATTACH
+#define SYS_AUE_jail_remove_jd AUE_JAIL_REMOVE
#undef PAD_
#undef PADL_
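
Both new system calls take only a jail descriptor fd. A hedged userland sketch: how the descriptor is obtained is outside the lines shown here, and raw syscall(2) is used in case libc wrappers are not yet in place.

#include <sys/syscall.h>
#include <unistd.h>

static int
attach_to_jail(int jailfd)
{
	return (syscall(SYS_jail_attach_jd, jailfd));
}

static int
remove_jail(int jailfd)
{
	return (syscall(SYS_jail_remove_jd, jailfd));
}
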
diff --git a/sys/sys/ttycom.h b/sys/sys/ttycom.h
index d7ddc66b09fb..43e8b98a5bc4 100644
--- a/sys/sys/ttycom.h
+++ b/sys/sys/ttycom.h
@@ -69,8 +69,8 @@
/* 89-91 conflicts: tun and tap */
#define TIOCTIMESTAMP _IOR('t', 89, struct timeval) /* enable/get timestamp
* of last input event */
-#define TIOCMGDTRWAIT _IOR('t', 90, int) /* modem: get wait on close */
-#define TIOCMSDTRWAIT _IOW('t', 91, int) /* modem: set wait on close */
+/* TIOCMGDTRWAIT _IOR('t', 90, int) * was modem: get wait on close */
+/* TIOCMSDTRWAIT _IOW('t', 91, int) * was modem: set wait on close */
/* 92-93 tun and tap */
/* 94-97 conflicts: tun and tap */
#define TIOCDRAIN _IO('t', 94) /* wait till output drained */
diff --git a/sys/sys/user.h b/sys/sys/user.h
index 103236b6ed1b..3183f0792256 100644
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -266,6 +266,7 @@ struct user {
#define KF_TYPE_EVENTFD 13
#define KF_TYPE_TIMERFD 14
#define KF_TYPE_INOTIFY 15
+#define KF_TYPE_JAILDESC 16
#define KF_TYPE_UNKNOWN 255
#define KF_VTYPE_VNON 0
@@ -453,6 +454,9 @@ struct kinfo_file {
uint64_t kf_timerfd_addr;
} kf_timerfd;
struct {
+ int32_t kf_jid;
+ } kf_jail;
+ struct {
uint64_t kf_kqueue_addr;
int32_t kf_kqueue_count;
int32_t kf_kqueue_state;
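
With the new KF_TYPE_JAILDESC and kf_jid, userland can report which jail an open descriptor refers to. A hedged sketch using libutil's kinfo_getfile(); the path through the kf_un union to the new kf_jail member is assumed to match its siblings above.

#include <sys/types.h>
#include <sys/user.h>
#include <libutil.h>
#include <stdio.h>
#include <stdlib.h>

static void
print_jail_descriptors(pid_t pid)
{
	struct kinfo_file *kif;
	int cnt;

	kif = kinfo_getfile(pid, &cnt);
	if (kif == NULL)
		return;
	for (int i = 0; i < cnt; i++)
		if (kif[i].kf_type == KF_TYPE_JAILDESC)
			printf("fd %d -> jid %d\n", kif[i].kf_fd,
			    (int)kif[i].kf_un.kf_jail.kf_jid);
	free(kif);
}
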
diff --git a/sys/tools/amd64_ia32_vdso.sh b/sys/tools/amd64_ia32_vdso.sh
index 85d2299b45d0..e5865639d398 100644
--- a/sys/tools/amd64_ia32_vdso.sh
+++ b/sys/tools/amd64_ia32_vdso.sh
@@ -58,7 +58,7 @@ then
exit 1
fi
-${CC} ${DEBUG} -x assembler-with-cpp -DLOCORE -fPIC -nostdinc -c \
+${CC} -x assembler-with-cpp -DLOCORE -fPIC -nostdinc -c \
-o elf-vdso32.so.o -I. -I"${S}" -include opt_global.h \
-DVDSO_NAME=elf_vdso32_so_1 -DVDSO_FILE=\"elf-vdso32.so.1\" \
"${S}"/tools/vdso_wrap.S
diff --git a/sys/tools/amd64_vdso.sh b/sys/tools/amd64_vdso.sh
index 2a83ae874ab7..ed91ddc8abb5 100644
--- a/sys/tools/amd64_vdso.sh
+++ b/sys/tools/amd64_vdso.sh
@@ -67,7 +67,7 @@ then
exit 1
fi
-${CC} ${DEBUG} -x assembler-with-cpp -DLOCORE -fPIC -nostdinc -c \
+${CC} -x assembler-with-cpp -DLOCORE -fPIC -nostdinc -c \
-o elf-vdso.so.o -I. -I"${S}" -include opt_global.h \
-DVDSO_NAME=elf_vdso_so_1 -DVDSO_FILE=\"elf-vdso.so.1\" \
"${S}"/tools/vdso_wrap.S
diff --git a/sys/tools/makeobjops.awk b/sys/tools/makeobjops.awk
index 5ea658c5a3b3..522fb04ec4d1 100644
--- a/sys/tools/makeobjops.awk
+++ b/sys/tools/makeobjops.awk
@@ -315,7 +315,7 @@ function handle_method (static, doc)
printh("\t" join(";\n\t", arguments, num_arguments) ";");
}
else {
- prototype = "static __inline " ret " " umname "(";
+ prototype = "static __inline " ret "\n" umname "(";
printh(format_line(prototype argument_list ")",
line_width, length(prototype)));
}
@@ -327,7 +327,7 @@ function handle_method (static, doc)
firstvar = "((kobj_t)" firstvar ")";
if (prolog != "")
printh(prolog);
- printh("\tKOBJOPLOOKUP(" firstvar "->ops," mname ");");
+ printh("\tKOBJOPLOOKUP(" firstvar "->ops, " mname ");");
rceq = (ret != "void") ? "rc = " : "";
printh("\t" rceq "((" mname "_t *) _m)(" varname_list ");");
if (epilog != "")
diff --git a/sys/ufs/ffs/ffs_rawread.c b/sys/ufs/ffs/ffs_rawread.c
index 9db0bee0d66d..ef0b2ff4f788 100644
--- a/sys/ufs/ffs/ffs_rawread.c
+++ b/sys/ufs/ffs/ffs_rawread.c
@@ -281,7 +281,7 @@ ffs_rawread_main(struct vnode *vp,
if (error != 0)
break;
- if (resid > bp->b_bufsize) { /* Setup fist readahead */
+ if (resid > bp->b_bufsize) { /* Setup first readahead */
if (rawreadahead != 0)
nbp = uma_zalloc(ffsraw_pbuf_zone,
M_NOWAIT);
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index 5189f7405400..679b2e20e88b 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -4017,8 +4017,9 @@ restart:
rr = rdomain == UMA_ANYDOMAIN;
if (rr) {
aflags = (flags & ~M_WAITOK) | M_NOWAIT;
- vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
- &aflags);
+ if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
+ &aflags) != 0)
+ return (NULL);
} else {
aflags = flags;
domain = rdomain;
@@ -5245,8 +5246,9 @@ uma_prealloc(uma_zone_t zone, int items)
slabs = howmany(items, keg->uk_ipers);
while (slabs-- > 0) {
aflags = M_NOWAIT;
- vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
- &aflags);
+ if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
+ &aflags) != 0)
+ panic("%s: Domainset is empty", __func__);
for (;;) {
slab = keg_alloc_slab(keg, zone, domain, M_WAITOK,
aflags);
diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c
index b44bdb96b0d4..9fa17da954f7 100644
--- a/sys/vm/vm_domainset.c
+++ b/sys/vm/vm_domainset.c
@@ -58,6 +58,9 @@
static int vm_domainset_default_stride = 64;
+static bool vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain);
+
+
/*
* Determine which policy is to be used for this allocation.
*/
@@ -93,28 +96,15 @@ vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds,
pindex += (((uintptr_t)obj) / sizeof(*obj));
di->di_offset = pindex;
}
- /* Skip domains below min on the first pass. */
- di->di_minskip = true;
}
static void
vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain)
{
+ /* Grab the next domain in 'ds_order'. */
*domain = di->di_domain->ds_order[
- ++(*di->di_iter) % di->di_domain->ds_cnt];
-}
-
-static void
-vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain)
-{
- int d;
-
- do {
- d = di->di_domain->ds_order[
- ++(*di->di_iter) % di->di_domain->ds_cnt];
- } while (d == di->di_domain->ds_prefer);
- *domain = d;
+ (*di->di_iter)++ % di->di_domain->ds_cnt];
}
static void
@@ -127,79 +117,144 @@ vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain)
*domain = di->di_domain->ds_order[d];
}
-static void
-vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
+/*
+ * Internal function determining the current phase's first candidate domain.
+ *
+ * Returns whether there is an eligible domain, which is returned through
+ * '*domain'. '*domain' can be modified even if there is no eligible domain.
+ *
+ * See herald comment of vm_domainset_iter_first() below about phases.
+ */
+static bool
+vm_domainset_iter_phase_first(struct vm_domainset_iter *di, int *domain)
{
-
- KASSERT(di->di_n > 0, ("%s: Invalid n %d", __func__, di->di_n));
switch (di->di_policy) {
case DOMAINSET_POLICY_FIRSTTOUCH:
- /*
- * To prevent impossible allocations we convert an invalid
- * first-touch to round-robin.
- */
- /* FALLTHROUGH */
- case DOMAINSET_POLICY_INTERLEAVE:
- /* FALLTHROUGH */
+ *domain = PCPU_GET(domain);
+ break;
case DOMAINSET_POLICY_ROUNDROBIN:
vm_domainset_iter_rr(di, domain);
break;
case DOMAINSET_POLICY_PREFER:
- vm_domainset_iter_prefer(di, domain);
+ *domain = di->di_domain->ds_prefer;
+ break;
+ case DOMAINSET_POLICY_INTERLEAVE:
+ vm_domainset_iter_interleave(di, domain);
break;
default:
panic("%s: Unknown policy %d", __func__, di->di_policy);
}
KASSERT(*domain < vm_ndomains,
("%s: Invalid domain %d", __func__, *domain));
+
+ /*
+ * Has the policy's start domain already been visited?
+ */
+ if (!DOMAINSET_ISSET(*domain, &di->di_remain_mask))
+ return (vm_domainset_iter_next(di, domain));
+
+ DOMAINSET_CLR(*domain, &di->di_remain_mask);
+
+ /* Does it have enough free pages (phase 1)? */
+ if (di->di_minskip && vm_page_count_min_domain(*domain)) {
+ /* Mark the domain as eligible for phase 2. */
+ DOMAINSET_SET(*domain, &di->di_min_mask);
+ return (vm_domainset_iter_next(di, domain));
+ }
+
+ return (true);
}
-static void
+/*
+ * Resets an iterator to point to the first candidate domain.
+ *
+ * Returns whether there is an eligible domain to start with. '*domain' may be
+ * modified even if there is none.
+ *
+ * There must have been one call to vm_domainset_iter_init() before.
+ *
+ * This function must be called at least once before calling
+ * vm_domainset_iter_next(). Note that functions wrapping
+ * vm_domainset_iter_init() usually do that themselves.
+ *
+ * This function may be called again to reset the iterator to the policy's first
+ * candidate domain. After each reset, the iterator will visit the same domains
+ * as in the previous iteration minus those on which vm_domainset_iter_ignore()
+ * has been called. Note that the first candidate domain may change at each
+ * reset (at time of this writing, only on the DOMAINSET_POLICY_ROUNDROBIN
+ * policy).
+ *
+ * Domains which have a number of free pages over 'v_free_min' are always
+ * visited first (this is called "phase 1" in comments, "phase 2" being the
+ * examination of the remaining domains; no domains are ever visited twice).
+ */
+static bool
vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain)
{
+ /* Initialize the mask of domains to visit. */
+ DOMAINSET_COPY(&di->di_valid_mask, &di->di_remain_mask);
+ /*
+ * No candidate domains for phase 2 at start. This will be filled by
+ * phase 1.
+ */
+ DOMAINSET_ZERO(&di->di_min_mask);
+ /* Skip domains below 'v_free_min' on phase 1. */
+ di->di_minskip = true;
- switch (di->di_policy) {
- case DOMAINSET_POLICY_FIRSTTOUCH:
- *domain = PCPU_GET(domain);
- if (DOMAINSET_ISSET(*domain, &di->di_valid_mask)) {
- /*
- * Add an extra iteration because we will visit the
- * current domain a second time in the rr iterator.
- */
- di->di_n = di->di_domain->ds_cnt + 1;
- break;
- }
- /*
- * To prevent impossible allocations we convert an invalid
- * first-touch to round-robin.
- */
- /* FALLTHROUGH */
- case DOMAINSET_POLICY_ROUNDROBIN:
- di->di_n = di->di_domain->ds_cnt;
+ return (vm_domainset_iter_phase_first(di, domain));
+}
+
+/*
+ * Advances the iterator to the next candidate domain.
+ *
+ * Returns whether there was another domain to visit. '*domain' may be modified
+ * even if there is none.
+ *
+ * vm_domainset_iter_first() must have been called at least once before using
+ * this function (see its herald comment for more details on iterators).
+ */
+static bool
+vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
+{
+	/* Loop while domains remain to be visited in the current phase. */
+ while (!DOMAINSET_EMPTY(&di->di_remain_mask)) {
+ /* Grab the next domain in 'ds_order'. */
vm_domainset_iter_rr(di, domain);
- break;
- case DOMAINSET_POLICY_PREFER:
- *domain = di->di_domain->ds_prefer;
- di->di_n = di->di_domain->ds_cnt;
- break;
- case DOMAINSET_POLICY_INTERLEAVE:
- vm_domainset_iter_interleave(di, domain);
- di->di_n = di->di_domain->ds_cnt;
- break;
- default:
- panic("%s: Unknown policy %d", __func__, di->di_policy);
+ KASSERT(*domain < vm_ndomains,
+ ("%s: Invalid domain %d", __func__, *domain));
+
+ if (DOMAINSET_ISSET(*domain, &di->di_remain_mask)) {
+ DOMAINSET_CLR(*domain, &di->di_remain_mask);
+ if (!di->di_minskip || !vm_page_count_min_domain(*domain))
+ return (true);
+ DOMAINSET_SET(*domain, &di->di_min_mask);
+ }
}
- KASSERT(di->di_n > 0, ("%s: Invalid n %d", __func__, di->di_n));
- KASSERT(*domain < vm_ndomains,
- ("%s: Invalid domain %d", __func__, *domain));
+
+ /*
+ * If phase 1 (skip low memory domains) is over, start phase 2 (consider
+ * low memory domains).
+ */
+ if (di->di_minskip) {
+ di->di_minskip = false;
+ /* Browse domains that were under 'v_free_min'. */
+ DOMAINSET_COPY(&di->di_min_mask, &di->di_remain_mask);
+ return (vm_domainset_iter_phase_first(di, domain));
+ }
+
+ return (false);
}
-void
+int
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
- vm_pindex_t pindex, int *domain, int *req, struct pctrie_iter *pages)
+ vm_pindex_t pindex, int *domain, int *req)
{
struct domainset_ref *dr;
+ di->di_flags = *req;
+ *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
+ VM_ALLOC_NOWAIT;
+
/*
* Object policy takes precedence over thread policy. The policies
* are immutable and unsynchronized. Updates can race but pointer
@@ -209,36 +264,21 @@ vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
dr = &obj->domain;
else
dr = &curthread->td_domain;
+
vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex);
- di->di_flags = *req;
- *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
- VM_ALLOC_NOWAIT;
- vm_domainset_iter_first(di, domain);
- if (vm_page_count_min_domain(*domain))
- vm_domainset_iter_page(di, obj, domain, pages);
+ /*
+ * XXXOC: Shouldn't we just panic on 'false' if VM_ALLOC_WAITOK was
+ * passed?
+ */
+ return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM);
}
int
vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
int *domain, struct pctrie_iter *pages)
{
- if (__predict_false(DOMAINSET_EMPTY(&di->di_valid_mask)))
- return (ENOMEM);
-
- /* If there are more domains to visit we run the iterator. */
- while (--di->di_n != 0) {
- vm_domainset_iter_next(di, domain);
- if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) &&
- (!di->di_minskip || !vm_page_count_min_domain(*domain)))
- return (0);
- }
-
- /* If we skipped domains below min restart the search. */
- if (di->di_minskip) {
- di->di_minskip = false;
- vm_domainset_iter_first(di, domain);
+ if (vm_domainset_iter_next(di, domain))
return (0);
- }
/* If we visited all domains and this was a NOWAIT we return error. */
if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
@@ -257,61 +297,43 @@ vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
return (ENOMEM);
/* Restart the search. */
- vm_domainset_iter_first(di, domain);
-
- return (0);
+ /* XXXOC: Shouldn't we just panic on 'false'? */
+ return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM);
}
-static void
+static int
_vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain,
int *flags)
{
-
di->di_flags = *flags;
*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
- vm_domainset_iter_first(di, domain);
- if (vm_page_count_min_domain(*domain))
- vm_domainset_iter_policy(di, domain);
+ /* XXXOC: Shouldn't we just panic on 'false' if M_WAITOK was passed? */
+ return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM);
}
-void
+int
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
struct domainset *ds, int *domain, int *flags)
{
vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0);
- _vm_domainset_iter_policy_init(di, domain, flags);
+ return (_vm_domainset_iter_policy_init(di, domain, flags));
}
-void
+int
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
struct domainset_ref *dr, int *domain, int *flags)
{
vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0);
- _vm_domainset_iter_policy_init(di, domain, flags);
+ return (_vm_domainset_iter_policy_init(di, domain, flags));
}
int
vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{
- if (DOMAINSET_EMPTY(&di->di_valid_mask))
- return (ENOMEM);
-
- /* If there are more domains to visit we run the iterator. */
- while (--di->di_n != 0) {
- vm_domainset_iter_next(di, domain);
- if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) &&
- (!di->di_minskip || !vm_page_count_min_domain(*domain)))
- return (0);
- }
-
- /* If we skipped domains below min restart the search. */
- if (di->di_minskip) {
- di->di_minskip = false;
- vm_domainset_iter_first(di, domain);
+ if (vm_domainset_iter_next(di, domain))
return (0);
- }
/* If we visited all domains and this was a NOWAIT we return error. */
if ((di->di_flags & M_WAITOK) == 0)
@@ -321,9 +343,8 @@ vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
vm_wait_doms(&di->di_valid_mask, 0);
/* Restart the search. */
- vm_domainset_iter_first(di, domain);
-
- return (0);
+ /* XXXOC: Shouldn't we just panic on 'false'? */
+ return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM);
}
void
@@ -345,12 +366,12 @@ vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
return (EJUSTRETURN);
}
-void
+int
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
- vm_pindex_t pindex, int *domain, int *flags, struct pctrie_iter *pages)
+ vm_pindex_t pindex, int *domain, int *flags)
{
-
*domain = 0;
+ return (0);
}
int
@@ -360,20 +381,20 @@ vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
return (EJUSTRETURN);
}
-void
+int
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
struct domainset *ds, int *domain, int *flags)
{
-
*domain = 0;
+ return (0);
}
-void
+int
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
struct domainset_ref *dr, int *domain, int *flags)
{
-
*domain = 0;
+ return (0);
}
void
diff --git a/sys/vm/vm_domainset.h b/sys/vm/vm_domainset.h
index 0d325a642f40..ef86c8ccb5e4 100644
--- a/sys/vm/vm_domainset.h
+++ b/sys/vm/vm_domainset.h
@@ -33,23 +33,26 @@ struct pctrie_iter;
struct vm_domainset_iter {
struct domainset *di_domain;
unsigned int *di_iter;
+ /* Initialized from 'di_domain', initial value after reset. */
domainset_t di_valid_mask;
+ /* Domains to browse in the current phase. */
+ domainset_t di_remain_mask;
+ /* Domains skipped in phase 1 because under 'v_free_min'. */
+ domainset_t di_min_mask;
vm_pindex_t di_offset;
int di_flags;
uint16_t di_policy;
- domainid_t di_n;
bool di_minskip;
};
int vm_domainset_iter_page(struct vm_domainset_iter *, struct vm_object *,
int *, struct pctrie_iter *);
-void vm_domainset_iter_page_init(struct vm_domainset_iter *,
- struct vm_object *, vm_pindex_t, int *, int *,
- struct pctrie_iter *);
+int vm_domainset_iter_page_init(struct vm_domainset_iter *,
+ struct vm_object *, vm_pindex_t, int *, int *);
int vm_domainset_iter_policy(struct vm_domainset_iter *, int *);
-void vm_domainset_iter_policy_init(struct vm_domainset_iter *,
+int vm_domainset_iter_policy_init(struct vm_domainset_iter *,
struct domainset *, int *, int *);
-void vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *,
+int vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *,
struct domainset_ref *, int *, int *);
void vm_domainset_iter_ignore(struct vm_domainset_iter *, int);
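
The prototype changes above capture the new contract: iterator setup may now fail outright instead of handing back an unusable first domain. A sketch of the resulting caller pattern, mirroring the kmem_* and vm_page_alloc_* adjustments below; alloc_from_domain() is a placeholder for the caller's per-domain worker.

#include <sys/param.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <vm/vm_domainset.h>

static void *
alloc_any_domain(struct domainset *ds, vm_size_t size)
{
	struct vm_domainset_iter di;
	void *addr;
	int domain, flags;

	flags = M_WAITOK;
	if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags) != 0)
		return (NULL);		/* empty or invalid domain set */
	do {
		/* alloc_from_domain() is hypothetical, see lead-in. */
		addr = alloc_from_domain(domain, size, flags);
		if (addr != NULL)
			return (addr);
	} while (vm_domainset_iter_policy(&di, &domain) == 0);
	return (NULL);
}
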
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index 93ec6014c27d..1fd6518cf4ed 100644
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -91,6 +91,8 @@ void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t,
vm_ooffset_t *);
int vm_fault_disable_pagefaults(void);
void vm_fault_enable_pagefaults(int save);
+int vm_fault_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
+ vm_prot_t prot, vm_page_t *ma, int max_count, int *ppages_count);
int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
vm_prot_t prot, vm_page_t *ma, int max_count);
int vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 3e57e8d4f1d0..524aed2be2ff 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -71,11 +71,9 @@
* Page fault handling module.
*/
-#include <sys/cdefs.h>
#include "opt_ktrace.h"
#include "opt_vm.h"
-#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
@@ -204,7 +202,10 @@ vm_fault_page_release(vm_page_t *mp)
* pageout while optimizing fault restarts.
*/
vm_page_deactivate(m);
- vm_page_xunbusy(m);
+ if (vm_page_xbusied(m))
+ vm_page_xunbusy(m);
+ else
+ vm_page_sunbusy(m);
*mp = NULL;
}
}
@@ -260,6 +261,12 @@ vm_fault_unlock_vp(struct faultstate *fs)
}
}
+static bool
+vm_fault_might_be_cow(struct faultstate *fs)
+{
+ return (fs->object != fs->first_object);
+}
+
static void
vm_fault_deallocate(struct faultstate *fs)
{
@@ -267,7 +274,7 @@ vm_fault_deallocate(struct faultstate *fs)
vm_fault_page_release(&fs->m_cow);
vm_fault_page_release(&fs->m);
vm_object_pip_wakeup(fs->object);
- if (fs->object != fs->first_object) {
+ if (vm_fault_might_be_cow(fs)) {
VM_OBJECT_WLOCK(fs->first_object);
vm_fault_page_free(&fs->first_m);
VM_OBJECT_WUNLOCK(fs->first_object);
@@ -329,6 +336,13 @@ vm_fault_dirty(struct faultstate *fs, vm_page_t m)
}
+static bool
+vm_fault_is_read(const struct faultstate *fs)
+{
+ return ((fs->prot & VM_PROT_WRITE) == 0 &&
+ (fs->fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) == 0);
+}
+
/*
* Unlocks fs.first_object and fs.map on success.
*/
@@ -1002,12 +1016,22 @@ vm_fault_relookup(struct faultstate *fs)
return (KERN_SUCCESS);
}
+static bool
+vm_fault_can_cow_rename(struct faultstate *fs)
+{
+ return (
+ /* Only one shadow object and no other refs. */
+ fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
+ /* No other ways to look the object up. */
+ fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0);
+}
+
static void
vm_fault_cow(struct faultstate *fs)
{
- bool is_first_object_locked;
+ bool is_first_object_locked, rename_cow;
- KASSERT(fs->object != fs->first_object,
+ KASSERT(vm_fault_might_be_cow(fs),
("source and target COW objects are identical"));
/*
@@ -1019,21 +1043,29 @@ vm_fault_cow(struct faultstate *fs)
* object so that it will go out to swap when needed.
*/
is_first_object_locked = false;
- if (
- /*
- * Only one shadow object and no other refs.
- */
- fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
- /*
- * No other ways to look the object up
- */
- fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0 &&
- /*
- * We don't chase down the shadow chain and we can acquire locks.
- */
- (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object)) &&
- fs->object == fs->first_object->backing_object &&
- VM_OBJECT_TRYWLOCK(fs->object)) {
+ rename_cow = false;
+
+ if (vm_fault_can_cow_rename(fs) && vm_page_xbusied(fs->m)) {
+ /*
+		 * Check that we don't chase down the shadow chain and
+		 * that we can acquire the locks.  Recheck the conditions
+		 * for the rename once the shadow chain is stable after
+		 * the objects have been locked.
+ */
+ is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object);
+ if (is_first_object_locked &&
+ fs->object == fs->first_object->backing_object) {
+ if (VM_OBJECT_TRYWLOCK(fs->object)) {
+ rename_cow = vm_fault_can_cow_rename(fs);
+ if (!rename_cow)
+ VM_OBJECT_WUNLOCK(fs->object);
+ }
+ }
+ }
+
+ if (rename_cow) {
+ vm_page_assert_xbusied(fs->m);
+
/*
* Remove but keep xbusy for replace. fs->m is moved into
* fs->first_object and left busy while fs->first_m is
@@ -1090,8 +1122,12 @@ vm_fault_cow(struct faultstate *fs)
* address space. If OBJ_ONEMAPPING is set after the check,
* removing mappings will at worse trigger some unnecessary page
* faults.
+ *
+ * In the fs->m shared busy case, the xbusy state of
+ * fs->first_m prevents new mappings of fs->m from
+ * being created because a parallel fault on this
+ * shadow chain should wait for xbusy on fs->first_m.
*/
- vm_page_assert_xbusied(fs->m_cow);
if ((fs->first_object->flags & OBJ_ONEMAPPING) == 0)
pmap_remove_all(fs->m_cow);
}
@@ -1171,7 +1207,7 @@ vm_fault_zerofill(struct faultstate *fs)
* If there's no object left, fill the page in the top
* object with zeros.
*/
- if (fs->object != fs->first_object) {
+ if (vm_fault_might_be_cow(fs)) {
vm_object_pip_wakeup(fs->object);
fs->object = fs->first_object;
fs->pindex = fs->first_pindex;
@@ -1420,14 +1456,13 @@ vm_fault_getpages(struct faultstate *fs, int *behindp, int *aheadp)
* and we could end up trying to pagein and pageout the same page
* simultaneously.
*
- * We can theoretically allow the busy case on a read fault if the page
- * is marked valid, but since such pages are typically already pmap'd,
- * putting that special case in might be more effort then it is worth.
- * We cannot under any circumstances mess around with a shared busied
- * page except, perhaps, to pmap it.
+ * We allow the busy case on a read fault if the page is valid. We
+ * cannot under any circumstances mess around with a shared busied
+ * page except, perhaps, to pmap it. This is controlled by the
+ * VM_ALLOC_SBUSY bit in the allocflags argument.
*/
static void
-vm_fault_busy_sleep(struct faultstate *fs)
+vm_fault_busy_sleep(struct faultstate *fs, int allocflags)
{
/*
* Reference the page before unlocking and
@@ -1435,13 +1470,13 @@ vm_fault_busy_sleep(struct faultstate *fs)
* likely to reclaim it.
*/
vm_page_aflag_set(fs->m, PGA_REFERENCED);
- if (fs->object != fs->first_object) {
+ if (vm_fault_might_be_cow(fs)) {
vm_fault_page_release(&fs->first_m);
vm_object_pip_wakeup(fs->first_object);
}
vm_object_pip_wakeup(fs->object);
vm_fault_unlock_map(fs);
- if (!vm_page_busy_sleep(fs->m, "vmpfw", 0))
+ if (!vm_page_busy_sleep(fs->m, "vmpfw", allocflags))
VM_OBJECT_UNLOCK(fs->object);
VM_CNT_INC(v_intrans);
vm_object_deallocate(fs->first_object);
@@ -1487,8 +1522,53 @@ vm_fault_object(struct faultstate *fs, int *behindp, int *aheadp)
vm_page_iter_init(&pages, fs->object);
fs->m = vm_radix_iter_lookup(&pages, fs->pindex);
if (fs->m != NULL) {
+ /*
+ * If the found page is valid, will be either shadowed
+ * or mapped read-only, and will not be renamed for
+ * COW, then busy it in shared mode. This allows
+ * other faults needing this page to proceed in
+ * parallel.
+ *
+ * Unlocked check for validity, rechecked after busy
+ * is obtained.
+ */
+ if (vm_page_all_valid(fs->m) &&
+ /*
+ * No write permissions for the new fs->m mapping,
+ * or the first object has only one mapping, so
+ * other writeable COW mappings of fs->m cannot
+ * appear under us.
+ */
+ (vm_fault_is_read(fs) || vm_fault_might_be_cow(fs)) &&
+ /*
+ * fs->m cannot be renamed from object to
+ * first_object. These conditions will be
+ * re-checked with proper synchronization in
+ * vm_fault_cow().
+ */
+ (!vm_fault_can_cow_rename(fs) ||
+ fs->object != fs->first_object->backing_object)) {
+ if (!vm_page_trysbusy(fs->m)) {
+ vm_fault_busy_sleep(fs, VM_ALLOC_SBUSY);
+ return (FAULT_RESTART);
+ }
+
+ /*
+ * Now make sure that racily checked
+ * conditions are still valid.
+ */
+ if (__predict_true(vm_page_all_valid(fs->m) &&
+ (vm_fault_is_read(fs) ||
+ vm_fault_might_be_cow(fs)))) {
+ VM_OBJECT_UNLOCK(fs->object);
+ return (FAULT_SOFT);
+ }
+
+ vm_page_sunbusy(fs->m);
+ }
+
if (!vm_page_tryxbusy(fs->m)) {
- vm_fault_busy_sleep(fs);
+ vm_fault_busy_sleep(fs, 0);
return (FAULT_RESTART);
}
@@ -1701,10 +1781,15 @@ RetryFault:
found:
/*
- * A valid page has been found and exclusively busied. The
- * object lock must no longer be held.
+ * A valid page has been found and busied. The object lock
+ * must no longer be held if the page was busied.
+ *
+ * Regardless of the busy state of fs.m, fs.first_m is always
+ * exclusively busied after the first iteration of the loop
+ * calling vm_fault_object(). This is an ordering point for
+	 * the parallel faults occurring on the same page.
*/
- vm_page_assert_xbusied(fs.m);
+ vm_page_assert_busied(fs.m);
VM_OBJECT_ASSERT_UNLOCKED(fs.object);
/*
@@ -1712,7 +1797,7 @@ found:
* top-level object, we have to copy it into a new page owned by the
* top-level object.
*/
- if (fs.object != fs.first_object) {
+ if (vm_fault_might_be_cow(&fs)) {
/*
* We only really need to copy if we want to write it.
*/
@@ -1773,7 +1858,7 @@ found:
* Page must be completely valid or it is not fit to
* map into user space. vm_pager_get_pages() ensures this.
*/
- vm_page_assert_xbusied(fs.m);
+ vm_page_assert_busied(fs.m);
KASSERT(vm_page_all_valid(fs.m),
("vm_fault: page %p partially invalid", fs.m));
@@ -1805,7 +1890,13 @@ found:
(*fs.m_hold) = fs.m;
vm_page_wire(fs.m);
}
- vm_page_xunbusy(fs.m);
+
+ KASSERT(fs.first_object == fs.object || vm_page_xbusied(fs.first_m),
+ ("first_m must be xbusy"));
+ if (vm_page_xbusied(fs.m))
+ vm_page_xunbusy(fs.m);
+ else
+ vm_page_sunbusy(fs.m);
fs.m = NULL;
/*
@@ -1995,32 +2086,43 @@ vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra,
}
/*
- * Hold each of the physical pages that are mapped by the specified range of
- * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid
- * and allow the specified types of access, "prot". If all of the implied
- * pages are successfully held, then the number of held pages is returned
- * together with pointers to those pages in the array "ma". However, if any
- * of the pages cannot be held, -1 is returned.
+ * Hold each of the physical pages that are mapped by the specified
+ * range of virtual addresses, ["addr", "addr" + "len"), if those
+ * mappings are valid and allow the specified types of access, "prot".
+ * If all of the implied pages are successfully held, then the number
+ * of held pages is assigned to *ppages_count, together with pointers
+ * to those pages in the array "ma". The returned value is zero.
+ *
+ * However, if any of the pages cannot be held, an error is returned,
+ * and no pages are held.
+ * Error values:
+ * ENOMEM - the range is not valid
+ * EINVAL - the provided vm_page array is too small to hold all pages
+ * EAGAIN - a page was not mapped, and the thread is in nofaulting mode
+ * EFAULT - a page with requested permissions cannot be mapped
+ * (more detailed result from vm_fault() is lost)
*/
int
-vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
- vm_prot_t prot, vm_page_t *ma, int max_count)
+vm_fault_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
+ vm_prot_t prot, vm_page_t *ma, int max_count, int *ppages_count)
{
vm_offset_t end, va;
vm_page_t *mp;
- int count;
+ int count, error;
boolean_t pmap_failed;
- if (len == 0)
+ if (len == 0) {
+ *ppages_count = 0;
return (0);
+ }
end = round_page(addr + len);
addr = trunc_page(addr);
if (!vm_map_range_valid(map, addr, end))
- return (-1);
+ return (ENOMEM);
if (atop(end - addr) > max_count)
- panic("vm_fault_quick_hold_pages: count > max_count");
+ return (EINVAL);
count = atop(end - addr);
/*
@@ -2062,19 +2164,49 @@ vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
* the proper behaviour explicitly.
*/
if ((prot & VM_PROT_QUICK_NOFAULT) != 0 &&
- (curthread->td_pflags & TDP_NOFAULTING) != 0)
- goto error;
- for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE)
+ (curthread->td_pflags & TDP_NOFAULTING) != 0) {
+ error = EAGAIN;
+ goto fail;
+ }
+ for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) {
if (*mp == NULL && vm_fault(map, va, prot,
- VM_FAULT_NORMAL, mp) != KERN_SUCCESS)
- goto error;
+ VM_FAULT_NORMAL, mp) != KERN_SUCCESS) {
+ error = EFAULT;
+ goto fail;
+ }
+ }
}
- return (count);
-error:
+ *ppages_count = count;
+ return (0);
+fail:
for (mp = ma; mp < ma + count; mp++)
if (*mp != NULL)
vm_page_unwire(*mp, PQ_INACTIVE);
- return (-1);
+ return (error);
+}
+
+ /*
+ * Hold each of the physical pages that are mapped by the specified range of
+ * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid
+ * and allow the specified types of access, "prot". If all of the implied
+ * pages are successfully held, then the number of held pages is returned
+ * together with pointers to those pages in the array "ma". However, if any
+ * of the pages cannot be held, -1 is returned.
+ */
+int
+vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
+ vm_prot_t prot, vm_page_t *ma, int max_count)
+{
+ int error, pages_count;
+
+ error = vm_fault_hold_pages(map, addr, len, prot, ma,
+ max_count, &pages_count);
+ if (error != 0) {
+ if (error == EINVAL)
+ panic("vm_fault_quick_hold_pages: count > max_count");
+ return (-1);
+ }
+ return (pages_count);
}
/*
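
A sketch of a caller that wants the specific error instead of the -1 still returned by the vm_fault_quick_hold_pages() wrapper; the function name and the sixteen-page buffer are made up for illustration.

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>

static int
hold_user_range(vm_map_t map, vm_offset_t uaddr, vm_size_t len)
{
	vm_page_t ma[16];
	int error, held;

	error = vm_fault_hold_pages(map, uaddr, len, VM_PROT_READ, ma,
	    nitems(ma), &held);
	if (error != 0)
		return (error);	/* ENOMEM, EINVAL, EAGAIN or EFAULT */
	/* ... access the held pages ... */
	for (int i = 0; i < held; i++)
		vm_page_unwire(ma[i], PQ_INACTIVE);
	return (0);
}
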
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 94df2c2f9a9e..e0f1807a1b32 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -453,7 +453,7 @@ vm_thread_stack_create(struct domainset *ds, int pages)
obj = vm_thread_kstack_size_to_obj(pages);
if (vm_ndomains > 1)
obj->domain.dr_policy = ds;
- vm_domainset_iter_page_init(&di, obj, 0, &domain, &req, NULL);
+ vm_domainset_iter_page_init(&di, obj, 0, &domain, &req);
do {
/*
* Get a kernel virtual address for this thread's kstack.
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index e7d7b6726d2c..ac327aa37b72 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -323,7 +323,9 @@ kmem_alloc_attr_domainset(struct domainset *ds, vm_size_t size, int flags,
start_segind = -1;
- vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
+ if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags) != 0)
+ return (NULL);
+
do {
addr = kmem_alloc_attr_domain(domain, size, flags, low, high,
memattr);
@@ -417,7 +419,9 @@ kmem_alloc_contig_domainset(struct domainset *ds, vm_size_t size, int flags,
start_segind = -1;
- vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
+ if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags))
+ return (NULL);
+
do {
addr = kmem_alloc_contig_domain(domain, size, flags, low, high,
alignment, boundary, memattr);
@@ -517,7 +521,9 @@ kmem_malloc_domainset(struct domainset *ds, vm_size_t size, int flags)
void *addr;
int domain;
- vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
+ if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags) != 0)
+ return (NULL);
+
do {
addr = kmem_malloc_domain(domain, size, flags);
if (addr != NULL)
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index abad5efb8a79..16878604fa11 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2015,8 +2015,9 @@ vm_page_alloc_iter(vm_object_t object, vm_pindex_t pindex, int req,
vm_page_t m;
int domain;
- vm_domainset_iter_page_init(&di, object, pindex, &domain, &req,
- pages);
+ if (vm_domainset_iter_page_init(&di, object, pindex, &domain, &req) != 0)
+ return (NULL);
+
do {
m = vm_page_alloc_domain_iter(object, pindex, domain, req,
pages);
@@ -2268,7 +2269,9 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
start_segind = -1;
- vm_domainset_iter_page_init(&di, object, pindex, &domain, &req, NULL);
+ if (vm_domainset_iter_page_init(&di, object, pindex, &domain, &req) != 0)
+ return (NULL);
+
do {
m = vm_page_alloc_contig_domain(object, pindex, domain, req,
npages, low, high, alignment, boundary, memattr);
@@ -2596,7 +2599,9 @@ vm_page_alloc_noobj(int req)
vm_page_t m;
int domain;
- vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req, NULL);
+ if (vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0)
+ return (NULL);
+
do {
m = vm_page_alloc_noobj_domain(domain, req);
if (m != NULL)
@@ -2615,7 +2620,9 @@ vm_page_alloc_noobj_contig(int req, u_long npages, vm_paddr_t low,
vm_page_t m;
int domain;
- vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req, NULL);
+ if (vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0)
+ return (NULL);
+
do {
m = vm_page_alloc_noobj_contig_domain(domain, req, npages, low,
high, alignment, boundary, memattr);
@@ -3334,7 +3341,9 @@ vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
ret = ERANGE;
- vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req, NULL);
+ if (vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0)
+ return (ret);
+
do {
status = vm_page_reclaim_contig_domain(domain, req, npages, low,
high, alignment, boundary);
diff --git a/sys/x86/acpica/acpi_apm.c b/sys/x86/acpica/acpi_apm.c
index be161cd6171b..8e5785cf0ed6 100644
--- a/sys/x86/acpica/acpi_apm.c
+++ b/sys/x86/acpica/acpi_apm.c
@@ -235,7 +235,7 @@ apmdtor(void *data)
acpi_sc = clone->acpi_sc;
/* We are about to lose a reference so check if suspend should occur */
- if (acpi_sc->acpi_next_sstate != 0 &&
+ if (acpi_sc->acpi_next_stype != POWER_STYPE_AWAKE &&
clone->notify_status != APM_EV_ACKED)
acpi_AckSleepState(clone, 0);
@@ -283,10 +283,10 @@ apmioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td
case APMIO_SUSPEND:
if ((flag & FWRITE) == 0)
return (EPERM);
- if (acpi_sc->acpi_next_sstate == 0) {
- if (acpi_sc->acpi_suspend_sx != ACPI_STATE_S5) {
+ if (acpi_sc->acpi_next_stype == POWER_STYPE_AWAKE) {
+ if (power_suspend_stype != POWER_STYPE_POWEROFF) {
error = acpi_ReqSleepState(acpi_sc,
- acpi_sc->acpi_suspend_sx);
+ power_suspend_stype);
} else {
printf(
"power off via apm suspend not supported\n");
@@ -298,10 +298,10 @@ apmioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td
case APMIO_STANDBY:
if ((flag & FWRITE) == 0)
return (EPERM);
- if (acpi_sc->acpi_next_sstate == 0) {
- if (acpi_sc->acpi_standby_sx != ACPI_STATE_S5) {
+ if (acpi_sc->acpi_next_stype == POWER_STYPE_AWAKE) {
+ if (power_standby_stype != POWER_STYPE_POWEROFF) {
error = acpi_ReqSleepState(acpi_sc,
- acpi_sc->acpi_standby_sx);
+ power_standby_stype);
} else {
printf(
"power off via apm standby not supported\n");
@@ -313,10 +313,11 @@ apmioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td
case APMIO_NEXTEVENT:
printf("apm nextevent start\n");
ACPI_LOCK(acpi);
- if (acpi_sc->acpi_next_sstate != 0 && clone->notify_status ==
- APM_EV_NONE) {
+ if (acpi_sc->acpi_next_stype != POWER_STYPE_AWAKE &&
+ clone->notify_status == APM_EV_NONE) {
ev_info = (struct apm_event_info *)addr;
- if (acpi_sc->acpi_next_sstate <= ACPI_STATE_S3)
+ /* XXX Check this. */
+ if (acpi_sc->acpi_next_stype == POWER_STYPE_STANDBY)
ev_info->type = PMEV_STANDBYREQ;
else
ev_info->type = PMEV_SUSPENDREQ;
@@ -392,7 +393,7 @@ apmpoll(struct cdev *dev, int events, struct thread *td)
revents = 0;
devfs_get_cdevpriv((void **)&clone);
ACPI_LOCK(acpi);
- if (clone->acpi_sc->acpi_next_sstate)
+ if (clone->acpi_sc->acpi_next_stype != POWER_STYPE_AWAKE)
revents |= events & (POLLIN | POLLRDNORM);
else
selrecord(td, &clone->sel_read);
@@ -433,7 +434,7 @@ apmreadfilt(struct knote *kn, long hint)
ACPI_LOCK(acpi);
clone = kn->kn_hook;
- sleeping = clone->acpi_sc->acpi_next_sstate ? 1 : 0;
+ sleeping = clone->acpi_sc->acpi_next_stype != POWER_STYPE_AWAKE;
ACPI_UNLOCK(acpi);
return (sleeping);
}
diff --git a/sys/x86/iommu/amd_intrmap.c b/sys/x86/iommu/amd_intrmap.c
index a4c1a7836268..f8900fe0561f 100644
--- a/sys/x86/iommu/amd_intrmap.c
+++ b/sys/x86/iommu/amd_intrmap.c
@@ -112,6 +112,8 @@ amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
{
struct amdiommu_ctx *ctx;
struct amdiommu_unit *unit;
+ device_t requester;
+ int error __diagused;
uint16_t rid;
bool is_iommu;
@@ -180,7 +182,8 @@ amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
*addr |= ((uint64_t)cpu & 0xffffff00) << 32;
}
- iommu_get_requester(src, &rid);
+ error = iommu_get_requester(src, &requester, &rid);
+ MPASS(error == 0);
AMDIOMMU_LOCK(unit);
amdiommu_qi_invalidate_ir_locked(unit, rid);
AMDIOMMU_UNLOCK(unit);
@@ -220,6 +223,7 @@ static struct amdiommu_ctx *
amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu)
{
devclass_t src_class;
+ device_t requester;
struct amdiommu_unit *unit;
struct amdiommu_ctx *ctx;
uint32_t edte;
@@ -251,7 +255,8 @@ amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu)
error = amdiommu_find_unit(src, &unit, &rid, &dte, &edte,
bootverbose);
if (error == 0) {
- iommu_get_requester(src, &rid);
+ error = iommu_get_requester(src, &requester, &rid);
+ MPASS(error == 0);
ctx = amdiommu_get_ctx_for_dev(unit, src,
rid, 0, false /* XXXKIB */, false, dte, edte);
}
@@ -266,6 +271,8 @@ amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src,
u_int cookie)
{
struct amdiommu_unit *unit;
+ device_t requester;
+ int error __diagused;
uint16_t rid;
MPASS(ctx != NULL);
@@ -291,7 +298,8 @@ amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src,
atomic_thread_fence_rel();
bzero(irte, sizeof(*irte));
}
- iommu_get_requester(src, &rid);
+ error = iommu_get_requester(src, &requester, &rid);
+ MPASS(error == 0);
AMDIOMMU_LOCK(unit);
amdiommu_qi_invalidate_ir_locked(unit, rid);
AMDIOMMU_UNLOCK(unit);
diff --git a/sys/x86/iommu/intel_intrmap.c b/sys/x86/iommu/intel_intrmap.c
index 06e41523624b..f12a0c9bae9b 100644
--- a/sys/x86/iommu/intel_intrmap.c
+++ b/sys/x86/iommu/intel_intrmap.c
@@ -234,6 +234,8 @@ dmar_ir_find(device_t src, uint16_t *rid, int *is_dmar)
{
devclass_t src_class;
struct dmar_unit *unit;
+ device_t requester;
+ int error __diagused;
/*
* We need to determine if the interrupt source generates FSB
@@ -253,8 +255,10 @@ dmar_ir_find(device_t src, uint16_t *rid, int *is_dmar)
unit = dmar_find_hpet(src, rid);
} else {
unit = dmar_find(src, bootverbose);
- if (unit != NULL && rid != NULL)
- iommu_get_requester(src, rid);
+ if (unit != NULL && rid != NULL) {
+ error = iommu_get_requester(src, &requester, rid);
+ MPASS(error == 0);
+ }
}
return (unit);
}
diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c
index 4d64eaf78b29..7661c82f4394 100644
--- a/sys/x86/x86/identcpu.c
+++ b/sys/x86/x86/identcpu.c
@@ -2613,7 +2613,7 @@ print_vmx_info(void)
"\020EPT#VE" /* EPT-violation #VE */
"\021XSAVES" /* Enable XSAVES/XRSTORS */
);
- printf("\n Exit Controls=0x%b", mask,
+ printf("\n Exit Controls=0x%b", exit,
"\020"
"\003DR" /* Save debug controls */
/* Ignore Host address-space size */
@@ -2625,7 +2625,7 @@ print_vmx_info(void)
"\026EFER-LD" /* Load MSR_EFER */
"\027PTMR-SV" /* Save VMX-preemption timer value */
);
- printf("\n Entry Controls=0x%b", mask,
+ printf("\n Entry Controls=0x%b", entry,
"\020"
"\003DR" /* Save debug controls */
/* Ignore IA-32e mode guest */