aboutsummaryrefslogtreecommitdiff
path: root/sys/contrib/openzfs/module
diff options
context:
space:
mode:
author: Martin Matuska <mm@FreeBSD.org> 2022-05-18 22:54:40 +0000
committer: Martin Matuska <mm@FreeBSD.org> 2022-05-18 22:55:59 +0000
commit: 716fd348e01c5f2ba125f878a634a753436c2994 (patch)
tree: 0d738baf7a9ccfd90fa1e622f67e0399f306f024 /sys/contrib/openzfs/module
parent: 4e2d3f26bd12610ef8672eefb02814b882a4c29b (diff)
parent: c0cf6ed6792e545fd614c2a88cb53756db7e03f8 (diff)
download: src-716fd348e01c5f2ba125f878a634a753436c2994.tar.gz
download: src-716fd348e01c5f2ba125f878a634a753436c2994.zip
zfs: merge openzfs/zfs@c0cf6ed67
Notable upstream pull request merges:
  #10662 zvol_wait: Ignore locked zvols
  #12789 Improve log spacemap load time
  #12812 Improved zpool status output, list all affected datasets
  #13277 FreeBSD: Use NDFREE_PNBUF if available
  #13302 Make zfs_max_recordsize default to 16M
  #13311 Fix error handling in FreeBSD's get/putpages VOPs
  #13345 FreeBSD: Fix translation from ABD to physical pages
  #13373 zfs: holds: dequadratify
  #13375 Corrected edge case in uncompressed ARC->L2ARC handling
  #13388 Improve mg_aliquot math
  #13405 Reduce dbuf_find() lock contention
  #13406 FreeBSD: use zero_region instead of allocating a dedicated page

Obtained from: OpenZFS
OpenZFS commit: c0cf6ed6792e545fd614c2a88cb53756db7e03f8
Diffstat (limited to 'sys/contrib/openzfs/module')
-rw-r--r-- sys/contrib/openzfs/module/Kbuild.in 427
-rw-r--r-- sys/contrib/openzfs/module/Makefile.in 61
-rw-r--r-- sys/contrib/openzfs/module/avl/Makefile.in 10
-rw-r--r-- sys/contrib/openzfs/module/avl/avl.c 22
-rw-r--r-- sys/contrib/openzfs/module/icp/Makefile.in 90
-rw-r--r-- sys/contrib/openzfs/module/icp/algs/edonr/edonr.c 5
-rw-r--r-- sys/contrib/openzfs/module/icp/algs/modes/gcm.c 2
-rw-r--r-- sys/contrib/openzfs/module/icp/illumos-crypto.c 7
-rw-r--r-- sys/contrib/openzfs/module/lua/Makefile.in 39
-rw-r--r-- sys/contrib/openzfs/module/lua/lapi.c 23
-rw-r--r-- sys/contrib/openzfs/module/nvpair/Makefile.in 13
-rw-r--r-- sys/contrib/openzfs/module/nvpair/nvpair.c 21
-rw-r--r-- sys/contrib/openzfs/module/os/freebsd/spl/spl_misc.c 4
-rw-r--r-- sys/contrib/openzfs/module/os/freebsd/spl/spl_sunddi.c 13
-rw-r--r-- sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c 6
-rw-r--r-- sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c 33
-rw-r--r-- sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c 8
-rw-r--r-- sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c 12
-rw-r--r-- sys/contrib/openzfs/module/os/linux/spl/Makefile.in 17
-rw-r--r-- sys/contrib/openzfs/module/os/linux/spl/spl-generic.c 58
-rw-r--r-- sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c 2
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/Makefile.in 38
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/abd_os.c 1
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c 7
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c 70
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c 4
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c 65
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c 49
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c 4
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c 58
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c 8
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c 97
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c 24
-rw-r--r-- sys/contrib/openzfs/module/spl/Makefile.in 13
-rw-r--r-- sys/contrib/openzfs/module/unicode/Makefile.in 11
-rw-r--r-- sys/contrib/openzfs/module/unicode/u8_textprep.c 21
-rw-r--r-- sys/contrib/openzfs/module/zcommon/Makefile.in 28
-rw-r--r-- sys/contrib/openzfs/module/zcommon/zfeature_common.c 7
-rw-r--r-- sys/contrib/openzfs/module/zcommon/zfs_prop.c 14
-rw-r--r-- sys/contrib/openzfs/module/zcommon/zprop_common.c 4
-rw-r--r-- sys/contrib/openzfs/module/zfs/Makefile.in 158
-rw-r--r-- sys/contrib/openzfs/module/zfs/arc.c 47
-rw-r--r-- sys/contrib/openzfs/module/zfs/dbuf.c 28
-rw-r--r-- sys/contrib/openzfs/module/zfs/dbuf_stats.c 4
-rw-r--r-- sys/contrib/openzfs/module/zfs/dmu.c 2
-rw-r--r-- sys/contrib/openzfs/module/zfs/dsl_dataset.c 65
-rw-r--r-- sys/contrib/openzfs/module/zfs/dsl_destroy.c 3
-rw-r--r-- sys/contrib/openzfs/module/zfs/metaslab.c 141
-rw-r--r-- sys/contrib/openzfs/module/zfs/sa.c 4
-rw-r--r-- sys/contrib/openzfs/module/zfs/spa.c 2
-rw-r--r-- sys/contrib/openzfs/module/zfs/spa_errlog.c 910
-rw-r--r-- sys/contrib/openzfs/module/zfs/spa_log_spacemap.c 231
-rw-r--r-- sys/contrib/openzfs/module/zfs/vdev.c 7
-rw-r--r-- sys/contrib/openzfs/module/zfs/vdev_removal.c 5
-rw-r--r-- sys/contrib/openzfs/module/zfs/zfeature.c 7
-rw-r--r-- sys/contrib/openzfs/module/zfs/zfs_ioctl.c 2
-rw-r--r-- sys/contrib/openzfs/module/zfs/zfs_vnops.c 8
-rw-r--r-- sys/contrib/openzfs/module/zfs/zio.c 9
-rw-r--r-- sys/contrib/openzfs/module/zfs/zvol.c 1
-rw-r--r-- sys/contrib/openzfs/module/zstd/Makefile.in 69
-rw-r--r-- sys/contrib/openzfs/module/zstd/README.md 13
-rw-r--r-- sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h 2
-rw-r--r-- sys/contrib/openzfs/module/zstd/lib/compress/fse_compress.c 2
-rw-r--r-- sys/contrib/openzfs/module/zstd/lib/compress/zstd_compress_superblock.c 2
-rw-r--r-- sys/contrib/openzfs/module/zstd/zfs_zstd.c 14
65 files changed, 2063 insertions, 1069 deletions
diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in
index 1507965c5750..11099999fb87 100644
--- a/sys/contrib/openzfs/module/Kbuild.in
+++ b/sys/contrib/openzfs/module/Kbuild.in
@@ -1,20 +1,6 @@
# When integrated in to a monolithic kernel the spl module must appear
# first. This ensures its module initialization function is run before
# any of the other module initialization functions which depend on it.
-ZFS_MODULES += spl/
-ZFS_MODULES += avl/
-ZFS_MODULES += icp/
-ZFS_MODULES += lua/
-ZFS_MODULES += nvpair/
-ZFS_MODULES += unicode/
-ZFS_MODULES += zcommon/
-ZFS_MODULES += zfs/
-ZFS_MODULES += zstd/
-
-# The rest is only relevant when run by kbuild
-ifneq ($(KERNELRELEASE),)
-
-obj-$(CONFIG_ZFS) := $(ZFS_MODULES)
ZFS_MODULE_CFLAGS += -std=gnu99 -Wno-declaration-after-statement
ZFS_MODULE_CFLAGS += -Wmissing-prototypes
@@ -22,10 +8,16 @@ ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @NO_FORMAT_ZERO_LENGTH@
ifneq ($(KBUILD_EXTMOD),)
zfs_include = @abs_top_srcdir@/include
+icp_include = @abs_srcdir@/icp/include
+zstd_include = @abs_srcdir@/zstd/include
ZFS_MODULE_CFLAGS += -include @abs_top_builddir@/zfs_config.h
ZFS_MODULE_CFLAGS += -I@abs_top_builddir@/include
+src = @abs_srcdir@
+obj = @abs_builddir@
else
zfs_include = $(srctree)/include/zfs
+icp_include = $(srctree)/$(src)/icp/include
+zstd_include = $(srctree)/$(src)/zstd/include
ZFS_MODULE_CFLAGS += -include $(zfs_include)/zfs_config.h
endif
@@ -36,12 +28,415 @@ ZFS_MODULE_CFLAGS += -I$(zfs_include)
ZFS_MODULE_CPPFLAGS += -D_KERNEL
ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@
+# KASAN enables -Werror=frame-larger-than=1024, which
+# breaks oh so many parts of our build.
+ifeq ($(CONFIG_KASAN),y)
+ZFS_MODULE_CFLAGS += -Wno-error=frame-larger-than=
+endif
+
ifneq ($(KBUILD_EXTMOD),)
@CONFIG_QAT_TRUE@ZFS_MODULE_CFLAGS += -I@QAT_SRC@/include
@CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@
endif
-subdir-asflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
-subdir-ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
+asflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
+ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
+
+# Suppress unused-value warnings in sparc64 architecture headers
+ccflags-$(CONFIG_SPARC64) += -Wno-unused-value
+
+
+obj-$(CONFIG_ZFS) := spl.o zfs.o
+
+SPL_OBJS := \
+ spl-atomic.o \
+ spl-condvar.o \
+ spl-cred.o \
+ spl-err.o \
+ spl-generic.o \
+ spl-kmem-cache.o \
+ spl-kmem.o \
+ spl-kstat.o \
+ spl-proc.o \
+ spl-procfs-list.o \
+ spl-taskq.o \
+ spl-thread.o \
+ spl-trace.o \
+ spl-tsd.o \
+ spl-vmem.o \
+ spl-xdr.o \
+ spl-zlib.o
+
+spl-objs += $(addprefix os/linux/spl/,$(SPL_OBJS))
+
+zfs-objs += avl/avl.o
+
+ICP_OBJS := \
+ algs/aes/aes_impl.o \
+ algs/aes/aes_impl_generic.o \
+ algs/aes/aes_modes.o \
+ algs/edonr/edonr.o \
+ algs/modes/cbc.o \
+ algs/modes/ccm.o \
+ algs/modes/ctr.o \
+ algs/modes/ecb.o \
+ algs/modes/gcm.o \
+ algs/modes/gcm_generic.o \
+ algs/modes/modes.o \
+ algs/sha2/sha2.o \
+ algs/skein/skein.o \
+ algs/skein/skein_block.o \
+ algs/skein/skein_iv.o \
+ api/kcf_cipher.o \
+ api/kcf_ctxops.o \
+ api/kcf_mac.o \
+ core/kcf_callprov.o \
+ core/kcf_mech_tabs.o \
+ core/kcf_prov_lib.o \
+ core/kcf_prov_tabs.o \
+ core/kcf_sched.o \
+ illumos-crypto.o \
+ io/aes.o \
+ io/sha2_mod.o \
+ io/skein_mod.o \
+ spi/kcf_spi.o
+
+ICP_OBJS_X86_64 := \
+ asm-x86_64/aes/aes_aesni.o \
+ asm-x86_64/aes/aes_amd64.o \
+ asm-x86_64/aes/aeskey.o \
+ asm-x86_64/modes/aesni-gcm-x86_64.o \
+ asm-x86_64/modes/gcm_pclmulqdq.o \
+ asm-x86_64/modes/ghash-x86_64.o \
+ asm-x86_64/sha2/sha256_impl.o \
+ asm-x86_64/sha2/sha512_impl.o
+
+ICP_OBJS_X86 := \
+ algs/aes/aes_impl_aesni.o \
+ algs/aes/aes_impl_x86-64.o \
+ algs/modes/gcm_pclmulqdq.o
+
+zfs-objs += $(addprefix icp/,$(ICP_OBJS))
+zfs-$(CONFIG_X86) += $(addprefix icp/,$(ICP_OBJS_X86))
+zfs-$(CONFIG_X86_64) += $(addprefix icp/,$(ICP_OBJS_X86_64))
+
+$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64)) : asflags-y += -I$(icp_include)
+$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64)) : ccflags-y += -I$(icp_include)
+
+# Suppress objtool "can't find jump dest instruction at" warnings. They
+# are caused by the constants which are defined in the text section of the
+# assembly file using .byte instructions (e.g. bswap_mask). The objtool
+# utility tries to interpret them as opcodes and obviously fails doing so.
+OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y
+OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y
+# Suppress objtool "unsupported stack pointer realignment" warnings. We are
+# not using a DRAP register while aligning the stack to a 64 byte boundary.
+# See #6950 for the reasoning.
+OBJECT_FILES_NON_STANDARD_sha256_impl.o := y
+OBJECT_FILES_NON_STANDARD_sha512_impl.o := y
+
+
+LUA_OBJS := \
+ lapi.o \
+ lauxlib.o \
+ lbaselib.o \
+ lcode.o \
+ lcompat.o \
+ lcorolib.o \
+ lctype.o \
+ ldebug.o \
+ ldo.o \
+ lfunc.o \
+ lgc.o \
+ llex.o \
+ lmem.o \
+ lobject.o \
+ lopcodes.o \
+ lparser.o \
+ lstate.o \
+ lstring.o \
+ lstrlib.o \
+ ltable.o \
+ ltablib.o \
+ ltm.o \
+ lvm.o \
+ lzio.o \
+ setjmp/setjmp.o
+
+zfs-objs += $(addprefix lua/,$(LUA_OBJS))
+
+
+NVPAIR_OBJS := \
+ fnvpair.o \
+ nvpair.o \
+ nvpair_alloc_fixed.o \
+ nvpair_alloc_spl.o
+
+zfs-objs += $(addprefix nvpair/,$(NVPAIR_OBJS))
+
+
+UNICODE_OBJS := \
+ u8_textprep.o \
+ uconv.o
+
+zfs-objs += $(addprefix unicode/,$(UNICODE_OBJS))
+
+
+ZCOMMON_OBJS := \
+ cityhash.o \
+ zfeature_common.o \
+ zfs_comutil.o \
+ zfs_deleg.o \
+ zfs_fletcher.o \
+ zfs_fletcher_superscalar.o \
+ zfs_fletcher_superscalar4.o \
+ zfs_namecheck.o \
+ zfs_prop.o \
+ zpool_prop.o \
+ zprop_common.o
+
+ZCOMMON_OBJS_X86 := \
+ zfs_fletcher_avx512.o \
+ zfs_fletcher_intel.o \
+ zfs_fletcher_sse.o
+
+ZCOMMON_OBJS_ARM64 := \
+ zfs_fletcher_aarch64_neon.o
+
+zfs-objs += $(addprefix zcommon/,$(ZCOMMON_OBJS))
+zfs-$(CONFIG_X86) += $(addprefix zcommon/,$(ZCOMMON_OBJS_X86))
+zfs-$(CONFIG_ARM64) += $(addprefix zcommon/,$(ZCOMMON_OBJS_ARM64))
+
+
+# Zstd uses -O3 by default, so we should follow
+ZFS_ZSTD_FLAGS := -O3
+
+# -fno-tree-vectorize gets set for gcc in zstd/common/compiler.h
+# Set it for other compilers, too.
+ZFS_ZSTD_FLAGS += -fno-tree-vectorize
+
+# SSE register return with SSE disabled if -march=znverX is passed
+ZFS_ZSTD_FLAGS += -U__BMI__
+
+# Quiet warnings about frame size due to unused code in unmodified zstd lib
+ZFS_ZSTD_FLAGS += -Wframe-larger-than=20480
+
+ZSTD_OBJS := \
+ zfs_zstd.o \
+ zstd_sparc.o
+
+ZSTD_UPSTREAM_OBJS := \
+ lib/common/entropy_common.o \
+ lib/common/error_private.o \
+ lib/common/fse_decompress.o \
+ lib/common/pool.o \
+ lib/common/zstd_common.o \
+ lib/compress/fse_compress.o \
+ lib/compress/hist.o \
+ lib/compress/huf_compress.o \
+ lib/compress/zstd_compress.o \
+ lib/compress/zstd_compress_literals.o \
+ lib/compress/zstd_compress_sequences.o \
+ lib/compress/zstd_compress_superblock.o \
+ lib/compress/zstd_double_fast.o \
+ lib/compress/zstd_fast.o \
+ lib/compress/zstd_lazy.o \
+ lib/compress/zstd_ldm.o \
+ lib/compress/zstd_opt.o \
+ lib/decompress/huf_decompress.o \
+ lib/decompress/zstd_ddict.o \
+ lib/decompress/zstd_decompress.o \
+ lib/decompress/zstd_decompress_block.o
+
+zfs-objs += $(addprefix zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS))
+
+# Disable aarch64 neon SIMD instructions for kernel mode
+$(addprefix $(obj)/zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) : ccflags-y += -I$(zstd_include) $(ZFS_ZSTD_FLAGS)
+$(addprefix $(obj)/zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) : asflags-y += -I$(zstd_include)
+$(addprefix $(obj)/zstd/,$(ZSTD_UPSTREAM_OBJS)) : ccflags-y += -include $(zstd_include)/aarch64_compat.h -include $(zstd_include)/zstd_compat_wrapper.h -Wp,-w
+$(obj)/zstd/zfs_zstd.o : ccflags-y += -include $(zstd_include)/zstd_compat_wrapper.h
+
+
+ZFS_OBJS := \
+ abd.o \
+ aggsum.o \
+ arc.o \
+ blkptr.o \
+ bplist.o \
+ bpobj.o \
+ bptree.o \
+ bqueue.o \
+ btree.o \
+ dataset_kstats.o \
+ dbuf.o \
+ dbuf_stats.o \
+ ddt.o \
+ ddt_zap.o \
+ dmu.o \
+ dmu_diff.o \
+ dmu_object.o \
+ dmu_objset.o \
+ dmu_recv.o \
+ dmu_redact.o \
+ dmu_send.o \
+ dmu_traverse.o \
+ dmu_tx.o \
+ dmu_zfetch.o \
+ dnode.o \
+ dnode_sync.o \
+ dsl_bookmark.o \
+ dsl_crypt.o \
+ dsl_dataset.o \
+ dsl_deadlist.o \
+ dsl_deleg.o \
+ dsl_destroy.o \
+ dsl_dir.o \
+ dsl_pool.o \
+ dsl_prop.o \
+ dsl_scan.o \
+ dsl_synctask.o \
+ dsl_userhold.o \
+ edonr_zfs.o \
+ fm.o \
+ gzip.o \
+ hkdf.o \
+ lz4.o \
+ lz4_zfs.o \
+ lzjb.o \
+ metaslab.o \
+ mmp.o \
+ multilist.o \
+ objlist.o \
+ pathname.o \
+ range_tree.o \
+ refcount.o \
+ rrwlock.o \
+ sa.o \
+ sha256.o \
+ skein_zfs.o \
+ spa.o \
+ spa_boot.o \
+ spa_checkpoint.o \
+ spa_config.o \
+ spa_errlog.o \
+ spa_history.o \
+ spa_log_spacemap.o \
+ spa_misc.o \
+ spa_stats.o \
+ space_map.o \
+ space_reftree.o \
+ txg.o \
+ uberblock.o \
+ unique.o \
+ vdev.o \
+ vdev_cache.o \
+ vdev_draid.o \
+ vdev_draid_rand.o \
+ vdev_indirect.o \
+ vdev_indirect_births.o \
+ vdev_indirect_mapping.o \
+ vdev_initialize.o \
+ vdev_label.o \
+ vdev_mirror.o \
+ vdev_missing.o \
+ vdev_queue.o \
+ vdev_raidz.o \
+ vdev_raidz_math.o \
+ vdev_raidz_math_scalar.o \
+ vdev_rebuild.o \
+ vdev_removal.o \
+ vdev_root.o \
+ vdev_trim.o \
+ zap.o \
+ zap_leaf.o \
+ zap_micro.o \
+ zcp.o \
+ zcp_get.o \
+ zcp_global.o \
+ zcp_iter.o \
+ zcp_set.o \
+ zcp_synctask.o \
+ zfeature.o \
+ zfs_byteswap.o \
+ zfs_fm.o \
+ zfs_fuid.o \
+ zfs_ioctl.o \
+ zfs_log.o \
+ zfs_onexit.o \
+ zfs_quota.o \
+ zfs_ratelimit.o \
+ zfs_replay.o \
+ zfs_rlock.o \
+ zfs_sa.o \
+ zfs_vnops.o \
+ zil.o \
+ zio.o \
+ zio_checksum.o \
+ zio_compress.o \
+ zio_inject.o \
+ zle.o \
+ zrlock.o \
+ zthr.o \
+ zvol.o
+
+ZFS_OBJS_OS := \
+ abd_os.o \
+ arc_os.o \
+ mmp_os.o \
+ policy.o \
+ qat.o \
+ qat_compress.o \
+ qat_crypt.o \
+ spa_misc_os.o \
+ trace.o \
+ vdev_disk.o \
+ vdev_file.o \
+ zfs_acl.o \
+ zfs_ctldir.o \
+ zfs_debug.o \
+ zfs_dir.o \
+ zfs_file_os.o \
+ zfs_ioctl_os.o \
+ zfs_racct.o \
+ zfs_sysfs.o \
+ zfs_uio.o \
+ zfs_vfsops.o \
+ zfs_vnops_os.o \
+ zfs_znode.o \
+ zio_crypt.o \
+ zpl_ctldir.o \
+ zpl_export.o \
+ zpl_file.o \
+ zpl_inode.o \
+ zpl_super.o \
+ zpl_xattr.o \
+ zvol_os.o
+
+ZFS_OBJS_X86 := \
+ vdev_raidz_math_avx2.o \
+ vdev_raidz_math_avx512bw.o \
+ vdev_raidz_math_avx512f.o \
+ vdev_raidz_math_sse2.o \
+ vdev_raidz_math_ssse3.o
+
+ZFS_OBJS_ARM64 := \
+ vdev_raidz_math_aarch64_neon.o \
+ vdev_raidz_math_aarch64_neonx2.o
+
+ZFS_OBJS_PPC_PPC64 := \
+ vdev_raidz_math_powerpc_altivec.o
+
+zfs-objs += $(addprefix zfs/,$(ZFS_OBJS)) $(addprefix os/linux/zfs/,$(ZFS_OBJS_OS))
+zfs-$(CONFIG_X86) += $(addprefix zfs/,$(ZFS_OBJS_X86))
+zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64))
+zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
+zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
+
+# Suppress incorrect warnings from versions of objtool which are not
+# aware of x86 EVEX prefix instructions used for AVX512.
+OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y
+OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512f.o := y
+ifeq ($(CONFIG_ALTIVEC),y)
+$(obj)/zfs/vdev_raidz_math_powerpc_altivec.o : c_flags += -maltivec
endif
diff --git a/sys/contrib/openzfs/module/Makefile.in b/sys/contrib/openzfs/module/Makefile.in
index 762f9394dd20..5b71e1abf79e 100644
--- a/sys/contrib/openzfs/module/Makefile.in
+++ b/sys/contrib/openzfs/module/Makefile.in
@@ -3,19 +3,19 @@ include Kbuild
INSTALL_MOD_DIR ?= extra
INSTALL_MOD_PATH ?= $(DESTDIR)
-SUBDIR_TARGETS = icp lua zstd
-
all: modules
distclean maintainer-clean: clean
-install: modules_install
-uninstall: modules_uninstall
+install: modules_install data_install
+uninstall: modules_uninstall data_uninstall
check:
.PHONY: all distclean maintainer-clean install uninstall check distdir \
modules modules-Linux modules-FreeBSD modules-unknown \
clean clean-Linux clean-FreeBSD \
modules_install modules_install-Linux modules_install-FreeBSD \
+ data_install data_install-Linux data_install-FreeBSD \
modules_uninstall modules_uninstall-Linux modules_uninstall-FreeBSD \
+ data_uninstall data_uninstall-Linux data_uninstall-FreeBSD \
cppcheck cppcheck-Linux cppcheck-FreeBSD
# For FreeBSD, use debug options from ./configure if not overridden.
@@ -51,7 +51,8 @@ endif
FMAKE = env -u MAKEFLAGS make $(FMAKEFLAGS)
modules-Linux:
- list='$(SUBDIR_TARGETS)'; for td in $$list; do $(MAKE) -C $$td; done
+ mkdir -p $(sort $(dir $(spl-objs) $(spl-)))
+ mkdir -p $(sort $(dir $(zfs-objs) $(zfs-)))
$(MAKE) -C @LINUX_OBJ@ $(if @KERNEL_CC@,CC=@KERNEL_CC@) \
$(if @KERNEL_LD@,LD=@KERNEL_LD@) $(if @KERNEL_LLVM@,LLVM=@KERNEL_LLVM@) \
M="$$PWD" @KERNEL_MAKE@ CONFIG_ZFS=m modules
@@ -77,16 +78,20 @@ clean-FreeBSD:
clean: clean-@ac_system@
-modules_install-Linux:
+.PHONY: modules_uninstall-Linux-legacy
+modules_uninstall-Linux-legacy:
+ $(RM) -r $(addprefix $(KMODDIR)/$(INSTALL_MOD_DIR)/,spl/ avl/ icp/ lua/ nvpair/ unicode/ zcommon/ zfs/ zstd/)
+
+KMODDIR := $(INSTALL_MOD_PATH)/lib/modules/@LINUX_VERSION@
+modules_install-Linux: modules_uninstall-Linux-legacy
@# Install the kernel modules
$(MAKE) -C @LINUX_OBJ@ M="$$PWD" modules_install \
INSTALL_MOD_PATH=$(INSTALL_MOD_PATH) \
INSTALL_MOD_DIR=$(INSTALL_MOD_DIR) \
KERNELRELEASE=@LINUX_VERSION@
@# Remove extraneous build products when packaging
- kmoddir=$(INSTALL_MOD_PATH)/lib/modules/@LINUX_VERSION@; \
if [ -n "$(DESTDIR)" ]; then \
- find $$kmoddir -name 'modules.*' -delete; \
+ find $(KMODDIR) -name 'modules.*' -delete; \
fi
@# Debian ships tiny fake System.map files that are
@# syntactically valid but just say
@@ -107,18 +112,32 @@ modules_install-FreeBSD:
modules_install: modules_install-@ac_system@
-modules_uninstall-Linux:
+data_install-Linux:
+ @mkdir -p $(DESTDIR)/@prefix@/src/zfs-@VERSION@/@LINUX_VERSION@
+ cp ../zfs.release ../zfs_config.h @LINUX_SYMBOLS@ $(DESTDIR)/@prefix@/src/zfs-@VERSION@/@LINUX_VERSION@
+
+data_install-FreeBSD:
+ @
+
+data_install: data_install-@ac_system@
+
+modules_uninstall-Linux: modules_uninstall-Linux-legacy
@# Uninstall the kernel modules
- kmoddir=$(INSTALL_MOD_PATH)/lib/modules/@LINUX_VERSION@; \
- for objdir in $(ZFS_MODULES); do \
- $(RM) -R $$kmoddir/$(INSTALL_MOD_DIR)/$$objdir; \
- done
+ $(RM) $(addprefix $(KMODDIR)/$(INSTALL_MOD_DIR)/,zfs.ko spl.ko)
modules_uninstall-FreeBSD:
@false
modules_uninstall: modules_uninstall-@ac_system@
+data_uninstall-Linux:
+ $(RM) $(addprefix $(DESTDIR)/@prefix@/src/zfs-@VERSION@/@LINUX_VERSION@/,zfs.release zfs_config.h @LINUX_SYMBOLS@)
+
+data_uninstall-FreeBSD:
+ @
+
+data_uninstall: data_uninstall-@ac_system@
+
cppcheck-Linux:
@CPPCHECK@ -j@CPU_COUNT@ --std=c99 --quiet --force --error-exitcode=2 \
--inline-suppr \
@@ -126,7 +145,7 @@ cppcheck-Linux:
--suppress=noValidConfiguration \
--enable=warning,information -D_KERNEL \
--include=@LINUX_OBJ@/include/generated/autoconf.h \
- --include=@top_srcdir@/zfs_config.h \
+ --include=@top_builddir@/zfs_config.h \
--config-exclude=@LINUX_OBJ@/include \
-i zstd/lib \
-I @LINUX_OBJ@/include \
@@ -134,7 +153,7 @@ cppcheck-Linux:
-I @top_srcdir@/include/os/linux/spl \
-I @top_srcdir@/include/os/linux/zfs \
-I @top_srcdir@/include \
- avl icp lua nvpair spl unicode zcommon zfs zstd os/linux
+ avl icp lua nvpair unicode zcommon zfs zstd os/linux
cppcheck-FreeBSD:
@true
@@ -142,9 +161,11 @@ cppcheck-FreeBSD:
cppcheck: cppcheck-@ac_system@
distdir:
- (cd @srcdir@ && find $(ZFS_MODULES) os -name '*.[chS]') | \
- while read path; do \
- mkdir -p $$distdir/$${path%/*}; \
- cp @srcdir@/$$path $$distdir/$$path; \
- done; \
+ cd @srcdir@ && find . -name '*.[chS]' -exec sh -c 'for f; do mkdir -p $$distdir/$${f%/*}; cp @srcdir@/$$f $$distdir/$$f; done' _ {} +
cp @srcdir@/Makefile.bsd $$distdir/Makefile.bsd
+
+gen-zstd-symbols:
+ for obj in $(addprefix zstd/,$(ZSTD_UPSTREAM_OBJS)); do echo; echo "/* $${obj#zstd/}: */"; @OBJDUMP@ -t $$obj | awk '$$2 == "g" && !/ zfs_/ {print "#define\t" $$6 " zfs_" $$6}' | sort; done >> zstd/include/zstd_compat_wrapper.h
+
+check-zstd-symbols:
+ @OBJDUMP@ -t $(addprefix zstd/,$(ZSTD_UPSTREAM_OBJS)) | awk '/file format/ {print} $$2 == "g" && !/ zfs_/ {++ret; print} END {exit ret}'
diff --git a/sys/contrib/openzfs/module/avl/Makefile.in b/sys/contrib/openzfs/module/avl/Makefile.in
deleted file mode 100644
index 991d5f95b8c0..000000000000
--- a/sys/contrib/openzfs/module/avl/Makefile.in
+++ /dev/null
@@ -1,10 +0,0 @@
-ifneq ($(KBUILD_EXTMOD),)
-src = @abs_srcdir@
-obj = @abs_builddir@
-endif
-
-MODULE := zavl
-
-obj-$(CONFIG_ZFS) := $(MODULE).o
-
-$(MODULE)-objs += avl.o
diff --git a/sys/contrib/openzfs/module/avl/avl.c b/sys/contrib/openzfs/module/avl/avl.c
index 3891a2d62880..69cb8bf6815b 100644
--- a/sys/contrib/openzfs/module/avl/avl.c
+++ b/sys/contrib/openzfs/module/avl/avl.c
@@ -1044,28 +1044,6 @@ done:
return (AVL_NODE2DATA(node, off));
}
-#if defined(_KERNEL)
-
-static int __init
-avl_init(void)
-{
- return (0);
-}
-
-static void __exit
-avl_fini(void)
-{
-}
-
-module_init(avl_init);
-module_exit(avl_fini);
-#endif
-
-ZFS_MODULE_DESCRIPTION("Generic AVL tree implementation");
-ZFS_MODULE_AUTHOR(ZFS_META_AUTHOR);
-ZFS_MODULE_LICENSE(ZFS_META_LICENSE);
-ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
-
EXPORT_SYMBOL(avl_create);
EXPORT_SYMBOL(avl_find);
EXPORT_SYMBOL(avl_insert);
diff --git a/sys/contrib/openzfs/module/icp/Makefile.in b/sys/contrib/openzfs/module/icp/Makefile.in
deleted file mode 100644
index 72c9ab12adb7..000000000000
--- a/sys/contrib/openzfs/module/icp/Makefile.in
+++ /dev/null
@@ -1,90 +0,0 @@
-ifneq ($(KBUILD_EXTMOD),)
-src = @abs_srcdir@
-obj = @abs_builddir@
-icp_include = $(src)/include
-else
-icp_include = $(srctree)/$(src)/include
-endif
-
-MODULE := icp
-
-obj-$(CONFIG_ZFS) := $(MODULE).o
-
-asflags-y := -I$(icp_include)
-ccflags-y := -I$(icp_include)
-
-$(MODULE)-objs += illumos-crypto.o
-$(MODULE)-objs += api/kcf_cipher.o
-$(MODULE)-objs += api/kcf_mac.o
-$(MODULE)-objs += api/kcf_ctxops.o
-$(MODULE)-objs += core/kcf_callprov.o
-$(MODULE)-objs += core/kcf_prov_tabs.o
-$(MODULE)-objs += core/kcf_sched.o
-$(MODULE)-objs += core/kcf_mech_tabs.o
-$(MODULE)-objs += core/kcf_prov_lib.o
-$(MODULE)-objs += spi/kcf_spi.o
-$(MODULE)-objs += io/aes.o
-$(MODULE)-objs += io/sha2_mod.o
-$(MODULE)-objs += io/skein_mod.o
-$(MODULE)-objs += algs/modes/cbc.o
-$(MODULE)-objs += algs/modes/ccm.o
-$(MODULE)-objs += algs/modes/ctr.o
-$(MODULE)-objs += algs/modes/ecb.o
-$(MODULE)-objs += algs/modes/gcm_generic.o
-$(MODULE)-objs += algs/modes/gcm.o
-$(MODULE)-objs += algs/modes/modes.o
-$(MODULE)-objs += algs/aes/aes_impl_generic.o
-$(MODULE)-objs += algs/aes/aes_impl.o
-$(MODULE)-objs += algs/aes/aes_modes.o
-$(MODULE)-objs += algs/edonr/edonr.o
-$(MODULE)-objs += algs/sha2/sha2.o
-$(MODULE)-objs += algs/skein/skein.o
-$(MODULE)-objs += algs/skein/skein_block.o
-$(MODULE)-objs += algs/skein/skein_iv.o
-
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aeskey.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aes_amd64.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aes_aesni.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/gcm_pclmulqdq.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/aesni-gcm-x86_64.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/ghash-x86_64.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha2/sha256_impl.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha2/sha512_impl.o
-
-$(MODULE)-$(CONFIG_X86) += algs/modes/gcm_pclmulqdq.o
-$(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_aesni.o
-$(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_x86-64.o
-
-# Suppress objtool "can't find jump dest instruction at" warnings. They
-# are caused by the constants which are defined in the text section of the
-# assembly file using .byte instructions (e.g. bswap_mask). The objtool
-# utility tries to interpret them as opcodes and obviously fails doing so.
-OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y
-OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y
-# Suppress objtool "unsupported stack pointer realignment" warnings. We are
-# not using a DRAP register while aligning the stack to a 64 byte boundary.
-# See #6950 for the reasoning.
-OBJECT_FILES_NON_STANDARD_sha256_impl.o := y
-OBJECT_FILES_NON_STANDARD_sha512_impl.o := y
-
-ICP_DIRS = \
- api \
- core \
- spi \
- io \
- os \
- algs \
- algs/aes \
- algs/edonr \
- algs/modes \
- algs/sha2 \
- algs/skein \
- asm-x86_64 \
- asm-x86_64/aes \
- asm-x86_64/modes \
- asm-x86_64/sha2 \
- asm-i386 \
- asm-generic
-
-all:
- mkdir -p $(ICP_DIRS)
diff --git a/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c b/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c
index 6f3a43e263be..9388a6f6b7c9 100644
--- a/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c
+++ b/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c
@@ -47,10 +47,7 @@
#define hashState384(x) ((x)->pipe->p512)
#define hashState512(x) ((x)->pipe->p512)
-/* shift and rotate shortcuts */
-#define shl(x, n) ((x) << n)
-#define shr(x, n) ((x) >> n)
-
+/* rotate shortcuts */
#define rotl32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
#define rotr32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c
index e666b45b5f44..ee2100b7f425 100644
--- a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c
+++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c
@@ -806,7 +806,7 @@ static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
* fallback to the fastest generic implementation.
*/
const gcm_impl_ops_t *
-gcm_impl_get_ops()
+gcm_impl_get_ops(void)
{
if (!kfpu_allowed())
return (&gcm_generic_impl);
diff --git a/sys/contrib/openzfs/module/icp/illumos-crypto.c b/sys/contrib/openzfs/module/icp/illumos-crypto.c
index f68f6bc765a2..d17b90e7200a 100644
--- a/sys/contrib/openzfs/module/icp/illumos-crypto.c
+++ b/sys/contrib/openzfs/module/icp/illumos-crypto.c
@@ -104,7 +104,7 @@
* ZFS Makefiles.
*/
-void __exit
+void
icp_fini(void)
{
skein_mod_fini();
@@ -139,10 +139,7 @@ icp_init(void)
return (0);
}
-#if defined(_KERNEL)
+#if defined(_KERNEL) && defined(__FreeBSD__)
module_exit(icp_fini);
module_init(icp_init);
-MODULE_AUTHOR(ZFS_META_AUTHOR);
-MODULE_LICENSE(ZFS_META_LICENSE);
-MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
#endif
diff --git a/sys/contrib/openzfs/module/lua/Makefile.in b/sys/contrib/openzfs/module/lua/Makefile.in
deleted file mode 100644
index 0a74c17e64e8..000000000000
--- a/sys/contrib/openzfs/module/lua/Makefile.in
+++ /dev/null
@@ -1,39 +0,0 @@
-ifneq ($(KBUILD_EXTMOD),)
-src = @abs_srcdir@
-obj = @abs_builddir@
-endif
-
-MODULE := zlua
-
-obj-$(CONFIG_ZFS) := $(MODULE).o
-
-ccflags-y := -DLUA_USE_LONGLONG
-
-$(MODULE)-objs += lapi.o
-$(MODULE)-objs += lauxlib.o
-$(MODULE)-objs += lbaselib.o
-$(MODULE)-objs += lcode.o
-$(MODULE)-objs += lcompat.o
-$(MODULE)-objs += lcorolib.o
-$(MODULE)-objs += lctype.o
-$(MODULE)-objs += ldebug.o
-$(MODULE)-objs += ldo.o
-$(MODULE)-objs += lfunc.o
-$(MODULE)-objs += lgc.o
-$(MODULE)-objs += llex.o
-$(MODULE)-objs += lmem.o
-$(MODULE)-objs += lobject.o
-$(MODULE)-objs += lopcodes.o
-$(MODULE)-objs += lparser.o
-$(MODULE)-objs += lstate.o
-$(MODULE)-objs += lstring.o
-$(MODULE)-objs += lstrlib.o
-$(MODULE)-objs += ltable.o
-$(MODULE)-objs += ltablib.o
-$(MODULE)-objs += ltm.o
-$(MODULE)-objs += lvm.o
-$(MODULE)-objs += lzio.o
-$(MODULE)-objs += setjmp/setjmp.o
-
-all:
- mkdir -p setjmp
diff --git a/sys/contrib/openzfs/module/lua/lapi.c b/sys/contrib/openzfs/module/lua/lapi.c
index 72b0037aa9a9..726e5c2ad4bb 100644
--- a/sys/contrib/openzfs/module/lua/lapi.c
+++ b/sys/contrib/openzfs/module/lua/lapi.c
@@ -1278,29 +1278,6 @@ LUA_API void lua_upvaluejoin (lua_State *L, int fidx1, int n1,
luaC_objbarrier(L, f1, *up2);
}
-#if defined(_KERNEL)
-
-static int __init
-lua_init(void)
-{
- return (0);
-}
-
-static void __exit
-lua_fini(void)
-{
-}
-
-module_init(lua_init);
-module_exit(lua_fini);
-
-#endif
-
-ZFS_MODULE_DESCRIPTION("Lua Interpreter for ZFS");
-ZFS_MODULE_AUTHOR("Lua.org");
-ZFS_MODULE_LICENSE("Dual MIT/GPL");
-ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
-
EXPORT_SYMBOL(lua_absindex);
EXPORT_SYMBOL(lua_atpanic);
EXPORT_SYMBOL(lua_checkstack);
diff --git a/sys/contrib/openzfs/module/nvpair/Makefile.in b/sys/contrib/openzfs/module/nvpair/Makefile.in
deleted file mode 100644
index d8145236674b..000000000000
--- a/sys/contrib/openzfs/module/nvpair/Makefile.in
+++ /dev/null
@@ -1,13 +0,0 @@
-ifneq ($(KBUILD_EXTMOD),)
-src = @abs_srcdir@
-obj = @abs_builddir@
-endif
-
-MODULE := znvpair
-
-obj-$(CONFIG_ZFS) := $(MODULE).o
-
-$(MODULE)-objs += nvpair.o
-$(MODULE)-objs += fnvpair.o
-$(MODULE)-objs += nvpair_alloc_spl.o
-$(MODULE)-objs += nvpair_alloc_fixed.o
diff --git a/sys/contrib/openzfs/module/nvpair/nvpair.c b/sys/contrib/openzfs/module/nvpair/nvpair.c
index a5222dac7849..a442990dade0 100644
--- a/sys/contrib/openzfs/module/nvpair/nvpair.c
+++ b/sys/contrib/openzfs/module/nvpair/nvpair.c
@@ -3678,27 +3678,6 @@ nvs_xdr(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen)
return (err);
}
-#if defined(_KERNEL)
-static int __init
-nvpair_init(void)
-{
- return (0);
-}
-
-static void __exit
-nvpair_fini(void)
-{
-}
-
-module_init(nvpair_init);
-module_exit(nvpair_fini);
-#endif
-
-ZFS_MODULE_DESCRIPTION("Generic name/value pair implementation");
-ZFS_MODULE_AUTHOR(ZFS_META_AUTHOR);
-ZFS_MODULE_LICENSE(ZFS_META_LICENSE);
-ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
-
EXPORT_SYMBOL(nv_alloc_init);
EXPORT_SYMBOL(nv_alloc_reset);
EXPORT_SYMBOL(nv_alloc_fini);
diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_misc.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_misc.c
index 0354b986cd5f..e46271a039de 100644
--- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_misc.c
+++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_misc.c
@@ -43,15 +43,11 @@ static struct opensolaris_utsname hw_utsname = {
.machine = MACHINE
};
-#ifndef KERNEL_STATIC
-char hw_serial[11] = "0";
-
utsname_t *
utsname(void)
{
return (&hw_utsname);
}
-#endif
static void
opensolaris_utsname_init(void *arg)
diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_sunddi.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_sunddi.c
index ebec77bdb37f..2a3c027c9389 100644
--- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_sunddi.c
+++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_sunddi.c
@@ -46,19 +46,6 @@ ddi_strtol(const char *str, char **nptr, int base, long *result)
}
int
-ddi_strtoul(const char *str, char **nptr, int base, unsigned long *result)
-{
-
- if (str == hw_serial) {
- *result = prison0.pr_hostid;
- return (0);
- }
-
- *result = strtoul(str, nptr, base);
- return (0);
-}
-
-int
ddi_strtoull(const char *str, char **nptr, int base, unsigned long long *result)
{
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
index 914e0e6ded66..1ac41f616a0d 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
@@ -1131,8 +1131,12 @@ vdev_geom_fill_unmap_cb(void *buf, size_t len, void *priv)
vm_offset_t addr = (vm_offset_t)buf;
vm_offset_t end = addr + len;
- if (bp->bio_ma_n == 0)
+ if (bp->bio_ma_n == 0) {
bp->bio_ma_offset = addr & PAGE_MASK;
+ addr &= ~PAGE_MASK;
+ } else {
+ ASSERT0(P2PHASE(addr, PAGE_SIZE));
+ }
do {
bp->bio_ma[bp->bio_ma_n++] =
PHYS_TO_VM_PAGE(pmap_kextract(addr));
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
index e33aaea481b1..e57855770293 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -97,6 +97,10 @@
VFS_SMR_DECLARE;
+#if __FreeBSD_version < 1300103
+#define NDFREE_PNBUF(ndp) NDFREE((ndp), NDF_ONLY_PNBUF)
+#endif
+
#if __FreeBSD_version >= 1300047
#define vm_page_wire_lock(pp)
#define vm_page_wire_unlock(pp)
@@ -237,7 +241,7 @@ zfs_open(vnode_t **vpp, int flag, cred_t *cr)
}
/* Keep a count of the synchronous opens in the znode */
- if (flag & (FSYNC | FDSYNC))
+ if (flag & O_SYNC)
atomic_inc_32(&zp->z_sync_cnt);
ZFS_EXIT(zfsvfs);
@@ -255,7 +259,7 @@ zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
ZFS_VERIFY_ZP(zp);
/* Decrement the synchronous opens in the znode */
- if ((flag & (FSYNC | FDSYNC)) && (count == 1))
+ if ((flag & O_SYNC) && (count == 1))
atomic_dec_32(&zp->z_sync_cnt);
ZFS_EXIT(zfsvfs);
@@ -4036,8 +4040,8 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
int pgsin_b, pgsin_a;
int error;
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp);
+ ZFS_ENTER_ERROR(zfsvfs, zfs_vm_pagerret_error);
+ ZFS_VERIFY_ZP_ERROR(zp, zfs_vm_pagerret_error);
start = IDX_TO_OFF(ma[0]->pindex);
end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
@@ -4161,19 +4165,18 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
int err;
int i;
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp);
-
object = vp->v_object;
- pcount = btoc(len);
- ncount = pcount;
-
KASSERT(ma[0]->object == object, ("mismatching object"));
KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));
+ pcount = btoc(len);
+ ncount = pcount;
for (i = 0; i < pcount; i++)
rtvals[i] = zfs_vm_pagerret_error;
+ ZFS_ENTER_ERROR(zfsvfs, zfs_vm_pagerret_error);
+ ZFS_VERIFY_ZP_ERROR(zp, zfs_vm_pagerret_error);
+
off = IDX_TO_OFF(ma[0]->pindex);
blksz = zp->z_blksz;
lo_off = rounddown(off, blksz);
@@ -4399,11 +4402,11 @@ ioflags(int ioflags)
int flags = 0;
if (ioflags & IO_APPEND)
- flags |= FAPPEND;
+ flags |= O_APPEND;
if (ioflags & IO_NDELAY)
- flags |= FNONBLOCK;
+ flags |= O_NONBLOCK;
if (ioflags & IO_SYNC)
- flags |= (FSYNC | FDSYNC | FRSYNC);
+ flags |= O_SYNC;
return (flags);
}
@@ -4624,7 +4627,7 @@ zfs_freebsd_create(struct vop_create_args *ap)
zfsvfs = ap->a_dvp->v_mount->mnt_data;
*ap->a_vpp = NULL;
- rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, !EXCL, mode,
+ rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, 0, mode,
&zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */);
if (rc == 0)
*ap->a_vpp = ZTOV(zp);
@@ -5447,7 +5450,7 @@ zfs_getextattr(struct vop_getextattr_args *ap)
error = ENOENT;
ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp)
+ ZFS_VERIFY_ZP(zp);
rw_enter(&zp->z_xattr_lock, RW_READER);
error = zfs_getextattr_impl(ap, zfs_xattr_compat);
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
index 2496d6897d9a..877b7187b676 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
@@ -153,6 +153,9 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
zp->z_xattr_cached = NULL;
zp->z_xattr_parent = 0;
zp->z_vnode = NULL;
+ zp->z_sync_writes_cnt = 0;
+ zp->z_async_writes_cnt = 0;
+
return (0);
}
@@ -172,6 +175,9 @@ zfs_znode_cache_destructor(void *buf, void *arg)
ASSERT3P(zp->z_acl_cached, ==, NULL);
ASSERT3P(zp->z_xattr_cached, ==, NULL);
+
+ ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
+ ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
}
@@ -453,6 +459,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
zp->z_blksz = blksz;
zp->z_seq = 0x7A4653;
zp->z_sync_cnt = 0;
+ zp->z_sync_writes_cnt = 0;
+ zp->z_async_writes_cnt = 0;
#if __FreeBSD_version >= 1300139
atomic_store_ptr(&zp->z_cached_symlink, NULL);
#endif
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
index 487778472e79..1011aaf68ac6 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
@@ -311,15 +311,13 @@ retry:
err = SET_ERROR(EBUSY);
goto out_opened;
}
-#ifdef FEXCL
- if (flag & FEXCL) {
+ if (flag & O_EXCL) {
if (zv->zv_open_count != 0) {
err = SET_ERROR(EBUSY);
goto out_opened;
}
zv->zv_flags |= ZVOL_EXCL;
}
-#endif
zv->zv_open_count += count;
out_opened:
@@ -952,18 +950,16 @@ retry:
err = SET_ERROR(EBUSY);
goto out_opened;
}
-#ifdef FEXCL
- if (flags & FEXCL) {
+ if (flags & O_EXCL) {
if (zv->zv_open_count != 0) {
err = SET_ERROR(EBUSY);
goto out_opened;
}
zv->zv_flags |= ZVOL_EXCL;
}
-#endif
zv->zv_open_count++;
- if (flags & (FSYNC | FDSYNC)) {
+ if (flags & O_SYNC) {
zsd = &zv->zv_zso->zso_dev;
zsd->zsd_sync_cnt++;
if (zsd->zsd_sync_cnt == 1 &&
@@ -1037,7 +1033,7 @@ zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td)
* You may get multiple opens, but only one close.
*/
zv->zv_open_count--;
- if (flags & (FSYNC | FDSYNC)) {
+ if (flags & O_SYNC) {
zsd = &zv->zv_zso->zso_dev;
zsd->zsd_sync_cnt--;
}
diff --git a/sys/contrib/openzfs/module/os/linux/spl/Makefile.in b/sys/contrib/openzfs/module/os/linux/spl/Makefile.in
deleted file mode 100644
index b2325f91b4a7..000000000000
--- a/sys/contrib/openzfs/module/os/linux/spl/Makefile.in
+++ /dev/null
@@ -1,17 +0,0 @@
-$(MODULE)-objs += ../os/linux/spl/spl-atomic.o
-$(MODULE)-objs += ../os/linux/spl/spl-condvar.o
-$(MODULE)-objs += ../os/linux/spl/spl-cred.o
-$(MODULE)-objs += ../os/linux/spl/spl-err.o
-$(MODULE)-objs += ../os/linux/spl/spl-generic.o
-$(MODULE)-objs += ../os/linux/spl/spl-kmem.o
-$(MODULE)-objs += ../os/linux/spl/spl-kmem-cache.o
-$(MODULE)-objs += ../os/linux/spl/spl-kstat.o
-$(MODULE)-objs += ../os/linux/spl/spl-proc.o
-$(MODULE)-objs += ../os/linux/spl/spl-procfs-list.o
-$(MODULE)-objs += ../os/linux/spl/spl-taskq.o
-$(MODULE)-objs += ../os/linux/spl/spl-thread.o
-$(MODULE)-objs += ../os/linux/spl/spl-trace.o
-$(MODULE)-objs += ../os/linux/spl/spl-tsd.o
-$(MODULE)-objs += ../os/linux/spl/spl-vmem.o
-$(MODULE)-objs += ../os/linux/spl/spl-xdr.o
-$(MODULE)-objs += ../os/linux/spl/spl-zlib.o
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
index cc9a973fef62..f99a2f966660 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
@@ -425,22 +425,33 @@ EXPORT_SYMBOL(__aeabi_ldivmod);
* functions against their Solaris counterparts. It is possible that I
* may have misinterpreted the man page or the man page is incorrect.
*/
-int ddi_strtoul(const char *, char **, int, unsigned long *);
int ddi_strtol(const char *, char **, int, long *);
int ddi_strtoull(const char *, char **, int, unsigned long long *);
int ddi_strtoll(const char *, char **, int, long long *);
-#define define_ddi_strtoux(type, valtype) \
-int ddi_strtou##type(const char *str, char **endptr, \
+#define define_ddi_strtox(type, valtype) \
+int ddi_strto##type(const char *str, char **endptr, \
int base, valtype *result) \
{ \
valtype last_value, value = 0; \
char *ptr = (char *)str; \
- int flag = 1, digit; \
+ int digit, minus = 0; \
+ \
+ while (strchr(" \t\n\r\f", *ptr)) \
+ ++ptr; \
\
if (strlen(ptr) == 0) \
return (EINVAL); \
\
+ switch (*ptr) { \
+ case '-': \
+ minus = 1; \
+ zfs_fallthrough; \
+ case '+': \
+ ++ptr; \
+ break; \
+ } \
+ \
/* Auto-detect base based on prefix */ \
if (!base) { \
if (str[0] == '0') { \
@@ -474,46 +485,21 @@ int ddi_strtou##type(const char *str, char **endptr, \
if (last_value > value) /* Overflow */ \
return (ERANGE); \
\
- flag = 1; \
ptr++; \
} \
\
- if (flag) \
- *result = value; \
+ *result = minus ? -value : value; \
\
if (endptr) \
- *endptr = (char *)(flag ? ptr : str); \
+ *endptr = ptr; \
\
return (0); \
} \
-#define define_ddi_strtox(type, valtype) \
-int ddi_strto##type(const char *str, char **endptr, \
- int base, valtype *result) \
-{ \
- int rc; \
- \
- if (*str == '-') { \
- rc = ddi_strtou##type(str + 1, endptr, base, result); \
- if (!rc) { \
- if (*endptr == str + 1) \
- *endptr = (char *)str; \
- else \
- *result = -*result; \
- } \
- } else { \
- rc = ddi_strtou##type(str, endptr, base, result); \
- } \
- \
- return (rc); \
-}
-
-define_ddi_strtoux(l, unsigned long)
define_ddi_strtox(l, long)
-define_ddi_strtoux(ll, unsigned long long)
+define_ddi_strtox(ull, unsigned long long)
define_ddi_strtox(ll, long long)
-EXPORT_SYMBOL(ddi_strtoul);
EXPORT_SYMBOL(ddi_strtol);
EXPORT_SYMBOL(ddi_strtoll);
EXPORT_SYMBOL(ddi_strtoull);
@@ -828,7 +814,7 @@ spl_fini(void)
module_init(spl_init);
module_exit(spl_fini);
-ZFS_MODULE_DESCRIPTION("Solaris Porting Layer");
-ZFS_MODULE_AUTHOR(ZFS_META_AUTHOR);
-ZFS_MODULE_LICENSE("GPL");
-ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
+MODULE_DESCRIPTION("Solaris Porting Layer");
+MODULE_AUTHOR(ZFS_META_AUTHOR);
+MODULE_LICENSE("GPL");
+MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
index bb2b56880646..33aaad653dc8 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
@@ -1420,7 +1420,7 @@ EXPORT_SYMBOL(spl_kmem_cache_reap_now);
* it should do no harm.
*/
int
-spl_kmem_cache_reap_active()
+spl_kmem_cache_reap_active(void)
{
return (0);
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/Makefile.in b/sys/contrib/openzfs/module/os/linux/zfs/Makefile.in
deleted file mode 100644
index fa990776db83..000000000000
--- a/sys/contrib/openzfs/module/os/linux/zfs/Makefile.in
+++ /dev/null
@@ -1,38 +0,0 @@
-#
-# Linux specific sources included from module/zfs/Makefile.in
-#
-
-# Suppress unused-value warnings in sparc64 architecture headers
-ccflags-$(CONFIG_SPARC64) += -Wno-unused-value
-
-$(MODULE)-objs += ../os/linux/zfs/abd_os.o
-$(MODULE)-objs += ../os/linux/zfs/arc_os.o
-$(MODULE)-objs += ../os/linux/zfs/mmp_os.o
-$(MODULE)-objs += ../os/linux/zfs/policy.o
-$(MODULE)-objs += ../os/linux/zfs/trace.o
-$(MODULE)-objs += ../os/linux/zfs/qat.o
-$(MODULE)-objs += ../os/linux/zfs/qat_compress.o
-$(MODULE)-objs += ../os/linux/zfs/qat_crypt.o
-$(MODULE)-objs += ../os/linux/zfs/spa_misc_os.o
-$(MODULE)-objs += ../os/linux/zfs/vdev_disk.o
-$(MODULE)-objs += ../os/linux/zfs/vdev_file.o
-$(MODULE)-objs += ../os/linux/zfs/zfs_acl.o
-$(MODULE)-objs += ../os/linux/zfs/zfs_ctldir.o
-$(MODULE)-objs += ../os/linux/zfs/zfs_debug.o
-$(MODULE)-objs += ../os/linux/zfs/zfs_dir.o
-$(MODULE)-objs += ../os/linux/zfs/zfs_file_os.o
-$(MODULE)-objs += ../os/linux/zfs/zfs_ioctl_os.o
-$(MODULE)-objs += ../os/linux/zfs/zfs_racct.o
-$(MODULE)-objs += ../os/linux/zfs/zfs_sysfs.o
-$(MODULE)-objs += ../os/linux/zfs/zfs_uio.o
-$(MODULE)-objs += ../os/linux/zfs/zfs_vfsops.o
-$(MODULE)-objs += ../os/linux/zfs/zfs_vnops_os.o
-$(MODULE)-objs += ../os/linux/zfs/zfs_znode.o
-$(MODULE)-objs += ../os/linux/zfs/zio_crypt.o
-$(MODULE)-objs += ../os/linux/zfs/zpl_ctldir.o
-$(MODULE)-objs += ../os/linux/zfs/zpl_export.o
-$(MODULE)-objs += ../os/linux/zfs/zpl_file.o
-$(MODULE)-objs += ../os/linux/zfs/zpl_inode.o
-$(MODULE)-objs += ../os/linux/zfs/zpl_super.o
-$(MODULE)-objs += ../os/linux/zfs/zpl_xattr.o
-$(MODULE)-objs += ../os/linux/zfs/zvol_os.o
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
index 688458621b93..0cd4fa5213d4 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
@@ -620,7 +620,6 @@ abd_alloc_zero_scatter(void)
ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages;
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
- zfs_refcount_create(&abd_zero_scatter->abd_children);
ABD_SCATTER(abd_zero_scatter).abd_sgl = vmem_alloc(nr_pages *
sizeof (struct scatterlist), KM_SLEEP);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
index 6cec5be44012..235cd1691c14 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
@@ -467,8 +467,11 @@ vdev_submit_bio_impl(struct bio *bio)
* blkg_tryget() to use rcu_read_lock() instead of rcu_read_lock_sched().
* As a side effect the function was converted to GPL-only. Define our
* own version when needed which uses rcu_read_lock_sched().
+ *
+ * The Linux 5.17 kernel split linux/blk-cgroup.h into a private and a public
+ * part, moving blkg_tryget into the private one. Define our own version.
*/
-#if defined(HAVE_BLKG_TRYGET_GPL_ONLY)
+#if defined(HAVE_BLKG_TRYGET_GPL_ONLY) || !defined(HAVE_BLKG_TRYGET)
static inline bool
vdev_blkg_tryget(struct blkcg_gq *blkg)
{
@@ -493,7 +496,7 @@ vdev_blkg_tryget(struct blkcg_gq *blkg)
return (rc);
}
-#elif defined(HAVE_BLKG_TRYGET)
+#else
#define vdev_blkg_tryget(bg) blkg_tryget(bg)
#endif
#ifdef HAVE_BIO_SET_DEV_MACRO
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
index 351e4dad799c..b70691ab31c1 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
@@ -863,6 +863,26 @@ zfs_unix_to_v4(uint32_t access_mask)
return (new_mask);
}
+
+static int
+zfs_v4_to_unix(uint32_t access_mask, int *unmapped)
+{
+ int new_mask = 0;
+
+ *unmapped = access_mask &
+ (ACE_WRITE_OWNER | ACE_WRITE_ACL | ACE_DELETE);
+
+ if (access_mask & WRITE_MASK)
+ new_mask |= S_IWOTH;
+ if (access_mask & ACE_READ_DATA)
+ new_mask |= S_IROTH;
+ if (access_mask & ACE_EXECUTE)
+ new_mask |= S_IXOTH;
+
+ return (new_mask);
+}
+
+
static void
zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask,
uint16_t access_type, uint64_t fuid, uint16_t entry_type)
@@ -2399,6 +2419,53 @@ zfs_has_access(znode_t *zp, cred_t *cr)
return (B_TRUE);
}
+/*
+ * Simplified access check for case where ACL is known to not contain
+ * information beyond what is defined in the mode. In this case, we
+ * can pass along to the kernel / vfs generic_permission() check, which
+ * evaluates the mode and POSIX ACL.
+ *
+ * NFSv4 ACLs allow granting permissions that are usually relegated only
+ * to the file owner or superuser. Examples are ACE_WRITE_OWNER (chown),
+ * ACE_WRITE_ACL(chmod), and ACE_DELETE. ACE_DELETE requests must fail
+ * because with conventional posix permissions, right to delete file
+ * is determined by write bit on the parent dir.
+ *
+ * If unmappable perms are requested, then we must return EPERM
+ * and include those bits in the working_mode so that the caller of
+ * zfs_zaccess_common() can decide whether to perform additional
+ * policy / capability checks. EACCES is used in zfs_zaccess_aces_check()
+ * to indicate access check failed due to explicit DENY entry, and so
+ * we want to avoid that here.
+ */
+static int
+zfs_zaccess_trivial(znode_t *zp, uint32_t *working_mode, cred_t *cr)
+{
+ int err, mask;
+ int unmapped = 0;
+
+ ASSERT(zp->z_pflags & ZFS_ACL_TRIVIAL);
+
+ mask = zfs_v4_to_unix(*working_mode, &unmapped);
+ if (mask == 0 || unmapped) {
+ *working_mode = unmapped;
+ return (unmapped ? SET_ERROR(EPERM) : 0);
+ }
+
+#if defined(HAVE_IOPS_PERMISSION_USERNS)
+ err = generic_permission(cr->user_ns, ZTOI(zp), mask);
+#else
+ err = generic_permission(ZTOI(zp), mask);
+#endif
+ if (err != 0) {
+ return (SET_ERROR(EPERM));
+ }
+
+ *working_mode = unmapped;
+
+ return (0);
+}
+
static int
zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
@@ -2450,6 +2517,9 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
return (SET_ERROR(EPERM));
}
+ if (zp->z_pflags & ZFS_ACL_TRIVIAL)
+ return (zfs_zaccess_trivial(zp, working_mode, cr));
+
return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
index f7e71461a3bd..aae19f6346fd 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
@@ -162,7 +162,7 @@ zfsctl_snapshot_free(zfs_snapentry_t *se)
zfs_refcount_destroy(&se->se_refcount);
kmem_strfree(se->se_name);
kmem_strfree(se->se_path);
- rw_destroy(se->se_taskqid_lock);
+ rw_destroy(&se->se_taskqid_lock);
kmem_free(se, sizeof (zfs_snapentry_t));
}
@@ -496,6 +496,8 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
zp->z_pflags = 0;
zp->z_mode = 0;
zp->z_sync_cnt = 0;
+ zp->z_sync_writes_cnt = 0;
+ zp->z_async_writes_cnt = 0;
ip->i_generation = 0;
ip->i_ino = id;
ip->i_mode = (S_IFDIR | S_IRWXUGO);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
index fee3fe540b90..c65702e1a053 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
@@ -58,6 +58,8 @@
#include <sys/zvol.h>
#include <sys/fm/util.h>
#include <sys/dsl_crypt.h>
+#include <sys/crypto/icp.h>
+#include <sys/zstd/zstd.h>
#include <sys/zfs_ioctl_impl.h>
@@ -233,8 +235,8 @@ zfsdev_detach(void)
#define ZFS_DEBUG_STR ""
#endif
-static int __init
-openzfs_init(void)
+static int
+openzfs_init_os(void)
{
int error;
@@ -259,8 +261,8 @@ openzfs_init(void)
return (0);
}
-static void __exit
-openzfs_fini(void)
+static void
+openzfs_fini_os(void)
{
zfs_sysfs_fini();
zfs_kmod_fini();
@@ -269,12 +271,59 @@ openzfs_fini(void)
ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
}
+
+extern int __init zcommon_init(void);
+extern void zcommon_fini(void);
+
+static int __init
+openzfs_init(void)
+{
+ int err;
+ if ((err = zcommon_init()) != 0)
+ goto zcommon_failed;
+ if ((err = icp_init()) != 0)
+ goto icp_failed;
+ if ((err = zstd_init()) != 0)
+ goto zstd_failed;
+ if ((err = openzfs_init_os()) != 0)
+ goto openzfs_os_failed;
+ return (0);
+
+openzfs_os_failed:
+ zstd_fini();
+zstd_failed:
+ icp_fini();
+icp_failed:
+ zcommon_fini();
+zcommon_failed:
+ return (err);
+}
+
+static void __exit
+openzfs_fini(void)
+{
+ openzfs_fini_os();
+ zstd_fini();
+ icp_fini();
+ zcommon_fini();
+}
+
#if defined(_KERNEL)
module_init(openzfs_init);
module_exit(openzfs_fini);
#endif
-ZFS_MODULE_DESCRIPTION("ZFS");
-ZFS_MODULE_AUTHOR(ZFS_META_AUTHOR);
-ZFS_MODULE_LICENSE(ZFS_META_LICENSE);
-ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
+MODULE_ALIAS("zavl");
+MODULE_ALIAS("icp");
+MODULE_ALIAS("zlua");
+MODULE_ALIAS("znvpair");
+MODULE_ALIAS("zunicode");
+MODULE_ALIAS("zcommon");
+MODULE_ALIAS("zzstd");
+MODULE_DESCRIPTION("ZFS");
+MODULE_AUTHOR(ZFS_META_AUTHOR);
+MODULE_LICENSE("Lua: MIT");
+MODULE_LICENSE("zstd: Dual BSD/GPL");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_LICENSE(ZFS_META_LICENSE);
+MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c
index 6f71382cf74e..eb7c5f6166d2 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c
@@ -65,16 +65,15 @@
/*
* A zfs_mod_kobj_t represents a zfs kobject under '/sys/module/zfs'
*/
-struct zfs_mod_kobj;
typedef struct zfs_mod_kobj zfs_mod_kobj_t;
-
struct zfs_mod_kobj {
struct kobject zko_kobj;
struct kobj_type zko_kobj_type;
struct sysfs_ops zko_sysfs_ops;
size_t zko_attr_count;
struct attribute *zko_attr_list; /* allocated */
- struct attribute **zko_default_attrs; /* allocated */
+ struct attribute_group zko_default_group; /* .attrs allocated */
+ const struct attribute_group *zko_default_groups[2];
size_t zko_child_count;
zfs_mod_kobj_t *zko_children; /* allocated */
};
@@ -127,10 +126,10 @@ zfs_kobj_release(struct kobject *kobj)
zkobj->zko_attr_list = NULL;
}
- if (zkobj->zko_default_attrs != NULL) {
- kmem_free(zkobj->zko_default_attrs,
+ if (zkobj->zko_default_group.attrs != NULL) {
+ kmem_free(zkobj->zko_default_group.attrs,
DEFAULT_ATTR_SIZE(zkobj->zko_attr_count));
- zkobj->zko_default_attrs = NULL;
+ zkobj->zko_default_group.attrs = NULL;
}
if (zkobj->zko_child_count != 0) {
@@ -154,11 +153,12 @@ zfs_kobj_add_attr(zfs_mod_kobj_t *zkobj, int attr_num, const char *attr_name)
{
VERIFY3U(attr_num, <, zkobj->zko_attr_count);
ASSERT(zkobj->zko_attr_list);
- ASSERT(zkobj->zko_default_attrs);
+ ASSERT(zkobj->zko_default_group.attrs);
zkobj->zko_attr_list[attr_num].name = attr_name;
zkobj->zko_attr_list[attr_num].mode = 0444;
- zkobj->zko_default_attrs[attr_num] = &zkobj->zko_attr_list[attr_num];
+ zkobj->zko_default_group.attrs[attr_num] =
+ &zkobj->zko_attr_list[attr_num];
sysfs_attr_init(&zkobj->zko_attr_list[attr_num]);
}
@@ -176,9 +176,9 @@ zfs_kobj_init(zfs_mod_kobj_t *zkobj, int attr_cnt, int child_cnt,
return (ENOMEM);
}
/* this will always have at least one slot for NULL termination */
- zkobj->zko_default_attrs = kmem_zalloc(DEFAULT_ATTR_SIZE(attr_cnt),
- KM_SLEEP);
- if (zkobj->zko_default_attrs == NULL) {
+ zkobj->zko_default_group.attrs =
+ kmem_zalloc(DEFAULT_ATTR_SIZE(attr_cnt), KM_SLEEP);
+ if (zkobj->zko_default_group.attrs == NULL) {
if (zkobj->zko_attr_list != NULL) {
kmem_free(zkobj->zko_attr_list,
ATTR_TABLE_SIZE(attr_cnt));
@@ -186,14 +186,19 @@ zfs_kobj_init(zfs_mod_kobj_t *zkobj, int attr_cnt, int child_cnt,
return (ENOMEM);
}
zkobj->zko_attr_count = attr_cnt;
- zkobj->zko_kobj_type.default_attrs = zkobj->zko_default_attrs;
+ zkobj->zko_default_groups[0] = &zkobj->zko_default_group;
+#ifdef HAVE_SYSFS_DEFAULT_GROUPS
+ zkobj->zko_kobj_type.default_groups = zkobj->zko_default_groups;
+#else
+ zkobj->zko_kobj_type.default_attrs = zkobj->zko_default_group.attrs;
+#endif
if (child_cnt > 0) {
zkobj->zko_children = kmem_zalloc(CHILD_TABLE_SIZE(child_cnt),
KM_SLEEP);
if (zkobj->zko_children == NULL) {
- if (zkobj->zko_default_attrs != NULL) {
- kmem_free(zkobj->zko_default_attrs,
+ if (zkobj->zko_default_group.attrs != NULL) {
+ kmem_free(zkobj->zko_default_group.attrs,
DEFAULT_ATTR_SIZE(attr_cnt));
}
if (zkobj->zko_attr_list != NULL) {
@@ -215,9 +220,9 @@ zfs_kobj_init(zfs_mod_kobj_t *zkobj, int attr_cnt, int child_cnt,
static int
zfs_kobj_add(zfs_mod_kobj_t *zkobj, struct kobject *parent, const char *name)
{
- /* zko_default_attrs must be NULL terminated */
- ASSERT(zkobj->zko_default_attrs != NULL);
- ASSERT(zkobj->zko_default_attrs[zkobj->zko_attr_count] == NULL);
+ /* zko_default_group.attrs must be NULL terminated */
+ ASSERT(zkobj->zko_default_group.attrs != NULL);
+ ASSERT(zkobj->zko_default_group.attrs[zkobj->zko_attr_count] == NULL);
kobject_init(&zkobj->zko_kobj, &zkobj->zko_kobj_type);
return (kobject_add(&zkobj->zko_kobj, parent, name));
@@ -226,7 +231,7 @@ zfs_kobj_add(zfs_mod_kobj_t *zkobj, struct kobject *parent, const char *name)
/*
* Each zfs property has these common attributes
*/
-static const char *zprop_attrs[] = {
+static const char *const zprop_attrs[] = {
"type",
"readonly",
"setonce",
@@ -239,7 +244,7 @@ static const char *zprop_attrs[] = {
#define ZFS_PROP_ATTR_COUNT ARRAY_SIZE(zprop_attrs)
#define ZPOOL_PROP_ATTR_COUNT (ZFS_PROP_ATTR_COUNT - 1)
-static const char *zprop_types[] = {
+static const char *const zprop_types[] = {
"number",
"string",
"index",
@@ -250,7 +255,7 @@ typedef struct zfs_type_map {
const char *ztm_name;
} zfs_type_map_t;
-static zfs_type_map_t type_map[] = {
+static const zfs_type_map_t type_map[] = {
{ZFS_TYPE_FILESYSTEM, "filesystem"},
{ZFS_TYPE_SNAPSHOT, "snapshot"},
{ZFS_TYPE_VOLUME, "volume"},
@@ -371,7 +376,7 @@ pool_property_show(struct kobject *kobj, struct attribute *attr, char *buf)
* A user process can easily check if the running zfs kernel module
* supports the new feature.
*/
-static const char *zfs_kernel_features[] = {
+static const char *const zfs_kernel_features[] = {
/* --> Add new kernel features here */
"com.delphix:vdev_initialize",
"org.zfsonlinux:vdev_trim",
@@ -439,7 +444,7 @@ zfs_kernel_features_init(zfs_mod_kobj_t *zfs_kobj, struct kobject *parent)
/*
* Each pool feature has these common attributes
*/
-static const char *pool_feature_attrs[] = {
+static const char *const pool_feature_attrs[] = {
"description",
"guid",
"uname",
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
index ce47b3e6087a..4f31bcb5959d 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
@@ -248,7 +248,7 @@ zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
/* touch each page in this segment. */
p = iov->iov_base + skip;
while (cnt) {
- if (get_user(tmp, (uint8_t *)p))
+ if (copy_from_user(&tmp, p, 1))
return (EFAULT);
ulong_t incr = MIN(cnt, PAGESIZE);
p += incr;
@@ -256,7 +256,7 @@ zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
}
/* touch the last byte in case it straddles a page. */
p--;
- if (get_user(tmp, (uint8_t *)p))
+ if (copy_from_user(&tmp, p, 1))
return (EFAULT);
}
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
index ece7c373e852..d6ff838806eb 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
@@ -474,7 +474,7 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
*/
if ((error = zfs_zaccess(*zpp, ACE_EXECUTE, 0,
- B_FALSE, cr))) {
+ B_TRUE, cr))) {
zrele(*zpp);
*zpp = NULL;
}
@@ -3396,7 +3396,7 @@ top:
}
static void
-zfs_putpage_commit_cb(void *arg)
+zfs_putpage_sync_commit_cb(void *arg)
{
struct page *pp = arg;
@@ -3404,13 +3404,26 @@ zfs_putpage_commit_cb(void *arg)
end_page_writeback(pp);
}
+static void
+zfs_putpage_async_commit_cb(void *arg)
+{
+ struct page *pp = arg;
+ znode_t *zp = ITOZ(pp->mapping->host);
+
+ ClearPageError(pp);
+ end_page_writeback(pp);
+ atomic_dec_32(&zp->z_async_writes_cnt);
+}
+
/*
* Push a page out to disk, once the page is on stable storage the
* registered commit callback will be run as notification of completion.
*
- * IN: ip - page mapped for inode.
- * pp - page to push (page is locked)
- * wbc - writeback control data
+ * IN: ip - page mapped for inode.
+ * pp - page to push (page is locked)
+ * wbc - writeback control data
+ * for_sync - does the caller intend to wait synchronously for the
+ * page writeback to complete?
*
* RETURN: 0 if success
* error code if failure
@@ -3419,7 +3432,8 @@ zfs_putpage_commit_cb(void *arg)
* ip - ctime|mtime updated
*/
int
-zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
+zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
+ boolean_t for_sync)
{
znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
@@ -3517,6 +3531,16 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
zfs_rangelock_exit(lr);
if (wbc->sync_mode != WB_SYNC_NONE) {
+ /*
+ * Speed up any non-sync page writebacks since
+ * they may take several seconds to complete.
+ * Refer to the comment in zpl_fsync() (when
+ * HAVE_FSYNC_RANGE is defined) for details.
+ */
+ if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
+ zil_commit(zfsvfs->z_log, zp->z_id);
+ }
+
if (PageWriteback(pp))
#ifdef HAVE_PAGEMAP_FOLIO_WAIT_BIT
folio_wait_bit(page_folio(pp), PG_writeback);
@@ -3542,6 +3566,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
* was in fact not skipped and should not be counted as if it were.
*/
wbc->pages_skipped--;
+ if (!for_sync)
+ atomic_inc_32(&zp->z_async_writes_cnt);
set_page_writeback(pp);
unlock_page(pp);
@@ -3556,9 +3582,15 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
dmu_tx_wait(tx);
dmu_tx_abort(tx);
+#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
+ filemap_dirty_folio(page_mapping(pp), page_folio(pp));
+#else
__set_page_dirty_nobuffers(pp);
+#endif
ClearPageError(pp);
end_page_writeback(pp);
+ if (!for_sync)
+ atomic_dec_32(&zp->z_async_writes_cnt);
zfs_rangelock_exit(lr);
ZFS_EXIT(zfsvfs);
return (err);
@@ -3583,7 +3615,9 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
- zfs_putpage_commit_cb, pp);
+ for_sync ? zfs_putpage_sync_commit_cb :
+ zfs_putpage_async_commit_cb, pp);
+
dmu_tx_commit(tx);
zfs_rangelock_exit(lr);
@@ -3595,6 +3629,16 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
* performance reasons.
*/
zil_commit(zfsvfs->z_log, zp->z_id);
+ } else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
+ /*
+ * If the caller does not intend to wait synchronously
+ * for this page writeback to complete and there are active
+ * synchronous calls on this file, do a commit so that
+ * the latter don't accidentally end up waiting for
+ * our writeback to complete. Refer to the comment in
+ * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
+ */
+ zil_commit(zfsvfs->z_log, zp->z_id);
}
dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
index b76e65d16822..d921f2b07463 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
@@ -134,6 +134,9 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
zp->z_acl_cached = NULL;
zp->z_xattr_cached = NULL;
zp->z_xattr_parent = 0;
+ zp->z_sync_writes_cnt = 0;
+ zp->z_async_writes_cnt = 0;
+
return (0);
}
@@ -154,6 +157,9 @@ zfs_znode_cache_destructor(void *buf, void *arg)
ASSERT3P(zp->z_dirlocks, ==, NULL);
ASSERT3P(zp->z_acl_cached, ==, NULL);
ASSERT3P(zp->z_xattr_cached, ==, NULL);
+
+ ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
+ ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
}
static int
@@ -554,6 +560,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
zp->z_blksz = blksz;
zp->z_seq = 0x7A4653;
zp->z_sync_cnt = 0;
+ zp->z_sync_writes_cnt = 0;
+ zp->z_async_writes_cnt = 0;
zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
index f78e50262af7..8b84eb795fc3 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
@@ -33,9 +33,13 @@
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_project.h>
-#ifdef HAVE_VFS_SET_PAGE_DIRTY_NOBUFFERS
+#if defined(HAVE_VFS_SET_PAGE_DIRTY_NOBUFFERS) || \
+ defined(HAVE_VFS_FILEMAP_DIRTY_FOLIO)
#include <linux/pagemap.h>
#endif
+#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
+#include <linux/writeback.h>
+#endif
/*
* When using fallocate(2) to preallocate space, inflate the requested
@@ -161,17 +165,56 @@ static int
zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
struct inode *inode = filp->f_mapping->host;
+ znode_t *zp = ITOZ(inode);
+ zfsvfs_t *zfsvfs = ITOZSB(inode);
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;
+ /*
+ * The variables z_sync_writes_cnt and z_async_writes_cnt work in
+ * tandem so that sync writes can detect if there are any non-sync
+ * writes going on and vice-versa. The "vice-versa" part to this logic
+ * is located in zfs_putpage() where non-sync writes check if there are
+ * any ongoing sync writes. If any sync and non-sync writes overlap,
+ * we do a commit to complete the non-sync writes since the latter can
+ * potentially take several seconds to complete and thus block sync
+ * writes in the upcoming call to filemap_write_and_wait_range().
+ */
+ atomic_inc_32(&zp->z_sync_writes_cnt);
+ /*
+ * If the following check does not detect an overlapping non-sync write
+ * (say because it's just about to start), then it is guaranteed that
+ * the non-sync write will detect this sync write. This is because we
+ * always increment z_sync_writes_cnt / z_async_writes_cnt before doing
+ * the check on z_async_writes_cnt / z_sync_writes_cnt here and in
+ * zfs_putpage() respectively.
+ */
+ if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
+ ZPL_ENTER(zfsvfs);
+ zil_commit(zfsvfs->z_log, zp->z_id);
+ ZPL_EXIT(zfsvfs);
+ }
+
error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+
+ /*
+ * The sync write is not complete yet but we decrement
+ * z_sync_writes_cnt since zfs_fsync() increments and decrements
+ * it internally. If a non-sync write starts just after the decrement
+ * operation but before we call zfs_fsync(), it may not detect this
+ * overlapping sync write but it does not matter since we have already
+ * gone past filemap_write_and_wait_range() and we won't block due to
+ * the non-sync write.
+ */
+ atomic_dec_32(&zp->z_sync_writes_cnt);
+
if (error)
return (error);
crhold(cr);
cookie = spl_fstrans_mark();
- error = -zfs_fsync(ITOZ(inode), datasync, cr);
+ error = -zfs_fsync(zp, datasync, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);
@@ -413,6 +456,8 @@ zpl_aio_write(struct kiocb *kiocb, const struct iovec *iov,
if (ret)
return (ret);
+ kiocb->ki_pos = pos;
+
zfs_uio_t uio;
zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE,
count, 0);
@@ -647,24 +692,41 @@ zpl_readpage_filler(void *data, struct page *pp)
* paging. For simplicity, the code relies on read_cache_pages() to
* correctly lock each page for IO and call zpl_readpage().
*/
+#ifdef HAVE_VFS_READPAGES
static int
zpl_readpages(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
return (read_cache_pages(mapping, pages, zpl_readpage_filler, NULL));
}
+#else
+static void
+zpl_readahead(struct readahead_control *ractl)
+{
+ struct page *page;
+
+ while ((page = readahead_page(ractl)) != NULL) {
+ int ret;
+
+ ret = zpl_readpage_filler(NULL, page);
+ put_page(page);
+ if (ret)
+ break;
+ }
+}
+#endif
static int
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{
- struct address_space *mapping = data;
+ boolean_t *for_sync = data;
fstrans_cookie_t cookie;
ASSERT(PageLocked(pp));
ASSERT(!PageWriteback(pp));
cookie = spl_fstrans_mark();
- (void) zfs_putpage(mapping->host, pp, wbc);
+ (void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
spl_fstrans_unmark(cookie);
return (0);
@@ -691,8 +753,9 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
* we run it once in non-SYNC mode so that the ZIL gets all the data,
* and then we commit it all in one go.
*/
+ boolean_t for_sync = (sync_mode == WB_SYNC_ALL);
wbc->sync_mode = WB_SYNC_NONE;
- result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
+ result = write_cache_pages(mapping, wbc, zpl_putpage, &for_sync);
if (sync_mode != wbc->sync_mode) {
ZPL_ENTER(zfsvfs);
ZPL_VERIFY_ZP(zp);
@@ -708,7 +771,8 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
* details). That being said, this is a no-op in most cases.
*/
wbc->sync_mode = sync_mode;
- result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
+ result = write_cache_pages(mapping, wbc, zpl_putpage,
+ &for_sync);
}
return (result);
}
@@ -725,7 +789,9 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc)
if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS)
wbc->sync_mode = WB_SYNC_ALL;
- return (zpl_putpage(pp, wbc, pp->mapping));
+ boolean_t for_sync = (wbc->sync_mode == WB_SYNC_ALL);
+
+ return (zpl_putpage(pp, wbc, &for_sync));
}
/*
@@ -764,11 +830,13 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
if (mode & (test_mode)) {
flock64_t bf;
- if (offset > olen)
- goto out_unmark;
+ if (mode & FALLOC_FL_KEEP_SIZE) {
+ if (offset > olen)
+ goto out_unmark;
- if (offset + len > olen)
- len = olen - offset;
+ if (offset + len > olen)
+ len = olen - offset;
+ }
bf.l_type = F_WRLCK;
bf.l_whence = SEEK_SET;
bf.l_start = offset;
@@ -1135,7 +1203,11 @@ zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
const struct address_space_operations zpl_address_space_operations = {
+#ifdef HAVE_VFS_READPAGES
.readpages = zpl_readpages,
+#else
+ .readahead = zpl_readahead,
+#endif
.readpage = zpl_readpage,
.writepage = zpl_writepage,
.writepages = zpl_writepages,
@@ -1143,6 +1215,9 @@ const struct address_space_operations zpl_address_space_operations = {
#ifdef HAVE_VFS_SET_PAGE_DIRTY_NOBUFFERS
.set_page_dirty = __set_page_dirty_nobuffers,
#endif
+#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
+ .dirty_folio = filemap_dirty_folio,
+#endif
};
const struct file_operations zpl_file_operations = {
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
index c964cce0de9a..4ebdf8331695 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -46,7 +46,10 @@ static unsigned int zvol_request_sync = 0;
static unsigned int zvol_prefetch_bytes = (128 * 1024);
static unsigned long zvol_max_discard_blocks = 16384;
static unsigned int zvol_threads = 32;
+
+#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
static const unsigned int zvol_open_timeout_ms = 1000;
+#endif
struct zvol_state_os {
struct gendisk *zvo_disk; /* generic disk */
@@ -903,22 +906,17 @@ zvol_alloc(dev_t dev, const char *name)
zso->zvo_disk->major = zvol_major;
zso->zvo_disk->events = DISK_EVENT_MEDIA_CHANGE;
+ /*
+ * Setting ZFS_VOLMODE_DEV disables partitioning on ZVOL devices.
+ * This is accomplished by limiting the number of minors for the
+ * device to one and explicitly disabling partition scanning.
+ */
if (volmode == ZFS_VOLMODE_DEV) {
- /*
- * ZFS_VOLMODE_DEV disable partitioning on ZVOL devices: set
- * gendisk->minors = 1 as noted in include/linux/blkdev.h.
- * Also disable extended partition numbers (GENHD_FL_EXT_DEVT)
- * and suppresses partition scanning (GENHD_FL_NO_PART_SCAN)
- * setting gendisk->flags accordingly.
- */
zso->zvo_disk->minors = 1;
-#if defined(GENHD_FL_EXT_DEVT)
- zso->zvo_disk->flags &= ~GENHD_FL_EXT_DEVT;
-#endif
-#if defined(GENHD_FL_NO_PART_SCAN)
- zso->zvo_disk->flags |= GENHD_FL_NO_PART_SCAN;
-#endif
+ zso->zvo_disk->flags &= ~ZFS_GENHD_FL_EXT_DEVT;
+ zso->zvo_disk->flags |= ZFS_GENHD_FL_NO_PART;
}
+
zso->zvo_disk->first_minor = (dev & MINORMASK);
zso->zvo_disk->fops = &zvol_ops;
zso->zvo_disk->private_data = zv;
diff --git a/sys/contrib/openzfs/module/spl/Makefile.in b/sys/contrib/openzfs/module/spl/Makefile.in
deleted file mode 100644
index cedbfe92b58a..000000000000
--- a/sys/contrib/openzfs/module/spl/Makefile.in
+++ /dev/null
@@ -1,13 +0,0 @@
-ifneq ($(KBUILD_EXTMOD),)
-src = @abs_srcdir@
-obj = @abs_builddir@
-mfdir = $(obj)
-else
-mfdir = $(srctree)/$(src)
-endif
-
-MODULE := spl
-
-obj-$(CONFIG_ZFS) := $(MODULE).o
-
-include $(mfdir)/../os/linux/spl/Makefile
diff --git a/sys/contrib/openzfs/module/unicode/Makefile.in b/sys/contrib/openzfs/module/unicode/Makefile.in
deleted file mode 100644
index 59c07c4555b7..000000000000
--- a/sys/contrib/openzfs/module/unicode/Makefile.in
+++ /dev/null
@@ -1,11 +0,0 @@
-ifneq ($(KBUILD_EXTMOD),)
-src = @abs_srcdir@
-obj = @abs_builddir@
-endif
-
-MODULE := zunicode
-
-obj-$(CONFIG_ZFS) := $(MODULE).o
-
-$(MODULE)-objs += u8_textprep.o
-$(MODULE)-objs += uconv.o
diff --git a/sys/contrib/openzfs/module/unicode/u8_textprep.c b/sys/contrib/openzfs/module/unicode/u8_textprep.c
index b6b07b2453af..37d648b2172d 100644
--- a/sys/contrib/openzfs/module/unicode/u8_textprep.c
+++ b/sys/contrib/openzfs/module/unicode/u8_textprep.c
@@ -2129,27 +2129,6 @@ u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen,
return (ret_val);
}
-#if defined(_KERNEL)
-static int __init
-unicode_init(void)
-{
- return (0);
-}
-
-static void __exit
-unicode_fini(void)
-{
-}
-
-module_init(unicode_init);
-module_exit(unicode_fini);
-#endif
-
-ZFS_MODULE_DESCRIPTION("Unicode implementation");
-ZFS_MODULE_AUTHOR(ZFS_META_AUTHOR);
-ZFS_MODULE_LICENSE(ZFS_META_LICENSE);
-ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
-
EXPORT_SYMBOL(u8_validate);
EXPORT_SYMBOL(u8_strcmp);
EXPORT_SYMBOL(u8_textprep_str);
diff --git a/sys/contrib/openzfs/module/zcommon/Makefile.in b/sys/contrib/openzfs/module/zcommon/Makefile.in
deleted file mode 100644
index ebc538440445..000000000000
--- a/sys/contrib/openzfs/module/zcommon/Makefile.in
+++ /dev/null
@@ -1,28 +0,0 @@
-ifneq ($(KBUILD_EXTMOD),)
-src = @abs_srcdir@
-obj = @abs_builddir@
-endif
-
-MODULE := zcommon
-
-obj-$(CONFIG_ZFS) := $(MODULE).o
-
-# Suppress unused-value warnings in sparc64 architecture headers
-ccflags-$(CONFIG_SPARC64) += -Wno-unused-value
-
-$(MODULE)-objs += cityhash.o
-$(MODULE)-objs += zfeature_common.o
-$(MODULE)-objs += zfs_comutil.o
-$(MODULE)-objs += zfs_deleg.o
-$(MODULE)-objs += zfs_fletcher.o
-$(MODULE)-objs += zfs_fletcher_superscalar.o
-$(MODULE)-objs += zfs_fletcher_superscalar4.o
-$(MODULE)-objs += zfs_namecheck.o
-$(MODULE)-objs += zfs_prop.o
-$(MODULE)-objs += zpool_prop.o
-$(MODULE)-objs += zprop_common.o
-
-$(MODULE)-$(CONFIG_X86) += zfs_fletcher_intel.o
-$(MODULE)-$(CONFIG_X86) += zfs_fletcher_sse.o
-$(MODULE)-$(CONFIG_X86) += zfs_fletcher_avx512.o
-$(MODULE)-$(CONFIG_ARM64) += zfs_fletcher_aarch64_neon.o
diff --git a/sys/contrib/openzfs/module/zcommon/zfeature_common.c b/sys/contrib/openzfs/module/zcommon/zfeature_common.c
index 13dbccae2d4a..f09389e6d02e 100644
--- a/sys/contrib/openzfs/module/zcommon/zfeature_common.c
+++ b/sys/contrib/openzfs/module/zcommon/zfeature_common.c
@@ -696,6 +696,7 @@ zpool_feature_init(void)
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures);
{
+
static const spa_feature_t zilsaxattr_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
SPA_FEATURE_NONE
@@ -707,6 +708,12 @@ zpool_feature_init(void)
ZFEATURE_TYPE_BOOLEAN, zilsaxattr_deps, sfeatures);
}
+ zfeature_register(SPA_FEATURE_HEAD_ERRLOG,
+ "com.delphix:head_errlog", "head_errlog",
+ "Support for per-dataset on-disk error logs.",
+ ZFEATURE_FLAG_ACTIVATE_ON_ENABLE, ZFEATURE_TYPE_BOOLEAN, NULL,
+ sfeatures);
+
zfs_mod_list_supported_free(sfeatures);
}
diff --git a/sys/contrib/openzfs/module/zcommon/zfs_prop.c b/sys/contrib/openzfs/module/zcommon/zfs_prop.c
index 8b3e774d99ec..500d80a33b6b 100644
--- a/sys/contrib/openzfs/module/zcommon/zfs_prop.c
+++ b/sys/contrib/openzfs/module/zcommon/zfs_prop.c
@@ -1006,7 +1006,10 @@ uint8_t **zfs_kfpu_fpregs;
EXPORT_SYMBOL(zfs_kfpu_fpregs);
#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
-static int __init
+extern int __init zcommon_init(void);
+extern void zcommon_fini(void);
+
+int __init
zcommon_init(void)
{
int error = kfpu_init();
@@ -1018,22 +1021,19 @@ zcommon_init(void)
return (0);
}
-static void __exit
+void
zcommon_fini(void)
{
fletcher_4_fini();
kfpu_fini();
}
+#ifdef __FreeBSD__
module_init_early(zcommon_init);
module_exit(zcommon_fini);
-
#endif
-ZFS_MODULE_DESCRIPTION("Generic ZFS support");
-ZFS_MODULE_AUTHOR(ZFS_META_AUTHOR);
-ZFS_MODULE_LICENSE(ZFS_META_LICENSE);
-ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
+#endif
/* zfs dataset property functions */
EXPORT_SYMBOL(zfs_userquota_prop_prefixes);
diff --git a/sys/contrib/openzfs/module/zcommon/zprop_common.c b/sys/contrib/openzfs/module/zcommon/zprop_common.c
index 0f496877577b..b1da4ca64bd5 100644
--- a/sys/contrib/openzfs/module/zcommon/zprop_common.c
+++ b/sys/contrib/openzfs/module/zcommon/zprop_common.c
@@ -136,7 +136,7 @@ zprop_register_string(int prop, const char *name, const char *def,
const char *colname, const struct zfs_mod_supported_features *sfeatures)
{
zprop_register_impl(prop, name, PROP_TYPE_STRING, 0, def, attr,
- objset_types, values, colname, B_FALSE, B_TRUE, B_FALSE, NULL,
+ objset_types, values, colname, B_FALSE, B_TRUE, B_TRUE, NULL,
sfeatures);
}
@@ -159,7 +159,7 @@ zprop_register_index(int prop, const char *name, uint64_t def,
const struct zfs_mod_supported_features *sfeatures)
{
zprop_register_impl(prop, name, PROP_TYPE_INDEX, def, NULL, attr,
- objset_types, values, colname, B_FALSE, B_TRUE, B_FALSE, idx_tbl,
+ objset_types, values, colname, B_FALSE, B_TRUE, B_TRUE, idx_tbl,
sfeatures);
}
diff --git a/sys/contrib/openzfs/module/zfs/Makefile.in b/sys/contrib/openzfs/module/zfs/Makefile.in
deleted file mode 100644
index 30dc91a7eb59..000000000000
--- a/sys/contrib/openzfs/module/zfs/Makefile.in
+++ /dev/null
@@ -1,158 +0,0 @@
-ifneq ($(KBUILD_EXTMOD),)
-src = @abs_srcdir@
-obj = @abs_builddir@
-mfdir = $(obj)
-else
-mfdir = $(srctree)/$(src)
-endif
-
-MODULE := zfs
-
-obj-$(CONFIG_ZFS) := $(MODULE).o
-
-# Suppress unused-value warnings in sparc64 architecture headers
-ccflags-$(CONFIG_SPARC64) += -Wno-unused-value
-
-$(MODULE)-objs += abd.o
-$(MODULE)-objs += aggsum.o
-$(MODULE)-objs += arc.o
-$(MODULE)-objs += blkptr.o
-$(MODULE)-objs += bplist.o
-$(MODULE)-objs += bpobj.o
-$(MODULE)-objs += bptree.o
-$(MODULE)-objs += btree.o
-$(MODULE)-objs += bqueue.o
-$(MODULE)-objs += dataset_kstats.o
-$(MODULE)-objs += dbuf.o
-$(MODULE)-objs += dbuf_stats.o
-$(MODULE)-objs += ddt.o
-$(MODULE)-objs += ddt_zap.o
-$(MODULE)-objs += dmu.o
-$(MODULE)-objs += dmu_diff.o
-$(MODULE)-objs += dmu_object.o
-$(MODULE)-objs += dmu_objset.o
-$(MODULE)-objs += dmu_recv.o
-$(MODULE)-objs += dmu_redact.o
-$(MODULE)-objs += dmu_send.o
-$(MODULE)-objs += dmu_traverse.o
-$(MODULE)-objs += dmu_tx.o
-$(MODULE)-objs += dmu_zfetch.o
-$(MODULE)-objs += dnode.o
-$(MODULE)-objs += dnode_sync.o
-$(MODULE)-objs += dsl_bookmark.o
-$(MODULE)-objs += dsl_crypt.o
-$(MODULE)-objs += dsl_dataset.o
-$(MODULE)-objs += dsl_deadlist.o
-$(MODULE)-objs += dsl_deleg.o
-$(MODULE)-objs += dsl_destroy.o
-$(MODULE)-objs += dsl_dir.o
-$(MODULE)-objs += dsl_pool.o
-$(MODULE)-objs += dsl_prop.o
-$(MODULE)-objs += dsl_scan.o
-$(MODULE)-objs += dsl_synctask.o
-$(MODULE)-objs += dsl_userhold.o
-$(MODULE)-objs += edonr_zfs.o
-$(MODULE)-objs += fm.o
-$(MODULE)-objs += gzip.o
-$(MODULE)-objs += hkdf.o
-$(MODULE)-objs += lz4.o
-$(MODULE)-objs += lz4_zfs.o
-$(MODULE)-objs += lzjb.o
-$(MODULE)-objs += metaslab.o
-$(MODULE)-objs += mmp.o
-$(MODULE)-objs += multilist.o
-$(MODULE)-objs += objlist.o
-$(MODULE)-objs += pathname.o
-$(MODULE)-objs += range_tree.o
-$(MODULE)-objs += refcount.o
-$(MODULE)-objs += rrwlock.o
-$(MODULE)-objs += sa.o
-$(MODULE)-objs += sha256.o
-$(MODULE)-objs += skein_zfs.o
-$(MODULE)-objs += spa.o
-$(MODULE)-objs += spa_boot.o
-$(MODULE)-objs += spa_checkpoint.o
-$(MODULE)-objs += spa_config.o
-$(MODULE)-objs += spa_errlog.o
-$(MODULE)-objs += spa_history.o
-$(MODULE)-objs += spa_log_spacemap.o
-$(MODULE)-objs += spa_misc.o
-$(MODULE)-objs += spa_stats.o
-$(MODULE)-objs += space_map.o
-$(MODULE)-objs += space_reftree.o
-$(MODULE)-objs += txg.o
-$(MODULE)-objs += uberblock.o
-$(MODULE)-objs += unique.o
-$(MODULE)-objs += vdev.o
-$(MODULE)-objs += vdev_cache.o
-$(MODULE)-objs += vdev_draid.o
-$(MODULE)-objs += vdev_draid_rand.o
-$(MODULE)-objs += vdev_indirect.o
-$(MODULE)-objs += vdev_indirect_births.o
-$(MODULE)-objs += vdev_indirect_mapping.o
-$(MODULE)-objs += vdev_initialize.o
-$(MODULE)-objs += vdev_label.o
-$(MODULE)-objs += vdev_mirror.o
-$(MODULE)-objs += vdev_missing.o
-$(MODULE)-objs += vdev_queue.o
-$(MODULE)-objs += vdev_raidz.o
-$(MODULE)-objs += vdev_raidz_math.o
-$(MODULE)-objs += vdev_raidz_math_scalar.o
-$(MODULE)-objs += vdev_rebuild.o
-$(MODULE)-objs += vdev_removal.o
-$(MODULE)-objs += vdev_root.o
-$(MODULE)-objs += vdev_trim.o
-$(MODULE)-objs += zap.o
-$(MODULE)-objs += zap_leaf.o
-$(MODULE)-objs += zap_micro.o
-$(MODULE)-objs += zcp.o
-$(MODULE)-objs += zcp_get.o
-$(MODULE)-objs += zcp_global.o
-$(MODULE)-objs += zcp_iter.o
-$(MODULE)-objs += zcp_set.o
-$(MODULE)-objs += zcp_synctask.o
-$(MODULE)-objs += zfeature.o
-$(MODULE)-objs += zfs_byteswap.o
-$(MODULE)-objs += zfs_fm.o
-$(MODULE)-objs += zfs_fuid.o
-$(MODULE)-objs += zfs_ioctl.o
-$(MODULE)-objs += zfs_log.o
-$(MODULE)-objs += zfs_onexit.o
-$(MODULE)-objs += zfs_quota.o
-$(MODULE)-objs += zfs_ratelimit.o
-$(MODULE)-objs += zfs_replay.o
-$(MODULE)-objs += zfs_rlock.o
-$(MODULE)-objs += zfs_sa.o
-$(MODULE)-objs += zfs_vnops.o
-$(MODULE)-objs += zil.o
-$(MODULE)-objs += zio.o
-$(MODULE)-objs += zio_checksum.o
-$(MODULE)-objs += zio_compress.o
-$(MODULE)-objs += zio_inject.o
-$(MODULE)-objs += zle.o
-$(MODULE)-objs += zrlock.o
-$(MODULE)-objs += zthr.o
-$(MODULE)-objs += zvol.o
-
-# Suppress incorrect warnings from versions of objtool which are not
-# aware of x86 EVEX prefix instructions used for AVX512.
-OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y
-OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512f.o := y
-
-$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_sse2.o
-$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_ssse3.o
-$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx2.o
-$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx512f.o
-$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx512bw.o
-
-$(MODULE)-$(CONFIG_ARM64) += vdev_raidz_math_aarch64_neon.o
-$(MODULE)-$(CONFIG_ARM64) += vdev_raidz_math_aarch64_neonx2.o
-
-$(MODULE)-$(CONFIG_PPC) += vdev_raidz_math_powerpc_altivec.o
-$(MODULE)-$(CONFIG_PPC64) += vdev_raidz_math_powerpc_altivec.o
-
-ifeq ($(CONFIG_ALTIVEC),y)
-$(obj)/vdev_raidz_math_powerpc_altivec.o: c_flags += -maltivec
-endif
-
-include $(mfdir)/../os/linux/zfs/Makefile
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index 79e754c4abcb..af42670cc2c9 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -9337,26 +9337,37 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize,
}
if (compress != ZIO_COMPRESS_OFF && !HDR_COMPRESSION_ENABLED(hdr)) {
- cabd = abd_alloc_for_io(asize, ismd);
- tmp = abd_borrow_buf(cabd, asize);
+ /*
+ * In some cases, we can wind up with size > asize, so
+ * we need to opt for the larger allocation option here.
+ *
+ * (We also need abd_return_buf_copy in all cases because
+ * it's an ASSERT() to modify the buffer before returning it
+ * with abd_return_buf(), and all the compressors
+ * write things before deciding to fail compression in nearly
+ * every case.)
+ */
+ cabd = abd_alloc_for_io(size, ismd);
+ tmp = abd_borrow_buf(cabd, size);
psize = zio_compress_data(compress, to_write, tmp, size,
hdr->b_complevel);
- if (psize >= size) {
- abd_return_buf(cabd, tmp, asize);
+ if (psize >= asize) {
+ psize = HDR_GET_PSIZE(hdr);
+ abd_return_buf_copy(cabd, tmp, size);
HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
to_write = cabd;
- abd_copy(to_write, hdr->b_l1hdr.b_pabd, size);
- if (size != asize)
- abd_zero_off(to_write, size, asize - size);
+ abd_copy(to_write, hdr->b_l1hdr.b_pabd, psize);
+ if (psize != asize)
+ abd_zero_off(to_write, psize, asize - psize);
goto encrypt;
}
ASSERT3U(psize, <=, HDR_GET_PSIZE(hdr));
if (psize < asize)
memset((char *)tmp + psize, 0, asize - psize);
psize = HDR_GET_PSIZE(hdr);
- abd_return_buf_copy(cabd, tmp, asize);
+ abd_return_buf_copy(cabd, tmp, size);
to_write = cabd;
}
@@ -11045,20 +11056,20 @@ EXPORT_SYMBOL(arc_add_prune_callback);
EXPORT_SYMBOL(arc_remove_prune_callback);
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min, param_set_arc_min,
- param_get_long, ZMOD_RW, "Min arc size");
+ param_get_long, ZMOD_RW, "Minimum ARC size in bytes");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, max, param_set_arc_max,
- param_get_long, ZMOD_RW, "Max arc size");
+ param_get_long, ZMOD_RW, "Maximum ARC size in bytes");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit, param_set_arc_long,
- param_get_long, ZMOD_RW, "Metadata limit for arc size");
+ param_get_long, ZMOD_RW, "Metadata limit for ARC size in bytes");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit_percent,
param_set_arc_long, param_get_long, ZMOD_RW,
- "Percent of arc size for arc meta limit");
+ "Percent of ARC size for ARC meta limit");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_min, param_set_arc_long,
- param_get_long, ZMOD_RW, "Min arc metadata");
+ param_get_long, ZMOD_RW, "Minimum ARC metadata size in bytes");
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_prune, INT, ZMOD_RW,
"Meta objects to scan for prune");
@@ -11070,16 +11081,16 @@ ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_strategy, INT, ZMOD_RW,
"Meta reclaim strategy");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, grow_retry, param_set_arc_int,
- param_get_int, ZMOD_RW, "Seconds before growing arc size");
+ param_get_int, ZMOD_RW, "Seconds before growing ARC size");
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, p_dampener_disable, INT, ZMOD_RW,
"Disable arc_p adapt dampener");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, shrink_shift, param_set_arc_int,
- param_get_int, ZMOD_RW, "log2(fraction of arc to reclaim)");
+ param_get_int, ZMOD_RW, "log2(fraction of ARC to reclaim)");
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, pc_percent, UINT, ZMOD_RW,
- "Percent of pagecache to reclaim arc to");
+ "Percent of pagecache to reclaim ARC to");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, p_min_shift, param_set_arc_int,
param_get_int, ZMOD_RW, "arc_c shift to calc min/max arc_p");
@@ -11088,7 +11099,7 @@ ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, average_blocksize, INT, ZMOD_RD,
"Target average block size");
ZFS_MODULE_PARAM(zfs, zfs_, compressed_arc_enabled, INT, ZMOD_RW,
- "Disable compressed arc buffers");
+ "Disable compressed ARC buffers");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min_prefetch_ms, param_set_arc_int,
param_get_int, ZMOD_RW, "Min life of prefetch block in ms");
@@ -11149,7 +11160,7 @@ ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, sys_free, param_set_arc_long,
param_get_long, ZMOD_RW, "System free memory target size in bytes");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit, param_set_arc_long,
- param_get_long, ZMOD_RW, "Minimum bytes of dnodes in arc");
+ param_get_long, ZMOD_RW, "Minimum bytes of dnodes in ARC");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit_percent,
param_set_arc_long, param_get_long, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index ee2470b38606..9a273b010fb1 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -339,18 +339,18 @@ dbuf_find(objset_t *os, uint64_t obj, uint8_t level, uint64_t blkid)
hv = dbuf_hash(os, obj, level, blkid);
idx = hv & h->hash_table_mask;
- mutex_enter(DBUF_HASH_MUTEX(h, idx));
+ rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_READER);
for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) {
if (DBUF_EQUAL(db, os, obj, level, blkid)) {
mutex_enter(&db->db_mtx);
if (db->db_state != DB_EVICTING) {
- mutex_exit(DBUF_HASH_MUTEX(h, idx));
+ rw_exit(DBUF_HASH_RWLOCK(h, idx));
return (db);
}
mutex_exit(&db->db_mtx);
}
}
- mutex_exit(DBUF_HASH_MUTEX(h, idx));
+ rw_exit(DBUF_HASH_RWLOCK(h, idx));
return (NULL);
}
@@ -393,13 +393,13 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
hv = dbuf_hash(os, obj, level, blkid);
idx = hv & h->hash_table_mask;
- mutex_enter(DBUF_HASH_MUTEX(h, idx));
+ rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_WRITER);
for (dbf = h->hash_table[idx], i = 0; dbf != NULL;
dbf = dbf->db_hash_next, i++) {
if (DBUF_EQUAL(dbf, os, obj, level, blkid)) {
mutex_enter(&dbf->db_mtx);
if (dbf->db_state != DB_EVICTING) {
- mutex_exit(DBUF_HASH_MUTEX(h, idx));
+ rw_exit(DBUF_HASH_RWLOCK(h, idx));
return (dbf);
}
mutex_exit(&dbf->db_mtx);
@@ -417,7 +417,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
mutex_enter(&db->db_mtx);
db->db_hash_next = h->hash_table[idx];
h->hash_table[idx] = db;
- mutex_exit(DBUF_HASH_MUTEX(h, idx));
+ rw_exit(DBUF_HASH_RWLOCK(h, idx));
uint64_t he = atomic_inc_64_nv(&dbuf_stats.hash_elements.value.ui64);
DBUF_STAT_MAX(hash_elements_max, he);
@@ -474,13 +474,13 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
/*
* We mustn't hold db_mtx to maintain lock ordering:
- * DBUF_HASH_MUTEX > db_mtx.
+ * DBUF_HASH_RWLOCK > db_mtx.
*/
ASSERT(zfs_refcount_is_zero(&db->db_holds));
ASSERT(db->db_state == DB_EVICTING);
ASSERT(!MUTEX_HELD(&db->db_mtx));
- mutex_enter(DBUF_HASH_MUTEX(h, idx));
+ rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_WRITER);
dbp = &h->hash_table[idx];
while ((dbf = *dbp) != db) {
dbp = &dbf->db_hash_next;
@@ -491,7 +491,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
if (h->hash_table[idx] &&
h->hash_table[idx]->db_hash_next == NULL)
DBUF_STAT_BUMPDOWN(hash_chains);
- mutex_exit(DBUF_HASH_MUTEX(h, idx));
+ rw_exit(DBUF_HASH_RWLOCK(h, idx));
atomic_dec_64(&dbuf_stats.hash_elements.value.ui64);
}
@@ -914,8 +914,8 @@ retry:
sizeof (dmu_buf_impl_t),
0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);
- for (i = 0; i < DBUF_MUTEXES; i++)
- mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
+ for (i = 0; i < DBUF_RWLOCKS; i++)
+ rw_init(&h->hash_rwlocks[i], NULL, RW_DEFAULT, NULL);
dbuf_stats_init(h);
@@ -981,8 +981,8 @@ dbuf_fini(void)
dbuf_stats_destroy();
- for (i = 0; i < DBUF_MUTEXES; i++)
- mutex_destroy(&h->hash_mutexes[i]);
+ for (i = 0; i < DBUF_RWLOCKS; i++)
+ rw_destroy(&h->hash_rwlocks[i]);
#if defined(_KERNEL)
/*
* Large allocations which do not require contiguous pages
@@ -3947,7 +3947,7 @@ dmu_buf_get_user(dmu_buf_t *db_fake)
}
void
-dmu_buf_user_evict_wait()
+dmu_buf_user_evict_wait(void)
{
taskq_wait(dbu_evict_taskq);
}
diff --git a/sys/contrib/openzfs/module/zfs/dbuf_stats.c b/sys/contrib/openzfs/module/zfs/dbuf_stats.c
index fa9a5f08060a..a42750ac8e90 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf_stats.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf_stats.c
@@ -137,7 +137,7 @@ dbuf_stats_hash_table_data(char *buf, size_t size, void *data)
if (size)
buf[0] = 0;
- mutex_enter(DBUF_HASH_MUTEX(h, dsh->idx));
+ rw_enter(DBUF_HASH_RWLOCK(h, dsh->idx), RW_READER);
for (db = h->hash_table[dsh->idx]; db != NULL; db = db->db_hash_next) {
/*
* Returning ENOMEM will cause the data and header functions
@@ -158,7 +158,7 @@ dbuf_stats_hash_table_data(char *buf, size_t size, void *data)
mutex_exit(&db->db_mtx);
}
- mutex_exit(DBUF_HASH_MUTEX(h, dsh->idx));
+ rw_exit(DBUF_HASH_RWLOCK(h, dsh->idx));
return (error);
}
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
index 461feeffb6a3..7d8b2c96bd74 100644
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -86,7 +86,7 @@ static int zfs_dmu_offset_next_sync = 1;
* helps to limit the amount of memory that can be used by prefetching.
* Larger objects should be prefetched a bit at a time.
*/
-static int dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE;
+int dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE;
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
{DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" },
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
index e836d681e920..ca894c35253c 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
@@ -73,12 +73,19 @@
* The SPA supports block sizes up to 16MB. However, very large blocks
* can have an impact on i/o latency (e.g. tying up a spinning disk for
* ~300ms), and also potentially on the memory allocator. Therefore,
- * we do not allow the recordsize to be set larger than zfs_max_recordsize
- * (default 1MB). Larger blocks can be created by changing this tunable,
- * and pools with larger blocks can always be imported and used, regardless
- * of this setting.
+ * we did not allow the recordsize to be set larger than zfs_max_recordsize
+ * (former default: 1MB). Larger blocks could be created by changing this
+ * tunable, and pools with larger blocks could always be imported and used,
+ * regardless of this setting.
+ *
+ * We do, however, still limit it by default to 1M on x86_32, because Linux's
+ * 3/1 memory split doesn't leave much room for 16M chunks.
*/
-int zfs_max_recordsize = 1 * 1024 * 1024;
+#ifdef _ILP32
+int zfs_max_recordsize = 1 * 1024 * 1024;
+#else
+int zfs_max_recordsize = 16 * 1024 * 1024;
+#endif
static int zfs_allow_redacted_dataset_mount = 0;
#define SWITCH64(x, y) \
@@ -3708,6 +3715,15 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
dsl_dir_rele(odd, FTAG);
promote_rele(ddpa, FTAG);
+
+ /*
+ * Transfer common error blocks from old head to new head.
+ */
+ if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_HEAD_ERRLOG)) {
+ uint64_t old_head = origin_head->ds_object;
+ uint64_t new_head = hds->ds_object;
+ spa_swap_errlog(dp->dp_spa, new_head, old_head, tx);
+ }
}
/*
@@ -4924,13 +4940,38 @@ dsl_dataset_activate_redaction(dsl_dataset_t *ds, uint64_t *redact_snaps,
ds->ds_feature[SPA_FEATURE_REDACTED_DATASETS] = ftuaa;
}
-#if defined(_LP64)
-#define RECORDSIZE_PERM ZMOD_RW
-#else
-/* Limited to 1M on 32-bit platforms due to lack of virtual address space */
-#define RECORDSIZE_PERM ZMOD_RD
-#endif
-ZFS_MODULE_PARAM(zfs, zfs_, max_recordsize, INT, RECORDSIZE_PERM,
+/*
+ * Find and return (in *oldest_dsobj) the oldest snapshot of the head_ds
+ * dataset whose birth time is >= min_txg.
+ */
+int
+dsl_dataset_oldest_snapshot(spa_t *spa, uint64_t head_ds, uint64_t min_txg,
+ uint64_t *oldest_dsobj)
+{
+ dsl_dataset_t *ds;
+ dsl_pool_t *dp = spa->spa_dsl_pool;
+
+ int error = dsl_dataset_hold_obj(dp, head_ds, FTAG, &ds);
+ if (error != 0)
+ return (error);
+
+ uint64_t prev_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
+ uint64_t prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
+
+ while (prev_obj != 0 && min_txg < prev_obj_txg) {
+ dsl_dataset_rele(ds, FTAG);
+ if ((error = dsl_dataset_hold_obj(dp, prev_obj,
+ FTAG, &ds)) != 0)
+ return (error);
+ prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
+ prev_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
+ }
+ *oldest_dsobj = ds->ds_object;
+ dsl_dataset_rele(ds, FTAG);
+ return (0);
+}
+
+ZFS_MODULE_PARAM(zfs, zfs_, max_recordsize, INT, ZMOD_RW,
"Max allowed record size");
ZFS_MODULE_PARAM(zfs, zfs_, allow_redacted_dataset_mount, INT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/dsl_destroy.c b/sys/contrib/openzfs/module/zfs/dsl_destroy.c
index b32929b3320c..7dddd8eed5e9 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_destroy.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_destroy.c
@@ -1153,6 +1153,9 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
dsl_dataset_rele(prev, FTAG);
}
+ /* Delete errlog. */
+ if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_HEAD_ERRLOG))
+ spa_delete_dataset_errlog(dp->dp_spa, ds->ds_object, tx);
}
void
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
index 7ed83b305db7..ab32bfec1310 100644
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -48,10 +48,10 @@
/*
* Metaslab granularity, in bytes. This is roughly similar to what would be
* referred to as the "stripe size" in traditional RAID arrays. In normal
- * operation, we will try to write this amount of data to a top-level vdev
- * before moving on to the next one.
+ * operation, we will try to write this amount of data to each disk before
+ * moving on to the next top-level vdev.
*/
-static unsigned long metaslab_aliquot = 512 << 10;
+static unsigned long metaslab_aliquot = 1024 * 1024;
/*
* For testing, make some blocks above a certain size be gang blocks.
@@ -899,7 +899,8 @@ metaslab_group_activate(metaslab_group_t *mg)
if (++mg->mg_activation_count <= 0)
return;
- mg->mg_aliquot = metaslab_aliquot * MAX(1, mg->mg_vd->vdev_children);
+ mg->mg_aliquot = metaslab_aliquot * MAX(1,
+ vdev_get_ndisks(mg->mg_vd) - vdev_get_nparity(mg->mg_vd));
metaslab_group_alloc_update(mg);
if ((mgprev = mc->mc_allocator[0].mca_rotor) == NULL) {
@@ -2750,7 +2751,8 @@ metaslab_fini_flush_data(metaslab_t *msp)
mutex_exit(&spa->spa_flushed_ms_lock);
spa_log_sm_decrement_mscount(spa, metaslab_unflushed_txg(msp));
- spa_log_summary_decrement_mscount(spa, metaslab_unflushed_txg(msp));
+ spa_log_summary_decrement_mscount(spa, metaslab_unflushed_txg(msp),
+ metaslab_unflushed_dirty(msp));
}
uint64_t
@@ -3728,50 +3730,45 @@ metaslab_condense(metaslab_t *msp, dmu_tx_t *tx)
metaslab_flush_update(msp, tx);
}
-/*
- * Called when the metaslab has been flushed (its own spacemap now reflects
- * all the contents of the pool-wide spacemap log). Updates the metaslab's
- * metadata and any pool-wide related log space map data (e.g. summary,
- * obsolete logs, etc..) to reflect that.
- */
static void
-metaslab_flush_update(metaslab_t *msp, dmu_tx_t *tx)
+metaslab_unflushed_add(metaslab_t *msp, dmu_tx_t *tx)
{
- metaslab_group_t *mg = msp->ms_group;
- spa_t *spa = mg->mg_vd->vdev_spa;
-
- ASSERT(MUTEX_HELD(&msp->ms_lock));
-
- ASSERT3U(spa_sync_pass(spa), ==, 1);
+ spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+ ASSERT(spa_syncing_log_sm(spa) != NULL);
+ ASSERT(msp->ms_sm != NULL);
ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
- /*
- * Just because a metaslab got flushed, that doesn't mean that
- * it will pass through metaslab_sync_done(). Thus, make sure to
- * update ms_synced_length here in case it doesn't.
- */
- msp->ms_synced_length = space_map_length(msp->ms_sm);
+ mutex_enter(&spa->spa_flushed_ms_lock);
+ metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx);
+ metaslab_set_unflushed_dirty(msp, B_TRUE);
+ avl_add(&spa->spa_metaslabs_by_flushed, msp);
+ mutex_exit(&spa->spa_flushed_ms_lock);
- /*
- * We may end up here from metaslab_condense() without the
- * feature being active. In that case this is a no-op.
- */
- if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
- return;
+ spa_log_sm_increment_current_mscount(spa);
+ spa_log_summary_add_flushed_metaslab(spa, B_TRUE);
+}
+void
+metaslab_unflushed_bump(metaslab_t *msp, dmu_tx_t *tx, boolean_t dirty)
+{
+ spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
ASSERT(spa_syncing_log_sm(spa) != NULL);
ASSERT(msp->ms_sm != NULL);
ASSERT(metaslab_unflushed_txg(msp) != 0);
ASSERT3P(avl_find(&spa->spa_metaslabs_by_flushed, msp, NULL), ==, msp);
+ ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
+ ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
VERIFY3U(tx->tx_txg, <=, spa_final_dirty_txg(spa));
/* update metaslab's position in our flushing tree */
uint64_t ms_prev_flushed_txg = metaslab_unflushed_txg(msp);
+ boolean_t ms_prev_flushed_dirty = metaslab_unflushed_dirty(msp);
mutex_enter(&spa->spa_flushed_ms_lock);
avl_remove(&spa->spa_metaslabs_by_flushed, msp);
metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx);
+ metaslab_set_unflushed_dirty(msp, dirty);
avl_add(&spa->spa_metaslabs_by_flushed, msp);
mutex_exit(&spa->spa_flushed_ms_lock);
@@ -3779,17 +3776,47 @@ metaslab_flush_update(metaslab_t *msp, dmu_tx_t *tx)
spa_log_sm_decrement_mscount(spa, ms_prev_flushed_txg);
spa_log_sm_increment_current_mscount(spa);
+ /* update log space map summary */
+ spa_log_summary_decrement_mscount(spa, ms_prev_flushed_txg,
+ ms_prev_flushed_dirty);
+ spa_log_summary_add_flushed_metaslab(spa, dirty);
+
/* cleanup obsolete logs if any */
- uint64_t log_blocks_before = spa_log_sm_nblocks(spa);
spa_cleanup_old_sm_logs(spa, tx);
- uint64_t log_blocks_after = spa_log_sm_nblocks(spa);
- VERIFY3U(log_blocks_after, <=, log_blocks_before);
+}
- /* update log space map summary */
- uint64_t blocks_gone = log_blocks_before - log_blocks_after;
- spa_log_summary_add_flushed_metaslab(spa);
- spa_log_summary_decrement_mscount(spa, ms_prev_flushed_txg);
- spa_log_summary_decrement_blkcount(spa, blocks_gone);
+/*
+ * Called when the metaslab has been flushed (its own spacemap now reflects
+ * all the contents of the pool-wide spacemap log). Updates the metaslab's
+ * metadata and any pool-wide related log space map data (e.g. summary,
+ * obsolete logs, etc..) to reflect that.
+ */
+static void
+metaslab_flush_update(metaslab_t *msp, dmu_tx_t *tx)
+{
+ metaslab_group_t *mg = msp->ms_group;
+ spa_t *spa = mg->mg_vd->vdev_spa;
+
+ ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+ ASSERT3U(spa_sync_pass(spa), ==, 1);
+
+ /*
+ * Just because a metaslab got flushed, that doesn't mean that
+ * it will pass through metaslab_sync_done(). Thus, make sure to
+ * update ms_synced_length here in case it doesn't.
+ */
+ msp->ms_synced_length = space_map_length(msp->ms_sm);
+
+ /*
+ * We may end up here from metaslab_condense() without the
+ * feature being active. In that case this is a no-op.
+ */
+ if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP) ||
+ metaslab_unflushed_txg(msp) == 0)
+ return;
+
+ metaslab_unflushed_bump(msp, tx, B_FALSE);
}
boolean_t
@@ -4005,23 +4032,6 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
ASSERT0(metaslab_allocated_space(msp));
}
- if (metaslab_unflushed_txg(msp) == 0 &&
- spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
- ASSERT(spa_syncing_log_sm(spa) != NULL);
-
- metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx);
- spa_log_sm_increment_current_mscount(spa);
- spa_log_summary_add_flushed_metaslab(spa);
-
- ASSERT(msp->ms_sm != NULL);
- mutex_enter(&spa->spa_flushed_ms_lock);
- avl_add(&spa->spa_metaslabs_by_flushed, msp);
- mutex_exit(&spa->spa_flushed_ms_lock);
-
- ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
- ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
- }
-
if (!range_tree_is_empty(msp->ms_checkpointing) &&
vd->vdev_checkpoint_sm == NULL) {
ASSERT(spa_has_checkpoint(spa));
@@ -4069,6 +4079,10 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
space_map_t *log_sm = spa_syncing_log_sm(spa);
if (log_sm != NULL) {
ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_LOG_SPACEMAP));
+ if (metaslab_unflushed_txg(msp) == 0)
+ metaslab_unflushed_add(msp, tx);
+ else if (!metaslab_unflushed_dirty(msp))
+ metaslab_unflushed_bump(msp, tx, B_TRUE);
space_map_write(log_sm, alloctree, SM_ALLOC,
vd->vdev_id, tx);
@@ -6131,6 +6145,12 @@ metaslab_enable(metaslab_t *msp, boolean_t sync, boolean_t unload)
mutex_exit(&mg->mg_ms_disabled_lock);
}
+/*
+ * Set the metaslab's ms_unflushed_dirty flag to the given value.
+ */
+void
+metaslab_set_unflushed_dirty(metaslab_t *ms, boolean_t dirty)
+{
+	ms->ms_unflushed_dirty = dirty;
+}
+
static void
metaslab_update_ondisk_flush_data(metaslab_t *ms, dmu_tx_t *tx)
{
@@ -6167,15 +6187,16 @@ metaslab_update_ondisk_flush_data(metaslab_t *ms, dmu_tx_t *tx)
void
metaslab_set_unflushed_txg(metaslab_t *ms, uint64_t txg, dmu_tx_t *tx)
{
- spa_t *spa = ms->ms_group->mg_vd->vdev_spa;
-
- if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
- return;
-
ms->ms_unflushed_txg = txg;
metaslab_update_ondisk_flush_data(ms, tx);
}
+/*
+ * Return the current value of the metaslab's ms_unflushed_dirty flag.
+ */
+boolean_t
+metaslab_unflushed_dirty(metaslab_t *ms)
+{
+	boolean_t dirty = ms->ms_unflushed_dirty;
+
+	return (dirty);
+}
+
uint64_t
metaslab_unflushed_txg(metaslab_t *ms)
{
diff --git a/sys/contrib/openzfs/module/zfs/sa.c b/sys/contrib/openzfs/module/zfs/sa.c
index 2b6776581a47..db8c2b831f1d 100644
--- a/sys/contrib/openzfs/module/zfs/sa.c
+++ b/sys/contrib/openzfs/module/zfs/sa.c
@@ -1068,8 +1068,8 @@ sa_setup(objset_t *os, uint64_t sa_obj, const sa_attr_reg_t *reg_attrs,
za.za_num_integers);
break;
}
- VERIFY(ddi_strtoull(za.za_name, NULL, 10,
- (unsigned long long *)&lot_num) == 0);
+ VERIFY0(ddi_strtoull(za.za_name, NULL, 10,
+ (unsigned long long *)&lot_num));
(void) sa_add_layout_entry(os, lot_attrs,
za.za_num_integers, lot_num,
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index e69cb5527be8..01114dedef48 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -4355,7 +4355,7 @@ spa_ld_load_vdev_metadata(spa_t *spa)
error = spa_ld_log_spacemaps(spa);
if (error != 0) {
- spa_load_failed(spa, "spa_ld_log_sm_data failed [error=%d]",
+ spa_load_failed(spa, "spa_ld_log_spacemaps failed [error=%d]",
error);
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
}
diff --git a/sys/contrib/openzfs/module/zfs/spa_errlog.c b/sys/contrib/openzfs/module/zfs/spa_errlog.c
index c6b28ea7d1b8..9e5d1de63c0b 100644
--- a/sys/contrib/openzfs/module/zfs/spa_errlog.c
+++ b/sys/contrib/openzfs/module/zfs/spa_errlog.c
@@ -20,7 +20,8 @@
*/
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2014, Delphix. All rights reserved.
+ * Copyright (c) 2021, George Amanakis. All rights reserved.
*/
/*
@@ -43,6 +44,16 @@
* calculation when the data is requested, storing the result so future queries
* will be faster.
*
+ * If the head_errlog feature is enabled, a different on-disk format is used.
+ * The error log of each head dataset is stored separately in the zap object
+ * and keyed by the head id. This enables listing every dataset affected in
+ * userland. In order to be able to track whether an error block has been
+ * modified or added to snapshots since it was marked as an error, a new tuple
+ * is introduced: zbookmark_err_phys_t. It allows the storage of the birth
+ * transaction group of an error block on-disk. The birth transaction group is
+ * used by check_filesystem() to assess whether this block was freed,
+ * re-written or added to a snapshot since its marking as an error.
+ *
* This log is then shipped into an nvlist where the key is the dataset name and
* the value is the object name. Userland is then responsible for uniquifying
* this list and displaying it to the user.
@@ -53,7 +64,17 @@
#include <sys/spa_impl.h>
#include <sys/zap.h>
#include <sys/zio.h>
+#include <sys/dsl_dir.h>
+#include <sys/dmu_objset.h>
+#include <sys/dbuf.h>
+/*
+ * spa_upgrade_errlog_limit : A zfs module parameter that controls the number
+ * of on-disk error log entries that will be converted to the new
+ * format when enabling head_errlog. Defaults to 0 which converts
+ * all log entries.
+ */
+static uint32_t spa_upgrade_errlog_limit = 0;
/*
* Convert a bookmark to a string.
@@ -67,9 +88,35 @@ bookmark_to_name(zbookmark_phys_t *zb, char *buf, size_t len)
}
/*
- * Convert a string to a bookmark
+ * Convert an err_phys to a string.
+ */
+static void
+errphys_to_name(zbookmark_err_phys_t *zep, char *buf, size_t len)
+{
+	/* Widen each field first so it matches the %llx conversions below. */
+	u_longlong_t object = (u_longlong_t)zep->zb_object;
+	u_longlong_t level = (u_longlong_t)zep->zb_level;
+	u_longlong_t blkid = (u_longlong_t)zep->zb_blkid;
+	u_longlong_t birth = (u_longlong_t)zep->zb_birth;
+
+	/* Key layout: object:level:blkid:birth, written into buf[0..len). */
+	(void) snprintf(buf, len, "%llx:%llx:%llx:%llx",
+	    object, level, blkid, birth);
+}
+
+/*
+ * Convert a string to an err_phys.
+ *
+ * The string carries four numbers separated by ':' -- object, level, blkid
+ * and birth, in that order -- each parsed with zfs_strtonum().  The
+ * separators and the terminating NUL are only verified by ASSERT().
+ */
+static void
+name_to_errphys(char *buf, zbookmark_err_phys_t *zep)
+{
+	char *cursor = buf;
+
+	zep->zb_object = zfs_strtonum(cursor, &cursor);
+	ASSERT(*cursor == ':');
+	cursor++;
+
+	zep->zb_level = (int)zfs_strtonum(cursor, &cursor);
+	ASSERT(*cursor == ':');
+	cursor++;
+
+	zep->zb_blkid = zfs_strtonum(cursor, &cursor);
+	ASSERT(*cursor == ':');
+	cursor++;
+
+	zep->zb_birth = zfs_strtonum(cursor, &cursor);
+	ASSERT(*cursor == '\0');
+}
+
+/*
+ * Convert a string to a bookmark.
*/
-#ifdef _KERNEL
static void
name_to_bookmark(char *buf, zbookmark_phys_t *zb)
{
@@ -82,8 +129,74 @@ name_to_bookmark(char *buf, zbookmark_phys_t *zb)
zb->zb_blkid = zfs_strtonum(buf + 1, &buf);
ASSERT(*buf == '\0');
}
+
+#ifdef _KERNEL
+/*
+ * Fill in the bookmark zb: the objset is the given dataset, while object,
+ * level and blkid are copied from the error bookmark zep.
+ */
+static void
+zep_to_zb(uint64_t dataset, zbookmark_err_phys_t *zep, zbookmark_phys_t *zb)
+{
+	/* Block coordinates come straight from the error bookmark. */
+	zb->zb_blkid = zep->zb_blkid;
+	zb->zb_level = zep->zb_level;
+	zb->zb_object = zep->zb_object;
+
+	/* The objset is whichever dataset the caller is reporting on. */
+	zb->zb_objset = dataset;
+}
#endif
+/*
+ * Parse the number held in buf with zfs_strtonum() and store it in *obj.
+ * An ASSERT() checks that the whole string was consumed.
+ */
+static void
+name_to_object(char *buf, uint64_t *obj)
+{
+	char *end = buf;
+
+	*obj = zfs_strtonum(buf, &end);
+	ASSERT(*end == '\0');
+}
+
+/*
+ * Look up dataset ds_obj and the block described by zep inside it.
+ *
+ * On return *head_dataset_id holds the dd_head_dataset_obj of the dataset's
+ * dsl_dir (it is written as soon as the dataset hold succeeds, even if a
+ * later step fails) and zep->zb_birth holds the blk_birth of the block
+ * pointer found for (zb_object, zb_level, zb_blkid).
+ *
+ * The pool config lock is entered and exited here, so the caller must not
+ * already hold it.
+ *
+ * Returns 0 on success, ENOENT if the block pointer is a hole, or the error
+ * of whichever hold/lookup failed.
+ *
+ * NOTE(review): zb_birth is stored whenever the block pointer lookup was
+ * attempted, including when it failed; the callers visible in this file only
+ * consume it when 0 is returned.
+ */
+static int
+get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
+    uint64_t *head_dataset_id)
+{
+	dsl_pool_t *dp = spa->spa_dsl_pool;
+	dsl_dataset_t *ds;
+	objset_t *os;
+	dnode_t *dn;
+	blkptr_t bp;
+	int error;
+
+	dsl_pool_config_enter(dp, FTAG);
+
+	error = dsl_dataset_hold_obj(dp, ds_obj, FTAG, &ds);
+	if (error != 0)
+		goto out_config;
+
+	ASSERT(head_dataset_id);
+	*head_dataset_id = dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj;
+
+	error = dmu_objset_from_ds(ds, &os);
+	if (error != 0)
+		goto out_rele;
+
+	error = dnode_hold(os, zep->zb_object, FTAG, &dn);
+	if (error != 0)
+		goto out_rele;
+
+	rw_enter(&dn->dn_struct_rwlock, RW_READER);
+	error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL,
+	    NULL);
+	if (error == 0 && BP_IS_HOLE(&bp))
+		error = SET_ERROR(ENOENT);
+	zep->zb_birth = bp.blk_birth;
+	rw_exit(&dn->dn_struct_rwlock);
+	dnode_rele(dn, FTAG);
+
+out_rele:
+	dsl_dataset_rele(ds, FTAG);
+out_config:
+	dsl_pool_config_exit(dp, FTAG);
+	return (error);
+}
+
/*
* Log an uncorrectable error to the persistent error log. We add it to the
* spa's list of pending errors. The changes are actually synced out to disk
@@ -128,6 +241,276 @@ spa_log_error(spa_t *spa, const zbookmark_phys_t *zb)
mutex_exit(&spa->spa_errlist_lock);
}
+#ifdef _KERNEL
+/*
+ * Store in *birth_txg the blk_birth of the block pointer that dataset ds
+ * currently has for the block described by zep (object, level, blkid).
+ *
+ * Returns 0 on success, ENOENT if that block pointer is a hole, or the error
+ * from obtaining the objset, the dnode or the block pointer.
+ *
+ * NOTE(review): *birth_txg is written whenever the block pointer lookup was
+ * attempted, even if it failed; check_filesystem() only uses the value when
+ * 0 is returned.
+ */
+static int
+find_birth_txg(dsl_dataset_t *ds, zbookmark_err_phys_t *zep,
+    uint64_t *birth_txg)
+{
+	objset_t *os;
+	dnode_t *dn;
+	blkptr_t bp;
+	int error;
+
+	if ((error = dmu_objset_from_ds(ds, &os)) != 0)
+		return (error);
+
+	if ((error = dnode_hold(os, zep->zb_object, FTAG, &dn)) != 0)
+		return (error);
+
+	rw_enter(&dn->dn_struct_rwlock, RW_READER);
+	error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL,
+	    NULL);
+	if (error == 0 && BP_IS_HOLE(&bp))
+		error = SET_ERROR(ENOENT);
+	*birth_txg = bp.blk_birth;
+	rw_exit(&dn->dn_struct_rwlock);
+
+	dnode_rele(dn, FTAG);
+	return (error);
+}
+
+/*
+ * Account for one dataset that references the error block.
+ *
+ * When only_count is true, *count is incremented.  Otherwise zb is copied
+ * out to slot (*count - 1) of the userspace array at uaddr and *count is
+ * decremented, so the array fills from the back.
+ *
+ * Returns 0 on success, ENOMEM if the userspace array has no unused slot
+ * left (the same answer the non-head_errlog path of process_error_log()
+ * gives), or EFAULT if the copyout fails.
+ */
+static int
+errlog_account_entry(const zbookmark_phys_t *zb, uint64_t *count,
+    void *uaddr, boolean_t only_count)
+{
+	if (only_count) {
+		(*count)++;
+		return (0);
+	}
+
+	/* Without this check (*count - 1) would wrap around. */
+	if (*count == 0)
+		return (SET_ERROR(ENOMEM));
+
+	if (copyout(zb, (char *)uaddr + (*count - 1) *
+	    sizeof (zbookmark_phys_t), sizeof (zbookmark_phys_t)) != 0)
+		return (SET_ERROR(EFAULT));
+
+	(*count)--;
+	return (0);
+}
+
+/*
+ * This function serves a double role. If only_count is true, it returns
+ * (in *count) how many times an error block belonging to this filesystem is
+ * referenced by snapshots or clones. If only_count is false, each time the
+ * error block is referenced by a snapshot or clone, it fills the userspace
+ * array at uaddr with the bookmarks of the error blocks. The array is filled
+ * from the back and *count is modified to be the number of unused entries at
+ * the beginning of the array.
+ *
+ * head_ds is the dataset to start from; the function recurses into the
+ * clones (ds_next_clones_obj) of every affected snapshot.
+ *
+ * Returns 0 on success.  Failures are ENOMEM when the userspace array is
+ * full, EFAULT when a copyout fails, or the error of a failed dataset hold,
+ * zap_count() or recursive call.  Every dataset hold taken here is released
+ * before returning.
+ */
+static int
+check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
+    uint64_t *count, void *uaddr, boolean_t only_count)
+{
+	dsl_dataset_t *ds;
+	dsl_pool_t *dp = spa->spa_dsl_pool;
+
+	int error = dsl_dataset_hold_obj(dp, head_ds, FTAG, &ds);
+	if (error != 0)
+		return (error);
+
+	uint64_t latest_txg;
+	uint64_t txg_to_consider = spa->spa_syncing_txg;
+	boolean_t check_snapshot = B_TRUE;
+	error = find_birth_txg(ds, zep, &latest_txg);
+	if (error == 0) {
+		if (zep->zb_birth == latest_txg) {
+			/* Block neither free nor rewritten. */
+			zbookmark_phys_t zb;
+			zep_to_zb(head_ds, zep, &zb);
+			error = errlog_account_entry(&zb, count, uaddr,
+			    only_count);
+			if (error != 0) {
+				dsl_dataset_rele(ds, FTAG);
+				return (error);
+			}
+			check_snapshot = B_FALSE;
+		} else {
+			ASSERT3U(zep->zb_birth, <, latest_txg);
+			txg_to_consider = latest_txg;
+		}
+	}
+
+	/* How many snapshots reference this block. */
+	uint64_t snap_count;
+	error = zap_count(spa->spa_meta_objset,
+	    dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
+	if (error != 0) {
+		dsl_dataset_rele(ds, FTAG);
+		return (error);
+	}
+
+	if (snap_count == 0) {
+		/* File system has no snapshot. */
+		dsl_dataset_rele(ds, FTAG);
+		return (0);
+	}
+
+	/*
+	 * Remember the allocation size: kmem_free() must be handed exactly
+	 * the size that was passed to kmem_alloc().
+	 */
+	size_t snap_obj_array_size = snap_count * sizeof (uint64_t);
+	uint64_t *snap_obj_array = kmem_alloc(snap_obj_array_size, KM_SLEEP);
+
+	int aff_snap_count = 0;
+	uint64_t snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
+	uint64_t snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
+
+	/* Check only snapshots created from this file system. */
+	while (snap_obj != 0 && zep->zb_birth < snap_obj_txg &&
+	    snap_obj_txg <= txg_to_consider) {
+
+		/* Swap the hold over to the previous snapshot. */
+		dsl_dataset_rele(ds, FTAG);
+		error = dsl_dataset_hold_obj(dp, snap_obj, FTAG, &ds);
+		if (error != 0)
+			goto out;
+
+		if (dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj != head_ds)
+			break;
+
+		boolean_t affected = B_TRUE;
+		if (check_snapshot) {
+			uint64_t blk_txg;
+			error = find_birth_txg(ds, zep, &blk_txg);
+			affected = (error == 0 && zep->zb_birth == blk_txg);
+		}
+
+		if (affected) {
+			snap_obj_array[aff_snap_count] = snap_obj;
+			aff_snap_count++;
+
+			zbookmark_phys_t zb;
+			zep_to_zb(snap_obj, zep, &zb);
+			error = errlog_account_entry(&zb, count, uaddr,
+			    only_count);
+			if (error != 0) {
+				dsl_dataset_rele(ds, FTAG);
+				goto out;
+			}
+
+			/*
+			 * Only clones whose origins were affected could also
+			 * have affected snapshots.
+			 */
+			zap_cursor_t zc;
+			zap_attribute_t za;
+			for (zap_cursor_init(&zc, spa->spa_meta_objset,
+			    dsl_dataset_phys(ds)->ds_next_clones_obj);
+			    zap_cursor_retrieve(&zc, &za) == 0;
+			    zap_cursor_advance(&zc)) {
+				error = check_filesystem(spa,
+				    za.za_first_integer, zep,
+				    count, uaddr, only_count);
+
+				if (error != 0) {
+					zap_cursor_fini(&zc);
+					/*
+					 * The snapshot is still held at this
+					 * point; drop it before bailing out.
+					 */
+					dsl_dataset_rele(ds, FTAG);
+					goto out;
+				}
+			}
+			zap_cursor_fini(&zc);
+		}
+		snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
+		snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
+	}
+	dsl_dataset_rele(ds, FTAG);
+
+out:
+	kmem_free(snap_obj_array, snap_obj_array_size);
+	return (error);
+}
+
+/*
+ * Ask dsl_dataset_oldest_snapshot() for a dataset object, starting from
+ * head_ds and using zep->zb_birth as the txg bound, and store the
+ * dd_head_dataset_obj of that dataset's dsl_dir in *top_affected_fs.
+ *
+ * Returns 0 on success, or the error from the snapshot lookup or from
+ * holding the resulting dataset; *top_affected_fs is untouched on failure.
+ */
+static int
+find_top_affected_fs(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
+    uint64_t *top_affected_fs)
+{
+	dsl_dataset_t *oldest_ds;
+	uint64_t oldest_dsobj;
+	int error;
+
+	error = dsl_dataset_oldest_snapshot(spa, head_ds, zep->zb_birth,
+	    &oldest_dsobj);
+	if (error == 0) {
+		error = dsl_dataset_hold_obj(spa->spa_dsl_pool, oldest_dsobj,
+		    FTAG, &oldest_ds);
+	}
+	if (error != 0)
+		return (error);
+
+	*top_affected_fs =
+	    dsl_dir_phys(oldest_ds->ds_dir)->dd_head_dataset_obj;
+	dsl_dataset_rele(oldest_ds, FTAG);
+	return (0);
+}
+
+/*
+ * Process one error block under the pool config lock: find the top
+ * affected file system for it and hand that to check_filesystem(), which
+ * either counts the referencing datasets or copies their bookmarks out to
+ * uaddr depending on only_count.
+ *
+ * Returns the error of whichever of the two steps failed, or 0.
+ */
+static int
+process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
+    uint64_t *count, void *uaddr, boolean_t only_count)
+{
+	dsl_pool_t *dp = spa->spa_dsl_pool;
+	uint64_t top_affected_fs;
+	int error;
+
+	dsl_pool_config_enter(dp, FTAG);
+
+	error = find_top_affected_fs(spa, head_ds, zep, &top_affected_fs);
+	if (error != 0) {
+		dsl_pool_config_exit(dp, FTAG);
+		return (error);
+	}
+
+	error = check_filesystem(spa, top_affected_fs, zep, count, uaddr,
+	    only_count);
+
+	dsl_pool_config_exit(dp, FTAG);
+	return (error);
+}
+
+/*
+ * Count the entries reachable from the on-disk error log spa_err_obj.
+ *
+ * The log is walked as a two-level ZAP: each top-level entry is named after
+ * a head dataset object and its first integer is the object of a nested ZAP
+ * whose entry names are err_phys strings.  Every nested entry is passed to
+ * process_error_block() in counting mode; its errors are ignored.
+ *
+ * Returns 0 if spa_err_obj is 0.
+ */
+static uint64_t
+get_errlog_size(spa_t *spa, uint64_t spa_err_obj)
+{
+	uint64_t total = 0;
+	zap_cursor_t log_zc;
+	zap_attribute_t log_za;
+
+	if (spa_err_obj == 0)
+		return (total);
+
+	zap_cursor_init(&log_zc, spa->spa_meta_objset, spa_err_obj);
+	while (zap_cursor_retrieve(&log_zc, &log_za) == 0) {
+		zap_cursor_t blk_zc;
+		zap_attribute_t blk_za;
+		uint64_t head_ds;
+
+		name_to_object(log_za.za_name, &head_ds);
+
+		zap_cursor_init(&blk_zc, spa->spa_meta_objset,
+		    log_za.za_first_integer);
+		while (zap_cursor_retrieve(&blk_zc, &blk_za) == 0) {
+			zbookmark_err_phys_t zep;
+
+			name_to_errphys(blk_za.za_name, &zep);
+			(void) process_error_block(spa, head_ds, &zep,
+			    &total, NULL, B_TRUE);
+
+			zap_cursor_advance(&blk_zc);
+		}
+		zap_cursor_fini(&blk_zc);
+
+		zap_cursor_advance(&log_zc);
+	}
+	zap_cursor_fini(&log_zc);
+
+	return (total);
+}
+
+/*
+ * Count the entries contributed by the in-core error list `tree`.  Each
+ * entry's bookmark is resolved to its head dataset and birth txg and then
+ * passed to process_error_block() in counting mode.
+ */
+static uint64_t
+get_errlist_size(spa_t *spa, avl_tree_t *tree)
+{
+	uint64_t total = 0;
+	spa_error_entry_t *se;
+
+	if (avl_numnodes(tree) == 0)
+		return (0);
+
+	se = avl_first(tree);
+	while (se != NULL) {
+		zbookmark_err_phys_t zep;
+		uint64_t head_ds_obj;
+
+		zep.zb_object = se->se_bookmark.zb_object;
+		zep.zb_level = se->se_bookmark.zb_level;
+		zep.zb_blkid = se->se_bookmark.zb_blkid;
+
+		/*
+		 * If we cannot find out the head dataset and birth txg of
+		 * the present error block, we opt not to error out. In the
+		 * next pool sync this information will be retrieved by
+		 * sync_error_list() and written to the on-disk error log.
+		 */
+		if (get_head_and_birth_txg(spa, &zep,
+		    se->se_bookmark.zb_objset, &head_ds_obj) == 0) {
+			(void) process_error_block(spa, head_ds_obj, &zep,
+			    &total, NULL, B_TRUE);
+		}
+
+		se = AVL_NEXT(tree, se);
+	}
+	return (total);
+}
+#endif
+
/*
* Return the number of errors currently in the error log. This is actually the
* sum of both the last log and the current log, since we don't know the union
@@ -136,83 +519,284 @@ spa_log_error(spa_t *spa, const zbookmark_phys_t *zb)
uint64_t
spa_get_errlog_size(spa_t *spa)
{
- uint64_t total = 0, count;
+ uint64_t total = 0;
+
+ if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
+ mutex_enter(&spa->spa_errlog_lock);
+ uint64_t count;
+ if (spa->spa_errlog_scrub != 0 &&
+ zap_count(spa->spa_meta_objset, spa->spa_errlog_scrub,
+ &count) == 0)
+ total += count;
+
+ if (spa->spa_errlog_last != 0 && !spa->spa_scrub_finished &&
+ zap_count(spa->spa_meta_objset, spa->spa_errlog_last,
+ &count) == 0)
+ total += count;
+ mutex_exit(&spa->spa_errlog_lock);
+
+ mutex_enter(&spa->spa_errlist_lock);
+ total += avl_numnodes(&spa->spa_errlist_last);
+ total += avl_numnodes(&spa->spa_errlist_scrub);
+ mutex_exit(&spa->spa_errlist_lock);
+ } else {
+#ifdef _KERNEL
+ mutex_enter(&spa->spa_errlog_lock);
+ total += get_errlog_size(spa, spa->spa_errlog_last);
+ total += get_errlog_size(spa, spa->spa_errlog_scrub);
+ mutex_exit(&spa->spa_errlog_lock);
+
+ mutex_enter(&spa->spa_errlist_lock);
+ total += get_errlist_size(spa, &spa->spa_errlist_last);
+ total += get_errlist_size(spa, &spa->spa_errlist_scrub);
+ mutex_exit(&spa->spa_errlist_lock);
+#endif
+ }
+ return (total);
+}
- mutex_enter(&spa->spa_errlog_lock);
- if (spa->spa_errlog_scrub != 0 &&
- zap_count(spa->spa_meta_objset, spa->spa_errlog_scrub,
- &count) == 0)
- total += count;
-
- if (spa->spa_errlog_last != 0 && !spa->spa_scrub_finished &&
- zap_count(spa->spa_meta_objset, spa->spa_errlog_last,
- &count) == 0)
- total += count;
- mutex_exit(&spa->spa_errlog_lock);
+/*
+ * This function sweeps through an on-disk error log and stores all bookmarks
+ * as error bookmarks in a new ZAP object. At the end we discard the old one,
+ * and spa_update_errlog() will set the spa's on-disk error log to new ZAP
+ * object.
+ */
+static void
+sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
+ dmu_tx_t *tx)
+{
+ zap_cursor_t zc;
+ zap_attribute_t za;
+ zbookmark_phys_t zb;
+ uint64_t count;
- mutex_enter(&spa->spa_errlist_lock);
- total += avl_numnodes(&spa->spa_errlist_last);
- total += avl_numnodes(&spa->spa_errlist_scrub);
- mutex_exit(&spa->spa_errlist_lock);
+ *newobj = zap_create(spa->spa_meta_objset, DMU_OT_ERROR_LOG,
+ DMU_OT_NONE, 0, tx);
- return (total);
+ /*
+ * If we cannot perform the upgrade we should clear the old on-disk
+ * error logs.
+ */
+ if (zap_count(spa->spa_meta_objset, spa_err_obj, &count) != 0) {
+ VERIFY0(dmu_object_free(spa->spa_meta_objset, spa_err_obj, tx));
+ return;
+ }
+
+ for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj);
+ zap_cursor_retrieve(&zc, &za) == 0;
+ zap_cursor_advance(&zc)) {
+ if (spa_upgrade_errlog_limit != 0 &&
+ zc.zc_cd == spa_upgrade_errlog_limit)
+ break;
+
+ name_to_bookmark(za.za_name, &zb);
+
+ zbookmark_err_phys_t zep;
+ zep.zb_object = zb.zb_object;
+ zep.zb_level = zb.zb_level;
+ zep.zb_blkid = zb.zb_blkid;
+
+ /*
+ * We cannot use get_head_and_birth_txg() because it will
+ * acquire the pool config lock, which we already have. In case
+ * of an error we simply continue.
+ */
+ uint64_t head_dataset_obj;
+ dsl_pool_t *dp = spa->spa_dsl_pool;
+ dsl_dataset_t *ds;
+ objset_t *os;
+
+ int error = dsl_dataset_hold_obj(dp, zb.zb_objset, FTAG, &ds);
+ if (error != 0)
+ continue;
+
+ head_dataset_obj =
+ dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj;
+
+ /*
+ * The objset and the dnode are required for getting the block
+ * pointer, which is used to determine if BP_IS_HOLE(). If
+ * getting the objset or the dnode fails, do not create a
+ * zap entry (presuming we know the dataset) as this may create
+ * spurious errors that we cannot ever resolve. If an error is
+ * truly persistent, it should re-appear after a scan.
+ */
+ if (dmu_objset_from_ds(ds, &os) != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ continue;
+ }
+
+ dnode_t *dn;
+ blkptr_t bp;
+
+ if (dnode_hold(os, zep.zb_object, FTAG, &dn) != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ continue;
+ }
+
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ error = dbuf_dnode_findbp(dn, zep.zb_level, zep.zb_blkid, &bp,
+ NULL, NULL);
+
+ zep.zb_birth = bp.blk_birth;
+ rw_exit(&dn->dn_struct_rwlock);
+ dnode_rele(dn, FTAG);
+ dsl_dataset_rele(ds, FTAG);
+
+ if (error != 0 || BP_IS_HOLE(&bp))
+ continue;
+
+ uint64_t err_obj;
+ error = zap_lookup_int_key(spa->spa_meta_objset, *newobj,
+ head_dataset_obj, &err_obj);
+
+ if (error == ENOENT) {
+ err_obj = zap_create(spa->spa_meta_objset,
+ DMU_OT_ERROR_LOG, DMU_OT_NONE, 0, tx);
+
+ (void) zap_update_int_key(spa->spa_meta_objset,
+ *newobj, head_dataset_obj, err_obj, tx);
+ }
+
+ char buf[64];
+ char *name = "";
+ errphys_to_name(&zep, buf, sizeof (buf));
+
+ (void) zap_update(spa->spa_meta_objset, err_obj,
+ buf, 1, strlen(name) + 1, name, tx);
+ }
+ zap_cursor_fini(&zc);
+
+ VERIFY0(dmu_object_free(spa->spa_meta_objset, spa_err_obj, tx));
+}
+
+/*
+ * Upgrade both on-disk error logs of the pool.  Under spa_errlog_lock, each
+ * of spa_errlog_last and spa_errlog_scrub that is non-zero is run through
+ * sync_upgrade_errlog() and then replaced by the object it returns.
+ */
+void
+spa_upgrade_errlog(spa_t *spa, dmu_tx_t *tx)
+{
+	mutex_enter(&spa->spa_errlog_lock);
+
+	if (spa->spa_errlog_last != 0) {
+		uint64_t new_last = 0;
+
+		sync_upgrade_errlog(spa, spa->spa_errlog_last, &new_last, tx);
+		spa->spa_errlog_last = new_last;
+	}
+
+	if (spa->spa_errlog_scrub != 0) {
+		uint64_t new_scrub = 0;
+
+		sync_upgrade_errlog(spa, spa->spa_errlog_scrub, &new_scrub,
+		    tx);
+		spa->spa_errlog_scrub = new_scrub;
+	}
+
+	mutex_exit(&spa->spa_errlog_lock);
+}
#ifdef _KERNEL
+/*
+ * If an error block is shared by two datasets it will be counted twice. For
+ * detailed message see spa_get_errlog_size() above.
+ */
static int
-process_error_log(spa_t *spa, uint64_t obj, void *addr, size_t *count)
+process_error_log(spa_t *spa, uint64_t obj, void *uaddr, uint64_t *count)
{
zap_cursor_t zc;
zap_attribute_t za;
- zbookmark_phys_t zb;
if (obj == 0)
return (0);
- for (zap_cursor_init(&zc, spa->spa_meta_objset, obj);
- zap_cursor_retrieve(&zc, &za) == 0;
- zap_cursor_advance(&zc)) {
+ if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
+ for (zap_cursor_init(&zc, spa->spa_meta_objset, obj);
+ zap_cursor_retrieve(&zc, &za) == 0;
+ zap_cursor_advance(&zc)) {
+ if (*count == 0) {
+ zap_cursor_fini(&zc);
+ return (SET_ERROR(ENOMEM));
+ }
+
+ zbookmark_phys_t zb;
+ name_to_bookmark(za.za_name, &zb);
+
+ if (copyout(&zb, (char *)uaddr +
+ (*count - 1) * sizeof (zbookmark_phys_t),
+ sizeof (zbookmark_phys_t)) != 0) {
+ zap_cursor_fini(&zc);
+ return (SET_ERROR(EFAULT));
+ }
+ *count -= 1;
- if (*count == 0) {
- zap_cursor_fini(&zc);
- return (SET_ERROR(ENOMEM));
}
+ zap_cursor_fini(&zc);
+ return (0);
+ }
- name_to_bookmark(za.za_name, &zb);
+ for (zap_cursor_init(&zc, spa->spa_meta_objset, obj);
+ zap_cursor_retrieve(&zc, &za) == 0;
+ zap_cursor_advance(&zc)) {
- if (copyout(&zb, (char *)addr +
- (*count - 1) * sizeof (zbookmark_phys_t),
- sizeof (zbookmark_phys_t)) != 0) {
- zap_cursor_fini(&zc);
- return (SET_ERROR(EFAULT));
+ zap_cursor_t head_ds_cursor;
+ zap_attribute_t head_ds_attr;
+
+ uint64_t head_ds_err_obj = za.za_first_integer;
+ uint64_t head_ds;
+ name_to_object(za.za_name, &head_ds);
+ for (zap_cursor_init(&head_ds_cursor, spa->spa_meta_objset,
+ head_ds_err_obj); zap_cursor_retrieve(&head_ds_cursor,
+ &head_ds_attr) == 0; zap_cursor_advance(&head_ds_cursor)) {
+
+ zbookmark_err_phys_t head_ds_block;
+ name_to_errphys(head_ds_attr.za_name, &head_ds_block);
+ int error = process_error_block(spa, head_ds,
+ &head_ds_block, count, uaddr, B_FALSE);
+
+ if (error != 0) {
+ zap_cursor_fini(&head_ds_cursor);
+ zap_cursor_fini(&zc);
+ return (error);
+ }
}
-
- *count -= 1;
+ zap_cursor_fini(&head_ds_cursor);
}
-
zap_cursor_fini(&zc);
-
return (0);
}
static int
-process_error_list(avl_tree_t *list, void *addr, size_t *count)
+process_error_list(spa_t *spa, avl_tree_t *list, void *uaddr, uint64_t *count)
{
spa_error_entry_t *se;
- for (se = avl_first(list); se != NULL; se = AVL_NEXT(list, se)) {
+ if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
+ for (se = avl_first(list); se != NULL;
+ se = AVL_NEXT(list, se)) {
- if (*count == 0)
- return (SET_ERROR(ENOMEM));
+ if (*count == 0)
+ return (SET_ERROR(ENOMEM));
- if (copyout(&se->se_bookmark, (char *)addr +
- (*count - 1) * sizeof (zbookmark_phys_t),
- sizeof (zbookmark_phys_t)) != 0)
- return (SET_ERROR(EFAULT));
+ if (copyout(&se->se_bookmark, (char *)uaddr +
+ (*count - 1) * sizeof (zbookmark_phys_t),
+ sizeof (zbookmark_phys_t)) != 0)
+ return (SET_ERROR(EFAULT));
- *count -= 1;
+ *count -= 1;
+ }
+ return (0);
}
+ for (se = avl_first(list); se != NULL; se = AVL_NEXT(list, se)) {
+ zbookmark_err_phys_t zep;
+ zep.zb_object = se->se_bookmark.zb_object;
+ zep.zb_level = se->se_bookmark.zb_level;
+ zep.zb_blkid = se->se_bookmark.zb_blkid;
+
+ uint64_t head_ds_obj;
+ int error = get_head_and_birth_txg(spa, &zep,
+ se->se_bookmark.zb_objset, &head_ds_obj);
+ if (error != 0)
+ return (error);
+
+ error = process_error_block(spa, head_ds_obj, &zep, count,
+ uaddr, B_FALSE);
+ if (error != 0)
+ return (error);
+ }
return (0);
}
#endif
@@ -229,7 +813,7 @@ process_error_list(avl_tree_t *list, void *addr, size_t *count)
* the error list lock when we are finished.
*/
int
-spa_get_errlog(spa_t *spa, void *uaddr, size_t *count)
+spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count)
{
int ret = 0;
@@ -244,10 +828,10 @@ spa_get_errlog(spa_t *spa, void *uaddr, size_t *count)
mutex_enter(&spa->spa_errlist_lock);
if (!ret)
- ret = process_error_list(&spa->spa_errlist_scrub, uaddr,
+ ret = process_error_list(spa, &spa->spa_errlist_scrub, uaddr,
count);
if (!ret)
- ret = process_error_list(&spa->spa_errlist_last, uaddr,
+ ret = process_error_list(spa, &spa->spa_errlist_last, uaddr,
count);
mutex_exit(&spa->spa_errlist_lock);
@@ -299,35 +883,91 @@ spa_errlog_drain(spa_t *spa)
/*
* Process a list of errors into the current on-disk log.
*/
-static void
+void
sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx)
{
spa_error_entry_t *se;
char buf[64];
void *cookie;
- if (avl_numnodes(t) != 0) {
- /* create log if necessary */
- if (*obj == 0)
- *obj = zap_create(spa->spa_meta_objset,
- DMU_OT_ERROR_LOG, DMU_OT_NONE,
- 0, tx);
+ /*
+ * Write every entry of the in-core error tree `t` to the on-disk log
+ * ZAP `*obj` (created here when `*obj` is 0), then empty the tree.
+ *
+ * Without SPA_FEATURE_HEAD_ERRLOG each entry is stored directly in
+ * `*obj` under its bookmark_to_name() key.  With the feature enabled,
+ * `*obj` maps a head dataset object number to a per-dataset ZAP and the
+ * entry is stored there under its errphys_to_name() key.  In both
+ * layouts the stored value is the entry's name string ("" if unset).
+ */
+ /* An empty tree writes nothing and does not create the log object. */
+ if (avl_numnodes(t) == 0)
+ return;
+
+ /* create log if necessary */
+ if (*obj == 0)
+ *obj = zap_create(spa->spa_meta_objset, DMU_OT_ERROR_LOG,
+ DMU_OT_NONE, 0, tx);
- /* add errors to the current log */
+ /* add errors to the current log */
+ if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
for (se = avl_first(t); se != NULL; se = AVL_NEXT(t, se)) {
char *name = se->se_name ? se->se_name : "";
bookmark_to_name(&se->se_bookmark, buf, sizeof (buf));
+ (void) zap_update(spa->spa_meta_objset, *obj, buf, 1,
+ strlen(name) + 1, name, tx);
+ }
+ } else {
+ for (se = avl_first(t); se != NULL; se = AVL_NEXT(t, se)) {
+ char *name = se->se_name ? se->se_name : "";
+
+ zbookmark_err_phys_t zep;
+ zep.zb_object = se->se_bookmark.zb_object;
+ zep.zb_level = se->se_bookmark.zb_level;
+ zep.zb_blkid = se->se_bookmark.zb_blkid;
+
+ /*
+ * If we cannot find out the head dataset and birth txg
+ * of the present error block, we simply continue.
+ * Reinserting that error block to the error lists,
+ * even if we are not syncing the final txg, results
+ * in duplicate posting of errors.
+ */
+ uint64_t head_dataset_obj;
+ int error = get_head_and_birth_txg(spa, &zep,
+ se->se_bookmark.zb_objset, &head_dataset_obj);
+ if (error != 0)
+ continue;
+
+ uint64_t err_obj;
+ error = zap_lookup_int_key(spa->spa_meta_objset,
+ *obj, head_dataset_obj, &err_obj);
+
+ if (error == ENOENT) {
+ /* First error for this head dataset: create its log. */
+ err_obj = zap_create(spa->spa_meta_objset,
+ DMU_OT_ERROR_LOG, DMU_OT_NONE, 0, tx);
+
+ (void) zap_update_int_key(spa->spa_meta_objset,
+ *obj, head_dataset_obj, err_obj, tx);
+ } else if (error != 0) {
+ /*
+ * Any other lookup failure leaves err_obj
+ * uninitialized.  Skip this entry rather than
+ * update an arbitrary object; like the case
+ * above, the entry is still purged below.
+ */
+ continue;
+ }
+ errphys_to_name(&zep, buf, sizeof (buf));
+
(void) zap_update(spa->spa_meta_objset,
- *obj, buf, 1, strlen(name) + 1, name, tx);
+ err_obj, buf, 1, strlen(name) + 1, name, tx);
}
+ }
+ /* purge the error list */
+ cookie = NULL;
+ while ((se = avl_destroy_nodes(t, &cookie)) != NULL)
+ kmem_free(se, sizeof (spa_error_entry_t));
+}
- /* purge the error list */
- cookie = NULL;
- while ((se = avl_destroy_nodes(t, &cookie)) != NULL)
- kmem_free(se, sizeof (spa_error_entry_t));
+/*
+ * Free the on-disk error log object `spa_err_obj`.
+ *
+ * When SPA_FEATURE_HEAD_ERRLOG is enabled, each entry of `spa_err_obj` is
+ * first walked and the object named by its za_first_integer is freed as
+ * well.  Every dmu_object_free() call is wrapped in VERIFY0().
+ */
+static void
+delete_errlog(spa_t *spa, uint64_t spa_err_obj, dmu_tx_t *tx)
+{
+ if (spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
+ zap_cursor_t zc;
+ zap_attribute_t za;
+ for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj);
+ zap_cursor_retrieve(&zc, &za) == 0;
+ zap_cursor_advance(&zc)) {
+ VERIFY0(dmu_object_free(spa->spa_meta_objset,
+ za.za_first_integer, tx));
+ }
+ zap_cursor_fini(&zc);
}
+ /* Finally free the top-level log object itself. */
+ VERIFY0(dmu_object_free(spa->spa_meta_objset, spa_err_obj, tx));
}
/*
@@ -378,8 +1018,7 @@ spa_errlog_sync(spa_t *spa, uint64_t txg)
*/
if (scrub_finished) {
if (spa->spa_errlog_last != 0)
- VERIFY(dmu_object_free(spa->spa_meta_objset,
- spa->spa_errlog_last, tx) == 0);
+ delete_errlog(spa, spa->spa_errlog_last, tx);
spa->spa_errlog_last = spa->spa_errlog_scrub;
spa->spa_errlog_scrub = 0;
@@ -406,6 +1045,137 @@ spa_errlog_sync(spa_t *spa, uint64_t txg)
mutex_exit(&spa->spa_errlog_lock);
}
+/*
+ * Remove dataset `ds` from the error log ZAP `spa_err_obj`.
+ *
+ * The entries of `spa_err_obj` are scanned; for the first one whose name
+ * decodes (via name_to_object()) to `ds`, the entry is removed and the
+ * object recorded in its za_first_integer is freed.  Does nothing when
+ * `spa_err_obj` is 0.
+ */
+static void
+delete_dataset_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t ds,
+ dmu_tx_t *tx)
+{
+ if (spa_err_obj == 0)
+ return;
+
+ zap_cursor_t zc;
+ zap_attribute_t za;
+ for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj);
+ zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) {
+ uint64_t head_ds;
+ name_to_object(za.za_name, &head_ds);
+ if (head_ds == ds) {
+ (void) zap_remove(spa->spa_meta_objset, spa_err_obj,
+ za.za_name, tx);
+ VERIFY0(dmu_object_free(spa->spa_meta_objset,
+ za.za_first_integer, tx));
+ /* Only the first matching entry is handled. */
+ break;
+ }
+ }
+ zap_cursor_fini(&zc);
+}
+
+/*
+ * Remove dataset `ds` from both on-disk error logs (spa_errlog_scrub and
+ * spa_errlog_last).  spa_errlog_lock is held across both removals.
+ */
+void
+spa_delete_dataset_errlog(spa_t *spa, uint64_t ds, dmu_tx_t *tx)
+{
+ mutex_enter(&spa->spa_errlog_lock);
+ delete_dataset_errlog(spa, spa->spa_errlog_scrub, ds, tx);
+ delete_dataset_errlog(spa, spa->spa_errlog_last, ds, tx);
+ mutex_exit(&spa->spa_errlog_lock);
+}
+
+/*
+ * Walk the previous-snapshot chain starting at dataset `old_head` until a
+ * snapshot is found whose dsl_dir has `new_head` as its head dataset.
+ *
+ * On success 0 is returned and *txg is set to the ds_prev_snap_txg value
+ * that was read together with the ds_prev_snap_obj link leading to that
+ * snapshot.  If dsl_dataset_hold_obj() fails, its error is returned and
+ * *txg is left untouched.
+ */
+static int
+find_txg_ancestor_snapshot(spa_t *spa, uint64_t new_head, uint64_t old_head,
+ uint64_t *txg)
+{
+ dsl_dataset_t *ds;
+ dsl_pool_t *dp = spa->spa_dsl_pool;
+
+ int error = dsl_dataset_hold_obj(dp, old_head, FTAG, &ds);
+ if (error != 0)
+ return (error);
+
+ uint64_t prev_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
+ uint64_t prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
+
+ while (prev_obj != 0) {
+ /* Drop the hold on the current dataset before taking the next. */
+ dsl_dataset_rele(ds, FTAG);
+ if ((error = dsl_dataset_hold_obj(dp, prev_obj,
+ FTAG, &ds)) == 0 &&
+ dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj == new_head)
+ break;
+
+ /* The hold failed, so there is no hold left to release here. */
+ if (error != 0)
+ return (error);
+
+ prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
+ prev_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
+ }
+ dsl_dataset_rele(ds, FTAG);
+ /* The chain is expected to reach a snapshot of new_head before ending. */
+ ASSERT(prev_obj != 0);
+ *txg = prev_obj_txg;
+ return (0);
+}
+
+/*
+ * Move error entries from the per-dataset log of `old_head` to the one of
+ * `new_head`, both looked up by dataset object number in `spa_err_obj`.
+ *
+ * Only entries whose decoded zb_birth is less than the txg reported by
+ * find_txg_ancestor_snapshot() are moved; they are written to the new log
+ * and removed from the old one.  The function returns without changes when
+ * `spa_err_obj` is 0, when `old_head` has no log, or when the txg lookup
+ * fails.
+ */
+static void
+swap_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t new_head, uint64_t
+ old_head, dmu_tx_t *tx)
+{
+ if (spa_err_obj == 0)
+ return;
+
+ uint64_t old_head_errlog;
+ int error = zap_lookup_int_key(spa->spa_meta_objset, spa_err_obj,
+ old_head, &old_head_errlog);
+
+ /* If no error log, then there is nothing to do. */
+ if (error != 0)
+ return;
+
+ uint64_t txg;
+ error = find_txg_ancestor_snapshot(spa, new_head, old_head, &txg);
+ if (error != 0)
+ return;
+
+ /*
+ * Create an error log if the file system being promoted does not
+ * already have one.
+ */
+ uint64_t new_head_errlog;
+ error = zap_lookup_int_key(spa->spa_meta_objset, spa_err_obj, new_head,
+ &new_head_errlog);
+
+ if (error != 0) {
+ new_head_errlog = zap_create(spa->spa_meta_objset,
+ DMU_OT_ERROR_LOG, DMU_OT_NONE, 0, tx);
+
+ (void) zap_update_int_key(spa->spa_meta_objset, spa_err_obj,
+ new_head, new_head_errlog, tx);
+ }
+
+ zap_cursor_t zc;
+ zap_attribute_t za;
+ zbookmark_err_phys_t err_block;
+ for (zap_cursor_init(&zc, spa->spa_meta_objset, old_head_errlog);
+ zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) {
+
+ /* Moved entries are stored with an empty string as their value. */
+ char *name = "";
+ name_to_errphys(za.za_name, &err_block);
+ if (err_block.zb_birth < txg) {
+ (void) zap_update(spa->spa_meta_objset, new_head_errlog,
+ za.za_name, 1, strlen(name) + 1, name, tx);
+
+ (void) zap_remove(spa->spa_meta_objset, old_head_errlog,
+ za.za_name, tx);
+ }
+ }
+ zap_cursor_fini(&zc);
+}
+
+/*
+ * Apply swap_errlog() to both on-disk error logs (spa_errlog_scrub and
+ * spa_errlog_last) for the given new and old head datasets.
+ * spa_errlog_lock is held across both calls.
+ */
+void
+spa_swap_errlog(spa_t *spa, uint64_t new_head_ds, uint64_t old_head_ds,
+ dmu_tx_t *tx)
+{
+ mutex_enter(&spa->spa_errlog_lock);
+ swap_errlog(spa, spa->spa_errlog_scrub, new_head_ds, old_head_ds, tx);
+ swap_errlog(spa, spa->spa_errlog_last, new_head_ds, old_head_ds, tx);
+ mutex_exit(&spa->spa_errlog_lock);
+}
+
#if defined(_KERNEL)
/* error handling */
EXPORT_SYMBOL(spa_log_error);
@@ -415,4 +1185,14 @@ EXPORT_SYMBOL(spa_errlog_rotate);
EXPORT_SYMBOL(spa_errlog_drain);
EXPORT_SYMBOL(spa_errlog_sync);
EXPORT_SYMBOL(spa_get_errlists);
+EXPORT_SYMBOL(spa_delete_dataset_errlog);
+EXPORT_SYMBOL(spa_swap_errlog);
+EXPORT_SYMBOL(sync_error_list);
+EXPORT_SYMBOL(spa_upgrade_errlog);
#endif
+
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs_spa, spa_, upgrade_errlog_limit, INT, ZMOD_RW,
+ "Limit the number of errors which will be upgraded to the new "
+ "on-disk error log when enabling head_errlog");
+/* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
index 110a4eab99f9..f831509a4247 100644
--- a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
+++ b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
@@ -257,7 +257,12 @@ static unsigned long zfs_unflushed_log_block_min = 1000;
* terms of performance. Thus we have a hard limit in the size of the log in
* terms of blocks.
*/
-static unsigned long zfs_unflushed_log_block_max = (1ULL << 18);
+static unsigned long zfs_unflushed_log_block_max = (1ULL << 17);
+
+/*
+ * Also we have a hard limit in the size of the log in terms of dirty TXGs.
+ */
+static unsigned long zfs_unflushed_log_txg_max = 1000;
/*
* Max # of rows allowed for the log_summary. The tradeoff here is accuracy and
@@ -333,9 +338,13 @@ spa_log_sm_set_blocklimit(spa_t *spa)
return;
}
- uint64_t calculated_limit =
- (spa_total_metaslabs(spa) * zfs_unflushed_log_block_pct) / 100;
- spa->spa_unflushed_stats.sus_blocklimit = MIN(MAX(calculated_limit,
+ uint64_t msdcount = 0;
+ for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+ e; e = list_next(&spa->spa_log_summary, e))
+ msdcount += e->lse_msdcount;
+
+ uint64_t limit = msdcount * zfs_unflushed_log_block_pct / 100;
+ spa->spa_unflushed_stats.sus_blocklimit = MIN(MAX(limit,
zfs_unflushed_log_block_min), zfs_unflushed_log_block_max);
}
@@ -380,8 +389,13 @@ spa_log_summary_verify_counts(spa_t *spa)
}
static boolean_t
-summary_entry_is_full(spa_t *spa, log_summary_entry_t *e)
+summary_entry_is_full(spa_t *spa, log_summary_entry_t *e, uint64_t txg)
{
+ if (e->lse_end == txg)
+ return (0);
+ if (e->lse_txgcount >= DIV_ROUND_UP(zfs_unflushed_log_txg_max,
+ zfs_max_logsm_summary_length))
+ return (1);
uint64_t blocks_per_row = MAX(1,
DIV_ROUND_UP(spa_log_sm_blocklimit(spa),
zfs_max_logsm_summary_length));
@@ -401,7 +415,7 @@ summary_entry_is_full(spa_t *spa, log_summary_entry_t *e)
* the metaslab.
*/
void
-spa_log_summary_decrement_mscount(spa_t *spa, uint64_t txg)
+spa_log_summary_decrement_mscount(spa_t *spa, uint64_t txg, boolean_t dirty)
{
/*
* We don't track summary data for read-only pools and this function
@@ -429,6 +443,8 @@ spa_log_summary_decrement_mscount(spa_t *spa, uint64_t txg)
}
target->lse_mscount--;
+ if (dirty)
+ target->lse_msdcount--;
}
/*
@@ -490,8 +506,10 @@ spa_log_summary_decrement_mscount(spa_t *spa, uint64_t txg)
void
spa_log_summary_decrement_blkcount(spa_t *spa, uint64_t blocks_gone)
{
- for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
- e != NULL; e = list_head(&spa->spa_log_summary)) {
+ log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+ if (e->lse_txgcount > 0)
+ e->lse_txgcount--;
+ for (; e != NULL; e = list_head(&spa->spa_log_summary)) {
if (e->lse_blkcount > blocks_gone) {
/*
* Assert that we stopped at an entry that is not
@@ -560,31 +578,52 @@ spa_log_sm_increment_current_mscount(spa_t *spa)
+/*
+ * Account `metaslabs_flushed`, `metaslabs_dirty` and `nblocks` for `txg`
+ * in the tail entry of spa_log_summary.
+ *
+ * A new tail entry (lse_start == lse_end == txg, lse_txgcount == 1) is
+ * appended when the list is empty or summary_entry_is_full() reports the
+ * current tail as full.  Otherwise, if `txg` is newer than the tail's
+ * lse_end, lse_end is advanced and lse_txgcount is incremented.
+ */
static void
summary_add_data(spa_t *spa, uint64_t txg, uint64_t metaslabs_flushed,
- uint64_t nblocks)
+ uint64_t metaslabs_dirty, uint64_t nblocks)
{
log_summary_entry_t *e = list_tail(&spa->spa_log_summary);
- if (e == NULL || summary_entry_is_full(spa, e)) {
+ if (e == NULL || summary_entry_is_full(spa, e, txg)) {
e = kmem_zalloc(sizeof (log_summary_entry_t), KM_SLEEP);
- e->lse_start = txg;
+ e->lse_start = e->lse_end = txg;
+ e->lse_txgcount = 1;
list_insert_tail(&spa->spa_log_summary, e);
}
ASSERT3U(e->lse_start, <=, txg);
+ /* Count each distinct txg added to this entry only once. */
+ if (e->lse_end < txg) {
+ e->lse_end = txg;
+ e->lse_txgcount++;
+ }
e->lse_mscount += metaslabs_flushed;
+ e->lse_msdcount += metaslabs_dirty;
e->lse_blkcount += nblocks;
}
+/*
+ * Add `nblocks` to the summary for the currently syncing txg, without
+ * changing the flushed or dirty metaslab counts.
+ */
static void
spa_log_summary_add_incoming_blocks(spa_t *spa, uint64_t nblocks)
{
- summary_add_data(spa, spa_syncing_txg(spa), 0, nblocks);
+ summary_add_data(spa, spa_syncing_txg(spa), 0, 0, nblocks);
}
+/*
+ * Record one flushed metaslab in the summary for the currently syncing
+ * txg; it is also counted as dirty when `dirty` is true.  No blocks are
+ * added.
+ */
void
-spa_log_summary_add_flushed_metaslab(spa_t *spa)
+spa_log_summary_add_flushed_metaslab(spa_t *spa, boolean_t dirty)
{
- summary_add_data(spa, spa_syncing_txg(spa), 1, 0);
+ summary_add_data(spa, spa_syncing_txg(spa), 1, dirty ? 1 : 0, 0);
+}
+
+/*
+ * Increment lse_msdcount on the summary entry covering `txg`: the entry
+ * immediately preceding the first one whose lse_start is greater than
+ * `txg`, or the tail entry if there is none.  Asserts that such an entry
+ * exists and that its lse_mscount is non-zero.
+ */
+void
+spa_log_summary_dirty_flushed_metaslab(spa_t *spa, uint64_t txg)
+{
+ log_summary_entry_t *target = NULL;
+ for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+ e != NULL; e = list_next(&spa->spa_log_summary, e)) {
+ if (e->lse_start > txg)
+ break;
+ target = e;
+ }
+ ASSERT3P(target, !=, NULL);
+ ASSERT3U(target->lse_mscount, !=, 0);
+ target->lse_msdcount++;
}
/*
@@ -630,6 +669,11 @@ spa_estimate_metaslabs_to_flush(spa_t *spa)
int64_t available_blocks =
spa_log_sm_blocklimit(spa) - spa_log_sm_nblocks(spa) - incoming;
+ int64_t available_txgs = zfs_unflushed_log_txg_max;
+ for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+ e; e = list_next(&spa->spa_log_summary, e))
+ available_txgs -= e->lse_txgcount;
+
/*
* This variable tells us the total number of flushes needed to
* keep the log size within the limit when we reach txgs_in_future.
@@ -637,9 +681,7 @@ spa_estimate_metaslabs_to_flush(spa_t *spa)
uint64_t total_flushes = 0;
/* Holds the current maximum of our estimates so far. */
- uint64_t max_flushes_pertxg =
- MIN(avl_numnodes(&spa->spa_metaslabs_by_flushed),
- zfs_min_metaslabs_to_flush);
+ uint64_t max_flushes_pertxg = zfs_min_metaslabs_to_flush;
/*
* For our estimations we only look as far in the future
@@ -653,11 +695,14 @@ spa_estimate_metaslabs_to_flush(spa_t *spa)
* then keep skipping TXGs accumulating more blocks
* based on the incoming rate until we exceed it.
*/
- if (available_blocks >= 0) {
- uint64_t skip_txgs = (available_blocks / incoming) + 1;
+ if (available_blocks >= 0 && available_txgs >= 0) {
+ uint64_t skip_txgs = MIN(available_txgs + 1,
+ (available_blocks / incoming) + 1);
available_blocks -= (skip_txgs * incoming);
+ available_txgs -= skip_txgs;
txgs_in_future += skip_txgs;
ASSERT3S(available_blocks, >=, -incoming);
+ ASSERT3S(available_txgs, >=, -1);
}
/*
@@ -666,9 +711,10 @@ spa_estimate_metaslabs_to_flush(spa_t *spa)
* based on the current entry in the summary, updating
* our available_blocks.
*/
- ASSERT3S(available_blocks, <, 0);
+ ASSERT(available_blocks < 0 || available_txgs < 0);
available_blocks += e->lse_blkcount;
- total_flushes += e->lse_mscount;
+ available_txgs += e->lse_txgcount;
+ total_flushes += e->lse_msdcount;
/*
* Keep the running maximum of the total_flushes that
@@ -680,8 +726,6 @@ spa_estimate_metaslabs_to_flush(spa_t *spa)
*/
max_flushes_pertxg = MAX(max_flushes_pertxg,
DIV_ROUND_UP(total_flushes, txgs_in_future));
- ASSERT3U(avl_numnodes(&spa->spa_metaslabs_by_flushed), >=,
- max_flushes_pertxg);
}
return (max_flushes_pertxg);
}
@@ -771,14 +815,11 @@ spa_flush_metaslabs(spa_t *spa, dmu_tx_t *tx)
uint64_t want_to_flush;
if (spa_flush_all_logs_requested(spa)) {
ASSERT3S(spa_state(spa), ==, POOL_STATE_EXPORTED);
- want_to_flush = avl_numnodes(&spa->spa_metaslabs_by_flushed);
+ want_to_flush = UINT64_MAX;
} else {
want_to_flush = spa_estimate_metaslabs_to_flush(spa);
}
- ASSERT3U(avl_numnodes(&spa->spa_metaslabs_by_flushed), >=,
- want_to_flush);
-
/* Used purely for verification purposes */
uint64_t visited = 0;
@@ -809,31 +850,22 @@ spa_flush_metaslabs(spa_t *spa, dmu_tx_t *tx)
if (want_to_flush == 0 && !spa_log_exceeds_memlimit(spa))
break;
- mutex_enter(&curr->ms_sync_lock);
- mutex_enter(&curr->ms_lock);
- boolean_t flushed = metaslab_flush(curr, tx);
- mutex_exit(&curr->ms_lock);
- mutex_exit(&curr->ms_sync_lock);
-
- /*
- * If we failed to flush a metaslab (because it was loading),
- * then we are done with the block heuristic as it's not
- * possible to destroy any log space maps once you've skipped
- * a metaslab. In that case we just set our counter to 0 but
- * we continue looping in case there is still memory pressure
- * due to unflushed changes. Note that, flushing a metaslab
- * that is not the oldest flushed in the pool, will never
- * destroy any log space maps [see spa_cleanup_old_sm_logs()].
- */
- if (!flushed) {
- want_to_flush = 0;
- } else if (want_to_flush > 0) {
- want_to_flush--;
- }
+ if (metaslab_unflushed_dirty(curr)) {
+ mutex_enter(&curr->ms_sync_lock);
+ mutex_enter(&curr->ms_lock);
+ metaslab_flush(curr, tx);
+ mutex_exit(&curr->ms_lock);
+ mutex_exit(&curr->ms_sync_lock);
+ if (want_to_flush > 0)
+ want_to_flush--;
+ } else
+ metaslab_unflushed_bump(curr, tx, B_FALSE);
visited++;
}
ASSERT3U(avl_numnodes(&spa->spa_metaslabs_by_flushed), >=, visited);
+
+ spa_log_sm_set_blocklimit(spa);
}
/*
@@ -904,6 +936,7 @@ spa_cleanup_old_sm_logs(spa_t *spa, dmu_tx_t *tx)
avl_remove(&spa->spa_sm_logs_by_txg, sls);
space_map_free_obj(mos, sls->sls_sm_obj, tx);
VERIFY0(zap_remove_int(mos, spacemap_zap, sls->sls_txg, tx));
+ spa_log_summary_decrement_blkcount(spa, sls->sls_nblocks);
spa->spa_unflushed_stats.sus_nblocks -= sls->sls_nblocks;
kmem_free(sls, sizeof (spa_log_sm_t));
}
@@ -963,12 +996,7 @@ spa_generate_syncing_log_sm(spa_t *spa, dmu_tx_t *tx)
VERIFY0(space_map_open(&spa->spa_syncing_log_sm, mos, sm_obj,
0, UINT64_MAX, SPA_MINBLOCKSHIFT));
- /*
- * If the log space map feature was just enabled, the blocklimit
- * has not yet been set.
- */
- if (spa_log_sm_blocklimit(spa) == 0)
- spa_log_sm_set_blocklimit(spa);
+ spa_log_sm_set_blocklimit(spa);
}
/*
@@ -1094,12 +1122,18 @@ spa_ld_log_sm_cb(space_map_entry_t *sme, void *arg)
panic("invalid maptype_t");
break;
}
+ if (!metaslab_unflushed_dirty(ms)) {
+ metaslab_set_unflushed_dirty(ms, B_TRUE);
+ spa_log_summary_dirty_flushed_metaslab(spa,
+ metaslab_unflushed_txg(ms));
+ }
return (0);
}
static int
spa_ld_log_sm_data(spa_t *spa)
{
+ spa_log_sm_t *sls, *psls;
int error = 0;
/*
@@ -1113,41 +1147,71 @@ spa_ld_log_sm_data(spa_t *spa)
ASSERT0(spa->spa_unflushed_stats.sus_memused);
hrtime_t read_logs_starttime = gethrtime();
- /* this is a no-op when we don't have space map logs */
- for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
- sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
- space_map_t *sm = NULL;
- error = space_map_open(&sm, spa_meta_objset(spa),
- sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT);
- if (error != 0) {
- spa_load_failed(spa, "spa_ld_log_sm_data(): failed at "
- "space_map_open(obj=%llu) [error %d]",
- (u_longlong_t)sls->sls_sm_obj, error);
- goto out;
+
+ /* Prefetch log spacemaps dnodes. */
+ for (sls = avl_first(&spa->spa_sm_logs_by_txg); sls;
+ sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
+ dmu_prefetch(spa_meta_objset(spa), sls->sls_sm_obj,
+ 0, 0, 0, ZIO_PRIORITY_SYNC_READ);
+ }
+
+ uint_t pn = 0;
+ uint64_t ps = 0;
+ psls = sls = avl_first(&spa->spa_sm_logs_by_txg);
+ while (sls != NULL) {
+ /* Prefetch log spacemaps up to 16 TXGs or MBs ahead. */
+ if (psls != NULL && pn < 16 &&
+ (pn < 2 || ps < 2 * dmu_prefetch_max)) {
+ error = space_map_open(&psls->sls_sm,
+ spa_meta_objset(spa), psls->sls_sm_obj, 0,
+ UINT64_MAX, SPA_MINBLOCKSHIFT);
+ if (error != 0) {
+ /*
+ * Report the object that failed to open (psls),
+ * not the one currently being loaded (sls).
+ */
+ spa_load_failed(spa, "spa_ld_log_sm_data(): "
+ "failed at space_map_open(obj=%llu) "
+ "[error %d]",
+ (u_longlong_t)psls->sls_sm_obj, error);
+ goto out;
+ }
+ dmu_prefetch(spa_meta_objset(spa), psls->sls_sm_obj,
+ 0, 0, space_map_length(psls->sls_sm),
+ ZIO_PRIORITY_ASYNC_READ);
+ /* pn and ps track the opened-ahead maps and their total length. */
+ pn++;
+ ps += space_map_length(psls->sls_sm);
+ psls = AVL_NEXT(&spa->spa_sm_logs_by_txg, psls);
+ continue;
}
+ /* Load TXG log spacemap into ms_unflushed_allocs/frees. */
+ cond_resched();
+ ASSERT0(sls->sls_nblocks);
+ sls->sls_nblocks = space_map_nblocks(sls->sls_sm);
+ spa->spa_unflushed_stats.sus_nblocks += sls->sls_nblocks;
+ summary_add_data(spa, sls->sls_txg,
+ sls->sls_mscount, 0, sls->sls_nblocks);
+
struct spa_ld_log_sm_arg vla = {
.slls_spa = spa,
.slls_txg = sls->sls_txg
};
- error = space_map_iterate(sm, space_map_length(sm),
- spa_ld_log_sm_cb, &vla);
+ error = space_map_iterate(sls->sls_sm,
+ space_map_length(sls->sls_sm), spa_ld_log_sm_cb, &vla);
if (error != 0) {
- space_map_close(sm);
spa_load_failed(spa, "spa_ld_log_sm_data(): failed "
"at space_map_iterate(obj=%llu) [error %d]",
(u_longlong_t)sls->sls_sm_obj, error);
goto out;
}
- ASSERT0(sls->sls_nblocks);
- sls->sls_nblocks = space_map_nblocks(sm);
- spa->spa_unflushed_stats.sus_nblocks += sls->sls_nblocks;
- summary_add_data(spa, sls->sls_txg,
- sls->sls_mscount, sls->sls_nblocks);
+ pn--;
+ ps -= space_map_length(sls->sls_sm);
+ space_map_close(sls->sls_sm);
+ sls->sls_sm = NULL;
+ sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls);
- space_map_close(sm);
+ /* Update log block limits considering just loaded. */
+ spa_log_sm_set_blocklimit(spa);
}
+
hrtime_t read_logs_endtime = gethrtime();
spa_load_note(spa,
"read %llu log space maps (%llu total blocks - blksz = %llu bytes) "
@@ -1157,6 +1221,18 @@ spa_ld_log_sm_data(spa_t *spa)
(longlong_t)((read_logs_endtime - read_logs_starttime) / 1000000));
out:
+ if (error != 0) {
+ for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
+ sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
+ if (sls->sls_sm) {
+ space_map_close(sls->sls_sm);
+ sls->sls_sm = NULL;
+ }
+ }
+ } else {
+ ASSERT0(pn);
+ ASSERT0(ps);
+ }
/*
* Now that the metaslabs contain their unflushed changes:
* [1] recalculate their actual allocated space
@@ -1237,6 +1313,9 @@ spa_ld_unflushed_txgs(vdev_t *vd)
}
ms->ms_unflushed_txg = entry.msp_unflushed_txg;
+ ms->ms_unflushed_dirty = B_FALSE;
+ ASSERT(range_tree_is_empty(ms->ms_unflushed_allocs));
+ ASSERT(range_tree_is_empty(ms->ms_unflushed_frees));
if (ms->ms_unflushed_txg != 0) {
mutex_enter(&spa->spa_flushed_ms_lock);
avl_add(&spa->spa_metaslabs_by_flushed, ms);
@@ -1300,6 +1379,10 @@ ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_min, ULONG, ZMOD_RW,
"Lower-bound limit for the maximum amount of blocks allowed in "
"log spacemap (see zfs_unflushed_log_block_max)");
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_txg_max, ULONG, ZMOD_RW,
+ "Hard limit (upper-bound) in the size of the space map log "
+ "in terms of dirty TXGs.");
+
ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_pct, ULONG, ZMOD_RW,
"Tunable used to determine the number of blocks that can be used for "
"the spacemap log, expressed as a percentage of the total number of "
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index db2d2c5e44fb..ce7f020a0d86 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -1523,13 +1523,6 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
if (txg == 0)
spa_config_exit(spa, SCL_ALLOC, FTAG);
- /*
- * Regardless whether this vdev was just added or it is being
- * expanded, the metaslab count has changed. Recalculate the
- * block limit.
- */
- spa_log_sm_set_blocklimit(spa);
-
return (0);
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c
index 17f9d6c90804..5508d273758d 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_removal.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c
@@ -1386,7 +1386,6 @@ vdev_remove_complete(spa_t *spa)
vdev_metaslab_fini(vd);
metaslab_group_destroy(vd->vdev_mg);
vd->vdev_mg = NULL;
- spa_log_sm_set_blocklimit(spa);
}
if (vd->vdev_log_mg != NULL) {
ASSERT0(vd->vdev_ms_count);
@@ -2131,7 +2130,6 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
* metaslab_class_histogram_verify()
*/
vdev_metaslab_fini(vd);
- spa_log_sm_set_blocklimit(spa);
spa_vdev_config_exit(spa, NULL, *txg, 0, FTAG);
*txg = spa_vdev_config_enter(spa);
@@ -2251,7 +2249,6 @@ spa_vdev_remove_top_check(vdev_t *vd)
* and not be raidz or draid.
*/
vdev_t *rvd = spa->spa_root_vdev;
- int num_indirect = 0;
for (uint64_t id = 0; id < rvd->vdev_children; id++) {
vdev_t *cvd = rvd->vdev_child[id];
@@ -2267,8 +2264,6 @@ spa_vdev_remove_top_check(vdev_t *vd)
if (cvd->vdev_ashift != 0 &&
cvd->vdev_alloc_bias == VDEV_BIAS_NONE)
ASSERT3U(cvd->vdev_ashift, ==, spa->spa_max_ashift);
- if (cvd->vdev_ops == &vdev_indirect_ops)
- num_indirect++;
if (!vdev_is_concrete(cvd))
continue;
if (vdev_get_nparity(cvd) != 0)
diff --git a/sys/contrib/openzfs/module/zfs/zfeature.c b/sys/contrib/openzfs/module/zfs/zfeature.c
index 9d16fff81d0a..fc9167aa6611 100644
--- a/sys/contrib/openzfs/module/zfs/zfeature.c
+++ b/sys/contrib/openzfs/module/zfs/zfeature.c
@@ -389,6 +389,13 @@ feature_enable_sync(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
!spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION) &&
feature->fi_feature == SPA_FEATURE_BOOKMARK_V2)
spa->spa_errata = 0;
+
+ /*
+ * Convert the old on-disk error log to the new format when activating
+ * the head_errlog feature.
+ */
+ if (feature->fi_feature == SPA_FEATURE_HEAD_ERRLOG)
+ spa_upgrade_errlog(spa, tx);
}
static void
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index a2824c5cc804..b3f32d64f3ef 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -5670,7 +5670,7 @@ zfs_ioc_error_log(zfs_cmd_t *zc)
{
spa_t *spa;
int error;
- size_t count = (size_t)zc->zc_nvlist_dst_size;
+ uint64_t count = zc->zc_nvlist_dst_size;
if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
return (error);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
index 62806e9fe8b1..a039b4da2833 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@@ -68,7 +68,9 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
+ atomic_inc_32(&zp->z_sync_writes_cnt);
zil_commit(zfsvfs->z_log, zp->z_id);
+ atomic_dec_32(&zp->z_sync_writes_cnt);
ZFS_EXIT(zfsvfs);
}
tsd_set(zfs_fsyncer_key, NULL);
@@ -357,11 +359,11 @@ zfs_clear_setid_bits_if_necessary(zfsvfs_t *zfsvfs, znode_t *zp, cred_t *cr,
if (*clear_setid_bits_txgp != dmu_tx_get_txg(tx)) {
vattr_t va = {0};
- va.va_mask = AT_MODE;
+ va.va_mask = ATTR_MODE;
va.va_nodeid = zp->z_id;
va.va_mode = newmode;
- zfs_log_setattr(zilog, tx, TX_SETATTR, zp, &va, AT_MODE,
- NULL);
+ zfs_log_setattr(zilog, tx, TX_SETATTR, zp, &va,
+ ATTR_MODE, NULL);
*clear_setid_bits_txgp = dmu_tx_get_txg(tx);
}
} else {
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index f6adea572418..2a16d5cef2e2 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -166,15 +166,6 @@ zio_init(void)
cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
KMC_NODEBUG : 0;
-#if defined(_ILP32) && defined(_KERNEL)
- /*
- * Cache size limited to 1M on 32-bit platforms until ARC
- * buffers no longer require virtual address space.
- */
- if (size > zfs_max_recordsize)
- break;
-#endif
-
while (!ISP2(p2))
p2 &= p2 - 1;
diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c
index eb68b05c567b..ac7c3a0c3232 100644
--- a/sys/contrib/openzfs/module/zfs/zvol.c
+++ b/sys/contrib/openzfs/module/zfs/zvol.c
@@ -513,6 +513,7 @@ zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE] = {
zvol_replay_err, /* TX_MKDIR_ATTR */
zvol_replay_err, /* TX_MKDIR_ACL_ATTR */
zvol_replay_err, /* TX_WRITE2 */
+ zvol_replay_err, /* TX_SETSAXATTR */
};
/*
diff --git a/sys/contrib/openzfs/module/zstd/Makefile.in b/sys/contrib/openzfs/module/zstd/Makefile.in
deleted file mode 100644
index 80096c3e379d..000000000000
--- a/sys/contrib/openzfs/module/zstd/Makefile.in
+++ /dev/null
@@ -1,69 +0,0 @@
-ifneq ($(KBUILD_EXTMOD),)
-src = @abs_srcdir@
-obj = @abs_builddir@
-zstd_include = $(src)/include
-else
-zstd_include = $(srctree)/$(src)/include
-endif
-
-MODULE := zzstd
-
-obj-$(CONFIG_ZFS) := $(MODULE).o
-
-asflags-y := -I$(zstd_include)
-ccflags-y := -I$(zstd_include)
-
-# Zstd uses -O3 by default, so we should follow
-ccflags-y += -O3
-
-# -fno-tree-vectorize gets set for gcc in zstd/common/compiler.h
-# Set it for other compilers, too.
-common_flags := -fno-tree-vectorize
-
-# SSE register return with SSE disabled if -march=znverX is passed
-common_flags += -U__BMI__
-
-# Quiet warnings about frame size due to unused code in unmodified zstd lib
-common_flags += -Wframe-larger-than=20480
-
-ccflags-y += $(common_flags)
-
-vanilla-objs := lib/common/entropy_common.o \
- lib/common/error_private.o \
- lib/common/fse_decompress.o \
- lib/common/pool.o \
- lib/common/zstd_common.o \
- lib/compress/fse_compress.o \
- lib/compress/hist.o \
- lib/compress/huf_compress.o \
- lib/compress/zstd_compress_literals.o \
- lib/compress/zstd_compress_sequences.o \
- lib/compress/zstd_compress_superblock.o \
- lib/compress/zstd_compress.o \
- lib/compress/zstd_double_fast.o \
- lib/compress/zstd_fast.o \
- lib/compress/zstd_lazy.o \
- lib/compress/zstd_ldm.o \
- lib/compress/zstd_opt.o \
- lib/decompress/huf_decompress.o \
- lib/decompress/zstd_ddict.o \
- lib/decompress/zstd_decompress.o \
- lib/decompress/zstd_decompress_block.o
-
-# Disable aarch64 neon SIMD instructions for kernel mode
-$(addprefix $(obj)/,$(vanilla-objs)) : ccflags-y += -include $(zstd_include)/aarch64_compat.h -include $(zstd_include)/zstd_compat_wrapper.h -Wp,-w $(common_flags)
-
-$(obj)/zfs_zstd.o: ccflags-y += -include $(zstd_include)/zstd_compat_wrapper.h $(common_flags)
-
-$(MODULE)-objs += zfs_zstd.o
-$(MODULE)-objs += zstd_sparc.o
-$(MODULE)-objs += $(vanilla-objs)
-
-all:
- mkdir -p lib/common lib/compress lib/decompress
-
-gensymbols:
- for obj in $(vanilla-objs); do echo; echo "/* $$obj: */"; @OBJDUMP@ -t $$obj | awk '$$2 == "g" && !/ zfs_/ {print "#define\t" $$6 " zfs_" $$6}' | sort; done >> include/zstd_compat_wrapper.h
-
-checksymbols:
- @OBJDUMP@ -t $(vanilla-objs) | awk '/file format/ {print} $$2 == "g" && !/ zfs_/ {++ret; print} END {exit ret}'
diff --git a/sys/contrib/openzfs/module/zstd/README.md b/sys/contrib/openzfs/module/zstd/README.md
index 26d618b61b6e..7ad00e0bd804 100644
--- a/sys/contrib/openzfs/module/zstd/README.md
+++ b/sys/contrib/openzfs/module/zstd/README.md
@@ -9,7 +9,7 @@ library, besides upgrading to a newer ZSTD release.
Tree structure:
-* `zfs_zstd.c` is the actual `zzstd` kernel module.
+* `zfs_zstd.c` contains the actual `zfs` kernel module hooks.
* `lib/` contains the unmodified version of the `Zstandard` library
* `zstd-in.c` is our template file for generating the single-file library
* `include/`: This directory contains supplemental includes for platform
@@ -25,16 +25,7 @@ To update ZSTD the following steps need to be taken:
`grep include [path to zstd]/contrib/single_file_libs/zstd-in.c | awk '{ print $2 }'`
3. Remove debug.c, threading.c, and zstdmt_compress.c.
4. Update Makefiles with resulting file lists.
-
-~~~
-
-Note: if the zstd library for zfs is updated to a newer version,
-the macro list in include/zstd_compat_wrapper.h usually needs to be updated.
-this can be done with some hand crafting of the output of the following
-script (on the object file generated from the "single-file library" script in zstd's
-contrib/single_file_libs):
-`nm zstd.o | awk '{print "#define "$3 " zfs_" $3}' > macrotable`
-
+5. Follow symbol renaming notes in `include/zstd_compat_wrapper.h`
## Altering ZSTD and breaking changes
diff --git a/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h b/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h
index de428175c7df..2c4baad27d4e 100644
--- a/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h
+++ b/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h
@@ -38,7 +38,7 @@
* This will cause a symbol collision with the older in-kernel zstd library.
*
* On update, truncate this file at the scissor line, rebuild the module,
- * and make gensymbols.
+ * and make gen-zstd-symbols.
*/
#define MEM_MODULE
diff --git a/sys/contrib/openzfs/module/zstd/lib/compress/fse_compress.c b/sys/contrib/openzfs/module/zstd/lib/compress/fse_compress.c
index a42759814fdd..e27414ccbbcd 100644
--- a/sys/contrib/openzfs/module/zstd/lib/compress/fse_compress.c
+++ b/sys/contrib/openzfs/module/zstd/lib/compress/fse_compress.c
@@ -304,7 +304,7 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
{
- size_t size;
+ size_t size __attribute__ ((unused));
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
return (FSE_CTable*)malloc(size);
diff --git a/sys/contrib/openzfs/module/zstd/lib/compress/zstd_compress_superblock.c b/sys/contrib/openzfs/module/zstd/lib/compress/zstd_compress_superblock.c
index b693866c0ac1..ffa4bb67597f 100644
--- a/sys/contrib/openzfs/module/zstd/lib/compress/zstd_compress_superblock.c
+++ b/sys/contrib/openzfs/module/zstd/lib/compress/zstd_compress_superblock.c
@@ -409,7 +409,7 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
const seqDef* const send = sequences + nbSeq;
const seqDef* sp = sstart;
size_t matchLengthSum = 0;
- size_t litLengthSum = 0;
+ size_t litLengthSum __attribute__ ((unused)) = 0;
while (send-sp > 0) {
ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
litLengthSum += seqLen.litLength;
diff --git a/sys/contrib/openzfs/module/zstd/zfs_zstd.c b/sys/contrib/openzfs/module/zstd/zfs_zstd.c
index 7f042b5bcd6f..2ccc6818754e 100644
--- a/sys/contrib/openzfs/module/zstd/zfs_zstd.c
+++ b/sys/contrib/openzfs/module/zstd/zfs_zstd.c
@@ -50,7 +50,7 @@
#include "lib/zstd.h"
#include "lib/common/zstd_errors.h"
-kstat_t *zstd_ksp = NULL;
+static kstat_t *zstd_ksp = NULL;
typedef struct zstd_stats {
kstat_named_t zstd_stat_alloc_fail;
@@ -702,7 +702,7 @@ zstd_meminit(void)
}
/* Release object from pool and free memory */
-static void __exit
+static void
release_pool(struct zstd_pool *pool)
{
mutex_destroy(&pool->barrier);
@@ -712,7 +712,7 @@ release_pool(struct zstd_pool *pool)
}
/* Release memory pool objects */
-static void __exit
+static void
zstd_mempool_deinit(void)
{
for (int i = 0; i < ZSTD_POOL_MAX; i++) {
@@ -765,7 +765,7 @@ zstd_init(void)
return (0);
}
-extern void __exit
+extern void
zstd_fini(void)
{
/* Deinitialize kstat */
@@ -783,12 +783,10 @@ zstd_fini(void)
}
#if defined(_KERNEL)
+#ifdef __FreeBSD__
module_init(zstd_init);
module_exit(zstd_fini);
-
-ZFS_MODULE_DESCRIPTION("ZSTD Compression for ZFS");
-ZFS_MODULE_LICENSE("Dual BSD/GPL");
-ZFS_MODULE_VERSION(ZSTD_VERSION_STRING "a");
+#endif
EXPORT_SYMBOL(zfs_zstd_compress);
EXPORT_SYMBOL(zfs_zstd_decompress_level);