aboutsummaryrefslogtreecommitdiff
path: root/sys/contrib/openzfs/module
diff options
context:
space:
mode:
Diffstat (limited to 'sys/contrib/openzfs/module')
-rw-r--r--sys/contrib/openzfs/module/Kbuild.in6
-rw-r--r--sys/contrib/openzfs/module/icp/algs/blake3/blake3.c4
-rw-r--r--sys/contrib/openzfs/module/icp/algs/modes/ccm.c1
-rw-r--r--sys/contrib/openzfs/module/icp/algs/modes/ctr.c1
-rw-r--r--sys/contrib/openzfs/module/icp/algs/modes/gcm.c1
-rw-r--r--sys/contrib/openzfs/module/icp/algs/modes/modes.c4
-rw-r--r--sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S2
-rw-r--r--sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S12
-rw-r--r--sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S16
-rw-r--r--sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S16
-rw-r--r--sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S16
-rw-r--r--sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S20
-rw-r--r--sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S16
-rw-r--r--sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S1
-rw-r--r--sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S1
-rw-r--r--sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h27
-rw-r--r--sys/contrib/openzfs/module/icp/io/sha2_mod.c13
-rw-r--r--sys/contrib/openzfs/module/lua/lapi.c4
-rw-r--r--sys/contrib/openzfs/module/lua/ldo.c2
-rw-r--r--sys/contrib/openzfs/module/lua/setjmp/setjmp_x86_64.S15
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/spl/acl_common.c4
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/spl/spl_string.c30
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c12
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c16
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c30
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/vdev_file.c8
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c37
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c4
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_compat.c4
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c278
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c68
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c5
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c1
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c8
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c2
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-err.c2
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-generic.c112
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c2
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-procfs-list.c2
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c1
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-zone.c3
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/abd_os.c10
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/arc_os.c8
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/mmp_os.c2
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/policy.c16
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c4
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c35
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/vdev_file.c8
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c93
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c1
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c2
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c104
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c2
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c6
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c5
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c354
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c9
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c3
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c6
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c6
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c159
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c4
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c34
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c10
-rw-r--r--sys/contrib/openzfs/module/zcommon/zfs_prop.c31
-rw-r--r--sys/contrib/openzfs/module/zcommon/zpool_prop.c3
-rw-r--r--sys/contrib/openzfs/module/zfs/abd.c6
-rw-r--r--sys/contrib/openzfs/module/zfs/arc.c88
-rw-r--r--sys/contrib/openzfs/module/zfs/btree.c50
-rw-r--r--sys/contrib/openzfs/module/zfs/dataset_kstats.c7
-rw-r--r--sys/contrib/openzfs/module/zfs/dbuf.c11
-rw-r--r--sys/contrib/openzfs/module/zfs/dmu.c27
-rw-r--r--sys/contrib/openzfs/module/zfs/dmu_objset.c7
-rw-r--r--sys/contrib/openzfs/module/zfs/dmu_recv.c6
-rw-r--r--sys/contrib/openzfs/module/zfs/dmu_send.c15
-rw-r--r--sys/contrib/openzfs/module/zfs/dmu_traverse.c3
-rw-r--r--sys/contrib/openzfs/module/zfs/dmu_zfetch.c4
-rw-r--r--sys/contrib/openzfs/module/zfs/dsl_bookmark.c1
-rw-r--r--sys/contrib/openzfs/module/zfs/dsl_crypt.c18
-rw-r--r--sys/contrib/openzfs/module/zfs/dsl_dataset.c3
-rw-r--r--sys/contrib/openzfs/module/zfs/dsl_deadlist.c5
-rw-r--r--sys/contrib/openzfs/module/zfs/dsl_dir.c59
-rw-r--r--sys/contrib/openzfs/module/zfs/dsl_pool.c17
-rw-r--r--sys/contrib/openzfs/module/zfs/dsl_prop.c93
-rw-r--r--sys/contrib/openzfs/module/zfs/dsl_scan.c17
-rw-r--r--sys/contrib/openzfs/module/zfs/fm.c1
-rw-r--r--sys/contrib/openzfs/module/zfs/metaslab.c20
-rw-r--r--sys/contrib/openzfs/module/zfs/mmp.c18
-rw-r--r--sys/contrib/openzfs/module/zfs/range_tree.c1
-rw-r--r--sys/contrib/openzfs/module/zfs/spa.c74
-rw-r--r--sys/contrib/openzfs/module/zfs/spa_checkpoint.c4
-rw-r--r--sys/contrib/openzfs/module/zfs/spa_log_spacemap.c40
-rw-r--r--sys/contrib/openzfs/module/zfs/spa_misc.c14
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev.c57
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_indirect.c9
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_initialize.c18
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_queue.c4
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_raidz_math.c23
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_rebuild.c22
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_trim.c5
-rw-r--r--sys/contrib/openzfs/module/zfs/zap_leaf.c4
-rw-r--r--sys/contrib/openzfs/module/zfs/zap_micro.c247
-rw-r--r--sys/contrib/openzfs/module/zfs/zcp.c14
-rw-r--r--sys/contrib/openzfs/module/zfs/zcp_get.c4
-rw-r--r--sys/contrib/openzfs/module/zfs/zfs_chksum.c38
-rw-r--r--sys/contrib/openzfs/module/zfs/zfs_fm.c2
-rw-r--r--sys/contrib/openzfs/module/zfs/zfs_ioctl.c8
-rw-r--r--sys/contrib/openzfs/module/zfs/zfs_log.c88
-rw-r--r--sys/contrib/openzfs/module/zfs/zfs_onexit.c4
-rw-r--r--sys/contrib/openzfs/module/zfs/zfs_replay.c153
-rw-r--r--sys/contrib/openzfs/module/zfs/zfs_vnops.c20
-rw-r--r--sys/contrib/openzfs/module/zfs/zil.c46
-rw-r--r--sys/contrib/openzfs/module/zfs/zio.c43
-rw-r--r--sys/contrib/openzfs/module/zfs/zio_compress.c2
-rw-r--r--sys/contrib/openzfs/module/zfs/zvol.c5
115 files changed, 2185 insertions, 962 deletions
diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in
index 7a20e6ee4615..581d50e64b42 100644
--- a/sys/contrib/openzfs/module/Kbuild.in
+++ b/sys/contrib/openzfs/module/Kbuild.in
@@ -150,12 +150,8 @@ $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \
$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \
$(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : ccflags-y += -I$(icp_include)
-# Suppress objtool "can't find jump dest instruction at" warnings. They
-# are caused by the constants which are defined in the text section of the
-# assembly file using .byte instructions (e.g. bswap_mask). The objtool
-# utility tries to interpret them as opcodes and obviously fails doing so.
+# Suppress objtool "return with modified stack frame" warnings.
OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y
-OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y
# Suppress objtool "unsupported stack pointer realignment" warnings. We are
# not using a DRAP register while aligning the stack to a 64 byte boundary.
diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3.c b/sys/contrib/openzfs/module/icp/algs/blake3/blake3.c
index 5f7018598820..604e05847ee6 100644
--- a/sys/contrib/openzfs/module/icp/algs/blake3/blake3.c
+++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3.c
@@ -189,9 +189,7 @@ static void chunk_state_update(const blake3_ops_t *ops,
input_len -= BLAKE3_BLOCK_LEN;
}
- size_t take = chunk_state_fill_buf(ctx, input, input_len);
- input += take;
- input_len -= take;
+ chunk_state_fill_buf(ctx, input, input_len);
}
static output_t chunk_state_output(const blake3_chunk_state_t *ctx)
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/ccm.c b/sys/contrib/openzfs/module/icp/algs/modes/ccm.c
index ed5498dafaa1..4a8bb9bbc2c8 100644
--- a/sys/contrib/openzfs/module/icp/algs/modes/ccm.c
+++ b/sys/contrib/openzfs/module/icp/algs/modes/ccm.c
@@ -67,7 +67,6 @@ ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
return (CRYPTO_SUCCESS);
}
- lastp = (uint8_t *)ctx->ccm_cb;
crypto_init_ptrs(out, &iov_or_mp, &offset);
mac_buf = (uint8_t *)ctx->ccm_mac_buf;
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/ctr.c b/sys/contrib/openzfs/module/icp/algs/modes/ctr.c
index c116ba3662ba..db6b1c71d5cd 100644
--- a/sys/contrib/openzfs/module/icp/algs/modes/ctr.c
+++ b/sys/contrib/openzfs/module/icp/algs/modes/ctr.c
@@ -60,7 +60,6 @@ ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length,
return (CRYPTO_SUCCESS);
}
- lastp = (uint8_t *)ctx->ctr_cb;
crypto_init_ptrs(out, &iov_or_mp, &offset);
do {
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c
index ca328d54a7e6..16ef14b8ccaf 100644
--- a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c
+++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c
@@ -118,7 +118,6 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
return (CRYPTO_SUCCESS);
}
- lastp = (uint8_t *)ctx->gcm_cb;
crypto_init_ptrs(out, &iov_or_mp, &offset);
gops = gcm_impl_get_ops();
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/modes.c b/sys/contrib/openzfs/module/icp/algs/modes/modes.c
index b98db0ac14ec..2d1b5ff1a919 100644
--- a/sys/contrib/openzfs/module/icp/algs/modes/modes.c
+++ b/sys/contrib/openzfs/module/icp/algs/modes/modes.c
@@ -106,8 +106,10 @@ crypto_get_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset,
} else {
/* one block spans two iovecs */
*out_data_1_len = iov_len - offset;
- if (vec_idx == zfs_uio_iovcnt(uio))
+ if (vec_idx == zfs_uio_iovcnt(uio)) {
+ *out_data_2 = NULL;
return;
+ }
vec_idx++;
zfs_uio_iov_at_index(uio, vec_idx, &iov_base, &iov_len);
*out_data_2 = (uint8_t *)iov_base;
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S
index f546e8933be1..a0525dd464f5 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S
@@ -704,6 +704,7 @@ enc_tab:
ENTRY_NP(aes_encrypt_amd64)
+ ENDBR
#ifdef GLADMAN_INTERFACE
// Original interface
sub $[4*8], %rsp // gnu/linux/opensolaris binary interface
@@ -809,6 +810,7 @@ dec_tab:
ENTRY_NP(aes_decrypt_amd64)
+ ENDBR
#ifdef GLADMAN_INTERFACE
// Original interface
sub $[4*8], %rsp // gnu/linux/opensolaris binary interface
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S
index f4d9cb766d46..cb08430b81ed 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S
@@ -30,16 +30,6 @@
#define _ASM
#include <sys/asm_linkage.h>
-#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
-#if __has_include(<cet.h>)
-#include <cet.h>
-#endif
-#endif
-
-#if !defined(_CET_ENDBR)
-#define _CET_ENDBR
-#endif
-
.intel_syntax noprefix
.global zfs_blake3_hash_many_avx2
.text
@@ -47,7 +37,7 @@
.type zfs_blake3_hash_many_avx2,@function
.p2align 6
zfs_blake3_hash_many_avx2:
- _CET_ENDBR
+ ENDBR
push r15
push r14
push r13
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S
index 71b5715c88c1..960406ea2c01 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S
@@ -30,16 +30,6 @@
#define _ASM
#include <sys/asm_linkage.h>
-#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
-#if __has_include(<cet.h>)
-#include <cet.h>
-#endif
-#endif
-
-#if !defined(_CET_ENDBR)
-#define _CET_ENDBR
-#endif
-
.intel_syntax noprefix
.global zfs_blake3_hash_many_avx512
.global zfs_blake3_compress_in_place_avx512
@@ -52,7 +42,7 @@
.p2align 6
zfs_blake3_hash_many_avx512:
- _CET_ENDBR
+ ENDBR
push r15
push r14
push r13
@@ -2409,7 +2399,7 @@ zfs_blake3_hash_many_avx512:
jmp 4b
.p2align 6
zfs_blake3_compress_in_place_avx512:
- _CET_ENDBR
+ ENDBR
vmovdqu xmm0, xmmword ptr [rdi]
vmovdqu xmm1, xmmword ptr [rdi+0x10]
movzx eax, r8b
@@ -2491,7 +2481,7 @@ zfs_blake3_compress_in_place_avx512:
.p2align 6
zfs_blake3_compress_xof_avx512:
- _CET_ENDBR
+ ENDBR
vmovdqu xmm0, xmmword ptr [rdi]
vmovdqu xmm1, xmmword ptr [rdi+0x10]
movzx eax, r8b
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S
index 20689a7dcef5..c4290aaa8faf 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S
@@ -30,16 +30,6 @@
#define _ASM
#include <sys/asm_linkage.h>
-#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
-#if __has_include(<cet.h>)
-#include <cet.h>
-#endif
-#endif
-
-#if !defined(_CET_ENDBR)
-#define _CET_ENDBR
-#endif
-
.intel_syntax noprefix
.global zfs_blake3_hash_many_sse2
.global zfs_blake3_compress_in_place_sse2
@@ -52,7 +42,7 @@
.p2align 6
zfs_blake3_hash_many_sse2:
- _CET_ENDBR
+ ENDBR
push r15
push r14
push r13
@@ -2050,7 +2040,7 @@ zfs_blake3_hash_many_sse2:
.p2align 6
zfs_blake3_compress_in_place_sse2:
- _CET_ENDBR
+ ENDBR
movups xmm0, xmmword ptr [rdi]
movups xmm1, xmmword ptr [rdi+0x10]
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
@@ -2161,7 +2151,7 @@ zfs_blake3_compress_in_place_sse2:
.p2align 6
zfs_blake3_compress_xof_sse2:
- _CET_ENDBR
+ ENDBR
movups xmm0, xmmword ptr [rdi]
movups xmm1, xmmword ptr [rdi+0x10]
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S
index c5975a4f0877..45b90cc9ed89 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S
@@ -30,16 +30,6 @@
#define _ASM
#include <sys/asm_linkage.h>
-#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
-#if __has_include(<cet.h>)
-#include <cet.h>
-#endif
-#endif
-
-#if !defined(_CET_ENDBR)
-#define _CET_ENDBR
-#endif
-
.intel_syntax noprefix
.global zfs_blake3_compress_in_place_sse41
.global zfs_blake3_compress_xof_sse41
@@ -52,7 +42,7 @@
.p2align 6
zfs_blake3_hash_many_sse41:
- _CET_ENDBR
+ ENDBR
push r15
push r14
push r13
@@ -1812,7 +1802,7 @@ zfs_blake3_hash_many_sse41:
jmp 4b
.p2align 6
zfs_blake3_compress_in_place_sse41:
- _CET_ENDBR
+ ENDBR
movups xmm0, xmmword ptr [rdi]
movups xmm1, xmmword ptr [rdi+0x10]
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
@@ -1911,7 +1901,7 @@ zfs_blake3_compress_in_place_sse41:
RET
.p2align 6
zfs_blake3_compress_xof_sse41:
- _CET_ENDBR
+ ENDBR
movups xmm0, xmmword ptr [rdi]
movups xmm1, xmmword ptr [rdi+0x10]
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
index 70e419c2e4ab..cf17b3768712 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
@@ -47,6 +47,9 @@
#if defined(__x86_64__) && defined(HAVE_AVX) && \
defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)
+#define _ASM
+#include <sys/asm_linkage.h>
+
.extern gcm_avx_can_use_movbe
.text
@@ -56,6 +59,7 @@
.align 32
_aesni_ctr32_ghash_6x:
.cfi_startproc
+ ENDBR
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
@@ -363,7 +367,7 @@ _aesni_ctr32_ghash_6x:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
#endif /* ifdef HAVE_MOVBE */
@@ -372,6 +376,7 @@ _aesni_ctr32_ghash_6x:
.align 32
_aesni_ctr32_ghash_no_movbe_6x:
.cfi_startproc
+ ENDBR
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
@@ -691,7 +696,7 @@ _aesni_ctr32_ghash_no_movbe_6x:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
.size _aesni_ctr32_ghash_no_movbe_6x,.-_aesni_ctr32_ghash_no_movbe_6x
@@ -700,6 +705,7 @@ _aesni_ctr32_ghash_no_movbe_6x:
.align 32
aesni_gcm_decrypt:
.cfi_startproc
+ ENDBR
xorq %r10,%r10
cmpq $0x60,%rdx
jb .Lgcm_dec_abort
@@ -810,13 +816,14 @@ aesni_gcm_decrypt:
.cfi_def_cfa_register %rsp
.Lgcm_dec_abort:
movq %r10,%rax
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
.type _aesni_ctr32_6x,@function
.align 32
_aesni_ctr32_6x:
.cfi_startproc
+ ENDBR
vmovdqu 0-128(%rcx),%xmm4
vmovdqu 32(%r11),%xmm2
leaq -2(%rbp),%r13 // ICP uses 10,12,14 not 9,11,13 for rounds.
@@ -880,7 +887,7 @@ _aesni_ctr32_6x:
vmovups %xmm14,80(%rsi)
leaq 96(%rsi),%rsi
- .byte 0xf3,0xc3
+ RET
.align 32
.Lhandle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
@@ -911,6 +918,7 @@ _aesni_ctr32_6x:
.align 32
aesni_gcm_encrypt:
.cfi_startproc
+ ENDBR
xorq %r10,%r10
cmpq $288,%rdx
jb .Lgcm_enc_abort
@@ -1186,7 +1194,7 @@ aesni_gcm_encrypt:
.cfi_def_cfa_register %rsp
.Lgcm_enc_abort:
movq %r10,%rax
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
@@ -1239,6 +1247,7 @@ atomic_toggle_boolean_nv:
RET
.size atomic_toggle_boolean_nv,.-atomic_toggle_boolean_nv
+.pushsection .rodata
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@@ -1252,6 +1261,7 @@ atomic_toggle_boolean_nv:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
+.popsection
/* Mark the stack non-executable. */
#if defined(__linux__) && defined(__ELF__)
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S
index 90cc36b43a78..bf3724a23eae 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S
@@ -97,6 +97,9 @@
#if defined(__x86_64__) && defined(HAVE_AVX) && \
defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)
+#define _ASM
+#include <sys/asm_linkage.h>
+
.text
.globl gcm_gmult_clmul
@@ -104,6 +107,7 @@
.align 16
gcm_gmult_clmul:
.cfi_startproc
+ ENDBR
.L_gmult_clmul:
movdqu (%rdi),%xmm0
movdqa .Lbswap_mask(%rip),%xmm5
@@ -149,7 +153,7 @@ gcm_gmult_clmul:
pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%rdi)
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
.size gcm_gmult_clmul,.-gcm_gmult_clmul
@@ -158,6 +162,7 @@ gcm_gmult_clmul:
.align 32
gcm_init_htab_avx:
.cfi_startproc
+ ENDBR
vzeroupper
vmovdqu (%rsi),%xmm2
@@ -262,7 +267,7 @@ gcm_init_htab_avx:
vmovdqu %xmm5,-16(%rdi)
vzeroupper
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
.size gcm_init_htab_avx,.-gcm_init_htab_avx
@@ -271,6 +276,7 @@ gcm_init_htab_avx:
.align 32
gcm_gmult_avx:
.cfi_startproc
+ ENDBR
jmp .L_gmult_clmul
.cfi_endproc
.size gcm_gmult_avx,.-gcm_gmult_avx
@@ -279,6 +285,7 @@ gcm_gmult_avx:
.align 32
gcm_ghash_avx:
.cfi_startproc
+ ENDBR
vzeroupper
vmovdqu (%rdi),%xmm10
@@ -649,9 +656,11 @@ gcm_ghash_avx:
vpshufb %xmm13,%xmm10,%xmm10
vmovdqu %xmm10,(%rdi)
vzeroupper
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
.size gcm_ghash_avx,.-gcm_ghash_avx
+
+.pushsection .rodata
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@@ -705,6 +714,7 @@ gcm_ghash_avx:
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
+.popsection
/* Mark the stack non-executable. */
#if defined(__linux__) && defined(__ELF__)
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S
index 1391bd59a017..60d34b4a3be0 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S
@@ -84,6 +84,7 @@ SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
ENTRY_NP(SHA256TransformBlocks)
.cfi_startproc
+ ENDBR
movq %rsp, %rax
.cfi_def_cfa_register %rax
push %rbx
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S
index e61e96957bc6..ed7fb362a1ac 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S
@@ -85,6 +85,7 @@ SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
ENTRY_NP(SHA512TransformBlocks)
.cfi_startproc
+ ENDBR
movq %rsp, %rax
.cfi_def_cfa_register %rax
push %rbx
diff --git a/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h b/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h
index 58964c5d4497..e3e769ffd858 100644
--- a/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h
+++ b/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h
@@ -30,9 +30,29 @@
#include <sys/stack.h>
#include <sys/trap.h>
-#if defined(__linux__) && defined(CONFIG_SLS)
-#define RET ret; int3
-#else
+#if defined(_KERNEL) && defined(__linux__)
+#include <linux/linkage.h>
+#endif
+
+#ifndef ENDBR
+#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
+/* CSTYLED */
+#if __has_include(<cet.h>)
+
+#include <cet.h>
+
+#ifdef _CET_ENDBR
+#define ENDBR _CET_ENDBR
+#endif /* _CET_ENDBR */
+
+#endif /* <cet.h> */
+#endif /* __ELF__ && __CET__ && __has_include */
+#endif /* !ENDBR */
+
+#ifndef ENDBR
+#define ENDBR
+#endif
+#ifndef RET
#define RET ret
#endif
@@ -122,6 +142,7 @@ extern "C" {
* insert the calls to mcount for profiling. ENTRY_NP is identical, but
* never calls mcount.
*/
+#undef ENTRY
#define ENTRY(x) \
.text; \
.align ASM_ENTRY_ALIGN; \
diff --git a/sys/contrib/openzfs/module/icp/io/sha2_mod.c b/sys/contrib/openzfs/module/icp/io/sha2_mod.c
index fadb58b81881..a58f0982c8c0 100644
--- a/sys/contrib/openzfs/module/icp/io/sha2_mod.c
+++ b/sys/contrib/openzfs/module/icp/io/sha2_mod.c
@@ -737,12 +737,15 @@ sha2_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
*/
if (mechanism->cm_type % 3 == 2) {
if (mechanism->cm_param == NULL ||
- mechanism->cm_param_len != sizeof (ulong_t))
- ret = CRYPTO_MECHANISM_PARAM_INVALID;
- PROV_SHA2_GET_DIGEST_LEN(mechanism,
- PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len);
- if (PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len > sha_digest_len)
+ mechanism->cm_param_len != sizeof (ulong_t)) {
ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ } else {
+ PROV_SHA2_GET_DIGEST_LEN(mechanism,
+ PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len);
+ if (PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len >
+ sha_digest_len)
+ ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
}
if (ret != CRYPTO_SUCCESS) {
diff --git a/sys/contrib/openzfs/module/lua/lapi.c b/sys/contrib/openzfs/module/lua/lapi.c
index 726e5c2ad4bb..703cf4cc2a36 100644
--- a/sys/contrib/openzfs/module/lua/lapi.c
+++ b/sys/contrib/openzfs/module/lua/lapi.c
@@ -250,6 +250,8 @@ LUA_API int lua_type (lua_State *L, int idx) {
LUA_API const char *lua_typename (lua_State *L, int t) {
UNUSED(L);
+ if (t > 8 || t < 0)
+ return "internal_type_error";
return ttypename(t);
}
@@ -442,7 +444,7 @@ LUA_API const void *lua_topointer (lua_State *L, int idx) {
case LUA_TTABLE: return hvalue(o);
case LUA_TLCL: return clLvalue(o);
case LUA_TCCL: return clCvalue(o);
- case LUA_TLCF: return cast(void *, cast(size_t, fvalue(o)));
+ case LUA_TLCF: return cast(void *, cast(uintptr_t, fvalue(o)));
case LUA_TTHREAD: return thvalue(o);
case LUA_TUSERDATA:
case LUA_TLIGHTUSERDATA:
diff --git a/sys/contrib/openzfs/module/lua/ldo.c b/sys/contrib/openzfs/module/lua/ldo.c
index 24677596de12..6bef80514ce2 100644
--- a/sys/contrib/openzfs/module/lua/ldo.c
+++ b/sys/contrib/openzfs/module/lua/ldo.c
@@ -452,7 +452,7 @@ int luaD_poscall (lua_State *L, StkId firstResult) {
}
res = ci->func; /* res == final position of 1st result */
wanted = ci->nresults;
- L->ci = ci = ci->previous; /* back to caller */
+ L->ci = ci->previous; /* back to caller */
/* move results to correct place */
for (i = wanted; i != 0 && firstResult < L->top; i--)
setobjs2s(L, res++, firstResult++);
diff --git a/sys/contrib/openzfs/module/lua/setjmp/setjmp_x86_64.S b/sys/contrib/openzfs/module/lua/setjmp/setjmp_x86_64.S
index fd661d72eedf..7e13fea05dda 100644
--- a/sys/contrib/openzfs/module/lua/setjmp/setjmp_x86_64.S
+++ b/sys/contrib/openzfs/module/lua/setjmp/setjmp_x86_64.S
@@ -23,7 +23,15 @@
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+#if defined(_KERNEL) && defined(__linux__)
+#include <linux/linkage.h>
+#endif
+
+#ifndef RET
+#define RET ret
+#endif
+#undef ENTRY
#define ENTRY(x) \
.text; \
.align 8; \
@@ -34,13 +42,6 @@ x:
#define SET_SIZE(x) \
.size x, [.-x]
-
-#if defined(__linux__) && defined(CONFIG_SLS)
-#define RET ret; int3
-#else
-#define RET ret
-#endif
-
/*
* Setjmp and longjmp implement non-local gotos using state vectors
* type label_t.
diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/acl_common.c b/sys/contrib/openzfs/module/os/freebsd/spl/acl_common.c
index b692ccdf232a..04a5d2869d1b 100644
--- a/sys/contrib/openzfs/module/os/freebsd/spl/acl_common.c
+++ b/sys/contrib/openzfs/module/os/freebsd/spl/acl_common.c
@@ -1654,13 +1654,13 @@ acl_trivial_create(mode_t mode, boolean_t isdir, ace_t **acl, int *count)
*/
int
ace_trivial_common(void *acep, int aclcnt,
- uint64_t (*walk)(void *, uint64_t, int aclcnt,
+ uintptr_t (*walk)(void *, uintptr_t, int aclcnt,
uint16_t *, uint16_t *, uint32_t *))
{
uint16_t flags;
uint32_t mask;
uint16_t type;
- uint64_t cookie = 0;
+ uintptr_t cookie = 0;
while ((cookie = walk(acep, cookie, aclcnt, &flags, &type, &mask))) {
switch (flags & ACE_TYPE_FLAGS) {
diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_string.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_string.c
index 523e10ff6936..eb74720c984a 100644
--- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_string.c
+++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_string.c
@@ -105,3 +105,33 @@ kmem_strfree(char *str)
ASSERT3P(str, !=, NULL);
kmem_free(str, strlen(str) + 1);
}
+
+/*
+ * kmem_scnprintf() will return the number of characters that it would have
+ * printed whenever it is limited by value of the size variable, rather than
+ * the number of characters that it did print. This can cause misbehavior on
+ * subsequent uses of the return value, so we define a safe version that will
+ * return the number of characters actually printed, minus the NULL format
+ * character. Subsequent use of this by the safe string functions is safe
+ * whether it is snprintf(), strlcat() or strlcpy().
+ */
+
+int
+kmem_scnprintf(char *restrict str, size_t size, const char *restrict fmt, ...)
+{
+ int n;
+ va_list ap;
+
+ /* Make the 0 case a no-op so that we do not return -1 */
+ if (size == 0)
+ return (0);
+
+ va_start(ap, fmt);
+ n = vsnprintf(str, size, fmt, ap);
+ va_end(ap);
+
+ if (n >= size)
+ n = size - 1;
+
+ return (n);
+}
diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c
index ff11f5d7acb8..a07098afc5b4 100644
--- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c
+++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c
@@ -125,7 +125,6 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
struct vfsconf *vfsp;
struct mount *mp;
vnode_t *vp, *mvp;
- struct ucred *pcr, *tcr;
int error;
ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot");
@@ -195,18 +194,7 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
*/
mp->mnt_flag |= MNT_IGNORE;
- /*
- * XXX: This is evil, but we can't mount a snapshot as a regular user.
- * XXX: Is is safe when snapshot is mounted from within a jail?
- */
- tcr = td->td_ucred;
- pcr = td->td_proc->p_ucred;
- td->td_ucred = kcred;
- td->td_proc->p_ucred = kcred;
error = VFS_MOUNT(mp);
- td->td_ucred = tcr;
- td->td_proc->p_ucred = pcr;
-
if (error != 0) {
/*
* Clear VI_MOUNT and decrement the use count "atomically",
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c
index 0a19fbba717d..bb3cbc39ec75 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c
@@ -124,7 +124,9 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
int vecnum;
zfs_iocparm_t *zp;
zfs_cmd_t *zc;
+#ifdef ZFS_LEGACY_SUPPORT
zfs_cmd_legacy_t *zcl;
+#endif
int rc, error;
void *uaddr;
@@ -133,7 +135,9 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
zp = (void *)arg;
uaddr = (void *)(uintptr_t)zp->zfs_cmd;
error = 0;
+#ifdef ZFS_LEGACY_SUPPORT
zcl = NULL;
+#endif
if (len != sizeof (zfs_iocparm_t)) {
printf("len %d vecnum: %d sizeof (zfs_cmd_t) %ju\n",
@@ -142,6 +146,7 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
}
zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
+#ifdef ZFS_LEGACY_SUPPORT
/*
* Remap ioctl code for legacy user binaries
*/
@@ -157,22 +162,29 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
goto out;
}
zfs_cmd_legacy_to_ozfs(zcl, zc);
- } else if (copyin(uaddr, zc, sizeof (zfs_cmd_t))) {
+ } else
+#endif
+ if (copyin(uaddr, zc, sizeof (zfs_cmd_t))) {
error = SET_ERROR(EFAULT);
goto out;
}
error = zfsdev_ioctl_common(vecnum, zc, 0);
+#ifdef ZFS_LEGACY_SUPPORT
if (zcl) {
zfs_cmd_ozfs_to_legacy(zc, zcl);
rc = copyout(zcl, uaddr, sizeof (*zcl));
- } else {
+ } else
+#endif
+ {
rc = copyout(zc, uaddr, sizeof (*zc));
}
if (error == 0 && rc != 0)
error = SET_ERROR(EFAULT);
out:
+#ifdef ZFS_LEGACY_SUPPORT
if (zcl)
kmem_free(zcl, sizeof (zfs_cmd_legacy_t));
+#endif
kmem_free(zc, sizeof (zfs_cmd_t));
MPASS(tsd_get(rrw_tsd_key) == NULL);
return (error);
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
index 980bb1c0f941..48af1eaf8ea7 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
@@ -137,11 +137,11 @@ SYSCTL_CONST_STRING(_vfs_zfs_version, OID_AUTO, module, CTLFLAG_RD,
/* arc.c */
int
-param_set_arc_long(SYSCTL_HANDLER_ARGS)
+param_set_arc_u64(SYSCTL_HANDLER_ARGS)
{
int err;
- err = sysctl_handle_long(oidp, arg1, 0, req);
+ err = sysctl_handle_64(oidp, arg1, 0, req);
if (err != 0 || req->newptr == NULL)
return (err);
@@ -171,7 +171,7 @@ param_set_arc_max(SYSCTL_HANDLER_ARGS)
int err;
val = zfs_arc_max;
- err = sysctl_handle_long(oidp, &val, 0, req);
+ err = sysctl_handle_64(oidp, &val, 0, req);
if (err != 0 || req->newptr == NULL)
return (SET_ERROR(err));
@@ -203,7 +203,7 @@ param_set_arc_min(SYSCTL_HANDLER_ARGS)
int err;
val = zfs_arc_min;
- err = sysctl_handle_long(oidp, &val, 0, req);
+ err = sysctl_handle_64(oidp, &val, 0, req);
if (err != 0 || req->newptr == NULL)
return (SET_ERROR(err));
@@ -599,7 +599,7 @@ param_set_multihost_interval(SYSCTL_HANDLER_ARGS)
{
int err;
- err = sysctl_handle_long(oidp, &zfs_multihost_interval, 0, req);
+ err = sysctl_handle_64(oidp, &zfs_multihost_interval, 0, req);
if (err != 0 || req->newptr == NULL)
return (err);
@@ -676,7 +676,7 @@ param_set_deadman_synctime(SYSCTL_HANDLER_ARGS)
int err;
val = zfs_deadman_synctime_ms;
- err = sysctl_handle_long(oidp, &val, 0, req);
+ err = sysctl_handle_64(oidp, &val, 0, req);
if (err != 0 || req->newptr == NULL)
return (err);
zfs_deadman_synctime_ms = val;
@@ -693,7 +693,7 @@ param_set_deadman_ziotime(SYSCTL_HANDLER_ARGS)
int err;
val = zfs_deadman_ziotime_ms;
- err = sysctl_handle_long(oidp, &val, 0, req);
+ err = sysctl_handle_64(oidp, &val, 0, req);
if (err != 0 || req->newptr == NULL)
return (err);
zfs_deadman_ziotime_ms = val;
@@ -761,11 +761,11 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, space_map_ibs, CTLFLAG_RWTUN,
int
param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS)
{
- uint64_t val;
+ int val;
int err;
val = zfs_vdev_min_auto_ashift;
- err = sysctl_handle_64(oidp, &val, 0, req);
+ err = sysctl_handle_int(oidp, &val, 0, req);
if (err != 0 || req->newptr == NULL)
return (SET_ERROR(err));
@@ -779,20 +779,20 @@ param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS)
/* BEGIN CSTYLED */
SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
- CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+ CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
&zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift),
- param_set_min_auto_ashift, "QU",
+ param_set_min_auto_ashift, "IU",
"Min ashift used when creating new top-level vdev. (LEGACY)");
/* END CSTYLED */
int
param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
{
- uint64_t val;
+ int val;
int err;
val = zfs_vdev_max_auto_ashift;
- err = sysctl_handle_64(oidp, &val, 0, req);
+ err = sysctl_handle_int(oidp, &val, 0, req);
if (err != 0 || req->newptr == NULL)
return (SET_ERROR(err));
@@ -806,9 +806,9 @@ param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
/* BEGIN CSTYLED */
SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
- CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+ CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
&zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift),
- param_set_max_auto_ashift, "QU",
+ param_set_max_auto_ashift, "IU",
"Max ashift used when optimizing for logical -> physical sector size on"
" new top-level vdevs. (LEGACY)");
/* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_file.c b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_file.c
index 73cc6aa48c0b..a65dfec86caf 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_file.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_file.c
@@ -40,8 +40,8 @@
static taskq_t *vdev_file_taskq;
-static unsigned long vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
-static unsigned long vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;
+static uint_t vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
+static uint_t vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;
void
vdev_file_init(void)
@@ -350,7 +350,7 @@ vdev_ops_t vdev_disk_ops = {
#endif
-ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, UINT, ZMOD_RW,
"Logical ashift for file-based devices");
-ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, UINT, ZMOD_RW,
"Physical ashift for file-based devices");
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
index 963102f3b62a..16bcd338de21 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
@@ -631,8 +631,8 @@ zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who,
return (NULL);
}
-static uint64_t
-zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt,
+static uintptr_t
+zfs_ace_walk(void *datap, uintptr_t cookie, int aclcnt,
uint16_t *flags, uint16_t *type, uint32_t *mask)
{
(void) aclcnt;
@@ -642,7 +642,7 @@ zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt,
acep = zfs_acl_next_ace(aclp, acep, &who, mask,
flags, type);
- return ((uint64_t)(uintptr_t)acep);
+ return ((uintptr_t)acep);
}
/*
@@ -1133,6 +1133,7 @@ zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,
cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl,
cb->cb_acl_node);
}
+ ASSERT3P(cb->cb_acl_node, !=, NULL);
*dataptr = cb->cb_acl_node->z_acldata;
*length = cb->cb_acl_node->z_size;
}
@@ -1618,7 +1619,7 @@ zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp,
*/
int
zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
- vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids)
+ vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids, zuserns_t *mnt_ns)
{
int error;
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
@@ -1788,7 +1789,7 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
if (mask == 0)
return (SET_ERROR(ENOSYS));
- if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr)))
+ if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr, NULL)))
return (error);
mutex_enter(&zp->z_acl_lock);
@@ -1951,7 +1952,7 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
if (zp->z_pflags & ZFS_IMMUTABLE)
return (SET_ERROR(EPERM));
- if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)))
+ if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr, NULL)))
return (error);
error = zfs_vsec_2_aclp(zfsvfs, ZTOV(zp)->v_type, vsecp, cr, &fuidp,
@@ -2340,7 +2341,8 @@ zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
* can define any form of access.
*/
int
-zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
+zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr,
+ zuserns_t *mnt_ns)
{
uint32_t working_mode;
int error;
@@ -2470,9 +2472,11 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
* NFSv4-style ZFS ACL format and call zfs_zaccess()
*/
int
-zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr)
+zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr,
+ zuserns_t *mnt_ns)
{
- return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr));
+ return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr,
+ mnt_ns));
}
/*
@@ -2483,7 +2487,7 @@ zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr)
{
int v4_mode = zfs_unix_to_v4(mode >> 6);
- return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr));
+ return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr, NULL));
}
static int
@@ -2539,7 +2543,7 @@ zfs_delete_final_check(znode_t *zp, znode_t *dzp,
*
*/
int
-zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
+zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr, zuserns_t *mnt_ns)
{
uint32_t dzp_working_mode = 0;
uint32_t zp_working_mode = 0;
@@ -2626,7 +2630,7 @@ zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
int
zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
- znode_t *tzp, cred_t *cr)
+ znode_t *tzp, cred_t *cr, zuserns_t *mnt_ns)
{
int add_perm;
int error;
@@ -2646,7 +2650,8 @@ zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
* to another.
*/
if (ZTOV(szp)->v_type == VDIR && ZTOV(sdzp) != ZTOV(tdzp)) {
- if ((error = zfs_zaccess(szp, ACE_WRITE_DATA, 0, B_FALSE, cr)))
+ if ((error = zfs_zaccess(szp, ACE_WRITE_DATA, 0, B_FALSE, cr,
+ mnt_ns)))
return (error);
}
@@ -2656,19 +2661,19 @@ zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
* If that succeeds then check for add_file/add_subdir permissions
*/
- if ((error = zfs_zaccess_delete(sdzp, szp, cr)))
+ if ((error = zfs_zaccess_delete(sdzp, szp, cr, mnt_ns)))
return (error);
/*
* If we have a tzp, see if we can delete it?
*/
- if (tzp && (error = zfs_zaccess_delete(tdzp, tzp, cr)))
+ if (tzp && (error = zfs_zaccess_delete(tdzp, tzp, cr, mnt_ns)))
return (error);
/*
* Now check for add permissions
*/
- error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr);
+ error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr, mnt_ns);
return (error);
}
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c
index 778e4151656d..07232086d52b 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c
@@ -809,7 +809,7 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xvpp, cred_t *cr)
*xvpp = NULL;
if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
- &acl_ids)) != 0)
+ &acl_ids, NULL)) != 0)
return (error);
if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, 0)) {
zfs_acl_ids_free(&acl_ids);
@@ -955,7 +955,7 @@ zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
if ((uid = crgetuid(cr)) == downer || uid == fowner ||
(ZTOV(zp)->v_type == VREG &&
- zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0))
+ zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr, NULL) == 0))
return (0);
else
return (secpolicy_vnode_remove(ZTOV(zp), cr));
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_compat.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_compat.c
index d495cc0dc3f1..3ddffec91e83 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_compat.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_compat.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <sys/cmn_err.h>
#include <sys/zfs_ioctl_compat.h>
+#ifdef ZFS_LEGACY_SUPPORT
enum zfs_ioc_legacy {
ZFS_IOC_LEGACY_NONE = -1,
ZFS_IOC_LEGACY_FIRST = 0,
@@ -319,7 +320,7 @@ zfs_ioctl_legacy_to_ozfs(int request)
int
zfs_ioctl_ozfs_to_legacy(int request)
{
- if (request > ZFS_IOC_LAST)
+ if (request >= ZFS_IOC_LAST)
return (-1);
if (request > ZFS_IOC_PLATFORM) {
@@ -361,3 +362,4 @@ zfs_cmd_ozfs_to_legacy(zfs_cmd_t *src, zfs_cmd_legacy_t *dst)
sizeof (zfs_cmd_t) - 8 - offsetof(zfs_cmd_t, zc_sendobj));
dst->zc_jailid = src->zc_zoneid;
}
+#endif /* ZFS_LEGACY_SUPPORT */
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
index 4cb7f63b5230..b4c122bdf4c8 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
@@ -63,6 +63,7 @@
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/jail.h>
+#include <sys/osd.h>
#include <ufs/ufs/quota.h>
#include <sys/zfs_quota.h>
@@ -88,6 +89,20 @@ int zfs_debug_level;
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
"Debug level");
+struct zfs_jailparam {
+ int mount_snapshot;
+};
+
+static struct zfs_jailparam zfs_jailparam0 = {
+ .mount_snapshot = 0,
+};
+
+static int zfs_jailparam_slot;
+
+SYSCTL_JAIL_PARAM_SYS_NODE(zfs, CTLFLAG_RW, "Jail ZFS parameters");
+SYSCTL_JAIL_PARAM(_zfs, mount_snapshot, CTLTYPE_INT | CTLFLAG_RW, "I",
+ "Allow mounting snapshots in the .zfs directory for unjailed datasets");
+
SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
static int zfs_version_acl = ZFS_ACL_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
@@ -1298,7 +1313,7 @@ zfs_mount(vfs_t *vfsp)
char *osname;
int error = 0;
int canwrite;
- bool checkpointrewind;
+ bool checkpointrewind, isctlsnap = false;
if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
return (SET_ERROR(EINVAL));
@@ -1313,6 +1328,7 @@ zfs_mount(vfs_t *vfsp)
}
fetch_osname_options(osname, &checkpointrewind);
+ isctlsnap = (zfsctl_is_node(mvp) && strchr(osname, '@') != NULL);
/*
* Check for mount privilege?
@@ -1321,7 +1337,9 @@ zfs_mount(vfs_t *vfsp)
* we have local permission to allow it
*/
error = secpolicy_fs_mount(cr, mvp, vfsp);
- if (error) {
+ if (error && isctlsnap) {
+ secpolicy_fs_mount_clearopts(cr, vfsp);
+ } else if (error) {
if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
goto out;
@@ -1358,8 +1376,27 @@ zfs_mount(vfs_t *vfsp)
*/
if (!INGLOBALZONE(curproc) &&
(!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
- error = SET_ERROR(EPERM);
- goto out;
+ boolean_t mount_snapshot = B_FALSE;
+
+ /*
+ * Snapshots may be mounted in .zfs for unjailed datasets
+ * if allowed by the jail param zfs.mount_snapshot.
+ */
+ if (isctlsnap) {
+ struct prison *pr;
+ struct zfs_jailparam *zjp;
+
+ pr = curthread->td_ucred->cr_prison;
+ mtx_lock(&pr->pr_mtx);
+ zjp = osd_jail_get(pr, zfs_jailparam_slot);
+ mtx_unlock(&pr->pr_mtx);
+ if (zjp && zjp->mount_snapshot)
+ mount_snapshot = B_TRUE;
+ }
+ if (!mount_snapshot) {
+ error = SET_ERROR(EPERM);
+ goto out;
+ }
}
vfsp->vfs_flag |= MNT_NFS4ACLS;
@@ -2316,3 +2353,236 @@ zfsvfs_update_fromname(const char *oldname, const char *newname)
mtx_unlock(&mountlist_mtx);
}
#endif
+
+/*
+ * Find a prison with ZFS info.
+ * Return the ZFS info and the (locked) prison.
+ */
+static struct zfs_jailparam *
+zfs_jailparam_find(struct prison *spr, struct prison **prp)
+{
+ struct prison *pr;
+ struct zfs_jailparam *zjp;
+
+ for (pr = spr; ; pr = pr->pr_parent) {
+ mtx_lock(&pr->pr_mtx);
+ if (pr == &prison0) {
+ zjp = &zfs_jailparam0;
+ break;
+ }
+ zjp = osd_jail_get(pr, zfs_jailparam_slot);
+ if (zjp != NULL)
+ break;
+ mtx_unlock(&pr->pr_mtx);
+ }
+ *prp = pr;
+
+ return (zjp);
+}
+
+/*
+ * Ensure a prison has its own ZFS info. If zjpp is non-null, point it to the
+ * ZFS info and lock the prison.
+ */
+static void
+zfs_jailparam_alloc(struct prison *pr, struct zfs_jailparam **zjpp)
+{
+ struct prison *ppr;
+ struct zfs_jailparam *zjp, *nzjp;
+ void **rsv;
+
+ /* If this prison already has ZFS info, return that. */
+ zjp = zfs_jailparam_find(pr, &ppr);
+ if (ppr == pr)
+ goto done;
+
+ /*
+ * Allocate a new info record. Then check again, in case something
+ * changed during the allocation.
+ */
+ mtx_unlock(&ppr->pr_mtx);
+ nzjp = malloc(sizeof (struct zfs_jailparam), M_PRISON, M_WAITOK);
+ rsv = osd_reserve(zfs_jailparam_slot);
+ zjp = zfs_jailparam_find(pr, &ppr);
+ if (ppr == pr) {
+ free(nzjp, M_PRISON);
+ osd_free_reserved(rsv);
+ goto done;
+ }
+ /* Inherit the initial values from the ancestor. */
+ mtx_lock(&pr->pr_mtx);
+ (void) osd_jail_set_reserved(pr, zfs_jailparam_slot, rsv, nzjp);
+ (void) memcpy(nzjp, zjp, sizeof (*zjp));
+ zjp = nzjp;
+ mtx_unlock(&ppr->pr_mtx);
+done:
+ if (zjpp != NULL)
+ *zjpp = zjp;
+ else
+ mtx_unlock(&pr->pr_mtx);
+}
+
+/*
+ * Jail OSD methods for ZFS VFS info.
+ */
+static int
+zfs_jailparam_create(void *obj, void *data)
+{
+ struct prison *pr = obj;
+ struct vfsoptlist *opts = data;
+ int jsys;
+
+ if (vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)) == 0 &&
+ jsys == JAIL_SYS_INHERIT)
+ return (0);
+ /*
+ * Inherit a prison's initial values from its parent
+ * (different from JAIL_SYS_INHERIT which also inherits changes).
+ */
+ zfs_jailparam_alloc(pr, NULL);
+ return (0);
+}
+
+static int
+zfs_jailparam_get(void *obj, void *data)
+{
+ struct prison *ppr, *pr = obj;
+ struct vfsoptlist *opts = data;
+ struct zfs_jailparam *zjp;
+ int jsys, error;
+
+ zjp = zfs_jailparam_find(pr, &ppr);
+ jsys = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
+ error = vfs_setopt(opts, "zfs", &jsys, sizeof (jsys));
+ if (error != 0 && error != ENOENT)
+ goto done;
+ if (jsys == JAIL_SYS_NEW) {
+ error = vfs_setopt(opts, "zfs.mount_snapshot",
+ &zjp->mount_snapshot, sizeof (zjp->mount_snapshot));
+ if (error != 0 && error != ENOENT)
+ goto done;
+ } else {
+ /*
+ * If this prison is inheriting its ZFS info, report
+ * empty/zero parameters.
+ */
+ static int mount_snapshot = 0;
+
+ error = vfs_setopt(opts, "zfs.mount_snapshot",
+ &mount_snapshot, sizeof (mount_snapshot));
+ if (error != 0 && error != ENOENT)
+ goto done;
+ }
+ error = 0;
+done:
+ mtx_unlock(&ppr->pr_mtx);
+ return (error);
+}
+
+static int
+zfs_jailparam_set(void *obj, void *data)
+{
+ struct prison *pr = obj;
+ struct prison *ppr;
+ struct vfsoptlist *opts = data;
+ int error, jsys, mount_snapshot;
+
+ /* Set the parameters, which should be correct. */
+ error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys));
+ if (error == ENOENT)
+ jsys = -1;
+ error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot,
+ sizeof (mount_snapshot));
+ if (error == ENOENT)
+ mount_snapshot = -1;
+ else
+ jsys = JAIL_SYS_NEW;
+ if (jsys == JAIL_SYS_NEW) {
+ /* "zfs=new" or "zfs.*": the prison gets its own ZFS info. */
+ struct zfs_jailparam *zjp;
+
+ /*
+ * A child jail cannot have more permissions than its parent
+ */
+ if (pr->pr_parent != &prison0) {
+ zjp = zfs_jailparam_find(pr->pr_parent, &ppr);
+ mtx_unlock(&ppr->pr_mtx);
+ if (zjp->mount_snapshot < mount_snapshot) {
+ return (EPERM);
+ }
+ }
+ zfs_jailparam_alloc(pr, &zjp);
+ if (mount_snapshot != -1)
+ zjp->mount_snapshot = mount_snapshot;
+ mtx_unlock(&pr->pr_mtx);
+ } else {
+ /* "zfs=inherit": inherit the parent's ZFS info. */
+ mtx_lock(&pr->pr_mtx);
+ osd_jail_del(pr, zfs_jailparam_slot);
+ mtx_unlock(&pr->pr_mtx);
+ }
+ return (0);
+}
+
+static int
+zfs_jailparam_check(void *obj __unused, void *data)
+{
+ struct vfsoptlist *opts = data;
+ int error, jsys, mount_snapshot;
+
+ /* Check that the parameters are correct. */
+ error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys));
+ if (error != ENOENT) {
+ if (error != 0)
+ return (error);
+ if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
+ return (EINVAL);
+ }
+ error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot,
+ sizeof (mount_snapshot));
+ if (error != ENOENT) {
+ if (error != 0)
+ return (error);
+ if (mount_snapshot != 0 && mount_snapshot != 1)
+ return (EINVAL);
+ }
+ return (0);
+}
+
+static void
+zfs_jailparam_destroy(void *data)
+{
+
+ free(data, M_PRISON);
+}
+
+static void
+zfs_jailparam_sysinit(void *arg __unused)
+{
+ struct prison *pr;
+ osd_method_t methods[PR_MAXMETHOD] = {
+ [PR_METHOD_CREATE] = zfs_jailparam_create,
+ [PR_METHOD_GET] = zfs_jailparam_get,
+ [PR_METHOD_SET] = zfs_jailparam_set,
+ [PR_METHOD_CHECK] = zfs_jailparam_check,
+ };
+
+ zfs_jailparam_slot = osd_jail_register(zfs_jailparam_destroy, methods);
+ /* Copy the defaults to any existing prisons. */
+ sx_slock(&allprison_lock);
+ TAILQ_FOREACH(pr, &allprison, pr_list)
+ zfs_jailparam_alloc(pr, NULL);
+ sx_sunlock(&allprison_lock);
+}
+
+static void
+zfs_jailparam_sysuninit(void *arg __unused)
+{
+
+ osd_jail_deregister(zfs_jailparam_slot);
+}
+
+SYSINIT(zfs_jailparam_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY,
+ zfs_jailparam_sysinit, NULL);
+SYSUNINIT(zfs_jailparam_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY,
+ zfs_jailparam_sysuninit, NULL);
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
index fae390a148d6..8a350ab4985c 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -837,7 +837,7 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
/*
* Do we have permission to get into attribute directory?
*/
- error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr);
+ error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr, NULL);
if (error) {
vrele(ZTOV(zp));
}
@@ -856,7 +856,8 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
cnp->cn_flags &= ~NOEXECCHECK;
} else
#endif
- if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
+ NULL))) {
zfs_exit(zfsvfs, FTAG);
return (error);
}
@@ -1036,6 +1037,7 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
* flag - large file flag [UNUSED].
* ct - caller context
* vsecp - ACL to be set
+ * mnt_ns - Unused on FreeBSD
*
* OUT: vpp - vnode of created or trunc'd entry.
*
@@ -1047,7 +1049,7 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
*/
int
zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
- znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
+ znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp, zuserns_t *mnt_ns)
{
(void) excl, (void) mode, (void) flag;
znode_t *zp;
@@ -1110,7 +1112,7 @@ zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
* Create a new file object and update the directory
* to reference it.
*/
- if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
goto out;
}
@@ -1126,7 +1128,7 @@ zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
}
if ((error = zfs_acl_ids_create(dzp, 0, vap,
- cr, vsecp, &acl_ids)) != 0)
+ cr, vsecp, &acl_ids, NULL)) != 0)
goto out;
if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
@@ -1231,7 +1233,7 @@ zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
xattr_obj = 0;
xzp = NULL;
- if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
+ if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
goto out;
}
@@ -1387,6 +1389,7 @@ zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
* ct - caller context
* flags - case flags
* vsecp - ACL to be set
+ * mnt_ns - Unused on FreeBSD
*
* OUT: vpp - vnode of created directory.
*
@@ -1398,7 +1401,7 @@ zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
*/
int
zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
- cred_t *cr, int flags, vsecattr_t *vsecp)
+ cred_t *cr, int flags, vsecattr_t *vsecp, zuserns_t *mnt_ns)
{
(void) flags, (void) vsecp;
znode_t *zp;
@@ -1447,7 +1450,7 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
}
if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
- NULL, &acl_ids)) != 0) {
+ NULL, &acl_ids, NULL)) != 0) {
zfs_exit(zfsvfs, FTAG);
return (error);
}
@@ -1468,7 +1471,8 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
}
ASSERT3P(zp, ==, NULL);
- if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr,
+ mnt_ns))) {
zfs_acl_ids_free(&acl_ids);
zfs_exit(zfsvfs, FTAG);
return (error);
@@ -1585,7 +1589,7 @@ zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
zilog = zfsvfs->z_log;
- if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
+ if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
goto out;
}
@@ -1976,7 +1980,7 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
(vap->va_uid != crgetuid(cr))) {
if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
- skipaclchk, cr))) {
+ skipaclchk, cr, NULL))) {
zfs_exit(zfsvfs, FTAG);
return (error);
}
@@ -2142,7 +2146,7 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
* flags - ATTR_UTIME set if non-default time values provided.
* - ATTR_NOACLCHECK (CIFS context only).
* cr - credentials of caller.
- * ct - caller context
+ * mnt_ns - Unused on FreeBSD
*
* RETURN: 0 on success, error code on failure.
*
@@ -2150,7 +2154,7 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
* vp - ctime updated, mtime updated if size changed.
*/
int
-zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
+zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zuserns_t *mnt_ns)
{
vnode_t *vp = ZTOV(zp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
@@ -2322,7 +2326,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
- skipaclchk, cr);
+ skipaclchk, cr, mnt_ns);
}
if (mask & (AT_UID|AT_GID)) {
@@ -2359,7 +2363,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
((idmask == AT_UID) && take_owner) ||
((idmask == AT_GID) && take_group)) {
if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
- skipaclchk, cr) == 0) {
+ skipaclchk, cr, mnt_ns) == 0) {
/*
* Remove setuid/setgid for non-privileged users
*/
@@ -2468,7 +2472,8 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
}
if (mask & AT_MODE) {
- if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
+ if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,
+ mnt_ns) == 0) {
err = secpolicy_setid_setsticky_clear(vp, vap,
&oldva, cr);
if (err) {
@@ -3264,7 +3269,7 @@ zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
* Note that if target and source are the same, this can be
* done in a single check.
*/
- if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
+ if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr, NULL)))
goto out;
if ((*svpp)->v_type == VDIR) {
@@ -3368,11 +3373,6 @@ zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
if (error == 0) {
zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
snm, tdzp, tnm, szp);
-
- /*
- * Update path information for the target vnode
- */
- vn_renamepath(tdvp, *svpp, tnm, strlen(tnm));
} else {
/*
* At this point, we have successfully created
@@ -3415,7 +3415,7 @@ out:
int
zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
- cred_t *cr, int flags)
+ cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zuserns_t *mnt_ns)
{
struct componentname scn, tcn;
vnode_t *sdvp, *tdvp;
@@ -3423,6 +3423,9 @@ zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
int error;
svp = tvp = NULL;
+ if (rflags != 0 || wo_vap != NULL)
+ return (SET_ERROR(EINVAL));
+
sdvp = ZTOV(sdzp);
tdvp = ZTOV(tdzp);
error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
@@ -3460,6 +3463,7 @@ fail:
* cr - credentials of caller.
* ct - caller context
* flags - case flags
+ * mnt_ns - Unused on FreeBSD
*
* RETURN: 0 on success, error code on failure.
*
@@ -3468,7 +3472,7 @@ fail:
*/
int
zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
- const char *link, znode_t **zpp, cred_t *cr, int flags)
+ const char *link, znode_t **zpp, cred_t *cr, int flags, zuserns_t *mnt_ns)
{
(void) flags;
znode_t *zp;
@@ -3499,7 +3503,7 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
}
if ((error = zfs_acl_ids_create(dzp, 0,
- vap, cr, NULL, &acl_ids)) != 0) {
+ vap, cr, NULL, &acl_ids, NULL)) != 0) {
zfs_exit(zfsvfs, FTAG);
return (error);
}
@@ -3514,7 +3518,7 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
return (error);
}
- if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
zfs_acl_ids_free(&acl_ids);
zfs_exit(zfsvfs, FTAG);
return (error);
@@ -3730,7 +3734,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
return (SET_ERROR(EPERM));
}
- if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr, NULL))) {
zfs_exit(zfsvfs, FTAG);
return (error);
}
@@ -3831,7 +3835,7 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
* On Linux we can get here through truncate_range() which
* operates directly on inodes, so we need to check access rights.
*/
- if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr, NULL))) {
zfs_exit(zfsvfs, FTAG);
return (error);
}
@@ -4607,7 +4611,7 @@ zfs_freebsd_create(struct vop_create_args *ap)
*ap->a_vpp = NULL;
rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, 0, mode,
- &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */);
+ &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */, NULL);
if (rc == 0)
*ap->a_vpp = ZTOV(zp);
if (zfsvfs->z_use_namecache &&
@@ -4661,7 +4665,7 @@ zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
*ap->a_vpp = NULL;
rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp,
- ap->a_cnp->cn_cred, 0, NULL);
+ ap->a_cnp->cn_cred, 0, NULL, NULL);
if (rc == 0)
*ap->a_vpp = ZTOV(zp);
@@ -4914,7 +4918,7 @@ zfs_freebsd_setattr(struct vop_setattr_args *ap)
xvap.xva_vattr.va_mask |= AT_XVATTR;
XVA_SET_REQ(&xvap, XAT_CREATETIME);
}
- return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred));
+ return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred, NULL));
}
#ifndef _SYS_SYSPROTO_H_
@@ -4985,7 +4989,7 @@ zfs_freebsd_symlink(struct vop_symlink_args *ap)
*ap->a_vpp = NULL;
rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
- ap->a_target, &zp, cnp->cn_cred, 0 /* flags */);
+ ap->a_target, &zp, cnp->cn_cred, 0 /* flags */, NULL);
if (rc == 0) {
*ap->a_vpp = ZTOV(zp);
ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
index 6345e9e69d30..6c269480cb4b 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
@@ -298,7 +298,7 @@ zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
sharezp->z_is_sa = zfsvfs->z_use_sa;
VERIFY0(zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
- kcred, NULL, &acl_ids));
+ kcred, NULL, &acl_ids, NULL));
zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
ASSERT3P(zp, ==, sharezp);
POINTER_INVALIDATE(&sharezp->z_zfsvfs);
@@ -1773,7 +1773,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
rootzp->z_zfsvfs = zfsvfs;
VERIFY0(zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
- cr, NULL, &acl_ids));
+ cr, NULL, &acl_ids, NULL));
zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
ASSERT3P(zp, ==, rootzp);
error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
@@ -1949,7 +1949,6 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
} else if (error != ENOENT) {
return (error);
}
- error = 0;
for (;;) {
uint64_t pobj;
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c
index 0410ddd65a5c..c5e745f7d196 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c
@@ -1735,7 +1735,6 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
goto error;
if (locked) {
rw_exit(&key->zk_salt_lock);
- locked = B_FALSE;
}
if (authbuf != NULL)
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
index 8d2a6d77624b..631e020db9c9 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
@@ -1386,6 +1386,7 @@ zvol_os_create_minor(const char *name)
uint64_t volsize;
uint64_t volmode, hash;
int error;
+ bool replayed_zil = B_FALSE;
ZFS_LOG(1, "Creating ZVOL %s...", name);
hash = zvol_name_hash(name);
@@ -1490,11 +1491,12 @@ zvol_os_create_minor(const char *name)
zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums);
if (spa_writeable(dmu_objset_spa(os))) {
if (zil_replay_disable)
- zil_destroy(zv->zv_zilog, B_FALSE);
+ replayed_zil = zil_destroy(zv->zv_zilog, B_FALSE);
else
- zil_replay(os, zv, zvol_replay_vector);
+ replayed_zil = zil_replay(os, zv, zvol_replay_vector);
}
- zil_close(zv->zv_zilog);
+ if (replayed_zil)
+ zil_close(zv->zv_zilog);
zv->zv_zilog = NULL;
/* TODO: prefetch for geom tasting */
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c b/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c
index d0461a9f1298..e87954714e3a 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c
@@ -37,7 +37,7 @@
#endif
#define MAX_HRTIMEOUT_SLACK_US 1000
-unsigned int spl_schedule_hrtimeout_slack_us = 0;
+static unsigned int spl_schedule_hrtimeout_slack_us = 0;
static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-err.c b/sys/contrib/openzfs/module/os/linux/spl/spl-err.c
index 7d3f6127c4af..29781b9515b2 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-err.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-err.c
@@ -32,7 +32,7 @@
* analysis and other such goodies.
* But we would still default to the current default of not to do that.
*/
-unsigned int spl_panic_halt;
+static unsigned int spl_panic_halt;
/* CSTYLED */
module_param(spl_panic_halt, uint, 0644);
MODULE_PARM_DESC(spl_panic_halt, "Cause kernel panic on assertion failures");
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
index bc39ece9a427..71eedf635f73 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
@@ -23,6 +23,7 @@
* Solaris Porting Layer (SPL) Generic Implementation.
*/
+#include <sys/isa_defs.h>
#include <sys/sysmacros.h>
#include <sys/systeminfo.h>
#include <sys/vmsystm.h>
@@ -48,6 +49,7 @@
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/misc.h>
+#include <linux/mod_compat.h>
unsigned long spl_hostid = 0;
EXPORT_SYMBOL(spl_hostid);
@@ -60,10 +62,10 @@ proc_t p0;
EXPORT_SYMBOL(p0);
/*
- * Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna
+ * xoshiro256++ 1.0 PRNG by David Blackman and Sebastiano Vigna
*
- * "Further scramblings of Marsaglia's xorshift generators"
- * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
+ * "Scrambled Linear Pseudorandom Number Generators∗"
+ * https://vigna.di.unimi.it/ftp/papers/ScrambledLinear.pdf
*
* random_get_pseudo_bytes() is an API function on Illumos whose sole purpose
* is to provide bytes containing random numbers. It is mapped to /dev/urandom
@@ -75,66 +77,85 @@ EXPORT_SYMBOL(p0);
* free of atomic instructions.
*
* A consequence of using a fast PRNG is that using random_get_pseudo_bytes()
- * to generate words larger than 128 bits will paradoxically be limited to
- * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1`
- * 128-bit words and selecting the first will implicitly select the second. If
+ * to generate words larger than 256 bits will paradoxically be limited to
+ * `2^256 - 1` possibilities. This is because we have a sequence of `2^256 - 1`
+ * 256-bit words and selecting the first will implicitly select the second. If
* a caller finds this behavior undesirable, random_get_bytes() should be used
* instead.
*
* XXX: Linux interrupt handlers that trigger within the critical section
- * formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will
+ * formed by `s[3] = xp[3];` and `xp[0] = s[0];` and call this function will
* see the same numbers. Nothing in the code currently calls this in an
* interrupt handler, so this is considered to be okay. If that becomes a
* problem, we could create a set of per-cpu variables for interrupt handlers
* and use them when in_interrupt() from linux/preempt_mask.h evaluates to
* true.
*/
-void __percpu *spl_pseudo_entropy;
+static void __percpu *spl_pseudo_entropy;
/*
- * spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed
- * file:
+ * rotl()/spl_rand_next()/spl_rand_jump() are copied from the following CC-0
+ * licensed file:
*
- * http://xorshift.di.unimi.it/xorshift128plus.c
+ * https://prng.di.unimi.it/xoshiro256plusplus.c
*/
+static inline uint64_t rotl(const uint64_t x, int k)
+{
+ return ((x << k) | (x >> (64 - k)));
+}
+
static inline uint64_t
spl_rand_next(uint64_t *s)
{
- uint64_t s1 = s[0];
- const uint64_t s0 = s[1];
- s[0] = s0;
- s1 ^= s1 << 23; // a
- s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5); // b, c
- return (s[1] + s0);
+ const uint64_t result = rotl(s[0] + s[3], 23) + s[0];
+
+ const uint64_t t = s[1] << 17;
+
+ s[2] ^= s[0];
+ s[3] ^= s[1];
+ s[1] ^= s[2];
+ s[0] ^= s[3];
+
+ s[2] ^= t;
+
+ s[3] = rotl(s[3], 45);
+
+ return (result);
}
static inline void
spl_rand_jump(uint64_t *s)
{
- static const uint64_t JUMP[] =
- { 0x8a5cd789635d2dff, 0x121fd2155c472f96 };
+ static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba,
+ 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c };
uint64_t s0 = 0;
uint64_t s1 = 0;
+ uint64_t s2 = 0;
+ uint64_t s3 = 0;
int i, b;
for (i = 0; i < sizeof (JUMP) / sizeof (*JUMP); i++)
for (b = 0; b < 64; b++) {
if (JUMP[i] & 1ULL << b) {
s0 ^= s[0];
s1 ^= s[1];
+ s2 ^= s[2];
+ s3 ^= s[3];
}
(void) spl_rand_next(s);
}
s[0] = s0;
s[1] = s1;
+ s[2] = s2;
+ s[3] = s3;
}
int
random_get_pseudo_bytes(uint8_t *ptr, size_t len)
{
- uint64_t *xp, s[2];
+ uint64_t *xp, s[4];
ASSERT(ptr);
@@ -142,6 +163,8 @@ random_get_pseudo_bytes(uint8_t *ptr, size_t len)
s[0] = xp[0];
s[1] = xp[1];
+ s[2] = xp[2];
+ s[3] = xp[3];
while (len) {
union {
@@ -153,12 +176,22 @@ random_get_pseudo_bytes(uint8_t *ptr, size_t len)
len -= i;
entropy.ui64 = spl_rand_next(s);
+ /*
+ * xoshiro256++ has low entropy lower bytes, so we copy the
+ * higher order bytes first.
+ */
while (i--)
+#ifdef _ZFS_BIG_ENDIAN
*ptr++ = entropy.byte[i];
+#else
+ *ptr++ = entropy.byte[7 - i];
+#endif
}
xp[0] = s[0];
xp[1] = s[1];
+ xp[2] = s[2];
+ xp[3] = s[3];
put_cpu_ptr(spl_pseudo_entropy);
@@ -518,6 +551,29 @@ ddi_copyin(const void *from, void *to, size_t len, int flags)
}
EXPORT_SYMBOL(ddi_copyin);
+#define define_spl_param(type, fmt) \
+int \
+spl_param_get_##type(char *buf, zfs_kernel_param_t *kp) \
+{ \
+ return (scnprintf(buf, PAGE_SIZE, fmt "\n", \
+ *(type *)kp->arg)); \
+} \
+int \
+spl_param_set_##type(const char *buf, zfs_kernel_param_t *kp) \
+{ \
+ return (kstrto##type(buf, 0, (type *)kp->arg)); \
+} \
+const struct kernel_param_ops spl_param_ops_##type = { \
+ .set = spl_param_set_##type, \
+ .get = spl_param_get_##type, \
+}; \
+EXPORT_SYMBOL(spl_param_get_##type); \
+EXPORT_SYMBOL(spl_param_set_##type); \
+EXPORT_SYMBOL(spl_param_ops_##type);
+
+define_spl_param(s64, "%lld")
+define_spl_param(u64, "%llu")
+
/*
* Post a uevent to userspace whenever a new vdev adds to the pool. It is
* necessary to sync blkid information with udev, which zed daemon uses
@@ -741,10 +797,10 @@ spl_kvmem_init(void)
static int __init
spl_random_init(void)
{
- uint64_t s[2];
+ uint64_t s[4];
int i = 0;
- spl_pseudo_entropy = __alloc_percpu(2 * sizeof (uint64_t),
+ spl_pseudo_entropy = __alloc_percpu(4 * sizeof (uint64_t),
sizeof (uint64_t));
if (!spl_pseudo_entropy)
@@ -752,17 +808,19 @@ spl_random_init(void)
get_random_bytes(s, sizeof (s));
- if (s[0] == 0 && s[1] == 0) {
+ if (s[0] == 0 && s[1] == 0 && s[2] == 0 && s[3] == 0) {
if (jiffies != 0) {
s[0] = jiffies;
s[1] = ~0 - jiffies;
+ s[2] = ~jiffies;
+ s[3] = jiffies - ~0;
} else {
- (void) memcpy(s, "improbable seed", sizeof (s));
+ (void) memcpy(s, "improbable seed", 16);
}
printk("SPL: get_random_bytes() returned 0 "
"when generating random seed. Setting initial seed to "
- "0x%016llx%016llx.\n", cpu_to_be64(s[0]),
- cpu_to_be64(s[1]));
+ "0x%016llx%016llx%016llx%016llx.\n", cpu_to_be64(s[0]),
+ cpu_to_be64(s[1]), cpu_to_be64(s[2]), cpu_to_be64(s[3]));
}
for_each_possible_cpu(i) {
@@ -772,6 +830,8 @@ spl_random_init(void)
wordp[0] = s[0];
wordp[1] = s[1];
+ wordp[2] = s[2];
+ wordp[3] = s[3];
}
return (0);
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
index e355e2bfc3a0..edd04783b363 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
@@ -151,7 +151,7 @@ MODULE_PARM_DESC(spl_kmem_cache_kmem_threads,
struct list_head spl_kmem_cache_list; /* List of caches */
struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
-taskq_t *spl_kmem_cache_taskq; /* Task queue for aging / reclaim */
+static taskq_t *spl_kmem_cache_taskq; /* Task queue for aging / reclaim */
static void spl_cache_shrink(spl_kmem_cache_t *skc, void *obj);
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-procfs-list.c b/sys/contrib/openzfs/module/os/linux/spl/spl-procfs-list.c
index a4a24dcae2bd..5e073950d61a 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-procfs-list.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-procfs-list.c
@@ -23,9 +23,9 @@
*/
#include <sys/list.h>
-#include <sys/mutex.h>
#include <sys/procfs_list.h>
#include <linux/proc_fs.h>
+#include <sys/mutex.h>
/*
* A procfs_list is a wrapper around a linked list which implements the seq_file
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
index abf4dca585b2..84497359ce2e 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
@@ -1048,7 +1048,6 @@ taskq_create(const char *name, int threads_arg, pri_t pri,
ASSERT(name != NULL);
ASSERT(minalloc >= 0);
- ASSERT(maxalloc <= INT_MAX);
ASSERT(!(flags & (TASKQ_CPR_SAFE))); /* Unsupported */
/* Scale the number of threads using nthreads as a percentage */
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
index 9421f81bf0c8..b489179f1257 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
@@ -25,7 +25,6 @@
*/
#include <sys/types.h>
-#include <sys/mutex.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <linux/file.h>
@@ -37,6 +36,8 @@
#include <linux/proc_ns.h>
#endif
+#include <sys/mutex.h>
+
static kmutex_t zone_datasets_lock;
static struct list_head zone_datasets;
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
index 2ab85f8cccd0..16530d82693e 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
@@ -132,7 +132,7 @@ static abd_stats_t abd_stats = {
{ "scatter_sg_table_retry", KSTAT_DATA_UINT64 },
};
-struct {
+static struct {
wmsum_t abdstat_struct_size;
wmsum_t abdstat_linear_cnt;
wmsum_t abdstat_linear_data_size;
@@ -597,10 +597,8 @@ abd_free_chunks(abd_t *abd)
struct scatterlist *sg;
abd_for_each_sg(abd, sg, n, i) {
- for (int j = 0; j < sg->length; j += PAGESIZE) {
- struct page *p = nth_page(sg_page(sg), j >> PAGE_SHIFT);
- umem_free(p, PAGESIZE);
- }
+ struct page *p = nth_page(sg_page(sg), 0);
+ umem_free_aligned(p, PAGESIZE);
}
abd_free_sg_table(abd);
}
@@ -706,7 +704,7 @@ abd_free_zero_scatter(void)
__free_page(abd_zero_page);
#endif /* HAVE_ZERO_PAGE_GPL_ONLY */
#else
- umem_free(abd_zero_page, PAGESIZE);
+ umem_free_aligned(abd_zero_page, PAGESIZE);
#endif /* _KERNEL */
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
index eaaf7d0bb746..6f730e9ddd83 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
@@ -358,11 +358,11 @@ arc_lowmem_fini(void)
}
int
-param_set_arc_long(const char *buf, zfs_kernel_param_t *kp)
+param_set_arc_u64(const char *buf, zfs_kernel_param_t *kp)
{
int error;
- error = param_set_long(buf, kp);
+ error = spl_param_set_u64(buf, kp);
if (error < 0)
return (SET_ERROR(error));
@@ -374,13 +374,13 @@ param_set_arc_long(const char *buf, zfs_kernel_param_t *kp)
int
param_set_arc_min(const char *buf, zfs_kernel_param_t *kp)
{
- return (param_set_arc_long(buf, kp));
+ return (param_set_arc_u64(buf, kp));
}
int
param_set_arc_max(const char *buf, zfs_kernel_param_t *kp)
{
- return (param_set_arc_long(buf, kp));
+ return (param_set_arc_u64(buf, kp));
}
int
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/mmp_os.c b/sys/contrib/openzfs/module/os/linux/zfs/mmp_os.c
index d502127b5ba3..7e5bd392437e 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/mmp_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/mmp_os.c
@@ -30,7 +30,7 @@ param_set_multihost_interval(const char *val, zfs_kernel_param_t *kp)
{
int ret;
- ret = param_set_ulong(val, kp);
+ ret = spl_param_set_u64(val, kp);
if (ret < 0)
return (ret);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/policy.c b/sys/contrib/openzfs/module/os/linux/zfs/policy.c
index a69618978622..eaf38df864d3 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/policy.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/policy.c
@@ -214,8 +214,10 @@ secpolicy_vnode_setid_retain(struct znode *zp __maybe_unused, const cred_t *cr,
* Determine that subject can set the file setgid flag.
*/
int
-secpolicy_vnode_setids_setgids(const cred_t *cr, gid_t gid)
+secpolicy_vnode_setids_setgids(const cred_t *cr, gid_t gid, zuserns_t *mnt_ns,
+ zuserns_t *fs_ns)
{
+ gid = zfs_gid_to_vfsgid(mnt_ns, fs_ns, gid);
#if defined(CONFIG_USER_NS)
if (!kgid_has_mapping(cr->user_ns, SGID_TO_KGID(gid)))
return (EPERM);
@@ -284,8 +286,11 @@ secpolicy_setid_clear(vattr_t *vap, cred_t *cr)
* Determine that subject can set the file setid flags.
*/
static int
-secpolicy_vnode_setid_modify(const cred_t *cr, uid_t owner)
+secpolicy_vnode_setid_modify(const cred_t *cr, uid_t owner, zuserns_t *mnt_ns,
+ zuserns_t *fs_ns)
{
+ owner = zfs_uid_to_vfsuid(mnt_ns, fs_ns, owner);
+
if (crgetuid(cr) == owner)
return (0);
@@ -310,13 +315,13 @@ secpolicy_vnode_stky_modify(const cred_t *cr)
int
secpolicy_setid_setsticky_clear(struct inode *ip, vattr_t *vap,
- const vattr_t *ovap, cred_t *cr)
+ const vattr_t *ovap, cred_t *cr, zuserns_t *mnt_ns, zuserns_t *fs_ns)
{
int error;
if ((vap->va_mode & S_ISUID) != 0 &&
(error = secpolicy_vnode_setid_modify(cr,
- ovap->va_uid)) != 0) {
+ ovap->va_uid, mnt_ns, fs_ns)) != 0) {
return (error);
}
@@ -334,7 +339,8 @@ secpolicy_setid_setsticky_clear(struct inode *ip, vattr_t *vap,
* group-id bit.
*/
if ((vap->va_mode & S_ISGID) != 0 &&
- secpolicy_vnode_setids_setgids(cr, ovap->va_gid) != 0) {
+ secpolicy_vnode_setids_setgids(cr, ovap->va_gid,
+ mnt_ns, fs_ns) != 0) {
vap->va_mode &= ~S_ISGID;
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
index f999df3b7db9..3efc8b9644fd 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
@@ -60,7 +60,7 @@ param_set_deadman_ziotime(const char *val, zfs_kernel_param_t *kp)
{
int error;
- error = param_set_ulong(val, kp);
+ error = spl_param_set_u64(val, kp);
if (error < 0)
return (SET_ERROR(error));
@@ -74,7 +74,7 @@ param_set_deadman_synctime(const char *val, zfs_kernel_param_t *kp)
{
int error;
- error = param_set_ulong(val, kp);
+ error = spl_param_set_u64(val, kp);
if (error < 0)
return (SET_ERROR(error));
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
index 0fed09df5203..4f33009f14d4 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
@@ -56,7 +56,7 @@ static void *zfs_vdev_holder = VDEV_HOLDER;
* device is missing. The missing path may be transient since the links
* can be briefly removed and recreated in response to udev events.
*/
-static unsigned zfs_vdev_open_timeout_ms = 1000;
+static uint_t zfs_vdev_open_timeout_ms = 1000;
/*
* Size of the "reserved" partition, in blocks.
@@ -74,6 +74,12 @@ typedef struct dio_request {
struct bio *dr_bio[0]; /* Attached bio's */
} dio_request_t;
+/*
+ * BIO request failfast mask.
+ */
+
+static unsigned int zfs_vdev_failfast_mask = 1;
+
static fmode_t
vdev_bdev_mode(spa_mode_t spa_mode)
{
@@ -173,7 +179,7 @@ vdev_disk_error(zio_t *zio)
* which is safe from any context.
*/
printk(KERN_WARNING "zio pool=%s vdev=%s error=%d type=%d "
- "offset=%llu size=%llu flags=%x\n", spa_name(zio->io_spa),
+ "offset=%llu size=%llu flags=%llu\n", spa_name(zio->io_spa),
zio->io_vd->vdev_path, zio->io_error, zio->io_type,
(u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size,
zio->io_flags);
@@ -659,8 +665,11 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
retry:
dr = vdev_disk_dio_alloc(bio_count);
- if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
- bio_set_flags_failfast(bdev, &flags);
+ if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
+ zio->io_vd->vdev_failfast == B_TRUE) {
+ bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1,
+ zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4);
+ }
dr->dr_zio = zio;
@@ -1006,17 +1015,17 @@ MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler");
int
param_set_min_auto_ashift(const char *buf, zfs_kernel_param_t *kp)
{
- uint64_t val;
+ uint_t val;
int error;
- error = kstrtoull(buf, 0, &val);
+ error = kstrtouint(buf, 0, &val);
if (error < 0)
return (SET_ERROR(error));
if (val < ASHIFT_MIN || val > zfs_vdev_max_auto_ashift)
return (SET_ERROR(-EINVAL));
- error = param_set_ulong(buf, kp);
+ error = param_set_uint(buf, kp);
if (error < 0)
return (SET_ERROR(error));
@@ -1026,19 +1035,25 @@ param_set_min_auto_ashift(const char *buf, zfs_kernel_param_t *kp)
int
param_set_max_auto_ashift(const char *buf, zfs_kernel_param_t *kp)
{
- uint64_t val;
+ uint_t val;
int error;
- error = kstrtoull(buf, 0, &val);
+ error = kstrtouint(buf, 0, &val);
if (error < 0)
return (SET_ERROR(error));
if (val > ASHIFT_MAX || val < zfs_vdev_min_auto_ashift)
return (SET_ERROR(-EINVAL));
- error = param_set_ulong(buf, kp);
+ error = param_set_uint(buf, kp);
if (error < 0)
return (SET_ERROR(error));
return (0);
}
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, open_timeout_ms, UINT, ZMOD_RW,
+ "Timeout before determining that a device is missing");
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, failfast_mask, UINT, ZMOD_RW,
+ "Defines failfast mask: 1 - device, 2 - transport, 4 - driver");
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_file.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_file.c
index 46e412f6eeb4..5abc0426d1a7 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_file.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_file.c
@@ -53,8 +53,8 @@ static taskq_t *vdev_file_taskq;
* impact the vdev_ashift setting which can only be set at vdev creation
* time.
*/
-static unsigned long vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
-static unsigned long vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;
+static uint_t vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
+static uint_t vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;
static void
vdev_file_hold(vdev_t *vd)
@@ -376,7 +376,7 @@ vdev_ops_t vdev_disk_ops = {
#endif
-ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, UINT, ZMOD_RW,
"Logical ashift for file-based devices");
-ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, UINT, ZMOD_RW,
"Physical ashift for file-based devices");
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
index 5935403b49d0..7d14863c56c4 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
@@ -629,18 +629,18 @@ zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who,
return (NULL);
}
-static uint64_t
-zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt,
+static uintptr_t
+zfs_ace_walk(void *datap, uintptr_t cookie, int aclcnt,
uint16_t *flags, uint16_t *type, uint32_t *mask)
{
(void) aclcnt;
zfs_acl_t *aclp = datap;
- zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)(uintptr_t)cookie;
+ zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)cookie;
uint64_t who;
acep = zfs_acl_next_ace(aclp, acep, &who, mask,
flags, type);
- return ((uint64_t)(uintptr_t)acep);
+ return ((uintptr_t)acep);
}
/*
@@ -1163,6 +1163,7 @@ zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,
cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl,
cb->cb_acl_node);
}
+ ASSERT3P(cb->cb_acl_node, !=, NULL);
*dataptr = cb->cb_acl_node->z_acldata;
*length = cb->cb_acl_node->z_size;
}
@@ -1284,7 +1285,7 @@ acl_trivial_access_masks(mode_t mode, boolean_t isdir, trivial_acl_t *masks)
*/
static int
ace_trivial_common(void *acep, int aclcnt,
- uint64_t (*walk)(void *, uint64_t, int aclcnt,
+ uintptr_t (*walk)(void *, uintptr_t, int,
uint16_t *, uint16_t *, uint32_t *))
{
uint16_t flags;
@@ -1801,7 +1802,7 @@ zfs_acl_inherit(zfsvfs_t *zfsvfs, umode_t va_mode, zfs_acl_t *paclp,
*/
int
zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
- vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids)
+ vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids, zuserns_t *mnt_ns)
{
int error;
zfsvfs_t *zfsvfs = ZTOZSB(dzp);
@@ -1888,8 +1889,10 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
acl_ids->z_mode |= S_ISGID;
} else {
if ((acl_ids->z_mode & S_ISGID) &&
- secpolicy_vnode_setids_setgids(cr, gid) != 0)
+ secpolicy_vnode_setids_setgids(cr, gid, mnt_ns,
+ zfs_i_user_ns(ZTOI(dzp))) != 0) {
acl_ids->z_mode &= ~S_ISGID;
+ }
}
if (acl_ids->z_aclp == NULL) {
@@ -1977,7 +1980,8 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
if (mask == 0)
return (SET_ERROR(ENOSYS));
- if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr)))
+ if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr,
+ kcred->user_ns)))
return (error);
mutex_enter(&zp->z_acl_lock);
@@ -2136,7 +2140,8 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
if (zp->z_pflags & ZFS_IMMUTABLE)
return (SET_ERROR(EPERM));
- if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)))
+ if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,
+ kcred->user_ns)))
return (error);
error = zfs_vsec_2_aclp(zfsvfs, ZTOI(zp)->i_mode, vsecp, cr, &fuidp,
@@ -2282,7 +2287,7 @@ zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
*/
static int
zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
- boolean_t anyaccess, cred_t *cr)
+ boolean_t anyaccess, cred_t *cr, zuserns_t *mnt_ns)
{
zfsvfs_t *zfsvfs = ZTOZSB(zp);
zfs_acl_t *aclp;
@@ -2298,7 +2303,13 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
uid_t gowner;
uid_t fowner;
- zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
+ if (mnt_ns) {
+ fowner = zfs_uid_to_vfsuid(mnt_ns, zfs_i_user_ns(ZTOI(zp)),
+ KUID_TO_SUID(ZTOI(zp)->i_uid));
+ gowner = zfs_gid_to_vfsgid(mnt_ns, zfs_i_user_ns(ZTOI(zp)),
+ KGID_TO_SGID(ZTOI(zp)->i_gid));
+ } else
+ zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
mutex_enter(&zp->z_acl_lock);
@@ -2409,7 +2420,8 @@ zfs_has_access(znode_t *zp, cred_t *cr)
{
uint32_t have = ACE_ALL_PERMS;
- if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) {
+ if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr,
+ kcred->user_ns) != 0) {
uid_t owner;
owner = zfs_fuid_map_id(ZTOZSB(zp),
@@ -2439,7 +2451,8 @@ zfs_has_access(znode_t *zp, cred_t *cr)
* we want to avoid that here.
*/
static int
-zfs_zaccess_trivial(znode_t *zp, uint32_t *working_mode, cred_t *cr)
+zfs_zaccess_trivial(znode_t *zp, uint32_t *working_mode, cred_t *cr,
+ zuserns_t *mnt_ns)
{
int err, mask;
int unmapped = 0;
@@ -2453,7 +2466,10 @@ zfs_zaccess_trivial(znode_t *zp, uint32_t *working_mode, cred_t *cr)
}
#if defined(HAVE_IOPS_PERMISSION_USERNS)
- err = generic_permission(cr->user_ns, ZTOI(zp), mask);
+ if (mnt_ns)
+ err = generic_permission(mnt_ns, ZTOI(zp), mask);
+ else
+ err = generic_permission(cr->user_ns, ZTOI(zp), mask);
#else
err = generic_permission(ZTOI(zp), mask);
#endif
@@ -2468,7 +2484,7 @@ zfs_zaccess_trivial(znode_t *zp, uint32_t *working_mode, cred_t *cr)
static int
zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
- boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
+ boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr, zuserns_t *mnt_ns)
{
zfsvfs_t *zfsvfs = ZTOZSB(zp);
int err;
@@ -2518,20 +2534,20 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
}
if (zp->z_pflags & ZFS_ACL_TRIVIAL)
- return (zfs_zaccess_trivial(zp, working_mode, cr));
+ return (zfs_zaccess_trivial(zp, working_mode, cr, mnt_ns));
- return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
+ return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr, mnt_ns));
}
static int
zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
- cred_t *cr)
+ cred_t *cr, zuserns_t *mnt_ns)
{
if (*working_mode != ACE_WRITE_DATA)
return (SET_ERROR(EACCES));
return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,
- check_privs, B_FALSE, cr));
+ check_privs, B_FALSE, cr, mnt_ns));
}
int
@@ -2598,7 +2614,8 @@ slow:
DTRACE_PROBE(zfs__fastpath__execute__access__miss);
if ((error = zfs_enter(ZTOZSB(zdp), FTAG)) != 0)
return (error);
- error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
+ error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
+ kcred->user_ns);
zfs_exit(ZTOZSB(zdp), FTAG);
return (error);
}
@@ -2610,7 +2627,8 @@ slow:
* can define any form of access.
*/
int
-zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
+zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr,
+ zuserns_t *mnt_ns)
{
uint32_t working_mode;
int error;
@@ -2649,8 +2667,10 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
}
}
- owner = zfs_fuid_map_id(ZTOZSB(zp), KUID_TO_SUID(ZTOI(zp)->i_uid),
- cr, ZFS_OWNER);
+ owner = zfs_uid_to_vfsuid(mnt_ns, zfs_i_user_ns(ZTOI(zp)),
+ KUID_TO_SUID(ZTOI(zp)->i_uid));
+ owner = zfs_fuid_map_id(ZTOZSB(zp), owner, cr, ZFS_OWNER);
+
/*
* Map the bits required to the standard inode flags
* S_IRUSR|S_IWUSR|S_IXUSR in the needed_bits. Map the bits
@@ -2675,7 +2695,7 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
needed_bits |= S_IXUSR;
if ((error = zfs_zaccess_common(check_zp, mode, &working_mode,
- &check_privs, skipaclchk, cr)) == 0) {
+ &check_privs, skipaclchk, cr, mnt_ns)) == 0) {
if (is_attr)
zrele(xzp);
return (secpolicy_vnode_access2(cr, ZTOI(zp), owner,
@@ -2689,7 +2709,8 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
}
if (error && (flags & V_APPEND)) {
- error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr);
+ error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr,
+ mnt_ns);
}
if (error && check_privs) {
@@ -2756,9 +2777,11 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
* NFSv4-style ZFS ACL format and call zfs_zaccess()
*/
int
-zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr)
+zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr,
+ zuserns_t *mnt_ns)
{
- return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr));
+ return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr,
+ mnt_ns));
}
/*
@@ -2769,7 +2792,7 @@ zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr)
{
int v4_mode = zfs_unix_to_v4(mode >> 6);
- return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr));
+ return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr, kcred->user_ns));
}
/* See zfs_zaccess_delete() */
@@ -2846,7 +2869,7 @@ static const boolean_t zfs_write_implies_delete_child = B_TRUE;
* zfs_write_implies_delete_child
*/
int
-zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
+zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr, zuserns_t *mnt_ns)
{
uint32_t wanted_dirperms;
uint32_t dzp_working_mode = 0;
@@ -2873,7 +2896,7 @@ zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
* (This is part of why we're checking the target first.)
*/
zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode,
- &zpcheck_privs, B_FALSE, cr);
+ &zpcheck_privs, B_FALSE, cr, mnt_ns);
if (zp_error == EACCES) {
/* We hit a DENY ACE. */
if (!zpcheck_privs)
@@ -2895,7 +2918,7 @@ zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
if (zfs_write_implies_delete_child)
wanted_dirperms |= ACE_WRITE_DATA;
dzp_error = zfs_zaccess_common(dzp, wanted_dirperms,
- &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr);
+ &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr, mnt_ns);
if (dzp_error == EACCES) {
/* We hit a DENY ACE. */
if (!dzpcheck_privs)
@@ -2977,7 +3000,7 @@ zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
int
zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
- znode_t *tzp, cred_t *cr)
+ znode_t *tzp, cred_t *cr, zuserns_t *mnt_ns)
{
int add_perm;
int error;
@@ -2999,21 +3022,21 @@ zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
* If that succeeds then check for add_file/add_subdir permissions
*/
- if ((error = zfs_zaccess_delete(sdzp, szp, cr)))
+ if ((error = zfs_zaccess_delete(sdzp, szp, cr, mnt_ns)))
return (error);
/*
* If we have a tzp, see if we can delete it?
*/
if (tzp) {
- if ((error = zfs_zaccess_delete(tdzp, tzp, cr)))
+ if ((error = zfs_zaccess_delete(tdzp, tzp, cr, mnt_ns)))
return (error);
}
/*
* Now check for add permissions
*/
- error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr);
+ error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr, mnt_ns);
return (error);
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
index 4ae0a65370e5..519f13212fac 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
@@ -487,7 +487,6 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
zp->z_is_sa = B_FALSE;
zp->z_is_mapped = B_FALSE;
zp->z_is_ctldir = B_TRUE;
- zp->z_is_stale = B_FALSE;
zp->z_sa_hdl = NULL;
zp->z_blksz = 0;
zp->z_seq = 0;
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c
index 819416b68d5f..e5a600250659 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c
@@ -35,7 +35,7 @@ typedef struct zfs_dbgmsg {
static procfs_list_t zfs_dbgmsgs;
static uint_t zfs_dbgmsg_size = 0;
-uint_t zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
+static uint_t zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
/*
* Internal ZFS debug messages are enabled by default.
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c
index 6738d237b923..85aa94d8df6a 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c
@@ -926,6 +926,74 @@ zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx,
return (error);
}
+static int
+zfs_drop_nlink_locked(znode_t *zp, dmu_tx_t *tx, boolean_t *unlinkedp)
+{
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
+ int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
+ boolean_t unlinked = B_FALSE;
+ sa_bulk_attr_t bulk[3];
+ uint64_t mtime[2], ctime[2];
+ uint64_t links;
+ int count = 0;
+ int error;
+
+ if (zp_is_dir && !zfs_dirempty(zp))
+ return (SET_ERROR(ENOTEMPTY));
+
+ if (ZTOI(zp)->i_nlink <= zp_is_dir) {
+ zfs_panic_recover("zfs: link count on %lu is %u, "
+ "should be at least %u", zp->z_id,
+ (int)ZTOI(zp)->i_nlink, zp_is_dir + 1);
+ set_nlink(ZTOI(zp), zp_is_dir + 1);
+ }
+ drop_nlink(ZTOI(zp));
+ if (ZTOI(zp)->i_nlink == zp_is_dir) {
+ zp->z_unlinked = B_TRUE;
+ clear_nlink(ZTOI(zp));
+ unlinked = B_TRUE;
+ } else {
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
+ NULL, &ctime, sizeof (ctime));
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
+ NULL, &zp->z_pflags, sizeof (zp->z_pflags));
+ zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
+ ctime);
+ }
+ links = ZTOI(zp)->i_nlink;
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
+ NULL, &links, sizeof (links));
+ error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+ ASSERT3U(error, ==, 0);
+
+ if (unlinkedp != NULL)
+ *unlinkedp = unlinked;
+ else if (unlinked)
+ zfs_unlinked_add(zp, tx);
+
+ return (0);
+}
+
+/*
+ * Forcefully drop an nlink reference from (zp) and mark it for deletion if it
+ * was the last link. This *must* only be done to znodes which have already
+ * been zfs_link_destroy()'d with ZRENAMING. This is explicitly only used in
+ * the error path of zfs_rename(), where we have to correct the nlink count if
+ * we failed to link the target as well as failing to re-link the original
+ * znodes.
+ */
+int
+zfs_drop_nlink(znode_t *zp, dmu_tx_t *tx, boolean_t *unlinkedp)
+{
+ int error;
+
+ mutex_enter(&zp->z_lock);
+ error = zfs_drop_nlink_locked(zp, tx, unlinkedp);
+ mutex_exit(&zp->z_lock);
+
+ return (error);
+}
+
/*
* Unlink zp from dl, and mark zp for deletion if this was the last link. Can
* fail if zp is a mount point (EBUSY) or a non-empty directory (ENOTEMPTY).
@@ -966,31 +1034,9 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
return (error);
}
- if (ZTOI(zp)->i_nlink <= zp_is_dir) {
- zfs_panic_recover("zfs: link count on %lu is %u, "
- "should be at least %u", zp->z_id,
- (int)ZTOI(zp)->i_nlink, zp_is_dir + 1);
- set_nlink(ZTOI(zp), zp_is_dir + 1);
- }
- drop_nlink(ZTOI(zp));
- if (ZTOI(zp)->i_nlink == zp_is_dir) {
- zp->z_unlinked = B_TRUE;
- clear_nlink(ZTOI(zp));
- unlinked = B_TRUE;
- } else {
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
- NULL, &ctime, sizeof (ctime));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
- NULL, &zp->z_pflags, sizeof (zp->z_pflags));
- zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
- ctime);
- }
- links = ZTOI(zp)->i_nlink;
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
- NULL, &links, sizeof (links));
- error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
- count = 0;
- ASSERT(error == 0);
+ /* The only error is !zfs_dirempty() and we checked earlier. */
+ error = zfs_drop_nlink_locked(zp, tx, &unlinked);
+ ASSERT3U(error, ==, 0);
mutex_exit(&zp->z_lock);
} else {
error = zfs_dropname(dl, zp, dzp, tx, flag);
@@ -1066,11 +1112,12 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xzpp, cred_t *cr)
*xzpp = NULL;
- if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)))
+ if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr,
+ kcred->user_ns)))
return (error);
if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
- &acl_ids)) != 0)
+ &acl_ids, kcred->user_ns)) != 0)
return (error);
if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zp->z_projid)) {
zfs_acl_ids_free(&acl_ids);
@@ -1218,7 +1265,8 @@ zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
cr, ZFS_OWNER);
if ((uid = crgetuid(cr)) == downer || uid == fowner ||
- zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)
+ zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr,
+ kcred->user_ns) == 0)
return (0);
else
return (secpolicy_vnode_remove(cr));
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
index da80428402cd..bc753614be27 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
@@ -246,7 +246,7 @@ zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
{
loff_t rc;
- if (*offp < 0 || *offp > MAXOFFSET_T)
+ if (*offp < 0)
return (EINVAL);
rc = vfs_llseek(fp, *offp, whence);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c
index 4a2091f3c396..e2431fe8a803 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c
@@ -279,11 +279,11 @@ zprop_sysfs_show(const char *attr_name, const zprop_desc_t *property,
for (int i = 0; i < ARRAY_SIZE(type_map); i++) {
if (type_map[i].ztm_type & property->pd_types) {
- len += snprintf(buf + len, buflen - len, "%s ",
- type_map[i].ztm_name);
+ len += kmem_scnprintf(buf + len, buflen - len,
+ "%s ", type_map[i].ztm_name);
}
}
- len += snprintf(buf + len, buflen - len, "\n");
+ len += kmem_scnprintf(buf + len, buflen - len, "\n");
return (len);
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
index 64d6b4616e1c..c921e587c75c 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -1522,7 +1522,6 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
sb->s_op = &zpl_super_operations;
sb->s_xattr = zpl_xattr_handlers;
sb->s_export_op = &zpl_export_operations;
- sb->s_d_op = &zpl_dentry_operations;
/* Set features for file system. */
zfs_set_fuid_feature(zfsvfs);
@@ -1556,6 +1555,7 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
error = zfs_root(zfsvfs, &root_inode);
if (error) {
(void) zfs_umount(sb);
+ zfsvfs = NULL; /* avoid double-free; first in zfs_umount */
goto out;
}
@@ -1563,6 +1563,7 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
sb->s_root = d_make_root(root_inode);
if (sb->s_root == NULL) {
(void) zfs_umount(sb);
+ zfsvfs = NULL; /* avoid double-free; first in zfs_umount */
error = SET_ERROR(ENOMEM);
goto out;
}
@@ -1879,8 +1880,8 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
zp = list_next(&zfsvfs->z_all_znodes, zp)) {
err2 = zfs_rezget(zp);
if (err2) {
+ zpl_d_drop_aliases(ZTOI(zp));
remove_inode_hash(ZTOI(zp));
- zp->z_is_stale = B_TRUE;
}
/* see comment in zfs_suspend_fs() */
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
index 1ff88c121a79..29d62837a82a 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
@@ -476,7 +476,7 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
*/
if ((error = zfs_zaccess(*zpp, ACE_EXECUTE, 0,
- B_TRUE, cr))) {
+ B_TRUE, cr, kcred->user_ns))) {
zrele(*zpp);
*zpp = NULL;
}
@@ -494,7 +494,8 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
* Check accessibility of directory.
*/
- if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
+ kcred->user_ns))) {
zfs_exit(zfsvfs, FTAG);
return (error);
}
@@ -526,6 +527,7 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
* cr - credentials of caller.
* flag - file flag.
* vsecp - ACL to be set
+ * mnt_ns - user namespace of the mount
*
* OUT: zpp - znode of created or trunc'd entry.
*
@@ -537,7 +539,8 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
*/
int
zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
- int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
+ int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp,
+ zuserns_t *mnt_ns)
{
znode_t *zp;
zfsvfs_t *zfsvfs = ZTOZSB(dzp);
@@ -624,7 +627,8 @@ top:
* Create a new file object and update the directory
* to reference it.
*/
- if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr,
+ mnt_ns))) {
if (have_acl)
zfs_acl_ids_free(&acl_ids);
goto out;
@@ -643,7 +647,7 @@ top:
}
if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
- cr, vsecp, &acl_ids)) != 0)
+ cr, vsecp, &acl_ids, mnt_ns)) != 0)
goto out;
have_acl = B_TRUE;
@@ -738,7 +742,8 @@ top:
/*
* Verify requested access to file.
*/
- if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
+ if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr,
+ mnt_ns))) {
goto out;
}
@@ -782,7 +787,8 @@ out:
int
zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
- int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp)
+ int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp,
+ zuserns_t *mnt_ns)
{
(void) excl, (void) mode, (void) flag;
znode_t *zp = NULL, *dzp = ITOZ(dip);
@@ -829,14 +835,14 @@ top:
* Create a new file object and update the directory
* to reference it.
*/
- if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
if (have_acl)
zfs_acl_ids_free(&acl_ids);
goto out;
}
if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
- cr, vsecp, &acl_ids)) != 0)
+ cr, vsecp, &acl_ids, mnt_ns)) != 0)
goto out;
have_acl = B_TRUE;
@@ -967,7 +973,7 @@ top:
return (error);
}
- if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
+ if ((error = zfs_zaccess_delete(dzp, zp, cr, kcred->user_ns))) {
goto out;
}
@@ -1147,6 +1153,7 @@ out:
* cr - credentials of caller.
* flags - case flags.
* vsecp - ACL to be set
+ * mnt_ns - user namespace of the mount
*
* OUT: zpp - znode of created directory.
*
@@ -1159,7 +1166,7 @@ out:
*/
int
zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
- cred_t *cr, int flags, vsecattr_t *vsecp)
+ cred_t *cr, int flags, vsecattr_t *vsecp, zuserns_t *mnt_ns)
{
znode_t *zp;
zfsvfs_t *zfsvfs = ZTOZSB(dzp);
@@ -1216,7 +1223,7 @@ zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
}
if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
- vsecp, &acl_ids)) != 0) {
+ vsecp, &acl_ids, mnt_ns)) != 0) {
zfs_exit(zfsvfs, FTAG);
return (error);
}
@@ -1237,7 +1244,8 @@ top:
return (error);
}
- if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr,
+ mnt_ns))) {
zfs_acl_ids_free(&acl_ids);
zfs_dirent_unlock(dl);
zfs_exit(zfsvfs, FTAG);
@@ -1379,7 +1387,7 @@ top:
return (error);
}
- if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
+ if ((error = zfs_zaccess_delete(dzp, zp, cr, kcred->user_ns))) {
goto out;
}
@@ -1811,6 +1819,7 @@ next:
* flags - ATTR_UTIME set if non-default time values provided.
* - ATTR_NOACLCHECK (CIFS context only).
* cr - credentials of caller.
+ * mnt_ns - user namespace of the mount
*
* RETURN: 0 if success
* error code if failure
@@ -1819,7 +1828,7 @@ next:
* ip - ctime updated, mtime updated if size changed.
*/
int
-zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
+zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zuserns_t *mnt_ns)
{
struct inode *ip;
zfsvfs_t *zfsvfs = ZTOZSB(zp);
@@ -1968,7 +1977,8 @@ top:
*/
if (mask & ATTR_SIZE) {
- err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr);
+ err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr,
+ mnt_ns);
if (err)
goto out3;
@@ -1993,13 +2003,15 @@ top:
XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
- skipaclchk, cr);
+ skipaclchk, cr, mnt_ns);
}
if (mask & (ATTR_UID|ATTR_GID)) {
int idmask = (mask & (ATTR_UID|ATTR_GID));
int take_owner;
int take_group;
+ uid_t uid;
+ gid_t gid;
/*
* NOTE: even if a new mode is being set,
@@ -2013,9 +2025,13 @@ top:
* Take ownership or chgrp to group we are a member of
*/
- take_owner = (mask & ATTR_UID) && (vap->va_uid == crgetuid(cr));
+ uid = zfs_uid_to_vfsuid((struct user_namespace *)mnt_ns,
+ zfs_i_user_ns(ip), vap->va_uid);
+ gid = zfs_gid_to_vfsgid((struct user_namespace *)mnt_ns,
+ zfs_i_user_ns(ip), vap->va_gid);
+ take_owner = (mask & ATTR_UID) && (uid == crgetuid(cr));
take_group = (mask & ATTR_GID) &&
- zfs_groupmember(zfsvfs, vap->va_gid, cr);
+ zfs_groupmember(zfsvfs, gid, cr);
/*
* If both ATTR_UID and ATTR_GID are set then take_owner and
@@ -2031,7 +2047,7 @@ top:
((idmask == ATTR_UID) && take_owner) ||
((idmask == ATTR_GID) && take_group)) {
if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
- skipaclchk, cr) == 0) {
+ skipaclchk, cr, mnt_ns) == 0) {
/*
* Remove setuid/setgid for non-privileged users
*/
@@ -2144,12 +2160,12 @@ top:
mutex_exit(&zp->z_lock);
if (mask & ATTR_MODE) {
- if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
+ if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,
+ mnt_ns) == 0) {
err = secpolicy_setid_setsticky_clear(ip, vap,
- &oldva, cr);
+ &oldva, cr, mnt_ns, zfs_i_user_ns(ip));
if (err)
goto out3;
-
trim_mask |= ATTR_MODE;
} else {
need_policy = TRUE;
@@ -2640,6 +2656,9 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
* tnm - New entry name.
* cr - credentials of caller.
* flags - case flags
+ * rflags - RENAME_* flags
+ * wa_vap - attributes for RENAME_WHITEOUT (must be a char 0:0).
+ * mnt_ns - user namespace of the mount
*
* RETURN: 0 on success, error code on failure.
*
@@ -2648,7 +2667,7 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
*/
int
zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
- cred_t *cr, int flags)
+ cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zuserns_t *mnt_ns)
{
znode_t *szp, *tzp;
zfsvfs_t *zfsvfs = ZTOZSB(sdzp);
@@ -2660,10 +2679,33 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
int error = 0;
int zflg = 0;
boolean_t waited = B_FALSE;
+ /* Needed for whiteout inode creation. */
+ boolean_t fuid_dirtied;
+ zfs_acl_ids_t acl_ids;
+ boolean_t have_acl = B_FALSE;
+ znode_t *wzp = NULL;
+
if (snm == NULL || tnm == NULL)
return (SET_ERROR(EINVAL));
+ if (rflags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
+ return (SET_ERROR(EINVAL));
+
+ /* Already checked by Linux VFS, but just to make sure. */
+ if (rflags & RENAME_EXCHANGE &&
+ (rflags & (RENAME_NOREPLACE | RENAME_WHITEOUT)))
+ return (SET_ERROR(EINVAL));
+
+ /*
+ * Make sure we only get wo_vap iff. RENAME_WHITEOUT and that it's the
+ * right kind of vattr_t for the whiteout file. These are set
+ * internally by ZFS so should never be incorrect.
+ */
+ VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL);
+ VERIFY_IMPLY(wo_vap, wo_vap->va_mode == S_IFCHR);
+ VERIFY_IMPLY(wo_vap, wo_vap->va_rdev == makedevice(0, 0));
+
if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0)
return (error);
zilog = zfsvfs->z_log;
@@ -2840,8 +2882,7 @@ top:
* Note that if target and source are the same, this can be
* done in a single check.
*/
-
- if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
+ if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr, mnt_ns)))
goto out;
if (S_ISDIR(ZTOI(szp)->i_mode)) {
@@ -2857,17 +2898,19 @@ top:
* Does target exist?
*/
if (tzp) {
+ if (rflags & RENAME_NOREPLACE) {
+ error = SET_ERROR(EEXIST);
+ goto out;
+ }
/*
- * Source and target must be the same type.
+ * Source and target must be the same type (unless exchanging).
*/
- if (S_ISDIR(ZTOI(szp)->i_mode)) {
- if (!S_ISDIR(ZTOI(tzp)->i_mode)) {
- error = SET_ERROR(ENOTDIR);
- goto out;
- }
- } else {
- if (S_ISDIR(ZTOI(tzp)->i_mode)) {
- error = SET_ERROR(EISDIR);
+ if (!(rflags & RENAME_EXCHANGE)) {
+ boolean_t s_is_dir = S_ISDIR(ZTOI(szp)->i_mode) != 0;
+ boolean_t t_is_dir = S_ISDIR(ZTOI(tzp)->i_mode) != 0;
+
+ if (s_is_dir != t_is_dir) {
+ error = SET_ERROR(s_is_dir ? ENOTDIR : EISDIR);
goto out;
}
}
@@ -2880,12 +2923,43 @@ top:
error = 0;
goto out;
}
+ } else if (rflags & RENAME_EXCHANGE) {
+ /* Target must exist for RENAME_EXCHANGE. */
+ error = SET_ERROR(ENOENT);
+ goto out;
+ }
+
+ /* Set up inode creation for RENAME_WHITEOUT. */
+ if (rflags & RENAME_WHITEOUT) {
+ /*
+ * Whiteout files are not regular files or directories, so to
+ * match zfs_create() we do not inherit the project id.
+ */
+ uint64_t wo_projid = ZFS_DEFAULT_PROJID;
+
+ error = zfs_zaccess(sdzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns);
+ if (error)
+ goto out;
+
+ if (!have_acl) {
+ error = zfs_acl_ids_create(sdzp, 0, wo_vap, cr, NULL,
+ &acl_ids, mnt_ns);
+ if (error)
+ goto out;
+ have_acl = B_TRUE;
+ }
+
+ if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, wo_projid)) {
+ error = SET_ERROR(EDQUOT);
+ goto out;
+ }
}
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
- dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
+ dmu_tx_hold_zap(tx, sdzp->z_id,
+ (rflags & RENAME_EXCHANGE) ? TRUE : FALSE, snm);
dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
if (sdzp != tdzp) {
dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
@@ -2895,7 +2969,21 @@ top:
dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, tzp);
}
+ if (rflags & RENAME_WHITEOUT) {
+ dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+ ZFS_SA_BASE_ATTR_SIZE);
+ dmu_tx_hold_zap(tx, sdzp->z_id, TRUE, snm);
+ dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
+ if (!zfsvfs->z_use_sa &&
+ acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+ dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+ 0, acl_ids.z_aclp->z_acl_bytes);
+ }
+ }
+ fuid_dirtied = zfsvfs->z_fuid_dirty;
+ if (fuid_dirtied)
+ zfs_fuid_txhold(zfsvfs, tx);
zfs_sa_upgrade_txholds(tx, szp);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
@@ -2925,58 +3013,110 @@ top:
return (error);
}
- if (tzp) /* Attempt to remove the existing target */
- error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);
+ /*
+ * Unlink the source.
+ */
+ szp->z_pflags |= ZFS_AV_MODIFIED;
+ if (tdzp->z_pflags & ZFS_PROJINHERIT)
+ szp->z_pflags |= ZFS_PROJINHERIT;
+
+ error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
+ (void *)&szp->z_pflags, sizeof (uint64_t), tx);
+ VERIFY0(error);
- if (error == 0) {
- error = zfs_link_create(tdl, szp, tx, ZRENAMING);
- if (error == 0) {
- szp->z_pflags |= ZFS_AV_MODIFIED;
- if (tdzp->z_pflags & ZFS_PROJINHERIT)
- szp->z_pflags |= ZFS_PROJINHERIT;
-
- error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
- (void *)&szp->z_pflags, sizeof (uint64_t), tx);
+ error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
+ if (error)
+ goto commit;
+
+ /*
+ * Unlink the target.
+ */
+ if (tzp) {
+ int tzflg = zflg;
+
+ if (rflags & RENAME_EXCHANGE) {
+ /* This inode will be re-linked soon. */
+ tzflg |= ZRENAMING;
+
+ tzp->z_pflags |= ZFS_AV_MODIFIED;
+ if (sdzp->z_pflags & ZFS_PROJINHERIT)
+ tzp->z_pflags |= ZFS_PROJINHERIT;
+
+ error = sa_update(tzp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
+ (void *)&tzp->z_pflags, sizeof (uint64_t), tx);
ASSERT0(error);
+ }
+ error = zfs_link_destroy(tdl, tzp, tx, tzflg, NULL);
+ if (error)
+ goto commit_link_szp;
+ }
- error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
- if (error == 0) {
- zfs_log_rename(zilog, tx, TX_RENAME |
- (flags & FIGNORECASE ? TX_CI : 0), sdzp,
- sdl->dl_name, tdzp, tdl->dl_name, szp);
- } else {
- /*
- * At this point, we have successfully created
- * the target name, but have failed to remove
- * the source name. Since the create was done
- * with the ZRENAMING flag, there are
- * complications; for one, the link count is
- * wrong. The easiest way to deal with this
- * is to remove the newly created target, and
- * return the original error. This must
- * succeed; fortunately, it is very unlikely to
- * fail, since we just created it.
- */
- VERIFY3U(zfs_link_destroy(tdl, szp, tx,
- ZRENAMING, NULL), ==, 0);
- }
- } else {
- /*
- * If we had removed the existing target, subsequent
- * call to zfs_link_create() to add back the same entry
- * but, the new dnode (szp) should not fail.
- */
- ASSERT(tzp == NULL);
+ /*
+ * Create the new target links:
+ * * We always link the target.
+ * * RENAME_EXCHANGE: Link the old target to the source.
+ * * RENAME_WHITEOUT: Create a whiteout inode in-place of the source.
+ */
+ error = zfs_link_create(tdl, szp, tx, ZRENAMING);
+ if (error) {
+ /*
+ * If we have removed the existing target, a subsequent call to
+ * zfs_link_create() to add back the same entry, but with a new
+ * dnode (szp), should not fail.
+ */
+ ASSERT3P(tzp, ==, NULL);
+ goto commit_link_tzp;
+ }
+
+ switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) {
+ case RENAME_EXCHANGE:
+ error = zfs_link_create(sdl, tzp, tx, ZRENAMING);
+ /*
+ * The same argument as zfs_link_create() failing for
+ * szp applies here, since the source directory must
+ * have had an entry we are replacing.
+ */
+ ASSERT0(error);
+ if (error)
+ goto commit_unlink_td_szp;
+ break;
+ case RENAME_WHITEOUT:
+ zfs_mknode(sdzp, wo_vap, tx, cr, 0, &wzp, &acl_ids);
+ error = zfs_link_create(sdl, wzp, tx, ZNEW);
+ if (error) {
+ zfs_znode_delete(wzp, tx);
+ remove_inode_hash(ZTOI(wzp));
+ goto commit_unlink_td_szp;
}
+ break;
}
+ if (fuid_dirtied)
+ zfs_fuid_sync(zfsvfs, tx);
+
+ switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) {
+ case RENAME_EXCHANGE:
+ zfs_log_rename_exchange(zilog, tx,
+ (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name,
+ tdzp, tdl->dl_name, szp);
+ break;
+ case RENAME_WHITEOUT:
+ zfs_log_rename_whiteout(zilog, tx,
+ (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name,
+ tdzp, tdl->dl_name, szp, wzp);
+ break;
+ default:
+ ASSERT0(rflags & ~RENAME_NOREPLACE);
+ zfs_log_rename(zilog, tx, (flags & FIGNORECASE ? TX_CI : 0),
+ sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp);
+ break;
+ }
+
+commit:
dmu_tx_commit(tx);
out:
- if (zl != NULL)
- zfs_rename_unlock(&zl);
-
- zfs_dirent_unlock(sdl);
- zfs_dirent_unlock(tdl);
+ if (have_acl)
+ zfs_acl_ids_free(&acl_ids);
zfs_znode_update_vfs(sdzp);
if (sdzp == tdzp)
@@ -2987,16 +3127,57 @@ out:
zfs_znode_update_vfs(szp);
zrele(szp);
+ if (wzp) {
+ zfs_znode_update_vfs(wzp);
+ zrele(wzp);
+ }
if (tzp) {
zfs_znode_update_vfs(tzp);
zrele(tzp);
}
+ if (zl != NULL)
+ zfs_rename_unlock(&zl);
+
+ zfs_dirent_unlock(sdl);
+ zfs_dirent_unlock(tdl);
+
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
zfs_exit(zfsvfs, FTAG);
return (error);
+
+ /*
+ * Clean-up path for broken link state.
+ *
+ * At this point we are in a (very) bad state, so we need to do our
+ * best to correct the state. In particular, all of the nlinks are
+ * wrong because we were destroying and creating links with ZRENAMING.
+ *
+ * In some form, all of these operations have to resolve the state:
+ *
+ * * link_destroy() *must* succeed. Fortunately, this is very likely
+ * since we only just created it.
+ *
+ * * link_create()s are allowed to fail (though they shouldn't because
+ * we only just unlinked them and are putting the entries back
+ * during clean-up). But if they fail, we can just forcefully drop
+ * the nlink value to (at the very least) avoid broken nlink values
+ * -- though in the case of non-empty directories we will have to
+ * panic (otherwise we'd have a leaked directory with a broken ..).
+ */
+commit_unlink_td_szp:
+ VERIFY0(zfs_link_destroy(tdl, szp, tx, ZRENAMING, NULL));
+commit_link_tzp:
+ if (tzp) {
+ if (zfs_link_create(tdl, tzp, tx, ZRENAMING))
+ VERIFY0(zfs_drop_nlink(tzp, tx, NULL));
+ }
+commit_link_szp:
+ if (zfs_link_create(sdl, szp, tx, ZRENAMING))
+ VERIFY0(zfs_drop_nlink(szp, tx, NULL));
+ goto commit;
}
/*
@@ -3008,6 +3189,7 @@ out:
* link - Name for new symlink entry.
* cr - credentials of caller.
* flags - case flags
+ * mnt_ns - user namespace of the mount
*
* OUT: zpp - Znode for new symbolic link.
*
@@ -3018,7 +3200,7 @@ out:
*/
int
zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
- znode_t **zpp, cred_t *cr, int flags)
+ znode_t **zpp, cred_t *cr, int flags, zuserns_t *mnt_ns)
{
znode_t *zp;
zfs_dirlock_t *dl;
@@ -3056,7 +3238,7 @@ zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
}
if ((error = zfs_acl_ids_create(dzp, 0,
- vap, cr, NULL, &acl_ids)) != 0) {
+ vap, cr, NULL, &acl_ids, mnt_ns)) != 0) {
zfs_exit(zfsvfs, FTAG);
return (error);
}
@@ -3073,7 +3255,7 @@ top:
return (error);
}
- if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
zfs_acl_ids_free(&acl_ids);
zfs_dirent_unlock(dl);
zfs_exit(zfsvfs, FTAG);
@@ -3325,7 +3507,8 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
return (SET_ERROR(EPERM));
}
- if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr,
+ kcred->user_ns))) {
zfs_exit(zfsvfs, FTAG);
return (error);
}
@@ -3951,7 +4134,8 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
* On Linux we can get here through truncate_range() which
* operates directly on inodes, so we need to check access rights.
*/
- if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr,
+ kcred->user_ns))) {
zfs_exit(zfsvfs, FTAG);
return (error);
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
index 73c21b6c00a8..662147ab4722 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
@@ -422,7 +422,12 @@ zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip)
break;
case S_IFDIR:
+#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER
+ ip->i_flags |= S_IOPS_WRAPPER;
+ ip->i_op = &zpl_dir_inode_operations.ops;
+#else
ip->i_op = &zpl_dir_inode_operations;
+#endif
ip->i_fop = &zpl_dir_file_operations;
ITOZ(ip)->z_zn_prefetch = B_TRUE;
break;
@@ -552,7 +557,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
zp->z_atime_dirty = B_FALSE;
zp->z_is_mapped = B_FALSE;
zp->z_is_ctldir = B_FALSE;
- zp->z_is_stale = B_FALSE;
zp->z_suspended = B_FALSE;
zp->z_sa_hdl = NULL;
zp->z_mapcnt = 0;
@@ -1960,7 +1964,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
}
VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
- cr, NULL, &acl_ids));
+ cr, NULL, &acl_ids, kcred->user_ns));
zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
ASSERT3P(zp, ==, rootzp);
error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
@@ -2136,7 +2140,6 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
} else if (error != ENOENT) {
return (error);
}
- error = 0;
for (;;) {
uint64_t pobj = 0;
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c b/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c
index 671300932384..6f2bf7ed7569 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c
@@ -231,6 +231,7 @@ zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key)
keydata_len = zio_crypt_table[crypt].ci_keylen;
memset(key, 0, sizeof (zio_crypt_key_t));
+ rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
/* fill keydata buffers and salt with random data */
ret = random_get_bytes((uint8_t *)&key->zk_guid, sizeof (uint64_t));
@@ -282,7 +283,6 @@ zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key)
key->zk_crypt = crypt;
key->zk_version = ZIO_CRYPT_KEY_CURRENT_VERSION;
key->zk_salt_count = 0;
- rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
return (0);
@@ -1968,7 +1968,6 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
if (locked) {
rw_exit(&key->zk_salt_lock);
- locked = B_FALSE;
}
if (authbuf != NULL)
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
index 837629e4a5e0..f0779c81dc75 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
@@ -371,7 +371,11 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
crhold(cr);
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
- zpl_vap_init(vap, dip, mode | S_IFDIR, cr);
+#ifdef HAVE_IOPS_MKDIR_USERNS
+ zpl_vap_init(vap, dip, mode | S_IFDIR, cr, user_ns);
+#else
+ zpl_vap_init(vap, dip, mode | S_IFDIR, cr, kcred->user_ns);
+#endif
error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0);
if (error == 0) {
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
index 25fc6b223297..c56e3691e70a 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
@@ -1085,7 +1085,7 @@ zpl_ioctl_setflags(struct file *filp, void __user *arg)
crhold(cr);
cookie = spl_fstrans_mark();
- err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr);
+ err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr, kcred->user_ns);
spl_fstrans_unmark(cookie);
crfree(cr);
@@ -1133,7 +1133,7 @@ zpl_ioctl_setxattr(struct file *filp, void __user *arg)
crhold(cr);
cookie = spl_fstrans_mark();
- err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr);
+ err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr, kcred->user_ns);
spl_fstrans_unmark(cookie);
crfree(cr);
@@ -1221,7 +1221,7 @@ zpl_ioctl_setdosflags(struct file *filp, void __user *arg)
crhold(cr);
cookie = spl_fstrans_mark();
- err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr);
+ err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr, kcred->user_ns);
spl_fstrans_unmark(cookie);
crfree(cr);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
index 7578753ed8ce..93eae7201506 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
@@ -24,6 +24,7 @@
*/
+#include <sys/sysmacros.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
@@ -33,7 +34,6 @@
#include <sys/zpl.h>
#include <sys/file.h>
-
static struct dentry *
zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
@@ -112,18 +112,22 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
}
void
-zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr)
+zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr,
+ zuserns_t *mnt_ns)
{
vap->va_mask = ATTR_MODE;
vap->va_mode = mode;
- vap->va_uid = crgetuid(cr);
- if (dir && dir->i_mode & S_ISGID) {
+ vap->va_uid = zfs_vfsuid_to_uid((struct user_namespace *)mnt_ns,
+ zfs_i_user_ns(dir), crgetuid(cr));
+
+ if (dir->i_mode & S_ISGID) {
vap->va_gid = KGID_TO_SGID(dir->i_gid);
if (S_ISDIR(mode))
vap->va_mode |= S_ISGID;
} else {
- vap->va_gid = crgetgid(cr);
+ vap->va_gid = zfs_vfsgid_to_gid((struct user_namespace *)mnt_ns,
+ zfs_i_user_ns(dir), crgetgid(cr));
}
}
@@ -140,14 +144,17 @@ zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag)
vattr_t *vap;
int error;
fstrans_cookie_t cookie;
+#ifndef HAVE_IOPS_CREATE_USERNS
+ zuserns_t *user_ns = kcred->user_ns;
+#endif
crhold(cr);
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
- zpl_vap_init(vap, dir, mode, cr);
+ zpl_vap_init(vap, dir, mode, cr, user_ns);
cookie = spl_fstrans_mark();
error = -zfs_create(ITOZ(dir), dname(dentry), vap, 0,
- mode, &zp, cr, 0, NULL);
+ mode, &zp, cr, 0, NULL, user_ns);
if (error == 0) {
error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
if (error == 0)
@@ -184,6 +191,9 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
vattr_t *vap;
int error;
fstrans_cookie_t cookie;
+#ifndef HAVE_IOPS_MKNOD_USERNS
+ zuserns_t *user_ns = kcred->user_ns;
+#endif
/*
* We currently expect Linux to supply rdev=0 for all sockets
@@ -194,12 +204,12 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
crhold(cr);
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
- zpl_vap_init(vap, dir, mode, cr);
+ zpl_vap_init(vap, dir, mode, cr, user_ns);
vap->va_rdev = rdev;
cookie = spl_fstrans_mark();
error = -zfs_create(ITOZ(dir), dname(dentry), vap, 0,
- mode, &zp, cr, 0, NULL);
+ mode, &zp, cr, 0, NULL, user_ns);
if (error == 0) {
error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
if (error == 0)
@@ -236,6 +246,9 @@ zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
vattr_t *vap;
int error;
fstrans_cookie_t cookie;
+#ifndef HAVE_TMPFILE_USERNS
+ zuserns_t *userns = kcred->user_ns;
+#endif
crhold(cr);
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
@@ -245,10 +258,10 @@ zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
*/
if (!IS_POSIXACL(dir))
mode &= ~current_umask();
- zpl_vap_init(vap, dir, mode, cr);
+ zpl_vap_init(vap, dir, mode, cr, userns);
cookie = spl_fstrans_mark();
- error = -zfs_tmpfile(dir, vap, 0, mode, &ip, cr, 0, NULL);
+ error = -zfs_tmpfile(dir, vap, 0, mode, &ip, cr, 0, NULL, userns);
if (error == 0) {
/* d_tmpfile will do drop_nlink, so we should set it first */
set_nlink(ip, 1);
@@ -311,13 +324,17 @@ zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
znode_t *zp;
int error;
fstrans_cookie_t cookie;
+#ifndef HAVE_IOPS_MKDIR_USERNS
+ zuserns_t *user_ns = kcred->user_ns;
+#endif
crhold(cr);
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
- zpl_vap_init(vap, dir, mode | S_IFDIR, cr);
+ zpl_vap_init(vap, dir, mode | S_IFDIR, cr, user_ns);
cookie = spl_fstrans_mark();
- error = -zfs_mkdir(ITOZ(dir), dname(dentry), vap, &zp, cr, 0, NULL);
+ error = -zfs_mkdir(ITOZ(dir), dname(dentry), vap, &zp, cr, 0, NULL,
+ user_ns);
if (error == 0) {
error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
if (error == 0)
@@ -439,7 +456,11 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
int error;
fstrans_cookie_t cookie;
+#ifdef HAVE_SETATTR_PREPARE_USERNS
+ error = zpl_setattr_prepare(user_ns, dentry, ia);
+#else
error = zpl_setattr_prepare(kcred->user_ns, dentry, ia);
+#endif
if (error)
return (error);
@@ -447,8 +468,20 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
vap->va_mask = ia->ia_valid & ATTR_IATTR_MASK;
vap->va_mode = ia->ia_mode;
- vap->va_uid = KUID_TO_SUID(ia->ia_uid);
- vap->va_gid = KGID_TO_SGID(ia->ia_gid);
+ if (ia->ia_valid & ATTR_UID)
+#ifdef HAVE_IATTR_VFSID
+ vap->va_uid = zfs_vfsuid_to_uid(user_ns, zfs_i_user_ns(ip),
+ __vfsuid_val(ia->ia_vfsuid));
+#else
+ vap->va_uid = KUID_TO_SUID(ia->ia_uid);
+#endif
+ if (ia->ia_valid & ATTR_GID)
+#ifdef HAVE_IATTR_VFSID
+ vap->va_gid = zfs_vfsgid_to_gid(user_ns, zfs_i_user_ns(ip),
+ __vfsgid_val(ia->ia_vfsgid));
+#else
+ vap->va_gid = KGID_TO_SGID(ia->ia_gid);
+#endif
vap->va_size = ia->ia_size;
vap->va_atime = ia->ia_atime;
vap->va_mtime = ia->ia_mtime;
@@ -458,7 +491,11 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
ip->i_atime = zpl_inode_timestamp_truncate(ia->ia_atime, ip);
cookie = spl_fstrans_mark();
- error = -zfs_setattr(ITOZ(ip), vap, 0, cr);
+#ifdef HAVE_SETATTR_PREPARE_USERNS
+ error = -zfs_setattr(ITOZ(ip), vap, 0, cr, user_ns);
+#else
+ error = -zfs_setattr(ITOZ(ip), vap, 0, cr, kcred->user_ns);
+#endif
if (!error && (ia->ia_valid & ATTR_MODE))
error = zpl_chmod_acl(ip);
@@ -474,32 +511,42 @@ static int
#ifdef HAVE_IOPS_RENAME_USERNS
zpl_rename2(struct user_namespace *user_ns, struct inode *sdip,
struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
- unsigned int flags)
+ unsigned int rflags)
#else
zpl_rename2(struct inode *sdip, struct dentry *sdentry,
- struct inode *tdip, struct dentry *tdentry, unsigned int flags)
+ struct inode *tdip, struct dentry *tdentry, unsigned int rflags)
#endif
{
cred_t *cr = CRED();
+ vattr_t *wo_vap = NULL;
int error;
fstrans_cookie_t cookie;
-
- /* We don't have renameat2(2) support */
- if (flags)
- return (-EINVAL);
+#ifndef HAVE_IOPS_RENAME_USERNS
+ zuserns_t *user_ns = kcred->user_ns;
+#endif
crhold(cr);
+ if (rflags & RENAME_WHITEOUT) {
+ wo_vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
+ zpl_vap_init(wo_vap, sdip, S_IFCHR, cr, user_ns);
+ wo_vap->va_rdev = makedevice(0, 0);
+ }
+
cookie = spl_fstrans_mark();
error = -zfs_rename(ITOZ(sdip), dname(sdentry), ITOZ(tdip),
- dname(tdentry), cr, 0);
+ dname(tdentry), cr, 0, rflags, wo_vap, user_ns);
spl_fstrans_unmark(cookie);
+ if (wo_vap)
+ kmem_free(wo_vap, sizeof (vattr_t));
crfree(cr);
ASSERT3S(error, <=, 0);
return (error);
}
-#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS)
+#if !defined(HAVE_IOPS_RENAME_USERNS) && \
+ !defined(HAVE_RENAME_WANTS_FLAGS) && \
+ !defined(HAVE_RENAME2)
static int
zpl_rename(struct inode *sdip, struct dentry *sdentry,
struct inode *tdip, struct dentry *tdentry)
@@ -521,14 +568,17 @@ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
znode_t *zp;
int error;
fstrans_cookie_t cookie;
+#ifndef HAVE_IOPS_SYMLINK_USERNS
+ zuserns_t *user_ns = kcred->user_ns;
+#endif
crhold(cr);
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
- zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr);
+ zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr, user_ns);
cookie = spl_fstrans_mark();
error = -zfs_symlink(ITOZ(dir), dname(dentry), vap,
- (char *)name, &zp, cr, 0);
+ (char *)name, &zp, cr, 0, user_ns);
if (error == 0) {
error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
if (error) {
@@ -698,46 +748,6 @@ out:
return (error);
}
-static int
-#ifdef HAVE_D_REVALIDATE_NAMEIDATA
-zpl_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
- unsigned int flags = (nd ? nd->flags : 0);
-#else
-zpl_revalidate(struct dentry *dentry, unsigned int flags)
-{
-#endif /* HAVE_D_REVALIDATE_NAMEIDATA */
- /* CSTYLED */
- zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
- int error;
-
- if (flags & LOOKUP_RCU)
- return (-ECHILD);
-
- /*
- * After a rollback negative dentries created before the rollback
- * time must be invalidated. Otherwise they can obscure files which
- * are only present in the rolled back dataset.
- */
- if (dentry->d_inode == NULL) {
- spin_lock(&dentry->d_lock);
- error = time_before(dentry->d_time, zfsvfs->z_rollback_time);
- spin_unlock(&dentry->d_lock);
-
- if (error)
- return (0);
- }
-
- /*
- * The dentry may reference a stale inode if a mounted file system
- * was rolled back to a point in time where the object didn't exist.
- */
- if (dentry->d_inode && ITOZ(dentry->d_inode)->z_is_stale)
- return (0);
-
- return (1);
-}
-
const struct inode_operations zpl_inode_operations = {
.setattr = zpl_setattr,
.getattr = zpl_getattr,
@@ -755,7 +765,12 @@ const struct inode_operations zpl_inode_operations = {
#endif /* CONFIG_FS_POSIX_ACL */
};
+#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER
+const struct inode_operations_wrapper zpl_dir_inode_operations = {
+ .ops = {
+#else
const struct inode_operations zpl_dir_inode_operations = {
+#endif
.create = zpl_create,
.lookup = zpl_lookup,
.link = zpl_link,
@@ -764,7 +779,9 @@ const struct inode_operations zpl_dir_inode_operations = {
.mkdir = zpl_mkdir,
.rmdir = zpl_rmdir,
.mknod = zpl_mknod,
-#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
+#ifdef HAVE_RENAME2
+ .rename2 = zpl_rename2,
+#elif defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
.rename = zpl_rename2,
#else
.rename = zpl_rename,
@@ -786,6 +803,10 @@ const struct inode_operations zpl_dir_inode_operations = {
#endif /* HAVE_SET_ACL */
.get_acl = zpl_get_acl,
#endif /* CONFIG_FS_POSIX_ACL */
+#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER
+ },
+ .rename2 = zpl_rename2,
+#endif
};
const struct inode_operations zpl_symlink_inode_operations = {
@@ -826,7 +847,3 @@ const struct inode_operations zpl_special_inode_operations = {
.get_acl = zpl_get_acl,
#endif /* CONFIG_FS_POSIX_ACL */
};
-
-dentry_operations_t zpl_dentry_operations = {
- .d_revalidate = zpl_revalidate,
-};
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
index e3945a2a05fe..63ba731dd804 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
@@ -374,7 +374,11 @@ const struct super_operations zpl_super_operations = {
struct file_system_type zpl_fs_type = {
.owner = THIS_MODULE,
.name = ZFS_DRIVER,
+#if defined(HAVE_IDMAP_MNT_API)
+ .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
+#else
.fs_flags = FS_USERNS_MOUNT,
+#endif
.mount = zpl_mount,
.kill_sb = zpl_kill_sb,
};
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
index a010667adfa8..99d9b3793f29 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
@@ -499,7 +499,7 @@ zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
vap->va_gid = crgetgid(cr);
error = -zfs_create(dxzp, (char *)name, vap, 0, 0644, &xzp,
- cr, 0, NULL);
+ cr, 0, NULL, kcred->user_ns);
if (error)
goto out;
}
@@ -738,9 +738,11 @@ __zpl_xattr_user_get(struct inode *ip, const char *name,
ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
static int
-__zpl_xattr_user_set(struct inode *ip, const char *name,
+__zpl_xattr_user_set(struct user_namespace *user_ns,
+ struct inode *ip, const char *name,
const void *value, size_t size, int flags)
{
+ (void) user_ns;
int error = 0;
/* xattr_resolve_name will do this for us if this is defined */
#ifndef HAVE_XATTR_HANDLER_NAME
@@ -846,9 +848,11 @@ __zpl_xattr_trusted_get(struct inode *ip, const char *name,
ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
static int
-__zpl_xattr_trusted_set(struct inode *ip, const char *name,
+__zpl_xattr_trusted_set(struct user_namespace *user_ns,
+ struct inode *ip, const char *name,
const void *value, size_t size, int flags)
{
+ (void) user_ns;
char *xattr_name;
int error;
@@ -914,9 +918,11 @@ __zpl_xattr_security_get(struct inode *ip, const char *name,
ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
static int
-__zpl_xattr_security_set(struct inode *ip, const char *name,
+__zpl_xattr_security_set(struct user_namespace *user_ns,
+ struct inode *ip, const char *name,
const void *value, size_t size, int flags)
{
+ (void) user_ns;
char *xattr_name;
int error;
/* xattr_resolve_name will do this for us if this is defined */
@@ -940,7 +946,7 @@ zpl_xattr_security_init_impl(struct inode *ip, const struct xattr *xattrs,
int error = 0;
for (xattr = xattrs; xattr->name != NULL; xattr++) {
- error = __zpl_xattr_security_set(ip,
+ error = __zpl_xattr_security_set(NULL, ip,
xattr->name, xattr->value, xattr->value_len, 0);
if (error < 0)
@@ -1300,7 +1306,8 @@ __zpl_xattr_acl_get_default(struct inode *ip, const char *name,
ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);
static int
-__zpl_xattr_acl_set_access(struct inode *ip, const char *name,
+__zpl_xattr_acl_set_access(struct user_namespace *mnt_ns,
+ struct inode *ip, const char *name,
const void *value, size_t size, int flags)
{
struct posix_acl *acl;
@@ -1314,8 +1321,14 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
return (-EOPNOTSUPP);
+#if defined(HAVE_XATTR_SET_USERNS)
+ if (!zpl_inode_owner_or_capable(mnt_ns, ip))
+ return (-EPERM);
+#else
+ (void) mnt_ns;
if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
return (-EPERM);
+#endif
if (value) {
acl = zpl_acl_from_xattr(value, size);
@@ -1339,7 +1352,8 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);
static int
-__zpl_xattr_acl_set_default(struct inode *ip, const char *name,
+__zpl_xattr_acl_set_default(struct user_namespace *mnt_ns,
+ struct inode *ip, const char *name,
const void *value, size_t size, int flags)
{
struct posix_acl *acl;
@@ -1353,8 +1367,14 @@ __zpl_xattr_acl_set_default(struct inode *ip, const char *name,
if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
return (-EOPNOTSUPP);
+#if defined(HAVE_XATTR_SET_USERNS)
+ if (!zpl_inode_owner_or_capable(mnt_ns, ip))
+ return (-EPERM);
+#else
+ (void) mnt_ns;
if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
return (-EPERM);
+#endif
if (value) {
acl = zpl_acl_from_xattr(value, size);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
index 0d4e0dcd5a3d..01e6456207b0 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -114,7 +114,7 @@ struct zvol_state_os {
boolean_t use_blk_mq;
};
-taskq_t *zvol_taskq;
+static taskq_t *zvol_taskq;
static struct ida zvol_ida;
typedef struct zv_request_stack {
@@ -1279,6 +1279,7 @@ zvol_os_create_minor(const char *name)
int error = 0;
int idx;
uint64_t hash = zvol_name_hash(name);
+ bool replayed_zil = B_FALSE;
if (zvol_inhibit_dev)
return (0);
@@ -1420,11 +1421,12 @@ zvol_os_create_minor(const char *name)
zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums);
if (spa_writeable(dmu_objset_spa(os))) {
if (zil_replay_disable)
- zil_destroy(zv->zv_zilog, B_FALSE);
+ replayed_zil = zil_destroy(zv->zv_zilog, B_FALSE);
else
- zil_replay(os, zv, zvol_replay_vector);
+ replayed_zil = zil_replay(os, zv, zvol_replay_vector);
}
- zil_close(zv->zv_zilog);
+ if (replayed_zil)
+ zil_close(zv->zv_zilog);
zv->zv_zilog = NULL;
/*
diff --git a/sys/contrib/openzfs/module/zcommon/zfs_prop.c b/sys/contrib/openzfs/module/zcommon/zfs_prop.c
index 0e91304ecd4b..9c65702b8d43 100644
--- a/sys/contrib/openzfs/module/zcommon/zfs_prop.c
+++ b/sys/contrib/openzfs/module/zcommon/zfs_prop.c
@@ -25,6 +25,7 @@
* Copyright 2016, Joyent, Inc.
* Copyright (c) 2019, Klara Inc.
* Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -369,6 +370,8 @@ zfs_prop_init(void)
static const zprop_index_t redundant_metadata_table[] = {
{ "all", ZFS_REDUNDANT_METADATA_ALL },
{ "most", ZFS_REDUNDANT_METADATA_MOST },
+ { "some", ZFS_REDUNDANT_METADATA_SOME },
+ { "none", ZFS_REDUNDANT_METADATA_NONE },
{ NULL }
};
@@ -388,7 +391,7 @@ zfs_prop_init(void)
zprop_register_index(ZFS_PROP_REDUNDANT_METADATA, "redundant_metadata",
ZFS_REDUNDANT_METADATA_ALL,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
- "all | most", "REDUND_MD",
+ "all | most | some | none", "REDUND_MD",
redundant_metadata_table, sfeatures);
zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
@@ -745,6 +748,8 @@ zfs_prop_init(void)
boolean_t
zfs_prop_delegatable(zfs_prop_t prop)
{
+ ASSERT3S(prop, >=, 0);
+ ASSERT3S(prop, <, ZFS_NUM_PROPS);
zprop_desc_t *pd = &zfs_prop_table[prop];
/* The mlslabel property is never delegatable. */
@@ -855,6 +860,8 @@ zfs_prop_valid_for_type(int prop, zfs_type_t types, boolean_t headcheck)
zprop_type_t
zfs_prop_get_type(zfs_prop_t prop)
{
+ ASSERT3S(prop, >=, 0);
+ ASSERT3S(prop, <, ZFS_NUM_PROPS);
return (zfs_prop_table[prop].pd_proptype);
}
@@ -864,6 +871,8 @@ zfs_prop_get_type(zfs_prop_t prop)
boolean_t
zfs_prop_readonly(zfs_prop_t prop)
{
+ ASSERT3S(prop, >=, 0);
+ ASSERT3S(prop, <, ZFS_NUM_PROPS);
return (zfs_prop_table[prop].pd_attr == PROP_READONLY ||
zfs_prop_table[prop].pd_attr == PROP_ONETIME ||
zfs_prop_table[prop].pd_attr == PROP_ONETIME_DEFAULT);
@@ -875,6 +884,8 @@ zfs_prop_readonly(zfs_prop_t prop)
boolean_t
zfs_prop_visible(zfs_prop_t prop)
{
+ ASSERT3S(prop, >=, 0);
+ ASSERT3S(prop, <, ZFS_NUM_PROPS);
return (zfs_prop_table[prop].pd_visible &&
zfs_prop_table[prop].pd_zfs_mod_supported);
}
@@ -885,6 +896,8 @@ zfs_prop_visible(zfs_prop_t prop)
boolean_t
zfs_prop_setonce(zfs_prop_t prop)
{
+ ASSERT3S(prop, >=, 0);
+ ASSERT3S(prop, <, ZFS_NUM_PROPS);
return (zfs_prop_table[prop].pd_attr == PROP_ONETIME ||
zfs_prop_table[prop].pd_attr == PROP_ONETIME_DEFAULT);
}
@@ -892,12 +905,16 @@ zfs_prop_setonce(zfs_prop_t prop)
const char *
zfs_prop_default_string(zfs_prop_t prop)
{
+ ASSERT3S(prop, >=, 0);
+ ASSERT3S(prop, <, ZFS_NUM_PROPS);
return (zfs_prop_table[prop].pd_strdefault);
}
uint64_t
zfs_prop_default_numeric(zfs_prop_t prop)
{
+ ASSERT3S(prop, >=, 0);
+ ASSERT3S(prop, <, ZFS_NUM_PROPS);
return (zfs_prop_table[prop].pd_numdefault);
}
@@ -908,6 +925,8 @@ zfs_prop_default_numeric(zfs_prop_t prop)
const char *
zfs_prop_to_name(zfs_prop_t prop)
{
+ ASSERT3S(prop, >=, 0);
+ ASSERT3S(prop, <, ZFS_NUM_PROPS);
return (zfs_prop_table[prop].pd_name);
}
@@ -917,6 +936,8 @@ zfs_prop_to_name(zfs_prop_t prop)
boolean_t
zfs_prop_inheritable(zfs_prop_t prop)
{
+ ASSERT3S(prop, >=, 0);
+ ASSERT3S(prop, <, ZFS_NUM_PROPS);
return (zfs_prop_table[prop].pd_attr == PROP_INHERIT ||
zfs_prop_table[prop].pd_attr == PROP_ONETIME);
}
@@ -969,6 +990,8 @@ zfs_prop_valid_keylocation(const char *str, boolean_t encrypted)
const char *
zfs_prop_values(zfs_prop_t prop)
{
+ ASSERT3S(prop, >=, 0);
+ ASSERT3S(prop, <, ZFS_NUM_PROPS);
return (zfs_prop_table[prop].pd_values);
}
@@ -980,6 +1003,8 @@ zfs_prop_values(zfs_prop_t prop)
int
zfs_prop_is_string(zfs_prop_t prop)
{
+ ASSERT3S(prop, >=, 0);
+ ASSERT3S(prop, <, ZFS_NUM_PROPS);
return (zfs_prop_table[prop].pd_proptype == PROP_TYPE_STRING ||
zfs_prop_table[prop].pd_proptype == PROP_TYPE_INDEX);
}
@@ -991,6 +1016,8 @@ zfs_prop_is_string(zfs_prop_t prop)
const char *
zfs_prop_column_name(zfs_prop_t prop)
{
+ ASSERT3S(prop, >=, 0);
+ ASSERT3S(prop, <, ZFS_NUM_PROPS);
return (zfs_prop_table[prop].pd_colname);
}
@@ -1001,6 +1028,8 @@ zfs_prop_column_name(zfs_prop_t prop)
boolean_t
zfs_prop_align_right(zfs_prop_t prop)
{
+ ASSERT3S(prop, >=, 0);
+ ASSERT3S(prop, <, ZFS_NUM_PROPS);
return (zfs_prop_table[prop].pd_rightalign);
}
diff --git a/sys/contrib/openzfs/module/zcommon/zpool_prop.c b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
index 4737bd628ddf..285b97909631 100644
--- a/sys/contrib/openzfs/module/zcommon/zpool_prop.c
+++ b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
@@ -420,6 +420,9 @@ vdev_prop_init(void)
boolean_na_table, sfeatures);
/* default index properties */
+ zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
+ PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
+ sfeatures);
/* hidden properties */
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,
diff --git a/sys/contrib/openzfs/module/zfs/abd.c b/sys/contrib/openzfs/module/zfs/abd.c
index 11a1e5112544..d4921d0ba7db 100644
--- a/sys/contrib/openzfs/module/zfs/abd.c
+++ b/sys/contrib/openzfs/module/zfs/abd.c
@@ -667,15 +667,15 @@ abd_return_buf(abd_t *abd, void *buf, size_t n)
{
abd_verify(abd);
ASSERT3U(abd->abd_size, >=, n);
+#ifdef ZFS_DEBUG
+ (void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
+#endif
if (abd_is_linear(abd)) {
ASSERT3P(buf, ==, abd_to_buf(abd));
} else {
ASSERT0(abd_cmp_buf(abd, buf, n));
zio_buf_free(buf, n);
}
-#ifdef ZFS_DEBUG
- (void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
-#endif
}
void
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index 33865f715b0f..f51f427c1bfd 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -419,12 +419,12 @@ boolean_t arc_warm;
/*
* These tunables are for performance analysis.
*/
-unsigned long zfs_arc_max = 0;
-unsigned long zfs_arc_min = 0;
-unsigned long zfs_arc_meta_limit = 0;
-unsigned long zfs_arc_meta_min = 0;
-static unsigned long zfs_arc_dnode_limit = 0;
-static unsigned long zfs_arc_dnode_reduce_percent = 10;
+uint64_t zfs_arc_max = 0;
+uint64_t zfs_arc_min = 0;
+uint64_t zfs_arc_meta_limit = 0;
+uint64_t zfs_arc_meta_min = 0;
+static uint64_t zfs_arc_dnode_limit = 0;
+static uint_t zfs_arc_dnode_reduce_percent = 10;
static uint_t zfs_arc_grow_retry = 0;
static uint_t zfs_arc_shrink_shift = 0;
static uint_t zfs_arc_p_min_shift = 0;
@@ -449,17 +449,17 @@ int zfs_compressed_arc_enabled = B_TRUE;
* ARC will evict meta buffers that exceed arc_meta_limit. This
* tunable make arc_meta_limit adjustable for different workloads.
*/
-static unsigned long zfs_arc_meta_limit_percent = 75;
+static uint64_t zfs_arc_meta_limit_percent = 75;
/*
* Percentage that can be consumed by dnodes of ARC meta buffers.
*/
-static unsigned long zfs_arc_dnode_limit_percent = 10;
+static uint_t zfs_arc_dnode_limit_percent = 10;
/*
* These tunables are Linux-specific
*/
-static unsigned long zfs_arc_sys_free = 0;
+static uint64_t zfs_arc_sys_free = 0;
static uint_t zfs_arc_min_prefetch_ms = 0;
static uint_t zfs_arc_min_prescient_prefetch_ms = 0;
static int zfs_arc_p_dampener_disable = 1;
@@ -781,12 +781,12 @@ uint64_t zfs_crc64_table[256];
#define L2ARC_FEED_TYPES 4
/* L2ARC Performance Tunables */
-unsigned long l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */
-unsigned long l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */
-unsigned long l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */
-unsigned long l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
-unsigned long l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */
-unsigned long l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */
+uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */
+uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */
+uint64_t l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */
+uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
+uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */
+uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */
int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */
int l2arc_feed_again = B_TRUE; /* turbo warmup */
int l2arc_norw = B_FALSE; /* no reads during writes */
@@ -909,7 +909,7 @@ static int l2arc_mfuonly = 0;
* will vary depending of how well the specific device handles
* these commands.
*/
-static unsigned long l2arc_trim_ahead = 0;
+static uint64_t l2arc_trim_ahead = 0;
/*
* Performance tuning of L2ARC persistence:
@@ -925,7 +925,7 @@ static unsigned long l2arc_trim_ahead = 0;
* not to waste space.
*/
static int l2arc_rebuild_enabled = B_TRUE;
-static unsigned long l2arc_rebuild_blocks_min_l2size = 1024 * 1024 * 1024;
+static uint64_t l2arc_rebuild_blocks_min_l2size = 1024 * 1024 * 1024;
/* L2ARC persistence rebuild control routines. */
void l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen);
@@ -3939,7 +3939,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, uint64_t *real_evicted)
* dropping from L1+L2 cached to L2-only,
* realloc to remove the L1 header.
*/
- hdr = arc_hdr_realloc(hdr, hdr_full_cache,
+ (void) arc_hdr_realloc(hdr, hdr_full_cache,
hdr_l2only_cache);
*real_evicted += HDR_FULL_SIZE - HDR_L2ONLY_SIZE;
} else {
@@ -4469,7 +4469,7 @@ restart:
* meta buffers. Requests to the upper layers will be made with
* increasingly large scan sizes until the ARC is below the limit.
*/
- if (meta_used > arc_meta_limit) {
+ if (meta_used > arc_meta_limit || arc_available_memory() < 0) {
if (type == ARC_BUFC_DATA) {
type = ARC_BUFC_METADATA;
} else {
@@ -5136,7 +5136,7 @@ arc_adapt(int bytes, arc_state_t *state)
if (!zfs_arc_p_dampener_disable)
mult = MIN(mult, 10); /* avoid wild arc_p adjustment */
- arc_p = MIN(arc_c - arc_p_min, arc_p + bytes * mult);
+ arc_p = MIN(arc_c - arc_p_min, arc_p + (uint64_t)bytes * mult);
} else if (state == arc_mfu_ghost) {
uint64_t delta;
@@ -5173,7 +5173,7 @@ arc_adapt(int bytes, arc_state_t *state)
atomic_add_64(&arc_c, (int64_t)bytes);
if (arc_c > arc_c_max)
arc_c = arc_c_max;
- else if (state == arc_anon)
+ else if (state == arc_anon && arc_p < arc_c >> 1)
atomic_add_64(&arc_p, (int64_t)bytes);
if (arc_p > arc_c)
arc_p = arc_c;
@@ -5386,7 +5386,8 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, const void *tag,
if (aggsum_upper_bound(&arc_sums.arcstat_size) < arc_c &&
hdr->b_l1hdr.b_state == arc_anon &&
(zfs_refcount_count(&arc_anon->arcs_size) +
- zfs_refcount_count(&arc_mru->arcs_size) > arc_p))
+ zfs_refcount_count(&arc_mru->arcs_size) > arc_p &&
+ arc_p < arc_c >> 1))
arc_p = MIN(arc_c, arc_p + size);
}
}
@@ -8539,6 +8540,7 @@ l2arc_dev_get_next(void)
else if (next == first)
break;
+ ASSERT3P(next, !=, NULL);
} while (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild ||
next->l2ad_trim_all);
@@ -11076,20 +11078,20 @@ EXPORT_SYMBOL(arc_add_prune_callback);
EXPORT_SYMBOL(arc_remove_prune_callback);
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min, param_set_arc_min,
- param_get_ulong, ZMOD_RW, "Minimum ARC size in bytes");
+ spl_param_get_u64, ZMOD_RW, "Minimum ARC size in bytes");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, max, param_set_arc_max,
- param_get_ulong, ZMOD_RW, "Maximum ARC size in bytes");
+ spl_param_get_u64, ZMOD_RW, "Maximum ARC size in bytes");
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit, param_set_arc_long,
- param_get_ulong, ZMOD_RW, "Metadata limit for ARC size in bytes");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit, param_set_arc_u64,
+ spl_param_get_u64, ZMOD_RW, "Metadata limit for ARC size in bytes");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit_percent,
- param_set_arc_long, param_get_ulong, ZMOD_RW,
+ param_set_arc_int, param_get_uint, ZMOD_RW,
"Percent of ARC size for ARC meta limit");
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_min, param_set_arc_long,
- param_get_ulong, ZMOD_RW, "Minimum ARC metadata size in bytes");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_min, param_set_arc_u64,
+ spl_param_get_u64, ZMOD_RW, "Minimum ARC metadata size in bytes");
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_prune, INT, ZMOD_RW,
"Meta objects to scan for prune");
@@ -11128,25 +11130,25 @@ ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min_prescient_prefetch_ms,
param_set_arc_int, param_get_uint, ZMOD_RW,
"Min life of prescient prefetched block in ms");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_max, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_max, U64, ZMOD_RW,
"Max write bytes per interval");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_boost, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_boost, U64, ZMOD_RW,
"Extra write bytes during device warmup");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom, U64, ZMOD_RW,
"Number of max device writes to precache");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom_boost, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom_boost, U64, ZMOD_RW,
"Compressed l2arc_headroom multiplier");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, trim_ahead, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, trim_ahead, U64, ZMOD_RW,
"TRIM ahead L2ARC write size multiplier");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_secs, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_secs, U64, ZMOD_RW,
"Seconds between L2ARC writing");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_min_ms, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_min_ms, U64, ZMOD_RW,
"Min feed interval in milliseconds");
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, noprefetch, INT, ZMOD_RW,
@@ -11164,7 +11166,7 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, meta_percent, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_enabled, INT, ZMOD_RW,
"Rebuild the L2ARC when importing a pool");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, U64, ZMOD_RW,
"Min size in bytes to write rebuild log blocks in L2ARC");
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
@@ -11176,17 +11178,17 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW,
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int,
param_get_uint, ZMOD_RW, "System free memory I/O throttle in bytes");
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, sys_free, param_set_arc_long,
- param_get_ulong, ZMOD_RW, "System free memory target size in bytes");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, sys_free, param_set_arc_u64,
+ spl_param_get_u64, ZMOD_RW, "System free memory target size in bytes");
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit, param_set_arc_long,
- param_get_ulong, ZMOD_RW, "Minimum bytes of dnodes in ARC");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit, param_set_arc_u64,
+ spl_param_get_u64, ZMOD_RW, "Minimum bytes of dnodes in ARC");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit_percent,
- param_set_arc_long, param_get_ulong, ZMOD_RW,
+ param_set_arc_int, param_get_uint, ZMOD_RW,
"Percent of ARC meta buffers for dnodes");
-ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, dnode_reduce_percent, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, dnode_reduce_percent, UINT, ZMOD_RW,
"Percentage of excess dnodes to try to unpin");
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, eviction_pct, UINT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/btree.c b/sys/contrib/openzfs/module/zfs/btree.c
index f0a9222a4308..4c25afaa8199 100644
--- a/sys/contrib/openzfs/module/zfs/btree.c
+++ b/sys/contrib/openzfs/module/zfs/btree.c
@@ -102,7 +102,7 @@ zfs_btree_poison_node(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
(void) memset(leaf->btl_elems, 0x0f, hdr->bth_first * size);
(void) memset(leaf->btl_elems +
(hdr->bth_first + hdr->bth_count) * size, 0x0f,
- BTREE_LEAF_ESIZE -
+ tree->bt_leaf_size - offsetof(zfs_btree_leaf_t, btl_elems) -
(hdr->bth_first + hdr->bth_count) * size);
}
#endif
@@ -173,16 +173,44 @@ zfs_btree_fini(void)
kmem_cache_destroy(zfs_btree_leaf_cache);
}
+static void *
+zfs_btree_leaf_alloc(zfs_btree_t *tree)
+{
+ if (tree->bt_leaf_size == BTREE_LEAF_SIZE)
+ return (kmem_cache_alloc(zfs_btree_leaf_cache, KM_SLEEP));
+ else
+ return (kmem_alloc(tree->bt_leaf_size, KM_SLEEP));
+}
+
+static void
+zfs_btree_leaf_free(zfs_btree_t *tree, void *ptr)
+{
+ if (tree->bt_leaf_size == BTREE_LEAF_SIZE)
+ return (kmem_cache_free(zfs_btree_leaf_cache, ptr));
+ else
+ return (kmem_free(ptr, tree->bt_leaf_size));
+}
+
void
zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *),
size_t size)
{
- ASSERT3U(size, <=, BTREE_LEAF_ESIZE / 2);
+ zfs_btree_create_custom(tree, compar, size, BTREE_LEAF_SIZE);
+}
+
+void
+zfs_btree_create_custom(zfs_btree_t *tree,
+ int (*compar) (const void *, const void *),
+ size_t size, size_t lsize)
+{
+ size_t esize = lsize - offsetof(zfs_btree_leaf_t, btl_elems);
+ ASSERT3U(size, <=, esize / 2);
memset(tree, 0, sizeof (*tree));
tree->bt_compar = compar;
tree->bt_elem_size = size;
- tree->bt_leaf_cap = P2ALIGN(BTREE_LEAF_ESIZE / size, 2);
+ tree->bt_leaf_size = lsize;
+ tree->bt_leaf_cap = P2ALIGN(esize / size, 2);
tree->bt_height = -1;
tree->bt_bulk = NULL;
}
@@ -290,7 +318,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
zfs_btree_core_t *node = NULL;
uint32_t child = 0;
- uint64_t depth = 0;
+ uint32_t depth = 0;
/*
* Iterate down the tree, finding which child the value should be in
@@ -811,8 +839,7 @@ zfs_btree_insert_into_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *leaf,
move_count++;
}
tree->bt_num_nodes++;
- zfs_btree_leaf_t *new_leaf = kmem_cache_alloc(zfs_btree_leaf_cache,
- KM_SLEEP);
+ zfs_btree_leaf_t *new_leaf = zfs_btree_leaf_alloc(tree);
zfs_btree_hdr_t *new_hdr = &new_leaf->btl_hdr;
new_hdr->bth_parent = leaf->btl_hdr.bth_parent;
new_hdr->bth_first = (tree->bt_bulk ? 0 : capacity / 4) +
@@ -1078,8 +1105,7 @@ zfs_btree_add_idx(zfs_btree_t *tree, const void *value,
ASSERT0(where->bti_offset);
tree->bt_num_nodes++;
- zfs_btree_leaf_t *leaf = kmem_cache_alloc(zfs_btree_leaf_cache,
- KM_SLEEP);
+ zfs_btree_leaf_t *leaf = zfs_btree_leaf_alloc(tree);
tree->bt_root = &leaf->btl_hdr;
tree->bt_height++;
@@ -1378,7 +1404,7 @@ zfs_btree_node_destroy(zfs_btree_t *tree, zfs_btree_hdr_t *node)
{
tree->bt_num_nodes--;
if (!zfs_btree_is_core(node)) {
- kmem_cache_free(zfs_btree_leaf_cache, node);
+ zfs_btree_leaf_free(tree, node);
} else {
kmem_free(node, sizeof (zfs_btree_core_t) +
BTREE_CORE_ELEMS * tree->bt_elem_size);
@@ -1991,7 +2017,7 @@ zfs_btree_verify_counts(zfs_btree_t *tree)
*/
static uint64_t
zfs_btree_verify_height_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr,
- int64_t height)
+ int32_t height)
{
if (!zfs_btree_is_core(hdr)) {
VERIFY0(height);
@@ -2117,8 +2143,10 @@ zfs_btree_verify_poison_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
for (size_t i = 0; i < hdr->bth_first * size; i++)
VERIFY3U(leaf->btl_elems[i], ==, 0x0f);
+ size_t esize = tree->bt_leaf_size -
+ offsetof(zfs_btree_leaf_t, btl_elems);
for (size_t i = (hdr->bth_first + hdr->bth_count) * size;
- i < BTREE_LEAF_ESIZE; i++)
+ i < esize; i++)
VERIFY3U(leaf->btl_elems[i], ==, 0x0f);
} else {
zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
diff --git a/sys/contrib/openzfs/module/zfs/dataset_kstats.c b/sys/contrib/openzfs/module/zfs/dataset_kstats.c
index b63f42a21e44..57b8faf213eb 100644
--- a/sys/contrib/openzfs/module/zfs/dataset_kstats.c
+++ b/sys/contrib/openzfs/module/zfs/dataset_kstats.c
@@ -128,8 +128,13 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
" snprintf() for kstat name returned %d",
(unsigned long long)dmu_objset_id(objset), n);
return (SET_ERROR(EINVAL));
+ } else if (n >= KSTAT_STRLEN) {
+ zfs_dbgmsg("failed to create dataset kstat for objset %lld: "
+ "kstat name length (%d) exceeds limit (%d)",
+ (unsigned long long)dmu_objset_id(objset),
+ n, KSTAT_STRLEN);
+ return (SET_ERROR(ENAMETOOLONG));
}
- ASSERT3U(n, <, KSTAT_STRLEN);
kstat_t *kstat = kstat_create(kstat_module_name, 0, kstat_name,
"dataset", KSTAT_TYPE_NAMED,
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index db1123d37d98..7982d9702896 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -227,8 +227,8 @@ typedef struct dbuf_cache {
dbuf_cache_t dbuf_caches[DB_CACHE_MAX];
/* Size limits for the caches */
-static unsigned long dbuf_cache_max_bytes = ULONG_MAX;
-static unsigned long dbuf_metadata_cache_max_bytes = ULONG_MAX;
+static uint64_t dbuf_cache_max_bytes = UINT64_MAX;
+static uint64_t dbuf_metadata_cache_max_bytes = UINT64_MAX;
/* Set the default sizes of the caches to log2 fraction of arc size */
static uint_t dbuf_cache_shift = 5;
@@ -1549,7 +1549,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
uint32_t aflags = ARC_FLAG_NOWAIT;
int err, zio_flags;
- err = zio_flags = 0;
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
@@ -2687,6 +2686,7 @@ dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx)
dbuf_dirty_record_t *dr;
dr = list_head(&db->db_dirty_records);
+ ASSERT3P(dr, !=, NULL);
ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
dl = &dr->dt.dl;
dl->dr_overridden_by = *bp;
@@ -2748,6 +2748,7 @@ dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
dmu_buf_will_not_fill(dbuf, tx);
dr = list_head(&db->db_dirty_records);
+ ASSERT3P(dr, !=, NULL);
ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
dl = &dr->dt.dl;
encode_embedded_bp_compressed(&dl->dr_overridden_by,
@@ -5120,7 +5121,7 @@ EXPORT_SYMBOL(dmu_buf_set_user_ie);
EXPORT_SYMBOL(dmu_buf_get_user);
EXPORT_SYMBOL(dmu_buf_get_blkptr);
-ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, max_bytes, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, max_bytes, U64, ZMOD_RW,
"Maximum size in bytes of the dbuf cache.");
ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, hiwater_pct, UINT, ZMOD_RW,
@@ -5129,7 +5130,7 @@ ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, hiwater_pct, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, lowater_pct, UINT, ZMOD_RW,
"Percentage below dbuf_cache_max_bytes when dbuf eviction stops.");
-ZFS_MODULE_PARAM(zfs_dbuf, dbuf_, metadata_cache_max_bytes, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_dbuf, dbuf_, metadata_cache_max_bytes, U64, ZMOD_RW,
"Maximum size in bytes of dbuf metadata cache.");
ZFS_MODULE_PARAM(zfs_dbuf, dbuf_, cache_shift, UINT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
index 9e67eb51f415..45304e7ddf7a 100644
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -28,6 +28,7 @@
* Copyright (c) 2019 Datto Inc.
* Copyright (c) 2019, Klara Inc.
* Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
#include <sys/dmu.h>
@@ -70,7 +71,7 @@ static int zfs_nopwrite_enabled = 1;
* will wait until the next TXG.
* A value of zero will disable this throttle.
*/
-static unsigned long zfs_per_txg_dirty_frees_percent = 30;
+static uint_t zfs_per_txg_dirty_frees_percent = 30;
/*
* Enable/disable forcing txg sync when dirty checking for holes with lseek().
@@ -1435,7 +1436,7 @@ dmu_return_arcbuf(arc_buf_t *buf)
*/
int
dmu_lightweight_write_by_dnode(dnode_t *dn, uint64_t offset, abd_t *abd,
- const zio_prop_t *zp, enum zio_flag flags, dmu_tx_t *tx)
+ const zio_prop_t *zp, zio_flag_t flags, dmu_tx_t *tx)
{
dbuf_dirty_record_t *dr =
dbuf_dirty_lightweight(dn, dbuf_whichblock(dn, 0, offset), tx);
@@ -1992,12 +1993,22 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
ZCHECKSUM_FLAG_EMBEDDED))
checksum = ZIO_CHECKSUM_FLETCHER_4;
- if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL ||
- (os->os_redundant_metadata ==
- ZFS_REDUNDANT_METADATA_MOST &&
- (level >= zfs_redundant_metadata_most_ditto_level ||
- DMU_OT_IS_METADATA(type) || (wp & WP_SPILL))))
+ switch (os->os_redundant_metadata) {
+ case ZFS_REDUNDANT_METADATA_ALL:
copies++;
+ break;
+ case ZFS_REDUNDANT_METADATA_MOST:
+ if (level >= zfs_redundant_metadata_most_ditto_level ||
+ DMU_OT_IS_METADATA(type) || (wp & WP_SPILL))
+ copies++;
+ break;
+ case ZFS_REDUNDANT_METADATA_SOME:
+ if (DMU_OT_IS_CRITICAL(type))
+ copies++;
+ break;
+ case ZFS_REDUNDANT_METADATA_NONE:
+ break;
+ }
} else if (wp & WP_NOFILL) {
ASSERT(level == 0);
@@ -2355,7 +2366,7 @@ EXPORT_SYMBOL(dmu_ot);
ZFS_MODULE_PARAM(zfs, zfs_, nopwrite_enabled, INT, ZMOD_RW,
"Enable NOP writes");
-ZFS_MODULE_PARAM(zfs, zfs_, per_txg_dirty_frees_percent, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, per_txg_dirty_frees_percent, UINT, ZMOD_RW,
"Percentage of dirtied blocks from frees in one TXG");
ZFS_MODULE_PARAM(zfs, zfs_, dmu_offset_next_sync, INT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c
index 4c20afcdb9c6..c17c829a04d8 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_objset.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c
@@ -32,6 +32,7 @@
* Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
* Copyright (c) 2019, Klara Inc.
* Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -287,7 +288,9 @@ redundant_metadata_changed_cb(void *arg, uint64_t newval)
* Inheritance and range checking should have been done by now.
*/
ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
- newval == ZFS_REDUNDANT_METADATA_MOST);
+ newval == ZFS_REDUNDANT_METADATA_MOST ||
+ newval == ZFS_REDUNDANT_METADATA_SOME ||
+ newval == ZFS_REDUNDANT_METADATA_NONE);
os->os_redundant_metadata = newval;
}
@@ -479,7 +482,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
arc_flags_t aflags = ARC_FLAG_WAIT;
zbookmark_phys_t zb;
int size;
- enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
+ zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c
index a9e4a6745905..339fb149a49f 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_recv.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c
@@ -646,7 +646,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
* so add the DS_HOLD_FLAG_DECRYPT flag only if we are dealing
* with a dataset we may encrypt.
*/
- if (drba->drba_dcp != NULL &&
+ if (drba->drba_dcp == NULL ||
drba->drba_dcp->cp_crypt != ZIO_CRYPT_OFF) {
dsflags |= DS_HOLD_FLAG_DECRYPT;
}
@@ -1344,7 +1344,7 @@ do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw,
dnode_t *dn;
abd_t *abd = rrd->abd;
zio_cksum_t bp_cksum = bp->blk_cksum;
- enum zio_flag flags = ZIO_FLAG_SPECULATIVE |
+ zio_flag_t flags = ZIO_FLAG_SPECULATIVE |
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL;
if (rwa->raw)
@@ -2186,7 +2186,7 @@ flush_write_batch_impl(struct receive_writer_arg *rwa)
zio_prop_t zp;
dmu_write_policy(rwa->os, dn, 0, 0, &zp);
- enum zio_flag zio_flags = 0;
+ zio_flag_t zio_flags = 0;
if (rwa->raw) {
zp.zp_encrypt = B_TRUE;
diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c
index 4ee3ffc352b8..ccb7eb20756d 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_send.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_send.c
@@ -934,7 +934,7 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range)
ASSERT3U(range->start_blkid + 1, ==, range->end_blkid);
if (BP_GET_TYPE(bp) == DMU_OT_SA) {
arc_flags_t aflags = ARC_FLAG_WAIT;
- enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
+ zio_flag_t zioflags = ZIO_FLAG_CANFAIL;
if (dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW) {
ASSERT(BP_IS_PROTECTED(bp));
@@ -1654,7 +1654,7 @@ issue_data_read(struct send_reader_thread_arg *srta, struct send_range *range)
!split_large_blocks && !BP_SHOULD_BYTESWAP(bp) &&
!BP_IS_EMBEDDED(bp) && !DMU_OT_IS_METADATA(BP_GET_TYPE(bp));
- enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
+ zio_flag_t zioflags = ZIO_FLAG_CANFAIL;
if (srta->featureflags & DMU_BACKUP_FEATURE_RAW) {
zioflags |= ZIO_FLAG_RAW;
@@ -2511,8 +2511,7 @@ dmu_send_impl(struct dmu_send_params *dspp)
}
if (featureflags & DMU_BACKUP_FEATURE_RAW) {
- uint64_t ivset_guid = (ancestor_zb != NULL) ?
- ancestor_zb->zbm_ivset_guid : 0;
+ uint64_t ivset_guid = ancestor_zb->zbm_ivset_guid;
nvlist_t *keynvl = NULL;
ASSERT(os->os_encrypted);
@@ -2716,6 +2715,10 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
dspp.numfromredactsnaps = NUM_SNAPS_NOT_REDACTED;
err = dmu_send_impl(&dspp);
}
+ if (dspp.fromredactsnaps)
+ kmem_free(dspp.fromredactsnaps,
+ dspp.numfromredactsnaps * sizeof (uint64_t));
+
dsl_dataset_rele(dspp.to_ds, FTAG);
return (err);
}
@@ -2924,6 +2927,10 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
/* dmu_send_impl will call dsl_pool_rele for us. */
err = dmu_send_impl(&dspp);
} else {
+ if (dspp.fromredactsnaps)
+ kmem_free(dspp.fromredactsnaps,
+ dspp.numfromredactsnaps *
+ sizeof (uint64_t));
dsl_pool_rele(dspp.dp, FTAG);
}
} else {
diff --git a/sys/contrib/openzfs/module/zfs/dmu_traverse.c b/sys/contrib/openzfs/module/zfs/dmu_traverse.c
index 2ed75640f68d..377634c72bba 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_traverse.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_traverse.c
@@ -111,6 +111,7 @@ traverse_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
if (claim_txg == 0 || bp->blk_birth < claim_txg)
return (0);
+ ASSERT3U(BP_GET_LSIZE(bp), !=, 0);
SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid,
ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));
@@ -670,7 +671,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
/* See comment on ZIL traversal in dsl_scan_visitds. */
if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
- enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
+ zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
uint32_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp;
arc_buf_t *buf;
diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
index 101d2ee7b7a2..1d63d7de65a1 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
@@ -58,7 +58,7 @@ unsigned int zfetch_max_distance = 64 * 1024 * 1024;
/* max bytes to prefetch indirects for per stream (default 64MB) */
unsigned int zfetch_max_idistance = 64 * 1024 * 1024;
/* max number of bytes in an array_read in which we allow prefetching (1MB) */
-unsigned long zfetch_array_rd_sz = 1024 * 1024;
+uint64_t zfetch_array_rd_sz = 1024 * 1024;
typedef struct zfetch_stats {
kstat_named_t zfetchstat_hits;
@@ -565,5 +565,5 @@ ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_idistance, UINT, ZMOD_RW,
"Max bytes to prefetch indirects for per stream");
-ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, array_rd_sz, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, array_rd_sz, U64, ZMOD_RW,
"Number of bytes in a array_read");
diff --git a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
index 8ca7ba8957aa..b95c94beff1f 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
@@ -229,7 +229,6 @@ dsl_bookmark_create_check_impl(dsl_pool_t *dp,
switch (error) {
case ESRCH:
/* happy path: new bmark doesn't exist, proceed after switch */
- error = 0;
break;
case 0:
error = SET_ERROR(EEXIST);
diff --git a/sys/contrib/openzfs/module/zfs/dsl_crypt.c b/sys/contrib/openzfs/module/zfs/dsl_crypt.c
index ce2e6ce742a2..382de208b01d 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_crypt.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_crypt.c
@@ -2671,6 +2671,7 @@ spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
objset_phys_t *osp = buf;
uint8_t portable_mac[ZIO_OBJSET_MAC_LEN];
uint8_t local_mac[ZIO_OBJSET_MAC_LEN];
+ const uint8_t zeroed_mac[ZIO_OBJSET_MAC_LEN] = {0};
/* look up the key from the spa's keystore */
ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck);
@@ -2696,8 +2697,21 @@ spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
if (memcmp(portable_mac, osp->os_portable_mac,
ZIO_OBJSET_MAC_LEN) != 0 ||
memcmp(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN) != 0) {
- abd_return_buf(abd, buf, datalen);
- return (SET_ERROR(ECKSUM));
+ /*
+ * If the MAC is zeroed out, we failed to decrypt it.
+ * This should only arise, at least on Linux,
+ * if we hit edge case handling for useraccounting, since we
+ * shouldn't get here without bailing out on error earlier
+ * otherwise.
+ *
+ * So if we're in that case, we can just fall through and
+ * special-casing noticing that it's zero will handle it
+ * elsewhere, since we can just regenerate it.
+ */
+ if (memcmp(local_mac, zeroed_mac, ZIO_OBJSET_MAC_LEN) != 0) {
+ abd_return_buf(abd, buf, datalen);
+ return (SET_ERROR(ECKSUM));
+ }
}
abd_return_buf(abd, buf, datalen);
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
index 7a066b786cd0..c7577fc584af 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
@@ -3421,7 +3421,8 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
conflicting_snaps = B_TRUE;
} else if (err == ESRCH) {
err = 0;
- } else if (err != 0) {
+ }
+ if (err != 0) {
goto out;
}
}
diff --git a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
index 1ecae0fe3865..2b33446e66af 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
@@ -92,7 +92,7 @@
* will be loaded into memory and shouldn't take up an inordinate amount of
* space. We settled on ~500000 entries, corresponding to roughly 128M.
*/
-unsigned long zfs_livelist_max_entries = 500000;
+uint64_t zfs_livelist_max_entries = 500000;
/*
* We can approximate how much of a performance gain a livelist will give us
@@ -542,6 +542,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
dle = avl_find(&dl->dl_tree, &dle_tofind, NULL);
ASSERT3P(dle, !=, NULL);
dle_prev = AVL_PREV(&dl->dl_tree, dle);
+ ASSERT3P(dle_prev, !=, NULL);
dle_enqueue_subobj(dl, dle_prev, dle->dle_bpobj.bpo_object, tx);
@@ -1039,7 +1040,7 @@ dsl_process_sub_livelist(bpobj_t *bpobj, bplist_t *to_free, zthr_t *t,
return (err);
}
-ZFS_MODULE_PARAM(zfs_livelist, zfs_livelist_, max_entries, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_livelist, zfs_livelist_, max_entries, U64, ZMOD_RW,
"Size to start the next sub-livelist in a livelist");
ZFS_MODULE_PARAM(zfs_livelist, zfs_livelist_, min_percent_shared, INT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dir.c b/sys/contrib/openzfs/module/zfs/dsl_dir.c
index d93c7f08c1c2..c1afaa6aaf82 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dir.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dir.c
@@ -54,6 +54,15 @@
#include "zfs_prop.h"
/*
+ * This controls if we verify the ZVOL quota or not.
+ * Currently, quotas are not implemented for ZVOLs.
+ * The quota size is the size of the ZVOL.
+ * The size of the volume already implies the ZVOL size quota.
+ * The quota mechanism can introduce a significant performance drop.
+ */
+static int zvol_enforce_quotas = B_TRUE;
+
+/*
* Filesystem and Snapshot Limits
* ------------------------------
*
@@ -815,6 +824,18 @@ dsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop,
ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT ||
prop == ZFS_PROP_SNAPSHOT_LIMIT);
+ if (prop == ZFS_PROP_SNAPSHOT_LIMIT) {
+ /*
+ * We don't enforce the limit for temporary snapshots. This is
+ * indicated by a NULL cred_t argument.
+ */
+ if (cr == NULL)
+ return (0);
+
+ count_prop = DD_FIELD_SNAPSHOT_COUNT;
+ } else {
+ count_prop = DD_FIELD_FILESYSTEM_COUNT;
+ }
/*
* If we're allowed to change the limit, don't enforce the limit
* e.g. this can happen if a snapshot is taken by an administrative
@@ -834,19 +855,6 @@ dsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop,
if (delta == 0)
return (0);
- if (prop == ZFS_PROP_SNAPSHOT_LIMIT) {
- /*
- * We don't enforce the limit for temporary snapshots. This is
- * indicated by a NULL cred_t argument.
- */
- if (cr == NULL)
- return (0);
-
- count_prop = DD_FIELD_SNAPSHOT_COUNT;
- } else {
- count_prop = DD_FIELD_FILESYSTEM_COUNT;
- }
-
/*
* If an ancestor has been provided, stop checking the limit once we
* hit that dir. We need this during rename so that we don't overcount
@@ -1268,6 +1276,7 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
uint64_t quota;
struct tempreserve *tr;
int retval;
+ uint64_t ext_quota;
uint64_t ref_rsrv;
top_of_function:
@@ -1311,7 +1320,9 @@ top_of_function:
* If this transaction will result in a net free of space,
* we want to let it through.
*/
- if (ignorequota || netfree || dsl_dir_phys(dd)->dd_quota == 0)
+ if (ignorequota || netfree || dsl_dir_phys(dd)->dd_quota == 0 ||
+ (tx->tx_objset && dmu_objset_type(tx->tx_objset) == DMU_OST_ZVOL &&
+ zvol_enforce_quotas == B_FALSE))
quota = UINT64_MAX;
else
quota = dsl_dir_phys(dd)->dd_quota;
@@ -1343,7 +1354,16 @@ top_of_function:
* on-disk is over quota and there are no pending changes
* or deferred frees (which may free up space for us).
*/
- if (used_on_disk + est_inflight >= quota) {
+ ext_quota = quota >> 5;
+ if (quota == UINT64_MAX)
+ ext_quota = 0;
+
+ if (used_on_disk >= quota) {
+ /* Quota exceeded */
+ mutex_exit(&dd->dd_lock);
+ DMU_TX_STAT_BUMP(dmu_tx_quota);
+ return (retval);
+ } else if (used_on_disk + est_inflight >= quota + ext_quota) {
if (est_inflight > 0 || used_on_disk < quota) {
retval = SET_ERROR(ERESTART);
} else {
@@ -1390,10 +1410,9 @@ top_of_function:
ignorequota = (dsl_dir_phys(dd)->dd_head_dataset_obj == 0);
first = B_FALSE;
goto top_of_function;
-
- } else {
- return (0);
}
+
+ return (0);
}
/*
@@ -2474,3 +2493,7 @@ dsl_dir_cancel_waiters(dsl_dir_t *dd)
EXPORT_SYMBOL(dsl_dir_set_quota);
EXPORT_SYMBOL(dsl_dir_set_reservation);
#endif
+
+/* CSTYLED */
+ZFS_MODULE_PARAM(zfs, , zvol_enforce_quotas, INT, ZMOD_RW,
+ "Enable strict ZVOL quota enforcment");
diff --git a/sys/contrib/openzfs/module/zfs/dsl_pool.c b/sys/contrib/openzfs/module/zfs/dsl_pool.c
index 4fd3722a051e..5ca918a87ee1 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_pool.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_pool.c
@@ -99,8 +99,8 @@
* capped at zfs_dirty_data_max_max. It can also be overridden with a module
* parameter.
*/
-unsigned long zfs_dirty_data_max = 0;
-unsigned long zfs_dirty_data_max_max = 0;
+uint64_t zfs_dirty_data_max = 0;
+uint64_t zfs_dirty_data_max_max = 0;
uint_t zfs_dirty_data_max_percent = 10;
uint_t zfs_dirty_data_max_max_percent = 25;
@@ -109,7 +109,7 @@ uint_t zfs_dirty_data_max_max_percent = 25;
* when approaching the limit until log data is cleared out after txg sync.
* It only counts TX_WRITE log with WR_COPIED or WR_NEED_COPY.
*/
-unsigned long zfs_wrlog_data_max = 0;
+uint64_t zfs_wrlog_data_max = 0;
/*
* If there's at least this much dirty data (as a percentage of
@@ -138,7 +138,7 @@ uint_t zfs_delay_min_dirty_percent = 60;
* Note: zfs_delay_scale * zfs_dirty_data_max must be < 2^64, due to the
* multiply in dmu_tx_delay().
*/
-unsigned long zfs_delay_scale = 1000 * 1000 * 1000 / 2000;
+uint64_t zfs_delay_scale = 1000 * 1000 * 1000 / 2000;
/*
* This determines the number of threads used by the dp_sync_taskq.
@@ -331,7 +331,6 @@ dsl_pool_open(dsl_pool_t *dp)
/*
* We might not have created the remap bpobj yet.
*/
- err = 0;
} else {
goto out;
}
@@ -1465,20 +1464,20 @@ ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max_percent, UINT, ZMOD_RD,
ZFS_MODULE_PARAM(zfs, zfs_, delay_min_dirty_percent, UINT, ZMOD_RW,
"Transaction delay threshold");
-ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, U64, ZMOD_RW,
"Determines the dirty space limit");
-ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, U64, ZMOD_RW,
"The size limit of write-transaction zil log data");
/* zfs_dirty_data_max_max only applied at module load in arc_init(). */
-ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, ULONG, ZMOD_RD,
+ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, U64, ZMOD_RD,
"zfs_dirty_data_max upper bound in bytes");
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_sync_percent, UINT, ZMOD_RW,
"Dirty data txg sync threshold as a percentage of zfs_dirty_data_max");
-ZFS_MODULE_PARAM(zfs, zfs_, delay_scale, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, delay_scale, U64, ZMOD_RW,
"How quickly delay approaches infinity");
ZFS_MODULE_PARAM(zfs, zfs_, sync_taskq_batch_pct, INT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/dsl_prop.c b/sys/contrib/openzfs/module/zfs/dsl_prop.c
index 610e887b3fba..d1c0059092b1 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_prop.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_prop.c
@@ -23,6 +23,7 @@
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright 2019 Joyent, Inc.
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
#include <sys/zfs_context.h>
@@ -41,6 +42,7 @@
#define ZPROP_INHERIT_SUFFIX "$inherit"
#define ZPROP_RECVD_SUFFIX "$recvd"
+#define ZPROP_IUV_SUFFIX "$iuv"
static int
dodefault(zfs_prop_t prop, int intsz, int numints, void *buf)
@@ -69,6 +71,17 @@ dodefault(zfs_prop_t prop, int intsz, int numints, void *buf)
return (0);
}
+static int
+dsl_prop_known_index(zfs_prop_t prop, uint64_t value)
+{
+ const char *str = NULL;
+ if (prop != ZPROP_CONT && prop != ZPROP_INVAL &&
+ zfs_prop_get_type(prop) == PROP_TYPE_INDEX)
+ return (!zfs_prop_index_to_string(prop, value, &str));
+
+ return (-1);
+}
+
int
dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
int intsz, int numints, void *buf, char *setpoint, boolean_t snapshot)
@@ -81,6 +94,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
boolean_t inheriting = B_FALSE;
char *inheritstr;
char *recvdstr;
+ char *iuvstr;
ASSERT(dsl_pool_config_held(dd->dd_pool));
@@ -91,6 +105,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop));
inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX);
recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX);
+ iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
/*
* Note: dd may become NULL, therefore we shouldn't dereference it
@@ -105,6 +120,18 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
inheriting = B_TRUE;
}
+ /* Check for an iuv value. */
+ err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj,
+ iuvstr, intsz, numints, buf);
+ if (dsl_prop_known_index(zfs_name_to_prop(propname),
+ *(uint64_t *)buf) != 1)
+ err = ENOENT;
+ if (err != ENOENT) {
+ if (setpoint != NULL && err == 0)
+ dsl_dir_name(dd, setpoint);
+ break;
+ }
+
/* Check for a local value. */
err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj,
propname, intsz, numints, buf);
@@ -155,6 +182,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
kmem_strfree(inheritstr);
kmem_strfree(recvdstr);
+ kmem_strfree(iuvstr);
return (err);
}
@@ -647,6 +675,45 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
dsl_dir_rele(dd, FTAG);
}
+
+/*
+ * For newer values in zfs index type properties, we add a new key
+ * propname$iuv (iuv = Ignore Unknown Values) to the properties zap object
+ * to store the new property value and store the default value in the
+ * existing prop key. So that the propname$iuv key is ignored by the older zfs
+ * versions and the default property value from the existing prop key is
+ * used.
+ */
+static void
+dsl_prop_set_iuv(objset_t *mos, uint64_t zapobj, const char *propname,
+ int intsz, int numints, const void *value, dmu_tx_t *tx)
+{
+ char *iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
+ boolean_t iuv = B_FALSE;
+ zfs_prop_t prop = zfs_name_to_prop(propname);
+
+ switch (prop) {
+ case ZFS_PROP_REDUNDANT_METADATA:
+ if (*(uint64_t *)value == ZFS_REDUNDANT_METADATA_SOME ||
+ *(uint64_t *)value == ZFS_REDUNDANT_METADATA_NONE)
+ iuv = B_TRUE;
+ break;
+ default:
+ break;
+ }
+
+ if (iuv) {
+ VERIFY0(zap_update(mos, zapobj, iuvstr, intsz, numints,
+ value, tx));
+ uint64_t val = zfs_prop_default_numeric(prop);
+ VERIFY0(zap_update(mos, zapobj, propname, intsz, numints,
+ &val, tx));
+ } else {
+ zap_remove(mos, zapobj, iuvstr, tx);
+ }
+ kmem_strfree(iuvstr);
+}
+
void
dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
zprop_source_t source, int intsz, int numints, const void *value,
@@ -659,6 +726,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
const char *valstr = NULL;
char *inheritstr;
char *recvdstr;
+ char *iuvstr;
char *tbuf = NULL;
int err;
uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa);
@@ -692,6 +760,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX);
recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX);
+ iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
switch ((int)source) {
case ZPROP_SRC_NONE:
@@ -709,11 +778,14 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
/*
* remove propname$inherit
* set propname -> value
+ * set propname$iuv -> new property value
*/
err = zap_remove(mos, zapobj, inheritstr, tx);
ASSERT(err == 0 || err == ENOENT);
VERIFY0(zap_update(mos, zapobj, propname,
intsz, numints, value, tx));
+ (void) dsl_prop_set_iuv(mos, zapobj, propname, intsz,
+ numints, value, tx);
break;
case ZPROP_SRC_INHERITED:
/*
@@ -723,6 +795,8 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
*/
err = zap_remove(mos, zapobj, propname, tx);
ASSERT(err == 0 || err == ENOENT);
+ err = zap_remove(mos, zapobj, iuvstr, tx);
+ ASSERT(err == 0 || err == ENOENT);
if (version >= SPA_VERSION_RECVD_PROPS &&
dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) {
dummy = 0;
@@ -763,6 +837,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
kmem_strfree(inheritstr);
kmem_strfree(recvdstr);
+ kmem_strfree(iuvstr);
/*
* If we are left with an empty snap zap we can destroy it.
@@ -1012,6 +1087,14 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
propname = za.za_name;
source = setpoint;
+
+ /* Skip if iuv entries are present. */
+ valstr = kmem_asprintf("%s%s", propname,
+ ZPROP_IUV_SUFFIX);
+ err = zap_contains(mos, propobj, valstr);
+ kmem_strfree(valstr);
+ if (err == 0)
+ continue;
} else if (strcmp(suffix, ZPROP_INHERIT_SUFFIX) == 0) {
/* Skip explicitly inherited entries. */
continue;
@@ -1044,6 +1127,16 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
source = ((flags & DSL_PROP_GET_INHERITING) ?
setpoint : ZPROP_SOURCE_VAL_RECVD);
+ } else if (strcmp(suffix, ZPROP_IUV_SUFFIX) == 0) {
+ (void) strlcpy(buf, za.za_name,
+ MIN(sizeof (buf), suffix - za.za_name + 1));
+ propname = buf;
+ source = setpoint;
+ prop = zfs_name_to_prop(propname);
+
+ if (dsl_prop_known_index(prop,
+ za.za_first_integer) != 1)
+ continue;
} else {
/*
* For backward compatibility, skip suffixes we don't
diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c
index f0cd1feaf55b..03c2aa313af0 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_scan.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c
@@ -147,13 +147,13 @@ static int zfs_scan_strict_mem_lim = B_FALSE;
* overload the drives with I/O, since that is protected by
* zfs_vdev_scrub_max_active.
*/
-static unsigned long zfs_scan_vdev_limit = 4 << 20;
+static uint64_t zfs_scan_vdev_limit = 4 << 20;
static uint_t zfs_scan_issue_strategy = 0;
/* don't queue & sort zios, go direct */
static int zfs_scan_legacy = B_FALSE;
-static unsigned long zfs_scan_max_ext_gap = 2 << 20; /* in bytes */
+static uint64_t zfs_scan_max_ext_gap = 2 << 20; /* in bytes */
/*
* fill_weight is non-tunable at runtime, so we copy it at module init from
@@ -192,9 +192,9 @@ static int zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */
static int zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */
static const enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
/* max number of blocks to free in a single TXG */
-static unsigned long zfs_async_block_max_blocks = ULONG_MAX;
+static uint64_t zfs_async_block_max_blocks = UINT64_MAX;
/* max number of dedup blocks to free in a single TXG */
-static unsigned long zfs_max_async_dedup_frees = 100000;
+static uint64_t zfs_max_async_dedup_frees = 100000;
/* set to disable resilver deferring */
static int zfs_resilver_disable_defer = B_FALSE;
@@ -1470,6 +1470,7 @@ dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
if (claim_txg == 0 || bp->blk_birth < claim_txg)
return (0);
+ ASSERT3U(BP_GET_LSIZE(bp), !=, 0);
SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET],
lr->lr_foid, ZB_ZIL_LEVEL,
lr->lr_offset / BP_GET_LSIZE(bp));
@@ -4446,7 +4447,7 @@ dsl_scan_assess_vdev(dsl_pool_t *dp, vdev_t *vd)
spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER);
}
-ZFS_MODULE_PARAM(zfs, zfs_, scan_vdev_limit, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, scan_vdev_limit, U64, ZMOD_RW,
"Max bytes in flight per leaf vdev for scrubs and resilvers");
ZFS_MODULE_PARAM(zfs, zfs_, scrub_min_time_ms, UINT, ZMOD_RW,
@@ -4470,10 +4471,10 @@ ZFS_MODULE_PARAM(zfs, zfs_, no_scrub_io, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, no_scrub_prefetch, INT, ZMOD_RW,
"Set to disable scrub prefetching");
-ZFS_MODULE_PARAM(zfs, zfs_, async_block_max_blocks, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, async_block_max_blocks, U64, ZMOD_RW,
"Max number of blocks freed in one txg");
-ZFS_MODULE_PARAM(zfs, zfs_, max_async_dedup_frees, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, max_async_dedup_frees, U64, ZMOD_RW,
"Max number of dedup blocks freed in one txg");
ZFS_MODULE_PARAM(zfs, zfs_, free_bpobj_enabled, INT, ZMOD_RW,
@@ -4494,7 +4495,7 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_legacy, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, scan_checkpoint_intval, UINT, ZMOD_RW,
"Scan progress on-disk checkpointing interval");
-ZFS_MODULE_PARAM(zfs, zfs_, scan_max_ext_gap, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, scan_max_ext_gap, U64, ZMOD_RW,
"Max gap in bytes between sequential scrub / resilver I/Os");
ZFS_MODULE_PARAM(zfs, zfs_, scan_mem_lim_soft_fact, UINT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/fm.c b/sys/contrib/openzfs/module/zfs/fm.c
index 32b5cf8facd1..3f05d759770b 100644
--- a/sys/contrib/openzfs/module/zfs/fm.c
+++ b/sys/contrib/openzfs/module/zfs/fm.c
@@ -955,6 +955,7 @@ fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
}
atomic_inc_64(
&erpt_kstat_data.fmri_set_failed.value.ui64);
+ va_end(ap);
return;
}
}
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
index efcfeecd778e..c624833bc981 100644
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -51,12 +51,12 @@
* operation, we will try to write this amount of data to each disk before
* moving on to the next top-level vdev.
*/
-static unsigned long metaslab_aliquot = 1024 * 1024;
+static uint64_t metaslab_aliquot = 1024 * 1024;
/*
* For testing, make some blocks above a certain size be gang blocks.
*/
-unsigned long metaslab_force_ganging = SPA_MAXBLOCKSIZE + 1;
+uint64_t metaslab_force_ganging = SPA_MAXBLOCKSIZE + 1;
/*
* In pools where the log space map feature is not enabled we touch
@@ -286,7 +286,7 @@ static const int max_disabled_ms = 3;
* Time (in seconds) to respect ms_max_size when the metaslab is not loaded.
* To avoid 64-bit overflow, don't set above UINT32_MAX.
*/
-static unsigned long zfs_metaslab_max_size_cache_sec = 1 * 60 * 60; /* 1 hour */
+static uint64_t zfs_metaslab_max_size_cache_sec = 1 * 60 * 60; /* 1 hour */
/*
* Maximum percentage of memory to use on storing loaded metaslabs. If loading
@@ -5131,8 +5131,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
if (vd != NULL && vd->vdev_mg != NULL) {
mg = vdev_get_mg(vd, mc);
- if (flags & METASLAB_HINTBP_AVOID &&
- mg->mg_next != NULL)
+ if (flags & METASLAB_HINTBP_AVOID)
mg = mg->mg_next;
} else {
mg = mca->mca_rotor;
@@ -5201,12 +5200,11 @@ top:
ASSERT(mg->mg_initialized);
/*
- * Avoid writing single-copy data to a failing,
+ * Avoid writing single-copy data to an unhealthy,
* non-redundant vdev, unless we've already tried all
* other vdevs.
*/
- if ((vd->vdev_stat.vs_write_errors > 0 ||
- vd->vdev_state < VDEV_STATE_HEALTHY) &&
+ if (vd->vdev_state < VDEV_STATE_HEALTHY &&
d == 0 && !try_hard && vd->vdev_children == 0) {
metaslab_trace_add(zal, mg, NULL, psize, d,
TRACE_VDEV_ERROR, allocator);
@@ -6203,7 +6201,7 @@ metaslab_unflushed_txg(metaslab_t *ms)
return (ms->ms_unflushed_txg);
}
-ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, aliquot, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, aliquot, U64, ZMOD_RW,
"Allocation granularity (a.k.a. stripe size)");
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, debug_load, INT, ZMOD_RW,
@@ -6251,7 +6249,7 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, segment_weight_enabled, INT,
ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, switch_threshold, INT, ZMOD_RW,
"Segment-based metaslab selection maximum buckets before switching");
-ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging, U64, ZMOD_RW,
"Blocks larger than this size are forced to be gang blocks");
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, UINT, ZMOD_RW,
@@ -6260,7 +6258,7 @@ ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_use_largest_segment, INT, ZMOD_RW,
"When looking in size tree, use largest segment instead of exact fit");
-ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, max_size_cache_sec, ULONG,
+ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, max_size_cache_sec, U64,
ZMOD_RW, "How long to trust the cached max chunk size of a metaslab");
ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, mem_limit, UINT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/mmp.c b/sys/contrib/openzfs/module/zfs/mmp.c
index 92fd6c422330..ef0e01df390f 100644
--- a/sys/contrib/openzfs/module/zfs/mmp.c
+++ b/sys/contrib/openzfs/module/zfs/mmp.c
@@ -156,7 +156,7 @@
* vary with the I/O load and this observed value is the ub_mmp_delay which is
* stored in the uberblock. The minimum allowed value is 100 ms.
*/
-ulong_t zfs_multihost_interval = MMP_DEFAULT_INTERVAL;
+uint64_t zfs_multihost_interval = MMP_DEFAULT_INTERVAL;
/*
* Used to control the duration of the activity test on import. Smaller values
@@ -303,8 +303,10 @@ mmp_next_leaf(spa_t *spa)
do {
leaf = list_next(&spa->spa_leaf_list, leaf);
- if (leaf == NULL)
+ if (leaf == NULL) {
leaf = list_head(&spa->spa_leaf_list);
+ ASSERT3P(leaf, !=, NULL);
+ }
/*
* We skip unwritable, offline, detached, and dRAID spare
@@ -548,11 +550,11 @@ mmp_thread(void *arg)
uint32_t mmp_fail_intervals = MMP_FAIL_INTVS_OK(
zfs_multihost_fail_intervals);
hrtime_t mmp_fail_ns = mmp_fail_intervals * mmp_interval;
- boolean_t last_spa_suspended = suspended;
- boolean_t last_spa_multihost = multihost;
- uint64_t last_mmp_interval = mmp_interval;
- uint32_t last_mmp_fail_intervals = mmp_fail_intervals;
- hrtime_t last_mmp_fail_ns = mmp_fail_ns;
+ boolean_t last_spa_suspended;
+ boolean_t last_spa_multihost;
+ uint64_t last_mmp_interval;
+ uint32_t last_mmp_fail_intervals;
+ hrtime_t last_mmp_fail_ns;
callb_cpr_t cpr;
int skip_wait = 0;
@@ -734,7 +736,7 @@ mmp_signal_all_threads(void)
/* BEGIN CSTYLED */
ZFS_MODULE_PARAM_CALL(zfs_multihost, zfs_multihost_, interval,
- param_set_multihost_interval, param_get_ulong, ZMOD_RW,
+ param_set_multihost_interval, spl_param_get_u64, ZMOD_RW,
"Milliseconds between mmp writes to each leaf");
/* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/zfs/range_tree.c b/sys/contrib/openzfs/module/zfs/range_tree.c
index a2923d1664c7..894c30fcae16 100644
--- a/sys/contrib/openzfs/module/zfs/range_tree.c
+++ b/sys/contrib/openzfs/module/zfs/range_tree.c
@@ -369,6 +369,7 @@ range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
* invalid as soon as we do any mutating btree operations.
*/
rs_after = zfs_btree_find(&rt->rt_root, &tmp, &where_after);
+ ASSERT3P(rs_after, !=, NULL);
rs_set_start_raw(rs_after, rt, before_start);
rs_set_fill(rs_after, rt, after_fill + before_fill + fill);
rs = rs_after;
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index cc367745e486..fe7051db2737 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -218,7 +218,7 @@ static int spa_load_print_vdev_tree = B_FALSE;
* there are also risks of performing an inadvertent rewind as we might be
* missing all the vdevs with the latest uberblocks.
*/
-unsigned long zfs_max_missing_tvds = 0;
+uint64_t zfs_max_missing_tvds = 0;
/*
* The parameters below are similar to zfs_max_missing_tvds but are only
@@ -5267,7 +5267,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag,
* If we've recovered the pool, pass back any information we
* gathered while doing the load.
*/
- if (state == SPA_LOAD_RECOVER) {
+ if (state == SPA_LOAD_RECOVER && config != NULL) {
fnvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO,
spa->spa_load_info);
}
@@ -6803,8 +6803,8 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
pvd = oldvd->vdev_parent;
- if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
- VDEV_ALLOC_ATTACH)) != 0)
+ if (spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
+ VDEV_ALLOC_ATTACH) != 0)
return (spa_vdev_exit(spa, NULL, txg, EINVAL));
if (newrootvd->vdev_children != 1)
@@ -6819,10 +6819,12 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
return (spa_vdev_exit(spa, newrootvd, txg, error));
/*
- * Spares can't replace logs
+ * log, dedup and special vdevs should not be replaced by spares.
*/
- if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
+ if ((oldvd->vdev_top->vdev_alloc_bias != VDEV_BIAS_NONE ||
+ oldvd->vdev_top->vdev_islog) && newvd->vdev_isspare) {
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+ }
/*
* A dRAID spare can only replace a child of its parent dRAID vdev.
@@ -7160,7 +7162,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
* it may be that the unwritability of the disk is the reason
* it's being detached!
*/
- error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
+ (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
/*
* Remove vd from its parent and compact the parent's children.
@@ -8867,36 +8869,36 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
spa_history_log_internal(spa, "set", tx,
"%s=%lld", nvpair_name(elem),
(longlong_t)intval);
- } else {
- ASSERT(0); /* not allowed */
- }
- switch (prop) {
- case ZPOOL_PROP_DELEGATION:
- spa->spa_delegation = intval;
- break;
- case ZPOOL_PROP_BOOTFS:
- spa->spa_bootfs = intval;
- break;
- case ZPOOL_PROP_FAILUREMODE:
- spa->spa_failmode = intval;
- break;
- case ZPOOL_PROP_AUTOTRIM:
- spa->spa_autotrim = intval;
- spa_async_request(spa,
- SPA_ASYNC_AUTOTRIM_RESTART);
- break;
- case ZPOOL_PROP_AUTOEXPAND:
- spa->spa_autoexpand = intval;
- if (tx->tx_txg != TXG_INITIAL)
+ switch (prop) {
+ case ZPOOL_PROP_DELEGATION:
+ spa->spa_delegation = intval;
+ break;
+ case ZPOOL_PROP_BOOTFS:
+ spa->spa_bootfs = intval;
+ break;
+ case ZPOOL_PROP_FAILUREMODE:
+ spa->spa_failmode = intval;
+ break;
+ case ZPOOL_PROP_AUTOTRIM:
+ spa->spa_autotrim = intval;
spa_async_request(spa,
- SPA_ASYNC_AUTOEXPAND);
- break;
- case ZPOOL_PROP_MULTIHOST:
- spa->spa_multihost = intval;
- break;
- default:
- break;
+ SPA_ASYNC_AUTOTRIM_RESTART);
+ break;
+ case ZPOOL_PROP_AUTOEXPAND:
+ spa->spa_autoexpand = intval;
+ if (tx->tx_txg != TXG_INITIAL)
+ spa_async_request(spa,
+ SPA_ASYNC_AUTOEXPAND);
+ break;
+ case ZPOOL_PROP_MULTIHOST:
+ spa->spa_multihost = intval;
+ break;
+ default:
+ break;
+ }
+ } else {
+ ASSERT(0); /* not allowed */
}
}
@@ -10016,7 +10018,7 @@ ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_batch_tpq, UINT, ZMOD_RD,
"Number of threads per IO worker taskqueue");
/* BEGIN CSTYLED */
-ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, U64, ZMOD_RW,
"Allow importing pool with up to this number of missing top-level "
"vdevs (in read-only mode)");
/* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/zfs/spa_checkpoint.c b/sys/contrib/openzfs/module/zfs/spa_checkpoint.c
index a837b1ce97ec..b588f7041e5c 100644
--- a/sys/contrib/openzfs/module/zfs/spa_checkpoint.c
+++ b/sys/contrib/openzfs/module/zfs/spa_checkpoint.c
@@ -158,7 +158,7 @@
* amount of checkpointed data that has been freed within them while
* the pool had a checkpoint.
*/
-static unsigned long zfs_spa_discard_memory_limit = 16 * 1024 * 1024;
+static uint64_t zfs_spa_discard_memory_limit = 16 * 1024 * 1024;
int
spa_checkpoint_get_stats(spa_t *spa, pool_checkpoint_stat_t *pcs)
@@ -631,7 +631,7 @@ EXPORT_SYMBOL(spa_checkpoint_discard_thread);
EXPORT_SYMBOL(spa_checkpoint_discard_thread_check);
/* BEGIN CSTYLED */
-ZFS_MODULE_PARAM(zfs_spa, zfs_spa_, discard_memory_limit, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_spa, zfs_spa_, discard_memory_limit, U64, ZMOD_RW,
"Limit for memory used in prefetching the checkpoint space map done "
"on each vdev while discarding the checkpoint");
/* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
index 4ecce8214f6a..2878e68c6e4b 100644
--- a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
+++ b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
@@ -188,13 +188,13 @@ static const unsigned long zfs_log_sm_blksz = 1ULL << 17;
* (thus the _ppm suffix; reads as "parts per million"). As an example,
* the default of 1000 allows 0.1% of memory to be used.
*/
-static unsigned long zfs_unflushed_max_mem_ppm = 1000;
+static uint64_t zfs_unflushed_max_mem_ppm = 1000;
/*
* Specific hard-limit in memory that ZFS allows to be used for
* unflushed changes.
*/
-static unsigned long zfs_unflushed_max_mem_amt = 1ULL << 30;
+static uint64_t zfs_unflushed_max_mem_amt = 1ULL << 30;
/*
* The following tunable determines the number of blocks that can be used for
@@ -243,33 +243,33 @@ static unsigned long zfs_unflushed_max_mem_amt = 1ULL << 30;
* provide upper and lower bounds for the log block limit.
* [see zfs_unflushed_log_block_{min,max}]
*/
-static unsigned long zfs_unflushed_log_block_pct = 400;
+static uint_t zfs_unflushed_log_block_pct = 400;
/*
* If the number of metaslabs is small and our incoming rate is high, we could
* get into a situation that we are flushing all our metaslabs every TXG. Thus
* we always allow at least this many log blocks.
*/
-static unsigned long zfs_unflushed_log_block_min = 1000;
+static uint64_t zfs_unflushed_log_block_min = 1000;
/*
* If the log becomes too big, the import time of the pool can take a hit in
* terms of performance. Thus we have a hard limit in the size of the log in
* terms of blocks.
*/
-static unsigned long zfs_unflushed_log_block_max = (1ULL << 17);
+static uint64_t zfs_unflushed_log_block_max = (1ULL << 17);
/*
* Also we have a hard limit in the size of the log in terms of dirty TXGs.
*/
-static unsigned long zfs_unflushed_log_txg_max = 1000;
+static uint64_t zfs_unflushed_log_txg_max = 1000;
/*
* Max # of rows allowed for the log_summary. The tradeoff here is accuracy and
* stability of the flushing algorithm (longer summary) vs its runtime overhead
* (smaller summary is faster to traverse).
*/
-static unsigned long zfs_max_logsm_summary_length = 10;
+static uint64_t zfs_max_logsm_summary_length = 10;
/*
* Tunable that sets the lower bound on the metaslabs to flush every TXG.
@@ -282,7 +282,7 @@ static unsigned long zfs_max_logsm_summary_length = 10;
* The point of this tunable is to be used in extreme cases where we really
* want to flush more metaslabs than our adaptable heuristic plans to flush.
*/
-static unsigned long zfs_min_metaslabs_to_flush = 1;
+static uint64_t zfs_min_metaslabs_to_flush = 1;
/*
* Tunable that specifies how far in the past do we want to look when trying to
@@ -293,7 +293,7 @@ static unsigned long zfs_min_metaslabs_to_flush = 1;
* average over all the blocks that we walk
* [see spa_estimate_incoming_log_blocks].
*/
-static unsigned long zfs_max_log_walking = 5;
+static uint64_t zfs_max_log_walking = 5;
/*
* This tunable exists solely for testing purposes. It ensures that the log
@@ -507,6 +507,7 @@ void
spa_log_summary_decrement_blkcount(spa_t *spa, uint64_t blocks_gone)
{
log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+ ASSERT3P(e, !=, NULL);
if (e->lse_txgcount > 0)
e->lse_txgcount--;
for (; e != NULL; e = list_head(&spa->spa_log_summary)) {
@@ -690,7 +691,8 @@ spa_estimate_metaslabs_to_flush(spa_t *spa)
* based on the incoming rate until we exceed it.
*/
if (available_blocks >= 0 && available_txgs >= 0) {
- uint64_t skip_txgs = MIN(available_txgs + 1,
+ uint64_t skip_txgs = (incoming == 0) ?
+ available_txgs + 1 : MIN(available_txgs + 1,
(available_blocks / incoming) + 1);
available_blocks -= (skip_txgs * incoming);
available_txgs -= skip_txgs;
@@ -1356,34 +1358,34 @@ spa_ld_log_spacemaps(spa_t *spa)
}
/* BEGIN CSTYLED */
-ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_amt, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_amt, U64, ZMOD_RW,
"Specific hard-limit in memory that ZFS allows to be used for "
"unflushed changes");
-ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_ppm, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_ppm, U64, ZMOD_RW,
"Percentage of the overall system memory that ZFS allows to be "
"used for unflushed changes (value is calculated over 1000000 for "
"finer granularity)");
-ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_max, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_max, U64, ZMOD_RW,
"Hard limit (upper-bound) in the size of the space map log "
"in terms of blocks.");
-ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_min, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_min, U64, ZMOD_RW,
"Lower-bound limit for the maximum amount of blocks allowed in "
"log spacemap (see zfs_unflushed_log_block_max)");
-ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_txg_max, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_txg_max, U64, ZMOD_RW,
"Hard limit (upper-bound) in the size of the space map log "
"in terms of dirty TXGs.");
-ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_pct, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_pct, UINT, ZMOD_RW,
"Tunable used to determine the number of blocks that can be used for "
"the spacemap log, expressed as a percentage of the total number of "
"metaslabs in the pool (e.g. 400 means the number of log blocks is "
"capped at 4 times the number of metaslabs)");
-ZFS_MODULE_PARAM(zfs, zfs_, max_log_walking, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, max_log_walking, U64, ZMOD_RW,
"The number of past TXGs that the flushing algorithm of the log "
"spacemap feature uses to estimate incoming log blocks");
@@ -1392,8 +1394,8 @@ ZFS_MODULE_PARAM(zfs, zfs_, keep_log_spacemaps_at_export, INT, ZMOD_RW,
"during pool export/destroy");
/* END CSTYLED */
-ZFS_MODULE_PARAM(zfs, zfs_, max_logsm_summary_length, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, max_logsm_summary_length, U64, ZMOD_RW,
"Maximum number of rows allowed in the summary of the spacemap log");
-ZFS_MODULE_PARAM(zfs, zfs_, min_metaslabs_to_flush, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, min_metaslabs_to_flush, U64, ZMOD_RW,
"Minimum number of metaslabs to flush per dirty TXG");
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
index 102070013404..ca55d55405d3 100644
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -304,20 +304,20 @@ int zfs_free_leak_on_eio = B_FALSE;
* has not completed in zfs_deadman_synctime_ms is considered "hung" resulting
* in one of three behaviors controlled by zfs_deadman_failmode.
*/
-unsigned long zfs_deadman_synctime_ms = 600000UL; /* 10 min. */
+uint64_t zfs_deadman_synctime_ms = 600000UL; /* 10 min. */
/*
* This value controls the maximum amount of time zio_wait() will block for an
* outstanding IO. By default this is 300 seconds at which point the "hung"
* behavior will be applied as described for zfs_deadman_synctime_ms.
*/
-unsigned long zfs_deadman_ziotime_ms = 300000UL; /* 5 min. */
+uint64_t zfs_deadman_ziotime_ms = 300000UL; /* 5 min. */
/*
* Check time in milliseconds. This defines the frequency at which we check
* for hung I/O.
*/
-unsigned long zfs_deadman_checktime_ms = 60000UL; /* 1 min. */
+uint64_t zfs_deadman_checktime_ms = 60000UL; /* 1 min. */
/*
* By default the deadman is enabled.
@@ -1536,7 +1536,7 @@ snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp)
compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
}
- SNPRINTF_BLKPTR(snprintf, ' ', buf, buflen, bp, type, checksum,
+ SNPRINTF_BLKPTR(kmem_scnprintf, ' ', buf, buflen, bp, type, checksum,
compress);
}
@@ -2922,7 +2922,7 @@ ZFS_MODULE_PARAM(zfs, zfs_, recover, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, free_leak_on_eio, INT, ZMOD_RW,
"Set to ignore IO errors during free and permanently leak the space");
-ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, checktime_ms, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, checktime_ms, U64, ZMOD_RW,
"Dead I/O check interval in milliseconds");
ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, enabled, INT, ZMOD_RW,
@@ -2943,11 +2943,11 @@ ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, failmode,
"Failmode for deadman timer");
ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, synctime_ms,
- param_set_deadman_synctime, param_get_ulong, ZMOD_RW,
+ param_set_deadman_synctime, spl_param_get_u64, ZMOD_RW,
"Pool sync expiration time in milliseconds");
ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, ziotime_ms,
- param_set_deadman_ziotime, param_get_ulong, ZMOD_RW,
+ param_set_deadman_ziotime, spl_param_get_u64, ZMOD_RW,
"IO expiration time in milliseconds");
ZFS_MODULE_PARAM(zfs, zfs_, special_class_metadata_reserve_pct, UINT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index 66cec052b669..4520ca31b7d7 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -144,8 +144,8 @@ int zfs_nocacheflush = 0;
* be forced by vdev logical ashift or by user via ashift property, but won't
* be set automatically as a performance optimization.
*/
-uint64_t zfs_vdev_max_auto_ashift = 14;
-uint64_t zfs_vdev_min_auto_ashift = ASHIFT_MIN;
+uint_t zfs_vdev_max_auto_ashift = 14;
+uint_t zfs_vdev_min_auto_ashift = ASHIFT_MIN;
void
vdev_dbgmsg(vdev_t *vd, const char *fmt, ...)
@@ -3563,6 +3563,26 @@ vdev_load(vdev_t *vd)
}
}
+ if (vd == vd->vdev_top && vd->vdev_top_zap != 0) {
+ spa_t *spa = vd->vdev_spa;
+ uint64_t failfast;
+
+ error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
+ vdev_prop_to_name(VDEV_PROP_FAILFAST), sizeof (failfast),
+ 1, &failfast);
+ if (error == 0) {
+ vd->vdev_failfast = failfast & 1;
+ } else if (error == ENOENT) {
+ vd->vdev_failfast = vdev_prop_default_numeric(
+ VDEV_PROP_FAILFAST);
+ } else {
+ vdev_dbgmsg(vd,
+ "vdev_load: zap_lookup(top_zap=%llu) "
+ "failed [error=%d]",
+ (u_longlong_t)vd->vdev_top_zap, error);
+ }
+ }
+
/*
* Load any rebuild state from the top-level vdev zap.
*/
@@ -5648,7 +5668,7 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
nvpair_t *elem = NULL;
uint64_t vdev_guid;
nvlist_t *nvprops;
- int error;
+ int error = 0;
ASSERT(vd != NULL);
@@ -5709,6 +5729,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
else
error = spa_vdev_alloc(spa, vdev_guid);
break;
+ case VDEV_PROP_FAILFAST:
+ if (nvpair_value_uint64(elem, &intval) != 0) {
+ error = EINVAL;
+ break;
+ }
+ vd->vdev_failfast = intval & 1;
+ break;
default:
/* Most processing is done in vdev_props_set_sync */
break;
@@ -6022,6 +6049,25 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
vdev_prop_add_list(outnvl, propname, strval,
intval, src);
break;
+ case VDEV_PROP_FAILFAST:
+ src = ZPROP_SRC_LOCAL;
+ strval = NULL;
+
+ err = zap_lookup(mos, objid, nvpair_name(elem),
+ sizeof (uint64_t), 1, &intval);
+ if (err == ENOENT) {
+ intval = vdev_prop_default_numeric(
+ prop);
+ err = 0;
+ } else if (err) {
+ break;
+ }
+ if (intval == vdev_prop_default_numeric(prop))
+ src = ZPROP_SRC_DEFAULT;
+
+ vdev_prop_add_list(outnvl, propname, strval,
+ intval, src);
+ break;
/* Text Properties */
case VDEV_PROP_COMMENT:
/* Exists in the ZAP below */
@@ -6078,7 +6124,6 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
strval = NULL;
zprop_source_t src = ZPROP_SRC_DEFAULT;
propname = za.za_name;
- prop = vdev_name_to_prop(propname);
switch (za.za_integer_length) {
case 8:
@@ -6156,11 +6201,11 @@ ZFS_MODULE_PARAM(zfs, zfs_, embedded_slog_min_ms, UINT, ZMOD_RW,
/* BEGIN CSTYLED */
ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, min_auto_ashift,
- param_set_min_auto_ashift, param_get_ulong, ZMOD_RW,
+ param_set_min_auto_ashift, param_get_uint, ZMOD_RW,
"Minimum ashift used when creating new top-level vdevs");
ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, max_auto_ashift,
- param_set_max_auto_ashift, param_get_ulong, ZMOD_RW,
+ param_set_max_auto_ashift, param_get_uint, ZMOD_RW,
"Maximum ashift used when optimizing for logical -> physical sector "
"size on new top-level vdevs");
/* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
index 0ca0c245e952..814a1f0efe4c 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
@@ -189,14 +189,14 @@ static uint_t zfs_condense_indirect_obsolete_pct = 25;
* consumed by the obsolete space map; the default of 1GB is small enough
* that we typically don't mind "wasting" it.
*/
-static unsigned long zfs_condense_max_obsolete_bytes = 1024 * 1024 * 1024;
+static uint64_t zfs_condense_max_obsolete_bytes = 1024 * 1024 * 1024;
/*
* Don't bother condensing if the mapping uses less than this amount of
* memory. The default of 128KB is considered a "trivial" amount of
* memory and not worth reducing.
*/
-static unsigned long zfs_condense_min_mapping_bytes = 128 * 1024;
+static uint64_t zfs_condense_min_mapping_bytes = 128 * 1024;
/*
* This is used by the test suite so that it can ensure that certain
@@ -1319,6 +1319,7 @@ vdev_indirect_io_start(zio_t *zio)
vdev_indirect_gather_splits, zio);
indirect_split_t *first = list_head(&iv->iv_splits);
+ ASSERT3P(first, !=, NULL);
if (first->is_size == zio->io_size) {
/*
* This is not a split block; we are pointing to the entire
@@ -1891,11 +1892,11 @@ ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, indirect_obsolete_pct, UINT,
"Minimum obsolete percent of bytes in the mapping "
"to attempt condensing");
-ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, min_mapping_bytes, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, min_mapping_bytes, U64, ZMOD_RW,
"Don't bother condensing if the mapping uses less than this amount of "
"memory");
-ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, max_obsolete_bytes, ULONG,
+ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, max_obsolete_bytes, U64,
ZMOD_RW,
"Minimum size obsolete spacemap to attempt condensing");
diff --git a/sys/contrib/openzfs/module/zfs/vdev_initialize.c b/sys/contrib/openzfs/module/zfs/vdev_initialize.c
index 965fb7ef0593..75beb0cc3d12 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_initialize.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_initialize.c
@@ -36,17 +36,13 @@
/*
* Value that is written to disk during initialization.
*/
-#ifdef _ILP32
-static unsigned long zfs_initialize_value = 0xdeadbeefUL;
-#else
-static unsigned long zfs_initialize_value = 0xdeadbeefdeadbeeeULL;
-#endif
+static uint64_t zfs_initialize_value = 0xdeadbeefdeadbeeeULL;
/* maximum number of I/Os outstanding per leaf vdev */
static const int zfs_initialize_limit = 1;
/* size of initializing writes; default 1MiB, see zfs_remove_max_segment */
-static unsigned long zfs_initialize_chunk_size = 1024 * 1024;
+static uint64_t zfs_initialize_chunk_size = 1024 * 1024;
static boolean_t
vdev_initialize_should_stop(vdev_t *vd)
@@ -261,15 +257,9 @@ vdev_initialize_block_fill(void *buf, size_t len, void *unused)
(void) unused;
ASSERT0(len % sizeof (uint64_t));
-#ifdef _ILP32
- for (uint64_t i = 0; i < len; i += sizeof (uint32_t)) {
- *(uint32_t *)((char *)(buf) + i) = zfs_initialize_value;
- }
-#else
for (uint64_t i = 0; i < len; i += sizeof (uint64_t)) {
*(uint64_t *)((char *)(buf) + i) = zfs_initialize_value;
}
-#endif
return (0);
}
@@ -765,8 +755,8 @@ EXPORT_SYMBOL(vdev_initialize_stop_all);
EXPORT_SYMBOL(vdev_initialize_stop_wait);
EXPORT_SYMBOL(vdev_initialize_restart);
-ZFS_MODULE_PARAM(zfs, zfs_, initialize_value, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, initialize_value, U64, ZMOD_RW,
"Value written during zpool initialize");
-ZFS_MODULE_PARAM(zfs, zfs_, initialize_chunk_size, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, initialize_chunk_size, U64, ZMOD_RW,
"Size in bytes of writes by zpool initialize");
diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c
index 1acb89cea393..ec55674393ce 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_queue.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c
@@ -605,7 +605,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
int maxblocksize;
boolean_t stretch = B_FALSE;
avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type);
- enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
+ zio_flag_t flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
uint64_t next_offset;
abd_t *abd;
@@ -725,6 +725,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
* after our span is mandatory.
*/
dio = AVL_NEXT(t, last);
+ ASSERT3P(dio, !=, NULL);
dio->io_flags &= ~ZIO_FLAG_OPTIONAL;
} else {
/* do not include the optional i/o */
@@ -756,6 +757,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
do {
dio = nio;
nio = AVL_NEXT(t, dio);
+ ASSERT3P(dio, !=, NULL);
zio_add_child(dio, aio);
vdev_queue_io_remove(vq, dio);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c b/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c
index f74a76a8d5ba..2980f8acfbd7 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c
@@ -285,17 +285,17 @@ raidz_math_kstat_headers(char *buf, size_t size)
{
ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN);
- ssize_t off = snprintf(buf, size, "%-17s", "implementation");
+ ssize_t off = kmem_scnprintf(buf, size, "%-17s", "implementation");
for (int i = 0; i < ARRAY_SIZE(raidz_gen_name); i++)
- off += snprintf(buf + off, size - off, "%-16s",
+ off += kmem_scnprintf(buf + off, size - off, "%-16s",
raidz_gen_name[i]);
for (int i = 0; i < ARRAY_SIZE(raidz_rec_name); i++)
- off += snprintf(buf + off, size - off, "%-16s",
+ off += kmem_scnprintf(buf + off, size - off, "%-16s",
raidz_rec_name[i]);
- (void) snprintf(buf + off, size - off, "\n");
+ (void) kmem_scnprintf(buf + off, size - off, "\n");
return (0);
}
@@ -311,34 +311,35 @@ raidz_math_kstat_data(char *buf, size_t size, void *data)
ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN);
if (cstat == fstat) {
- off += snprintf(buf + off, size - off, "%-17s", "fastest");
+ off += kmem_scnprintf(buf + off, size - off, "%-17s",
+ "fastest");
for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++) {
int id = fstat->gen[i];
- off += snprintf(buf + off, size - off, "%-16s",
+ off += kmem_scnprintf(buf + off, size - off, "%-16s",
raidz_supp_impl[id]->name);
}
for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++) {
int id = fstat->rec[i];
- off += snprintf(buf + off, size - off, "%-16s",
+ off += kmem_scnprintf(buf + off, size - off, "%-16s",
raidz_supp_impl[id]->name);
}
} else {
ptrdiff_t id = cstat - raidz_impl_kstats;
- off += snprintf(buf + off, size - off, "%-17s",
+ off += kmem_scnprintf(buf + off, size - off, "%-17s",
raidz_supp_impl[id]->name);
for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++)
- off += snprintf(buf + off, size - off, "%-16llu",
+ off += kmem_scnprintf(buf + off, size - off, "%-16llu",
(u_longlong_t)cstat->gen[i]);
for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++)
- off += snprintf(buf + off, size - off, "%-16llu",
+ off += kmem_scnprintf(buf + off, size - off, "%-16llu",
(u_longlong_t)cstat->rec[i]);
}
- (void) snprintf(buf + off, size - off, "\n");
+ (void) kmem_scnprintf(buf + off, size - off, "\n");
return (0);
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
index 1ce578e228d8..1f56275c853b 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
@@ -22,6 +22,7 @@
*
* Copyright (c) 2018, Intel Corporation.
* Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
#include <sys/vdev_impl.h>
@@ -103,7 +104,7 @@
* Size of rebuild reads; defaults to 1MiB per data disk and is capped at
* SPA_MAXBLOCKSIZE.
*/
-static unsigned long zfs_rebuild_max_segment = 1024 * 1024;
+static uint64_t zfs_rebuild_max_segment = 1024 * 1024;
/*
* Maximum number of parallelly executed bytes per leaf vdev caused by a
@@ -121,7 +122,7 @@ static unsigned long zfs_rebuild_max_segment = 1024 * 1024;
* With a value of 32MB the sequential resilver write rate was measured at
* 800MB/s sustained while rebuilding to a distributed spare.
*/
-static unsigned long zfs_rebuild_vdev_limit = 32 << 20;
+static uint64_t zfs_rebuild_vdev_limit = 32 << 20;
/*
* Automatically start a pool scrub when the last active sequential resilver
@@ -134,6 +135,7 @@ static int zfs_rebuild_scrub_enabled = 1;
* For vdev_rebuild_initiate_sync() and vdev_rebuild_reset_sync().
*/
static __attribute__((noreturn)) void vdev_rebuild_thread(void *arg);
+static void vdev_rebuild_reset_sync(void *arg, dmu_tx_t *tx);
/*
* Clear the per-vdev rebuild bytes value for a vdev tree.
@@ -307,6 +309,17 @@ vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx)
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
mutex_enter(&vd->vdev_rebuild_lock);
+
+ /*
+ * Handle a second device failure if it occurs after all rebuild I/O
+ * has completed but before this sync task has been executed.
+ */
+ if (vd->vdev_rebuild_reset_wanted) {
+ mutex_exit(&vd->vdev_rebuild_lock);
+ vdev_rebuild_reset_sync(arg, tx);
+ return;
+ }
+
vrp->vrp_rebuild_state = VDEV_REBUILD_COMPLETE;
vrp->vrp_end_time = gethrestime_sec();
@@ -760,7 +773,6 @@ vdev_rebuild_thread(void *arg)
ASSERT(vd->vdev_rebuilding);
ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REBUILD));
ASSERT3B(vd->vdev_rebuild_cancel_wanted, ==, B_FALSE);
- ASSERT3B(vd->vdev_rebuild_reset_wanted, ==, B_FALSE);
vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
@@ -1138,10 +1150,10 @@ vdev_rebuild_get_stats(vdev_t *tvd, vdev_rebuild_stat_t *vrs)
return (error);
}
-ZFS_MODULE_PARAM(zfs, zfs_, rebuild_max_segment, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, rebuild_max_segment, U64, ZMOD_RW,
"Max segment size in bytes of rebuild reads");
-ZFS_MODULE_PARAM(zfs, zfs_, rebuild_vdev_limit, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, rebuild_vdev_limit, U64, ZMOD_RW,
"Max bytes in flight per leaf vdev for sequential resilvers");
ZFS_MODULE_PARAM(zfs, zfs_, rebuild_scrub_enabled, INT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/vdev_trim.c b/sys/contrib/openzfs/module/zfs/vdev_trim.c
index 5905d9a07571..5b5076c8722c 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_trim.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_trim.c
@@ -1188,12 +1188,11 @@ vdev_autotrim_thread(void *arg)
mutex_exit(&vd->vdev_autotrim_lock);
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
- uint64_t extent_bytes_max = zfs_trim_extent_bytes_max;
- uint64_t extent_bytes_min = zfs_trim_extent_bytes_min;
-
while (!vdev_autotrim_should_stop(vd)) {
int txgs_per_trim = MAX(zfs_trim_txg_batch, 1);
boolean_t issued_trim = B_FALSE;
+ uint64_t extent_bytes_max = zfs_trim_extent_bytes_max;
+ uint64_t extent_bytes_min = zfs_trim_extent_bytes_min;
/*
* All of the metaslabs are divided in to groups of size
diff --git a/sys/contrib/openzfs/module/zfs/zap_leaf.c b/sys/contrib/openzfs/module/zfs/zap_leaf.c
index 25c2d5163a26..2e8489c7dfcf 100644
--- a/sys/contrib/openzfs/module/zfs/zap_leaf.c
+++ b/sys/contrib/openzfs/module/zfs/zap_leaf.c
@@ -646,7 +646,7 @@ zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd,
* form of the name. But all callers have one of these on hand anyway,
* so might as well take advantage. A cleaner but slower interface
* would accept neither argument, and compute the normalized name as
- * needed (using zap_name_alloc(zap_entry_read_name(zeh))).
+ * needed (using zap_name_alloc_str(zap_entry_read_name(zeh))).
*/
boolean_t
zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
@@ -667,7 +667,7 @@ zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
continue;
if (zn == NULL) {
- zn = zap_name_alloc(zap, name, MT_NORMALIZE);
+ zn = zap_name_alloc_str(zap, name, MT_NORMALIZE);
allocdzn = B_TRUE;
}
if (zap_leaf_array_match(zeh->zeh_leaf, zn,
diff --git a/sys/contrib/openzfs/module/zfs/zap_micro.c b/sys/contrib/openzfs/module/zfs/zap_micro.c
index 58a5c9f600b7..606f426404cc 100644
--- a/sys/contrib/openzfs/module/zfs/zap_micro.c
+++ b/sys/contrib/openzfs/module/zfs/zap_micro.c
@@ -33,7 +33,7 @@
#include <sys/zap.h>
#include <sys/zap_impl.h>
#include <sys/zap_leaf.h>
-#include <sys/avl.h>
+#include <sys/btree.h>
#include <sys/arc.h>
#include <sys/dmu_objset.h>
@@ -92,7 +92,7 @@ zap_hash(zap_name_t *zn)
wp++, i++) {
uint64_t word = *wp;
- for (int j = 0; j < zn->zn_key_intlen; j++) {
+ for (int j = 0; j < 8; j++) {
h = (h >> 8) ^
zfs_crc64_table[(h ^ word) & 0xFF];
word >>= NBBY;
@@ -162,18 +162,25 @@ zap_match(zap_name_t *zn, const char *matchname)
}
}
+static zap_name_t *
+zap_name_alloc(zap_t *zap)
+{
+ zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
+ zn->zn_zap = zap;
+ return (zn);
+}
+
void
zap_name_free(zap_name_t *zn)
{
kmem_free(zn, sizeof (zap_name_t));
}
-zap_name_t *
-zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
+static int
+zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt)
{
- zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
+ zap_t *zap = zn->zn_zap;
- zn->zn_zap = zap;
zn->zn_key_intlen = sizeof (*key);
zn->zn_key_orig = key;
zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1;
@@ -194,17 +201,13 @@ zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
* what the hash is computed from.
*/
if (zap_normalize(zap, key, zn->zn_normbuf,
- zap->zap_normflags) != 0) {
- zap_name_free(zn);
- return (NULL);
- }
+ zap->zap_normflags) != 0)
+ return (SET_ERROR(ENOTSUP));
zn->zn_key_norm = zn->zn_normbuf;
zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
} else {
- if (mt != 0) {
- zap_name_free(zn);
- return (NULL);
- }
+ if (mt != 0)
+ return (SET_ERROR(ENOTSUP));
zn->zn_key_norm = zn->zn_key_orig;
zn->zn_key_norm_numints = zn->zn_key_orig_numints;
}
@@ -217,13 +220,22 @@ zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
* what the matching is based on. (Not the hash!)
*/
if (zap_normalize(zap, key, zn->zn_normbuf,
- zn->zn_normflags) != 0) {
- zap_name_free(zn);
- return (NULL);
- }
+ zn->zn_normflags) != 0)
+ return (SET_ERROR(ENOTSUP));
zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
}
+ return (0);
+}
+
+zap_name_t *
+zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt)
+{
+ zap_name_t *zn = zap_name_alloc(zap);
+ if (zap_name_init_str(zn, key, mt) != 0) {
+ zap_name_free(zn);
+ return (NULL);
+ }
return (zn);
}
@@ -277,45 +289,46 @@ mze_compare(const void *arg1, const void *arg2)
const mzap_ent_t *mze1 = arg1;
const mzap_ent_t *mze2 = arg2;
- int cmp = TREE_CMP(mze1->mze_hash, mze2->mze_hash);
- if (likely(cmp))
- return (cmp);
-
- return (TREE_CMP(mze1->mze_cd, mze2->mze_cd));
+ return (TREE_CMP((uint64_t)(mze1->mze_hash) << 32 | mze1->mze_cd,
+ (uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd));
}
static void
-mze_insert(zap_t *zap, int chunkid, uint64_t hash)
+mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash)
{
+ mzap_ent_t mze;
+
ASSERT(zap->zap_ismicro);
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
- mzap_ent_t *mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP);
- mze->mze_chunkid = chunkid;
- mze->mze_hash = hash;
- mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd;
- ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0);
- avl_add(&zap->zap_m.zap_avl, mze);
+ mze.mze_chunkid = chunkid;
+ ASSERT0(hash & 0xffffffff);
+ mze.mze_hash = hash >> 32;
+ ASSERT3U(MZE_PHYS(zap, &mze)->mze_cd, <=, 0xffff);
+ mze.mze_cd = (uint16_t)MZE_PHYS(zap, &mze)->mze_cd;
+ ASSERT(MZE_PHYS(zap, &mze)->mze_name[0] != 0);
+ zfs_btree_add(&zap->zap_m.zap_tree, &mze);
}
static mzap_ent_t *
-mze_find(zap_name_t *zn)
+mze_find(zap_name_t *zn, zfs_btree_index_t *idx)
{
mzap_ent_t mze_tofind;
mzap_ent_t *mze;
- avl_index_t idx;
- avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl;
+ zfs_btree_t *tree = &zn->zn_zap->zap_m.zap_tree;
ASSERT(zn->zn_zap->zap_ismicro);
ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock));
- mze_tofind.mze_hash = zn->zn_hash;
+ ASSERT0(zn->zn_hash & 0xffffffff);
+ mze_tofind.mze_hash = zn->zn_hash >> 32;
mze_tofind.mze_cd = 0;
- mze = avl_find(avl, &mze_tofind, &idx);
+ mze = zfs_btree_find(tree, &mze_tofind, idx);
if (mze == NULL)
- mze = avl_nearest(avl, idx, AVL_AFTER);
- for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) {
+ mze = zfs_btree_next(tree, idx, idx);
+ for (; mze && mze->mze_hash == mze_tofind.mze_hash;
+ mze = zfs_btree_next(tree, idx, idx)) {
ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd);
if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name))
return (mze);
@@ -328,18 +341,21 @@ static uint32_t
mze_find_unused_cd(zap_t *zap, uint64_t hash)
{
mzap_ent_t mze_tofind;
- avl_index_t idx;
- avl_tree_t *avl = &zap->zap_m.zap_avl;
+ zfs_btree_index_t idx;
+ zfs_btree_t *tree = &zap->zap_m.zap_tree;
ASSERT(zap->zap_ismicro);
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
+ ASSERT0(hash & 0xffffffff);
+ hash >>= 32;
mze_tofind.mze_hash = hash;
mze_tofind.mze_cd = 0;
uint32_t cd = 0;
- for (mzap_ent_t *mze = avl_find(avl, &mze_tofind, &idx);
- mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
+ for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx);
+ mze && mze->mze_hash == hash;
+ mze = zfs_btree_next(tree, &idx, &idx)) {
if (mze->mze_cd != cd)
break;
cd++;
@@ -364,16 +380,18 @@ mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
{
zap_t *zap = zn->zn_zap;
mzap_ent_t mze_tofind;
- mzap_ent_t *mze;
- avl_index_t idx;
- avl_tree_t *avl = &zap->zap_m.zap_avl;
+ zfs_btree_index_t idx;
+ zfs_btree_t *tree = &zap->zap_m.zap_tree;
uint32_t mzap_ents = 0;
+ ASSERT0(hash & 0xffffffff);
+ hash >>= 32;
mze_tofind.mze_hash = hash;
mze_tofind.mze_cd = 0;
- for (mze = avl_find(avl, &mze_tofind, &idx);
- mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
+ for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx);
+ mze && mze->mze_hash == hash;
+ mze = zfs_btree_next(tree, &idx, &idx)) {
mzap_ents++;
}
@@ -384,24 +402,10 @@ mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
}
static void
-mze_remove(zap_t *zap, mzap_ent_t *mze)
-{
- ASSERT(zap->zap_ismicro);
- ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-
- avl_remove(&zap->zap_m.zap_avl, mze);
- kmem_free(mze, sizeof (mzap_ent_t));
-}
-
-static void
mze_destroy(zap_t *zap)
{
- mzap_ent_t *mze;
- void *avlcookie = NULL;
-
- while ((mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie)))
- kmem_free(mze, sizeof (mzap_ent_t));
- avl_destroy(&zap->zap_m.zap_avl);
+ zfs_btree_clear(&zap->zap_m.zap_tree);
+ zfs_btree_destroy(&zap->zap_m.zap_tree);
}
static zap_t *
@@ -448,21 +452,26 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
zap->zap_salt = zap_m_phys(zap)->mz_salt;
zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
- avl_create(&zap->zap_m.zap_avl, mze_compare,
- sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
- for (int i = 0; i < zap->zap_m.zap_num_chunks; i++) {
+ /*
+ * Reduce B-tree leaf from 4KB to 512 bytes to reduce memmove()
+ * overhead on massive inserts below. It still allows to store
+ * 62 entries before we have to add 2KB B-tree core node.
+ */
+ zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare,
+ sizeof (mzap_ent_t), 512);
+
+ zap_name_t *zn = zap_name_alloc(zap);
+ for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) {
mzap_ent_phys_t *mze =
&zap_m_phys(zap)->mz_chunk[i];
if (mze->mze_name[0]) {
- zap_name_t *zn;
-
zap->zap_m.zap_num_entries++;
- zn = zap_name_alloc(zap, mze->mze_name, 0);
+ zap_name_init_str(zn, mze->mze_name, 0);
mze_insert(zap, i, zn->zn_hash);
- zap_name_free(zn);
}
}
+ zap_name_free(zn);
} else {
zap->zap_salt = zap_f_phys(zap)->zap_salt;
zap->zap_normflags = zap_f_phys(zap)->zap_normflags;
@@ -657,24 +666,25 @@ mzap_upgrade(zap_t **zapp, const void *tag, dmu_tx_t *tx, zap_flags_t flags)
dprintf("upgrading obj=%llu with %u chunks\n",
(u_longlong_t)zap->zap_object, nchunks);
- /* XXX destroy the avl later, so we can use the stored hash value */
+ /* XXX destroy the tree later, so we can use the stored hash value */
mze_destroy(zap);
fzap_upgrade(zap, tx, flags);
+ zap_name_t *zn = zap_name_alloc(zap);
for (int i = 0; i < nchunks; i++) {
mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
if (mze->mze_name[0] == 0)
continue;
dprintf("adding %s=%llu\n",
mze->mze_name, (u_longlong_t)mze->mze_value);
- zap_name_t *zn = zap_name_alloc(zap, mze->mze_name, 0);
+ zap_name_init_str(zn, mze->mze_name, 0);
/* If we fail here, we would end up losing entries */
VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
tag, tx));
zap = zn->zn_zap; /* fzap_add_cd() may change zap */
- zap_name_free(zn);
}
+ zap_name_free(zn);
vmem_free(mzp, sz);
*zapp = zap;
return (0);
@@ -916,22 +926,23 @@ zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
* See also the comment above zap_entry_normalization_conflict().
*/
static boolean_t
-mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze)
+mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze,
+ zfs_btree_index_t *idx)
{
- int direction = AVL_BEFORE;
boolean_t allocdzn = B_FALSE;
+ mzap_ent_t *other;
+ zfs_btree_index_t oidx;
if (zap->zap_normflags == 0)
return (B_FALSE);
-again:
- for (mzap_ent_t *other = avl_walk(&zap->zap_m.zap_avl, mze, direction);
+ for (other = zfs_btree_prev(&zap->zap_m.zap_tree, idx, &oidx);
other && other->mze_hash == mze->mze_hash;
- other = avl_walk(&zap->zap_m.zap_avl, other, direction)) {
+ other = zfs_btree_prev(&zap->zap_m.zap_tree, &oidx, &oidx)) {
if (zn == NULL) {
- zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name,
- MT_NORMALIZE);
+ zn = zap_name_alloc_str(zap,
+ MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE);
allocdzn = B_TRUE;
}
if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
@@ -941,9 +952,20 @@ again:
}
}
- if (direction == AVL_BEFORE) {
- direction = AVL_AFTER;
- goto again;
+ for (other = zfs_btree_next(&zap->zap_m.zap_tree, idx, &oidx);
+ other && other->mze_hash == mze->mze_hash;
+ other = zfs_btree_next(&zap->zap_m.zap_tree, &oidx, &oidx)) {
+
+ if (zn == NULL) {
+ zn = zap_name_alloc_str(zap,
+ MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE);
+ allocdzn = B_TRUE;
+ }
+ if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
+ if (allocdzn)
+ zap_name_free(zn);
+ return (B_TRUE);
+ }
}
if (allocdzn)
@@ -971,7 +993,7 @@ zap_lookup_impl(zap_t *zap, const char *name,
{
int err = 0;
- zap_name_t *zn = zap_name_alloc(zap, name, mt);
+ zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
if (zn == NULL)
return (SET_ERROR(ENOTSUP));
@@ -979,7 +1001,8 @@ zap_lookup_impl(zap_t *zap, const char *name,
err = fzap_lookup(zn, integer_size, num_integers, buf,
realname, rn_len, ncp);
} else {
- mzap_ent_t *mze = mze_find(zn);
+ zfs_btree_index_t idx;
+ mzap_ent_t *mze = mze_find(zn, &idx);
if (mze == NULL) {
err = SET_ERROR(ENOENT);
} else {
@@ -990,11 +1013,13 @@ zap_lookup_impl(zap_t *zap, const char *name,
} else {
*(uint64_t *)buf =
MZE_PHYS(zap, mze)->mze_value;
- (void) strlcpy(realname,
- MZE_PHYS(zap, mze)->mze_name, rn_len);
+ if (realname != NULL)
+ (void) strlcpy(realname,
+ MZE_PHYS(zap, mze)->mze_name,
+ rn_len);
if (ncp) {
*ncp = mzap_normalization_conflict(zap,
- zn, mze);
+ zn, mze, &idx);
}
}
}
@@ -1031,7 +1056,7 @@ zap_prefetch(objset_t *os, uint64_t zapobj, const char *name)
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
if (err)
return (err);
- zn = zap_name_alloc(zap, name, 0);
+ zn = zap_name_alloc_str(zap, name, 0);
if (zn == NULL) {
zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
@@ -1134,7 +1159,7 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name,
zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
if (err != 0)
return (err);
- zap_name_t *zn = zap_name_alloc(zap, name, 0);
+ zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
if (zn == NULL) {
zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
@@ -1142,7 +1167,8 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name,
if (!zap->zap_ismicro) {
err = fzap_length(zn, integer_size, num_integers);
} else {
- mzap_ent_t *mze = mze_find(zn);
+ zfs_btree_index_t idx;
+ mzap_ent_t *mze = mze_find(zn, &idx);
if (mze == NULL) {
err = SET_ERROR(ENOENT);
} else {
@@ -1182,7 +1208,7 @@ static void
mzap_addent(zap_name_t *zn, uint64_t value)
{
zap_t *zap = zn->zn_zap;
- int start = zap->zap_m.zap_alloc_next;
+ uint16_t start = zap->zap_m.zap_alloc_next;
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
@@ -1198,7 +1224,7 @@ mzap_addent(zap_name_t *zn, uint64_t value)
ASSERT(cd < zap_maxcd(zap));
again:
- for (int i = start; i < zap->zap_m.zap_num_chunks; i++) {
+ for (uint16_t i = start; i < zap->zap_m.zap_num_chunks; i++) {
mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i];
if (mze->mze_name[0] == 0) {
mze->mze_value = value;
@@ -1229,7 +1255,7 @@ zap_add_impl(zap_t *zap, const char *key,
const uint64_t *intval = val;
int err = 0;
- zap_name_t *zn = zap_name_alloc(zap, key, 0);
+ zap_name_t *zn = zap_name_alloc_str(zap, key, 0);
if (zn == NULL) {
zap_unlockdir(zap, tag);
return (SET_ERROR(ENOTSUP));
@@ -1247,7 +1273,8 @@ zap_add_impl(zap_t *zap, const char *key,
}
zap = zn->zn_zap; /* fzap_add() may change zap */
} else {
- if (mze_find(zn) != NULL) {
+ zfs_btree_index_t idx;
+ if (mze_find(zn, &idx) != NULL) {
err = SET_ERROR(EEXIST);
} else {
mzap_addent(zn, *intval);
@@ -1327,7 +1354,7 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name,
zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
if (err != 0)
return (err);
- zap_name_t *zn = zap_name_alloc(zap, name, 0);
+ zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
if (zn == NULL) {
zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
@@ -1348,7 +1375,8 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name,
}
zap = zn->zn_zap; /* fzap_update() may change zap */
} else {
- mzap_ent_t *mze = mze_find(zn);
+ zfs_btree_index_t idx;
+ mzap_ent_t *mze = mze_find(zn, &idx);
if (mze != NULL) {
MZE_PHYS(zap, mze)->mze_value = *intval;
} else {
@@ -1398,20 +1426,20 @@ zap_remove_impl(zap_t *zap, const char *name,
{
int err = 0;
- zap_name_t *zn = zap_name_alloc(zap, name, mt);
+ zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
if (zn == NULL)
return (SET_ERROR(ENOTSUP));
if (!zap->zap_ismicro) {
err = fzap_remove(zn, tx);
} else {
- mzap_ent_t *mze = mze_find(zn);
+ zfs_btree_index_t idx;
+ mzap_ent_t *mze = mze_find(zn, &idx);
if (mze == NULL) {
err = SET_ERROR(ENOENT);
} else {
zap->zap_m.zap_num_entries--;
- memset(&zap_m_phys(zap)->mz_chunk[mze->mze_chunkid], 0,
- sizeof (mzap_ent_phys_t));
- mze_remove(zap, mze);
+ memset(MZE_PHYS(zap, mze), 0, sizeof (mzap_ent_phys_t));
+ zfs_btree_remove_idx(&zap->zap_m.zap_tree, &idx);
}
}
zap_name_free(zn);
@@ -1582,29 +1610,30 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
if (!zc->zc_zap->zap_ismicro) {
err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
} else {
- avl_index_t idx;
+ zfs_btree_index_t idx;
mzap_ent_t mze_tofind;
- mze_tofind.mze_hash = zc->zc_hash;
+ mze_tofind.mze_hash = zc->zc_hash >> 32;
mze_tofind.mze_cd = zc->zc_cd;
- mzap_ent_t *mze =
- avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx);
+ mzap_ent_t *mze = zfs_btree_find(&zc->zc_zap->zap_m.zap_tree,
+ &mze_tofind, &idx);
if (mze == NULL) {
- mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl,
- idx, AVL_AFTER);
+ mze = zfs_btree_next(&zc->zc_zap->zap_m.zap_tree,
+ &idx, &idx);
}
if (mze) {
mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze);
ASSERT3U(mze->mze_cd, ==, mzep->mze_cd);
za->za_normalization_conflict =
- mzap_normalization_conflict(zc->zc_zap, NULL, mze);
+ mzap_normalization_conflict(zc->zc_zap, NULL,
+ mze, &idx);
za->za_integer_length = 8;
za->za_num_integers = 1;
za->za_first_integer = mzep->mze_value;
(void) strlcpy(za->za_name, mzep->mze_name,
sizeof (za->za_name));
- zc->zc_hash = mze->mze_hash;
+ zc->zc_hash = (uint64_t)mze->mze_hash << 32;
zc->zc_cd = mze->mze_cd;
err = 0;
} else {
diff --git a/sys/contrib/openzfs/module/zfs/zcp.c b/sys/contrib/openzfs/module/zfs/zcp.c
index fe90242ca40d..5ebf1bbbc8cc 100644
--- a/sys/contrib/openzfs/module/zfs/zcp.c
+++ b/sys/contrib/openzfs/module/zfs/zcp.c
@@ -109,8 +109,8 @@
#define ZCP_NVLIST_MAX_DEPTH 20
static const uint64_t zfs_lua_check_instrlimit_interval = 100;
-unsigned long zfs_lua_max_instrlimit = ZCP_MAX_INSTRLIMIT;
-unsigned long zfs_lua_max_memlimit = ZCP_MAX_MEMLIMIT;
+uint64_t zfs_lua_max_instrlimit = ZCP_MAX_INSTRLIMIT;
+uint64_t zfs_lua_max_memlimit = ZCP_MAX_MEMLIMIT;
/*
* Forward declarations for mutually recursive functions
@@ -277,9 +277,9 @@ zcp_table_to_nvlist(lua_State *state, int index, int depth)
}
break;
case LUA_TNUMBER:
- VERIFY3U(sizeof (buf), >,
- snprintf(buf, sizeof (buf), "%lld",
- (longlong_t)lua_tonumber(state, -2)));
+ (void) snprintf(buf, sizeof (buf), "%lld",
+ (longlong_t)lua_tonumber(state, -2));
+
key = buf;
if (saw_str_could_collide) {
key_could_collide = B_TRUE;
@@ -1443,8 +1443,8 @@ zcp_parse_args(lua_State *state, const char *fname, const zcp_arg_t *pargs,
}
}
-ZFS_MODULE_PARAM(zfs_lua, zfs_lua_, max_instrlimit, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_lua, zfs_lua_, max_instrlimit, U64, ZMOD_RW,
"Max instruction limit that can be specified for a channel program");
-ZFS_MODULE_PARAM(zfs_lua, zfs_lua_, max_memlimit, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_lua, zfs_lua_, max_memlimit, U64, ZMOD_RW,
"Max memory limit that can be specified for a channel program");
diff --git a/sys/contrib/openzfs/module/zfs/zcp_get.c b/sys/contrib/openzfs/module/zfs/zcp_get.c
index cd17374eb422..f28266b8095f 100644
--- a/sys/contrib/openzfs/module/zfs/zcp_get.c
+++ b/sys/contrib/openzfs/module/zfs/zcp_get.c
@@ -467,7 +467,8 @@ get_zap_prop(lua_State *state, dsl_dataset_t *ds, zfs_prop_t zfs_prop)
} else {
error = dsl_prop_get_ds(ds, prop_name, sizeof (numval),
1, &numval, setpoint);
-
+ if (error != 0)
+ goto out;
#ifdef _KERNEL
/* Fill in temporary value for prop, if applicable */
(void) zfs_get_temporary_prop(ds, zfs_prop, &numval, setpoint);
@@ -489,6 +490,7 @@ get_zap_prop(lua_State *state, dsl_dataset_t *ds, zfs_prop_t zfs_prop)
(void) lua_pushnumber(state, numval);
}
}
+out:
kmem_free(strval, ZAP_MAXVALUELEN);
if (error == 0)
get_prop_src(state, setpoint, zfs_prop);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_chksum.c b/sys/contrib/openzfs/module/zfs/zfs_chksum.c
index 74b4cb8d2e63..4a9a36d87e66 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_chksum.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_chksum.c
@@ -81,15 +81,15 @@ chksum_kstat_headers(char *buf, size_t size)
{
ssize_t off = 0;
- off += snprintf(buf + off, size, "%-23s", "implementation");
- off += snprintf(buf + off, size - off, "%8s", "1k");
- off += snprintf(buf + off, size - off, "%8s", "4k");
- off += snprintf(buf + off, size - off, "%8s", "16k");
- off += snprintf(buf + off, size - off, "%8s", "64k");
- off += snprintf(buf + off, size - off, "%8s", "256k");
- off += snprintf(buf + off, size - off, "%8s", "1m");
- off += snprintf(buf + off, size - off, "%8s", "4m");
- (void) snprintf(buf + off, size - off, "%8s\n", "16m");
+ off += kmem_scnprintf(buf + off, size, "%-23s", "implementation");
+ off += kmem_scnprintf(buf + off, size - off, "%8s", "1k");
+ off += kmem_scnprintf(buf + off, size - off, "%8s", "4k");
+ off += kmem_scnprintf(buf + off, size - off, "%8s", "16k");
+ off += kmem_scnprintf(buf + off, size - off, "%8s", "64k");
+ off += kmem_scnprintf(buf + off, size - off, "%8s", "256k");
+ off += kmem_scnprintf(buf + off, size - off, "%8s", "1m");
+ off += kmem_scnprintf(buf + off, size - off, "%8s", "4m");
+ (void) kmem_scnprintf(buf + off, size - off, "%8s\n", "16m");
return (0);
}
@@ -102,23 +102,23 @@ chksum_kstat_data(char *buf, size_t size, void *data)
char b[24];
cs = (chksum_stat_t *)data;
- snprintf(b, 23, "%s-%s", cs->name, cs->impl);
- off += snprintf(buf + off, size - off, "%-23s", b);
- off += snprintf(buf + off, size - off, "%8llu",
+ kmem_scnprintf(b, 23, "%s-%s", cs->name, cs->impl);
+ off += kmem_scnprintf(buf + off, size - off, "%-23s", b);
+ off += kmem_scnprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs1k);
- off += snprintf(buf + off, size - off, "%8llu",
+ off += kmem_scnprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs4k);
- off += snprintf(buf + off, size - off, "%8llu",
+ off += kmem_scnprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs16k);
- off += snprintf(buf + off, size - off, "%8llu",
+ off += kmem_scnprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs64k);
- off += snprintf(buf + off, size - off, "%8llu",
+ off += kmem_scnprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs256k);
- off += snprintf(buf + off, size - off, "%8llu",
+ off += kmem_scnprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs1m);
- off += snprintf(buf + off, size - off, "%8llu",
+ off += kmem_scnprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs4m);
- (void) snprintf(buf + off, size - off, "%8llu\n",
+ (void) kmem_scnprintf(buf + off, size - off, "%8llu\n",
(u_longlong_t)cs->bs16m);
return (0);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_fm.c b/sys/contrib/openzfs/module/zfs/zfs_fm.c
index 06aa1214ace8..fd0dc7d69bf8 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_fm.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_fm.c
@@ -253,7 +253,6 @@ void
zfs_ereport_clear(spa_t *spa, vdev_t *vd)
{
uint64_t vdev_guid, pool_guid;
- int cnt = 0;
ASSERT(vd != NULL || spa != NULL);
if (vd == NULL) {
@@ -277,7 +276,6 @@ zfs_ereport_clear(spa_t *spa, vdev_t *vd)
avl_remove(&recent_events_tree, entry);
list_remove(&recent_events_list, entry);
kmem_free(entry, sizeof (*entry));
- cnt++;
}
}
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index c3266c09306b..a5168b937588 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -229,14 +229,14 @@ static zfsdev_state_t *zfsdev_state_list;
* for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
* Defaults to 0=auto which is handled by platform code.
*/
-unsigned long zfs_max_nvlist_src_size = 0;
+uint64_t zfs_max_nvlist_src_size = 0;
/*
* When logging the output nvlist of an ioctl in the on-disk history, limit
* the logged size to this many bytes. This must be less than DMU_MAX_ACCESS.
* This applies primarily to zfs_ioc_channel_program().
*/
-static unsigned long zfs_history_output_max = 1024 * 1024;
+static uint64_t zfs_history_output_max = 1024 * 1024;
uint_t zfs_fsyncer_key;
uint_t zfs_allow_log_key;
@@ -7884,8 +7884,8 @@ zfs_kmod_fini(void)
tsd_destroy(&zfs_allow_log_key);
}
-ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, U64, ZMOD_RW,
"Maximum size in bytes allowed for src nvlist passed with ZFS ioctls");
-ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, U64, ZMOD_RW,
"Maximum size in bytes of ZFS ioctl output that will be logged");
diff --git a/sys/contrib/openzfs/module/zfs/zfs_log.c b/sys/contrib/openzfs/module/zfs/zfs_log.c
index c92044337bce..77bf9140d52d 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_log.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_log.c
@@ -494,6 +494,29 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
zil_itx_assign(zilog, itx, tx);
}
+static void
+do_zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
+ const char *sname, znode_t *tdzp, const char *dname, znode_t *szp)
+{
+ itx_t *itx;
+ lr_rename_t *lr;
+ size_t snamesize = strlen(sname) + 1;
+ size_t dnamesize = strlen(dname) + 1;
+
+ if (zil_replaying(zilog, tx))
+ return;
+
+ itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
+ lr = (lr_rename_t *)&itx->itx_lr;
+ lr->lr_sdoid = sdzp->z_id;
+ lr->lr_tdoid = tdzp->z_id;
+ memcpy((char *)(lr + 1), sname, snamesize);
+ memcpy((char *)(lr + 1) + snamesize, dname, dnamesize);
+ itx->itx_oid = szp->z_id;
+
+ zil_itx_assign(zilog, itx, tx);
+}
+
/*
* Handles TX_RENAME transactions.
*/
@@ -501,18 +524,71 @@ void
zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
const char *sname, znode_t *tdzp, const char *dname, znode_t *szp)
{
+ txtype |= TX_RENAME;
+ do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp);
+}
+
+/*
+ * Handles TX_RENAME_EXCHANGE transactions.
+ */
+void
+zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
+ znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
+ znode_t *szp)
+{
+ txtype |= TX_RENAME_EXCHANGE;
+ do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp);
+}
+
+/*
+ * Handles TX_RENAME_WHITEOUT transactions.
+ *
+ * Unfortunately we cannot reuse do_zfs_log_rename because we need to call
+ * zfs_mknode() on replay which requires stashing bits as with TX_CREATE.
+ */
+void
+zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
+ znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
+ znode_t *szp, znode_t *wzp)
+{
itx_t *itx;
- lr_rename_t *lr;
+ lr_rename_whiteout_t *lr;
size_t snamesize = strlen(sname) + 1;
size_t dnamesize = strlen(dname) + 1;
if (zil_replaying(zilog, tx))
return;
+ txtype |= TX_RENAME_WHITEOUT;
itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
- lr = (lr_rename_t *)&itx->itx_lr;
- lr->lr_sdoid = sdzp->z_id;
- lr->lr_tdoid = tdzp->z_id;
+ lr = (lr_rename_whiteout_t *)&itx->itx_lr;
+ lr->lr_rename.lr_sdoid = sdzp->z_id;
+ lr->lr_rename.lr_tdoid = tdzp->z_id;
+
+ /*
+ * RENAME_WHITEOUT will create an entry at the source znode, so we need
+ * to store the same data that the equivalent call to zfs_log_create()
+ * would.
+ */
+ lr->lr_wfoid = wzp->z_id;
+ LR_FOID_SET_SLOTS(lr->lr_wfoid, wzp->z_dnodesize >> DNODE_SHIFT);
+ (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(wzp)), &lr->lr_wgen,
+ sizeof (uint64_t));
+ (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(wzp)),
+ lr->lr_wcrtime, sizeof (uint64_t) * 2);
+ lr->lr_wmode = wzp->z_mode;
+ lr->lr_wuid = (uint64_t)KUID_TO_SUID(ZTOUID(wzp));
+ lr->lr_wgid = (uint64_t)KGID_TO_SGID(ZTOGID(wzp));
+
+ /*
+ * This rdev will always be makedevice(0, 0) but because the ZIL log and
+ * replay code needs to be platform independent (and there is no
+ * platform independent makedev()) we need to copy the one created
+ * during the rename operation.
+ */
+ (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(wzp)), &lr->lr_wrdev,
+ sizeof (lr->lr_wrdev));
+
memcpy((char *)(lr + 1), sname, snamesize);
memcpy((char *)(lr + 1) + snamesize, dname, dnamesize);
itx->itx_oid = szp->z_id;
@@ -525,7 +601,7 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
* called as soon as the write is on stable storage (be it via a DMU sync or a
* ZIL commit).
*/
-static long zfs_immediate_write_sz = 32768;
+static int64_t zfs_immediate_write_sz = 32768;
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
@@ -815,5 +891,5 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
zil_itx_assign(zilog, itx, tx);
}
-ZFS_MODULE_PARAM(zfs, zfs_, immediate_write_sz, LONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, immediate_write_sz, S64, ZMOD_RW,
"Largest data block to write to zil");
diff --git a/sys/contrib/openzfs/module/zfs/zfs_onexit.c b/sys/contrib/openzfs/module/zfs/zfs_onexit.c
index dfcdeeb5b46f..63acf7ab2e4d 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_onexit.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_onexit.c
@@ -151,7 +151,7 @@ zfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo)
*/
int
zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
- uint64_t *action_handle)
+ uintptr_t *action_handle)
{
zfs_onexit_t *zo;
zfs_onexit_action_node_t *ap;
@@ -170,7 +170,7 @@ zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
list_insert_tail(&zo->zo_actions, ap);
mutex_exit(&zo->zo_lock);
if (action_handle)
- *action_handle = (uint64_t)(uintptr_t)ap;
+ *action_handle = (uintptr_t)ap;
return (0);
}
diff --git a/sys/contrib/openzfs/module/zfs/zfs_replay.c b/sys/contrib/openzfs/module/zfs/zfs_replay.c
index 379e1d1a7b57..0293e46d5858 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_replay.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_replay.c
@@ -386,8 +386,13 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap)
lr->lr_uid, lr->lr_gid);
}
+#if defined(__linux__)
error = zfs_create(dzp, name, &xva.xva_vattr,
- 0, 0, &zp, kcred, vflg, &vsec);
+ 0, 0, &zp, kcred, vflg, &vsec, kcred->user_ns);
+#else
+ error = zfs_create(dzp, name, &xva.xva_vattr,
+ 0, 0, &zp, kcred, vflg, &vsec, NULL);
+#endif
break;
case TX_MKDIR_ACL:
aclstart = (caddr_t)(lracl + 1);
@@ -416,8 +421,13 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap)
(void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
lr->lr_uid, lr->lr_gid);
}
+#if defined(__linux__)
+ error = zfs_mkdir(dzp, name, &xva.xva_vattr,
+ &zp, kcred, vflg, &vsec, kcred->user_ns);
+#else
error = zfs_mkdir(dzp, name, &xva.xva_vattr,
- &zp, kcred, vflg, &vsec);
+ &zp, kcred, vflg, &vsec, NULL);
+#endif
break;
default:
error = SET_ERROR(ENOTSUP);
@@ -527,8 +537,13 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
if (name == NULL)
name = (char *)start;
+#if defined(__linux__)
+ error = zfs_create(dzp, name, &xva.xva_vattr,
+ 0, 0, &zp, kcred, vflg, NULL, kcred->user_ns);
+#else
error = zfs_create(dzp, name, &xva.xva_vattr,
- 0, 0, &zp, kcred, vflg, NULL);
+ 0, 0, &zp, kcred, vflg, NULL, NULL);
+#endif
break;
case TX_MKDIR_ATTR:
lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
@@ -545,8 +560,14 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
if (name == NULL)
name = (char *)(lr + 1);
+#if defined(__linux__)
+ error = zfs_mkdir(dzp, name, &xva.xva_vattr,
+ &zp, kcred, vflg, NULL, kcred->user_ns);
+#else
error = zfs_mkdir(dzp, name, &xva.xva_vattr,
- &zp, kcred, vflg, NULL);
+ &zp, kcred, vflg, NULL, NULL);
+#endif
+
break;
case TX_MKXATTR:
error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &zp, kcred);
@@ -554,8 +575,13 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
case TX_SYMLINK:
name = (char *)(lr + 1);
link = name + strlen(name) + 1;
+#if defined(__linux__)
+ error = zfs_symlink(dzp, name, &xva.xva_vattr,
+ link, &zp, kcred, vflg, kcred->user_ns);
+#else
error = zfs_symlink(dzp, name, &xva.xva_vattr,
- link, &zp, kcred, vflg);
+ link, &zp, kcred, vflg, NULL);
+#endif
break;
default:
error = SET_ERROR(ENOTSUP);
@@ -643,18 +669,21 @@ zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap)
}
static int
-zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
+do_zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, char *sname,
+ char *tname, uint64_t rflags, vattr_t *wo_vap)
{
- zfsvfs_t *zfsvfs = arg1;
- lr_rename_t *lr = arg2;
- char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
- char *tname = sname + strlen(sname) + 1;
znode_t *sdzp, *tdzp;
- int error;
- int vflg = 0;
+ int error, vflg = 0;
- if (byteswap)
- byteswap_uint64_array(lr, sizeof (*lr));
+ /* Only Linux currently supports RENAME_* flags. */
+#ifdef __linux__
+ VERIFY0(rflags & ~(RENAME_EXCHANGE | RENAME_WHITEOUT));
+
+ /* wo_vap must be non-NULL iff. we're doing RENAME_WHITEOUT */
+ VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL);
+#else
+ VERIFY0(rflags);
+#endif
if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0)
return (error);
@@ -667,7 +696,13 @@ zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
if (lr->lr_common.lrc_txtype & TX_CI)
vflg |= FIGNORECASE;
- error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg);
+#if defined(__linux__)
+ error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags,
+ wo_vap, kcred->user_ns);
+#else
+ error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags,
+ wo_vap, NULL);
+#endif
zrele(tdzp);
zrele(sdzp);
@@ -675,6 +710,86 @@ zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
}
static int
+zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
+{
+ zfsvfs_t *zfsvfs = arg1;
+ lr_rename_t *lr = arg2;
+ char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
+ char *tname = sname + strlen(sname) + 1;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, 0, NULL));
+}
+
+static int
+zfs_replay_rename_exchange(void *arg1, void *arg2, boolean_t byteswap)
+{
+#ifdef __linux__
+ zfsvfs_t *zfsvfs = arg1;
+ lr_rename_t *lr = arg2;
+ char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
+ char *tname = sname + strlen(sname) + 1;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, RENAME_EXCHANGE,
+ NULL));
+#else
+ return (SET_ERROR(ENOTSUP));
+#endif
+}
+
+static int
+zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap)
+{
+#ifdef __linux__
+ zfsvfs_t *zfsvfs = arg1;
+ lr_rename_whiteout_t *lr = arg2;
+ int error;
+ /* sname and tname follow lr_rename_whiteout_t */
+ char *sname = (char *)(lr + 1);
+ char *tname = sname + strlen(sname) + 1;
+ /* For the whiteout file. */
+ xvattr_t xva;
+ uint64_t objid;
+ uint64_t dnodesize;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ objid = LR_FOID_GET_OBJ(lr->lr_wfoid);
+ dnodesize = LR_FOID_GET_SLOTS(lr->lr_wfoid) << DNODE_SHIFT;
+
+ xva_init(&xva);
+ zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
+ lr->lr_wmode, lr->lr_wuid, lr->lr_wgid, lr->lr_wrdev, objid);
+
+ /*
+ * As with TX_CREATE, RENAME_WHITEOUT ends up in zfs_mknode(), which
+ * assigns the object's creation time, generation number, and dnode
+ * slot count. The generic zfs_rename() has no concept of these
+ * attributes, so we smuggle the values inside the vattr's otherwise
+ * unused va_ctime, va_nblocks, and va_fsid fields.
+ */
+ ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_wcrtime);
+ xva.xva_vattr.va_nblocks = lr->lr_wgen;
+ xva.xva_vattr.va_fsid = dnodesize;
+
+ error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT);
+ if (error)
+ return (error);
+
+ return (do_zfs_replay_rename(zfsvfs, &lr->lr_rename, sname, tname,
+ RENAME_WHITEOUT, &xva.xva_vattr));
+#else
+ return (SET_ERROR(ENOTSUP));
+#endif
+}
+
+static int
zfs_replay_write(void *arg1, void *arg2, boolean_t byteswap)
{
zfsvfs_t *zfsvfs = arg1;
@@ -860,7 +975,11 @@ zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap)
zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start,
lr->lr_uid, lr->lr_gid);
- error = zfs_setattr(zp, vap, 0, kcred);
+#if defined(__linux__)
+ error = zfs_setattr(zp, vap, 0, kcred, kcred->user_ns);
+#else
+ error = zfs_setattr(zp, vap, 0, kcred, NULL);
+#endif
zfs_fuid_info_free(zfsvfs->z_fuid_replay);
zfsvfs->z_fuid_replay = NULL;
@@ -1069,4 +1188,6 @@ zil_replay_func_t *const zfs_replay_vector[TX_MAX_TYPE] = {
zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */
zfs_replay_write2, /* TX_WRITE2 */
zfs_replay_setsaxattr, /* TX_SETSAXATTR */
+ zfs_replay_rename_exchange, /* TX_RENAME_EXCHANGE */
+ zfs_replay_rename_whiteout, /* TX_RENAME_WHITEOUT */
};
diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
index 57f03f116273..45ecb0773260 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@@ -64,7 +64,7 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
int error = 0;
zfsvfs_t *zfsvfs = ZTOZSB(zp);
- (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);
+ (void) tsd_set(zfs_fsyncer_key, (void *)(uintptr_t)zfs_fsync_sync_cnt);
if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
@@ -168,15 +168,25 @@ zfs_access(znode_t *zp, int mode, int flag, cred_t *cr)
return (error);
if (flag & V_ACE_MASK)
- error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
+#if defined(__linux__)
+ error = zfs_zaccess(zp, mode, flag, B_FALSE, cr,
+ kcred->user_ns);
+#else
+ error = zfs_zaccess(zp, mode, flag, B_FALSE, cr,
+ NULL);
+#endif
else
- error = zfs_zaccess_rwx(zp, mode, flag, cr);
+#if defined(__linux__)
+ error = zfs_zaccess_rwx(zp, mode, flag, cr, kcred->user_ns);
+#else
+ error = zfs_zaccess_rwx(zp, mode, flag, cr, NULL);
+#endif
zfs_exit(zfsvfs, FTAG);
return (error);
}
-static unsigned long zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */
+static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */
/*
* Read bytes from specified file into supplied buffer.
@@ -991,5 +1001,5 @@ EXPORT_SYMBOL(zfs_write);
EXPORT_SYMBOL(zfs_getsecattr);
EXPORT_SYMBOL(zfs_setsecattr);
-ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, U64, ZMOD_RW,
"Bytes to read per chunk");
diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c
index dc5b8018e16e..02e6f4b83b9c 100644
--- a/sys/contrib/openzfs/module/zfs/zil.c
+++ b/sys/contrib/openzfs/module/zfs/zil.c
@@ -132,7 +132,7 @@ static int zil_nocacheflush = 0;
* Any writes above that will be executed with lower (asynchronous) priority
* to limit potential SLOG device abuse by single active ZIL writer.
*/
-static unsigned long zil_slog_bulk = 768 * 1024;
+static uint64_t zil_slog_bulk = 768 * 1024;
static kmem_cache_t *zil_lwb_cache;
static kmem_cache_t *zil_zcw_cache;
@@ -237,7 +237,7 @@ static int
zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
blkptr_t *nbp, void *dst, char **end)
{
- enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
+ zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf = NULL;
zbookmark_phys_t zb;
@@ -315,7 +315,7 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
static int
zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
{
- enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
+ zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
const blkptr_t *bp = &lr->lr_blkptr;
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf = NULL;
@@ -339,6 +339,7 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
if (wbuf == NULL)
zio_flags |= ZIO_FLAG_RAW;
+ ASSERT3U(BP_GET_LSIZE(bp), !=, 0);
SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid,
ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));
@@ -479,8 +480,18 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
error = zil_read_log_block(zilog, decrypt, &blk, &next_blk,
lrbuf, &end);
- if (error != 0)
+ if (error != 0) {
+ if (claimed) {
+ char name[ZFS_MAX_DATASET_NAME_LEN];
+
+ dmu_objset_name(zilog->zl_os, name);
+
+ cmn_err(CE_WARN, "ZFS read log block error %d, "
+ "dataset %s, seq 0x%llx\n", error, name,
+ (u_longlong_t)blk_seq);
+ }
break;
+ }
for (lrp = lrbuf; lrp < end; lrp += reclen) {
lr_t *lr = (lr_t *)lrp;
@@ -504,10 +515,6 @@ done:
zilog->zl_parse_blk_count = blk_count;
zilog->zl_parse_lr_count = lr_count;
- ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) ||
- (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq) ||
- (decrypt && error == EIO));
-
zil_bp_tree_fini(zilog);
zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE);
@@ -758,11 +765,9 @@ zil_commit_activate_saxattr_feature(zilog_t *zilog)
uint64_t txg = 0;
dmu_tx_t *tx = NULL;
- if (spa_feature_is_enabled(zilog->zl_spa,
- SPA_FEATURE_ZILSAXATTR) &&
+ if (spa_feature_is_enabled(zilog->zl_spa, SPA_FEATURE_ZILSAXATTR) &&
dmu_objset_type(zilog->zl_os) != DMU_OST_ZVOL &&
- !dsl_dataset_feature_is_active(ds,
- SPA_FEATURE_ZILSAXATTR)) {
+ !dsl_dataset_feature_is_active(ds, SPA_FEATURE_ZILSAXATTR)) {
tx = dmu_tx_create(zilog->zl_os);
VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
dsl_dataset_dirty(ds, tx);
@@ -882,8 +887,9 @@ zil_create(zilog_t *zilog)
* txg_wait_synced() here either when keep_first is set, because both
* zil_create() and zil_destroy() will wait for any in-progress destroys
* to complete.
+ * Return B_TRUE if there were any entries to replay.
*/
-void
+boolean_t
zil_destroy(zilog_t *zilog, boolean_t keep_first)
{
const zil_header_t *zh = zilog->zl_header;
@@ -899,7 +905,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
zilog->zl_old_header = *zh; /* debugging aid */
if (BP_IS_HOLE(&zh->zh_log))
- return;
+ return (B_FALSE);
tx = dmu_tx_create(zilog->zl_os);
VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
@@ -932,6 +938,8 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
mutex_exit(&zilog->zl_lock);
dmu_tx_commit(tx);
+
+ return (B_TRUE);
}
void
@@ -3844,8 +3852,9 @@ zil_incr_blks(zilog_t *zilog, const blkptr_t *bp, void *arg, uint64_t claim_txg)
/*
* If this dataset has a non-empty intent log, replay it and destroy it.
+ * Return B_TRUE if there were any entries to replay.
*/
-void
+boolean_t
zil_replay(objset_t *os, void *arg,
zil_replay_func_t *const replay_func[TX_MAX_TYPE])
{
@@ -3854,8 +3863,7 @@ zil_replay(objset_t *os, void *arg,
zil_replay_arg_t zr;
if ((zh->zh_flags & ZIL_REPLAY_NEEDED) == 0) {
- zil_destroy(zilog, B_TRUE);
- return;
+ return (zil_destroy(zilog, B_TRUE));
}
zr.zr_replay = replay_func;
@@ -3878,6 +3886,8 @@ zil_replay(objset_t *os, void *arg,
zil_destroy(zilog, B_FALSE);
txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);
zilog->zl_replay = B_FALSE;
+
+ return (B_TRUE);
}
boolean_t
@@ -3945,7 +3955,7 @@ ZFS_MODULE_PARAM(zfs_zil, zil_, replay_disable, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_zil, zil_, nocacheflush, INT, ZMOD_RW,
"Disable ZIL cache flushes");
-ZFS_MODULE_PARAM(zfs_zil, zil_, slog_bulk, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_zil, zil_, slog_bulk, U64, ZMOD_RW,
"Limit in bytes slog sync writes per commit");
ZFS_MODULE_PARAM(zfs_zil, zil_, maxblocksize, UINT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index c2e3c6169fa3..928e28813931 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -512,8 +512,9 @@ zio_decrypt(zio_t *zio, abd_t *data, uint64_t size)
/*
* If this is an authenticated block, just check the MAC. It would be
- * nice to separate this out into its own flag, but for the moment
- * enum zio_flag is out of bits.
+ * nice to separate this out into its own flag, but when this was done,
+ * we had run out of bits in what is now zio_flag_t. Future cleanup
+ * could make this a flag bit.
*/
if (BP_IS_AUTHENTICATED(bp)) {
if (ot == DMU_OT_OBJSET) {
@@ -802,7 +803,7 @@ static zio_t *
zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
abd_t *data, uint64_t lsize, uint64_t psize, zio_done_func_t *done,
void *private, zio_type_t type, zio_priority_t priority,
- enum zio_flag flags, vdev_t *vd, uint64_t offset,
+ zio_flag_t flags, vdev_t *vd, uint64_t offset,
const zbookmark_phys_t *zb, enum zio_stage stage,
enum zio_stage pipeline)
{
@@ -901,7 +902,7 @@ zio_destroy(zio_t *zio)
zio_t *
zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done,
- void *private, enum zio_flag flags)
+ void *private, zio_flag_t flags)
{
zio_t *zio;
@@ -913,7 +914,7 @@ zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done,
}
zio_t *
-zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags)
+zio_root(spa_t *spa, zio_done_func_t *done, void *private, zio_flag_t flags)
{
return (zio_null(NULL, spa, NULL, done, private, flags));
}
@@ -1099,7 +1100,7 @@ zfs_dva_valid(spa_t *spa, const dva_t *dva, const blkptr_t *bp)
zio_t *
zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
abd_t *data, uint64_t size, zio_done_func_t *done, void *private,
- zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb)
+ zio_priority_t priority, zio_flag_t flags, const zbookmark_phys_t *zb)
{
zio_t *zio;
@@ -1117,7 +1118,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
abd_t *data, uint64_t lsize, uint64_t psize, const zio_prop_t *zp,
zio_done_func_t *ready, zio_done_func_t *children_ready,
zio_done_func_t *physdone, zio_done_func_t *done,
- void *private, zio_priority_t priority, enum zio_flag flags,
+ void *private, zio_priority_t priority, zio_flag_t flags,
const zbookmark_phys_t *zb)
{
zio_t *zio;
@@ -1160,7 +1161,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zio_t *
zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, abd_t *data,
uint64_t size, zio_done_func_t *done, void *private,
- zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb)
+ zio_priority_t priority, zio_flag_t flags, zbookmark_phys_t *zb)
{
zio_t *zio;
@@ -1203,7 +1204,6 @@ zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
*/
if (BP_IS_EMBEDDED(bp))
return;
- metaslab_check_free(spa, bp);
/*
* Frees that are for the currently-syncing txg, are not going to be
@@ -1220,6 +1220,7 @@ zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
txg != spa->spa_syncing_txg ||
(spa_sync_pass(spa) >= zfs_sync_pass_deferred_free &&
!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))) {
+ metaslab_check_free(spa, bp);
bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp);
} else {
VERIFY3P(zio_free_sync(NULL, spa, txg, bp, 0), ==, NULL);
@@ -1233,7 +1234,7 @@ zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
*/
zio_t *
zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
- enum zio_flag flags)
+ zio_flag_t flags)
{
ASSERT(!BP_IS_HOLE(bp));
ASSERT(spa_syncing_txg(spa) == txg);
@@ -1266,7 +1267,7 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio_t *
zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
- zio_done_func_t *done, void *private, enum zio_flag flags)
+ zio_done_func_t *done, void *private, zio_flag_t flags)
{
zio_t *zio;
@@ -1303,7 +1304,7 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio_t *
zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
- zio_done_func_t *done, void *private, enum zio_flag flags)
+ zio_done_func_t *done, void *private, zio_flag_t flags)
{
zio_t *zio;
int c;
@@ -1328,7 +1329,7 @@ zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
zio_t *
zio_trim(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
zio_done_func_t *done, void *private, zio_priority_t priority,
- enum zio_flag flags, enum trim_flag trim_flags)
+ zio_flag_t flags, enum trim_flag trim_flags)
{
zio_t *zio;
@@ -1348,7 +1349,7 @@ zio_trim(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
zio_t *
zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
abd_t *data, int checksum, zio_done_func_t *done, void *private,
- zio_priority_t priority, enum zio_flag flags, boolean_t labels)
+ zio_priority_t priority, zio_flag_t flags, boolean_t labels)
{
zio_t *zio;
@@ -1369,7 +1370,7 @@ zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
zio_t *
zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
abd_t *data, int checksum, zio_done_func_t *done, void *private,
- zio_priority_t priority, enum zio_flag flags, boolean_t labels)
+ zio_priority_t priority, zio_flag_t flags, boolean_t labels)
{
zio_t *zio;
@@ -1406,7 +1407,7 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
zio_t *
zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
abd_t *data, uint64_t size, int type, zio_priority_t priority,
- enum zio_flag flags, zio_done_func_t *done, void *private)
+ zio_flag_t flags, zio_done_func_t *done, void *private)
{
enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE;
zio_t *zio;
@@ -1480,7 +1481,7 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
zio_t *
zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, abd_t *data, uint64_t size,
- zio_type_t type, zio_priority_t priority, enum zio_flag flags,
+ zio_type_t type, zio_priority_t priority, zio_flag_t flags,
zio_done_func_t *done, void *private)
{
zio_t *zio;
@@ -2030,7 +2031,7 @@ zio_deadman_impl(zio_t *pio, int ziodepth)
"delta=%llu queued=%llu io=%llu "
"path=%s "
"last=%llu type=%d "
- "priority=%d flags=0x%x stage=0x%x "
+ "priority=%d flags=0x%llx stage=0x%x "
"pipeline=0x%x pipeline-trace=0x%x "
"objset=%llu object=%llu "
"level=%llu blkid=%llu "
@@ -2040,8 +2041,8 @@ zio_deadman_impl(zio_t *pio, int ziodepth)
(u_longlong_t)delta, pio->io_delta, pio->io_delay,
vd ? vd->vdev_path : "NULL",
vq ? vq->vq_io_complete_ts : 0, pio->io_type,
- pio->io_priority, pio->io_flags, pio->io_stage,
- pio->io_pipeline, pio->io_pipeline_trace,
+ pio->io_priority, (u_longlong_t)pio->io_flags,
+ pio->io_stage, pio->io_pipeline, pio->io_pipeline_trace,
(u_longlong_t)zb->zb_objset, (u_longlong_t)zb->zb_object,
(u_longlong_t)zb->zb_level, (u_longlong_t)zb->zb_blkid,
(u_longlong_t)pio->io_offset, (u_longlong_t)pio->io_size,
@@ -3360,7 +3361,7 @@ zio_ddt_write(zio_t *zio)
return (zio);
}
-ddt_entry_t *freedde; /* for debugging */
+static ddt_entry_t *freedde; /* for debugging */
static zio_t *
zio_ddt_free(zio_t *zio)
diff --git a/sys/contrib/openzfs/module/zfs/zio_compress.c b/sys/contrib/openzfs/module/zfs/zio_compress.c
index 4c9cbc962093..0fb91ac81522 100644
--- a/sys/contrib/openzfs/module/zfs/zio_compress.c
+++ b/sys/contrib/openzfs/module/zfs/zio_compress.c
@@ -44,7 +44,7 @@
* If nonzero, every 1/X decompression attempts will fail, simulating
* an undetected memory error.
*/
-unsigned long zio_decompress_fail_fraction = 0;
+static unsigned long zio_decompress_fail_fraction = 0;
/*
* Compression vectors.
diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c
index 2e2860ff0212..20578a8223b2 100644
--- a/sys/contrib/openzfs/module/zfs/zvol.c
+++ b/sys/contrib/openzfs/module/zfs/zvol.c
@@ -514,6 +514,8 @@ zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE] = {
zvol_replay_err, /* TX_MKDIR_ACL_ATTR */
zvol_replay_err, /* TX_WRITE2 */
zvol_replay_err, /* TX_SETSAXATTR */
+ zvol_replay_err, /* TX_RENAME_EXCHANGE */
+ zvol_replay_err, /* TX_RENAME_WHITEOUT */
};
/*
@@ -1026,8 +1028,7 @@ zvol_add_clones(const char *dsname, list_t *minors_list)
out:
if (dd != NULL)
dsl_dir_rele(dd, FTAG);
- if (dp != NULL)
- dsl_pool_rele(dp, FTAG);
+ dsl_pool_rele(dp, FTAG);
}
/*