diff options
author | Martin Matuska <mm@FreeBSD.org> | 2021-06-08 14:48:37 +0000 |
---|---|---|
committer | Martin Matuska <mm@FreeBSD.org> | 2021-06-08 14:52:44 +0000 |
commit | 1603881667360c015f6685131f2f25474fa67a72 (patch) | |
tree | 1e2793b4b19eabab1534c3dd2074e3ca022c5e76 /sys/contrib/openzfs/module | |
parent | ed9215c8e05f06c00683bb0624e82644796d4385 (diff) | |
parent | 75b4cbf62590c23fac3667537961a2a75fdc2cc3 (diff) | |
download | src-1603881667360c015f6685131f2f25474fa67a72.tar.gz src-1603881667360c015f6685131f2f25474fa67a72.zip |
zfs: merge openzfs/zfs@75b4cbf62 (master) into main
Notable upstream pull request merges:
#11710 Allow zfs to send replication streams with missing snapshots
#11751 Avoid taking global lock to destroy zfsdev state
#11786 Ratelimit deadman zevents as with delay zevents
#11803 ZFS traverse_visitbp optimization to limit prefetch
#11813 Allow pool names that look like Solaris disk names
#11822 Atomically check and set dropped zevent count
#11822 Don't scale zfs_zevent_len_max by CPU count
#11833 Refactor zfsdev state init/destroy to share common code
#11837 zfs get -p only outputs 3 columns if "clones" property is empty
#11843 libzutil: zfs_isnumber(): return false if input empty
#11849 Use dsl_scan_setup_check() to setup a scrub
#11861 Improvements to the 'compatibility' property
#11862 cmd/zfs receive: allow dry-run (-n) to check property args
#11864 receive: don't fail inheriting (-x) properties on wrong dataset type
#11877 Combine zio caches if possible
#11881 FreeBSD: use vnlru_free_vfsops if available
#11883 FreeBSD: add support for lockless symlink lookup
#11884 FreeBSD: add missing seqc write begin/end around zfs_acl_chown_setattr
#11896 Fix crash in zio_done error reporting
#11905 zfs-send(8): Restore sorting of flags
#11926 FreeBSD: damage control racing .. lookups in face of mkdir/rmdir
#11930 vdev_mirror: don't scrub/resilver devices that can't be read
#11938 Fix AVX512BW Fletcher code on AVX512-but-not-BW machines
#11955 zfs get: don't lookup mount options when using "-s local"
#11956 libzfs: add keylocation=https://, backed by fetch(3) or libcurl
#11959 vdev_id: variable not getting expanded under map_slot()
#11966 Scale worker threads and taskqs with number of CPUs
#11994 Clean up use of zfs_log_create in zfs_dir
#11997 FreeBSD: Don't force xattr mount option
#11997 FreeBSD: Implement xattr=sa
#11997 FreeBSD: Use SET_ERROR to trace xattr name errors
#11998 Simplify/fix dnode_move() for dn_zfetch
#12003 FreeBSD: Initialize/destroy zp->z_lock
#12010 Fix dRAID self-healing short columns
#12033 Revert "Fix raw sends on encrypted datasets when copying back snapshots"
#12040 Reinstate the old zpool read label logic as a fallback
#12046 Improve scrub maxinflight_bytes math
#12049 FreeBSD: avoid memory allocation in arc_prune_async
#12052 FreeBSD: incorporate changes to the VFS_QUOTACTL(9) KPI
#12061 Fix dRAID sequential resilver silent damage handling
#12072 Let zfs diff be more permissive
#12077 FreeBSD: Retry OCF ENOMEM errors.
#12088 Propagate vdev state due to invalid label corruption
#12091 libzfs: On FreeBSD, use MNT_NOWAIT with getfsstat
#12097 FreeBSD: Update dataset_kstats for zvols in dev mode
#12104 FreeBSD boot code reminder after zpool upgrade
#12114 Introduce write-mostly sums
Obtained from: OpenZFS
OpenZFS commit: 75b4cbf62590c23fac3667537961a2a75fdc2cc3
Diffstat (limited to 'sys/contrib/openzfs/module')
87 files changed, 1559 insertions, 1429 deletions
diff --git a/sys/contrib/openzfs/module/avl/avl.c b/sys/contrib/openzfs/module/avl/avl.c index d0473d883b3d..1a95092bc2b6 100644 --- a/sys/contrib/openzfs/module/avl/avl.c +++ b/sys/contrib/openzfs/module/avl/avl.c @@ -1008,7 +1008,7 @@ avl_destroy_nodes(avl_tree_t *tree, void **cookie) --tree->avl_numnodes; /* - * If we just did a right child or there isn't one, go up to parent. + * If we just removed a right child or there isn't one, go up to parent. */ if (child == 1 || parent->avl_child[1] == NULL) { node = parent; diff --git a/sys/contrib/openzfs/module/icp/Makefile.in b/sys/contrib/openzfs/module/icp/Makefile.in index 7a01b2f08b8e..858c5a610c26 100644 --- a/sys/contrib/openzfs/module/icp/Makefile.in +++ b/sys/contrib/openzfs/module/icp/Makefile.in @@ -45,7 +45,6 @@ $(MODULE)-objs += algs/aes/aes_modes.o $(MODULE)-objs += algs/edonr/edonr.o $(MODULE)-objs += algs/sha1/sha1.o $(MODULE)-objs += algs/sha2/sha2.o -$(MODULE)-objs += algs/sha1/sha1.o $(MODULE)-objs += algs/skein/skein.o $(MODULE)-objs += algs/skein/skein_block.o $(MODULE)-objs += algs/skein/skein_iv.o @@ -70,6 +69,12 @@ $(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_x86-64.o # utility tries to interpret them as opcodes and obviously fails doing so. OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y +# Suppress objtool "unsupported stack pointer realignment" warnings. We are +# not using a DRAP register while aligning the stack to a 64 byte boundary. +# See #6950 for the reasoning. +OBJECT_FILES_NON_STANDARD_sha1-x86_64.o := y +OBJECT_FILES_NON_STANDARD_sha256_impl.o := y +OBJECT_FILES_NON_STANDARD_sha512_impl.o := y ICP_DIRS = \ api \ diff --git a/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c b/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c index 7c677095f1ef..ee96e692ef00 100644 --- a/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c +++ b/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c @@ -337,7 +337,7 @@ Q256(size_t bitlen, const uint32_t *data, uint32_t *restrict p) * * Checksum functions like this one can go over the stack frame size check * Linux imposes on 32-bit platforms (-Wframe-larger-than=1024). We can - * safely ignore the compiler error since we know that in ZoL, that + * safely ignore the compiler error since we know that in OpenZFS, that * the function will be called from a worker thread that won't be using * much stack. The only function that goes over the 1k limit is Q512(), * which only goes over it by a hair (1248 bytes on ARM32). diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c index 23686c59e8ce..7332834cbe37 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c @@ -1399,7 +1399,7 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) } datap += done; } - /* Decrypt remainder, which is less then chunk size, in one go. */ + /* Decrypt remainder, which is less than chunk size, in one go. */ kfpu_begin(); if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) { done = aesni_gcm_decrypt(datap, datap, bleft, @@ -1415,7 +1415,7 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES); /* - * Now less then GCM_AVX_MIN_DECRYPT_BYTES bytes remain, + * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain, * decrypt them block by block. */ while (bleft > 0) { diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S index cb923784a730..fc844cd8c74f 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S @@ -69,16 +69,27 @@ sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t blocks) #define _ASM #include <sys/asm_linkage.h> ENTRY_NP(sha1_block_data_order) +.cfi_startproc + mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_offset %rbx,-16 push %rbp +.cfi_offset %rbp,-24 push %r12 - mov %rsp,%rax +.cfi_offset %r12,-32 mov %rdi,%r8 # reassigned argument +.cfi_register %rdi, %r8 sub $72,%rsp mov %rsi,%r9 # reassigned argument +.cfi_register %rsi, %r9 and $-64,%rsp mov %rdx,%r10 # reassigned argument +.cfi_register %rdx, %r10 mov %rax,64(%rsp) +# echo ".cfi_cfa_expression %rsp+64,deref,+8" | +# openssl/crypto/perlasm/x86_64-xlate.pl +.cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08 mov 0(%r8),%edx mov 4(%r8),%esi @@ -1337,10 +1348,15 @@ ENTRY_NP(sha1_block_data_order) sub $1,%r10 jnz .Lloop mov 64(%rsp),%rsp - pop %r12 - pop %rbp - pop %rbx +.cfi_def_cfa %rsp,8 + movq -24(%rsp),%r12 +.cfi_restore %r12 + movq -16(%rsp),%rbp +.cfi_restore %rbp + movq -8(%rsp),%rbx +.cfi_restore %rbx ret +.cfi_endproc SET_SIZE(sha1_block_data_order) .data diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S index 766b75355f0b..28b048d2db24 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S @@ -83,12 +83,21 @@ SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) #include <sys/asm_linkage.h> ENTRY_NP(SHA256TransformBlocks) +.cfi_startproc + movq %rsp, %rax +.cfi_def_cfa_register %rax push %rbx +.cfi_offset %rbx,-16 push %rbp +.cfi_offset %rbp,-24 push %r12 +.cfi_offset %r12,-32 push %r13 +.cfi_offset %r13,-40 push %r14 +.cfi_offset %r14,-48 push %r15 +.cfi_offset %r15,-56 mov %rsp,%rbp # copy %rsp shl $4,%rdx # num*16 sub $16*4+4*8,%rsp @@ -99,6 +108,9 @@ ENTRY_NP(SHA256TransformBlocks) mov %rsi,16*4+1*8(%rsp) # save inp, 2nd arg mov %rdx,16*4+2*8(%rsp) # save end pointer, "3rd" arg mov %rbp,16*4+3*8(%rsp) # save copy of %rsp +# echo ".cfi_cfa_expression %rsp+88,deref,+56" | +# openssl/crypto/perlasm/x86_64-xlate.pl +.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x38 #.picmeup %rbp # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts @@ -2026,14 +2038,28 @@ ENTRY_NP(SHA256TransformBlocks) jb .Lloop mov 16*4+3*8(%rsp),%rsp +.cfi_def_cfa %rsp,56 pop %r15 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r15 pop %r14 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r14 pop %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 pop %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 pop %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp pop %rbx +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbx ret +.cfi_endproc SET_SIZE(SHA256TransformBlocks) .data diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S index 6e37618761b2..746c85a98566 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S @@ -84,12 +84,21 @@ SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) #include <sys/asm_linkage.h> ENTRY_NP(SHA512TransformBlocks) +.cfi_startproc + movq %rsp, %rax +.cfi_def_cfa_register %rax push %rbx +.cfi_offset %rbx,-16 push %rbp +.cfi_offset %rbp,-24 push %r12 +.cfi_offset %r12,-32 push %r13 +.cfi_offset %r13,-40 push %r14 +.cfi_offset %r14,-48 push %r15 +.cfi_offset %r15,-56 mov %rsp,%rbp # copy %rsp shl $4,%rdx # num*16 sub $16*8+4*8,%rsp @@ -100,6 +109,9 @@ ENTRY_NP(SHA512TransformBlocks) mov %rsi,16*8+1*8(%rsp) # save inp, 2nd arg mov %rdx,16*8+2*8(%rsp) # save end pointer, "3rd" arg mov %rbp,16*8+3*8(%rsp) # save copy of %rsp +# echo ".cfi_cfa_expression %rsp+152,deref,+56" | +# openssl/crypto/perlasm/x86_64-xlate.pl +.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x38 #.picmeup %rbp # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts @@ -2027,14 +2039,28 @@ ENTRY_NP(SHA512TransformBlocks) jb .Lloop mov 16*8+3*8(%rsp),%rsp +.cfi_def_cfa %rsp,56 pop %r15 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r15 pop %r14 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r14 pop %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 pop %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 pop %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp pop %rbx +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbx ret +.cfi_endproc SET_SIZE(SHA512TransformBlocks) .data diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/acl_common.c b/sys/contrib/openzfs/module/os/freebsd/spl/acl_common.c index 66e27cefa396..7fd0e36e1ba7 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/acl_common.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/acl_common.c @@ -43,7 +43,6 @@ #include <grp.h> #include <pwd.h> #include <acl_common.h> -#define ASSERT assert #endif #define ACE_POSIX_SUPPORTED_BITS (ACE_READ_DATA | \ @@ -170,8 +169,9 @@ ksort(caddr_t v, int n, int s, int (*f)(void *, void *)) return; /* Sanity check on arguments */ - ASSERT(((uintptr_t)v & 0x3) == 0 && (s & 0x3) == 0); - ASSERT(s > 0); + ASSERT3U(((uintptr_t)v & 0x3), ==, 0); + ASSERT3S((s & 0x3), ==, 0); + ASSERT3S(s, >, 0); for (g = n / 2; g > 0; g /= 2) { for (i = g; i < n; i++) { for (j = i - g; j >= 0 && diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/callb.c b/sys/contrib/openzfs/module/os/freebsd/spl/callb.c index fffa85b6b91b..0b7fefc89a26 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/callb.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/callb.c @@ -75,7 +75,7 @@ typedef struct callb { typedef struct callb_table { kmutex_t ct_lock; /* protect all callb states */ callb_t *ct_freelist; /* free callb structures */ - int ct_busy; /* != 0 prevents additions */ + boolean_t ct_busy; /* B_TRUE prevents additions */ kcondvar_t ct_busy_cv; /* to wait for not busy */ int ct_ncallb; /* num of callbs allocated */ callb_t *ct_first_cb[NCBCLASS]; /* ptr to 1st callb in a class */ @@ -98,7 +98,7 @@ callb_cpr_t callb_cprinfo_safe = { static void callb_init(void *dummy __unused) { - callb_table.ct_busy = 0; /* mark table open for additions */ + callb_table.ct_busy = B_FALSE; /* mark table open for additions */ mutex_init(&callb_safe_mutex, NULL, MUTEX_DEFAULT, NULL); mutex_init(&callb_table.ct_lock, NULL, MUTEX_DEFAULT, NULL); } @@ -139,7 +139,7 @@ callb_add_common(boolean_t (*func)(void *arg, int code), { callb_t *cp; - ASSERT(class < NCBCLASS); + ASSERT3S(class, <, NCBCLASS); mutex_enter(&ct->ct_lock); while (ct->ct_busy) @@ -259,7 +259,7 @@ callb_execute_class(int class, int code) callb_t *cp; void *ret = NULL; - ASSERT(class < NCBCLASS); + ASSERT3S(class, <, NCBCLASS); mutex_enter(&ct->ct_lock); @@ -351,8 +351,8 @@ void callb_lock_table(void) { mutex_enter(&ct->ct_lock); - ASSERT(ct->ct_busy == 0); - ct->ct_busy = 1; + ASSERT(!ct->ct_busy); + ct->ct_busy = B_TRUE; mutex_exit(&ct->ct_lock); } @@ -363,8 +363,8 @@ void callb_unlock_table(void) { mutex_enter(&ct->ct_lock); - ASSERT(ct->ct_busy != 0); - ct->ct_busy = 0; + ASSERT(ct->ct_busy); + ct->ct_busy = B_FALSE; cv_broadcast(&ct->ct_busy_cv); mutex_exit(&ct->ct_lock); } diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/list.c b/sys/contrib/openzfs/module/os/freebsd/spl/list.c index 0f5ae629126c..62374a417704 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/list.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/list.c @@ -61,9 +61,8 @@ void list_create(list_t *list, size_t size, size_t offset) { - ASSERT(list); - ASSERT(size > 0); - ASSERT(size >= offset + sizeof (list_node_t)); + ASSERT3P(list, !=, NULL); + ASSERT3U(size, >=, offset + sizeof (list_node_t)); list->list_size = size; list->list_offset = offset; @@ -76,9 +75,9 @@ list_destroy(list_t *list) { list_node_t *node = &list->list_head; - ASSERT(list); - ASSERT(list->list_head.list_next == node); - ASSERT(list->list_head.list_prev == node); + ASSERT3P(list, !=, NULL); + ASSERT3P(list->list_head.list_next, ==, node); + ASSERT3P(list->list_head.list_prev, ==, node); node->list_next = node->list_prev = NULL; } @@ -124,7 +123,7 @@ list_remove(list_t *list, void *object) { list_node_t *lold = list_d2l(list, object); ASSERT(!list_empty(list)); - ASSERT(lold->list_next != NULL); + ASSERT3P(lold->list_next, !=, NULL); list_remove_node(lold); } @@ -195,8 +194,8 @@ list_move_tail(list_t *dst, list_t *src) list_node_t *dstnode = &dst->list_head; list_node_t *srcnode = &src->list_head; - ASSERT(dst->list_size == src->list_size); - ASSERT(dst->list_offset == src->list_offset); + ASSERT3U(dst->list_size, ==, src->list_size); + ASSERT3U(dst->list_offset, ==, src->list_offset); if (list_empty(src)) return; diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_kmem.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_kmem.c index cfc61dd7fc2a..ee8f1d851a48 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_kmem.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_kmem.c @@ -112,7 +112,7 @@ zfs_kmem_free(void *buf, size_t size __unused) if (i == buf) break; } - ASSERT(i != NULL); + ASSERT3P(i, !=, NULL); LIST_REMOVE(i, next); mtx_unlock(&kmem_items_mtx); memset(buf, 0xDC, MAX(size, 16)); @@ -162,7 +162,7 @@ kmem_cache_create(char *name, size_t bufsize, size_t align, { kmem_cache_t *cache; - ASSERT(vmp == NULL); + ASSERT3P(vmp, ==, NULL); cache = kmem_alloc(sizeof (*cache), KM_SLEEP); strlcpy(cache->kc_name, name, sizeof (cache->kc_name)); @@ -324,7 +324,7 @@ void spl_kmem_cache_set_move(kmem_cache_t *skc, kmem_cbrc_t (move)(void *, void *, size_t, void *)) { - ASSERT(move != NULL); + ASSERT3P(move, !=, NULL); } #ifdef KMEM_DEBUG diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c index 43ce358298b5..e591921ace1b 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c @@ -69,7 +69,7 @@ __kstat_set_seq_raw_ops(kstat_t *ksp, static int kstat_default_update(kstat_t *ksp, int rw) { - ASSERT(ksp != NULL); + ASSERT3P(ksp, !=, NULL); if (rw == KSTAT_WRITE) return (EACCES); @@ -223,7 +223,7 @@ restart: sbuf_printf(sb, "%s", ksp->ks_raw_buf); } else { - ASSERT(ksp->ks_ndata == 1); + ASSERT3U(ksp->ks_ndata, ==, 1); sbuf_hexdump(sb, ksp->ks_data, ksp->ks_data_size, NULL, 0); } @@ -250,7 +250,7 @@ __kstat_create(const char *module, int instance, const char *name, KASSERT(instance == 0, ("instance=%d", instance)); if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO)) - ASSERT(ks_ndata == 1); + ASSERT3U(ks_ndata, ==, 1); if (class == NULL) class = "misc"; @@ -461,7 +461,7 @@ kstat_install(kstat_t *ksp) struct sysctl_oid *root; if (ksp->ks_ndata == UINT32_MAX) - VERIFY(ksp->ks_type == KSTAT_TYPE_RAW); + VERIFY3U(ksp->ks_type, ==, KSTAT_TYPE_RAW); switch (ksp->ks_type) { case KSTAT_TYPE_NAMED: @@ -493,7 +493,7 @@ kstat_install(kstat_t *ksp) default: panic("unsupported kstat type %d\n", ksp->ks_type); } - VERIFY(root != NULL); + VERIFY3P(root, !=, NULL); ksp->ks_sysctl_root = root; } @@ -535,7 +535,7 @@ kstat_waitq_exit(kstat_io_t *kiop) delta = new - kiop->wlastupdate; kiop->wlastupdate = new; wcnt = kiop->wcnt--; - ASSERT((int)wcnt > 0); + ASSERT3S(wcnt, >, 0); kiop->wlentime += delta * wcnt; kiop->wtime += delta; } @@ -566,7 +566,7 @@ kstat_runq_exit(kstat_io_t *kiop) delta = new - kiop->rlastupdate; kiop->rlastupdate = new; rcnt = kiop->rcnt--; - ASSERT((int)rcnt > 0); + ASSERT3S(rcnt, >, 0); kiop->rlentime += delta * rcnt; kiop->rtime += delta; } diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_string.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_string.c index d13b64b4cd26..00b1df766ab9 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_string.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_string.c @@ -102,6 +102,6 @@ kmem_asprintf(const char *fmt, ...) void kmem_strfree(char *str) { - ASSERT(str != NULL); + ASSERT3P(str, !=, NULL); kmem_free(str, strlen(str) + 1); } diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_sysevent.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_sysevent.c index 8c0e495681e9..d5d50080fafd 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_sysevent.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_sysevent.c @@ -245,7 +245,7 @@ sysevent_worker(void *arg __unused) if (error == ESHUTDOWN) break; } else { - VERIFY(event != NULL); + VERIFY3P(event, !=, NULL); log_sysevent(event); nvlist_free(event); } diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c index 59a781ee1b64..0bf251a1edac 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c @@ -48,7 +48,7 @@ int zfs_uiomove(void *cp, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio) { - ASSERT(zfs_uio_rw(uio) == dir); + ASSERT3U(zfs_uio_rw(uio), ==, dir); return (uiomove(cp, (int)n, GET_UIO_STRUCT(uio))); } @@ -102,6 +102,6 @@ zfs_uioskip(zfs_uio_t *uio, size_t n) int zfs_uio_fault_move(void *p, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio) { - ASSERT(zfs_uio_rw(uio) == dir); + ASSERT3U(zfs_uio_rw(uio), ==, dir); return (vn_io_fault_uiomove(p, n, GET_UIO_STRUCT(uio))); } diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c index 09c8401267df..60ea627e975b 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c @@ -275,13 +275,13 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, void vn_rele_async(vnode_t *vp, taskq_t *taskq) { - VERIFY(vp->v_count > 0); + VERIFY3U(vp->v_usecount, >, 0); if (refcount_release_if_not_last(&vp->v_usecount)) { #if __FreeBSD_version < 1300045 vdrop(vp); #endif return; } - VERIFY(taskq_dispatch((taskq_t *)taskq, - (task_func_t *)vrele, vp, TQ_SLEEP) != 0); + VERIFY3U(taskq_dispatch((taskq_t *)taskq, + (task_func_t *)vrele, vp, TQ_SLEEP), !=, 0); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c index ff4d80ef1dfd..00aa21c6c9ec 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c @@ -168,8 +168,7 @@ abd_verify_scatter(abd_t *abd) * if an error if the ABD has been marked as a linear page. */ ASSERT(!abd_is_linear_page(abd)); - ASSERT3U(ABD_SCATTER(abd).abd_offset, <, - zfs_abd_chunk_size); + ASSERT3U(ABD_SCATTER(abd).abd_offset, <, zfs_abd_chunk_size); n = abd_scatter_chunkcnt(abd); for (i = 0; i < n; i++) { ASSERT3P(ABD_SCATTER(abd).abd_chunks[i], !=, NULL); @@ -306,7 +305,7 @@ void abd_free_linear_page(abd_t *abd) { /* - * FreeBSD does not have have scatter linear pages + * FreeBSD does not have scatter linear pages * so there is an error. */ VERIFY(0); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c index e73efd810e53..05377bb7ed98 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c @@ -158,11 +158,7 @@ arc_default_max(uint64_t min, uint64_t allmem) static void arc_prune_task(void *arg) { -#ifdef __LP64__ - int64_t nr_scan = (int64_t)arg; -#else - int64_t nr_scan = (int32_t)arg; -#endif + int64_t nr_scan = (intptr_t)arg; arc_reduce_target_size(ptob(nr_scan)); #if __FreeBSD_version >= 1300139 @@ -188,15 +184,13 @@ arc_prune_task(void *arg) void arc_prune_async(int64_t adjust) { - int64_t *adjustptr; #ifndef __LP64__ - if (adjust > __LONG_MAX) - adjust = __LONG_MAX; + if (adjust > INTPTR_MAX) + adjust = INTPTR_MAX; #endif - - adjustptr = (void *)adjust; - taskq_dispatch(arc_prune_taskq, arc_prune_task, adjustptr, TQ_SLEEP); + taskq_dispatch(arc_prune_taskq, arc_prune_task, + (void *)(intptr_t)adjust, TQ_SLEEP); ARCSTAT_BUMP(arcstat_prune); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c index 03d14ed7cf5c..6a67dbc9f616 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c @@ -172,11 +172,13 @@ zfs_crypto_dispatch(freebsd_crypt_session_t *session, struct cryptop *crp) break; mtx_lock(&session->fs_lock); while (session->fs_done == false) - msleep(crp, &session->fs_lock, PRIBIO, - "zfs_crypto", hz/5); + msleep(crp, &session->fs_lock, 0, + "zfs_crypto", 0); mtx_unlock(&session->fs_lock); - if (crp->crp_etype != EAGAIN) { + if (crp->crp_etype == ENOMEM) { + pause("zcrnomem", 1); + } else if (crp->crp_etype != EAGAIN) { error = crp->crp_etype; break; } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c index fb8f560316ea..2cf54a3cef65 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c @@ -120,7 +120,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_buf_t *db = dbp[i]; caddr_t va; - ASSERT(size > 0); + ASSERT3U(size, >, 0); ASSERT3U(db->db_size, >=, PAGESIZE); bufoff = offset - db->db_offset; @@ -170,7 +170,7 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, int err; ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex); - ASSERT(last_size <= PAGE_SIZE); + ASSERT3S(last_size, <=, PAGE_SIZE); err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex), IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp); @@ -182,7 +182,7 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, if (dbp[0]->db_offset != 0 || numbufs > 1) { for (i = 0; i < numbufs; i++) { ASSERT(ISP2(dbp[i]->db_size)); - ASSERT((dbp[i]->db_offset % dbp[i]->db_size) == 0); + ASSERT3U((dbp[i]->db_offset % dbp[i]->db_size), ==, 0); ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size); } } @@ -202,10 +202,10 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, vm_page_do_sunbusy(m); break; } - ASSERT(m->dirty == 0); + ASSERT3U(m->dirty, ==, 0); ASSERT(!pmap_page_is_write_mapped(m)); - ASSERT(db->db_size > PAGE_SIZE); + ASSERT3U(db->db_size, >, PAGE_SIZE); bufoff = IDX_TO_OFF(m->pindex) % db->db_size; va = zfs_map_page(m, &sf); bcopy((char *)db->db_data + bufoff, va, PAGESIZE); @@ -229,7 +229,7 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, if (m != bogus_page) { vm_page_assert_xbusied(m); ASSERT(vm_page_none_valid(m)); - ASSERT(m->dirty == 0); + ASSERT3U(m->dirty, ==, 0); ASSERT(!pmap_page_is_write_mapped(m)); va = zfs_map_page(m, &sf); } @@ -248,25 +248,28 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, * end of file anyway. */ tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff); + ASSERT3S(tocpy, >=, 0); if (m != bogus_page) bcopy((char *)db->db_data + bufoff, va + pgoff, tocpy); pgoff += tocpy; - ASSERT(pgoff <= PAGESIZE); + ASSERT3S(pgoff, >=, 0); + ASSERT3S(pgoff, <=, PAGESIZE); if (pgoff == PAGESIZE) { if (m != bogus_page) { zfs_unmap_page(sf); vm_page_valid(m); } - ASSERT(mi < count); + ASSERT3S(mi, <, count); mi++; pgoff = 0; } bufoff += tocpy; - ASSERT(bufoff <= db->db_size); + ASSERT3S(bufoff, >=, 0); + ASSERT3S(bufoff, <=, db->db_size); if (bufoff == db->db_size) { - ASSERT(di < numbufs); + ASSERT3S(di, <, numbufs); di++; bufoff = 0; } @@ -286,23 +289,23 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, * with a size that is not a multiple of the page size. */ if (mi == count) { - ASSERT(di >= numbufs - 1); + ASSERT3S(di, >=, numbufs - 1); IMPLY(*rahead != 0, di == numbufs - 1); IMPLY(*rahead != 0, bufoff != 0); - ASSERT(pgoff == 0); + ASSERT0(pgoff); } if (di == numbufs) { - ASSERT(mi >= count - 1); - ASSERT(*rahead == 0); + ASSERT3S(mi, >=, count - 1); + ASSERT0(*rahead); IMPLY(pgoff == 0, mi == count); if (pgoff != 0) { - ASSERT(mi == count - 1); - ASSERT((dbp[0]->db_size & PAGE_MASK) != 0); + ASSERT3S(mi, ==, count - 1); + ASSERT3U((dbp[0]->db_size & PAGE_MASK), !=, 0); } } #endif if (pgoff != 0) { - ASSERT(m != bogus_page); + ASSERT3P(m, !=, bogus_page); bzero(va + pgoff, PAGESIZE - pgoff); zfs_unmap_page(sf); vm_page_valid(m); @@ -318,17 +321,17 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, vm_page_do_sunbusy(m); break; } - ASSERT(m->dirty == 0); + ASSERT3U(m->dirty, ==, 0); ASSERT(!pmap_page_is_write_mapped(m)); - ASSERT(db->db_size > PAGE_SIZE); + ASSERT3U(db->db_size, >, PAGE_SIZE); bufoff = IDX_TO_OFF(m->pindex) % db->db_size; tocpy = MIN(db->db_size - bufoff, PAGESIZE); va = zfs_map_page(m, &sf); bcopy((char *)db->db_data + bufoff, va, tocpy); if (tocpy < PAGESIZE) { - ASSERT(i == *rahead - 1); - ASSERT((db->db_size & PAGE_MASK) != 0); + ASSERT3S(i, ==, *rahead - 1); + ASSERT3U((db->db_size & PAGE_MASK), !=, 0); bzero(va + tocpy, PAGESIZE - tocpy); } zfs_unmap_page(sf); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c index d5ba0d93a569..d9096575d853 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c @@ -113,7 +113,6 @@ static int zfs__fini(void); static void zfs_shutdown(void *, int); static eventhandler_tag zfs_shutdown_event_tag; -extern zfsdev_state_t *zfsdev_state_list; #define ZFS_MIN_KSTACK_PAGES 4 @@ -182,70 +181,29 @@ out: static void zfsdev_close(void *data) { - zfsdev_state_t *zs = data; - - ASSERT(zs != NULL); - - mutex_enter(&zfsdev_state_lock); - - ASSERT(zs->zs_minor != 0); - - zs->zs_minor = -1; - zfs_onexit_destroy(zs->zs_onexit); - zfs_zevent_destroy(zs->zs_zevent); - zs->zs_onexit = NULL; - zs->zs_zevent = NULL; - - mutex_exit(&zfsdev_state_lock); + zfsdev_state_destroy(data); } -static int -zfs_ctldev_init(struct cdev *devp) +void +zfsdev_private_set_state(void *priv __unused, zfsdev_state_t *zs) { - boolean_t newzs = B_FALSE; - minor_t minor; - zfsdev_state_t *zs, *zsprev = NULL; - - ASSERT(MUTEX_HELD(&zfsdev_state_lock)); - - minor = zfsdev_minor_alloc(); - if (minor == 0) - return (SET_ERROR(ENXIO)); - - for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) { - if (zs->zs_minor == -1) - break; - zsprev = zs; - } - - if (!zs) { - zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP); - newzs = B_TRUE; - } - devfs_set_cdevpriv(zs, zfsdev_close); +} - zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit); - zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent); - - if (newzs) { - zs->zs_minor = minor; - wmb(); - zsprev->zs_next = zs; - } else { - wmb(); - zs->zs_minor = minor; - } - return (0); +zfsdev_state_t * +zfsdev_private_get_state(void *priv) +{ + return (priv); } static int -zfsdev_open(struct cdev *devp, int flag, int mode, struct thread *td) +zfsdev_open(struct cdev *devp __unused, int flag __unused, int mode __unused, + struct thread *td __unused) { int error; mutex_enter(&zfsdev_state_lock); - error = zfs_ctldev_init(devp); + error = zfsdev_state_init(NULL); mutex_exit(&zfsdev_state_lock); return (error); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c index 2bc78cb451e8..070e7a5b9f1b 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c @@ -95,8 +95,7 @@ spa_generate_rootconf(const char *name) for (i = 0; i < count; i++) { uint64_t txg; - VERIFY(nvlist_lookup_uint64(configs[i], ZPOOL_CONFIG_POOL_TXG, - &txg) == 0); + txg = fnvlist_lookup_uint64(configs[i], ZPOOL_CONFIG_POOL_TXG); if (txg > best_txg) { best_txg = txg; best_cfg = configs[i]; @@ -115,72 +114,66 @@ spa_generate_rootconf(const char *name) break; if (configs[i] == NULL) continue; - VERIFY(nvlist_lookup_nvlist(configs[i], ZPOOL_CONFIG_VDEV_TREE, - &nvtop) == 0); - nvlist_dup(nvtop, &tops[i], KM_SLEEP); + nvtop = fnvlist_lookup_nvlist(configs[i], + ZPOOL_CONFIG_VDEV_TREE); + tops[i] = fnvlist_dup(nvtop); } for (i = 0; holes != NULL && i < nholes; i++) { if (i >= nchildren) continue; if (tops[holes[i]] != NULL) continue; - nvlist_alloc(&tops[holes[i]], NV_UNIQUE_NAME, KM_SLEEP); - VERIFY(nvlist_add_string(tops[holes[i]], ZPOOL_CONFIG_TYPE, - VDEV_TYPE_HOLE) == 0); - VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_ID, - holes[i]) == 0); - VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_GUID, - 0) == 0); + tops[holes[i]] = fnvlist_alloc(); + fnvlist_add_string(tops[holes[i]], ZPOOL_CONFIG_TYPE, + VDEV_TYPE_HOLE); + fnvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_ID, holes[i]); + fnvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_GUID, 0); } for (i = 0; i < nchildren; i++) { if (tops[i] != NULL) continue; - nvlist_alloc(&tops[i], NV_UNIQUE_NAME, KM_SLEEP); - VERIFY(nvlist_add_string(tops[i], ZPOOL_CONFIG_TYPE, - VDEV_TYPE_MISSING) == 0); - VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_ID, - i) == 0); - VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_GUID, - 0) == 0); + tops[i] = fnvlist_alloc(); + fnvlist_add_string(tops[i], ZPOOL_CONFIG_TYPE, + VDEV_TYPE_MISSING); + fnvlist_add_uint64(tops[i], ZPOOL_CONFIG_ID, i); + fnvlist_add_uint64(tops[i], ZPOOL_CONFIG_GUID, 0); } /* * Create pool config based on the best vdev config. */ - nvlist_dup(best_cfg, &config, KM_SLEEP); + config = fnvlist_dup(best_cfg); /* * Put this pool's top-level vdevs into a root vdev. */ - VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, - &pgid) == 0); - VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, - VDEV_TYPE_ROOT) == 0); - VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); - VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); - VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - tops, nchildren) == 0); + pgid = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID); + nvroot = fnvlist_alloc(); + fnvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT); + fnvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL); + fnvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid); + fnvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, tops, + nchildren); /* * Replace the existing vdev_tree with the new root vdev in * this pool's configuration (remove the old, add the new). */ - VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); + fnvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot); /* * Drop vdev config elements that should not be present at pool level. */ - nvlist_remove(config, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64); - nvlist_remove(config, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64); + fnvlist_remove(config, ZPOOL_CONFIG_GUID); + fnvlist_remove(config, ZPOOL_CONFIG_TOP_GUID); for (i = 0; i < count; i++) - nvlist_free(configs[i]); + fnvlist_free(configs[i]); kmem_free(configs, count * sizeof (void *)); for (i = 0; i < nchildren; i++) - nvlist_free(tops[i]); + fnvlist_free(tops[i]); kmem_free(tops, nchildren * sizeof (void *)); - nvlist_free(nvroot); + fnvlist_free(nvroot); return (config); } @@ -201,10 +194,9 @@ spa_import_rootpool(const char *name, bool checkpointrewind) mutex_enter(&spa_namespace_lock); if (config != NULL) { - VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, - &pname) == 0 && strcmp(name, pname) == 0); - VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) - == 0); + pname = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME); + VERIFY0(strcmp(name, pname)); + txg = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG); if ((spa = spa_lookup(pname)) != NULL) { /* @@ -213,7 +205,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind) */ if (spa->spa_state == POOL_STATE_ACTIVE) { mutex_exit(&spa_namespace_lock); - nvlist_free(config); + fnvlist_free(config); return (0); } @@ -235,12 +227,12 @@ spa_import_rootpool(const char *name, bool checkpointrewind) spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; } else if ((spa = spa_lookup(name)) == NULL) { mutex_exit(&spa_namespace_lock); - nvlist_free(config); + fnvlist_free(config); cmn_err(CE_NOTE, "Cannot find the pool label for '%s'", name); return (EIO); } else { - VERIFY(nvlist_dup(spa->spa_config, &config, KM_SLEEP) == 0); + config = fnvlist_dup(spa->spa_config); } spa->spa_is_root = B_TRUE; spa->spa_import_flags = ZFS_IMPORT_VERBATIM; @@ -251,15 +243,14 @@ spa_import_rootpool(const char *name, bool checkpointrewind) /* * Build up a vdev tree based on the boot device's label config. */ - VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvtop) == 0); + nvtop = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, VDEV_ALLOC_ROOTPOOL); spa_config_exit(spa, SCL_ALL, FTAG); if (error) { mutex_exit(&spa_namespace_lock); - nvlist_free(config); + fnvlist_free(config); cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", pname); return (error); @@ -270,7 +261,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind) spa_config_exit(spa, SCL_ALL, FTAG); mutex_exit(&spa_namespace_lock); - nvlist_free(config); + fnvlist_free(config); return (0); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_file.c b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_file.c index 825bd706e0c0..fc04a7476154 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_file.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_file.c @@ -59,13 +59,13 @@ vdev_file_fini(void) static void vdev_file_hold(vdev_t *vd) { - ASSERT(vd->vdev_path != NULL); + ASSERT3P(vd->vdev_path, !=, NULL); } static void vdev_file_rele(vdev_t *vd) { - ASSERT(vd->vdev_path != NULL); + ASSERT3P(vd->vdev_path, !=, NULL); } static mode_t @@ -137,7 +137,8 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, * administrator has already decided that the pool should be available * to local zone users, so the underlying devices should be as well. */ - ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/'); + ASSERT3P(vd->vdev_path, !=, NULL); + ASSERT(vd->vdev_path[0] == '/'); error = zfs_file_open(vd->vdev_path, vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c index c9e8e21982cf..3853b2b5c900 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c @@ -396,8 +396,8 @@ vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets, p = datas[i]; s = sizes[i]; end = off + s; - ASSERT((off % cp->provider->sectorsize) == 0); - ASSERT((s % cp->provider->sectorsize) == 0); + ASSERT0(off % cp->provider->sectorsize); + ASSERT0(s % cp->provider->sectorsize); for (; off < end; off += maxio, p += maxio, s -= maxio, j++) { bios[j] = g_alloc_bio(); @@ -409,7 +409,7 @@ vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets, g_io_request(bios[j], cp); } } - ASSERT(j == n_bios); + ASSERT3S(j, ==, n_bios); /* Wait for all of the bios to complete, and clean them up */ for (i = j = 0; i < ncmds; i++) { @@ -467,7 +467,7 @@ vdev_geom_read_config(struct g_consumer *cp, nvlist_t **configp) offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE; sizes[l] = size; errors[l] = 0; - ASSERT(offsets[l] % pp->sectorsize == 0); + ASSERT0(offsets[l] % pp->sectorsize); } /* Issue the IO requests */ @@ -557,7 +557,7 @@ process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) goto ignore; - VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); + txg = fnvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG); if (*known_pool_guid != 0) { if (pool_guid != *known_pool_guid) @@ -568,8 +568,8 @@ process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, resize_configs(configs, count, id); if ((*configs)[id] != NULL) { - VERIFY(nvlist_lookup_uint64((*configs)[id], - ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); + known_txg = fnvlist_lookup_uint64((*configs)[id], + ZPOOL_CONFIG_POOL_TXG); if (txg <= known_txg) goto ignore; nvlist_free((*configs)[id]); @@ -813,7 +813,7 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, * Set the TLS to indicate downstack that we * should not access zvols */ - VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0); + VERIFY0(tsd_set(zfs_geom_probe_vdev_key, vd)); /* * We must have a pathname, and it must be absolute. @@ -873,7 +873,7 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, } /* Clear the TLS now that tasting is done */ - VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0); + VERIFY0(tsd_set(zfs_geom_probe_vdev_key, NULL)); if (cp == NULL) { ZFS_LOG(1, "Vdev %s not found.", vd->vdev_path); @@ -1160,7 +1160,7 @@ vdev_geom_io_done(zio_t *zio) struct bio *bp = zio->io_bio; if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) { - ASSERT(bp == NULL); + ASSERT3P(bp, ==, NULL); return; } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_label_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_label_os.c index 97cb201934dc..48f58807e800 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_label_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_label_os.c @@ -52,7 +52,7 @@ vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size) if (vdev_is_dead(vd)) return (ENXIO); - ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + ASSERT3U(spa_config_held(spa, SCL_ALL, RW_WRITER), ==, SCL_ALL); pad2 = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); abd_zero(pad2, VDEV_PAD_SIZE); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c index 7089d0e0e887..9b410863019e 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c @@ -354,7 +354,8 @@ zfs_external_acl(znode_t *zp) * after upgrade the SA_ZPL_ZNODE_ACL should have been * removed */ - VERIFY(zp->z_is_sa && error == ENOENT); + VERIFY(zp->z_is_sa); + VERIFY3S(error, ==, ENOENT); return (0); } } @@ -427,7 +428,8 @@ zfs_znode_acl_version(znode_t *zp) * After upgrade SA_ZPL_ZNODE_ACL should have * been removed. */ - VERIFY(zp->z_is_sa && error == ENOENT); + VERIFY(zp->z_is_sa); + VERIFY3S(error, ==, ENOENT); return (ZFS_ACL_VERSION_FUID); } } @@ -575,7 +577,7 @@ zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who, { zfs_acl_node_t *aclnode; - ASSERT(aclp); + ASSERT3P(aclp, !=, NULL); if (start == NULL) { aclnode = list_head(&aclp->z_acl); @@ -804,7 +806,7 @@ zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr) void *cookie = NULL; zfs_acl_node_t *newaclnode; - ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL); + ASSERT3U(aclp->z_version, ==, ZFS_ACL_VERSION_INITIAL); /* * First create the ACE in a contiguous piece of memory * for zfs_copy_ace_2_fuid(). @@ -826,9 +828,9 @@ zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr) newaclnode = zfs_acl_node_alloc(aclp->z_acl_count * sizeof (zfs_object_ace_t)); aclp->z_ops = &zfs_acl_fuid_ops; - VERIFY(zfs_copy_ace_2_fuid(zp->z_zfsvfs, ZTOV(zp)->v_type, aclp, + VERIFY0(zfs_copy_ace_2_fuid(zp->z_zfsvfs, ZTOV(zp)->v_type, aclp, oldaclp, newaclnode->z_acldata, aclp->z_acl_count, - &newaclnode->z_size, NULL, cr) == 0); + &newaclnode->z_size, NULL, cr)); newaclnode->z_ace_count = aclp->z_acl_count; aclp->z_version = ZFS_ACL_VERSION; kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t)); @@ -1204,7 +1206,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) && (zfsvfs->z_version >= ZPL_VERSION_FUID)) zfs_acl_xform(zp, aclp, cr); - ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID); + ASSERT3U(aclp->z_version, >=, ZFS_ACL_VERSION_FUID); otype = DMU_OT_ACL; } @@ -1560,8 +1562,8 @@ zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp, * Copy special opaque data if any */ if ((data1sz = paclp->z_ops->ace_data(pacep, &data1)) != 0) { - VERIFY((data2sz = aclp->z_ops->ace_data(acep, - &data2)) == data1sz); + data2sz = aclp->z_ops->ace_data(acep, &data2); + VERIFY3U(data2sz, ==, data1sz); bcopy(data1, data2, data2sz); } @@ -1630,7 +1632,7 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, if (zfsvfs->z_replay == B_FALSE) ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__); } else - ASSERT(dzp->z_vnode == NULL); + ASSERT3P(dzp->z_vnode, ==, NULL); bzero(acl_ids, sizeof (zfs_acl_ids_t)); acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode); @@ -1849,8 +1851,8 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) aclnode->z_size); start = (caddr_t)start + aclnode->z_size; } - ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp == - aclp->z_acl_bytes); + ASSERT3U((caddr_t)start - (caddr_t)vsecp->vsa_aclentp, + ==, aclp->z_acl_bytes); } } if (mask & VSA_ACE_ACLFLAGS) { @@ -2009,8 +2011,8 @@ top: } error = zfs_aclset_common(zp, aclp, cr, tx); - ASSERT(error == 0); - ASSERT(zp->z_acl_cached == NULL); + ASSERT0(error); + ASSERT3P(zp->z_acl_cached, ==, NULL); zp->z_acl_cached = aclp; if (fuid_dirtied) @@ -2123,7 +2125,7 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode, return (error); } - ASSERT(zp->z_acl_cached); + ASSERT3P(zp->z_acl_cached, !=, NULL); while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask, &iflags, &type))) { @@ -2349,7 +2351,6 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) is_attr = ((zp->z_pflags & ZFS_XATTR) && (ZTOV(zp)->v_type == VDIR)); -#ifdef __FreeBSD_kernel__ /* * In FreeBSD, we don't care about permissions of individual ADS. * Note that not checking them is not just an optimization - without @@ -2357,42 +2358,9 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) */ if (zp->z_pflags & ZFS_XATTR) return (0); -#else - /* - * If attribute then validate against base file - */ - if (is_attr) { - uint64_t parent; - - if ((error = sa_lookup(zp->z_sa_hdl, - SA_ZPL_PARENT(zp->z_zfsvfs), &parent, - sizeof (parent))) != 0) - return (error); - - if ((error = zfs_zget(zp->z_zfsvfs, - parent, &xzp)) != 0) { - return (error); - } - - check_zp = xzp; - - /* - * fixup mode to map to xattr perms - */ - - if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) { - mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA); - mode |= ACE_WRITE_NAMED_ATTRS; - } - - if (mode & (ACE_READ_DATA|ACE_EXECUTE)) { - mode &= ~(ACE_READ_DATA|ACE_EXECUTE); - mode |= ACE_READ_NAMED_ATTRS; - } - } -#endif owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER); + /* * Map the bits required to the standard vnode flags VREAD|VWRITE|VEXEC * in needed_bits. Map the bits mapped by working_mode (currently @@ -2444,7 +2412,7 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) */ error = 0; - ASSERT(working_mode != 0); + ASSERT3U(working_mode, !=, 0); if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) && owner == crgetuid(cr))) @@ -2610,7 +2578,8 @@ zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr) &zpcheck_privs, B_FALSE, cr)) == 0) return (0); - ASSERT(dzp_error && zp_error); + ASSERT(dzp_error); + ASSERT(zp_error); if (!dzpcheck_privs) return (dzp_error); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c index 3ab4502bbc25..a9fe1b647238 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c @@ -352,7 +352,7 @@ zfsctl_create(zfsvfs_t *zfsvfs) vnode_t *rvp; uint64_t crtime[2]; - ASSERT(zfsvfs->z_ctldir == NULL); + ASSERT3P(zfsvfs->z_ctldir, ==, NULL); snapdir = sfs_alloc_node(sizeof (*snapdir), "snapshot", ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR); @@ -360,8 +360,8 @@ zfsctl_create(zfsvfs_t *zfsvfs) ZFSCTL_INO_ROOT); dot_zfs->snapdir = snapdir; - VERIFY(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp) == 0); - VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), + VERIFY0(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp)); + VERIFY0(sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), &crtime, sizeof (crtime))); ZFS_TIME_DECODE(&dot_zfs->cmtime, crtime); vput(rvp); @@ -637,7 +637,7 @@ zfsctl_root_lookup(struct vop_lookup_args *ap) int nameiop = ap->a_cnp->cn_nameiop; int err; - ASSERT(dvp->v_type == VDIR); + ASSERT3S(dvp->v_type, ==, VDIR); if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP) return (SET_ERROR(ENOTSUP)); @@ -673,7 +673,7 @@ zfsctl_root_readdir(struct vop_readdir_args *ap) zfs_uio_init(&uio, ap->a_uio); - ASSERT(vp->v_type == VDIR); + ASSERT3S(vp->v_type, ==, VDIR); error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, &uio, &dots_offset); @@ -918,7 +918,7 @@ zfsctl_snapdir_lookup(struct vop_lookup_args *ap) int flags = cnp->cn_flags; int err; - ASSERT(dvp->v_type == VDIR); + ASSERT3S(dvp->v_type, ==, VDIR); if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP) return (SET_ERROR(ENOTSUP)); @@ -1013,7 +1013,7 @@ zfsctl_snapdir_lookup(struct vop_lookup_args *ap) * make .zfs/snapshot/<snapname> accessible over NFS * without requiring manual mounts of <snapname>. */ - ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs); + ASSERT3P(VTOZ(*vpp)->z_zfsvfs, !=, zfsvfs); VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs; /* Clear the root flag (set via VFS_ROOT) as well. */ @@ -1039,7 +1039,7 @@ zfsctl_snapdir_readdir(struct vop_readdir_args *ap) zfs_uio_init(&uio, ap->a_uio); - ASSERT(vp->v_type == VDIR); + ASSERT3S(vp->v_type, ==, VDIR); error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap, &uio, &dots_offset); @@ -1143,7 +1143,7 @@ zfsctl_snapshot_inactive(struct vop_inactive_args *ap) { vnode_t *vp = ap->a_vp; - VERIFY(vrecycle(vp) == 1); + VERIFY3S(vrecycle(vp), ==, 1); return (0); } @@ -1248,7 +1248,7 @@ zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp) vnode_t *vp; int error; - ASSERT(zfsvfs->z_ctldir != NULL); + ASSERT3P(zfsvfs->z_ctldir, !=, NULL); *zfsvfsp = NULL; error = sfs_vnode_get(vfsp, LK_EXCLUSIVE, ZFSCTL_INO_SNAPDIR, objsetid, &vp); @@ -1280,7 +1280,7 @@ zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr) uint64_t cookie; int error; - ASSERT(zfsvfs->z_ctldir != NULL); + ASSERT3P(zfsvfs->z_ctldir, !=, NULL); cookie = 0; for (;;) { diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_debug.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_debug.c index 74742ad3669f..7239db80851c 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_debug.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_debug.c @@ -42,9 +42,12 @@ kstat_t *zfs_dbgmsg_kstat; /* * Internal ZFS debug messages are enabled by default. * - * # Print debug messages + * # Print debug messages as they're logged * dtrace -n 'zfs-dbgmsg { print(stringof(arg0)); }' * + * # Print all logged dbgmsg entries + * sysctl kstat.zfs.misc.dbgmsg + * * # Disable the kernel debug message log. * sysctl vfs.zfs.dbgmsg_enable=0 */ diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c index de145a677600..7fff329a939c 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c @@ -273,10 +273,9 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) zfsvfs_t *zfsvfs = zp->z_zfsvfs; ASSERT(zp->z_unlinked); - ASSERT(zp->z_links == 0); + ASSERT3U(zp->z_links, ==, 0); - VERIFY3U(0, ==, - zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); + VERIFY0(zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); dataset_kstats_update_nunlinks_kstat(&zfsvfs->z_kstat, 1); } @@ -433,7 +432,7 @@ zfs_rmnode(znode_t *zp) uint64_t count; int error; - ASSERT(zp->z_links == 0); + ASSERT3U(zp->z_links, ==, 0); if (zfsvfs->z_replay == B_FALSE) ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); @@ -599,7 +598,7 @@ zfs_link_create(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, &zp->z_links, sizeof (zp->z_links)); } else { - ASSERT(zp->z_unlinked == 0); + ASSERT(!zp->z_unlinked); } value = zfs_dirent(zp, zp->z_mode); error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, name, @@ -758,7 +757,7 @@ zfs_link_destroy(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, count = 0; ASSERT0(error); } else { - ASSERT(zp->z_unlinked == 0); + ASSERT(!zp->z_unlinked); error = zfs_dropname(dzp, name, zp, tx, flag); if (error != 0) return (error); @@ -806,7 +805,7 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xvpp, cred_t *cr) int error; zfs_acl_ids_t acl_ids; boolean_t fuid_dirtied; - uint64_t parent __unused; + uint64_t parent __maybe_unused; *xvpp = NULL; @@ -840,17 +839,15 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xvpp, cred_t *cr) if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); -#ifdef ZFS_DEBUG - error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), - &parent, sizeof (parent)); - ASSERT(error == 0 && parent == zp->z_id); -#endif + ASSERT0(sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), &parent, + sizeof (parent))); + ASSERT3U(parent, ==, zp->z_id); - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id, + VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id, sizeof (xzp->z_id), tx)); - (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, - xzp, "", NULL, acl_ids.z_fuidp, vap); + zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, xzp, "", NULL, + acl_ids.z_fuidp, vap); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c index 06546c12e420..908cff6810eb 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c @@ -295,14 +295,12 @@ zfs_file_unlink(const char *fnamep) #if __FreeBSD_version >= 1300018 rc = kern_funlinkat(curthread, AT_FDCWD, fnamep, FD_NONE, seg, 0, 0); -#else -#ifdef AT_BENEATH +#elif __FreeBSD_version >= 1202504 || defined(AT_BENEATH) rc = kern_unlinkat(curthread, AT_FDCWD, __DECONST(char *, fnamep), seg, 0, 0); #else rc = kern_unlinkat(curthread, AT_FDCWD, __DECONST(char *, fnamep), seg, 0); #endif -#endif return (SET_ERROR(rc)); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c index 0e0c16033b15..7f7e2b72c51a 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c @@ -138,6 +138,23 @@ zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) return (error); } +/* Update the VFS's cache of mountpoint properties */ +void +zfs_ioctl_update_mount_cache(const char *dsname) +{ + zfsvfs_t *zfsvfs; + + if (getzfsvfs(dsname, &zfsvfs) == 0) { + struct mount *mp = zfsvfs->z_vfs; + VFS_STATFS(mp, &mp->mnt_stat); + zfs_vfs_rele(zfsvfs); + } + /* + * Ignore errors; we can't do anything useful if either getzfsvfs or + * VFS_STATFS fails. + */ +} + uint64_t zfs_max_nvlist_src_size_os(void) { diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c index 4f2d7df87fc0..4e22206de329 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c @@ -585,14 +585,6 @@ snapdir_changed_cb(void *arg, uint64_t newval) } static void -vscan_changed_cb(void *arg, uint64_t newval) -{ - zfsvfs_t *zfsvfs = arg; - - zfsvfs->z_vscan = newval; -} - -static void acl_mode_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; @@ -635,9 +627,9 @@ zfs_register_callbacks(vfs_t *vfsp) boolean_t do_xattr = B_FALSE; int error = 0; - ASSERT(vfsp); + ASSERT3P(vfsp, !=, NULL); zfsvfs = vfsp->vfs_data; - ASSERT(zfsvfs); + ASSERT3P(zfsvfs, !=, NULL); os = zfsvfs->z_os; /* @@ -753,8 +745,6 @@ zfs_register_callbacks(vfs_t *vfsp) error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, zfsvfs); - error = error ? error : dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (error) goto unregister; @@ -846,6 +836,10 @@ zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) &sa_obj); if (error != 0) return (error); + + error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val); + if (error == 0 && val == ZFS_XATTR_SA) + zfsvfs->z_xattr_sa = B_TRUE; } error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, @@ -860,7 +854,7 @@ zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) &zfsvfs->z_root); if (error != 0) return (error); - ASSERT(zfsvfs->z_root != 0); + ASSERT3U(zfsvfs->z_root, !=, 0); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, &zfsvfs->z_unlinkedobj); @@ -1139,7 +1133,7 @@ zfsvfs_free(zfsvfs_t *zfsvfs) mutex_destroy(&zfsvfs->z_znodes_lock); mutex_destroy(&zfsvfs->z_lock); - ASSERT(zfsvfs->z_nr_znodes == 0); + ASSERT3U(zfsvfs->z_nr_znodes, ==, 0); list_destroy(&zfsvfs->z_all_znodes); ZFS_TEARDOWN_DESTROY(zfsvfs); ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs); @@ -1181,8 +1175,8 @@ zfs_domount(vfs_t *vfsp, char *osname) int error = 0; zfsvfs_t *zfsvfs; - ASSERT(vfsp); - ASSERT(osname); + ASSERT3P(vfsp, !=, NULL); + ASSERT3P(osname, !=, NULL); error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs); if (error) @@ -1220,9 +1214,9 @@ zfs_domount(vfs_t *vfsp, char *osname) * because that's where other Solaris filesystems put it. */ fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); - ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); + ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0); vfsp->vfs_fsid.val[0] = fsid_guid; - vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | + vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) | (vfsp->mnt_vfc->vfc_typenum & 0xFF); /* @@ -1606,11 +1600,11 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) */ mutex_enter(&zfsvfs->z_znodes_lock); for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; - zp = list_next(&zfsvfs->z_all_znodes, zp)) - if (zp->z_sa_hdl) { - ASSERT(ZTOV(zp)->v_count >= 0); + zp = list_next(&zfsvfs->z_all_znodes, zp)) { + if (zp->z_sa_hdl != NULL) { zfs_znode_dmu_fini(zp); } + } mutex_exit(&zfsvfs->z_znodes_lock); /* @@ -1697,7 +1691,7 @@ zfs_umount(vfs_t *vfsp, int fflag) taskqueue_drain(zfsvfs_taskq->tq_queue, &zfsvfs->z_unlinked_drain_task); - VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); + VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE)); os = zfsvfs->z_os; /* @@ -1959,7 +1953,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) goto bail; ds->ds_dir->dd_activity_cancelled = B_FALSE; - VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); + VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE)); zfs_set_fuid_feature(zfsvfs); @@ -2172,7 +2166,7 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); ASSERT0(error); - VERIFY(0 == sa_set_sa_object(os, sa_obj)); + VERIFY0(sa_set_sa_object(os, sa_obj)); sa_register_update_callback(os, zfs_sa_upgrade); } @@ -2286,7 +2280,7 @@ zfs_get_vfs_flag_unmounted(objset_t *os) zfsvfs_t *zfvp; boolean_t unmounted = B_FALSE; - ASSERT(dmu_objset_type(os) == DMU_OST_ZFS); + ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS); mutex_enter(&os->os_user_ptr_lock); zfvp = dmu_objset_get_user(os); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c index f915b0ab1f03..46a632b0385c 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c @@ -231,15 +231,6 @@ zfs_open(vnode_t **vpp, int flag, cred_t *cr) return (SET_ERROR(EPERM)); } - if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && - ZTOV(zp)->v_type == VREG && - !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { - if (fs_vscan(*vpp, cr, 0) != 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EACCES)); - } - } - /* Keep a count of the synchronous opens in the znode */ if (flag & (FSYNC | FDSYNC)) atomic_inc_32(&zp->z_sync_cnt); @@ -262,11 +253,6 @@ zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr) if ((flag & (FSYNC | FDSYNC)) && (count == 1)) atomic_dec_32(&zp->z_sync_cnt); - if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && - ZTOV(zp)->v_type == VREG && - !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) - VERIFY(fs_vscan(vp, cr, 1) == 0); - ZFS_EXIT(zfsvfs); return (0); } @@ -473,9 +459,9 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os) caddr_t va; int off; - ASSERT(vp->v_mount != NULL); + ASSERT3P(vp->v_mount, !=, NULL); obj = vp->v_object; - ASSERT(obj != NULL); + ASSERT3P(obj, !=, NULL); off = start & PAGEOFFSET; zfs_vmobject_wlock_12(obj); @@ -530,11 +516,11 @@ mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio) int len = nbytes; int error = 0; - ASSERT(zfs_uio_segflg(uio) == UIO_NOCOPY); - ASSERT(vp->v_mount != NULL); + ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY); + ASSERT3P(vp->v_mount, !=, NULL); obj = vp->v_object; - ASSERT(obj != NULL); - ASSERT((zfs_uio_offset(uio) & PAGEOFFSET) == 0); + ASSERT3P(obj, !=, NULL); + ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET); zfs_vmobject_wlock_12(obj); for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) { @@ -611,9 +597,9 @@ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio) int off; int error = 0; - ASSERT(vp->v_mount != NULL); + ASSERT3P(vp->v_mount, !=, NULL); obj = vp->v_object; - ASSERT(obj != NULL); + ASSERT3P(obj, !=, NULL); start = zfs_uio_offset(uio); off = start & PAGEOFFSET; @@ -781,7 +767,9 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, znode_t *zdp = VTOZ(dvp); znode_t *zp; zfsvfs_t *zfsvfs = zdp->z_zfsvfs; +#if __FreeBSD_version > 1300124 seqc_t dvp_seqc; +#endif int error = 0; /* @@ -807,7 +795,9 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zdp); +#if __FreeBSD_version > 1300124 dvp_seqc = vn_seqc_read_notmodify(dvp); +#endif *vpp = NULL; @@ -978,6 +968,7 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, } } +#if __FreeBSD_version > 1300124 if ((cnp->cn_flags & ISDOTDOT) != 0) { /* * FIXME: zfs_lookup_lock relocks vnodes and does nothing to @@ -995,6 +986,7 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, cnp->cn_flags &= ~MAKEENTRY; } } +#endif /* Insert name into cache (as non-existent) if appropriate. */ if (zfsvfs->z_use_namecache && !zfsvfs->z_replay && @@ -1407,7 +1399,7 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp, zfs_acl_ids_t acl_ids; boolean_t fuid_dirtied; - ASSERT(vap->va_type == VDIR); + ASSERT3U(vap->va_type, ==, VDIR); /* * If we have an ephemeral id, ACL, or XVATTR then @@ -1915,7 +1907,7 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, } outcount += reclen; - ASSERT(outcount <= bufsize); + ASSERT3S(outcount, <=, bufsize); /* Prefetch znode */ if (prefetch) @@ -2775,12 +2767,12 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr) new_mode = zp->z_mode; } err = zfs_acl_chown_setattr(zp); - ASSERT(err == 0); + ASSERT0(err); if (attrzp) { vn_seqc_write_begin(ZTOV(attrzp)); err = zfs_acl_chown_setattr(attrzp); vn_seqc_write_end(ZTOV(attrzp)); - ASSERT(err == 0); + ASSERT0(err); } } @@ -2788,7 +2780,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr) SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &new_mode, sizeof (new_mode)); zp->z_mode = new_mode; - ASSERT3U((uintptr_t)aclp, !=, 0); + ASSERT3P(aclp, !=, NULL); err = zfs_aclset_common(zp, aclp, cr, tx); ASSERT0(err); if (zp->z_acl_cached) @@ -2874,7 +2866,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr) } if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) - ASSERT(vp->v_type == VREG); + ASSERT3S(vp->v_type, ==, VREG); zfs_xvattr_set(zp, xvap, tx); } @@ -2896,7 +2888,7 @@ out: if (err == 0 && attrzp) { err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, xattr_count, tx); - ASSERT(err2 == 0); + ASSERT0(err2); } if (attrzp) @@ -3424,8 +3416,8 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, * succeed; fortunately, it is very unlikely to * fail, since we just created it. */ - VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx, - ZRENAMING, NULL), ==, 0); + VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx, + ZRENAMING, NULL)); } } if (error == 0) { @@ -3529,7 +3521,7 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap, boolean_t fuid_dirtied; uint64_t txtype = TX_SYMLINK; - ASSERT(vap->va_type == VLNK); + ASSERT3S(vap->va_type, ==, VLNK); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); @@ -3600,7 +3592,7 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap, /* * Create a new object for the symlink. - * for version 4 ZPL datsets the symlink will be an SA attribute + * for version 4 ZPL datasets the symlink will be an SA attribute */ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); @@ -3703,7 +3695,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr, uint64_t parent; uid_t owner; - ASSERT(ZTOV(tdzp)->v_type == VDIR); + ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(tdzp); @@ -4489,6 +4481,7 @@ zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v) } #endif +#if __FreeBSD_version >= 1300139 static int zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v) { @@ -4508,6 +4501,7 @@ zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v) } return (cache_symlink_resolve(v->a_fpl, target, strlen(target))); } +#endif #ifndef _SYS_SYSPROTO_H_ struct vop_access_args { @@ -4581,7 +4575,7 @@ zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached) struct componentname *cnp = ap->a_cnp; char nm[NAME_MAX + 1]; - ASSERT(cnp->cn_namelen < sizeof (nm)); + ASSERT3U(cnp->cn_namelen, <, sizeof (nm)); strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm))); return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, @@ -4996,8 +4990,10 @@ zfs_freebsd_symlink(struct vop_symlink_args *ap) struct componentname *cnp = ap->a_cnp; vattr_t *vap = ap->a_vap; znode_t *zp = NULL; +#if __FreeBSD_version >= 1300139 char *symlink; size_t symlink_len; +#endif int rc; ASSERT(cnp->cn_flags & SAVENAME); @@ -5011,6 +5007,7 @@ zfs_freebsd_symlink(struct vop_symlink_args *ap) if (rc == 0) { *ap->a_vpp = ZTOV(zp); ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); +#if __FreeBSD_version >= 1300139 MPASS(zp->z_cached_symlink == NULL); symlink_len = strlen(ap->a_target); symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK); @@ -5020,6 +5017,7 @@ zfs_freebsd_symlink(struct vop_symlink_args *ap) atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink, (uintptr_t)symlink); } +#endif } return (rc); } @@ -5036,13 +5034,16 @@ static int zfs_freebsd_readlink(struct vop_readlink_args *ap) { zfs_uio_t uio; + int error; +#if __FreeBSD_version >= 1300139 znode_t *zp = VTOZ(ap->a_vp); char *symlink, *base; size_t symlink_len; - int error; bool trycache; +#endif zfs_uio_init(&uio, ap->a_uio); +#if __FreeBSD_version >= 1300139 trycache = false; if (zfs_uio_segflg(&uio) == UIO_SYSSPACE && zfs_uio_iovcnt(&uio) == 1) { @@ -5050,7 +5051,9 @@ zfs_freebsd_readlink(struct vop_readlink_args *ap) symlink_len = zfs_uio_iovlen(&uio, 0); trycache = true; } +#endif error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL); +#if __FreeBSD_version >= 1300139 if (atomic_load_ptr(&zp->z_cached_symlink) != NULL || error != 0 || !trycache) { return (error); @@ -5065,6 +5068,7 @@ zfs_freebsd_readlink(struct vop_readlink_args *ap) cache_symlink_free(symlink, symlink_len + 1); } } +#endif return (error); } @@ -5154,7 +5158,7 @@ zfs_freebsd_reclaim(struct vop_reclaim_args *ap) znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; - ASSERT(zp != NULL); + ASSERT3P(zp, !=, NULL); #if __FreeBSD_version < 1300042 /* Destroy the vm object and flush associated pages. */ @@ -5246,10 +5250,10 @@ zfs_create_attrname(int attrnamespace, const char *name, char *attrname, /* We don't allow '/' character in attribute name. */ if (strchr(name, '/') != NULL) - return (EINVAL); + return (SET_ERROR(EINVAL)); /* We don't allow attribute names that start with "freebsd:" string. */ if (strncmp(name, "freebsd:", 8) == 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); bzero(attrname, size); @@ -5274,15 +5278,38 @@ zfs_create_attrname(int attrnamespace, const char *name, char *attrname, break; case EXTATTR_NAMESPACE_EMPTY: default: - return (EINVAL); + return (SET_ERROR(EINVAL)); } if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, name) >= size) { - return (ENAMETOOLONG); + return (SET_ERROR(ENAMETOOLONG)); } return (0); } +static int +zfs_ensure_xattr_cached(znode_t *zp) +{ + int error = 0; + + ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); + + if (zp->z_xattr_cached != NULL) + return (0); + + if (rw_write_held(&zp->z_xattr_lock)) + return (zfs_sa_get_xattr(zp)); + + if (!rw_tryupgrade(&zp->z_xattr_lock)) { + rw_exit(&zp->z_xattr_lock); + rw_enter(&zp->z_xattr_lock, RW_WRITER); + } + if (zp->z_xattr_cached == NULL) + error = zfs_sa_get_xattr(zp); + rw_downgrade(&zp->z_xattr_lock); + return (error); +} + #ifndef _SYS_SYSPROTO_H_ struct vop_getextattr { IN struct vnode *a_vp; @@ -5295,45 +5322,19 @@ struct vop_getextattr { }; #endif -/* - * Vnode operating to retrieve a named extended attribute. - */ static int -zfs_getextattr(struct vop_getextattr_args *ap) +zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname) { - zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; struct thread *td = ap->a_td; struct nameidata nd; - char attrname[255]; struct vattr va; vnode_t *xvp = NULL, *vp; int error, flags; - /* - * If the xattr property is off, refuse the request. - */ - if (!(zfsvfs->z_flags & ZSB_XATTR)) { - return (SET_ERROR(EOPNOTSUPP)); - } - - error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, - ap->a_cred, ap->a_td, VREAD); - if (error != 0) - return (error); - - error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, - sizeof (attrname)); - if (error != 0) - return (error); - - ZFS_ENTER(zfsvfs); - error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, LOOKUP_XATTR, B_FALSE); - if (error != 0) { - ZFS_EXIT(zfsvfs); + if (error != 0) return (error); - } flags = FREAD; NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, @@ -5341,12 +5342,8 @@ zfs_getextattr(struct vop_getextattr_args *ap) error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); - if (error != 0) { - ZFS_EXIT(zfsvfs); - if (error == ENOENT) - error = ENOATTR; + if (error != 0) return (error); - } if (ap->a_size != NULL) { error = VOP_GETATTR(vp, &va, ap->a_cred); @@ -5357,42 +5354,56 @@ zfs_getextattr(struct vop_getextattr_args *ap) VOP_UNLOCK1(vp); vn_close(vp, flags, ap->a_cred, td); - ZFS_EXIT(zfsvfs); return (error); } -#ifndef _SYS_SYSPROTO_H_ -struct vop_deleteextattr { - IN struct vnode *a_vp; - IN int a_attrnamespace; - IN const char *a_name; - IN struct ucred *a_cred; - IN struct thread *a_td; -}; -#endif +static int +zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname) +{ + znode_t *zp = VTOZ(ap->a_vp); + uchar_t *nv_value; + uint_t nv_size; + int error; + + error = zfs_ensure_xattr_cached(zp); + if (error != 0) + return (error); + + ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); + ASSERT3P(zp->z_xattr_cached, !=, NULL); + + error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname, + &nv_value, &nv_size); + if (error) + return (error); + + if (ap->a_size != NULL) + *ap->a_size = nv_size; + else if (ap->a_uio != NULL) + error = uiomove(nv_value, nv_size, ap->a_uio); + + return (error); +} /* - * Vnode operation to remove a named attribute. + * Vnode operation to retrieve a named extended attribute. */ static int -zfs_deleteextattr(struct vop_deleteextattr_args *ap) +zfs_getextattr(struct vop_getextattr_args *ap) { - zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; - struct thread *td = ap->a_td; - struct nameidata nd; - char attrname[255]; - vnode_t *xvp = NULL, *vp; + znode_t *zp = VTOZ(ap->a_vp); + zfsvfs_t *zfsvfs = ZTOZSB(zp); + char attrname[EXTATTR_MAXNAMELEN+1]; int error; /* * If the xattr property is off, refuse the request. */ - if (!(zfsvfs->z_flags & ZSB_XATTR)) { + if (!(zfsvfs->z_flags & ZSB_XATTR)) return (SET_ERROR(EOPNOTSUPP)); - } error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, - ap->a_cred, ap->a_td, VWRITE); + ap->a_cred, ap->a_td, VREAD); if (error != 0) return (error); @@ -5401,24 +5412,50 @@ zfs_deleteextattr(struct vop_deleteextattr_args *ap) if (error != 0) return (error); + error = ENOENT; ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp) + rw_enter(&zp->z_xattr_lock, RW_READER); + if (zfsvfs->z_use_sa && zp->z_is_sa) + error = zfs_getextattr_sa(ap, attrname); + if (error == ENOENT) + error = zfs_getextattr_dir(ap, attrname); + rw_exit(&zp->z_xattr_lock); + ZFS_EXIT(zfsvfs); + if (error == ENOENT) + error = SET_ERROR(ENOATTR); + return (error); +} + +#ifndef _SYS_SYSPROTO_H_ +struct vop_deleteextattr { + IN struct vnode *a_vp; + IN int a_attrnamespace; + IN const char *a_name; + IN struct ucred *a_cred; + IN struct thread *a_td; +}; +#endif + +static int +zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname) +{ + struct thread *td = ap->a_td; + struct nameidata nd; + vnode_t *xvp = NULL, *vp; + int error; error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, LOOKUP_XATTR, B_FALSE); - if (error != 0) { - ZFS_EXIT(zfsvfs); + if (error != 0) return (error); - } NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, UIO_SYSSPACE, attrname, xvp, td); error = namei(&nd); vp = nd.ni_vp; if (error != 0) { - ZFS_EXIT(zfsvfs); NDFREE(&nd, NDF_ONLY_PNBUF); - if (error == ENOENT) - error = ENOATTR; return (error); } @@ -5430,72 +5467,123 @@ zfs_deleteextattr(struct vop_deleteextattr_args *ap) vrele(vp); else vput(vp); - ZFS_EXIT(zfsvfs); return (error); } -#ifndef _SYS_SYSPROTO_H_ -struct vop_setextattr { - IN struct vnode *a_vp; - IN int a_attrnamespace; - IN const char *a_name; - INOUT struct uio *a_uio; - IN struct ucred *a_cred; - IN struct thread *a_td; -}; -#endif +static int +zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname) +{ + znode_t *zp = VTOZ(ap->a_vp); + nvlist_t *nvl; + int error; + + error = zfs_ensure_xattr_cached(zp); + if (error != 0) + return (error); + + ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock)); + ASSERT3P(zp->z_xattr_cached, !=, NULL); + + nvl = zp->z_xattr_cached; + error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY); + if (error == 0) + error = zfs_sa_set_xattr(zp); + if (error != 0) { + zp->z_xattr_cached = NULL; + nvlist_free(nvl); + } + return (error); +} /* - * Vnode operation to set a named attribute. + * Vnode operation to remove a named attribute. */ static int -zfs_setextattr(struct vop_setextattr_args *ap) +zfs_deleteextattr(struct vop_deleteextattr_args *ap) { - zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; - struct thread *td = ap->a_td; - struct nameidata nd; - char attrname[255]; - struct vattr va; - vnode_t *xvp = NULL, *vp; - int error, flags; + znode_t *zp = VTOZ(ap->a_vp); + zfsvfs_t *zfsvfs = ZTOZSB(zp); + char attrname[EXTATTR_MAXNAMELEN+1]; + int error; /* * If the xattr property is off, refuse the request. */ - if (!(zfsvfs->z_flags & ZSB_XATTR)) { + if (!(zfsvfs->z_flags & ZSB_XATTR)) return (SET_ERROR(EOPNOTSUPP)); - } error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VWRITE); if (error != 0) return (error); + error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, sizeof (attrname)); if (error != 0) return (error); + size_t size = 0; + struct vop_getextattr_args vga = { + .a_vp = ap->a_vp, + .a_size = &size, + .a_cred = ap->a_cred, + .a_td = ap->a_td, + }; + error = ENOENT; ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + rw_enter(&zp->z_xattr_lock, RW_WRITER); + if (zfsvfs->z_use_sa && zp->z_is_sa) { + error = zfs_getextattr_sa(&vga, attrname); + if (error == 0) + error = zfs_deleteextattr_sa(ap, attrname); + } + if (error == ENOENT) { + error = zfs_getextattr_dir(&vga, attrname); + if (error == 0) + error = zfs_deleteextattr_dir(ap, attrname); + } + rw_exit(&zp->z_xattr_lock); + ZFS_EXIT(zfsvfs); + if (error == ENOENT) + error = SET_ERROR(ENOATTR); + return (error); +} + +#ifndef _SYS_SYSPROTO_H_ +struct vop_setextattr { + IN struct vnode *a_vp; + IN int a_attrnamespace; + IN const char *a_name; + INOUT struct uio *a_uio; + IN struct ucred *a_cred; + IN struct thread *a_td; +}; +#endif + +static int +zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname) +{ + struct thread *td = ap->a_td; + struct nameidata nd; + struct vattr va; + vnode_t *xvp = NULL, *vp; + int error, flags; error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE); - if (error != 0) { - ZFS_EXIT(zfsvfs); + if (error != 0) return (error); - } flags = FFLAGS(O_WRONLY | O_CREAT); - NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, - xvp, td); + NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td); error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred, NULL); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); - if (error != 0) { - ZFS_EXIT(zfsvfs); + if (error != 0) return (error); - } VATTR_NULL(&va); va.va_size = 0; @@ -5505,70 +5593,131 @@ zfs_setextattr(struct vop_setextattr_args *ap) VOP_UNLOCK1(vp); vn_close(vp, flags, ap->a_cred, td); - ZFS_EXIT(zfsvfs); return (error); } -#ifndef _SYS_SYSPROTO_H_ -struct vop_listextattr { - IN struct vnode *a_vp; - IN int a_attrnamespace; - INOUT struct uio *a_uio; - OUT size_t *a_size; - IN struct ucred *a_cred; - IN struct thread *a_td; -}; -#endif +static int +zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname) +{ + znode_t *zp = VTOZ(ap->a_vp); + nvlist_t *nvl; + size_t sa_size; + int error; + + error = zfs_ensure_xattr_cached(zp); + if (error != 0) + return (error); + + ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock)); + ASSERT3P(zp->z_xattr_cached, !=, NULL); + + nvl = zp->z_xattr_cached; + size_t entry_size = ap->a_uio->uio_resid; + if (entry_size > DXATTR_MAX_ENTRY_SIZE) + return (SET_ERROR(EFBIG)); + error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR); + if (error != 0) + return (error); + if (sa_size > DXATTR_MAX_SA_SIZE) + return (SET_ERROR(EFBIG)); + uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP); + error = uiomove(buf, entry_size, ap->a_uio); + if (error == 0) + error = nvlist_add_byte_array(nvl, attrname, buf, entry_size); + kmem_free(buf, entry_size); + if (error == 0) + error = zfs_sa_set_xattr(zp); + if (error != 0) { + zp->z_xattr_cached = NULL; + nvlist_free(nvl); + } + return (error); +} /* - * Vnode operation to retrieve extended attributes on a vnode. + * Vnode operation to set a named attribute. */ static int -zfs_listextattr(struct vop_listextattr_args *ap) +zfs_setextattr(struct vop_setextattr_args *ap) { - zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; - struct thread *td = ap->a_td; - struct nameidata nd; - char attrprefix[16]; - uint8_t dirbuf[sizeof (struct dirent)]; - struct dirent *dp; - struct iovec aiov; - struct uio auio; - size_t *sizep = ap->a_size; - size_t plen; - vnode_t *xvp = NULL, *vp; - int done, error, eof, pos; - zfs_uio_t uio; - - zfs_uio_init(&uio, ap->a_uio); + znode_t *zp = VTOZ(ap->a_vp); + zfsvfs_t *zfsvfs = ZTOZSB(zp); + char attrname[EXTATTR_MAXNAMELEN+1]; + int error; /* * If the xattr property is off, refuse the request. */ - if (!(zfsvfs->z_flags & ZSB_XATTR)) { + if (!(zfsvfs->z_flags & ZSB_XATTR)) return (SET_ERROR(EOPNOTSUPP)); - } error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, - ap->a_cred, ap->a_td, VREAD); + ap->a_cred, ap->a_td, VWRITE); if (error != 0) return (error); - error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, - sizeof (attrprefix)); + error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, + sizeof (attrname)); if (error != 0) return (error); - plen = strlen(attrprefix); + struct vop_deleteextattr_args vda = { + .a_vp = ap->a_vp, + .a_cred = ap->a_cred, + .a_td = ap->a_td, + }; + error = ENOENT; ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + rw_enter(&zp->z_xattr_lock, RW_WRITER); + if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) { + error = zfs_setextattr_sa(ap, attrname); + if (error == 0) + /* + * Successfully put into SA, we need to clear the one + * in dir if present. + */ + zfs_deleteextattr_dir(&vda, attrname); + } + if (error) { + error = zfs_setextattr_dir(ap, attrname); + if (error == 0) + /* + * Successfully put into dir, we need to clear the one + * in SA if present. + */ + zfs_deleteextattr_sa(&vda, attrname); + } + rw_exit(&zp->z_xattr_lock); + ZFS_EXIT(zfsvfs); + return (error); +} - if (sizep != NULL) - *sizep = 0; +#ifndef _SYS_SYSPROTO_H_ +struct vop_listextattr { + IN struct vnode *a_vp; + IN int a_attrnamespace; + INOUT struct uio *a_uio; + OUT size_t *a_size; + IN struct ucred *a_cred; + IN struct thread *a_td; +}; +#endif + +static int +zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix) +{ + struct thread *td = ap->a_td; + struct nameidata nd; + uint8_t dirbuf[sizeof (struct dirent)]; + struct iovec aiov; + struct uio auio; + vnode_t *xvp = NULL, *vp; + int error, eof; error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, LOOKUP_XATTR, B_FALSE); if (error != 0) { - ZFS_EXIT(zfsvfs); /* * ENOATTR means that the EA directory does not yet exist, * i.e. there are no extended attributes there. @@ -5583,10 +5732,8 @@ zfs_listextattr(struct vop_listextattr_args *ap) error = namei(&nd); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); - if (error != 0) { - ZFS_EXIT(zfsvfs); + if (error != 0) return (error); - } auio.uio_iov = &aiov; auio.uio_iovcnt = 1; @@ -5595,18 +5742,18 @@ zfs_listextattr(struct vop_listextattr_args *ap) auio.uio_rw = UIO_READ; auio.uio_offset = 0; - do { - uint8_t nlen; + size_t plen = strlen(attrprefix); + do { aiov.iov_base = (void *)dirbuf; aiov.iov_len = sizeof (dirbuf); auio.uio_resid = sizeof (dirbuf); error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); - done = sizeof (dirbuf) - auio.uio_resid; if (error != 0) break; - for (pos = 0; pos < done; ) { - dp = (struct dirent *)(dirbuf + pos); + int done = sizeof (dirbuf) - auio.uio_resid; + for (int pos = 0; pos < done; ) { + struct dirent *dp = (struct dirent *)(dirbuf + pos); pos += dp->d_reclen; /* * XXX: Temporarily we also accept DT_UNKNOWN, as this @@ -5614,24 +5761,23 @@ zfs_listextattr(struct vop_listextattr_args *ap) */ if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) continue; - if (plen == 0 && + else if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) continue; else if (strncmp(dp->d_name, attrprefix, plen) != 0) continue; - nlen = dp->d_namlen - plen; - if (sizep != NULL) - *sizep += 1 + nlen; - else if (GET_UIO_STRUCT(&uio) != NULL) { + uint8_t nlen = dp->d_namlen - plen; + if (ap->a_size != NULL) { + *ap->a_size += 1 + nlen; + } else if (ap->a_uio != NULL) { /* * Format of extattr name entry is one byte for * length and the rest for name. */ - error = zfs_uiomove(&nlen, 1, zfs_uio_rw(&uio), - &uio); + error = uiomove(&nlen, 1, ap->a_uio); if (error == 0) { - error = zfs_uiomove(dp->d_name + plen, - nlen, zfs_uio_rw(&uio), &uio); + char *namep = dp->d_name + plen; + error = uiomove(namep, nlen, ap->a_uio); } if (error != 0) break; @@ -5640,8 +5786,92 @@ zfs_listextattr(struct vop_listextattr_args *ap) } while (!eof && error == 0); vput(vp); - ZFS_EXIT(zfsvfs); + return (error); +} + +static int +zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix) +{ + znode_t *zp = VTOZ(ap->a_vp); + int error; + error = zfs_ensure_xattr_cached(zp); + if (error != 0) + return (error); + + ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); + ASSERT3P(zp->z_xattr_cached, !=, NULL); + + size_t plen = strlen(attrprefix); + nvpair_t *nvp = NULL; + while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) { + ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY); + + const char *name = nvpair_name(nvp); + if (plen == 0 && strncmp(name, "freebsd:", 8) == 0) + continue; + else if (strncmp(name, attrprefix, plen) != 0) + continue; + uint8_t nlen = strlen(name) - plen; + if (ap->a_size != NULL) { + *ap->a_size += 1 + nlen; + } else if (ap->a_uio != NULL) { + /* + * Format of extattr name entry is one byte for + * length and the rest for name. + */ + error = uiomove(&nlen, 1, ap->a_uio); + if (error == 0) { + char *namep = __DECONST(char *, name) + plen; + error = uiomove(namep, nlen, ap->a_uio); + } + if (error != 0) + break; + } + } + + return (error); +} + +/* + * Vnode operation to retrieve extended attributes on a vnode. + */ +static int +zfs_listextattr(struct vop_listextattr_args *ap) +{ + znode_t *zp = VTOZ(ap->a_vp); + zfsvfs_t *zfsvfs = ZTOZSB(zp); + char attrprefix[16]; + int error; + + if (ap->a_size != NULL) + *ap->a_size = 0; + + /* + * If the xattr property is off, refuse the request. + */ + if (!(zfsvfs->z_flags & ZSB_XATTR)) + return (SET_ERROR(EOPNOTSUPP)); + + error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, + ap->a_cred, ap->a_td, VREAD); + if (error != 0) + return (error); + + error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, + sizeof (attrprefix)); + if (error != 0) + return (error); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + rw_enter(&zp->z_xattr_lock, RW_READER); + if (zfsvfs->z_use_sa && zp->z_is_sa) + error = zfs_listextattr_sa(ap, attrprefix); + if (error == 0) + error = zfs_listextattr_dir(ap, attrprefix); + rw_exit(&zp->z_xattr_lock); + ZFS_EXIT(zfsvfs); return (error); } @@ -5828,7 +6058,9 @@ struct vop_vector zfs_vnodeops = { #if __FreeBSD_version >= 1300102 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec, #endif +#if __FreeBSD_version >= 1300139 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink, +#endif .vop_access = zfs_freebsd_access, .vop_allocate = VOP_EINVAL, .vop_lookup = zfs_cache_lookup, @@ -5878,7 +6110,9 @@ struct vop_vector zfs_fifoops = { #if __FreeBSD_version >= 1300102 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec, #endif +#if __FreeBSD_version >= 1300139 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink, +#endif .vop_access = zfs_freebsd_access, .vop_getattr = zfs_freebsd_getattr, .vop_inactive = zfs_freebsd_inactive, @@ -5902,7 +6136,9 @@ struct vop_vector zfs_shareops = { #if __FreeBSD_version >= 1300121 .vop_fplookup_vexec = VOP_EAGAIN, #endif +#if __FreeBSD_version >= 1300139 .vop_fplookup_symlink = VOP_EAGAIN, +#endif .vop_access = zfs_freebsd_access, .vop_inactive = zfs_freebsd_inactive, .vop_reclaim = zfs_freebsd_reclaim, diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c index 0491b2ff3e28..3eb5cd490d03 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c @@ -143,11 +143,15 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) list_link_init(&zp->z_link_node); + mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL); zfs_rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp); zp->z_acl_cached = NULL; + zp->z_xattr_cached = NULL; + zp->z_xattr_parent = 0; zp->z_vnode = NULL; return (0); } @@ -161,10 +165,13 @@ zfs_znode_cache_destructor(void *buf, void *arg) ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); ASSERT3P(zp->z_vnode, ==, NULL); ASSERT(!list_link_active(&zp->z_link_node)); + mutex_destroy(&zp->z_lock); mutex_destroy(&zp->z_acl_lock); + rw_destroy(&zp->z_xattr_lock); zfs_rangelock_fini(&zp->z_rangelock); - ASSERT(zp->z_acl_cached == NULL); + ASSERT3P(zp->z_acl_cached, ==, NULL); + ASSERT3P(zp->z_xattr_cached, ==, NULL); } @@ -175,14 +182,12 @@ static int zfs_znode_cache_constructor_smr(void *mem, int size __unused, void *private, int flags) { - return (zfs_znode_cache_constructor(mem, private, flags)); } static void zfs_znode_cache_destructor_smr(void *mem, int size __unused, void *private) { - zfs_znode_cache_destructor(mem, private); } @@ -192,7 +197,7 @@ zfs_znode_init(void) /* * Initialize zcache */ - ASSERT(znode_uma_zone == NULL); + ASSERT3P(znode_uma_zone, ==, NULL); znode_uma_zone = uma_zcreate("zfs_znode_cache", sizeof (znode_t), zfs_znode_cache_constructor_smr, zfs_znode_cache_destructor_smr, NULL, NULL, 0, 0); @@ -202,14 +207,16 @@ zfs_znode_init(void) static znode_t * zfs_znode_alloc_kmem(int flags) { - return (uma_zalloc_smr(znode_uma_zone, flags)); } static void zfs_znode_free_kmem(znode_t *zp) { - + if (zp->z_xattr_cached) { + nvlist_free(zp->z_xattr_cached); + zp->z_xattr_cached = NULL; + } uma_zfree_smr(znode_uma_zone, zp); } #else @@ -219,7 +226,7 @@ zfs_znode_init(void) /* * Initialize zcache */ - ASSERT(znode_cache == NULL); + ASSERT3P(znode_cache, ==, NULL); znode_cache = kmem_cache_create("zfs_znode_cache", sizeof (znode_t), 0, zfs_znode_cache_constructor, zfs_znode_cache_destructor, NULL, NULL, NULL, 0); @@ -228,14 +235,16 @@ zfs_znode_init(void) static znode_t * zfs_znode_alloc_kmem(int flags) { - return (kmem_cache_alloc(znode_cache, flags)); } static void zfs_znode_free_kmem(znode_t *zp) { - + if (zp->z_xattr_cached) { + nvlist_free(zp->z_xattr_cached); + zp->z_xattr_cached = NULL; + } kmem_cache_free(znode_cache, zp); } #endif @@ -282,7 +291,7 @@ zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) sharezp->z_zfsvfs = zfsvfs; sharezp->z_is_sa = zfsvfs->z_use_sa; - VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr, + VERIFY0(zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr, kcred, NULL, &acl_ids)); zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids); ASSERT3P(zp, ==, sharezp); @@ -345,10 +354,10 @@ zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs)); ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id))); - ASSERT(zp->z_sa_hdl == NULL); - ASSERT(zp->z_acl_cached == NULL); + ASSERT3P(zp->z_sa_hdl, ==, NULL); + ASSERT3P(zp->z_acl_cached, ==, NULL); if (sa_hdl == NULL) { - VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp, + VERIFY0(sa_handle_get_from_db(zfsvfs->z_os, db, zp, SA_HDL_SHARED, &zp->z_sa_hdl)); } else { zp->z_sa_hdl = sa_hdl; @@ -444,7 +453,9 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, zp->z_blksz = blksz; zp->z_seq = 0x7A4653; zp->z_sync_cnt = 0; +#if __FreeBSD_version >= 1300139 atomic_store_ptr(&zp->z_cached_symlink, NULL); +#endif vp = ZTOV(zp); @@ -502,7 +513,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, break; case VREG: if (parent == zfsvfs->z_shares_dir) { - ASSERT(zp->z_uid == 0 && zp->z_gid == 0); + ASSERT0(zp->z_uid); + ASSERT0(zp->z_gid); vp->v_op = &zfs_shareops; } break; @@ -568,7 +580,8 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, int cnt = 0; zfs_acl_locator_cb_t locate = { 0 }; - ASSERT(vap && ((vap->va_mask & AT_MODE) == AT_MODE)); + ASSERT3P(vap, !=, NULL); + ASSERT3U((vap->va_mask & AT_MODE), ==, AT_MODE); if (zfsvfs->z_replay) { obj = vap->va_nodeid; @@ -621,7 +634,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, } ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); - VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db)); + VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db)); /* * If this is the root, fix up the half-initialized parent pointer @@ -684,7 +697,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, } /* Now add in all of the "SA" attributes */ - VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED, + VERIFY0(sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED, &sa_hdl)); /* @@ -771,11 +784,11 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, acl_ids->z_fuid, acl_ids->z_fgid); } - VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); + VERIFY0(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx)); if (!(flag & IS_ROOT_NODE)) { *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl); - ASSERT(*zpp != NULL); + ASSERT3P(*zpp, !=, NULL); } else { /* * If we are creating the root node, the "parent" we @@ -820,7 +833,7 @@ zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) xoptattr_t *xoap; xoap = xva_getxoptattr(xvap); - ASSERT(xoap); + ASSERT3P(xoap, !=, NULL); ASSERT_VOP_IN_SEQC(ZTOV(zp)); @@ -1077,9 +1090,16 @@ zfs_rezget(znode_t *zp) zfs_acl_free(zp->z_acl_cached); zp->z_acl_cached = NULL; } - mutex_exit(&zp->z_acl_lock); - ASSERT(zp->z_sa_hdl == NULL); + + rw_enter(&zp->z_xattr_lock, RW_WRITER); + if (zp->z_xattr_cached) { + nvlist_free(zp->z_xattr_cached); + zp->z_xattr_cached = NULL; + } + rw_exit(&zp->z_xattr_lock); + + ASSERT3P(zp->z_sa_hdl, ==, NULL); err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); if (err) { ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); @@ -1191,9 +1211,9 @@ zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); if (acl_obj) { VERIFY(!zp->z_is_sa); - VERIFY(0 == dmu_object_free(os, acl_obj, tx)); + VERIFY0(dmu_object_free(os, acl_obj, tx)); } - VERIFY(0 == dmu_object_free(os, obj, tx)); + VERIFY0(dmu_object_free(os, obj, tx)); zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); zfs_znode_free(zp); @@ -1205,7 +1225,7 @@ zfs_zinactive(znode_t *zp) zfsvfs_t *zfsvfs = zp->z_zfsvfs; uint64_t z_id = zp->z_id; - ASSERT(zp->z_sa_hdl); + ASSERT3P(zp->z_sa_hdl, !=, NULL); /* * Don't allow a zfs_zget() while were trying to release this znode @@ -1238,9 +1258,11 @@ void zfs_znode_free(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; +#if __FreeBSD_version >= 1300139 char *symlink; +#endif - ASSERT(zp->z_sa_hdl == NULL); + ASSERT3P(zp->z_sa_hdl, ==, NULL); zp->z_vnode = NULL; mutex_enter(&zfsvfs->z_znodes_lock); POINTER_INVALIDATE(&zp->z_zfsvfs); @@ -1253,6 +1275,15 @@ zfs_znode_free(znode_t *zp) cache_symlink_free(symlink, strlen(symlink) + 1); } +#if __FreeBSD_version >= 1300139 + symlink = atomic_load_ptr(&zp->z_cached_symlink); + if (symlink != NULL) { + atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink, + (uintptr_t)NULL); + cache_symlink_free(symlink, strlen(symlink) + 1); + } +#endif + if (zp->z_acl_cached) { zfs_acl_free(zp->z_acl_cached); zp->z_acl_cached = NULL; @@ -1403,7 +1434,7 @@ zfs_extend(znode_t *zp, uint64_t end) zp->z_size = end; - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs), + VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs), &zp->z_size, sizeof (zp->z_size), tx)); vnode_pager_setsize(ZTOV(zp), end); @@ -1521,7 +1552,7 @@ zfs_trunc(znode_t *zp, uint64_t end) SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, 8); } - VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); + VERIFY0(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx)); dmu_tx_commit(tx); @@ -1598,7 +1629,7 @@ log: NULL, &zp->z_pflags, 8); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - ASSERT(error == 0); + ASSERT0(error); zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); @@ -1631,7 +1662,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) moid = MASTER_NODE_OBJ; error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, DMU_OT_NONE, 0, tx); - ASSERT(error == 0); + ASSERT0(error); /* * Set starting attributes. @@ -1643,8 +1674,8 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) uint64_t val; char *name; - ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); - VERIFY(nvpair_value_uint64(elem, &val) == 0); + ASSERT3S(nvpair_type(elem), ==, DATA_TYPE_UINT64); + val = fnvpair_value_uint64(elem); name = nvpair_name(elem); if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { if (val < version) @@ -1652,13 +1683,13 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) } else { error = zap_update(os, moid, name, 8, 1, &val, tx); } - ASSERT(error == 0); + ASSERT0(error); if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) norm = val; else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) sense = val; } - ASSERT(version != 0); + ASSERT3U(version, !=, 0); error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); /* @@ -1669,7 +1700,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, DMU_OT_NONE, 0, tx); error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); - ASSERT(error == 0); + ASSERT0(error); } else { sa_obj = 0; } @@ -1679,7 +1710,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); - ASSERT(error == 0); + ASSERT0(error); /* * Create root znode. Create minimal znode/vnode/zfsvfs @@ -1710,7 +1741,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table); - ASSERT(error == 0); + ASSERT0(error); /* * Fold case on file systems that are always or sometimes case @@ -1727,12 +1758,12 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); rootzp->z_zfsvfs = zfsvfs; - VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, + VERIFY0(zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, cr, NULL, &acl_ids)); zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); ASSERT3P(zp, ==, rootzp); error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); - ASSERT(error == 0); + ASSERT0(error); zfs_acl_ids_free(&acl_ids); POINTER_INVALIDATE(&rootzp->z_zfsvfs); @@ -1745,7 +1776,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) error = zfs_create_share_dir(zfsvfs, tx); - ASSERT(error == 0); + ASSERT0(error); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_destroy(&zfsvfs->z_hold_mtx[i]); @@ -1913,7 +1944,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, int is_xattrdir; if (prevdb) { - ASSERT(prevhdl != NULL); + ASSERT3P(prevhdl, !=, NULL); zfs_release_sa_handle(prevhdl, prevdb, FTAG); } @@ -1939,7 +1970,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, complen = strlen(component); path -= complen; - ASSERT(path >= buf); + ASSERT3P(path, >=, buf); bcopy(component, path, complen); obj = pobj; @@ -1956,7 +1987,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, } if (sa_hdl != NULL && sa_hdl != hdl) { - ASSERT(sa_db != NULL); + ASSERT3P(sa_db, !=, NULL); zfs_release_sa_handle(sa_hdl, sa_db, FTAG); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c index 9fe678d2574f..aeb42b304e73 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c @@ -114,7 +114,7 @@ * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left * in plaintext for scrubbing and claiming, but the bonus buffers might contain * sensitive user data. The function zio_crypt_init_uios_dnode() handles parsing - * which which pieces of the block need to be encrypted. For more details about + * which pieces of the block need to be encrypted. For more details about * dnode authentication and encryption, see zio_crypt_init_uios_dnode(). * * OBJECT SET AUTHENTICATION: @@ -239,7 +239,7 @@ zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key) uint_t keydata_len; zio_crypt_info_t *ci = NULL; - ASSERT(key != NULL); + ASSERT3P(key, !=, NULL); ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ci = &zio_crypt_table[crypt]; @@ -1077,16 +1077,6 @@ zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, bcopy(raw_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN); /* - * This is necessary here as we check next whether - * OBJSET_FLAG_USERACCOUNTING_COMPLETE or - * OBJSET_FLAG_USEROBJACCOUNTING are set in order to - * decide if the local_mac should be zeroed out. - */ - intval = osp->os_flags; - if (should_bswap) - intval = BSWAP_64(intval); - - /* * The local MAC protects the user, group and project accounting. * If these objects are not present, the local MAC is zeroed out. */ @@ -1097,10 +1087,7 @@ zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, (datalen >= OBJSET_PHYS_SIZE_V2 && osp->os_userused_dnode.dn_type == DMU_OT_NONE && osp->os_groupused_dnode.dn_type == DMU_OT_NONE) || - (datalen <= OBJSET_PHYS_SIZE_V1) || - (((intval & OBJSET_FLAG_USERACCOUNTING_COMPLETE) == 0 || - (intval & OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE) == 0) && - key->zk_version > 0)) { + (datalen <= OBJSET_PHYS_SIZE_V1)) { bzero(local_mac, ZIO_OBJSET_MAC_LEN); return (0); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c index ba315f104738..aecb9f4c7d87 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c @@ -420,7 +420,7 @@ zvol_geom_destroy(zvol_state_t *zv) g_topology_assert(); mutex_enter(&zv->zv_state_lock); - VERIFY(zsg->zsg_state == ZVOL_GEOM_RUNNING); + VERIFY3S(zsg->zsg_state, ==, ZVOL_GEOM_RUNNING); mutex_exit(&zv->zv_state_lock); zsg->zsg_provider = NULL; g_wither_geom(pp->geom, ENXIO); @@ -761,12 +761,13 @@ zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag) volsize = zv->zv_volsize; /* * uio_loffset == volsize isn't an error as - * its required for EOF processing. + * it's required for EOF processing. */ if (zfs_uio_resid(&uio) > 0 && (zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize)) return (SET_ERROR(EIO)); + ssize_t start_resid = zfs_uio_resid(&uio); lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio), zfs_uio_resid(&uio), RL_READER); while (zfs_uio_resid(&uio) > 0 && zfs_uio_offset(&uio) < volsize) { @@ -785,6 +786,8 @@ zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag) } } zfs_rangelock_exit(lr); + int64_t nread = start_resid - zfs_uio_resid(&uio); + dataset_kstats_update_read_kstats(&zv->zv_kstat, nread); return (error); } @@ -809,6 +812,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) (zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize)) return (SET_ERROR(EIO)); + ssize_t start_resid = zfs_uio_resid(&uio); sync = (ioflag & IO_SYNC) || (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); @@ -840,6 +844,8 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) break; } zfs_rangelock_exit(lr); + int64_t nwritten = start_resid - zfs_uio_resid(&uio); + dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten); if (sync) zil_commit(zv->zv_zilog, ZVOL_OBJ); rw_exit(&zv->zv_suspend_lock); @@ -1164,8 +1170,8 @@ zvol_ensure_zilog(zvol_state_t *zv) zvol_get_data); zv->zv_flags |= ZVOL_WRITTEN_TO; /* replay / destroy done in zvol_create_minor_impl() */ - VERIFY0((zv->zv_zilog->zl_header->zh_flags & - ZIL_REPLAY_NEEDED)); + VERIFY0(zv->zv_zilog->zl_header->zh_flags & + ZIL_REPLAY_NEEDED); } rw_downgrade(&zv->zv_suspend_lock); } diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c index 36fdff72a133..91eeaccfdc47 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c @@ -586,8 +586,10 @@ spl_getattr(struct file *filp, struct kstat *stat) AT_STATX_SYNC_AS_STAT); #elif defined(HAVE_2ARGS_VFS_GETATTR) rc = vfs_getattr(&filp->f_path, stat); -#else +#elif defined(HAVE_3ARGS_VFS_GETATTR) rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, stat); +#else +#error "No available vfs_getattr()" #endif if (rc) return (-rc); diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c index 6b3d559ffc1c..2151ef008fd6 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c @@ -100,13 +100,10 @@ MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB"); * For small objects the Linux slab allocator should be used to make the most * efficient use of the memory. However, large objects are not supported by * the Linux slab and therefore the SPL implementation is preferred. A cutoff - * of 16K was determined to be optimal for architectures using 4K pages. + * of 16K was determined to be optimal for architectures using 4K pages and + * to also work well on architecutres using larger 64K page sizes. */ -#if PAGE_SIZE == 4096 unsigned int spl_kmem_cache_slab_limit = 16384; -#else -unsigned int spl_kmem_cache_slab_limit = 0; -#endif module_param(spl_kmem_cache_slab_limit, uint, 0644); MODULE_PARM_DESC(spl_kmem_cache_slab_limit, "Objects less than N bytes use the Linux slab"); @@ -527,9 +524,7 @@ spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) * Size a slab based on the size of each aligned object plus spl_kmem_obj_t. * When on-slab we want to target spl_kmem_cache_obj_per_slab. However, * for very small objects we may end up with more than this so as not - * to waste space in the minimal allocation of a single page. Also for - * very large objects we may use as few as spl_kmem_cache_obj_per_slab_min, - * lower than this and we will fail. + * to waste space in the minimal allocation of a single page. */ static int spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size) diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem.c index 943966cbb17a..2b342140d0e2 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem.c @@ -245,7 +245,21 @@ spl_kmem_alloc_impl(size_t size, int flags, int node) return (NULL); } } else { + /* + * We use kmalloc when doing kmem_alloc(KM_NOSLEEP), + * because kvmalloc/vmalloc may sleep. We also use + * kmalloc on systems with limited kernel VA space (e.g. + * 32-bit), which have HIGHMEM. Otherwise we use + * kvmalloc, which tries to get contiguous physical + * memory (fast, like kmalloc) and falls back on using + * virtual memory to stitch together pages (slow, like + * vmalloc). + */ +#ifdef CONFIG_HIGHMEM if (flags & KM_VMEM) { +#else + if ((flags & KM_VMEM) || !(flags & KM_NOSLEEP)) { +#endif ptr = spl_kvmalloc(size, lflags); } else { ptr = kmalloc_node(size, lflags, node); diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c b/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c index 3e58598d43f8..c4af27a7fcd7 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c @@ -53,73 +53,19 @@ static struct proc_dir_entry *proc_spl_taskq_all = NULL; static struct proc_dir_entry *proc_spl_taskq = NULL; struct proc_dir_entry *proc_spl_kstat = NULL; -static int -proc_copyin_string(char *kbuffer, int kbuffer_size, const char *ubuffer, - int ubuffer_size) -{ - int size; - - if (ubuffer_size > kbuffer_size) - return (-EOVERFLOW); - - if (copy_from_user((void *)kbuffer, (void *)ubuffer, ubuffer_size)) - return (-EFAULT); - - /* strip trailing whitespace */ - size = strnlen(kbuffer, ubuffer_size); - while (size-- >= 0) - if (!isspace(kbuffer[size])) - break; - - /* empty string */ - if (size < 0) - return (-EINVAL); - - /* no space to terminate */ - if (size == kbuffer_size) - return (-EOVERFLOW); - - kbuffer[size + 1] = 0; - return (0); -} - -static int -proc_copyout_string(char *ubuffer, int ubuffer_size, const char *kbuffer, - char *append) -{ - /* - * NB if 'append' != NULL, it's a single character to append to the - * copied out string - usually "\n", for /proc entries and - * (i.e. a terminating zero byte) for sysctl entries - */ - int size = MIN(strlen(kbuffer), ubuffer_size); - - if (copy_to_user(ubuffer, kbuffer, size)) - return (-EFAULT); - - if (append != NULL && size < ubuffer_size) { - if (copy_to_user(ubuffer + size, append, 1)) - return (-EFAULT); - - size++; - } - - return (size); -} - #ifdef DEBUG_KMEM static int proc_domemused(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int rc = 0; - unsigned long min = 0, max = ~0, val; + unsigned long val; spl_ctl_table dummy = *table; dummy.data = &val; dummy.proc_handler = &proc_dointvec; - dummy.extra1 = &min; - dummy.extra2 = &max; + dummy.extra1 = &table_min; + dummy.extra2 = &table_max; if (write) { *ppos += *lenp; @@ -141,14 +87,14 @@ proc_doslab(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int rc = 0; - unsigned long min = 0, max = ~0, val = 0, mask; + unsigned long val = 0, mask; spl_ctl_table dummy = *table; spl_kmem_cache_t *skc = NULL; dummy.data = &val; dummy.proc_handler = &proc_dointvec; - dummy.extra1 = &min; - dummy.extra2 = &max; + dummy.extra1 = &table_min; + dummy.extra2 = &table_max; if (write) { *ppos += *lenp; @@ -187,39 +133,34 @@ static int proc_dohostid(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int len, rc = 0; char *end, str[32]; + unsigned long hid; + spl_ctl_table dummy = *table; + + dummy.data = str; + dummy.maxlen = sizeof (str) - 1; + + if (!write) + snprintf(str, sizeof (str), "%lx", + (unsigned long) zone_get_hostid(NULL)); + + /* always returns 0 */ + proc_dostring(&dummy, write, buffer, lenp, ppos); if (write) { /* * We can't use proc_doulongvec_minmax() in the write - * case here because hostid while a hex value has no - * leading 0x which confuses the helper function. + * case here because hostid, while a hex value, has no + * leading 0x, which confuses the helper function. */ - rc = proc_copyin_string(str, sizeof (str), buffer, *lenp); - if (rc < 0) - return (rc); - spl_hostid = simple_strtoul(str, &end, 16); + hid = simple_strtoul(str, &end, 16); if (str == end) return (-EINVAL); - - } else { - len = snprintf(str, sizeof (str), "%lx", - (unsigned long) zone_get_hostid(NULL)); - if (*ppos >= len) - rc = 0; - else - rc = proc_copyout_string(buffer, - *lenp, str + *ppos, "\n"); - - if (rc >= 0) { - *lenp = rc; - *ppos += rc; - } + spl_hostid = hid; } - return (rc); + return (0); } static void diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c b/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c index db23fb64a298..834c527117a3 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c @@ -158,3 +158,54 @@ spl_kthread_create(int (*func)(void *), void *data, const char namefmt[], ...) } while (1); } EXPORT_SYMBOL(spl_kthread_create); + +/* + * The "why" argument indicates the allowable side-effects of the call: + * + * FORREAL: Extract the next pending signal from p_sig into p_cursig; + * stop the process if a stop has been requested or if a traced signal + * is pending. + * + * JUSTLOOKING: Don't stop the process, just indicate whether or not + * a signal might be pending (FORREAL is needed to tell for sure). + */ +int +issig(int why) +{ + ASSERT(why == FORREAL || why == JUSTLOOKING); + + if (!signal_pending(current)) + return (0); + + if (why != FORREAL) + return (1); + + struct task_struct *task = current; + spl_kernel_siginfo_t __info; + sigset_t set; + siginitsetinv(&set, 1ULL << (SIGSTOP - 1) | 1ULL << (SIGTSTP - 1)); + sigorsets(&set, &task->blocked, &set); + + spin_lock_irq(&task->sighand->siglock); + int ret; + if ((ret = dequeue_signal(task, &set, &__info)) != 0) { +#ifdef HAVE_SIGNAL_STOP + spin_unlock_irq(&task->sighand->siglock); + kernel_signal_stop(); +#else + if (current->jobctl & JOBCTL_STOP_DEQUEUED) + spl_set_special_state(TASK_STOPPED); + + spin_unlock_irq(¤t->sighand->siglock); + + schedule(); +#endif + return (0); + } + + spin_unlock_irq(&task->sighand->siglock); + + return (1); +} + +EXPORT_SYMBOL(issig); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c index 207a51d75bc9..82b32d1cc3fa 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c @@ -1106,8 +1106,8 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xzpp, cred_t *cr) sizeof (xzp->z_id), tx)); if (!zp->z_unlinked) - (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, - xzp, "", NULL, acl_ids.z_fuidp, vap); + zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, xzp, "", NULL, + acl_ids.z_fuidp, vap); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c index 99c6ffc95940..35e647375d9d 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c @@ -279,8 +279,10 @@ zfs_file_getattr(zfs_file_t *filp, zfs_file_attr_t *zfattr) AT_STATX_SYNC_AS_STAT); #elif defined(HAVE_2ARGS_VFS_GETATTR) rc = vfs_getattr(&filp->f_path, &stat); -#else +#elif defined(HAVE_3ARGS_VFS_GETATTR) rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, &stat); +#else +#error "No available vfs_getattr()" #endif if (rc) return (-rc); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c index 6f5cff1770e1..fee3fe540b90 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c @@ -87,71 +87,20 @@ zfs_vfs_rele(zfsvfs_t *zfsvfs) deactivate_super(zfsvfs->z_sb); } -static int -zfsdev_state_init(struct file *filp) +void +zfsdev_private_set_state(void *priv, zfsdev_state_t *zs) { - zfsdev_state_t *zs, *zsprev = NULL; - minor_t minor; - boolean_t newzs = B_FALSE; - - ASSERT(MUTEX_HELD(&zfsdev_state_lock)); - - minor = zfsdev_minor_alloc(); - if (minor == 0) - return (SET_ERROR(ENXIO)); - - for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) { - if (zs->zs_minor == -1) - break; - zsprev = zs; - } - - if (!zs) { - zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP); - newzs = B_TRUE; - } + struct file *filp = priv; filp->private_data = zs; - - zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit); - zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent); - - /* - * In order to provide for lock-free concurrent read access - * to the minor list in zfsdev_get_state_impl(), new entries - * must be completely written before linking them into the - * list whereas existing entries are already linked; the last - * operation must be updating zs_minor (from -1 to the new - * value). - */ - if (newzs) { - zs->zs_minor = minor; - smp_wmb(); - zsprev->zs_next = zs; - } else { - smp_wmb(); - zs->zs_minor = minor; - } - - return (0); } -static int -zfsdev_state_destroy(struct file *filp) +zfsdev_state_t * +zfsdev_private_get_state(void *priv) { - zfsdev_state_t *zs; + struct file *filp = priv; - ASSERT(MUTEX_HELD(&zfsdev_state_lock)); - ASSERT(filp->private_data != NULL); - - zs = filp->private_data; - zs->zs_minor = -1; - zfs_onexit_destroy(zs->zs_onexit); - zfs_zevent_destroy(zs->zs_zevent); - zs->zs_onexit = NULL; - zs->zs_zevent = NULL; - - return (0); + return (filp->private_data); } static int @@ -169,13 +118,9 @@ zfsdev_open(struct inode *ino, struct file *filp) static int zfsdev_release(struct inode *ino, struct file *filp) { - int error; - - mutex_enter(&zfsdev_state_lock); - error = zfsdev_state_destroy(filp); - mutex_exit(&zfsdev_state_lock); + zfsdev_state_destroy(filp); - return (-error); + return (0); } static long @@ -212,6 +157,12 @@ zfs_max_nvlist_src_size_os(void) return (MIN(ptob(zfs_totalram_pages) / 4, 128 * 1024 * 1024)); } +/* Update the VFS's cache of mountpoint properties */ +void +zfs_ioctl_update_mount_cache(const char *dsname) +{ +} + void zfs_ioctl_init_os(void) { @@ -283,7 +234,7 @@ zfsdev_detach(void) #endif static int __init -_init(void) +openzfs_init(void) { int error; @@ -309,7 +260,7 @@ _init(void) } static void __exit -_fini(void) +openzfs_fini(void) { zfs_sysfs_fini(); zfs_kmod_fini(); @@ -319,8 +270,8 @@ _fini(void) } #if defined(_KERNEL) -module_init(_init); -module_exit(_fini); +module_init(openzfs_init); +module_exit(openzfs_fini); #endif ZFS_MODULE_DESCRIPTION("ZFS"); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c index 5d672af0e8aa..ff0b0d9df8f0 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c @@ -434,12 +434,6 @@ snapdir_changed_cb(void *arg, uint64_t newval) } static void -vscan_changed_cb(void *arg, uint64_t newval) -{ - ((zfsvfs_t *)arg)->z_vscan = newval; -} - -static void acl_mode_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; @@ -512,8 +506,6 @@ zfs_register_callbacks(vfs_t *vfsp) zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs); - error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zfsvfs); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (error) diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c index 8aeed6f568cf..24c016c5fcf1 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c @@ -175,18 +175,6 @@ * return (error); // done, report error */ -/* - * Virus scanning is unsupported. It would be possible to add a hook - * here to performance the required virus scan. This could be done - * entirely in the kernel or potentially as an update to invoke a - * scanning utility. - */ -static int -zfs_vscan(struct inode *ip, cred_t *cr, int async) -{ - return (0); -} - /* ARGSUSED */ int zfs_open(struct inode *ip, int mode, int flag, cred_t *cr) @@ -204,15 +192,6 @@ zfs_open(struct inode *ip, int mode, int flag, cred_t *cr) return (SET_ERROR(EPERM)); } - /* Virus scan eligible files on open */ - if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) && - !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { - if (zfs_vscan(ip, cr, 0) != 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EACCES)); - } - } - /* Keep a count of the synchronous opens in the znode */ if (flag & O_SYNC) atomic_inc_32(&zp->z_sync_cnt); @@ -235,10 +214,6 @@ zfs_close(struct inode *ip, int flag, cred_t *cr) if (flag & O_SYNC) atomic_dec_32(&zp->z_sync_cnt); - if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) && - !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) - VERIFY(zfs_vscan(ip, cr, 1) == 0); - ZFS_EXIT(zfsvfs); return (0); } @@ -3140,7 +3115,7 @@ top: /* * Create a new object for the symlink. - * for version 4 ZPL datsets the symlink will be an SA attribute + * for version 4 ZPL datasets the symlink will be an SA attribute */ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); @@ -3950,6 +3925,13 @@ zfs_fid(struct inode *ip, fid_t *fidp) int size, i, error; ZFS_ENTER(zfsvfs); + + if (fidp->fid_len < SHORT_FID_LEN) { + fidp->fid_len = SHORT_FID_LEN; + ZFS_EXIT(zfsvfs); + return (SET_ERROR(ENOSPC)); + } + ZFS_VERIFY_ZP(zp); if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c index d59c1bb0716a..6015aea62dca 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c @@ -151,9 +151,9 @@ zfs_znode_cache_destructor(void *buf, void *arg) rw_destroy(&zp->z_xattr_lock); zfs_rangelock_fini(&zp->z_rangelock); - ASSERT(zp->z_dirlocks == NULL); - ASSERT(zp->z_acl_cached == NULL); - ASSERT(zp->z_xattr_cached == NULL); + ASSERT3P(zp->z_dirlocks, ==, NULL); + ASSERT3P(zp->z_acl_cached, ==, NULL); + ASSERT3P(zp->z_xattr_cached, ==, NULL); } static int @@ -217,7 +217,7 @@ zfs_znode_fini(void) * created or destroyed. This kind of locking would normally reside in the * znode itself but in this case that's impossible because the znode and SA * buffer may not yet exist. Therefore the locking is handled externally - * with an array of mutexs and AVLs trees which contain per-object locks. + * with an array of mutexes and AVLs trees which contain per-object locks. * * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted * in to the correct AVL tree and finally the per-object lock is held. In diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c b/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c index 2c58fecb2066..94406999cb89 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c @@ -115,7 +115,7 @@ * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left * in plaintext for scrubbing and claiming, but the bonus buffers might contain * sensitive user data. The function zio_crypt_init_uios_dnode() handles parsing - * which which pieces of the block need to be encrypted. For more details about + * which pieces of the block need to be encrypted. For more details about * dnode authentication and encryption, see zio_crypt_init_uios_dnode(). * * OBJECT SET AUTHENTICATION: @@ -1198,16 +1198,6 @@ zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, bcopy(raw_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN); /* - * This is necessary here as we check next whether - * OBJSET_FLAG_USERACCOUNTING_COMPLETE or - * OBJSET_FLAG_USEROBJACCOUNTING are set in order to - * decide if the local_mac should be zeroed out. - */ - intval = osp->os_flags; - if (should_bswap) - intval = BSWAP_64(intval); - - /* * The local MAC protects the user, group and project accounting. * If these objects are not present, the local MAC is zeroed out. */ @@ -1218,10 +1208,7 @@ zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, (datalen >= OBJSET_PHYS_SIZE_V2 && osp->os_userused_dnode.dn_type == DMU_OT_NONE && osp->os_groupused_dnode.dn_type == DMU_OT_NONE) || - (datalen <= OBJSET_PHYS_SIZE_V1) || - (((intval & OBJSET_FLAG_USERACCOUNTING_COMPLETE) == 0 || - (intval & OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE) == 0) && - key->zk_version > 0)) { + (datalen <= OBJSET_PHYS_SIZE_V1)) { bzero(local_mac, ZIO_OBJSET_MAC_LEN); return (0); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_export.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_export.c index eaf048c38db1..5be63532d329 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_export.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_export.c @@ -41,15 +41,19 @@ zpl_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, int connectable) struct inode *ip = dentry->d_inode; #endif /* HAVE_ENCODE_FH_WITH_INODE */ fstrans_cookie_t cookie; - fid_t *fid = (fid_t *)fh; + ushort_t empty_fid = 0; + fid_t *fid; int len_bytes, rc; len_bytes = *max_len * sizeof (__u32); - if (len_bytes < offsetof(fid_t, fid_data)) - return (255); + if (len_bytes < offsetof(fid_t, fid_data)) { + fid = (fid_t *)&empty_fid; + } else { + fid = (fid_t *)fh; + fid->fid_len = len_bytes - offsetof(fid_t, fid_data); + } - fid->fid_len = len_bytes - offsetof(fid_t, fid_data); cookie = spl_fstrans_mark(); if (zfsctl_is_node(ip)) diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c index d042783da1b2..524c43dcded4 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c @@ -148,7 +148,7 @@ zpl_aio_fsync(struct kiocb *kiocb, int datasync) #elif defined(HAVE_FSYNC_RANGE) /* - * Linux 3.1 - 3.x API, + * Linux 3.1 API, * As of 3.1 the responsibility to call filemap_write_and_wait_range() has * been pushed down in to the .fsync() vfs hook. Additionally, the i_mutex * lock is no longer held by the caller, for zfs we don't require the lock @@ -342,9 +342,6 @@ zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from) ssize_t wrote = count - uio.uio_resid; kiocb->ki_pos += wrote; - if (wrote > 0) - iov_iter_advance(from, wrote); - return (wrote); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c index cf0eab3e8c90..98c2fb3a0c92 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c @@ -149,14 +149,17 @@ zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag) error = -zfs_create(ITOZ(dir), dname(dentry), vap, 0, mode, &zp, cr, 0, NULL); if (error == 0) { - d_instantiate(dentry, ZTOI(zp)); - error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name); if (error == 0) error = zpl_init_acl(ZTOI(zp), dir); - if (error) + if (error) { (void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0); + remove_inode_hash(ZTOI(zp)); + iput(ZTOI(zp)); + } else { + d_instantiate(dentry, ZTOI(zp)); + } } spl_fstrans_unmark(cookie); @@ -198,14 +201,17 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, error = -zfs_create(ITOZ(dir), dname(dentry), vap, 0, mode, &zp, cr, 0, NULL); if (error == 0) { - d_instantiate(dentry, ZTOI(zp)); - error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name); if (error == 0) error = zpl_init_acl(ZTOI(zp), dir); - if (error) + if (error) { (void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0); + remove_inode_hash(ZTOI(zp)); + iput(ZTOI(zp)); + } else { + d_instantiate(dentry, ZTOI(zp)); + } } spl_fstrans_unmark(cookie); @@ -218,7 +224,12 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, #ifdef HAVE_TMPFILE static int +#ifdef HAVE_TMPFILE_USERNS +zpl_tmpfile(struct user_namespace *userns, struct inode *dir, + struct dentry *dentry, umode_t mode) +#else zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +#endif { cred_t *cr = CRED(); struct inode *ip; @@ -308,14 +319,17 @@ zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) cookie = spl_fstrans_mark(); error = -zfs_mkdir(ITOZ(dir), dname(dentry), vap, &zp, cr, 0, NULL); if (error == 0) { - d_instantiate(dentry, ZTOI(zp)); - error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name); if (error == 0) error = zpl_init_acl(ZTOI(zp), dir); - if (error) + if (error) { (void) zfs_rmdir(ITOZ(dir), dname(dentry), NULL, cr, 0); + remove_inode_hash(ZTOI(zp)); + iput(ZTOI(zp)); + } else { + d_instantiate(dentry, ZTOI(zp)); + } } spl_fstrans_unmark(cookie); @@ -488,11 +502,14 @@ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name) error = -zfs_symlink(ITOZ(dir), dname(dentry), vap, (char *)name, &zp, cr, 0); if (error == 0) { - d_instantiate(dentry, ZTOI(zp)); - error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name); - if (error) + if (error) { (void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0); + remove_inode_hash(ZTOI(zp)); + iput(ZTOI(zp)); + } else { + d_instantiate(dentry, ZTOI(zp)); + } } spl_fstrans_unmark(cookie); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c index 971cd6ad031e..66f197e4c77a 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c @@ -605,7 +605,7 @@ zpl_xattr_set(struct inode *ip, const char *name, const void *value, cookie = spl_fstrans_mark(); ZPL_ENTER(zfsvfs); ZPL_VERIFY_ZP(zp); - rw_enter(&ITOZ(ip)->z_xattr_lock, RW_WRITER); + rw_enter(&zp->z_xattr_lock, RW_WRITER); /* * Before setting the xattr check to see if it already exists. @@ -656,7 +656,7 @@ zpl_xattr_set(struct inode *ip, const char *name, const void *value, if (error == 0 && (where & XATTR_IN_SA)) zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr); out: - rw_exit(&ITOZ(ip)->z_xattr_lock); + rw_exit(&zp->z_xattr_lock); ZPL_EXIT(zfsvfs); spl_fstrans_unmark(cookie); crfree(cr); @@ -926,11 +926,8 @@ xattr_handler_t zpl_xattr_security_handler = { * attribute implemented by filesystems in the kernel." - xattr(7) */ #ifdef CONFIG_FS_POSIX_ACL -#ifndef HAVE_SET_ACL -static -#endif -int -zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type) +static int +zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type) { char *name, *value = NULL; int error = 0; @@ -1002,6 +999,19 @@ zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type) return (error); } +#ifdef HAVE_SET_ACL +int +#ifdef HAVE_SET_ACL_USERNS +zpl_set_acl(struct user_namespace *userns, struct inode *ip, + struct posix_acl *acl, int type) +#else +zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type) +#endif /* HAVE_SET_ACL_USERNS */ +{ + return (zpl_set_acl_impl(ip, acl, type)); +} +#endif /* HAVE_SET_ACL */ + struct posix_acl * zpl_get_acl(struct inode *ip, int type) { @@ -1083,7 +1093,7 @@ zpl_init_acl(struct inode *ip, struct inode *dir) umode_t mode; if (S_ISDIR(ip->i_mode)) { - error = zpl_set_acl(ip, acl, ACL_TYPE_DEFAULT); + error = zpl_set_acl_impl(ip, acl, ACL_TYPE_DEFAULT); if (error) goto out; } @@ -1093,8 +1103,10 @@ zpl_init_acl(struct inode *ip, struct inode *dir) if (error >= 0) { ip->i_mode = mode; zfs_mark_inode_dirty(ip); - if (error > 0) - error = zpl_set_acl(ip, acl, ACL_TYPE_ACCESS); + if (error > 0) { + error = zpl_set_acl_impl(ip, acl, + ACL_TYPE_ACCESS); + } } } out: @@ -1121,7 +1133,7 @@ zpl_chmod_acl(struct inode *ip) error = __posix_acl_chmod(&acl, GFP_KERNEL, ip->i_mode); if (!error) - error = zpl_set_acl(ip, acl, ACL_TYPE_ACCESS); + error = zpl_set_acl_impl(ip, acl, ACL_TYPE_ACCESS); zpl_posix_acl_release(acl); @@ -1250,8 +1262,7 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name, } else { acl = NULL; } - - error = zpl_set_acl(ip, acl, type); + error = zpl_set_acl_impl(ip, acl, type); zpl_posix_acl_release(acl); return (error); @@ -1291,7 +1302,7 @@ __zpl_xattr_acl_set_default(struct inode *ip, const char *name, acl = NULL; } - error = zpl_set_acl(ip, acl, type); + error = zpl_set_acl_impl(ip, acl, type); zpl_posix_acl_release(acl); return (error); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index 7756d819fdeb..741979f11af8 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -756,7 +756,9 @@ static struct block_device_operations zvol_ops = { .ioctl = zvol_ioctl, .compat_ioctl = zvol_compat_ioctl, .check_events = zvol_check_events, +#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK .revalidate_disk = zvol_revalidate_disk, +#endif .getgeo = zvol_getgeo, .owner = THIS_MODULE, #ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS diff --git a/sys/contrib/openzfs/module/unicode/u8_textprep.c b/sys/contrib/openzfs/module/unicode/u8_textprep.c index be816d728359..c1d9a325f511 100644 --- a/sys/contrib/openzfs/module/unicode/u8_textprep.c +++ b/sys/contrib/openzfs/module/unicode/u8_textprep.c @@ -884,7 +884,7 @@ do_decomp(size_t uv, uchar_t *u8s, uchar_t *s, int sz, * | B0| B1| ... | Bm| * +---+---+-...-+---+ * - * The first byte, B0, is always less then 0xF5 (U8_DECOMP_BOTH). + * The first byte, B0, is always less than 0xF5 (U8_DECOMP_BOTH). * * (2) Canonical decomposition mappings: * diff --git a/sys/contrib/openzfs/module/zcommon/zfs_comutil.c b/sys/contrib/openzfs/module/zcommon/zfs_comutil.c index 1cec60ac1d67..886167759be8 100644 --- a/sys/contrib/openzfs/module/zcommon/zfs_comutil.c +++ b/sys/contrib/openzfs/module/zcommon/zfs_comutil.c @@ -26,7 +26,7 @@ /* * This file is intended for functions that ought to be common between user * land (libzfs) and the kernel. When many common routines need to be shared - * then a separate file should to be created. + * then a separate file should be created. */ #if !defined(_KERNEL) diff --git a/sys/contrib/openzfs/module/zcommon/zfs_fletcher_avx512.c b/sys/contrib/openzfs/module/zcommon/zfs_fletcher_avx512.c index 300ec4c1fb69..963f089b04f5 100644 --- a/sys/contrib/openzfs/module/zcommon/zfs_fletcher_avx512.c +++ b/sys/contrib/openzfs/module/zcommon/zfs_fletcher_avx512.c @@ -210,6 +210,12 @@ fletcher_4_avx512bw_byteswap(fletcher_4_ctx_t *ctx, const void *buf, } STACK_FRAME_NON_STANDARD(fletcher_4_avx512bw_byteswap); +static boolean_t +fletcher_4_avx512bw_valid(void) +{ + return (fletcher_4_avx512f_valid() && zfs_avx512bw_available()); +} + const fletcher_4_ops_t fletcher_4_avx512bw_ops = { .init_native = fletcher_4_avx512f_init, .fini_native = fletcher_4_avx512f_fini, @@ -217,7 +223,7 @@ const fletcher_4_ops_t fletcher_4_avx512bw_ops = { .init_byteswap = fletcher_4_avx512f_init, .fini_byteswap = fletcher_4_avx512f_fini, .compute_byteswap = fletcher_4_avx512bw_byteswap, - .valid = fletcher_4_avx512f_valid, + .valid = fletcher_4_avx512bw_valid, .name = "avx512bw" }; #endif diff --git a/sys/contrib/openzfs/module/zcommon/zfs_namecheck.c b/sys/contrib/openzfs/module/zcommon/zfs_namecheck.c index 0011a971cacb..7ecce451b42d 100644 --- a/sys/contrib/openzfs/module/zcommon/zfs_namecheck.c +++ b/sys/contrib/openzfs/module/zcommon/zfs_namecheck.c @@ -450,12 +450,6 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what) return (-1); } - if (pool[0] == 'c' && (pool[1] >= '0' && pool[1] <= '9')) { - if (why) - *why = NAME_ERR_DISKLIKE; - return (-1); - } - return (0); } diff --git a/sys/contrib/openzfs/module/zcommon/zfs_prop.c b/sys/contrib/openzfs/module/zcommon/zfs_prop.c index 402d749c1aeb..d17321990809 100644 --- a/sys/contrib/openzfs/module/zcommon/zfs_prop.c +++ b/sys/contrib/openzfs/module/zcommon/zfs_prop.c @@ -583,7 +583,7 @@ zfs_prop_init(void) "ENCROOT"); zprop_register_string(ZFS_PROP_KEYLOCATION, "keylocation", "none", PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "prompt | <file URI>", "KEYLOCATION"); + "prompt | <file URI> | <https URL> | <http URL>", "KEYLOCATION"); zprop_register_string(ZFS_PROP_REDACT_SNAPS, "redact_snaps", NULL, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<snapshot>[,...]", @@ -936,6 +936,10 @@ zfs_prop_valid_keylocation(const char *str, boolean_t encrypted) return (B_TRUE); else if (strlen(str) > 8 && strncmp("file:///", str, 8) == 0) return (B_TRUE); + else if (strlen(str) > 8 && strncmp("https://", str, 8) == 0) + return (B_TRUE); + else if (strlen(str) > 7 && strncmp("http://", str, 7) == 0) + return (B_TRUE); return (B_FALSE); } diff --git a/sys/contrib/openzfs/module/zfs/abd.c b/sys/contrib/openzfs/module/zfs/abd.c index 1e6645c90c95..2d1be9752d4f 100644 --- a/sys/contrib/openzfs/module/zfs/abd.c +++ b/sys/contrib/openzfs/module/zfs/abd.c @@ -381,7 +381,7 @@ abd_gang_add_gang(abd_t *pabd, abd_t *cabd, boolean_t free_on_free) child = list_next(&ABD_GANG(cabd).abd_gang_chain, child)) { /* * We always pass B_FALSE for free_on_free as it is the - * original child gang ABDs responsibilty to determine + * original child gang ABDs responsibility to determine * if any of its child ABDs should be free'd on the call * to abd_free(). */ diff --git a/sys/contrib/openzfs/module/zfs/aggsum.c b/sys/contrib/openzfs/module/zfs/aggsum.c index e46da95f676c..c4ea4f86fc5f 100644 --- a/sys/contrib/openzfs/module/zfs/aggsum.c +++ b/sys/contrib/openzfs/module/zfs/aggsum.c @@ -78,11 +78,11 @@ */ /* - * We will borrow aggsum_borrow_multiplier times the current request, so we will - * have to get the as_lock approximately every aggsum_borrow_multiplier calls to - * aggsum_delta(). + * We will borrow 2^aggsum_borrow_shift times the current request, so we will + * have to get the as_lock approximately every 2^aggsum_borrow_shift calls to + * aggsum_add(). */ -static uint_t aggsum_borrow_multiplier = 10; +static uint_t aggsum_borrow_shift = 4; void aggsum_init(aggsum_t *as, uint64_t value) @@ -90,9 +90,14 @@ aggsum_init(aggsum_t *as, uint64_t value) bzero(as, sizeof (*as)); as->as_lower_bound = as->as_upper_bound = value; mutex_init(&as->as_lock, NULL, MUTEX_DEFAULT, NULL); - as->as_numbuckets = boot_ncpus; - as->as_buckets = kmem_zalloc(boot_ncpus * sizeof (aggsum_bucket_t), - KM_SLEEP); + /* + * Too many buckets may hurt read performance without improving + * write. From 12 CPUs use bucket per 2 CPUs, from 48 per 4, etc. + */ + as->as_bucketshift = highbit64(boot_ncpus / 6) / 2; + as->as_numbuckets = ((boot_ncpus - 1) >> as->as_bucketshift) + 1; + as->as_buckets = kmem_zalloc(as->as_numbuckets * + sizeof (aggsum_bucket_t), KM_SLEEP); for (int i = 0; i < as->as_numbuckets; i++) { mutex_init(&as->as_buckets[i].asc_lock, NULL, MUTEX_DEFAULT, NULL); @@ -111,59 +116,49 @@ aggsum_fini(aggsum_t *as) int64_t aggsum_lower_bound(aggsum_t *as) { - return (as->as_lower_bound); + return (atomic_load_64((volatile uint64_t *)&as->as_lower_bound)); } -int64_t +uint64_t aggsum_upper_bound(aggsum_t *as) { - return (as->as_upper_bound); -} - -static void -aggsum_flush_bucket(aggsum_t *as, struct aggsum_bucket *asb) -{ - ASSERT(MUTEX_HELD(&as->as_lock)); - ASSERT(MUTEX_HELD(&asb->asc_lock)); - - /* - * We use atomic instructions for this because we read the upper and - * lower bounds without the lock, so we need stores to be atomic. - */ - atomic_add_64((volatile uint64_t *)&as->as_lower_bound, - asb->asc_delta + asb->asc_borrowed); - atomic_add_64((volatile uint64_t *)&as->as_upper_bound, - asb->asc_delta - asb->asc_borrowed); - asb->asc_delta = 0; - asb->asc_borrowed = 0; + return (atomic_load_64(&as->as_upper_bound)); } uint64_t aggsum_value(aggsum_t *as) { - int64_t rv; + int64_t lb; + uint64_t ub; mutex_enter(&as->as_lock); - if (as->as_lower_bound == as->as_upper_bound) { - rv = as->as_lower_bound; + lb = as->as_lower_bound; + ub = as->as_upper_bound; + if (lb == ub) { for (int i = 0; i < as->as_numbuckets; i++) { ASSERT0(as->as_buckets[i].asc_delta); ASSERT0(as->as_buckets[i].asc_borrowed); } mutex_exit(&as->as_lock); - return (rv); + return (lb); } for (int i = 0; i < as->as_numbuckets; i++) { struct aggsum_bucket *asb = &as->as_buckets[i]; + if (asb->asc_borrowed == 0) + continue; mutex_enter(&asb->asc_lock); - aggsum_flush_bucket(as, asb); + lb += asb->asc_delta + asb->asc_borrowed; + ub += asb->asc_delta - asb->asc_borrowed; + asb->asc_delta = 0; + asb->asc_borrowed = 0; mutex_exit(&asb->asc_lock); } - VERIFY3U(as->as_lower_bound, ==, as->as_upper_bound); - rv = as->as_lower_bound; + ASSERT3U(lb, ==, ub); + atomic_store_64((volatile uint64_t *)&as->as_lower_bound, lb); + atomic_store_64(&as->as_upper_bound, lb); mutex_exit(&as->as_lock); - return (rv); + return (lb); } void @@ -172,7 +167,8 @@ aggsum_add(aggsum_t *as, int64_t delta) struct aggsum_bucket *asb; int64_t borrow; - asb = &as->as_buckets[CPU_SEQID_UNSTABLE % as->as_numbuckets]; + asb = &as->as_buckets[(CPU_SEQID_UNSTABLE >> as->as_bucketshift) % + as->as_numbuckets]; /* Try fast path if we already borrowed enough before. */ mutex_enter(&asb->asc_lock); @@ -188,21 +184,22 @@ aggsum_add(aggsum_t *as, int64_t delta) * We haven't borrowed enough. Take the global lock and borrow * considering what is requested now and what we borrowed before. */ - borrow = (delta < 0 ? -delta : delta) * aggsum_borrow_multiplier; + borrow = (delta < 0 ? -delta : delta); + borrow <<= aggsum_borrow_shift + as->as_bucketshift; mutex_enter(&as->as_lock); - mutex_enter(&asb->asc_lock); - delta += asb->asc_delta; - asb->asc_delta = 0; if (borrow >= asb->asc_borrowed) borrow -= asb->asc_borrowed; else borrow = (borrow - (int64_t)asb->asc_borrowed) / 4; + mutex_enter(&asb->asc_lock); + delta += asb->asc_delta; + asb->asc_delta = 0; asb->asc_borrowed += borrow; - atomic_add_64((volatile uint64_t *)&as->as_lower_bound, - delta - borrow); - atomic_add_64((volatile uint64_t *)&as->as_upper_bound, - delta + borrow); mutex_exit(&asb->asc_lock); + atomic_store_64((volatile uint64_t *)&as->as_lower_bound, + as->as_lower_bound + delta - borrow); + atomic_store_64(&as->as_upper_bound, + as->as_upper_bound + delta + borrow); mutex_exit(&as->as_lock); } @@ -214,27 +211,35 @@ aggsum_add(aggsum_t *as, int64_t delta) int aggsum_compare(aggsum_t *as, uint64_t target) { - if (as->as_upper_bound < target) + int64_t lb; + uint64_t ub; + int i; + + if (atomic_load_64(&as->as_upper_bound) < target) return (-1); - if (as->as_lower_bound > target) + lb = atomic_load_64((volatile uint64_t *)&as->as_lower_bound); + if (lb > 0 && (uint64_t)lb > target) return (1); mutex_enter(&as->as_lock); - for (int i = 0; i < as->as_numbuckets; i++) { + lb = as->as_lower_bound; + ub = as->as_upper_bound; + for (i = 0; i < as->as_numbuckets; i++) { struct aggsum_bucket *asb = &as->as_buckets[i]; + if (asb->asc_borrowed == 0) + continue; mutex_enter(&asb->asc_lock); - aggsum_flush_bucket(as, asb); + lb += asb->asc_delta + asb->asc_borrowed; + ub += asb->asc_delta - asb->asc_borrowed; + asb->asc_delta = 0; + asb->asc_borrowed = 0; mutex_exit(&asb->asc_lock); - if (as->as_upper_bound < target) { - mutex_exit(&as->as_lock); - return (-1); - } - if (as->as_lower_bound > target) { - mutex_exit(&as->as_lock); - return (1); - } + if (ub < target || (lb > 0 && (uint64_t)lb > target)) + break; } - VERIFY3U(as->as_lower_bound, ==, as->as_upper_bound); - ASSERT3U(as->as_lower_bound, ==, target); + if (i >= as->as_numbuckets) + ASSERT3U(lb, ==, ub); + atomic_store_64((volatile uint64_t *)&as->as_lower_bound, lb); + atomic_store_64(&as->as_upper_bound, ub); mutex_exit(&as->as_lock); - return (0); + return (ub < target ? -1 : (uint64_t)lb > target ? 1 : 0); } diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index 55c71a3829cd..5526cae378fb 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -305,6 +305,7 @@ #include <sys/arc_impl.h> #include <sys/trace_zfs.h> #include <sys/aggsum.h> +#include <sys/wmsum.h> #include <cityhash.h> #include <sys/vdev_trim.h> #include <sys/zfs_racct.h> @@ -692,14 +693,14 @@ arc_state_t *arc_mfu; */ aggsum_t arc_size; aggsum_t arc_meta_used; -aggsum_t astat_data_size; -aggsum_t astat_metadata_size; -aggsum_t astat_dbuf_size; +wmsum_t astat_data_size; +wmsum_t astat_metadata_size; +wmsum_t astat_dbuf_size; aggsum_t astat_dnode_size; -aggsum_t astat_bonus_size; -aggsum_t astat_hdr_size; +wmsum_t astat_bonus_size; +wmsum_t astat_hdr_size; aggsum_t astat_l2_hdr_size; -aggsum_t astat_abd_chunk_waste_size; +wmsum_t astat_abd_chunk_waste_size; hrtime_t arc_growtime; list_t arc_prune_list; @@ -2645,22 +2646,22 @@ arc_space_consume(uint64_t space, arc_space_type_t type) default: break; case ARC_SPACE_DATA: - aggsum_add(&astat_data_size, space); + wmsum_add(&astat_data_size, space); break; case ARC_SPACE_META: - aggsum_add(&astat_metadata_size, space); + wmsum_add(&astat_metadata_size, space); break; case ARC_SPACE_BONUS: - aggsum_add(&astat_bonus_size, space); + wmsum_add(&astat_bonus_size, space); break; case ARC_SPACE_DNODE: aggsum_add(&astat_dnode_size, space); break; case ARC_SPACE_DBUF: - aggsum_add(&astat_dbuf_size, space); + wmsum_add(&astat_dbuf_size, space); break; case ARC_SPACE_HDRS: - aggsum_add(&astat_hdr_size, space); + wmsum_add(&astat_hdr_size, space); break; case ARC_SPACE_L2HDRS: aggsum_add(&astat_l2_hdr_size, space); @@ -2672,7 +2673,7 @@ arc_space_consume(uint64_t space, arc_space_type_t type) * scatter ABD's come from the ARC, because other users are * very short-lived. */ - aggsum_add(&astat_abd_chunk_waste_size, space); + wmsum_add(&astat_abd_chunk_waste_size, space); break; } @@ -2691,28 +2692,28 @@ arc_space_return(uint64_t space, arc_space_type_t type) default: break; case ARC_SPACE_DATA: - aggsum_add(&astat_data_size, -space); + wmsum_add(&astat_data_size, -space); break; case ARC_SPACE_META: - aggsum_add(&astat_metadata_size, -space); + wmsum_add(&astat_metadata_size, -space); break; case ARC_SPACE_BONUS: - aggsum_add(&astat_bonus_size, -space); + wmsum_add(&astat_bonus_size, -space); break; case ARC_SPACE_DNODE: aggsum_add(&astat_dnode_size, -space); break; case ARC_SPACE_DBUF: - aggsum_add(&astat_dbuf_size, -space); + wmsum_add(&astat_dbuf_size, -space); break; case ARC_SPACE_HDRS: - aggsum_add(&astat_hdr_size, -space); + wmsum_add(&astat_hdr_size, -space); break; case ARC_SPACE_L2HDRS: aggsum_add(&astat_l2_hdr_size, -space); break; case ARC_SPACE_ABD_CHUNK_WASTE: - aggsum_add(&astat_abd_chunk_waste_size, -space); + wmsum_add(&astat_abd_chunk_waste_size, -space); break; } @@ -3468,7 +3469,6 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt) arc_buf_hdr_t *nhdr; arc_buf_t *buf; kmem_cache_t *ncache, *ocache; - unsigned nsize, osize; /* * This function requires that hdr is in the arc_anon state. @@ -3485,14 +3485,10 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt) if (need_crypt) { ncache = hdr_full_crypt_cache; - nsize = sizeof (hdr->b_crypt_hdr); ocache = hdr_full_cache; - osize = HDR_FULL_SIZE; } else { ncache = hdr_full_cache; - nsize = HDR_FULL_SIZE; ocache = hdr_full_crypt_cache; - osize = sizeof (hdr->b_crypt_hdr); } nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE); @@ -5036,7 +5032,7 @@ arc_reap_cb(void *arg, zthr_t *zthr) * memory in the system at a fraction of the arc_size (1/128th by * default). If oversubscribed (free_memory < 0) then reduce the * target arc_size by the deficit amount plus the fractional - * amount. If free memory is positive but less then the fractional + * amount. If free memory is positive but less than the fractional * amount, reduce by what is needed to hit the fractional amount. */ free_memory = arc_available_memory(); @@ -7275,21 +7271,21 @@ arc_kstat_update(kstat_t *ksp, int rw) ARCSTAT(arcstat_size) = aggsum_value(&arc_size); ARCSTAT(arcstat_meta_used) = aggsum_value(&arc_meta_used); - ARCSTAT(arcstat_data_size) = aggsum_value(&astat_data_size); + ARCSTAT(arcstat_data_size) = wmsum_value(&astat_data_size); ARCSTAT(arcstat_metadata_size) = - aggsum_value(&astat_metadata_size); - ARCSTAT(arcstat_hdr_size) = aggsum_value(&astat_hdr_size); + wmsum_value(&astat_metadata_size); + ARCSTAT(arcstat_hdr_size) = wmsum_value(&astat_hdr_size); ARCSTAT(arcstat_l2_hdr_size) = aggsum_value(&astat_l2_hdr_size); - ARCSTAT(arcstat_dbuf_size) = aggsum_value(&astat_dbuf_size); + ARCSTAT(arcstat_dbuf_size) = wmsum_value(&astat_dbuf_size); #if defined(COMPAT_FREEBSD11) - ARCSTAT(arcstat_other_size) = aggsum_value(&astat_bonus_size) + + ARCSTAT(arcstat_other_size) = wmsum_value(&astat_bonus_size) + aggsum_value(&astat_dnode_size) + - aggsum_value(&astat_dbuf_size); + wmsum_value(&astat_dbuf_size); #endif ARCSTAT(arcstat_dnode_size) = aggsum_value(&astat_dnode_size); - ARCSTAT(arcstat_bonus_size) = aggsum_value(&astat_bonus_size); + ARCSTAT(arcstat_bonus_size) = wmsum_value(&astat_bonus_size); ARCSTAT(arcstat_abd_chunk_waste_size) = - aggsum_value(&astat_abd_chunk_waste_size); + wmsum_value(&astat_abd_chunk_waste_size); as->arcstat_memory_all_bytes.value.ui64 = arc_all_memory(); @@ -7522,14 +7518,14 @@ arc_state_init(void) aggsum_init(&arc_meta_used, 0); aggsum_init(&arc_size, 0); - aggsum_init(&astat_data_size, 0); - aggsum_init(&astat_metadata_size, 0); - aggsum_init(&astat_hdr_size, 0); + wmsum_init(&astat_data_size, 0); + wmsum_init(&astat_metadata_size, 0); + wmsum_init(&astat_hdr_size, 0); aggsum_init(&astat_l2_hdr_size, 0); - aggsum_init(&astat_bonus_size, 0); + wmsum_init(&astat_bonus_size, 0); aggsum_init(&astat_dnode_size, 0); - aggsum_init(&astat_dbuf_size, 0); - aggsum_init(&astat_abd_chunk_waste_size, 0); + wmsum_init(&astat_dbuf_size, 0); + wmsum_init(&astat_abd_chunk_waste_size, 0); arc_anon->arcs_state = ARC_STATE_ANON; arc_mru->arcs_state = ARC_STATE_MRU; @@ -7575,14 +7571,14 @@ arc_state_fini(void) aggsum_fini(&arc_meta_used); aggsum_fini(&arc_size); - aggsum_fini(&astat_data_size); - aggsum_fini(&astat_metadata_size); - aggsum_fini(&astat_hdr_size); + wmsum_fini(&astat_data_size); + wmsum_fini(&astat_metadata_size); + wmsum_fini(&astat_hdr_size); aggsum_fini(&astat_l2_hdr_size); - aggsum_fini(&astat_bonus_size); + wmsum_fini(&astat_bonus_size); aggsum_fini(&astat_dnode_size); - aggsum_fini(&astat_dbuf_size); - aggsum_fini(&astat_abd_chunk_waste_size); + wmsum_fini(&astat_dbuf_size); + wmsum_fini(&astat_abd_chunk_waste_size); } uint64_t diff --git a/sys/contrib/openzfs/module/zfs/dataset_kstats.c b/sys/contrib/openzfs/module/zfs/dataset_kstats.c index e46a0926d557..3fbb24ddef5e 100644 --- a/sys/contrib/openzfs/module/zfs/dataset_kstats.c +++ b/sys/contrib/openzfs/module/zfs/dataset_kstats.c @@ -50,17 +50,17 @@ dataset_kstats_update(kstat_t *ksp, int rw) dataset_kstat_values_t *dkv = dk->dk_kstats->ks_data; dkv->dkv_writes.value.ui64 = - aggsum_value(&dk->dk_aggsums.das_writes); + wmsum_value(&dk->dk_sums.dss_writes); dkv->dkv_nwritten.value.ui64 = - aggsum_value(&dk->dk_aggsums.das_nwritten); + wmsum_value(&dk->dk_sums.dss_nwritten); dkv->dkv_reads.value.ui64 = - aggsum_value(&dk->dk_aggsums.das_reads); + wmsum_value(&dk->dk_sums.dss_reads); dkv->dkv_nread.value.ui64 = - aggsum_value(&dk->dk_aggsums.das_nread); + wmsum_value(&dk->dk_sums.dss_nread); dkv->dkv_nunlinks.value.ui64 = - aggsum_value(&dk->dk_aggsums.das_nunlinks); + wmsum_value(&dk->dk_sums.dss_nunlinks); dkv->dkv_nunlinked.value.ui64 = - aggsum_value(&dk->dk_aggsums.das_nunlinked); + wmsum_value(&dk->dk_sums.dss_nunlinked); return (0); } @@ -140,12 +140,12 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset) kstat_install(kstat); dk->dk_kstats = kstat; - aggsum_init(&dk->dk_aggsums.das_writes, 0); - aggsum_init(&dk->dk_aggsums.das_nwritten, 0); - aggsum_init(&dk->dk_aggsums.das_reads, 0); - aggsum_init(&dk->dk_aggsums.das_nread, 0); - aggsum_init(&dk->dk_aggsums.das_nunlinks, 0); - aggsum_init(&dk->dk_aggsums.das_nunlinked, 0); + wmsum_init(&dk->dk_sums.dss_writes, 0); + wmsum_init(&dk->dk_sums.dss_nwritten, 0); + wmsum_init(&dk->dk_sums.dss_reads, 0); + wmsum_init(&dk->dk_sums.dss_nread, 0); + wmsum_init(&dk->dk_sums.dss_nunlinks, 0); + wmsum_init(&dk->dk_sums.dss_nunlinked, 0); } void @@ -162,12 +162,12 @@ dataset_kstats_destroy(dataset_kstats_t *dk) kstat_delete(dk->dk_kstats); dk->dk_kstats = NULL; - aggsum_fini(&dk->dk_aggsums.das_writes); - aggsum_fini(&dk->dk_aggsums.das_nwritten); - aggsum_fini(&dk->dk_aggsums.das_reads); - aggsum_fini(&dk->dk_aggsums.das_nread); - aggsum_fini(&dk->dk_aggsums.das_nunlinks); - aggsum_fini(&dk->dk_aggsums.das_nunlinked); + wmsum_fini(&dk->dk_sums.dss_writes); + wmsum_fini(&dk->dk_sums.dss_nwritten); + wmsum_fini(&dk->dk_sums.dss_reads); + wmsum_fini(&dk->dk_sums.dss_nread); + wmsum_fini(&dk->dk_sums.dss_nunlinks); + wmsum_fini(&dk->dk_sums.dss_nunlinked); } void @@ -179,8 +179,8 @@ dataset_kstats_update_write_kstats(dataset_kstats_t *dk, if (dk->dk_kstats == NULL) return; - aggsum_add(&dk->dk_aggsums.das_writes, 1); - aggsum_add(&dk->dk_aggsums.das_nwritten, nwritten); + wmsum_add(&dk->dk_sums.dss_writes, 1); + wmsum_add(&dk->dk_sums.dss_nwritten, nwritten); } void @@ -192,8 +192,8 @@ dataset_kstats_update_read_kstats(dataset_kstats_t *dk, if (dk->dk_kstats == NULL) return; - aggsum_add(&dk->dk_aggsums.das_reads, 1); - aggsum_add(&dk->dk_aggsums.das_nread, nread); + wmsum_add(&dk->dk_sums.dss_reads, 1); + wmsum_add(&dk->dk_sums.dss_nread, nread); } void @@ -202,7 +202,7 @@ dataset_kstats_update_nunlinks_kstat(dataset_kstats_t *dk, int64_t delta) if (dk->dk_kstats == NULL) return; - aggsum_add(&dk->dk_aggsums.das_nunlinks, delta); + wmsum_add(&dk->dk_sums.dss_nunlinks, delta); } void @@ -211,5 +211,5 @@ dataset_kstats_update_nunlinked_kstat(dataset_kstats_t *dk, int64_t delta) if (dk->dk_kstats == NULL) return; - aggsum_add(&dk->dk_aggsums.das_nunlinked, delta); + wmsum_add(&dk->dk_sums.dss_nunlinked, delta); } diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index d48dc7943a24..764383b2d039 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -1442,10 +1442,8 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, zbookmark_phys_t zb; uint32_t aflags = ARC_FLAG_NOWAIT; int err, zio_flags; - boolean_t bonus_read; err = zio_flags = 0; - bonus_read = B_FALSE; DB_DNODE_ENTER(db); dn = DB_DNODE(db); ASSERT(!zfs_refcount_is_zero(&db->db_holds)); diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c index 123ea05b0436..a713e1329027 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_recv.c +++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c @@ -2880,8 +2880,8 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, offset_t *voffp) int err = 0; struct receive_writer_arg *rwa = kmem_zalloc(sizeof (*rwa), KM_SLEEP); - if (dsl_dataset_is_zapified(drc->drc_ds)) { - uint64_t bytes; + if (dsl_dataset_has_resume_receive_state(drc->drc_ds)) { + uint64_t bytes = 0; (void) zap_lookup(drc->drc_ds->ds_dir->dd_pool->dp_meta_objset, drc->drc_ds->ds_object, DS_FIELD_RESUME_BYTES, sizeof (bytes), 1, &bytes); diff --git a/sys/contrib/openzfs/module/zfs/dmu_traverse.c b/sys/contrib/openzfs/module/zfs/dmu_traverse.c index 31db49dae68c..862c0bf404ad 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_traverse.c +++ b/sys/contrib/openzfs/module/zfs/dmu_traverse.c @@ -41,6 +41,7 @@ int32_t zfs_pd_bytes_max = 50 * 1024 * 1024; /* 50MB */ int32_t send_holes_without_birth_time = 1; +int32_t zfs_traverse_indirect_prefetch_limit = 32; typedef struct prefetch_data { kmutex_t pd_mtx; @@ -176,7 +177,10 @@ resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp, return (RESUME_SKIP_NONE); } -static void +/* + * Returns B_TRUE, if prefetch read is issued, otherwise B_FALSE. + */ +static boolean_t traverse_prefetch_metadata(traverse_data_t *td, const blkptr_t *bp, const zbookmark_phys_t *zb) { @@ -184,18 +188,18 @@ traverse_prefetch_metadata(traverse_data_t *td, int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA)) - return; + return (B_FALSE); /* * If we are in the process of resuming, don't prefetch, because * some children will not be needed (and in fact may have already * been freed). */ if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume)) - return; + return (B_FALSE); if (BP_IS_HOLE(bp) || bp->blk_birth <= td->td_min_txg) - return; + return (B_FALSE); if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE) - return; + return (B_FALSE); ASSERT(!BP_IS_REDACTED(bp)); if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp)) @@ -203,6 +207,7 @@ traverse_prefetch_metadata(traverse_data_t *td, (void) arc_read(NULL, td->td_spa, bp, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); + return (B_TRUE); } static boolean_t @@ -295,7 +300,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, if (BP_GET_LEVEL(bp) > 0) { uint32_t flags = ARC_FLAG_WAIT; - int32_t i; + int32_t i, ptidx, pidx; + uint32_t prefetchlimit; int32_t epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; zbookmark_phys_t *czb; @@ -308,16 +314,46 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, czb = kmem_alloc(sizeof (zbookmark_phys_t), KM_SLEEP); + /* + * When performing a traversal it is beneficial to + * asynchronously read-ahead the upcoming indirect + * blocks since they will be needed shortly. However, + * since a 128k indirect (non-L0) block may contain up + * to 1024 128-byte block pointers, its preferable to not + * prefetch them all at once. Issuing a large number of + * async reads may effect performance, and the earlier + * the indirect blocks are prefetched the less likely + * they are to still be resident in the ARC when needed. + * Therefore, prefetching indirect blocks is limited to + * zfs_traverse_indirect_prefetch_limit=32 blocks by + * default. + * + * pidx: Index for which next prefetch to be issued. + * ptidx: Index at which next prefetch to be triggered. + */ + ptidx = 0; + pidx = 1; + prefetchlimit = zfs_traverse_indirect_prefetch_limit; for (i = 0; i < epb; i++) { - SET_BOOKMARK(czb, zb->zb_objset, zb->zb_object, - zb->zb_level - 1, - zb->zb_blkid * epb + i); - traverse_prefetch_metadata(td, - &((blkptr_t *)buf->b_data)[i], czb); - } + if (prefetchlimit && i == ptidx) { + ASSERT3S(ptidx, <=, pidx); + for (uint32_t prefetched = 0; pidx < epb && + prefetched < prefetchlimit; pidx++) { + SET_BOOKMARK(czb, zb->zb_objset, + zb->zb_object, zb->zb_level - 1, + zb->zb_blkid * epb + pidx); + if (traverse_prefetch_metadata(td, + &((blkptr_t *)buf->b_data)[pidx], + czb) == B_TRUE) { + prefetched++; + if (prefetched == + MAX(prefetchlimit / 2, 1)) + ptidx = pidx; + } + } + } - /* recursively visitbp() blocks below this */ - for (i = 0; i < epb; i++) { + /* recursively visitbp() blocks below this */ SET_BOOKMARK(czb, zb->zb_objset, zb->zb_object, zb->zb_level - 1, zb->zb_blkid * epb + i); @@ -777,6 +813,9 @@ EXPORT_SYMBOL(traverse_pool); ZFS_MODULE_PARAM(zfs, zfs_, pd_bytes_max, INT, ZMOD_RW, "Max number of bytes to prefetch"); +ZFS_MODULE_PARAM(zfs, zfs_, traverse_indirect_prefetch_limit, INT, ZMOD_RW, + "Traverse prefetch number of blocks pointed by indirect block"); + #if defined(_KERNEL) module_param_named(ignore_hole_birth, send_holes_without_birth_time, int, 0644); MODULE_PARM_DESC(ignore_hole_birth, diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c index eaba9c0c0e7f..0fc788e28fe4 100644 --- a/sys/contrib/openzfs/module/zfs/dnode.c +++ b/sys/contrib/openzfs/module/zfs/dnode.c @@ -754,7 +754,6 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) ASSERT(!RW_LOCK_HELD(&odn->dn_struct_rwlock)); ASSERT(MUTEX_NOT_HELD(&odn->dn_mtx)); ASSERT(MUTEX_NOT_HELD(&odn->dn_dbufs_mtx)); - ASSERT(!MUTEX_HELD(&odn->dn_zfetch.zf_lock)); /* Copy fields. */ ndn->dn_objset = odn->dn_objset; @@ -822,9 +821,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) ndn->dn_newgid = odn->dn_newgid; ndn->dn_newprojid = odn->dn_newprojid; ndn->dn_id_flags = odn->dn_id_flags; - dmu_zfetch_init(&ndn->dn_zfetch, NULL); - list_move_tail(&ndn->dn_zfetch.zf_stream, &odn->dn_zfetch.zf_stream); - ndn->dn_zfetch.zf_dnode = odn->dn_zfetch.zf_dnode; + dmu_zfetch_init(&ndn->dn_zfetch, ndn); /* * Update back pointers. Updating the handle fixes the back pointer of @@ -832,9 +829,6 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) */ ASSERT(ndn->dn_handle->dnh_dnode == odn); ndn->dn_handle->dnh_dnode = ndn; - if (ndn->dn_zfetch.zf_dnode == odn) { - ndn->dn_zfetch.zf_dnode = ndn; - } /* * Invalidate the original dnode by clearing all of its back pointers. diff --git a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c index 2faf1af52991..bead7da2237f 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c +++ b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c @@ -236,7 +236,7 @@ dsl_bookmark_create_check_impl(dsl_pool_t *dp, error = SET_ERROR(EEXIST); goto eholdnewbmds; default: - /* dsl_bookmark_lookup_impl already did SET_ERRROR */ + /* dsl_bookmark_lookup_impl already did SET_ERROR */ goto eholdnewbmds; } @@ -271,7 +271,7 @@ dsl_bookmark_create_check_impl(dsl_pool_t *dp, error = SET_ERROR(ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR); break; default: - /* dsl_bookmark_lookup already did SET_ERRROR */ + /* dsl_bookmark_lookup already did SET_ERROR */ break; } } else { @@ -536,7 +536,7 @@ dsl_bookmark_create_sync_impl_book( * Reasoning: * - The zbm_redaction_obj would be referred to by both source and new * bookmark, but would be destroyed once either source or new is - * destroyed, resulting in use-after-free of the referrred object. + * destroyed, resulting in use-after-free of the referred object. * - User expectation when issuing the `zfs bookmark` command is that * a normal bookmark of the source is created * diff --git a/sys/contrib/openzfs/module/zfs/dsl_crypt.c b/sys/contrib/openzfs/module/zfs/dsl_crypt.c index e38ec0cae827..26d4c2fe7e33 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_crypt.c +++ b/sys/contrib/openzfs/module/zfs/dsl_crypt.c @@ -2007,6 +2007,14 @@ dsl_crypto_recv_raw_objset_check(dsl_dataset_t *ds, dsl_dataset_t *fromds, if (ret != 0) return (ret); + /* + * Useraccounting is not portable and must be done with the keys loaded. + * Therefore, whenever we do any kind of receive the useraccounting + * must not be present. + */ + ASSERT0(os->os_flags & OBJSET_FLAG_USERACCOUNTING_COMPLETE); + ASSERT0(os->os_flags & OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE); + mdn = DMU_META_DNODE(os); /* @@ -2097,9 +2105,6 @@ dsl_crypto_recv_raw_objset_sync(dsl_dataset_t *ds, dmu_objset_type_t ostype, */ arc_release(os->os_phys_buf, &os->os_phys_buf); bcopy(portable_mac, os->os_phys->os_portable_mac, ZIO_OBJSET_MAC_LEN); - os->os_phys->os_flags &= ~OBJSET_FLAG_USERACCOUNTING_COMPLETE; - os->os_phys->os_flags &= ~OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE; - os->os_flags = os->os_phys->os_flags; bzero(os->os_phys->os_local_mac, ZIO_OBJSET_MAC_LEN); os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; diff --git a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c index bad2d56eefdd..a77e381520db 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c +++ b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c @@ -909,15 +909,16 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg, } typedef struct livelist_entry { - const blkptr_t *le_bp; + blkptr_t le_bp; + uint32_t le_refcnt; avl_node_t le_node; } livelist_entry_t; static int livelist_compare(const void *larg, const void *rarg) { - const blkptr_t *l = ((livelist_entry_t *)larg)->le_bp; - const blkptr_t *r = ((livelist_entry_t *)rarg)->le_bp; + const blkptr_t *l = &((livelist_entry_t *)larg)->le_bp; + const blkptr_t *r = &((livelist_entry_t *)rarg)->le_bp; /* Sort them according to dva[0] */ uint64_t l_dva0_vdev = DVA_GET_VDEV(&l->blk_dva[0]); @@ -944,6 +945,11 @@ struct livelist_iter_arg { * Expects an AVL tree which is incrementally filled will FREE blkptrs * and used to match up ALLOC/FREE pairs. ALLOC'd blkptrs without a * corresponding FREE are stored in the supplied bplist. + * + * Note that multiple FREE and ALLOC entries for the same blkptr may + * be encountered when dedup is involved. For this reason we keep a + * refcount for all the FREE entries of each blkptr and ensure that + * each of those FREE entries has a corresponding ALLOC preceding it. */ static int dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed, @@ -957,23 +963,47 @@ dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed, if ((t != NULL) && (zthr_has_waiters(t) || zthr_iscancelled(t))) return (SET_ERROR(EINTR)); + + livelist_entry_t node; + node.le_bp = *bp; + livelist_entry_t *found = avl_find(avl, &node, NULL); if (bp_freed) { - livelist_entry_t *node = kmem_alloc(sizeof (livelist_entry_t), - KM_SLEEP); - blkptr_t *temp_bp = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); - *temp_bp = *bp; - node->le_bp = temp_bp; - avl_add(avl, node); - } else { - livelist_entry_t node; - node.le_bp = bp; - livelist_entry_t *found = avl_find(avl, &node, NULL); - if (found != NULL) { - avl_remove(avl, found); - kmem_free((blkptr_t *)found->le_bp, sizeof (blkptr_t)); - kmem_free(found, sizeof (livelist_entry_t)); + if (found == NULL) { + /* first free entry for this blkptr */ + livelist_entry_t *e = + kmem_alloc(sizeof (livelist_entry_t), KM_SLEEP); + e->le_bp = *bp; + e->le_refcnt = 1; + avl_add(avl, e); } else { + /* dedup block free */ + ASSERT(BP_GET_DEDUP(bp)); + ASSERT3U(BP_GET_CHECKSUM(bp), ==, + BP_GET_CHECKSUM(&found->le_bp)); + ASSERT3U(found->le_refcnt + 1, >, found->le_refcnt); + found->le_refcnt++; + } + } else { + if (found == NULL) { + /* block is currently marked as allocated */ bplist_append(to_free, bp); + } else { + /* alloc matches a free entry */ + ASSERT3U(found->le_refcnt, !=, 0); + found->le_refcnt--; + if (found->le_refcnt == 0) { + /* all tracked free pairs have been matched */ + avl_remove(avl, found); + kmem_free(found, sizeof (livelist_entry_t)); + } else { + /* + * This is definitely a deduped blkptr so + * let's validate it. + */ + ASSERT(BP_GET_DEDUP(bp)); + ASSERT3U(BP_GET_CHECKSUM(bp), ==, + BP_GET_CHECKSUM(&found->le_bp)); + } } } return (0); @@ -999,6 +1029,7 @@ dsl_process_sub_livelist(bpobj_t *bpobj, bplist_t *to_free, zthr_t *t, }; int err = bpobj_iterate_nofree(bpobj, dsl_livelist_iterate, &arg, size); + VERIFY0(avl_numnodes(&avl)); avl_destroy(&avl); return (err); } diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c index a54cd6ca800e..62ee9bb9ab6c 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_scan.c +++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c @@ -126,7 +126,7 @@ static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj, static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg); static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj); static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx); -static uint64_t dsl_scan_count_leaves(vdev_t *vd); +static uint64_t dsl_scan_count_data_disks(vdev_t *vd); extern int zfs_vdev_async_write_active_min_dirty_percent; @@ -451,7 +451,7 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) * phase are done per top-level vdev and are handled separately. */ scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * - dsl_scan_count_leaves(spa->spa_root_vdev), 1ULL << 20); + dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20); avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t), offsetof(scan_ds_t, sds_node)); @@ -701,7 +701,7 @@ dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx, state_sync_type_t sync_type) } /* ARGSUSED */ -static int +int dsl_scan_setup_check(void *arg, dmu_tx_t *tx) { dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; @@ -2759,22 +2759,16 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) } static uint64_t -dsl_scan_count_leaves(vdev_t *vd) +dsl_scan_count_data_disks(vdev_t *rvd) { uint64_t i, leaves = 0; - /* we only count leaves that belong to the main pool and are readable */ - if (vd->vdev_islog || vd->vdev_isspare || - vd->vdev_isl2cache || !vdev_readable(vd)) - return (0); - - if (vd->vdev_ops->vdev_op_leaf) - return (1); - - for (i = 0; i < vd->vdev_children; i++) { - leaves += dsl_scan_count_leaves(vd->vdev_child[i]); + for (i = 0; i < rvd->vdev_children; i++) { + vdev_t *vd = rvd->vdev_child[i]; + if (vd->vdev_islog || vd->vdev_isspare || vd->vdev_isl2cache) + continue; + leaves += vdev_get_ndisks(vd) - vdev_get_nparity(vd); } - return (leaves); } @@ -3017,8 +3011,6 @@ scan_io_queues_run_one(void *arg) range_seg_t *rs = NULL; scan_io_t *sio = NULL; list_t sio_list; - uint64_t bytes_per_leaf = zfs_scan_vdev_limit; - uint64_t nr_leaves = dsl_scan_count_leaves(queue->q_vd); ASSERT(queue->q_scn->scn_is_sorted); @@ -3026,9 +3018,9 @@ scan_io_queues_run_one(void *arg) offsetof(scan_io_t, sio_nodes.sio_list_node)); mutex_enter(q_lock); - /* calculate maximum in-flight bytes for this txg (min 1MB) */ - queue->q_maxinflight_bytes = - MAX(nr_leaves * bytes_per_leaf, 1ULL << 20); + /* Calculate maximum in-flight bytes for this vdev. */ + queue->q_maxinflight_bytes = MAX(1, zfs_scan_vdev_limit * + (vdev_get_ndisks(queue->q_vd) - vdev_get_nparity(queue->q_vd))); /* reset per-queue scan statistics for this txg */ queue->q_total_seg_size_this_txg = 0; @@ -3665,16 +3657,14 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) /* Need to scan metadata for more blocks to scrub */ dsl_scan_phys_t *scnp = &scn->scn_phys; taskqid_t prefetch_tqid; - uint64_t bytes_per_leaf = zfs_scan_vdev_limit; - uint64_t nr_leaves = dsl_scan_count_leaves(spa->spa_root_vdev); /* * Recalculate the max number of in-flight bytes for pool-wide * scanning operations (minimum 1MB). Limits for the issuing * phase are done per top-level vdev and are handled separately. */ - scn->scn_maxinflight_bytes = - MAX(nr_leaves * bytes_per_leaf, 1ULL << 20); + scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * + dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20); if (scnp->scn_ddt_bookmark.ddb_class <= scnp->scn_ddt_class_max) { @@ -4050,9 +4040,8 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags, size_t size = BP_GET_PSIZE(bp); abd_t *data = abd_alloc_for_io(size, B_FALSE); - ASSERT3U(scn->scn_maxinflight_bytes, >, 0); - if (queue == NULL) { + ASSERT3U(scn->scn_maxinflight_bytes, >, 0); mutex_enter(&spa->spa_scrub_lock); while (spa->spa_scrub_inflight >= scn->scn_maxinflight_bytes) cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); @@ -4061,6 +4050,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags, } else { kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock; + ASSERT3U(queue->q_maxinflight_bytes, >, 0); mutex_enter(q_lock); while (queue->q_inflight_bytes >= queue->q_maxinflight_bytes) cv_wait(&queue->q_zio_cv, q_lock); diff --git a/sys/contrib/openzfs/module/zfs/fm.c b/sys/contrib/openzfs/module/zfs/fm.c index 3070cab1e49d..dff7d8ece4be 100644 --- a/sys/contrib/openzfs/module/zfs/fm.c +++ b/sys/contrib/openzfs/module/zfs/fm.c @@ -66,12 +66,9 @@ #ifdef _KERNEL #include <sys/atomic.h> #include <sys/condvar.h> -#include <sys/console.h> #include <sys/zfs_ioctl.h> -int zfs_zevent_len_max = 0; -int zfs_zevent_cols = 80; -int zfs_zevent_console = 0; +int zfs_zevent_len_max = 512; static int zevent_len_cur = 0; static int zevent_waiters = 0; @@ -118,307 +115,6 @@ kstat_t *fm_ksp; #ifdef _KERNEL -/* - * Formatting utility function for fm_nvprintr. We attempt to wrap chunks of - * output so they aren't split across console lines, and return the end column. - */ -/*PRINTFLIKE4*/ -static int -fm_printf(int depth, int c, int cols, const char *format, ...) -{ - va_list ap; - int width; - char c1; - - va_start(ap, format); - width = vsnprintf(&c1, sizeof (c1), format, ap); - va_end(ap); - - if (c + width >= cols) { - console_printf("\n"); - c = 0; - if (format[0] != ' ' && depth > 0) { - console_printf(" "); - c++; - } - } - - va_start(ap, format); - console_vprintf(format, ap); - va_end(ap); - - return ((c + width) % cols); -} - -/* - * Recursively print an nvlist in the specified column width and return the - * column we end up in. This function is called recursively by fm_nvprint(), - * below. We generically format the entire nvpair using hexadecimal - * integers and strings, and elide any integer arrays. Arrays are basically - * used for cache dumps right now, so we suppress them so as not to overwhelm - * the amount of console output we produce at panic time. This can be further - * enhanced as FMA technology grows based upon the needs of consumers. All - * FMA telemetry is logged using the dump device transport, so the console - * output serves only as a fallback in case this procedure is unsuccessful. - */ -static int -fm_nvprintr(nvlist_t *nvl, int d, int c, int cols) -{ - nvpair_t *nvp; - - for (nvp = nvlist_next_nvpair(nvl, NULL); - nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) { - - data_type_t type = nvpair_type(nvp); - const char *name = nvpair_name(nvp); - - boolean_t b; - uint8_t i8; - uint16_t i16; - uint32_t i32; - uint64_t i64; - char *str; - nvlist_t *cnv; - - if (strcmp(name, FM_CLASS) == 0) - continue; /* already printed by caller */ - - c = fm_printf(d, c, cols, " %s=", name); - - switch (type) { - case DATA_TYPE_BOOLEAN: - c = fm_printf(d + 1, c, cols, " 1"); - break; - - case DATA_TYPE_BOOLEAN_VALUE: - (void) nvpair_value_boolean_value(nvp, &b); - c = fm_printf(d + 1, c, cols, b ? "1" : "0"); - break; - - case DATA_TYPE_BYTE: - (void) nvpair_value_byte(nvp, &i8); - c = fm_printf(d + 1, c, cols, "0x%x", i8); - break; - - case DATA_TYPE_INT8: - (void) nvpair_value_int8(nvp, (void *)&i8); - c = fm_printf(d + 1, c, cols, "0x%x", i8); - break; - - case DATA_TYPE_UINT8: - (void) nvpair_value_uint8(nvp, &i8); - c = fm_printf(d + 1, c, cols, "0x%x", i8); - break; - - case DATA_TYPE_INT16: - (void) nvpair_value_int16(nvp, (void *)&i16); - c = fm_printf(d + 1, c, cols, "0x%x", i16); - break; - - case DATA_TYPE_UINT16: - (void) nvpair_value_uint16(nvp, &i16); - c = fm_printf(d + 1, c, cols, "0x%x", i16); - break; - - case DATA_TYPE_INT32: - (void) nvpair_value_int32(nvp, (void *)&i32); - c = fm_printf(d + 1, c, cols, "0x%x", i32); - break; - - case DATA_TYPE_UINT32: - (void) nvpair_value_uint32(nvp, &i32); - c = fm_printf(d + 1, c, cols, "0x%x", i32); - break; - - case DATA_TYPE_INT64: - (void) nvpair_value_int64(nvp, (void *)&i64); - c = fm_printf(d + 1, c, cols, "0x%llx", - (u_longlong_t)i64); - break; - - case DATA_TYPE_UINT64: - (void) nvpair_value_uint64(nvp, &i64); - c = fm_printf(d + 1, c, cols, "0x%llx", - (u_longlong_t)i64); - break; - - case DATA_TYPE_HRTIME: - (void) nvpair_value_hrtime(nvp, (void *)&i64); - c = fm_printf(d + 1, c, cols, "0x%llx", - (u_longlong_t)i64); - break; - - case DATA_TYPE_STRING: - (void) nvpair_value_string(nvp, &str); - c = fm_printf(d + 1, c, cols, "\"%s\"", - str ? str : "<NULL>"); - break; - - case DATA_TYPE_NVLIST: - c = fm_printf(d + 1, c, cols, "["); - (void) nvpair_value_nvlist(nvp, &cnv); - c = fm_nvprintr(cnv, d + 1, c, cols); - c = fm_printf(d + 1, c, cols, " ]"); - break; - - case DATA_TYPE_NVLIST_ARRAY: { - nvlist_t **val; - uint_t i, nelem; - - c = fm_printf(d + 1, c, cols, "["); - (void) nvpair_value_nvlist_array(nvp, &val, &nelem); - for (i = 0; i < nelem; i++) { - c = fm_nvprintr(val[i], d + 1, c, cols); - } - c = fm_printf(d + 1, c, cols, " ]"); - } - break; - - case DATA_TYPE_INT8_ARRAY: { - int8_t *val; - uint_t i, nelem; - - c = fm_printf(d + 1, c, cols, "[ "); - (void) nvpair_value_int8_array(nvp, &val, &nelem); - for (i = 0; i < nelem; i++) - c = fm_printf(d + 1, c, cols, "0x%llx ", - (u_longlong_t)val[i]); - - c = fm_printf(d + 1, c, cols, "]"); - break; - } - - case DATA_TYPE_UINT8_ARRAY: { - uint8_t *val; - uint_t i, nelem; - - c = fm_printf(d + 1, c, cols, "[ "); - (void) nvpair_value_uint8_array(nvp, &val, &nelem); - for (i = 0; i < nelem; i++) - c = fm_printf(d + 1, c, cols, "0x%llx ", - (u_longlong_t)val[i]); - - c = fm_printf(d + 1, c, cols, "]"); - break; - } - - case DATA_TYPE_INT16_ARRAY: { - int16_t *val; - uint_t i, nelem; - - c = fm_printf(d + 1, c, cols, "[ "); - (void) nvpair_value_int16_array(nvp, &val, &nelem); - for (i = 0; i < nelem; i++) - c = fm_printf(d + 1, c, cols, "0x%llx ", - (u_longlong_t)val[i]); - - c = fm_printf(d + 1, c, cols, "]"); - break; - } - - case DATA_TYPE_UINT16_ARRAY: { - uint16_t *val; - uint_t i, nelem; - - c = fm_printf(d + 1, c, cols, "[ "); - (void) nvpair_value_uint16_array(nvp, &val, &nelem); - for (i = 0; i < nelem; i++) - c = fm_printf(d + 1, c, cols, "0x%llx ", - (u_longlong_t)val[i]); - - c = fm_printf(d + 1, c, cols, "]"); - break; - } - - case DATA_TYPE_INT32_ARRAY: { - int32_t *val; - uint_t i, nelem; - - c = fm_printf(d + 1, c, cols, "[ "); - (void) nvpair_value_int32_array(nvp, &val, &nelem); - for (i = 0; i < nelem; i++) - c = fm_printf(d + 1, c, cols, "0x%llx ", - (u_longlong_t)val[i]); - - c = fm_printf(d + 1, c, cols, "]"); - break; - } - - case DATA_TYPE_UINT32_ARRAY: { - uint32_t *val; - uint_t i, nelem; - - c = fm_printf(d + 1, c, cols, "[ "); - (void) nvpair_value_uint32_array(nvp, &val, &nelem); - for (i = 0; i < nelem; i++) - c = fm_printf(d + 1, c, cols, "0x%llx ", - (u_longlong_t)val[i]); - - c = fm_printf(d + 1, c, cols, "]"); - break; - } - - case DATA_TYPE_INT64_ARRAY: { - int64_t *val; - uint_t i, nelem; - - c = fm_printf(d + 1, c, cols, "[ "); - (void) nvpair_value_int64_array(nvp, &val, &nelem); - for (i = 0; i < nelem; i++) - c = fm_printf(d + 1, c, cols, "0x%llx ", - (u_longlong_t)val[i]); - - c = fm_printf(d + 1, c, cols, "]"); - break; - } - - case DATA_TYPE_UINT64_ARRAY: { - uint64_t *val; - uint_t i, nelem; - - c = fm_printf(d + 1, c, cols, "[ "); - (void) nvpair_value_uint64_array(nvp, &val, &nelem); - for (i = 0; i < nelem; i++) - c = fm_printf(d + 1, c, cols, "0x%llx ", - (u_longlong_t)val[i]); - - c = fm_printf(d + 1, c, cols, "]"); - break; - } - - case DATA_TYPE_STRING_ARRAY: - case DATA_TYPE_BOOLEAN_ARRAY: - case DATA_TYPE_BYTE_ARRAY: - c = fm_printf(d + 1, c, cols, "[...]"); - break; - - case DATA_TYPE_UNKNOWN: - case DATA_TYPE_DONTCARE: - c = fm_printf(d + 1, c, cols, "<unknown>"); - break; - } - } - - return (c); -} - -void -fm_nvprint(nvlist_t *nvl) -{ - char *class; - int c = 0; - - console_printf("\n"); - - if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0) - c = fm_printf(0, c, zfs_zevent_cols, "%s", class); - - if (fm_nvprintr(nvl, 0, c, zfs_zevent_cols) != 0) - console_printf("\n"); - - console_printf("\n"); -} - static zevent_t * zfs_zevent_alloc(void) { @@ -542,9 +238,6 @@ zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb) goto out; } - if (zfs_zevent_console) - fm_nvprint(nvl); - ev = zfs_zevent_alloc(); if (ev == NULL) { atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64); @@ -657,8 +350,7 @@ zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size, #ifdef _KERNEL /* Include events dropped due to rate limiting */ - *dropped += ratelimit_dropped; - ratelimit_dropped = 0; + *dropped += atomic_swap_64(&ratelimit_dropped, 0); #endif ze->ze_dropped = 0; out: @@ -1621,9 +1313,6 @@ fm_init(void) zevent_len_cur = 0; zevent_flags = 0; - if (zfs_zevent_len_max == 0) - zfs_zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4); - /* Initialize zevent allocation and generation kstats */ fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED, sizeof (struct erpt_kstat) / sizeof (kstat_named_t), @@ -1677,9 +1366,3 @@ fm_fini(void) ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, INT, ZMOD_RW, "Max event queue length"); - -ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, cols, INT, ZMOD_RW, - "Max event column width"); - -ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, console, INT, ZMOD_RW, - "Log events to the console"); diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c index 5170c9ca226f..26995575adaa 100644 --- a/sys/contrib/openzfs/module/zfs/spa.c +++ b/sys/contrib/openzfs/module/zfs/spa.c @@ -108,6 +108,7 @@ int zfs_ccw_retry_interval = 300; typedef enum zti_modes { ZTI_MODE_FIXED, /* value is # of threads (min 1) */ ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */ + ZTI_MODE_SCALE, /* Taskqs scale with CPUs. */ ZTI_MODE_NULL, /* don't create a taskq */ ZTI_NMODES } zti_modes_t; @@ -115,6 +116,7 @@ typedef enum zti_modes { #define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) } #define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 } #define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 } +#define ZTI_SCALE { ZTI_MODE_SCALE, 0, 1 } #define ZTI_NULL { ZTI_MODE_NULL, 0, 0 } #define ZTI_N(n) ZTI_P(n, 1) @@ -141,7 +143,8 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { * point of lock contention. The ZTI_P(#, #) macro indicates that we need an * additional degree of parallelism specified by the number of threads per- * taskq and the number of taskqs; when dispatching an event in this case, the - * particular taskq is chosen at random. + * particular taskq is chosen at random. ZTI_SCALE is similar to ZTI_BATCH, + * but with number of taskqs also scaling with number of CPUs. * * The different taskq priorities are to handle the different contexts (issue * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that @@ -150,9 +153,9 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */ - { ZTI_N(8), ZTI_NULL, ZTI_P(12, 8), ZTI_NULL }, /* READ */ - { ZTI_BATCH, ZTI_N(5), ZTI_P(12, 8), ZTI_N(5) }, /* WRITE */ - { ZTI_P(12, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ + { ZTI_N(8), ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* READ */ + { ZTI_BATCH, ZTI_N(5), ZTI_SCALE, ZTI_N(5) }, /* WRITE */ + { ZTI_SCALE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */ { ZTI_N(4), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* TRIM */ @@ -164,7 +167,8 @@ static boolean_t spa_has_active_shared_spare(spa_t *spa); static int spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport); static void spa_vdev_resilver_done(spa_t *spa); -uint_t zio_taskq_batch_pct = 75; /* 1 thread per cpu in pset */ +uint_t zio_taskq_batch_pct = 80; /* 1 thread per cpu in pset */ +uint_t zio_taskq_batch_tpq; /* threads per taskq */ boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ uint_t zio_taskq_basedc = 80; /* base duty cycle */ @@ -957,25 +961,12 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) uint_t value = ztip->zti_value; uint_t count = ztip->zti_count; spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; - uint_t flags = 0; + uint_t cpus, flags = TASKQ_DYNAMIC; boolean_t batch = B_FALSE; - if (mode == ZTI_MODE_NULL) { - tqs->stqs_count = 0; - tqs->stqs_taskq = NULL; - return; - } - - ASSERT3U(count, >, 0); - - tqs->stqs_count = count; - tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP); - switch (mode) { case ZTI_MODE_FIXED: - ASSERT3U(value, >=, 1); - value = MAX(value, 1); - flags |= TASKQ_DYNAMIC; + ASSERT3U(value, >, 0); break; case ZTI_MODE_BATCH: @@ -984,6 +975,48 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) value = MIN(zio_taskq_batch_pct, 100); break; + case ZTI_MODE_SCALE: + flags |= TASKQ_THREADS_CPU_PCT; + /* + * We want more taskqs to reduce lock contention, but we want + * less for better request ordering and CPU utilization. + */ + cpus = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100); + if (zio_taskq_batch_tpq > 0) { + count = MAX(1, (cpus + zio_taskq_batch_tpq / 2) / + zio_taskq_batch_tpq); + } else { + /* + * Prefer 6 threads per taskq, but no more taskqs + * than threads in them on large systems. For 80%: + * + * taskq taskq total + * cpus taskqs percent threads threads + * ------- ------- ------- ------- ------- + * 1 1 80% 1 1 + * 2 1 80% 1 1 + * 4 1 80% 3 3 + * 8 2 40% 3 6 + * 16 3 27% 4 12 + * 32 5 16% 5 25 + * 64 7 11% 7 49 + * 128 10 8% 10 100 + * 256 14 6% 15 210 + */ + count = 1 + cpus / 6; + while (count * count > cpus) + count--; + } + /* Limit each taskq within 100% to not trigger assertion. */ + count = MAX(count, (zio_taskq_batch_pct + 99) / 100); + value = (zio_taskq_batch_pct + count / 2) / count; + break; + + case ZTI_MODE_NULL: + tqs->stqs_count = 0; + tqs->stqs_taskq = NULL; + return; + default: panic("unrecognized mode for %s_%s taskq (%u:%u) in " "spa_activate()", @@ -991,12 +1024,20 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) break; } + ASSERT3U(count, >, 0); + tqs->stqs_count = count; + tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP); + for (uint_t i = 0; i < count; i++) { taskq_t *tq; char name[32]; - (void) snprintf(name, sizeof (name), "%s_%s", - zio_type_name[t], zio_taskq_types[q]); + if (count > 1) + (void) snprintf(name, sizeof (name), "%s_%s_%u", + zio_type_name[t], zio_taskq_types[q], i); + else + (void) snprintf(name, sizeof (name), "%s_%s", + zio_type_name[t], zio_taskq_types[q]); if (zio_taskq_sysdc && spa->spa_proc != &p0) { if (batch) @@ -6496,7 +6537,7 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot) /* * The virtual dRAID spares must be added after vdev tree is created - * and the vdev guids are generated. The guid of their assoicated + * and the vdev guids are generated. The guid of their associated * dRAID is stored in the config and used when opening the spare. */ if ((error = vdev_draid_spare_create(nvroot, vd, &ndraid, @@ -9848,7 +9889,7 @@ EXPORT_SYMBOL(spa_event_notify); /* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_shift, INT, ZMOD_RW, - "log2(fraction of arc that can be used by inflight I/Os when " + "log2 fraction of arc that can be used by inflight I/Os when " "verifying pool during import"); ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_metadata, INT, ZMOD_RW, @@ -9863,6 +9904,9 @@ ZFS_MODULE_PARAM(zfs_spa, spa_, load_print_vdev_tree, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_batch_pct, UINT, ZMOD_RD, "Percentage of CPUs to run an IO worker thread"); +ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_batch_tpq, UINT, ZMOD_RD, + "Number of threads per IO worker taskqueue"); + ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, ULONG, ZMOD_RW, "Allow importing pool with up to this number of missing top-level " "vdevs (in read-only mode)"); diff --git a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c index 5c55d32ec066..f4c2910ad7fe 100644 --- a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c +++ b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c @@ -1290,7 +1290,7 @@ ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_amt, ULONG, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_ppm, ULONG, ZMOD_RW, "Percentage of the overall system memory that ZFS allows to be " "used for unflushed changes (value is calculated over 1000000 for " - "finer granularity"); + "finer granularity)"); ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_max, ULONG, ZMOD_RW, "Hard limit (upper-bound) in the size of the space map log " diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c index c536a1c6cda0..5e14d71f1946 100644 --- a/sys/contrib/openzfs/module/zfs/vdev.c +++ b/sys/contrib/openzfs/module/zfs/vdev.c @@ -28,6 +28,7 @@ * Copyright 2017 Joyent, Inc. * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2019, Datto Inc. All rights reserved. + * Copyright [2021] Hewlett Packard Enterprise Development LP */ #include <sys/zfs_context.h> @@ -625,6 +626,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) */ zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_slow_io_events_per_second, 1); + zfs_ratelimit_init(&vd->vdev_deadman_rl, &zfs_slow_io_events_per_second, + 1); zfs_ratelimit_init(&vd->vdev_checksum_rl, &zfs_checksum_events_per_second, 1); @@ -1106,6 +1109,7 @@ vdev_free(vdev_t *vd) cv_destroy(&vd->vdev_rebuild_cv); zfs_ratelimit_fini(&vd->vdev_delay_rl); + zfs_ratelimit_fini(&vd->vdev_deadman_rl); zfs_ratelimit_fini(&vd->vdev_checksum_rl); if (vd == spa->spa_root_vdev) @@ -1372,7 +1376,7 @@ vdev_metaslab_group_create(vdev_t *vd) /* * The spa ashift min/max only apply for the normal metaslab - * class. Class destination is late binding so ashift boundry + * class. Class destination is late binding so ashift boundary * setting had to wait until now. */ if (vd->vdev_top == vd && vd->vdev_ashift != 0 && @@ -2046,7 +2050,7 @@ vdev_open(vdev_t *vd) vd->vdev_max_asize = max_asize; /* - * If the vdev_ashift was not overriden at creation time, + * If the vdev_ashift was not overridden at creation time, * then set it the logical ashift and optimize the ashift. */ if (vd->vdev_ashift == 0) { @@ -2116,7 +2120,7 @@ vdev_open(vdev_t *vd) } /* - * Track the the minimum allocation size. + * Track the minimum allocation size. */ if (vd->vdev_top == vd && vd->vdev_ashift != 0 && vd->vdev_islog == 0 && vd->vdev_aux == NULL) { @@ -2219,7 +2223,7 @@ vdev_validate(vdev_t *vd) txg = spa_last_synced_txg(spa); if ((label = vdev_label_read_config(vd, txg)) == NULL) { - vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_BAD_LABEL); vdev_dbgmsg(vd, "vdev_validate: failed reading config for " "txg %llu", (u_longlong_t)txg); @@ -4570,7 +4574,7 @@ vdev_stat_update(zio_t *zio, uint64_t psize) /* * Solely for the purposes of 'zpool iostat -lqrw' - * reporting use the priority to catagorize the IO. + * reporting use the priority to categorize the IO. * Only the following are reported to user space: * * ZIO_PRIORITY_SYNC_READ, diff --git a/sys/contrib/openzfs/module/zfs/vdev_draid.c b/sys/contrib/openzfs/module/zfs/vdev_draid.c index fb2143e94689..20b1457f0ce8 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_draid.c +++ b/sys/contrib/openzfs/module/zfs/vdev_draid.c @@ -812,7 +812,12 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr) /* this is a "big column", nothing to add */ ASSERT3P(rc->rc_abd, !=, NULL); } else { - /* short data column, add a skip sector */ + /* + * short data column, add a skip sector and clear + * rc_tried to force the entire column to be re-read + * thereby including the missing skip sector data + * which is needed for reconstruction. + */ ASSERT3U(rc->rc_size + skip_size, ==, parity_size); ASSERT3U(rr->rr_nempty, !=, 0); ASSERT3P(rc->rc_abd, !=, NULL); @@ -823,6 +828,7 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr) abd_gang_add(rc->rc_abd, abd_get_offset_size( rr->rr_abd_empty, skip_off, skip_size), B_TRUE); skip_off += skip_size; + rc->rc_tried = 0; } /* @@ -1003,7 +1009,8 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset, rc->rc_error = 0; rc->rc_tried = 0; rc->rc_skipped = 0; - rc->rc_repair = 0; + rc->rc_force_repair = 0; + rc->rc_allow_repair = 1; rc->rc_need_orig_restore = B_FALSE; if (q == 0 && i >= bc) @@ -1885,6 +1892,36 @@ vdev_draid_io_start_read(zio_t *zio, raidz_row_t *rr) vdev_t *svd; /* + * Sequential rebuilds need to always consider the data + * on the child being rebuilt to be stale. This is + * important when all columns are available to aid + * known reconstruction in identifing which columns + * contain incorrect data. + * + * Furthermore, all repairs need to be constrained to + * the devices being rebuilt because without a checksum + * we cannot verify the data is actually correct and + * performing an incorrect repair could result in + * locking in damage and making the data unrecoverable. + */ + if (zio->io_priority == ZIO_PRIORITY_REBUILD) { + if (vdev_draid_rebuilding(cvd)) { + if (c >= rr->rr_firstdatacol) + rr->rr_missingdata++; + else + rr->rr_missingparity++; + rc->rc_error = SET_ERROR(ESTALE); + rc->rc_skipped = 1; + rc->rc_allow_repair = 1; + continue; + } else { + rc->rc_allow_repair = 0; + } + } else { + rc->rc_allow_repair = 1; + } + + /* * If this child is a distributed spare then the * offset might reside on the vdev being replaced. * In which case this data must be written to the @@ -1897,7 +1934,10 @@ vdev_draid_io_start_read(zio_t *zio, raidz_row_t *rr) rc->rc_offset); if (svd && (svd->vdev_ops == &vdev_spare_ops || svd->vdev_ops == &vdev_replacing_ops)) { - rc->rc_repair = 1; + rc->rc_force_repair = 1; + + if (vdev_draid_rebuilding(svd)) + rc->rc_allow_repair = 1; } } @@ -1908,7 +1948,8 @@ vdev_draid_io_start_read(zio_t *zio, raidz_row_t *rr) if ((cvd->vdev_ops == &vdev_spare_ops || cvd->vdev_ops == &vdev_replacing_ops) && vdev_draid_rebuilding(cvd)) { - rc->rc_repair = 1; + rc->rc_force_repair = 1; + rc->rc_allow_repair = 1; } } } diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c index bafb2c767b2e..e539e9aa2d70 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c +++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c @@ -181,7 +181,7 @@ int zfs_condense_indirect_vdevs_enable = B_TRUE; * condenses. Higher values will condense less often (causing less * i/o); lower values will reduce the mapping size more quickly. */ -int zfs_indirect_condense_obsolete_pct = 25; +int zfs_condense_indirect_obsolete_pct = 25; /* * Condense if the obsolete space map takes up more than this amount of @@ -445,7 +445,7 @@ vdev_indirect_should_condense(vdev_t *vd) * by the mapping. */ if (bytes_obsolete * 100 / bytes_mapped >= - zfs_indirect_condense_obsolete_pct && + zfs_condense_indirect_obsolete_pct && mapping_size > zfs_condense_min_mapping_bytes) { zfs_dbgmsg("should condense vdev %llu because obsolete " "spacemap covers %d%% of %lluMB mapping", @@ -1424,11 +1424,6 @@ vdev_indirect_repair(zio_t *zio) { indirect_vsd_t *iv = zio->io_vsd; - enum zio_flag flags = ZIO_FLAG_IO_REPAIR; - - if (!(zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) - flags |= ZIO_FLAG_SELF_HEAL; - if (!spa_writeable(zio->io_spa)) return; @@ -1892,6 +1887,9 @@ EXPORT_SYMBOL(vdev_obsolete_sm_object); ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, indirect_vdevs_enable, INT, ZMOD_RW, "Whether to attempt condensing indirect vdev mappings"); +ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, indirect_obsolete_pct, INT, ZMOD_RW, + "Minimum obsolete percent of bytes in the mapping to attempt condensing"); + ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, min_mapping_bytes, ULONG, ZMOD_RW, "Don't bother condensing if the mapping uses less than this amount of " "memory"); diff --git a/sys/contrib/openzfs/module/zfs/vdev_mirror.c b/sys/contrib/openzfs/module/zfs/vdev_mirror.c index f360a18c0041..106678a8708e 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_mirror.c +++ b/sys/contrib/openzfs/module/zfs/vdev_mirror.c @@ -649,6 +649,15 @@ vdev_mirror_io_start(zio_t *zio) */ for (c = 0; c < mm->mm_children; c++) { mc = &mm->mm_child[c]; + + /* Don't issue ZIOs to offline children */ + if (!vdev_mirror_child_readable(mc)) { + mc->mc_error = SET_ERROR(ENXIO); + mc->mc_tried = 1; + mc->mc_skipped = 1; + continue; + } + zio_nowait(zio_vdev_child_io(zio, zio->io_bp, mc->mc_vd, mc->mc_offset, abd_alloc_sametype(zio->io_abd, diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c index db753ec16fd3..1feebf7089b4 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c +++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c @@ -269,7 +269,8 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols, rc->rc_error = 0; rc->rc_tried = 0; rc->rc_skipped = 0; - rc->rc_repair = 0; + rc->rc_force_repair = 0; + rc->rc_allow_repair = 1; rc->rc_need_orig_restore = B_FALSE; if (c >= acols) @@ -1811,8 +1812,10 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr) vdev_t *vd = zio->io_vd; vdev_t *cvd = vd->vdev_child[rc->rc_devidx]; - if ((rc->rc_error == 0 || rc->rc_size == 0) && - (rc->rc_repair == 0)) { + if (!rc->rc_allow_repair) { + continue; + } else if (!rc->rc_force_repair && + (rc->rc_error == 0 || rc->rc_size == 0)) { continue; } @@ -1984,7 +1987,7 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity) * 2 4 5 first: increment to 3 * 3 4 5 done * - * This strategy works for dRAID but is less effecient when there are a large + * This strategy works for dRAID but is less efficient when there are a large * number of child vdevs and therefore permutations to check. Furthermore, * since the raidz_map_t rows likely do not overlap reconstruction would be * possible as long as there are no more than nparity data errors per row. diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c index a77ff99faa92..4d7de0c6c44c 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c +++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c @@ -81,7 +81,7 @@ * Advantages: * * - Sequential reconstruction is performed in LBA order which may be faster - * than healing reconstruction particularly when using using HDDs (or + * than healing reconstruction particularly when using HDDs (or * especially with SMR devices). Only allocated capacity is resilvered. * * - Sequential reconstruction is not constrained by ZFS block boundaries. @@ -331,9 +331,9 @@ vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx) * While we're in syncing context take the opportunity to * setup the scrub when there are no more active rebuilds. */ - if (!vdev_rebuild_active(spa->spa_root_vdev) && + pool_scan_func_t func = POOL_SCAN_SCRUB; + if (dsl_scan_setup_check(&func, tx) == 0 && zfs_rebuild_scrub_enabled) { - pool_scan_func_t func = POOL_SCAN_SCRUB; dsl_scan_setup_sync(&func, tx); } diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c index a758fe4fb343..d7c0641c8c2c 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_removal.c +++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c @@ -1514,10 +1514,6 @@ spa_vdev_remove_thread(void *arg) * specified by zfs_removal_suspend_progress. We do this * solely from the test suite or during debugging. */ - uint64_t bytes_copied = - spa->spa_removing_phys.sr_copied; - for (int i = 0; i < TXG_SIZE; i++) - bytes_copied += svr->svr_bytes_done[i]; while (zfs_removal_suspend_progress && !svr->svr_thread_exit) delay(hz); diff --git a/sys/contrib/openzfs/module/zfs/zfs_fm.c b/sys/contrib/openzfs/module/zfs/zfs_fm.c index f0f953405cb2..60e631567a89 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_fm.c +++ b/sys/contrib/openzfs/module/zfs/zfs_fm.c @@ -395,8 +395,8 @@ zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector) } /* - * We want to rate limit ZIO delay and checksum events so as to not - * flood ZED when a disk is acting up. + * We want to rate limit ZIO delay, deadman, and checksum events so as to not + * flood zevent consumers when a disk is acting up. * * Returns 1 if we're ratelimiting, 0 if not. */ @@ -405,11 +405,13 @@ zfs_is_ratelimiting_event(const char *subclass, vdev_t *vd) { int rc = 0; /* - * __ratelimit() returns 1 if we're *not* ratelimiting and 0 if we + * zfs_ratelimit() returns 1 if we're *not* ratelimiting and 0 if we * are. Invert it to get our return value. */ if (strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) { rc = !zfs_ratelimit(&vd->vdev_delay_rl); + } else if (strcmp(subclass, FM_EREPORT_ZFS_DEADMAN) == 0) { + rc = !zfs_ratelimit(&vd->vdev_deadman_rl); } else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0) { rc = !zfs_ratelimit(&vd->vdev_checksum_rl); } diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c index 5f291d067bef..0d5536cf7cb0 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c +++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c @@ -233,7 +233,7 @@ unsigned long zfs_max_nvlist_src_size = 0; /* * When logging the output nvlist of an ioctl in the on-disk history, limit - * the logged size to this many bytes. This must be less then DMU_MAX_ACCESS. + * the logged size to this many bytes. This must be less than DMU_MAX_ACCESS. * This applies primarily to zfs_ioc_channel_program(). */ unsigned long zfs_history_output_max = 1024 * 1024; @@ -2521,6 +2521,26 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, return (err); } +static boolean_t +zfs_is_namespace_prop(zfs_prop_t prop) +{ + switch (prop) { + + case ZFS_PROP_ATIME: + case ZFS_PROP_RELATIME: + case ZFS_PROP_DEVICES: + case ZFS_PROP_EXEC: + case ZFS_PROP_SETUID: + case ZFS_PROP_READONLY: + case ZFS_PROP_XATTR: + case ZFS_PROP_NBMAND: + return (B_TRUE); + + default: + return (B_FALSE); + } +} + /* * This function is best effort. If it fails to set any of the given properties, * it continues to set as many as it can and returns the last error @@ -2540,6 +2560,7 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, int rv = 0; uint64_t intval; const char *strval; + boolean_t should_update_mount_cache = B_FALSE; nvlist_t *genericnvl = fnvlist_alloc(); nvlist_t *retrynvl = fnvlist_alloc(); @@ -2637,6 +2658,9 @@ retry: fnvlist_add_int32(errlist, propname, err); rv = err; } + + if (zfs_is_namespace_prop(prop)) + should_update_mount_cache = B_TRUE; } if (nvl != retrynvl && !nvlist_empty(retrynvl)) { @@ -2685,6 +2709,9 @@ retry: } } } + if (should_update_mount_cache) + zfs_ioctl_update_mount_cache(dsname); + nvlist_free(genericnvl); nvlist_free(retrynvl); @@ -7352,8 +7379,8 @@ zfsdev_getminor(int fd, minor_t *minorp) return (SET_ERROR(EBADF)); } -static void * -zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which) +void * +zfsdev_get_state(minor_t minor, enum zfsdev_state_type which) { zfsdev_state_t *zs; @@ -7374,21 +7401,11 @@ zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which) return (NULL); } -void * -zfsdev_get_state(minor_t minor, enum zfsdev_state_type which) -{ - void *ptr; - - ptr = zfsdev_get_state_impl(minor, which); - - return (ptr); -} - /* * Find a free minor number. The zfsdev_state_list is expected to * be short since it is only a list of currently open file handles. */ -minor_t +static minor_t zfsdev_minor_alloc(void) { static minor_t last_minor = 0; @@ -7399,7 +7416,7 @@ zfsdev_minor_alloc(void) for (m = last_minor + 1; m != last_minor; m++) { if (m > ZFSDEV_MAX_MINOR) m = 1; - if (zfsdev_get_state_impl(m, ZST_ALL) == NULL) { + if (zfsdev_get_state(m, ZST_ALL) == NULL) { last_minor = m; return (m); } @@ -7408,6 +7425,79 @@ zfsdev_minor_alloc(void) return (0); } +int +zfsdev_state_init(void *priv) +{ + zfsdev_state_t *zs, *zsprev = NULL; + minor_t minor; + boolean_t newzs = B_FALSE; + + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + + minor = zfsdev_minor_alloc(); + if (minor == 0) + return (SET_ERROR(ENXIO)); + + for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) { + if (zs->zs_minor == -1) + break; + zsprev = zs; + } + + if (!zs) { + zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP); + newzs = B_TRUE; + } + + zfsdev_private_set_state(priv, zs); + + zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit); + zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent); + + /* + * In order to provide for lock-free concurrent read access + * to the minor list in zfsdev_get_state(), new entries + * must be completely written before linking them into the + * list whereas existing entries are already linked; the last + * operation must be updating zs_minor (from -1 to the new + * value). + */ + if (newzs) { + zs->zs_minor = minor; + membar_producer(); + zsprev->zs_next = zs; + } else { + membar_producer(); + zs->zs_minor = minor; + } + + return (0); +} + +void +zfsdev_state_destroy(void *priv) +{ + zfsdev_state_t *zs = zfsdev_private_get_state(priv); + + ASSERT(zs != NULL); + ASSERT3S(zs->zs_minor, >, 0); + + /* + * The last reference to this zfsdev file descriptor is being dropped. + * We don't have to worry about lookup grabbing this state object, and + * zfsdev_state_init() will not try to reuse this object until it is + * invalidated by setting zs_minor to -1. Invalidation must be done + * last, with a memory barrier to ensure ordering. This lets us avoid + * taking the global zfsdev state lock around destruction. + */ + zfs_onexit_destroy(zs->zs_onexit); + zfs_zevent_destroy(zs->zs_zevent); + zs->zs_onexit = NULL; + zs->zs_zevent = NULL; + membar_producer(); + zs->zs_minor = -1; +} + long zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag) { diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index 26e40716710e..87ccb6861850 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -25,6 +25,7 @@ * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2019, Klara Inc. * Copyright (c) 2019, Allan Jude + * Copyright (c) 2021, Datto, Inc. */ #include <sys/sysmacros.h> @@ -247,13 +248,10 @@ zio_init(void) void zio_fini(void) { - size_t i, j, n; - kmem_cache_t *cache; - - n = SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; + size_t n = SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; #if defined(ZFS_DEBUG) && !defined(_KERNEL) - for (i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { if (zio_buf_cache_allocs[i] != zio_buf_cache_frees[i]) (void) printf("zio_fini: [%d] %llu != %llu\n", (int)((i + 1) << SPA_MINBLOCKSHIFT), @@ -267,11 +265,11 @@ zio_fini(void) * and zio_data_buf_cache. Do a wasteful but trivially correct scan to * sort it out. */ - for (i = 0; i < n; i++) { - cache = zio_buf_cache[i]; + for (size_t i = 0; i < n; i++) { + kmem_cache_t *cache = zio_buf_cache[i]; if (cache == NULL) continue; - for (j = i; j < n; j++) { + for (size_t j = i; j < n; j++) { if (cache == zio_buf_cache[j]) zio_buf_cache[j] = NULL; if (cache == zio_data_buf_cache[j]) @@ -280,22 +278,20 @@ zio_fini(void) kmem_cache_destroy(cache); } - for (i = 0; i < n; i++) { - cache = zio_data_buf_cache[i]; + for (size_t i = 0; i < n; i++) { + kmem_cache_t *cache = zio_data_buf_cache[i]; if (cache == NULL) continue; - for (j = i; j < n; j++) { + for (size_t j = i; j < n; j++) { if (cache == zio_data_buf_cache[j]) zio_data_buf_cache[j] = NULL; } kmem_cache_destroy(cache); } - for (i = 0; i < n; i++) { - if (zio_buf_cache[i] != NULL) - panic("zio_fini: zio_buf_cache[%zd] != NULL", i); - if (zio_data_buf_cache[i] != NULL) - panic("zio_fini: zio_data_buf_cache[%zd] != NULL", i); + for (size_t i = 0; i < n; i++) { + VERIFY3P(zio_buf_cache[i], ==, NULL); + VERIFY3P(zio_data_buf_cache[i], ==, NULL); } kmem_cache_destroy(zio_link_cache); @@ -4570,7 +4566,7 @@ zio_done(zio_t *zio) uint64_t asize = P2ROUNDUP(psize, align); abd_t *adata = zio->io_abd; - if (asize != psize) { + if (adata != NULL && asize != psize) { adata = abd_alloc(asize, B_TRUE); abd_copy(adata, zio->io_abd, psize); abd_zero_off(adata, psize, asize - psize); @@ -4581,7 +4577,7 @@ zio_done(zio_t *zio) zcr->zcr_finish(zcr, adata); zfs_ereport_free_checksum(zcr); - if (asize != psize) + if (adata != NULL && asize != psize) abd_free(adata); } } diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c index b6609363f047..23df0e1541a3 100644 --- a/sys/contrib/openzfs/module/zfs/zvol.c +++ b/sys/contrib/openzfs/module/zfs/zvol.c @@ -1383,10 +1383,6 @@ zvol_set_snapdev_impl(char *name, uint64_t snapdev) spl_fstrans_unmark(cookie); } -typedef struct zvol_volmode_cb_arg { - uint64_t volmode; -} zvol_volmode_cb_arg_t; - static void zvol_set_volmode_impl(char *name, uint64_t volmode) { diff --git a/sys/contrib/openzfs/module/zstd/README.md b/sys/contrib/openzfs/module/zstd/README.md index f8e127736aac..eed229e2f78f 100644 --- a/sys/contrib/openzfs/module/zstd/README.md +++ b/sys/contrib/openzfs/module/zstd/README.md @@ -10,7 +10,7 @@ library, besides upgrading to a newer ZSTD release. Tree structure: * `zfs_zstd.c` is the actual `zzstd` kernel module. -* `lib/` contains the the unmodified, [_"amalgamated"_](https://github.com/facebook/zstd/blob/dev/contrib/single_file_libs/README.md) +* `lib/` contains the unmodified, [_"amalgamated"_](https://github.com/facebook/zstd/blob/dev/contrib/single_file_libs/README.md) version of the `Zstandard` library, generated from our template file * `zstd-in.c` is our template file for generating the library * `include/`: This directory contains supplemental includes for platform diff --git a/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h b/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h index 5cca517b5508..71adc78040fb 100644 --- a/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h +++ b/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h @@ -34,7 +34,7 @@ /* * This wrapper fixes a problem, in case the ZFS filesystem driver, is compiled - * staticly into the kernel. + * statically into the kernel. * This will cause a symbol collision with the older in-kernel zstd library. * The following macros will simply rename all local zstd symbols and references * diff --git a/sys/contrib/openzfs/module/zstd/zfs_zstd.c b/sys/contrib/openzfs/module/zstd/zfs_zstd.c index 69ebf252d1ba..78616c08ba72 100644 --- a/sys/contrib/openzfs/module/zstd/zfs_zstd.c +++ b/sys/contrib/openzfs/module/zstd/zfs_zstd.c @@ -258,7 +258,7 @@ zstd_mempool_alloc(struct zstd_pool *zstd_mempool, size_t size) for (int i = 0; i < ZSTD_POOL_MAX; i++) { pool = &zstd_mempool[i]; /* - * This lock is simply a marker for a pool object beeing in use. + * This lock is simply a marker for a pool object being in use. * If it's already hold, it will be skipped. * * We need to create it before checking it to avoid race @@ -488,7 +488,7 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len, /* * NOTE: We ignore the ZSTD version for now. As soon as any - * incompatibility occurrs, it has to be handled accordingly. + * incompatibility occurs, it has to be handled accordingly. * The version can be accessed via `hdr_copy.version`. */ |