48 files changed, 3667 insertions, 1394 deletions
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c b/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c
index d0461a9f1298..5898789ad53d 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c
@@ -20,7 +20,7 @@
  *  You should have received a copy of the GNU General Public License along
  *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
  *
- *  Solaris Porting Layer (SPL) Credential Implementation.
+ *  Solaris Porting Layer (SPL) Condition Variables Implementation.
  */
 
 #include <sys/condvar.h>
@@ -37,7 +37,7 @@
 #endif
 
 #define	MAX_HRTIMEOUT_SLACK_US	1000
-unsigned int spl_schedule_hrtimeout_slack_us = 0;
+static unsigned int spl_schedule_hrtimeout_slack_us = 0;
 
 static int
 param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-cred.c b/sys/contrib/openzfs/module/os/linux/spl/spl-cred.c
index f81b9540a639..d407fc66b2de 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-cred.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-cred.c
@@ -145,6 +145,18 @@ crgetgid(const cred_t *cr)
 	return (KGID_TO_SGID(cr->fsgid));
 }
 
+/* Return &nop_mnt_idmap if HAVE_IOPS_CREATE_IDMAP, else &init_user_ns */
+zidmap_t *
+zfs_get_init_idmap(void)
+{
+#ifdef HAVE_IOPS_CREATE_IDMAP
+	return ((zidmap_t *)&nop_mnt_idmap);
+#else
+	return ((zidmap_t *)&init_user_ns);
+#endif
+}
+
+EXPORT_SYMBOL(zfs_get_init_idmap);
 EXPORT_SYMBOL(crhold);
 EXPORT_SYMBOL(crfree);
 EXPORT_SYMBOL(crgetuid);
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-err.c b/sys/contrib/openzfs/module/os/linux/spl/spl-err.c
index c84c39b56bf7..29781b9515b2 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-err.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-err.c
@@ -32,7 +32,7 @@
  * analysis and other such goodies.
  * But we would still default to the current default of not to do that.
  */
-unsigned int spl_panic_halt;
+static unsigned int spl_panic_halt;
 /* CSTYLED */
 module_param(spl_panic_halt, uint, 0644);
 MODULE_PARM_DESC(spl_panic_halt, "Cause kernel panic on assertion failures");
@@ -45,7 +45,7 @@ spl_dumpstack(void)
 }
 EXPORT_SYMBOL(spl_dumpstack);
 
-int
+void
 spl_panic(const char *file, const char *func, int line, const char *fmt, ...)
 {
 	const char *newfile;
@@ -75,7 +75,6 @@ spl_panic(const char *file, const char *func, int line, const char *fmt, ...)
 		schedule();
 
 	/* Unreachable */
-	return (1);
 }
 EXPORT_SYMBOL(spl_panic);
 
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
index 5179100d1665..986db1518456 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
@@ -23,6 +23,7 @@
  *  Solaris Porting Layer (SPL) Generic Implementation.
  */
 
+#include <sys/isa_defs.h>
 #include <sys/sysmacros.h>
 #include <sys/systeminfo.h>
 #include <sys/vmsystm.h>
@@ -47,6 +48,8 @@
 #include <linux/mod_compat.h>
 #include <sys/cred.h>
 #include <sys/vnode.h>
+#include <sys/misc.h>
+#include <linux/mod_compat.h>
 
 unsigned long spl_hostid = 0;
 EXPORT_SYMBOL(spl_hostid);
@@ -59,10 +62,10 @@ proc_t p0;
 EXPORT_SYMBOL(p0);
 
 /*
- * Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna
+ * xoshiro256++ 1.0 PRNG by David Blackman and Sebastiano Vigna
  *
- * "Further scramblings of Marsaglia's xorshift generators"
- * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
+ * "Scrambled Linear Pseudorandom Number Generators"
+ * https://vigna.di.unimi.it/ftp/papers/ScrambledLinear.pdf
  *
  * random_get_pseudo_bytes() is an API function on Illumos whose sole purpose
  * is to provide bytes containing random numbers. It is mapped to /dev/urandom
@@ -74,66 +77,85 @@ EXPORT_SYMBOL(p0);
  * free of atomic instructions.
  *
  * A consequence of using a fast PRNG is that using random_get_pseudo_bytes()
- * to generate words larger than 128 bits will paradoxically be limited to
- * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1`
- * 128-bit words and selecting the first will implicitly select the second. If
+ * to generate words larger than 256 bits will paradoxically be limited to
+ * `2^256 - 1` possibilities. This is because we have a sequence of `2^256 - 1`
+ * 256-bit words and selecting the first will implicitly select the second. If
  * a caller finds this behavior undesirable, random_get_bytes() should be used
  * instead.
  *
  * XXX: Linux interrupt handlers that trigger within the critical section
- * formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will
+ * formed by `s[3] = xp[3];` and `xp[0] = s[0];` and call this function will
  * see the same numbers. Nothing in the code currently calls this in an
  * interrupt handler, so this is considered to be okay. If that becomes a
  * problem, we could create a set of per-cpu variables for interrupt handlers
  * and use them when in_interrupt() from linux/preempt_mask.h evaluates to
  * true.
  */
-void __percpu *spl_pseudo_entropy;
+static void __percpu *spl_pseudo_entropy;
 
 /*
- * spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed
- * file:
+ * rotl()/spl_rand_next()/spl_rand_jump() are copied from the following CC-0
+ * licensed file:
  *
- * http://xorshift.di.unimi.it/xorshift128plus.c
+ * https://prng.di.unimi.it/xoshiro256plusplus.c
  */
 
+static inline uint64_t rotl(const uint64_t x, int k)
+{
+	return ((x << k) | (x >> (64 - k)));
+}
+
 static inline uint64_t
 spl_rand_next(uint64_t *s)
 {
-	uint64_t s1 = s[0];
-	const uint64_t s0 = s[1];
-	s[0] = s0;
-	s1 ^= s1 << 23; // a
-	s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5); // b, c
-	return (s[1] + s0);
+	const uint64_t result = rotl(s[0] + s[3], 23) + s[0];
+
+	const uint64_t t = s[1] << 17;
+
+	s[2] ^= s[0];
+	s[3] ^= s[1];
+	s[1] ^= s[2];
+	s[0] ^= s[3];
+
+	s[2] ^= t;
+
+	s[3] = rotl(s[3], 45);
+
+	return (result);
 }
 
 static inline void
 spl_rand_jump(uint64_t *s)
 {
-	static const uint64_t JUMP[] =
-	    { 0x8a5cd789635d2dff, 0x121fd2155c472f96 };
+	static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba,
+	    0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c };
 
 	uint64_t s0 = 0;
 	uint64_t s1 = 0;
+	uint64_t s2 = 0;
+	uint64_t s3 = 0;
 	int i, b;
 	for (i = 0; i < sizeof (JUMP) / sizeof (*JUMP); i++)
 		for (b = 0; b < 64; b++) {
 			if (JUMP[i] & 1ULL << b) {
 				s0 ^= s[0];
 				s1 ^= s[1];
+				s2 ^= s[2];
+				s3 ^= s[3];
 			}
 			(void) spl_rand_next(s);
 		}
 
 	s[0] = s0;
 	s[1] = s1;
+	s[2] = s2;
+	s[3] = s3;
 }
 
 int
 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
 {
-	uint64_t *xp, s[2];
+	uint64_t *xp, s[4];
 
 	ASSERT(ptr);
 
@@ -141,6 +163,8 @@ random_get_pseudo_bytes(uint8_t *ptr, size_t len)
 
 	s[0] = xp[0];
 	s[1] = xp[1];
+	s[2] = xp[2];
+	s[3] = xp[3];
 
 	while (len) {
 		union {
@@ -152,12 +176,22 @@ random_get_pseudo_bytes(uint8_t *ptr, size_t len)
 		len -= i;
 		entropy.ui64 = spl_rand_next(s);
 
+		/*
+		 * xoshiro256++ has low entropy lower bytes, so we copy the
+		 * higher order bytes first.
+		 */
 		while (i--)
+#ifdef _ZFS_BIG_ENDIAN
 			*ptr++ = entropy.byte[i];
+#else
+			*ptr++ = entropy.byte[7 - i];
+#endif
 	}
 
 	xp[0] = s[0];
 	xp[1] = s[1];
+	xp[2] = s[2];
+	xp[3] = s[3];
 
 	put_cpu_ptr(spl_pseudo_entropy);
 
@@ -220,8 +254,10 @@ __div_u64(uint64_t u, uint32_t v)
  * replacements for libgcc-provided functions and will never be called
  * directly.
  */
+#if defined(__GNUC__) && !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#endif
 
 /*
  * Implementation of 64-bit unsigned division for 32-bit machines.
@@ -415,7 +451,9 @@ __aeabi_ldivmod(int64_t u, int64_t v)
 EXPORT_SYMBOL(__aeabi_ldivmod);
 #endif /* __arm || __arm__ */
 
+#if defined(__GNUC__) && !defined(__clang__)
 #pragma GCC diagnostic pop
+#endif
 
 #endif /* BITS_PER_LONG */
 
@@ -458,7 +496,7 @@ int ddi_strto##type(const char *str, char **endptr,			\
 			if (tolower(str[1]) == 'x' && isxdigit(str[2])) { \
 				base = 16; /* hex */			\
 				ptr += 2;				\
-			} else if (str[1] >= '0' && str[1] < 8) {	\
+			} else if (str[1] >= '0' && str[1] < '8') {	\
 				base = 8; /* octal */			\
 				ptr += 1;				\
 			} else {					\
@@ -517,6 +555,61 @@ ddi_copyin(const void *from, void *to, size_t len, int flags)
 }
 EXPORT_SYMBOL(ddi_copyin);
 
+#define	define_spl_param(type, fmt)					\
+int									\
+spl_param_get_##type(char *buf, zfs_kernel_param_t *kp)			\
+{									\
+	return (scnprintf(buf, PAGE_SIZE, fmt "\n",			\
+	    *(type *)kp->arg));						\
+}									\
+int									\
+spl_param_set_##type(const char *buf, zfs_kernel_param_t *kp)		\
+{									\
+	return (kstrto##type(buf, 0, (type *)kp->arg));			\
+}									\
+const struct kernel_param_ops spl_param_ops_##type = {			\
+	.set = spl_param_set_##type,					\
+	.get = spl_param_get_##type,					\
+};									\
+EXPORT_SYMBOL(spl_param_get_##type);					\
+EXPORT_SYMBOL(spl_param_set_##type);					\
+EXPORT_SYMBOL(spl_param_ops_##type);
+
+define_spl_param(s64, "%lld")
+define_spl_param(u64, "%llu")
+
+/*
+ * Post a uevent to userspace whenever a new vdev is added to the pool. It is
+ * necessary to sync blkid information with udev, which the zed daemon uses
+ * during device hotplug to identify the vdev.
+ */
+void
+spl_signal_kobj_evt(struct block_device *bdev)
+{
+#if defined(HAVE_BDEV_KOBJ) || defined(HAVE_PART_TO_DEV)
+#ifdef HAVE_BDEV_KOBJ
+	struct kobject *disk_kobj = bdev_kobj(bdev);
+#else
+	struct kobject *disk_kobj = &part_to_dev(bdev->bd_part)->kobj;
+#endif
+	if (disk_kobj) {
+		int ret = kobject_uevent(disk_kobj, KOBJ_CHANGE);
+		if (ret) {
+			pr_warn("ZFS: Sending event '%d' to kobject: '%s'"
+			    " (%p): failed(ret:%d)\n", KOBJ_CHANGE,
+			    kobject_name(disk_kobj), disk_kobj, ret);
+		}
+	}
+#else
+/*
+ * This is encountered if neither bdev_kobj() nor part_to_dev() is available
+ * in the kernel - likely due to an API change that needs to be chased down.
+ */
+#error "Unsupported kernel: unable to get struct kobj from bdev"
+#endif
+}
+EXPORT_SYMBOL(spl_signal_kobj_evt);
+
 int
 ddi_copyout(const void *from, void *to, size_t len, int flags)
 {
@@ -705,28 +798,33 @@ spl_kvmem_init(void)
  * initialize each of the per-cpu seeds so that the sequences generated on each
  * CPU are guaranteed to never overlap in practice.
  */
-static void __init
+static int __init
 spl_random_init(void)
 {
-	uint64_t s[2];
+	uint64_t s[4];
 	int i = 0;
 
-	spl_pseudo_entropy = __alloc_percpu(2 * sizeof (uint64_t),
+	spl_pseudo_entropy = __alloc_percpu(4 * sizeof (uint64_t),
 	    sizeof (uint64_t));
 
+	if (!spl_pseudo_entropy)
+		return (-ENOMEM);
+
 	get_random_bytes(s, sizeof (s));
 
-	if (s[0] == 0 && s[1] == 0) {
+	if (s[0] == 0 && s[1] == 0 && s[2] == 0 && s[3] == 0) {
 		if (jiffies != 0) {
 			s[0] = jiffies;
 			s[1] = ~0 - jiffies;
+			s[2] = ~jiffies;
+			s[3] = jiffies - ~0;
 		} else {
-			(void) memcpy(s, "improbable seed", sizeof (s));
+			(void) memcpy(s, "improbable seed", 16);
 		}
 		printk("SPL: get_random_bytes() returned 0 "
 		    "when generating random seed. Setting initial seed to "
-		    "0x%016llx%016llx.\n", cpu_to_be64(s[0]),
-		    cpu_to_be64(s[1]));
+		    "0x%016llx%016llx%016llx%016llx.\n", cpu_to_be64(s[0]),
+		    cpu_to_be64(s[1]), cpu_to_be64(s[2]), cpu_to_be64(s[3]));
 	}
 
 	for_each_possible_cpu(i) {
@@ -736,7 +834,11 @@ spl_random_init(void)
 
 		wordp[0] = s[0];
 		wordp[1] = s[1];
+		wordp[2] = s[2];
+		wordp[3] = s[3];
 	}
+
+	return (0);
 }
 
 static void
@@ -757,7 +859,8 @@ spl_init(void)
 {
 	int rc = 0;
 
-	spl_random_init();
+	if ((rc = spl_random_init()))
+		goto out0;
 
 	if ((rc = spl_kvmem_init()))
 		goto out1;
@@ -800,6 +903,8 @@ out3:
 out2:
 	spl_kvmem_fini();
 out1:
+	spl_random_fini();
+out0:
 	return (rc);
 }
 
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
index ba4ca49a2ac9..42821ad60256 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
@@ -28,6 +28,7 @@
 #include <sys/timer.h>
 #include <sys/vmem.h>
 #include <sys/wait.h>
+#include <sys/string.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/prefetch.h>
@@ -76,17 +77,6 @@ module_param(spl_kmem_cache_magazine_size, uint, 0444);
 MODULE_PARM_DESC(spl_kmem_cache_magazine_size,
 	"Default magazine size (2-256), set automatically (0)");
 
-/*
- * The default behavior is to report the number of objects remaining in the
- * cache.  This allows the Linux VM to repeatedly reclaim objects from the
- * cache when memory is low satisfy other memory allocations.  Alternately,
- * setting this value to KMC_RECLAIM_ONCE limits how aggressively the cache
- * is reclaimed.  This may increase the likelihood of out of memory events.
- */
-static unsigned int spl_kmem_cache_reclaim = 0 /* KMC_RECLAIM_ONCE */;
-module_param(spl_kmem_cache_reclaim, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)");
-
 static unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB;
 module_param(spl_kmem_cache_obj_per_slab, uint, 0644);
 MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab");
@@ -102,7 +92,8 @@ MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB");
  * of 16K was determined to be optimal for architectures using 4K pages and
  * to also work well on architecutres using larger 64K page sizes.
  */
-static unsigned int spl_kmem_cache_slab_limit = 16384;
+static unsigned int spl_kmem_cache_slab_limit =
+    SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE;
 module_param(spl_kmem_cache_slab_limit, uint, 0644);
 MODULE_PARM_DESC(spl_kmem_cache_slab_limit,
 	"Objects less than N bytes use the Linux slab");
@@ -151,7 +142,7 @@ MODULE_PARM_DESC(spl_kmem_cache_kmem_threads,
 
 struct list_head spl_kmem_cache_list;   /* List of caches */
 struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
-taskq_t *spl_kmem_cache_taskq;		/* Task queue for aging / reclaim */
+static taskq_t *spl_kmem_cache_taskq;   /* Task queue for aging / reclaim */
 
 static void spl_cache_shrink(spl_kmem_cache_t *skc, void *obj);
 
@@ -182,8 +173,11 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
 	 * of that infrastructure we are responsible for incrementing it.
 	 */
 	if (current->reclaim_state)
+#ifdef	HAVE_RECLAIM_STATE_RECLAIMED
+		current->reclaim_state->reclaimed += size >> PAGE_SHIFT;
+#else
 		current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT;
-
+#endif
 	vfree(ptr);
 }
 
@@ -701,12 +695,12 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align,
 
 	skc->skc_magic = SKC_MAGIC;
 	skc->skc_name_size = strlen(name) + 1;
-	skc->skc_name = (char *)kmalloc(skc->skc_name_size, lflags);
+	skc->skc_name = kmalloc(skc->skc_name_size, lflags);
 	if (skc->skc_name == NULL) {
 		kfree(skc);
 		return (NULL);
 	}
-	strncpy(skc->skc_name, name, skc->skc_name_size);
+	strlcpy(skc->skc_name, name, skc->skc_name_size);
 
 	skc->skc_ctor = ctor;
 	skc->skc_dtor = dtor;
@@ -791,10 +785,8 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align,
 	} else {
 		unsigned long slabflags = 0;
 
-		if (size > (SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE)) {
-			rc = EINVAL;
+		if (size > spl_kmem_cache_slab_limit)
 			goto out;
-		}
 
 #if defined(SLAB_USERCOPY)
 		/*
@@ -815,10 +807,8 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align,
 		skc->skc_linux_cache = kmem_cache_create(
 		    skc->skc_name, size, align, slabflags, NULL);
 #endif
-		if (skc->skc_linux_cache == NULL) {
-			rc = ENOMEM;
+		if (skc->skc_linux_cache == NULL)
 			goto out;
-		}
 	}
 
 	down_write(&spl_kmem_cache_sem);
@@ -1016,10 +1006,20 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
 	ASSERT0(flags & ~KM_PUBLIC_MASK);
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 	ASSERT((skc->skc_flags & KMC_SLAB) == 0);
-	might_sleep();
+
 	*obj = NULL;
 
 	/*
+	 * Since we can't sleep, attempt an emergency allocation to satisfy
+	 * the request.  The only alternative is to fail the allocation, but
+	 * it's preferable to try.  Use of KM_NOSLEEP is expected to be rare.
+	 */
+	if (flags & KM_NOSLEEP)
+		return (spl_emergency_alloc(skc, flags, obj));
+
+	might_sleep();
+
+	/*
 	 * Before allocating a new slab wait for any reaping to complete and
 	 * then return so the local magazine can be rechecked for new objects.
 	 */
@@ -1452,6 +1452,9 @@ spl_kmem_cache_init(void)
 	    spl_kmem_cache_kmem_threads * 8, INT_MAX,
 	    TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
 
+	if (spl_kmem_cache_taskq == NULL)
+		return (-ENOMEM);
+
 	return (0);
 }
 
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c
index c6d3c8f4413f..ad553a73a69e 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c
@@ -32,6 +32,7 @@
 #include <sys/vmem.h>
 #include <sys/cmn_err.h>
 #include <sys/sysmacros.h>
+#include <sys/string.h>
 
 static kmutex_t kstat_module_lock;
 static struct list_head kstat_module_list;
@@ -390,7 +391,7 @@ kstat_create_module(char *name)
 
 	module = kmem_alloc(sizeof (kstat_module_t), KM_SLEEP);
 	module->ksm_proc = pde;
-	strlcpy(module->ksm_name, name, KSTAT_STRLEN+1);
+	strlcpy(module->ksm_name, name, KSTAT_STRLEN);
 	INIT_LIST_HEAD(&module->ksm_kstat_list);
 	list_add_tail(&module->ksm_module_list, &kstat_module_list);
 
@@ -479,8 +480,8 @@ kstat_proc_entry_init(kstat_proc_entry_t *kpep, const char *module,
 	kpep->kpe_owner = NULL;
 	kpep->kpe_proc = NULL;
 	INIT_LIST_HEAD(&kpep->kpe_list);
-	strncpy(kpep->kpe_module, module, KSTAT_STRLEN);
-	strncpy(kpep->kpe_name, name, KSTAT_STRLEN);
+	strlcpy(kpep->kpe_module, module, sizeof (kpep->kpe_module));
+	strlcpy(kpep->kpe_name, name, sizeof (kpep->kpe_name));
 }
 EXPORT_SYMBOL(kstat_proc_entry_init);
 
@@ -514,7 +515,7 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
 	ksp->ks_crtime = gethrtime();
 	ksp->ks_snaptime = ksp->ks_crtime;
 	ksp->ks_instance = ks_instance;
-	strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN);
+	strlcpy(ksp->ks_class, ks_class, sizeof (ksp->ks_class));
 	ksp->ks_type = ks_type;
 	ksp->ks_flags = ks_flags;
 	ksp->ks_update = kstat_default_update;
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c b/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c
index 01f5619e1893..f0f929d3ce90 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c
@@ -47,6 +47,10 @@ static unsigned long table_min = 0;
 static unsigned long table_max = ~0;
 
 static struct ctl_table_header *spl_header = NULL;
+#ifndef HAVE_REGISTER_SYSCTL_TABLE
+static struct ctl_table_header *spl_kmem = NULL;
+static struct ctl_table_header *spl_kstat = NULL;
+#endif
 static struct proc_dir_entry *proc_spl = NULL;
 static struct proc_dir_entry *proc_spl_kmem = NULL;
 static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
@@ -624,6 +628,7 @@ static struct ctl_table spl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dohostid,
 	},
+#ifdef HAVE_REGISTER_SYSCTL_TABLE
 	{
 		.procname	= "kmem",
 		.mode		= 0555,
@@ -634,9 +639,11 @@ static struct ctl_table spl_table[] = {
 		.mode		= 0555,
 		.child		= spl_kstat_table,
 	},
+#endif
 	{},
 };
 
+#ifdef HAVE_REGISTER_SYSCTL_TABLE
 static struct ctl_table spl_dir[] = {
 	{
 		.procname	= "spl",
@@ -648,21 +655,64 @@ static struct ctl_table spl_dir[] = {
 
 static struct ctl_table spl_root[] = {
 	{
-	.procname = "kernel",
-	.mode = 0555,
-	.child = spl_dir,
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= spl_dir,
 	},
 	{}
 };
+#endif
+
+static void spl_proc_cleanup(void)
+{
+	remove_proc_entry("kstat", proc_spl);
+	remove_proc_entry("slab", proc_spl_kmem);
+	remove_proc_entry("kmem", proc_spl);
+	remove_proc_entry("taskq-all", proc_spl);
+	remove_proc_entry("taskq", proc_spl);
+	remove_proc_entry("spl", NULL);
+
+#ifndef HAVE_REGISTER_SYSCTL_TABLE
+	if (spl_kstat) {
+		unregister_sysctl_table(spl_kstat);
+		spl_kstat = NULL;
+	}
+	if (spl_kmem) {
+		unregister_sysctl_table(spl_kmem);
+		spl_kmem = NULL;
+	}
+#endif
+	if (spl_header) {
+		unregister_sysctl_table(spl_header);
+		spl_header = NULL;
+	}
+}
 
 int
 spl_proc_init(void)
 {
 	int rc = 0;
 
+#ifdef HAVE_REGISTER_SYSCTL_TABLE
 	spl_header = register_sysctl_table(spl_root);
 	if (spl_header == NULL)
 		return (-EUNATCH);
+#else
+	spl_header = register_sysctl("kernel/spl", spl_table);
+	if (spl_header == NULL)
+		return (-EUNATCH);
+
+	spl_kmem = register_sysctl("kernel/spl/kmem", spl_kmem_table);
+	if (spl_kmem == NULL) {
+		rc = -EUNATCH;
+		goto out;
+	}
+	spl_kstat = register_sysctl("kernel/spl/kstat", spl_kstat_table);
+	if (spl_kstat == NULL) {
+		rc = -EUNATCH;
+		goto out;
+	}
+#endif
 
 	proc_spl = proc_mkdir("spl", NULL);
 	if (proc_spl == NULL) {
@@ -703,15 +753,8 @@ spl_proc_init(void)
 		goto out;
 	}
 out:
-	if (rc) {
-		remove_proc_entry("kstat", proc_spl);
-		remove_proc_entry("slab", proc_spl_kmem);
-		remove_proc_entry("kmem", proc_spl);
-		remove_proc_entry("taskq-all", proc_spl);
-		remove_proc_entry("taskq", proc_spl);
-		remove_proc_entry("spl", NULL);
-		unregister_sysctl_table(spl_header);
-	}
+	if (rc)
+		spl_proc_cleanup();
 
 	return (rc);
 }
@@ -719,13 +762,5 @@ out:
 void
 spl_proc_fini(void)
 {
-	remove_proc_entry("kstat", proc_spl);
-	remove_proc_entry("slab", proc_spl_kmem);
-	remove_proc_entry("kmem", proc_spl);
-	remove_proc_entry("taskq-all", proc_spl);
-	remove_proc_entry("taskq", proc_spl);
-	remove_proc_entry("spl", NULL);
-
-	ASSERT(spl_header != NULL);
-	unregister_sysctl_table(spl_header);
+	spl_proc_cleanup();
 }
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-procfs-list.c b/sys/contrib/openzfs/module/os/linux/spl/spl-procfs-list.c
index 81501460f04f..5e073950d61a 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-procfs-list.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-procfs-list.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -23,9 +23,9 @@
  */
 
 #include <sys/list.h>
-#include <sys/mutex.h>
 #include <sys/procfs_list.h>
 #include <linux/proc_fs.h>
+#include <sys/mutex.h>
 
 /*
  * A procfs_list is a wrapper around a linked list which implements the seq_file
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-shrinker.c b/sys/contrib/openzfs/module/os/linux/spl/spl-shrinker.c
new file mode 100644
index 000000000000..d5c8da471cbb
--- /dev/null
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-shrinker.c
@@ -0,0 +1,115 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Solaris Porting Layer (SPL) Shrinker Implementation.
+ */
+
+#include <sys/kmem.h>
+#include <sys/shrinker.h>
+
+#ifdef HAVE_SINGLE_SHRINKER_CALLBACK
+/* 3.0-3.11: single shrink() callback, which we wrap to carry both functions */
+struct spl_shrinker_wrap {
+	struct shrinker shrinker;
+	spl_shrinker_cb countfunc;
+	spl_shrinker_cb scanfunc;
+};
+
+static int
+spl_shrinker_single_cb(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct spl_shrinker_wrap *sw = (struct spl_shrinker_wrap *)shrinker;
+
+	if (sc->nr_to_scan != 0)
+		(void) sw->scanfunc(&sw->shrinker, sc);
+	return (sw->countfunc(&sw->shrinker, sc));
+}
+#endif
+
+struct shrinker *
+spl_register_shrinker(const char *name, spl_shrinker_cb countfunc,
+    spl_shrinker_cb scanfunc, int seek_cost)
+{
+	struct shrinker *shrinker;
+
+	/* allocate shrinker; which allocator is used depends on the API */
+#if defined(HAVE_SHRINKER_REGISTER)
+	/* 6.7: kernel will allocate the shrinker for us */
+	shrinker = shrinker_alloc(0, name);
+#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
+	/* 3.12-6.6: we allocate the shrinker ourselves */
+	shrinker = kmem_zalloc(sizeof (struct shrinker), KM_SLEEP);
+#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK)
+	/* 3.0-3.11: allocate a wrapper that embeds the shrinker */
+	struct spl_shrinker_wrap *sw =
+	    kmem_zalloc(sizeof (struct spl_shrinker_wrap), KM_SLEEP);
+	shrinker = &sw->shrinker;
+#else
+	/* 2.x-2.6.22, or a newer shrinker API has been introduced. */
+#error "Unknown shrinker API"
+#endif
+
+	if (shrinker == NULL)
+		return (NULL);
+
+	/* set callbacks; the single-callback API goes through the wrapper */
+#ifdef HAVE_SINGLE_SHRINKER_CALLBACK
+	sw->countfunc = countfunc;
+	sw->scanfunc = scanfunc;
+	shrinker->shrink = spl_shrinker_single_cb;
+#else
+	shrinker->count_objects = countfunc;
+	shrinker->scan_objects = scanfunc;
+#endif
+
+	/* set params */
+	shrinker->seeks = seek_cost;
+
+	/* register with kernel; any value the call returns is ignored */
+#if defined(HAVE_SHRINKER_REGISTER)
+	shrinker_register(shrinker);
+#elif defined(HAVE_REGISTER_SHRINKER_VARARG)
+	register_shrinker(shrinker, name);
+#else
+	register_shrinker(shrinker);
+#endif
+
+	return (shrinker);
+}
+EXPORT_SYMBOL(spl_register_shrinker);
+
+void
+spl_unregister_shrinker(struct shrinker *shrinker)
+{
+#if defined(HAVE_SHRINKER_REGISTER)
+	shrinker_free(shrinker);
+#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
+	unregister_shrinker(shrinker);
+	kmem_free(shrinker, sizeof (struct shrinker));
+#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK)
+	unregister_shrinker(shrinker);
+	kmem_free(shrinker, sizeof (struct spl_shrinker_wrap));
+#else
+#error "Unknown shrinker API"
+#endif
+}
+EXPORT_SYMBOL(spl_unregister_shrinker);
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
index 0aab148975aa..c384b7b378c3 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
@@ -36,6 +36,12 @@ static int spl_taskq_thread_bind = 0;
 module_param(spl_taskq_thread_bind, int, 0644);
 MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default");
 
+static uint_t spl_taskq_thread_timeout_ms = 5000;
+/* BEGIN CSTYLED */
+module_param(spl_taskq_thread_timeout_ms, uint, 0644);
+/* END CSTYLED */
+MODULE_PARM_DESC(spl_taskq_thread_timeout_ms,
+	"Minimum idle threads exit interval for dynamic taskqs");
 
 static int spl_taskq_thread_dynamic = 1;
 module_param(spl_taskq_thread_dynamic, int, 0444);
@@ -46,8 +52,10 @@ module_param(spl_taskq_thread_priority, int, 0644);
 MODULE_PARM_DESC(spl_taskq_thread_priority,
 	"Allow non-default priority for taskq threads");
 
-static int spl_taskq_thread_sequential = 4;
-module_param(spl_taskq_thread_sequential, int, 0644);
+static uint_t spl_taskq_thread_sequential = 4;
+/* BEGIN CSTYLED */
+module_param(spl_taskq_thread_sequential, uint, 0644);
+/* END CSTYLED */
 MODULE_PARM_DESC(spl_taskq_thread_sequential,
 	"Create new taskq threads after N sequential tasks");
 
@@ -586,8 +594,7 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
 	ASSERT(tq->tq_nactive <= tq->tq_nthreads);
 	if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
 		/* Dynamic taskq may be able to spawn another thread */
-		if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
-		    taskq_thread_spawn(tq) == 0)
+		if (taskq_thread_spawn(tq) == 0)
 			goto out;
 	}
 
@@ -621,11 +628,11 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
 	spin_unlock(&t->tqent_lock);
 
 	wake_up(&tq->tq_work_waitq);
-out:
+
 	/* Spawn additional taskq threads if required. */
 	if (!(flags & TQ_NOQUEUE) && tq->tq_nactive == tq->tq_nthreads)
 		(void) taskq_thread_spawn(tq);
-
+out:
 	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
 	return (rc);
 }
@@ -668,10 +675,11 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
 	ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
 
 	spin_unlock(&t->tqent_lock);
-out:
+
 	/* Spawn additional taskq threads if required. */
 	if (tq->tq_nactive == tq->tq_nthreads)
 		(void) taskq_thread_spawn(tq);
+out:
 	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
 	return (rc);
 }
@@ -696,9 +704,8 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
 
 	if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
 		/* Dynamic taskq may be able to spawn another thread */
-		if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
-		    taskq_thread_spawn(tq) == 0)
-			goto out2;
+		if (taskq_thread_spawn(tq) == 0)
+			goto out;
 		flags |= TQ_FRONT;
 	}
 
@@ -734,11 +741,11 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
 	spin_unlock(&t->tqent_lock);
 
 	wake_up(&tq->tq_work_waitq);
-out:
+
 	/* Spawn additional taskq threads if required. */
 	if (tq->tq_nactive == tq->tq_nthreads)
 		(void) taskq_thread_spawn(tq);
-out2:
+out:
 	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
 }
 EXPORT_SYMBOL(taskq_dispatch_ent);
@@ -817,6 +824,7 @@ taskq_thread_spawn(taskq_t *tq)
 	if (!(tq->tq_flags & TASKQ_DYNAMIC))
 		return (0);
 
+	tq->lastspawnstop = jiffies;
 	if ((tq->tq_nthreads + tq->tq_nspawn < tq->tq_maxthreads) &&
 	    (tq->tq_flags & TASKQ_ACTIVE)) {
 		spawning = (++tq->tq_nspawn);
@@ -828,9 +836,9 @@ taskq_thread_spawn(taskq_t *tq)
 }
 
 /*
- * Threads in a dynamic taskq should only exit once it has been completely
- * drained and no other threads are actively servicing tasks.  This prevents
- * threads from being created and destroyed more than is required.
+ * Threads in a dynamic taskq may exit once there is no more work to do.
+ * To prevent threads from being created and destroyed too often, limit
+ * the exit rate to one thread per spl_taskq_thread_timeout_ms.
  *
  * The first thread is the thread list is treated as the primary thread.
  * There is nothing special about the primary thread but in order to avoid
@@ -839,19 +847,22 @@ taskq_thread_spawn(taskq_t *tq)
 static int
 taskq_thread_should_stop(taskq_t *tq, taskq_thread_t *tqt)
 {
-	if (!(tq->tq_flags & TASKQ_DYNAMIC))
+	ASSERT(!taskq_next_ent(tq));
+	if (!(tq->tq_flags & TASKQ_DYNAMIC) || !spl_taskq_thread_dynamic)
 		return (0);
-
+	if (!(tq->tq_flags & TASKQ_ACTIVE))
+		return (1);
 	if (list_first_entry(&(tq->tq_thread_list), taskq_thread_t,
 	    tqt_thread_list) == tqt)
 		return (0);
-
-	return
-	    ((tq->tq_nspawn == 0) &&	/* No threads are being spawned */
-	    (tq->tq_nactive == 0) &&	/* No threads are handling tasks */
-	    (tq->tq_nthreads > 1) &&	/* More than 1 thread is running */
-	    (!taskq_next_ent(tq)) &&	/* There are no pending tasks */
-	    (spl_taskq_thread_dynamic)); /* Dynamic taskqs are allowed */
+	ASSERT3U(tq->tq_nthreads, >, 1);
+	if (tq->tq_nspawn != 0)
+		return (0);
+	if (time_before(jiffies, tq->lastspawnstop +
+	    msecs_to_jiffies(spl_taskq_thread_timeout_ms)))
+		return (0);
+	tq->lastspawnstop = jiffies;
+	return (1);
 }
 
 static int
@@ -902,10 +913,8 @@ taskq_thread(void *args)
 		if (list_empty(&tq->tq_pend_list) &&
 		    list_empty(&tq->tq_prio_list)) {
 
-			if (taskq_thread_should_stop(tq, tqt)) {
-				wake_up_all(&tq->tq_wait_waitq);
+			if (taskq_thread_should_stop(tq, tqt))
 				break;
-			}
 
 			add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
 			spin_unlock_irqrestore(&tq->tq_lock, flags);
@@ -980,9 +989,6 @@ taskq_thread(void *args)
 			tqt->tqt_id = TASKQID_INVALID;
 			tqt->tqt_flags = 0;
 			wake_up_all(&tq->tq_wait_waitq);
-		} else {
-			if (taskq_thread_should_stop(tq, tqt))
-				break;
 		}
 
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -1046,7 +1052,6 @@ taskq_create(const char *name, int threads_arg, pri_t pri,
 
 	ASSERT(name != NULL);
 	ASSERT(minalloc >= 0);
-	ASSERT(maxalloc <= INT_MAX);
 	ASSERT(!(flags & (TASKQ_CPR_SAFE))); /* Unsupported */
 
 	/* Scale the number of threads using nthreads as a percentage */
@@ -1090,6 +1095,7 @@ taskq_create(const char *name, int threads_arg, pri_t pri,
 	tq->tq_flags = (flags | TASKQ_ACTIVE);
 	tq->tq_next_id = TASKQID_INITIAL;
 	tq->tq_lowest_id = TASKQID_INITIAL;
+	tq->lastspawnstop = jiffies;
 	INIT_LIST_HEAD(&tq->tq_free_list);
 	INIT_LIST_HEAD(&tq->tq_pend_list);
 	INIT_LIST_HEAD(&tq->tq_prio_list);
@@ -1229,6 +1235,42 @@ taskq_destroy(taskq_t *tq)
 }
 EXPORT_SYMBOL(taskq_destroy);
 
+/*
+ * Create a taskq with a specified number of pool threads. Allocate
+ * and return an array of nthreads kthread_t pointers, one for each
+ * thread in the pool. The array is not ordered and must be freed
+ * by the caller.
+ */
+taskq_t *
+taskq_create_synced(const char *name, int nthreads, pri_t pri,
+    int minalloc, int maxalloc, uint_t flags, kthread_t ***ktpp)
+{
+	taskq_t *tq;
+	taskq_thread_t *tqt;
+	int i = 0;
+	kthread_t **kthreads = kmem_zalloc(sizeof (*kthreads) * nthreads,
+	    KM_SLEEP);
+
+	flags &= ~(TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT | TASKQ_DC_BATCH);
+
+	/* taskq_create spawns all the threads before returning */
+	tq = taskq_create(name, nthreads, minclsyspri, nthreads, INT_MAX,
+	    flags | TASKQ_PREPOPULATE);
+	VERIFY(tq != NULL);
+	VERIFY(tq->tq_nthreads == nthreads);
+
+	list_for_each_entry(tqt, &tq->tq_thread_list, tqt_thread_list) {
+		kthreads[i] = tqt->tqt_thread;
+		i++;
+	}
+
+	ASSERT3S(i, ==, nthreads);
+	*ktpp = kthreads;
+
+	return (tq);
+}
+EXPORT_SYMBOL(taskq_create_synced);
+
 static unsigned int spl_taskq_kick = 0;
 
 /*
@@ -1379,7 +1421,7 @@ spl_taskq_init(void)
 	system_taskq = taskq_create("spl_system_taskq", MAX(boot_ncpus, 64),
 	    maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
 	if (system_taskq == NULL)
-		return (1);
+		return (-ENOMEM);
 
 	system_delay_taskq = taskq_create("spl_delay_taskq", MAX(boot_ncpus, 4),
 	    maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
@@ -1388,7 +1430,7 @@ spl_taskq_init(void)
 		cpuhp_remove_multi_state(spl_taskq_cpuhp_state);
 #endif
 		taskq_destroy(system_taskq);
-		return (1);
+		return (-ENOMEM);
 	}
 
 	dynamic_taskq = taskq_create("spl_dynamic_taskq", 1,
@@ -1399,7 +1441,7 @@ spl_taskq_init(void)
 #endif
 		taskq_destroy(system_taskq);
 		taskq_destroy(system_delay_taskq);
-		return (1);
+		return (-ENOMEM);
 	}
 
 	/*
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c b/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c
index 32a2d34b1d93..ee3eb4690c3a 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c
@@ -26,6 +26,7 @@
 #include <sys/thread.h>
 #include <sys/kmem.h>
 #include <sys/tsd.h>
+#include <sys/string.h>
 
 /*
  * Thread interfaces
@@ -92,7 +93,7 @@ __thread_create(caddr_t stk, size_t  stksize, thread_func_t func,
 		return (NULL);
 	}
 
-	strncpy(tp->tp_name, name, tp->tp_name_size);
+	strlcpy(tp->tp_name, name, tp->tp_name_size);
 
 	/*
 	 * Strip trailing "_thread" from passed name which will be the func
@@ -178,12 +179,11 @@ issig(int why)
 	sigorsets(&set, &task->blocked, &set);
 
 	spin_lock_irq(&task->sighand->siglock);
-	int ret;
 #ifdef HAVE_DEQUEUE_SIGNAL_4ARG
 	enum pid_type __type;
-	if ((ret = dequeue_signal(task, &set, &__info, &__type)) != 0) {
+	if (dequeue_signal(task, &set, &__info, &__type) != 0) {
 #else
-	if ((ret = dequeue_signal(task, &set, &__info)) != 0) {
+	if (dequeue_signal(task, &set, &__info) != 0) {
 #endif
 #ifdef HAVE_SIGNAL_STOP
 		spin_unlock_irq(&task->sighand->siglock);
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-trace.c b/sys/contrib/openzfs/module/os/linux/spl/spl-trace.c
index 7912a381294d..d3e53e541b8b 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-trace.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-trace.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-tsd.c b/sys/contrib/openzfs/module/os/linux/spl/spl-tsd.c
index 546db9ab8bd7..389c9d0d6df3 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-tsd.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-tsd.c
@@ -706,7 +706,7 @@ spl_tsd_init(void)
 {
 	tsd_hash_table = tsd_hash_table_init(TSD_HASH_TABLE_BITS_DEFAULT);
 	if (tsd_hash_table == NULL)
-		return (1);
+		return (-ENOMEM);
 
 	return (0);
 }
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-xdr.c b/sys/contrib/openzfs/module/os/linux/spl/spl-xdr.c
index 6b77524181db..e1773da5d173 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-xdr.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-xdr.c
@@ -25,6 +25,7 @@
 #include <sys/debug.h>
 #include <sys/types.h>
 #include <sys/sysmacros.h>
+#include <rpc/types.h>
 #include <rpc/xdr.h>
 
 /*
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-zlib.c b/sys/contrib/openzfs/module/os/linux/spl/spl-zlib.c
index 589496da0c78..8c6282ee5d16 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-zlib.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-zlib.c
@@ -204,7 +204,7 @@ spl_zlib_init(void)
 	    size, 0, NULL, NULL, NULL, NULL, NULL,
 	    KMC_KVMEM);
 	if (!zlib_workspace_cache)
-		return (1);
+		return (-ENOMEM);
 
 	return (0);
 }
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
index b8a8b7cd8cd8..d0d0cca154a7 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
@@ -25,18 +25,20 @@
  */
 
 #include <sys/types.h>
-#include <sys/mutex.h>
 #include <sys/sysmacros.h>
 #include <sys/kmem.h>
 #include <linux/file.h>
 #include <linux/magic.h>
 #include <sys/zone.h>
+#include <sys/string.h>
 
 #if defined(CONFIG_USER_NS)
 #include <linux/statfs.h>
 #include <linux/proc_ns.h>
 #endif
 
+#include <sys/mutex.h>
+
 static kmutex_t zone_datasets_lock;
 static struct list_head zone_datasets;
 
@@ -49,7 +51,7 @@ typedef struct zone_datasets {
 typedef struct zone_dataset {
 	struct list_head zd_list;	/* zone_dataset linkage */
 	size_t zd_dsnamelen;		/* length of name */
-	char zd_dsname[0];		/* name of the member dataset */
+	char zd_dsname[];		/* name of the member dataset */
 } zone_dataset_t;
 
 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
@@ -203,8 +205,7 @@ zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
 
 	zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
 	zd->zd_dsnamelen = dsnamelen;
-	strncpy(zd->zd_dsname, dataset, dsnamelen);
-	zd->zd_dsname[dsnamelen] = '\0';
+	strlcpy(zd->zd_dsname, dataset, dsnamelen + 1);
 	INIT_LIST_HEAD(&zd->zd_list);
 	list_add_tail(&zd->zd_list, &zds->zds_datasets);
 
@@ -415,8 +416,8 @@ spl_zone_fini(void)
 			    zone_dataset_t, zd_list);
 			list_del(&zd->zd_list);
 			kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
-			put_user_ns(zds->zds_userns);
 		}
+		put_user_ns(zds->zds_userns);
 		list_del(&zds->zds_list);
 		kmem_free(zds, sizeof (*zds));
 	}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
index 0cd4fa5213d4..cee7410c8833 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
  * Copyright (c) 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2023, 2024, Klara Inc.
  */
 
 /*
@@ -59,9 +60,19 @@
 #include <sys/zfs_znode.h>
 #ifdef _KERNEL
 #include <linux/kmap_compat.h>
+#include <linux/mm_compat.h>
 #include <linux/scatterlist.h>
+#include <linux/version.h>
+#endif
+
+#ifdef _KERNEL
+#if defined(MAX_ORDER)
+#define	ABD_MAX_ORDER	(MAX_ORDER)
+#elif defined(MAX_PAGE_ORDER)
+#define	ABD_MAX_ORDER	(MAX_PAGE_ORDER)
+#endif
 #else
-#define	MAX_ORDER	1
+#define	ABD_MAX_ORDER	(1)
 #endif
 
 typedef struct abd_stats {
@@ -71,7 +82,7 @@ typedef struct abd_stats {
 	kstat_named_t abdstat_scatter_cnt;
 	kstat_named_t abdstat_scatter_data_size;
 	kstat_named_t abdstat_scatter_chunk_waste;
-	kstat_named_t abdstat_scatter_orders[MAX_ORDER];
+	kstat_named_t abdstat_scatter_orders[ABD_MAX_ORDER];
 	kstat_named_t abdstat_scatter_page_multi_chunk;
 	kstat_named_t abdstat_scatter_page_multi_zone;
 	kstat_named_t abdstat_scatter_page_alloc_retry;
@@ -132,14 +143,14 @@ static abd_stats_t abd_stats = {
 	{ "scatter_sg_table_retry",		KSTAT_DATA_UINT64 },
 };
 
-struct {
+static struct {
 	wmsum_t abdstat_struct_size;
 	wmsum_t abdstat_linear_cnt;
 	wmsum_t abdstat_linear_data_size;
 	wmsum_t abdstat_scatter_cnt;
 	wmsum_t abdstat_scatter_data_size;
 	wmsum_t abdstat_scatter_chunk_waste;
-	wmsum_t abdstat_scatter_orders[MAX_ORDER];
+	wmsum_t abdstat_scatter_orders[ABD_MAX_ORDER];
 	wmsum_t abdstat_scatter_page_multi_chunk;
 	wmsum_t abdstat_scatter_page_multi_zone;
 	wmsum_t abdstat_scatter_page_alloc_retry;
@@ -222,7 +233,7 @@ abd_free_struct_impl(abd_t *abd)
 }
 
 #ifdef _KERNEL
-static unsigned zfs_abd_scatter_max_order = MAX_ORDER - 1;
+static unsigned zfs_abd_scatter_max_order = ABD_MAX_ORDER - 1;
 
 /*
  * Mark zfs data pages so they can be excluded from kernel crash dumps
@@ -272,18 +283,21 @@ abd_alloc_chunks(abd_t *abd, size_t size)
 	struct page *page, *tmp_page = NULL;
 	gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
 	gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM;
-	int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1);
-	int nr_pages = abd_chunkcnt_for_bytes(size);
-	int chunks = 0, zones = 0;
+	unsigned int max_order = MIN(zfs_abd_scatter_max_order,
+	    ABD_MAX_ORDER - 1);
+	unsigned int nr_pages = abd_chunkcnt_for_bytes(size);
+	unsigned int chunks = 0, zones = 0;
 	size_t remaining_size;
 	int nid = NUMA_NO_NODE;
-	int alloc_pages = 0;
+	unsigned int alloc_pages = 0;
 
 	INIT_LIST_HEAD(&pages);
 
+	ASSERT3U(alloc_pages, <, nr_pages);
+
 	while (alloc_pages < nr_pages) {
-		unsigned chunk_pages;
-		int order;
+		unsigned int chunk_pages;
+		unsigned int order;
 
 		order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order);
 		chunk_pages = (1U << order);
@@ -597,10 +611,8 @@ abd_free_chunks(abd_t *abd)
 	struct scatterlist *sg;
 
 	abd_for_each_sg(abd, sg, n, i) {
-		for (int j = 0; j < sg->length; j += PAGESIZE) {
-			struct page *p = nth_page(sg_page(sg), j >> PAGE_SHIFT);
-			umem_free(p, PAGESIZE);
-		}
+		struct page *p = nth_page(sg_page(sg), 0);
+		umem_free_aligned(p, PAGESIZE);
 	}
 	abd_free_sg_table(abd);
 }
@@ -706,7 +718,7 @@ abd_free_zero_scatter(void)
 	__free_page(abd_zero_page);
 #endif /* HAVE_ZERO_PAGE_GPL_ONLY */
 #else
-	umem_free(abd_zero_page, PAGESIZE);
+	umem_free_aligned(abd_zero_page, PAGESIZE);
 #endif /* _KERNEL */
 }
 
@@ -729,7 +741,7 @@ abd_kstats_update(kstat_t *ksp, int rw)
 	    wmsum_value(&abd_sums.abdstat_scatter_data_size);
 	as->abdstat_scatter_chunk_waste.value.ui64 =
 	    wmsum_value(&abd_sums.abdstat_scatter_chunk_waste);
-	for (int i = 0; i < MAX_ORDER; i++) {
+	for (int i = 0; i < ABD_MAX_ORDER; i++) {
 		as->abdstat_scatter_orders[i].value.ui64 =
 		    wmsum_value(&abd_sums.abdstat_scatter_orders[i]);
 	}
@@ -758,7 +770,7 @@ abd_init(void)
 	wmsum_init(&abd_sums.abdstat_scatter_cnt, 0);
 	wmsum_init(&abd_sums.abdstat_scatter_data_size, 0);
 	wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0);
-	for (i = 0; i < MAX_ORDER; i++)
+	for (i = 0; i < ABD_MAX_ORDER; i++)
 		wmsum_init(&abd_sums.abdstat_scatter_orders[i], 0);
 	wmsum_init(&abd_sums.abdstat_scatter_page_multi_chunk, 0);
 	wmsum_init(&abd_sums.abdstat_scatter_page_multi_zone, 0);
@@ -768,7 +780,7 @@ abd_init(void)
 	abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
 	    sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
 	if (abd_ksp != NULL) {
-		for (i = 0; i < MAX_ORDER; i++) {
+		for (i = 0; i < ABD_MAX_ORDER; i++) {
 			snprintf(abd_stats.abdstat_scatter_orders[i].name,
 			    KSTAT_STRLEN, "scatter_order_%d", i);
 			abd_stats.abdstat_scatter_orders[i].data_type =
@@ -798,7 +810,7 @@ abd_fini(void)
 	wmsum_fini(&abd_sums.abdstat_scatter_cnt);
 	wmsum_fini(&abd_sums.abdstat_scatter_data_size);
 	wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste);
-	for (int i = 0; i < MAX_ORDER; i++)
+	for (int i = 0; i < ABD_MAX_ORDER; i++)
 		wmsum_fini(&abd_sums.abdstat_scatter_orders[i]);
 	wmsum_fini(&abd_sums.abdstat_scatter_page_multi_chunk);
 	wmsum_fini(&abd_sums.abdstat_scatter_page_multi_zone);
@@ -886,14 +898,9 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd)
 {
 	ASSERT(!abd_is_gang(abd));
 	abd_verify(abd);
+	memset(aiter, 0, sizeof (struct abd_iter));
 	aiter->iter_abd = abd;
-	aiter->iter_mapaddr = NULL;
-	aiter->iter_mapsize = 0;
-	aiter->iter_pos = 0;
-	if (abd_is_linear(abd)) {
-		aiter->iter_offset = 0;
-		aiter->iter_sg = NULL;
-	} else {
+	if (!abd_is_linear(abd)) {
 		aiter->iter_offset = ABD_SCATTER(abd).abd_offset;
 		aiter->iter_sg = ABD_SCATTER(abd).abd_sgl;
 	}
@@ -906,6 +913,7 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd)
 boolean_t
 abd_iter_at_end(struct abd_iter *aiter)
 {
+	ASSERT3U(aiter->iter_pos, <=, aiter->iter_abd->abd_size);
 	return (aiter->iter_pos == aiter->iter_abd->abd_size);
 }
 
@@ -917,8 +925,15 @@ abd_iter_at_end(struct abd_iter *aiter)
 void
 abd_iter_advance(struct abd_iter *aiter, size_t amount)
 {
+	/*
+	 * Ensure that the last chunk is not in use. abd_iterate_*() must clear
+	 * this state (directly or via abd_iter_unmap()) before advancing.
+	 */
 	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
 	ASSERT0(aiter->iter_mapsize);
+	ASSERT3P(aiter->iter_page, ==, NULL);
+	ASSERT0(aiter->iter_page_doff);
+	ASSERT0(aiter->iter_page_dsize);
 
 	/* There's nothing left to advance to, so do nothing */
 	if (abd_iter_at_end(aiter))
@@ -1000,6 +1015,134 @@ abd_cache_reap_now(void)
 }
 
 #if defined(_KERNEL)
+
+/*
+ * This is abd_iter_page(), the function underneath abd_iterate_page_func().
+ * It yields the next page struct and data offset and size within it, without
+ * mapping it into the address space.
+ */
+
+/*
+ * "Compound pages" are a group of pages that can be referenced from a single
+ * struct page *. It's organised as a "head" page, followed by a series of
+ * "tail" pages.
+ *
+ * In OpenZFS, compound pages are allocated using the __GFP_COMP flag, which we
+ * get from scatter ABDs and SPL vmalloc slabs (ie >16K allocations). So a
+ * great many of the IO buffers we get are going to be of this type.
+ *
+ * The tail pages are just regular PAGESIZE pages, and can be safely used
+ * as-is. However, the head page has length covering itself and all the tail
+ * pages. If the ABD chunk spans multiple pages, then we can use the head page
+ * and a >PAGESIZE length, which is far more efficient.
+ *
+ * Before kernel 4.5 however, compound page heads were refcounted separately
+ * from tail pages, such that moving back to the head page would require us to
+ * take a reference to it and releasing it once we're completely finished with
+ * it. In practice, that means when our caller is done with the ABD, which we
+ * have no insight into from here. Rather than contort this API to track head
+ * page references on such ancient kernels, we disable this special compound
+ * page handling before 4.5, instead just treating each page within it as a
+ * regular PAGESIZE page (which it is). This is slightly less efficient, but
+ * makes everything far simpler.
+ *
+ * The below test sets/clears ABD_ITER_COMPOUND_PAGES to enable/disable the
+ * special handling, and also defines the ABD_ITER_PAGE_SIZE(page) macro to
+ * understand compound pages, or not, as required.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
+#define	ABD_ITER_COMPOUND_PAGES		1
+#define	ABD_ITER_PAGE_SIZE(page)	\
+	(PageCompound(page) ? page_size(page) : PAGESIZE)
+#else
+#undef ABD_ITER_COMPOUND_PAGES
+#define	ABD_ITER_PAGE_SIZE(page)	(PAGESIZE)
+#endif
+
+void
+abd_iter_page(struct abd_iter *aiter)
+{
+	if (abd_iter_at_end(aiter)) {
+		aiter->iter_page = NULL;
+		aiter->iter_page_doff = 0;
+		aiter->iter_page_dsize = 0;
+		return;
+	}
+
+	struct page *page;
+	size_t doff, dsize;
+
+	/*
+	 * Find the page, and the start of the data within it. This is computed
+	 * differently for linear and scatter ABDs; linear is referenced by
+	 * virtual memory location, while scatter is referenced by page
+	 * pointer.
+	 */
+	if (abd_is_linear(aiter->iter_abd)) {
+		ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset);
+
+		/* memory address at iter_pos */
+		void *paddr = ABD_LINEAR_BUF(aiter->iter_abd) + aiter->iter_pos;
+
+		/* struct page for address */
+		page = is_vmalloc_addr(paddr) ?
+		    vmalloc_to_page(paddr) : virt_to_page(paddr);
+
+		/* offset of address within the page */
+		doff = offset_in_page(paddr);
+	} else {
+		ASSERT(!abd_is_gang(aiter->iter_abd));
+
+		/* current scatter page */
+		page = nth_page(sg_page(aiter->iter_sg),
+		    aiter->iter_offset >> PAGE_SHIFT);
+
+		/* position within page */
+		doff = aiter->iter_offset & (PAGESIZE - 1);
+	}
+
+#ifdef ABD_ITER_COMPOUND_PAGES
+	if (PageTail(page)) {
+		/*
+		 * If this is a compound tail page, move back to the head, and
+		 * adjust the offset to match. This may let us yield a much
+		 * larger amount of data from a single logical page, and so
+		 * leave our caller with fewer pages to process.
+		 */
+		struct page *head = compound_head(page);
+		doff += ((page - head) * PAGESIZE);
+		page = head;
+	}
+#endif
+
+	ASSERT(page);
+
+	/*
+	 * Compute the maximum amount of data we can take from this page. This
+	 * is the smaller of:
+	 * - the remaining space in the page
+	 * - the remaining space in this scatterlist entry (which may not cover
+	 *   the entire page)
+	 * - the remaining space in the abd (which may not cover the entire
+	 *   scatterlist entry)
+	 */
+	dsize = MIN(ABD_ITER_PAGE_SIZE(page) - doff,
+	    aiter->iter_abd->abd_size - aiter->iter_pos);
+	if (!abd_is_linear(aiter->iter_abd))
+		dsize = MIN(dsize, aiter->iter_sg->length - aiter->iter_offset);
+	ASSERT3U(dsize, >, 0);
+
+	/* final iterator outputs */
+	aiter->iter_page = page;
+	aiter->iter_page_doff = doff;
+	aiter->iter_page_dsize = dsize;
+}
+
+/*
+ * Note: ABD BIO functions only needed to support vdev_classic. See comments in
+ * vdev_disk.c.
+ */
+
 /*
  * bio_nr_pages for ABD.
  * @off is the offset in @abd
@@ -1154,4 +1297,5 @@ MODULE_PARM_DESC(zfs_abd_scatter_min_size,
 module_param(zfs_abd_scatter_max_order, uint, 0644);
 MODULE_PARM_DESC(zfs_abd_scatter_max_order,
 	"Maximum order allocation used for a scatter ABD.");
-#endif
+
+#endif /* _KERNEL */
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
index a95e9c334af9..02dd80c06062 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -80,12 +80,18 @@ static struct notifier_block arc_hotplug_callback_mem_nb;
 
 /*
  * Return a default max arc size based on the amount of physical memory.
+ * This may be overridden by tuning the zfs_arc_max module parameter.
  */
 uint64_t
 arc_default_max(uint64_t min, uint64_t allmem)
 {
-	/* Default to 1/2 of all memory. */
-	return (MAX(allmem / 2, min));
+	uint64_t size;
+
+	if (allmem >= 1 << 30)
+		size = allmem - (1 << 30);
+	else
+		size = min;
+	return (MAX(allmem * 5 / 8, size));
 }
 
 #ifdef _KERNEL
@@ -219,7 +225,11 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
 	arc_reduce_target_size(ptob(sc->nr_to_scan));
 	arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE);
 	if (current->reclaim_state != NULL)
+#ifdef	HAVE_RECLAIM_STATE_RECLAIMED
+		current->reclaim_state->reclaimed += sc->nr_to_scan;
+#else
 		current->reclaim_state->reclaimed_slab += sc->nr_to_scan;
+#endif
 
 	/*
 	 * We are experiencing memory pressure which the arc_evict_zthr was
@@ -243,8 +253,7 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
 	return (sc->nr_to_scan);
 }
 
-SPL_SHRINKER_DECLARE(arc_shrinker,
-    arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS);
+static struct shrinker *arc_shrinker = NULL;
 
 int
 arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)
@@ -347,22 +356,26 @@ arc_lowmem_init(void)
 	 * reclaim from the arc.  This is done to prevent kswapd from
 	 * swapping out pages when it is preferable to shrink the arc.
 	 */
-	spl_register_shrinker(&arc_shrinker);
+	arc_shrinker = spl_register_shrinker("zfs-arc-shrinker",
+	    arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS);
+	VERIFY(arc_shrinker);
+
 	arc_set_sys_free(allmem);
 }
 
 void
 arc_lowmem_fini(void)
 {
-	spl_unregister_shrinker(&arc_shrinker);
+	spl_unregister_shrinker(arc_shrinker);
+	arc_shrinker = NULL;
 }
 
 int
-param_set_arc_long(const char *buf, zfs_kernel_param_t *kp)
+param_set_arc_u64(const char *buf, zfs_kernel_param_t *kp)
 {
 	int error;
 
-	error = param_set_long(buf, kp);
+	error = spl_param_set_u64(buf, kp);
 	if (error < 0)
 		return (SET_ERROR(error));
 
@@ -374,13 +387,13 @@ param_set_arc_long(const char *buf, zfs_kernel_param_t *kp)
 int
 param_set_arc_min(const char *buf, zfs_kernel_param_t *kp)
 {
-	return (param_set_arc_long(buf, kp));
+	return (param_set_arc_u64(buf, kp));
 }
 
 int
 param_set_arc_max(const char *buf, zfs_kernel_param_t *kp)
 {
-	return (param_set_arc_long(buf, kp));
+	return (param_set_arc_u64(buf, kp));
 }
 
 int
@@ -485,56 +498,5 @@ arc_unregister_hotplug(void)
 }
 #endif /* _KERNEL */
 
-/*
- * Helper function for arc_prune_async() it is responsible for safely
- * handling the execution of a registered arc_prune_func_t.
- */
-static void
-arc_prune_task(void *ptr)
-{
-	arc_prune_t *ap = (arc_prune_t *)ptr;
-	arc_prune_func_t *func = ap->p_pfunc;
-
-	if (func != NULL)
-		func(ap->p_adjust, ap->p_private);
-
-	zfs_refcount_remove(&ap->p_refcnt, func);
-}
-
-/*
- * Notify registered consumers they must drop holds on a portion of the ARC
- * buffered they reference.  This provides a mechanism to ensure the ARC can
- * honor the arc_meta_limit and reclaim otherwise pinned ARC buffers.  This
- * is analogous to dnlc_reduce_cache() but more generic.
- *
- * This operation is performed asynchronously so it may be safely called
- * in the context of the arc_reclaim_thread().  A reference is taken here
- * for each registered arc_prune_t and the arc_prune_task() is responsible
- * for releasing it once the registered arc_prune_func_t has completed.
- */
-void
-arc_prune_async(int64_t adjust)
-{
-	arc_prune_t *ap;
-
-	mutex_enter(&arc_prune_mtx);
-	for (ap = list_head(&arc_prune_list); ap != NULL;
-	    ap = list_next(&arc_prune_list, ap)) {
-
-		if (zfs_refcount_count(&ap->p_refcnt) >= 2)
-			continue;
-
-		zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
-		ap->p_adjust = adjust;
-		if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
-		    ap, TQ_SLEEP) == TASKQID_INVALID) {
-			zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc);
-			continue;
-		}
-		ARCSTAT_BUMP(arcstat_prune);
-	}
-	mutex_exit(&arc_prune_mtx);
-}
-
 ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW,
 	"Limit on number of pages that ARC shrinker can reclaim at once");
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/mmp_os.c b/sys/contrib/openzfs/module/os/linux/zfs/mmp_os.c
index ff3ef1bf6ad9..7e5bd392437e 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/mmp_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/mmp_os.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -30,7 +30,7 @@ param_set_multihost_interval(const char *val, zfs_kernel_param_t *kp)
 {
 	int ret;
 
-	ret = param_set_ulong(val, kp);
+	ret = spl_param_set_u64(val, kp);
 	if (ret < 0)
 		return (ret);
 
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/policy.c b/sys/contrib/openzfs/module/os/linux/zfs/policy.c
index ab00d2ae14d2..5d1b4383412a 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/policy.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/policy.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -124,7 +124,7 @@ secpolicy_vnode_any_access(const cred_t *cr, struct inode *ip, uid_t owner)
 	if (crgetuid(cr) == owner)
 		return (0);
 
-	if (zpl_inode_owner_or_capable(kcred->user_ns, ip))
+	if (zpl_inode_owner_or_capable(zfs_init_idmap, ip))
 		return (0);
 
 #if defined(CONFIG_USER_NS)
@@ -214,8 +214,10 @@ secpolicy_vnode_setid_retain(struct znode *zp __maybe_unused, const cred_t *cr,
  * Determine that subject can set the file setgid flag.
  */
 int
-secpolicy_vnode_setids_setgids(const cred_t *cr, gid_t gid)
+secpolicy_vnode_setids_setgids(const cred_t *cr, gid_t gid, zidmap_t *mnt_ns,
+    struct user_namespace *fs_ns)
 {
+	gid = zfs_gid_to_vfsgid(mnt_ns, fs_ns, gid);
 #if defined(CONFIG_USER_NS)
 	if (!kgid_has_mapping(cr->user_ns, SGID_TO_KGID(gid)))
 		return (EPERM);
@@ -284,8 +286,11 @@ secpolicy_setid_clear(vattr_t *vap, cred_t *cr)
  * Determine that subject can set the file setid flags.
  */
 static int
-secpolicy_vnode_setid_modify(const cred_t *cr, uid_t owner)
+secpolicy_vnode_setid_modify(const cred_t *cr, uid_t owner, zidmap_t *mnt_ns,
+    struct user_namespace *fs_ns)
 {
+	owner = zfs_uid_to_vfsuid(mnt_ns, fs_ns, owner);
+
 	if (crgetuid(cr) == owner)
 		return (0);
 
@@ -310,13 +315,14 @@ secpolicy_vnode_stky_modify(const cred_t *cr)
 
 int
 secpolicy_setid_setsticky_clear(struct inode *ip, vattr_t *vap,
-    const vattr_t *ovap, cred_t *cr)
+    const vattr_t *ovap, cred_t *cr, zidmap_t *mnt_ns,
+    struct user_namespace *fs_ns)
 {
 	int error;
 
 	if ((vap->va_mode & S_ISUID) != 0 &&
 	    (error = secpolicy_vnode_setid_modify(cr,
-	    ovap->va_uid)) != 0) {
+	    ovap->va_uid, mnt_ns, fs_ns)) != 0) {
 		return (error);
 	}
 
@@ -334,7 +340,8 @@ secpolicy_setid_setsticky_clear(struct inode *ip, vattr_t *vap,
 	 * group-id bit.
 	 */
 	if ((vap->va_mode & S_ISGID) != 0 &&
-	    secpolicy_vnode_setids_setgids(cr, ovap->va_gid) != 0) {
+	    secpolicy_vnode_setids_setgids(cr, ovap->va_gid,
+	    mnt_ns, fs_ns) != 0) {
 		vap->va_mode &= ~S_ISGID;
 	}
 
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/qat.c b/sys/contrib/openzfs/module/os/linux/zfs/qat.c
index 08613b3a2042..07e0cafabb0e 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/qat.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/qat.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/qat_compress.c b/sys/contrib/openzfs/module/os/linux/zfs/qat_compress.c
index 1d099c95bc7c..6d0595dd5f76 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/qat_compress.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/qat_compress.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -193,7 +193,9 @@ qat_dc_init(void)
 		sd.huffType = CPA_DC_HT_FULL_DYNAMIC;
 		sd.sessDirection = CPA_DC_DIR_COMBINED;
 		sd.sessState = CPA_DC_STATELESS;
+#if (CPA_DC_API_VERSION_NUM_MAJOR == 1 && CPA_DC_API_VERSION_NUM_MINOR < 6)
 		sd.deflateWindowSize = 7;
+#endif
 		sd.checksum = CPA_DC_ADLER32;
 		status = cpaDcGetSessionSize(dc_inst_handles[i],
 		    &sd, &sess_size, &ctx_size);
@@ -247,7 +249,7 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len,
 	Cpa8U *buffer_meta_src = NULL;
 	Cpa8U *buffer_meta_dst = NULL;
 	Cpa32U buffer_meta_size = 0;
-	CpaDcRqResults dc_results;
+	CpaDcRqResults dc_results = {.checksum = 1};
 	CpaStatus status = CPA_STATUS_FAIL;
 	Cpa32U hdr_sz = 0;
 	Cpa32U compressed_sz;
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/qat_crypt.c b/sys/contrib/openzfs/module/os/linux/zfs/qat_crypt.c
index 18b6e38d1a6e..0523a23c61e1 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/qat_crypt.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/qat_crypt.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
index cbdc0f350ad8..c8cbedcd5157 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -60,7 +60,7 @@ param_set_deadman_ziotime(const char *val, zfs_kernel_param_t *kp)
 {
 	int error;
 
-	error = param_set_ulong(val, kp);
+	error = spl_param_set_u64(val, kp);
 	if (error < 0)
 		return (SET_ERROR(error));
 
@@ -74,7 +74,7 @@ param_set_deadman_synctime(const char *val, zfs_kernel_param_t *kp)
 {
 	int error;
 
-	error = param_set_ulong(val, kp);
+	error = spl_param_set_u64(val, kp);
 	if (error < 0)
 		return (SET_ERROR(error));
 
@@ -103,6 +103,18 @@ param_set_slop_shift(const char *buf, zfs_kernel_param_t *kp)
 	return (0);
 }
 
+int
+param_set_active_allocator(const char *val, zfs_kernel_param_t *kp)
+{
+	int error;
+
+	error = -param_set_active_allocator_common(val);
+	if (error == 0)
+		error = param_set_charp(val, kp);
+
+	return (error);
+}
+
 const char *
 spa_history_zone(void)
 {
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/trace.c b/sys/contrib/openzfs/module/os/linux/zfs/trace.c
index a690822ae14c..32a188d169e3 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/trace.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/trace.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
index 9a382261df73..2cea61a6294c 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -24,6 +24,7 @@
  * Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
  * LLNL-CODE-403049.
  * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2023, 2024, Klara Inc.
  */
 
 #include <sys/zfs_context.h>
@@ -41,12 +42,49 @@
 #include <linux/blk-cgroup.h>
 #endif
 
+/*
+ * Linux 6.8.x uses a bdev_handle as an instance/refcount for an underlying
+ * block_device. Since it carries the block_device inside, it's convenient to
+ * just use the handle as a proxy.
+ *
+ * Linux 6.9.x uses a file for the same purpose.
+ *
+ * For pre-6.8, we just emulate this with a cast, since we don't need any of
+ * the other fields inside the handle.
+ */
+#if defined(HAVE_BDEV_OPEN_BY_PATH)
+typedef struct bdev_handle zfs_bdev_handle_t;
+#define	BDH_BDEV(bdh)		((bdh)->bdev)
+#define	BDH_IS_ERR(bdh)		(IS_ERR(bdh))
+#define	BDH_PTR_ERR(bdh)	(PTR_ERR(bdh))
+#define	BDH_ERR_PTR(err)	(ERR_PTR(err))
+#elif defined(HAVE_BDEV_FILE_OPEN_BY_PATH)
+typedef struct file zfs_bdev_handle_t;
+#define	BDH_BDEV(bdh)		(file_bdev(bdh))
+#define	BDH_IS_ERR(bdh)		(IS_ERR(bdh))
+#define	BDH_PTR_ERR(bdh)	(PTR_ERR(bdh))
+#define	BDH_ERR_PTR(err)	(ERR_PTR(err))
+#else
+typedef void zfs_bdev_handle_t;
+#define	BDH_BDEV(bdh)		((struct block_device *)bdh)
+#define	BDH_IS_ERR(bdh)		(IS_ERR(BDH_BDEV(bdh)))
+#define	BDH_PTR_ERR(bdh)	(PTR_ERR(BDH_BDEV(bdh)))
+#define	BDH_ERR_PTR(err)	(ERR_PTR(err))
+#endif
+
 typedef struct vdev_disk {
-	struct block_device		*vd_bdev;
+	zfs_bdev_handle_t		*vd_bdh;
 	krwlock_t			vd_lock;
 } vdev_disk_t;
 
 /*
+ * Maximum number of segments to add to a bio (min 4). If this is higher than
+ * the maximum allowed by the device queue or the kernel itself, it will be
+ * clamped. Setting it to zero will cause the kernel's ideal size to be used.
+ */
+uint_t zfs_vdev_disk_max_segs = 0;
+
+/*
  * Unique identifier for the exclusive vdev holder.
  */
 static void *zfs_vdev_holder = VDEV_HOLDER;
@@ -56,7 +94,7 @@ static void *zfs_vdev_holder = VDEV_HOLDER;
  * device is missing. The missing path may be transient since the links
  * can be briefly removed and recreated in response to udev events.
  */
-static unsigned zfs_vdev_open_timeout_ms = 1000;
+static uint_t zfs_vdev_open_timeout_ms = 1000;
 
 /*
  * Size of the "reserved" partition, in blocks.
@@ -64,28 +102,46 @@ static unsigned zfs_vdev_open_timeout_ms = 1000;
 #define	EFI_MIN_RESV_SIZE	(16 * 1024)
 
 /*
- * Virtual device vector for disks.
+ * BIO request failfast mask.
  */
-typedef struct dio_request {
-	zio_t			*dr_zio;	/* Parent ZIO */
-	atomic_t		dr_ref;		/* References */
-	int			dr_error;	/* Bio error */
-	int			dr_bio_count;	/* Count of bio's */
-	struct bio		*dr_bio[0];	/* Attached bio's */
-} dio_request_t;
 
-static fmode_t
-vdev_bdev_mode(spa_mode_t spa_mode)
+static unsigned int zfs_vdev_failfast_mask = 1;
+
+/*
+ * Convert SPA mode flags into bdev open mode flags.
+ */
+#ifdef HAVE_BLK_MODE_T
+typedef blk_mode_t vdev_bdev_mode_t;
+#define	VDEV_BDEV_MODE_READ	BLK_OPEN_READ
+#define	VDEV_BDEV_MODE_WRITE	BLK_OPEN_WRITE
+#define	VDEV_BDEV_MODE_EXCL	BLK_OPEN_EXCL
+#define	VDEV_BDEV_MODE_MASK	(BLK_OPEN_READ|BLK_OPEN_WRITE|BLK_OPEN_EXCL)
+#else
+typedef fmode_t vdev_bdev_mode_t;
+#define	VDEV_BDEV_MODE_READ	FMODE_READ
+#define	VDEV_BDEV_MODE_WRITE	FMODE_WRITE
+#define	VDEV_BDEV_MODE_EXCL	FMODE_EXCL
+#define	VDEV_BDEV_MODE_MASK	(FMODE_READ|FMODE_WRITE|FMODE_EXCL)
+#endif
+
+static vdev_bdev_mode_t
+vdev_bdev_mode(spa_mode_t smode)
 {
-	fmode_t mode = 0;
+	ASSERT3U(smode, !=, SPA_MODE_UNINIT);
+	ASSERT0(smode & ~(SPA_MODE_READ|SPA_MODE_WRITE));
 
-	if (spa_mode & SPA_MODE_READ)
-		mode |= FMODE_READ;
+	vdev_bdev_mode_t bmode = VDEV_BDEV_MODE_EXCL;
 
-	if (spa_mode & SPA_MODE_WRITE)
-		mode |= FMODE_WRITE;
+	if (smode & SPA_MODE_READ)
+		bmode |= VDEV_BDEV_MODE_READ;
 
-	return (mode);
+	if (smode & SPA_MODE_WRITE)
+		bmode |= VDEV_BDEV_MODE_WRITE;
+
+	ASSERT(bmode & VDEV_BDEV_MODE_MASK);
+	ASSERT0(bmode & ~VDEV_BDEV_MODE_MASK);
+
+	return (bmode);
 }
 
 /*
@@ -105,6 +161,16 @@ bdev_whole(struct block_device *bdev)
 }
 #endif
 
+#if defined(HAVE_BDEVNAME)
+#define	vdev_bdevname(bdev, name)	bdevname(bdev, name)
+#else
+static inline void
+vdev_bdevname(struct block_device *bdev, char *name)
+{
+	snprintf(name, BDEVNAME_SIZE, "%pg", bdev);
+}
+#endif
+
 /*
  * Returns the maximum expansion capacity of the block device (in bytes).
  *
@@ -163,18 +229,60 @@ vdev_disk_error(zio_t *zio)
 	 * which is safe from any context.
 	 */
 	printk(KERN_WARNING "zio pool=%s vdev=%s error=%d type=%d "
-	    "offset=%llu size=%llu flags=%x\n", spa_name(zio->io_spa),
+	    "offset=%llu size=%llu flags=%llu\n", spa_name(zio->io_spa),
 	    zio->io_vd->vdev_path, zio->io_error, zio->io_type,
 	    (u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size,
 	    zio->io_flags);
 }
 
+static void
+vdev_disk_kobj_evt_post(vdev_t *v)
+{
+	vdev_disk_t *vd = v->vdev_tsd;
+	if (vd && vd->vd_bdh) {
+		spl_signal_kobj_evt(BDH_BDEV(vd->vd_bdh));
+	} else {
+		vdev_dbgmsg(v, "vdev_disk_t is NULL for VDEV:%s\n",
+		    v->vdev_path);
+	}
+}
+
+static zfs_bdev_handle_t *
+vdev_blkdev_get_by_path(const char *path, spa_mode_t smode, void *holder)
+{
+	vdev_bdev_mode_t bmode = vdev_bdev_mode(smode);
+
+#if defined(HAVE_BDEV_FILE_OPEN_BY_PATH)
+	return (bdev_file_open_by_path(path, bmode, holder, NULL));
+#elif defined(HAVE_BDEV_OPEN_BY_PATH)
+	return (bdev_open_by_path(path, bmode, holder, NULL));
+#elif defined(HAVE_BLKDEV_GET_BY_PATH_4ARG)
+	return (blkdev_get_by_path(path, bmode, holder, NULL));
+#else
+	return (blkdev_get_by_path(path, bmode, holder));
+#endif
+}
+
+static void
+vdev_blkdev_put(zfs_bdev_handle_t *bdh, spa_mode_t smode, void *holder)
+{
+#if defined(HAVE_BDEV_RELEASE)
+	return (bdev_release(bdh));
+#elif defined(HAVE_BLKDEV_PUT_HOLDER)
+	return (blkdev_put(BDH_BDEV(bdh), holder));
+#elif defined(HAVE_BLKDEV_PUT)
+	return (blkdev_put(BDH_BDEV(bdh), vdev_bdev_mode(smode)));
+#else
+	fput(bdh);
+#endif
+}
+
 static int
 vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
     uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
-	struct block_device *bdev;
-	fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
+	zfs_bdev_handle_t *bdh;
+	spa_mode_t smode = spa_mode(v->vdev_spa);
 	hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms);
 	vdev_disk_t *vd;
 
@@ -199,12 +307,13 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
 		boolean_t reread_part = B_FALSE;
 
 		rw_enter(&vd->vd_lock, RW_WRITER);
-		bdev = vd->vd_bdev;
-		vd->vd_bdev = NULL;
+		bdh = vd->vd_bdh;
+		vd->vd_bdh = NULL;
 
-		if (bdev) {
+		if (bdh) {
+			struct block_device *bdev = BDH_BDEV(bdh);
 			if (v->vdev_expanding && bdev != bdev_whole(bdev)) {
-				bdevname(bdev_whole(bdev), disk_name + 5);
+				vdev_bdevname(bdev_whole(bdev), disk_name + 5);
 				/*
 				 * If userland has BLKPG_RESIZE_PARTITION,
 				 * then it should have updated the partition
@@ -224,15 +333,16 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
 					reread_part = B_TRUE;
 			}
 
-			blkdev_put(bdev, mode | FMODE_EXCL);
+			vdev_blkdev_put(bdh, smode, zfs_vdev_holder);
 		}
 
 		if (reread_part) {
-			bdev = blkdev_get_by_path(disk_name, mode | FMODE_EXCL,
+			bdh = vdev_blkdev_get_by_path(disk_name, smode,
 			    zfs_vdev_holder);
-			if (!IS_ERR(bdev)) {
-				int error = vdev_bdev_reread_part(bdev);
-				blkdev_put(bdev, mode | FMODE_EXCL);
+			if (!BDH_IS_ERR(bdh)) {
+				int error =
+				    vdev_bdev_reread_part(BDH_BDEV(bdh));
+				vdev_blkdev_put(bdh, smode, zfs_vdev_holder);
 				if (error == 0) {
 					timeout = MSEC2NSEC(
 					    zfs_vdev_open_timeout_ms * 2);
@@ -275,58 +385,67 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
 	 * subsequent attempts are expected to eventually succeed.
 	 */
 	hrtime_t start = gethrtime();
-	bdev = ERR_PTR(-ENXIO);
-	while (IS_ERR(bdev) && ((gethrtime() - start) < timeout)) {
-		bdev = blkdev_get_by_path(v->vdev_path, mode | FMODE_EXCL,
+	bdh = BDH_ERR_PTR(-ENXIO);
+	while (BDH_IS_ERR(bdh) && ((gethrtime() - start) < timeout)) {
+		bdh = vdev_blkdev_get_by_path(v->vdev_path, smode,
 		    zfs_vdev_holder);
-		if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
+		if (unlikely(BDH_PTR_ERR(bdh) == -ENOENT)) {
+			/*
+			 * There is no point in waiting since the device was
+			 * removed explicitly.
+			 */
+			if (v->vdev_removed)
+				break;
+
 			schedule_timeout(MSEC_TO_TICK(10));
-		} else if (unlikely(PTR_ERR(bdev) == -ERESTARTSYS)) {
+		} else if (unlikely(BDH_PTR_ERR(bdh) == -ERESTARTSYS)) {
 			timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms * 10);
 			continue;
-		} else if (IS_ERR(bdev)) {
+		} else if (BDH_IS_ERR(bdh)) {
 			break;
 		}
 	}
 
-	if (IS_ERR(bdev)) {
-		int error = -PTR_ERR(bdev);
+	if (BDH_IS_ERR(bdh)) {
+		int error = -BDH_PTR_ERR(bdh);
 		vdev_dbgmsg(v, "open error=%d timeout=%llu/%llu", error,
 		    (u_longlong_t)(gethrtime() - start),
 		    (u_longlong_t)timeout);
-		vd->vd_bdev = NULL;
+		vd->vd_bdh = NULL;
 		v->vdev_tsd = vd;
 		rw_exit(&vd->vd_lock);
 		return (SET_ERROR(error));
 	} else {
-		vd->vd_bdev = bdev;
+		vd->vd_bdh = bdh;
 		v->vdev_tsd = vd;
 		rw_exit(&vd->vd_lock);
 	}
 
+	struct block_device *bdev = BDH_BDEV(vd->vd_bdh);
+
 	/*  Determine the physical block size */
-	int physical_block_size = bdev_physical_block_size(vd->vd_bdev);
+	int physical_block_size = bdev_physical_block_size(bdev);
 
 	/*  Determine the logical block size */
-	int logical_block_size = bdev_logical_block_size(vd->vd_bdev);
+	int logical_block_size = bdev_logical_block_size(bdev);
 
 	/* Clear the nowritecache bit, causes vdev_reopen() to try again. */
 	v->vdev_nowritecache = B_FALSE;
 
 	/* Set when device reports it supports TRIM. */
-	v->vdev_has_trim = bdev_discard_supported(vd->vd_bdev);
+	v->vdev_has_trim = bdev_discard_supported(bdev);
 
 	/* Set when device reports it supports secure TRIM. */
-	v->vdev_has_securetrim = bdev_secure_discard_supported(vd->vd_bdev);
+	v->vdev_has_securetrim = bdev_secure_discard_supported(bdev);
 
 	/* Inform the ZIO pipeline that we are non-rotational */
-	v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev));
+	v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(bdev));
 
 	/* Physical volume size in bytes for the partition */
-	*psize = bdev_capacity(vd->vd_bdev);
+	*psize = bdev_capacity(bdev);
 
 	/* Physical volume size in bytes including possible expansion space */
-	*max_psize = bdev_max_capacity(vd->vd_bdev, v->vdev_wholedisk);
+	*max_psize = bdev_max_capacity(bdev, v->vdev_wholedisk);
 
 	/* Based on the minimum sector size set the block size */
 	*physical_ashift = highbit64(MAX(physical_block_size,
@@ -346,98 +465,15 @@ vdev_disk_close(vdev_t *v)
 	if (v->vdev_reopening || vd == NULL)
 		return;
 
-	if (vd->vd_bdev != NULL) {
-		blkdev_put(vd->vd_bdev,
-		    vdev_bdev_mode(spa_mode(v->vdev_spa)) | FMODE_EXCL);
-	}
+	if (vd->vd_bdh != NULL)
+		vdev_blkdev_put(vd->vd_bdh, spa_mode(v->vdev_spa),
+		    zfs_vdev_holder);
 
 	rw_destroy(&vd->vd_lock);
 	kmem_free(vd, sizeof (vdev_disk_t));
 	v->vdev_tsd = NULL;
 }
 
-static dio_request_t *
-vdev_disk_dio_alloc(int bio_count)
-{
-	dio_request_t *dr = kmem_zalloc(sizeof (dio_request_t) +
-	    sizeof (struct bio *) * bio_count, KM_SLEEP);
-	atomic_set(&dr->dr_ref, 0);
-	dr->dr_bio_count = bio_count;
-	dr->dr_error = 0;
-
-	for (int i = 0; i < dr->dr_bio_count; i++)
-		dr->dr_bio[i] = NULL;
-
-	return (dr);
-}
-
-static void
-vdev_disk_dio_free(dio_request_t *dr)
-{
-	int i;
-
-	for (i = 0; i < dr->dr_bio_count; i++)
-		if (dr->dr_bio[i])
-			bio_put(dr->dr_bio[i]);
-
-	kmem_free(dr, sizeof (dio_request_t) +
-	    sizeof (struct bio *) * dr->dr_bio_count);
-}
-
-static void
-vdev_disk_dio_get(dio_request_t *dr)
-{
-	atomic_inc(&dr->dr_ref);
-}
-
-static int
-vdev_disk_dio_put(dio_request_t *dr)
-{
-	int rc = atomic_dec_return(&dr->dr_ref);
-
-	/*
-	 * Free the dio_request when the last reference is dropped and
-	 * ensure zio_interpret is called only once with the correct zio
-	 */
-	if (rc == 0) {
-		zio_t *zio = dr->dr_zio;
-		int error = dr->dr_error;
-
-		vdev_disk_dio_free(dr);
-
-		if (zio) {
-			zio->io_error = error;
-			ASSERT3S(zio->io_error, >=, 0);
-			if (zio->io_error)
-				vdev_disk_error(zio);
-
-			zio_delay_interrupt(zio);
-		}
-	}
-
-	return (rc);
-}
-
-BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
-{
-	dio_request_t *dr = bio->bi_private;
-	int rc;
-
-	if (dr->dr_error == 0) {
-#ifdef HAVE_1ARG_BIO_END_IO_T
-		dr->dr_error = BIO_END_IO_ERROR(bio);
-#else
-		if (error)
-			dr->dr_error = -(error);
-		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-			dr->dr_error = EIO;
-#endif
-	}
-
-	/* Drop reference acquired by __vdev_disk_physio */
-	rc = vdev_disk_dio_put(dr);
-}
-
 static inline void
 vdev_submit_bio_impl(struct bio *bio)
 {
@@ -589,8 +625,467 @@ vdev_bio_alloc(struct block_device *bdev, gfp_t gfp_mask,
 	return (bio);
 }
 
+static inline uint_t
+vdev_bio_max_segs(struct block_device *bdev)
+{
+	/*
+	 * Smallest of the device max segs and the tuneable max segs. Minimum
+	 * 4, so there's room to finish split pages if they come up.
+	 */
+	const uint_t dev_max_segs = queue_max_segments(bdev_get_queue(bdev));
+	const uint_t tune_max_segs = (zfs_vdev_disk_max_segs > 0) ?
+	    MAX(4, zfs_vdev_disk_max_segs) : dev_max_segs;
+	const uint_t max_segs = MIN(tune_max_segs, dev_max_segs);
+
+#ifdef HAVE_BIO_MAX_SEGS
+	return (bio_max_segs(max_segs));
+#else
+	return (MIN(max_segs, BIO_MAX_PAGES));
+#endif
+}
+
+static inline uint_t
+vdev_bio_max_bytes(struct block_device *bdev)
+{
+	return (queue_max_sectors(bdev_get_queue(bdev)) << 9);
+}
+
+
+/*
+ * Virtual block IO object (VBIO)
+ *
+ * Linux block IO (BIO) objects have a limit on how many data segments (pages)
+ * they can hold. Depending on how they're allocated and structured, a large
+ * ZIO can require more than one BIO to be submitted to the kernel, which then
+ * all have to complete before we can return the completed ZIO back to ZFS.
+ *
+ * A VBIO is a wrapper around multiple BIOs, carrying everything needed to
+ * translate a ZIO down into the kernel block layer and back again.
+ *
+ * Note that these are only used for data ZIOs (read/write). Meta-operations
+ * (flush/trim) don't need multiple BIOs and so can just make the call
+ * directly.
+ */
+typedef struct {
+	zio_t		*vbio_zio;	/* parent zio */
+
+	struct block_device *vbio_bdev;	/* blockdev to submit bios to */
+
+	abd_t		*vbio_abd;	/* abd carrying borrowed linear buf */
+
+	uint_t		vbio_max_segs;	/* max segs per bio */
+
+	uint_t		vbio_max_bytes;	/* max bytes per bio */
+	uint_t		vbio_lbs_mask;	/* logical block size mask */
+
+	uint64_t	vbio_offset;	/* start offset of next bio */
+
+	struct bio	*vbio_bio;	/* pointer to the current bio */
+	int		vbio_flags;	/* bio flags */
+} vbio_t;
+
+static vbio_t *
+vbio_alloc(zio_t *zio, struct block_device *bdev, int flags)
+{
+	vbio_t *vbio = kmem_zalloc(sizeof (vbio_t), KM_SLEEP);
+
+	vbio->vbio_zio = zio;
+	vbio->vbio_bdev = bdev;
+	vbio->vbio_abd = NULL;
+	vbio->vbio_max_segs = vdev_bio_max_segs(bdev);
+	vbio->vbio_max_bytes = vdev_bio_max_bytes(bdev);
+	vbio->vbio_lbs_mask = ~(bdev_logical_block_size(bdev)-1);
+	vbio->vbio_offset = zio->io_offset;
+	vbio->vbio_bio = NULL;
+	vbio->vbio_flags = flags;
+
+	return (vbio);
+}
+
+BIO_END_IO_PROTO(vbio_completion, bio, error);
+
+static int
+vbio_add_page(vbio_t *vbio, struct page *page, uint_t size, uint_t offset)
+{
+	struct bio *bio = vbio->vbio_bio;
+	uint_t ssize;
+
+	while (size > 0) {
+		if (bio == NULL) {
+			/* New BIO, allocate and set up */
+			bio = vdev_bio_alloc(vbio->vbio_bdev, GFP_NOIO,
+			    vbio->vbio_max_segs);
+			VERIFY(bio);
+
+			BIO_BI_SECTOR(bio) = vbio->vbio_offset >> 9;
+			bio_set_op_attrs(bio,
+			    vbio->vbio_zio->io_type == ZIO_TYPE_WRITE ?
+			    WRITE : READ, vbio->vbio_flags);
+
+			if (vbio->vbio_bio) {
+				bio_chain(vbio->vbio_bio, bio);
+				vdev_submit_bio(vbio->vbio_bio);
+			}
+			vbio->vbio_bio = bio;
+		}
+
+		/*
+		 * Only load as much of the current page data as will fit in
+		 * the space left in the BIO, respecting lbs alignment. Older
+		 * kernels will error if we try to overfill the BIO, while
+		 * newer ones will accept it and split the BIO. This ensures
+		 * everything works on older kernels, and avoids an additional
+		 * overhead on the new.
+		 */
+		ssize = MIN(size, (vbio->vbio_max_bytes - BIO_BI_SIZE(bio)) &
+		    vbio->vbio_lbs_mask);
+		if (ssize > 0 &&
+		    bio_add_page(bio, page, ssize, offset) == ssize) {
+			/* Accepted, adjust and load any remaining. */
+			size -= ssize;
+			offset += ssize;
+			continue;
+		}
+
+		/* No room, set up for a new BIO and loop */
+		vbio->vbio_offset += BIO_BI_SIZE(bio);
+
+		/* Signal new BIO allocation wanted */
+		bio = NULL;
+	}
+
+	return (0);
+}
+
+/* Iterator callback to submit ABD pages to the vbio. */
+static int
+vbio_fill_cb(struct page *page, size_t off, size_t len, void *priv)
+{
+	vbio_t *vbio = priv;
+	return (vbio_add_page(vbio, page, len, off));
+}
+
+/* Create some BIOs, fill them with data and submit them */
+static void
+vbio_submit(vbio_t *vbio, abd_t *abd, uint64_t size)
+{
+	/*
+	 * We plug so we can submit the BIOs as we go and only unplug them when
+	 * they are fully created and submitted. This is important; if we don't
+	 * plug, then the kernel may start executing earlier BIOs while we're
+	 * still creating and executing later ones, and if the device goes
+	 * away while that's happening, older kernels can get confused and
+	 * trample memory.
+	 */
+	struct blk_plug plug;
+	blk_start_plug(&plug);
+
+	(void) abd_iterate_page_func(abd, 0, size, vbio_fill_cb, vbio);
+	ASSERT(vbio->vbio_bio);
+
+	vbio->vbio_bio->bi_end_io = vbio_completion;
+	vbio->vbio_bio->bi_private = vbio;
+
+	/*
+	 * Once submitted, vbio_bio now owns vbio (through bi_private) and we
+	 * can't touch it again. The bio may complete and vbio_completion() be
+	 * called and free the vbio before this task is run again, so we must
+	 * consider it invalid from this point.
+	 */
+	vdev_submit_bio(vbio->vbio_bio);
+
+	blk_finish_plug(&plug);
+}
+
+/* IO completion callback */
+BIO_END_IO_PROTO(vbio_completion, bio, error)
+{
+	vbio_t *vbio = bio->bi_private;
+	zio_t *zio = vbio->vbio_zio;
+
+	ASSERT(zio);
+
+	/* Capture and log any errors */
+#ifdef HAVE_1ARG_BIO_END_IO_T
+	zio->io_error = BIO_END_IO_ERROR(bio);
+#else
+	zio->io_error = 0;
+	if (error)
+		zio->io_error = -(error);
+	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+		zio->io_error = EIO;
+#endif
+	ASSERT3S(zio->io_error, >=, 0);
+
+	if (zio->io_error)
+		vdev_disk_error(zio);
+
+	/* Return the BIO to the kernel */
+	bio_put(bio);
+
+	/*
+	 * If we copied the ABD before issuing it, clean up and return the copy
+	 * to the ABD, with changes if appropriate.
+	 */
+	if (vbio->vbio_abd != NULL) {
+		void *buf = abd_to_buf(vbio->vbio_abd);
+		abd_free(vbio->vbio_abd);
+		vbio->vbio_abd = NULL;
+
+		if (zio->io_type == ZIO_TYPE_READ)
+			abd_return_buf_copy(zio->io_abd, buf, zio->io_size);
+		else
+			abd_return_buf(zio->io_abd, buf, zio->io_size);
+	}
+
+	/* Final cleanup */
+	kmem_free(vbio, sizeof (vbio_t));
+
+	/* All done, submit for processing */
+	zio_delay_interrupt(zio);
+}
+
+/*
+ * Iterator callback to count ABD pages and check their size & alignment.
+ *
+ * On Linux, each BIO segment can take a page pointer, and an offset+length of
+ * the data within that page. A page can be arbitrarily large ("compound"
+ * pages) but we still have to ensure the data portion is correctly sized and
+ * aligned to the logical block size, to ensure that if the kernel wants to
+ * split the BIO, the two halves will still be properly aligned.
+ *
+ * NOTE: if you change this function, change the copy in
+ * tests/zfs-tests/tests/functional/vdev_disk/page_alignment.c, and add test
+ * data there to validate the change you're making.
+ *
+ */
+typedef struct {
+	uint_t  bmask;
+	uint_t  npages;
+	uint_t  end;
+} vdev_disk_check_pages_t;
+
+static int
+vdev_disk_check_pages_cb(struct page *page, size_t off, size_t len, void *priv)
+{
+	(void) page;
+	vdev_disk_check_pages_t *s = priv;
+
+	/*
+	 * If we didn't finish on a block size boundary last time, then there
+	 * would be a gap if we tried to use this ABD as-is, so abort.
+	 */
+	if (s->end != 0)
+		return (1);
+
+	/*
+	 * Note if we're taking less than a full block, so we can check it
+	 * above on the next call.
+	 */
+	s->end = (off+len) & s->bmask;
+
+	/* All blocks after the first must start on a block size boundary. */
+	if (s->npages != 0 && (off & s->bmask) != 0)
+		return (1);
+
+	s->npages++;
+	return (0);
+}
+
+/*
+ * Check if we can submit the pages in this ABD to the kernel as-is. Returns
+ * B_TRUE if so, or B_FALSE if it can't be submitted like this.
+ */
+static boolean_t
+vdev_disk_check_pages(abd_t *abd, uint64_t size, struct block_device *bdev)
+{
+	vdev_disk_check_pages_t s = {
+	    .bmask = bdev_logical_block_size(bdev)-1,
+	    .npages = 0,
+	    .end = 0,
+	};
+
+	if (abd_iterate_page_func(abd, 0, size, vdev_disk_check_pages_cb, &s))
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+static int
+vdev_disk_io_rw(zio_t *zio)
+{
+	vdev_t *v = zio->io_vd;
+	vdev_disk_t *vd = v->vdev_tsd;
+	struct block_device *bdev = BDH_BDEV(vd->vd_bdh);
+	int flags = 0;
+
+	/*
+	 * Accessing outside the block device is never allowed.
+	 */
+	if (zio->io_offset + zio->io_size > i_size_read(bdev->bd_inode)) {
+		vdev_dbgmsg(zio->io_vd,
+		    "Illegal access %llu size %llu, device size %llu",
+		    (u_longlong_t)zio->io_offset,
+		    (u_longlong_t)zio->io_size,
+		    (u_longlong_t)i_size_read(bdev->bd_inode));
+		return (SET_ERROR(EIO));
+	}
+
+	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
+	    v->vdev_failfast == B_TRUE) {
+		bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1,
+		    zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4);
+	}
+
+	/*
+	 * Check alignment of the incoming ABD. If any part of it would require
+	 * submitting a page that is not aligned to the logical block size,
+	 * then we take a copy into a linear buffer and submit that instead.
+	 * This should be impossible on a 512b LBS, and fairly rare on 4K,
+	 * usually requiring abnormally-small data blocks (eg gang blocks)
+	 * mixed into the same ABD as larger ones (eg aggregated).
+	 */
+	abd_t *abd = zio->io_abd;
+	if (!vdev_disk_check_pages(abd, zio->io_size, bdev)) {
+		void *buf;
+		if (zio->io_type == ZIO_TYPE_READ)
+			buf = abd_borrow_buf(zio->io_abd, zio->io_size);
+		else
+			buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
+
+		/*
+		 * Wrap the copy in an abd_t, so we can use the same iterators
+		 * to count and fill the vbio later.
+		 */
+		abd = abd_get_from_buf(buf, zio->io_size);
+
+		/*
+		 * False here would mean the borrowed copy has an invalid
+		 * alignment too, which would mean we've somehow been passed a
+		 * linear ABD with an interior page that has a non-zero offset
+		 * or a size not a multiple of PAGE_SIZE. This is not possible.
+		 * It would mean either zio_buf_alloc() or its underlying
+		 * allocators have done something extremely strange, or our
+		 * math in vdev_disk_check_pages() is wrong. In either case,
+		 * something is seriously wrong and it's not safe to continue.
+		 */
+		VERIFY(vdev_disk_check_pages(abd, zio->io_size, bdev));
+	}
+
+	/* Allocate vbio, with a pointer to the borrowed ABD if necessary */
+	vbio_t *vbio = vbio_alloc(zio, bdev, flags);
+	if (abd != zio->io_abd)
+		vbio->vbio_abd = abd;
+
+	/* Fill it with data pages and submit it to the kernel */
+	vbio_submit(vbio, abd, zio->io_size);
+	return (0);
+}
+
+/* ========== */
+
+/*
+ * This is the classic, battle-tested BIO submission code. Until we're totally
+ * sure that the new code is safe and correct in all cases, this will remain
+ * available and can be enabled by setting zfs_vdev_disk_classic=1 at module
+ * load time.
+ *
+ * These functions have been renamed to vdev_classic_* to make it clear what
+ * they belong to, but their implementations are unchanged.
+ */
+
+/*
+ * Virtual device vector for disks.
+ */
+typedef struct dio_request {
+	zio_t			*dr_zio;	/* Parent ZIO */
+	atomic_t		dr_ref;		/* References */
+	int			dr_error;	/* Bio error */
+	int			dr_bio_count;	/* Count of bio's */
+	struct bio		*dr_bio[];	/* Attached bio's */
+} dio_request_t;
+
+static dio_request_t *
+vdev_classic_dio_alloc(int bio_count)
+{
+	dio_request_t *dr = kmem_zalloc(sizeof (dio_request_t) +
+	    sizeof (struct bio *) * bio_count, KM_SLEEP);
+	atomic_set(&dr->dr_ref, 0);
+	dr->dr_bio_count = bio_count;
+	dr->dr_error = 0;
+
+	for (int i = 0; i < dr->dr_bio_count; i++)
+		dr->dr_bio[i] = NULL;
+
+	return (dr);
+}
+
+static void
+vdev_classic_dio_free(dio_request_t *dr)
+{
+	int i;
+
+	for (i = 0; i < dr->dr_bio_count; i++)
+		if (dr->dr_bio[i])
+			bio_put(dr->dr_bio[i]);
+
+	kmem_free(dr, sizeof (dio_request_t) +
+	    sizeof (struct bio *) * dr->dr_bio_count);
+}
+
+static void
+vdev_classic_dio_get(dio_request_t *dr)
+{
+	atomic_inc(&dr->dr_ref);
+}
+
+static void
+vdev_classic_dio_put(dio_request_t *dr)
+{
+	int rc = atomic_dec_return(&dr->dr_ref);
+
+	/*
+	 * Free the dio_request when the last reference is dropped and
+	 * ensure zio_delay_interrupt is called only once with the correct zio
+	 */
+	if (rc == 0) {
+		zio_t *zio = dr->dr_zio;
+		int error = dr->dr_error;
+
+		vdev_classic_dio_free(dr);
+
+		if (zio) {
+			zio->io_error = error;
+			ASSERT3S(zio->io_error, >=, 0);
+			if (zio->io_error)
+				vdev_disk_error(zio);
+
+			zio_delay_interrupt(zio);
+		}
+	}
+}
+
+BIO_END_IO_PROTO(vdev_classic_physio_completion, bio, error)
+{
+	dio_request_t *dr = bio->bi_private;
+
+	if (dr->dr_error == 0) {
+#ifdef HAVE_1ARG_BIO_END_IO_T
+		dr->dr_error = BIO_END_IO_ERROR(bio);
+#else
+		if (error)
+			dr->dr_error = -(error);
+		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+			dr->dr_error = EIO;
+#endif
+	}
+
+	/* Drop reference acquired by vdev_classic_physio */
+	vdev_classic_dio_put(dr);
+}
+
 static inline unsigned int
-vdev_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
+vdev_classic_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
 {
 	unsigned long nr_segs = abd_nr_pages_off(zio->io_abd,
 	    bio_size, abd_offset);
@@ -603,9 +1098,16 @@ vdev_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
 }
 
 static int
-__vdev_disk_physio(struct block_device *bdev, zio_t *zio,
-    size_t io_size, uint64_t io_offset, int rw, int flags)
+vdev_classic_physio(zio_t *zio)
 {
+	vdev_t *v = zio->io_vd;
+	vdev_disk_t *vd = v->vdev_tsd;
+	struct block_device *bdev = BDH_BDEV(vd->vd_bdh);
+	size_t io_size = zio->io_size;
+	uint64_t io_offset = zio->io_offset;
+	int rw = zio->io_type == ZIO_TYPE_READ ? READ : WRITE;
+	int flags = 0;
+
 	dio_request_t *dr;
 	uint64_t abd_offset;
 	uint64_t bio_offset;
@@ -628,10 +1130,13 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
 	}
 
 retry:
-	dr = vdev_disk_dio_alloc(bio_count);
+	dr = vdev_classic_dio_alloc(bio_count);
 
-	if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
-		bio_set_flags_failfast(bdev, &flags);
+	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
+	    zio->io_vd->vdev_failfast == B_TRUE) {
+		bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1,
+		    zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4);
+	}
 
 	dr->dr_zio = zio;
 
@@ -660,23 +1165,23 @@ retry:
 		 * this should be rare - see the comment above.
 		 */
 		if (dr->dr_bio_count == i) {
-			vdev_disk_dio_free(dr);
+			vdev_classic_dio_free(dr);
 			bio_count *= 2;
 			goto retry;
 		}
 
-		nr_vecs = vdev_bio_max_segs(zio, bio_size, abd_offset);
+		nr_vecs = vdev_classic_bio_max_segs(zio, bio_size, abd_offset);
 		dr->dr_bio[i] = vdev_bio_alloc(bdev, GFP_NOIO, nr_vecs);
 		if (unlikely(dr->dr_bio[i] == NULL)) {
-			vdev_disk_dio_free(dr);
+			vdev_classic_dio_free(dr);
 			return (SET_ERROR(ENOMEM));
 		}
 
-		/* Matching put called by vdev_disk_physio_completion */
-		vdev_disk_dio_get(dr);
+		/* Matching put called by vdev_classic_physio_completion */
+		vdev_classic_dio_get(dr);
 
 		BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9;
-		dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion;
+		dr->dr_bio[i]->bi_end_io = vdev_classic_physio_completion;
 		dr->dr_bio[i]->bi_private = dr;
 		bio_set_op_attrs(dr->dr_bio[i], rw, flags);
 
@@ -690,7 +1195,7 @@ retry:
 	}
 
 	/* Extra reference to protect dio_request during vdev_submit_bio */
-	vdev_disk_dio_get(dr);
+	vdev_classic_dio_get(dr);
 
 	if (dr->dr_bio_count > 1)
 		blk_start_plug(&plug);
@@ -704,11 +1209,13 @@ retry:
 	if (dr->dr_bio_count > 1)
 		blk_finish_plug(&plug);
 
-	(void) vdev_disk_dio_put(dr);
+	vdev_classic_dio_put(dr);
 
 	return (error);
 }
 
+/* ========== */
+
 BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
 {
 	zio_t *zio = bio->bi_private;
@@ -751,39 +1258,123 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
 	return (0);
 }
 
+BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error)
+{
+	zio_t *zio = bio->bi_private;
+#ifdef HAVE_1ARG_BIO_END_IO_T
+	zio->io_error = BIO_END_IO_ERROR(bio);
+#else
+	zio->io_error = -error;
+#endif
+	bio_put(bio);
+	if (zio->io_error)
+		vdev_disk_error(zio);
+	zio_interrupt(zio);
+}
+
+/*
+ * Wrappers for the different secure erase and discard APIs. We use async
+ * when available; in this case, *biop is set to the last bio in the chain.
+ */
 static int
-vdev_disk_io_trim(zio_t *zio)
+vdev_bdev_issue_secure_erase(zfs_bdev_handle_t *bdh, sector_t sector,
+    sector_t nsect, struct bio **biop)
 {
-	vdev_t *v = zio->io_vd;
-	vdev_disk_t *vd = v->vdev_tsd;
+	*biop = NULL;
+	int error;
 
 #if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
-	if (zio->io_trim_flags & ZIO_TRIM_SECURE) {
-		return (-blkdev_issue_secure_erase(vd->vd_bdev,
-		    zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS));
-	} else {
-		return (-blkdev_issue_discard(vd->vd_bdev,
-		    zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS));
-	}
-#elif defined(HAVE_BLKDEV_ISSUE_DISCARD)
-	unsigned long trim_flags = 0;
-#if defined(BLKDEV_DISCARD_SECURE)
-	if (zio->io_trim_flags & ZIO_TRIM_SECURE)
-		trim_flags |= BLKDEV_DISCARD_SECURE;
+	error = blkdev_issue_secure_erase(BDH_BDEV(bdh),
+	    sector, nsect, GFP_NOFS);
+#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_FLAGS)
+	error = __blkdev_issue_discard(BDH_BDEV(bdh),
+	    sector, nsect, GFP_NOFS, BLKDEV_DISCARD_SECURE, biop);
+#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_FLAGS)
+	error = blkdev_issue_discard(BDH_BDEV(bdh),
+	    sector, nsect, GFP_NOFS, BLKDEV_DISCARD_SECURE);
+#else
+#error "unsupported kernel"
 #endif
-	return (-blkdev_issue_discard(vd->vd_bdev,
-	    zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, trim_flags));
+
+	return (error);
+}
+
+static int
+vdev_bdev_issue_discard(zfs_bdev_handle_t *bdh, sector_t sector,
+    sector_t nsect, struct bio **biop)
+{
+	*biop = NULL;
+	int error;
+
+#if defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_FLAGS)
+	error = __blkdev_issue_discard(BDH_BDEV(bdh),
+	    sector, nsect, GFP_NOFS, 0, biop);
+#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_NOFLAGS)
+	error = __blkdev_issue_discard(BDH_BDEV(bdh),
+	    sector, nsect, GFP_NOFS, biop);
+#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_FLAGS)
+	error = blkdev_issue_discard(BDH_BDEV(bdh),
+	    sector, nsect, GFP_NOFS, 0);
+#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_NOFLAGS)
+	error = blkdev_issue_discard(BDH_BDEV(bdh),
+	    sector, nsect, GFP_NOFS);
 #else
-#error "Unsupported kernel"
+#error "unsupported kernel"
 #endif
+
+	return (error);
 }
 
+/*
+ * Entry point for TRIM ops. This calls the right wrapper for secure erase or
+ * discard, and then does the appropriate finishing work for error vs success
+ * and async vs sync.
+ */
+static int
+vdev_disk_io_trim(zio_t *zio)
+{
+	int error;
+	struct bio *bio;
+
+	zfs_bdev_handle_t *bdh = ((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh;
+	sector_t sector = zio->io_offset >> 9;
+	sector_t nsects = zio->io_size >> 9;
+
+	if (zio->io_trim_flags & ZIO_TRIM_SECURE)
+		error = vdev_bdev_issue_secure_erase(bdh, sector, nsects, &bio);
+	else
+		error = vdev_bdev_issue_discard(bdh, sector, nsects, &bio);
+
+	if (error != 0)
+		return (SET_ERROR(-error));
+
+	if (bio == NULL) {
+		/*
+		 * This was a synchronous op that completed successfully, so
+		 * return it to ZFS immediately.
+		 */
+		zio_interrupt(zio);
+	} else {
+		/*
+		 * This was an asynchronous op; set up completion callback and
+		 * issue it.
+		 */
+		bio->bi_private = zio;
+		bio->bi_end_io = vdev_disk_discard_end_io;
+		vdev_submit_bio(bio);
+	}
+
+	return (0);
+}
+
+int (*vdev_disk_io_rw_fn)(zio_t *zio) = NULL;
+
 static void
 vdev_disk_io_start(zio_t *zio)
 {
 	vdev_t *v = zio->io_vd;
 	vdev_disk_t *vd = v->vdev_tsd;
-	int rw, error;
+	int error;
 
 	/*
 	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
@@ -801,7 +1392,7 @@ vdev_disk_io_start(zio_t *zio)
 	 * If the vdev is closed, it's likely due to a failed reopen and is
 	 * in the UNAVAIL state.  Nothing to be done here but return failure.
 	 */
-	if (vd->vd_bdev == NULL) {
+	if (vd->vd_bdh == NULL) {
 		rw_exit(&vd->vd_lock);
 		zio->io_error = ENXIO;
 		zio_interrupt(zio);
@@ -809,74 +1400,72 @@ vdev_disk_io_start(zio_t *zio)
 	}
 
 	switch (zio->io_type) {
-	case ZIO_TYPE_IOCTL:
+	case ZIO_TYPE_FLUSH:
 
 		if (!vdev_readable(v)) {
-			rw_exit(&vd->vd_lock);
-			zio->io_error = SET_ERROR(ENXIO);
-			zio_interrupt(zio);
-			return;
-		}
-
-		switch (zio->io_cmd) {
-		case DKIOCFLUSHWRITECACHE:
-
-			if (zfs_nocacheflush)
-				break;
-
-			if (v->vdev_nowritecache) {
-				zio->io_error = SET_ERROR(ENOTSUP);
-				break;
-			}
-
-			error = vdev_disk_io_flush(vd->vd_bdev, zio);
+			/* Drive not there, can't flush */
+			error = SET_ERROR(ENXIO);
+		} else if (zfs_nocacheflush) {
+			/* Flushing disabled by operator, declare success */
+			error = 0;
+		} else if (v->vdev_nowritecache) {
+			/* This vdev not capable of flushing */
+			error = SET_ERROR(ENOTSUP);
+		} else {
+			/*
+			 * Issue the flush. If successful, the response will
+			 * be handled in the completion callback, so we're done.
+			 */
+			error = vdev_disk_io_flush(BDH_BDEV(vd->vd_bdh), zio);
 			if (error == 0) {
 				rw_exit(&vd->vd_lock);
 				return;
 			}
-
-			zio->io_error = error;
-
-			break;
-
-		default:
-			zio->io_error = SET_ERROR(ENOTSUP);
 		}
 
+		/* Couldn't issue the flush, so set the error and return it */
 		rw_exit(&vd->vd_lock);
+		zio->io_error = error;
 		zio_execute(zio);
 		return;
-	case ZIO_TYPE_WRITE:
-		rw = WRITE;
-		break;
-
-	case ZIO_TYPE_READ:
-		rw = READ;
-		break;
 
 	case ZIO_TYPE_TRIM:
-		zio->io_error = vdev_disk_io_trim(zio);
+		error = vdev_disk_io_trim(zio);
 		rw_exit(&vd->vd_lock);
-		zio_interrupt(zio);
+		if (error) {
+			zio->io_error = error;
+			zio_execute(zio);
+		}
 		return;
 
-	default:
+	case ZIO_TYPE_READ:
+	case ZIO_TYPE_WRITE:
+		zio->io_target_timestamp = zio_handle_io_delay(zio);
+		error = vdev_disk_io_rw_fn(zio);
 		rw_exit(&vd->vd_lock);
-		zio->io_error = SET_ERROR(ENOTSUP);
-		zio_interrupt(zio);
+		if (error) {
+			zio->io_error = error;
+			zio_interrupt(zio);
+		}
 		return;
-	}
 
-	zio->io_target_timestamp = zio_handle_io_delay(zio);
-	error = __vdev_disk_physio(vd->vd_bdev, zio,
-	    zio->io_size, zio->io_offset, rw, 0);
-	rw_exit(&vd->vd_lock);
+	default:
+		/*
+		 * Getting here means our parent vdev has made a very strange
+		 * request of us, which shouldn't happen. Assert here to force
+		 * a crash in dev builds, but in production return the IO
+		 * unhandled. The pool will likely suspend anyway but that's
+		 * nicer than crashing the kernel.
+		 */
+		ASSERT3S(zio->io_type, ==, -1);
 
-	if (error) {
-		zio->io_error = error;
+		rw_exit(&vd->vd_lock);
+		zio->io_error = SET_ERROR(ENOTSUP);
 		zio_interrupt(zio);
 		return;
 	}
+
+	__builtin_unreachable();
 }
 
 static void
@@ -891,8 +1480,8 @@ vdev_disk_io_done(zio_t *zio)
 		vdev_t *v = zio->io_vd;
 		vdev_disk_t *vd = v->vdev_tsd;
 
-		if (zfs_check_media_change(vd->vd_bdev)) {
-			invalidate_bdev(vd->vd_bdev);
+		if (!zfs_check_disk_status(BDH_BDEV(vd->vd_bdh))) {
+			invalidate_bdev(BDH_BDEV(vd->vd_bdh));
 			v->vdev_remove_wanted = B_TRUE;
 			spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
 		}
@@ -925,8 +1514,49 @@ vdev_disk_rele(vdev_t *vd)
 	/* XXX: Implement me as a vnode rele for the device */
 }
 
+/*
+ * BIO submission method. See comment above about vdev_classic.
+ * Set zfs_vdev_disk_classic=0 for new, =1 for classic
+ */
+static uint_t zfs_vdev_disk_classic = 0;	/* default new */
+
+/* Set submission function from module parameter */
+static int
+vdev_disk_param_set_classic(const char *buf, zfs_kernel_param_t *kp)
+{
+	int err = param_set_uint(buf, kp);
+	if (err < 0)
+		return (SET_ERROR(err));
+
+	vdev_disk_io_rw_fn =
+	    zfs_vdev_disk_classic ? vdev_classic_physio : vdev_disk_io_rw;
+
+	printk(KERN_INFO "ZFS: forcing %s BIO submission\n",
+	    zfs_vdev_disk_classic ? "classic" : "new");
+
+	return (0);
+}
+
+/*
+ * At first vdev use, set the submission function from the default value if
+ * it hasn't been set already.
+ */
+static int
+vdev_disk_init(spa_t *spa, nvlist_t *nv, void **tsd)
+{
+	(void) spa;
+	(void) nv;
+	(void) tsd;
+
+	if (vdev_disk_io_rw_fn == NULL)
+		vdev_disk_io_rw_fn = zfs_vdev_disk_classic ?
+		    vdev_classic_physio : vdev_disk_io_rw;
+
+	return (0);
+}
+
 vdev_ops_t vdev_disk_ops = {
-	.vdev_op_init = NULL,
+	.vdev_op_init = vdev_disk_init,
 	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_disk_open,
 	.vdev_op_close = vdev_disk_close,
@@ -947,7 +1577,8 @@ vdev_ops_t vdev_disk_ops = {
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
-	.vdev_op_leaf = B_TRUE			/* leaf vdev */
+	.vdev_op_leaf = B_TRUE,			/* leaf vdev */
+	.vdev_op_kobj_evt_post = vdev_disk_kobj_evt_post
 };
 
 /*
@@ -976,17 +1607,17 @@ MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler");
 int
 param_set_min_auto_ashift(const char *buf, zfs_kernel_param_t *kp)
 {
-	uint64_t val;
+	uint_t val;
 	int error;
 
-	error = kstrtoull(buf, 0, &val);
+	error = kstrtouint(buf, 0, &val);
 	if (error < 0)
 		return (SET_ERROR(error));
 
 	if (val < ASHIFT_MIN || val > zfs_vdev_max_auto_ashift)
 		return (SET_ERROR(-EINVAL));
 
-	error = param_set_ulong(buf, kp);
+	error = param_set_uint(buf, kp);
 	if (error < 0)
 		return (SET_ERROR(error));
 
@@ -996,19 +1627,32 @@ param_set_min_auto_ashift(const char *buf, zfs_kernel_param_t *kp)
 int
 param_set_max_auto_ashift(const char *buf, zfs_kernel_param_t *kp)
 {
-	uint64_t val;
+	uint_t val;
 	int error;
 
-	error = kstrtoull(buf, 0, &val);
+	error = kstrtouint(buf, 0, &val);
 	if (error < 0)
 		return (SET_ERROR(error));
 
 	if (val > ASHIFT_MAX || val < zfs_vdev_min_auto_ashift)
 		return (SET_ERROR(-EINVAL));
 
-	error = param_set_ulong(buf, kp);
+	error = param_set_uint(buf, kp);
 	if (error < 0)
 		return (SET_ERROR(error));
 
 	return (0);
 }
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, open_timeout_ms, UINT, ZMOD_RW,
+	"Timeout before determining that a device is missing");
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, failfast_mask, UINT, ZMOD_RW,
+	"Defines failfast mask: 1 - device, 2 - transport, 4 - driver");
+
+ZFS_MODULE_PARAM(zfs_vdev_disk, zfs_vdev_disk_, max_segs, UINT, ZMOD_RW,
+	"Maximum number of data segments to add to an IO request (min 4)");
+
+ZFS_MODULE_PARAM_CALL(zfs_vdev_disk, zfs_vdev_disk_, classic,
+    vdev_disk_param_set_classic, param_get_uint, ZMOD_RD,
+	"Use classic BIO submission method");
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_file.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_file.c
index f073145326e3..ac41a2615f16 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_file.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_file.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -53,8 +53,8 @@ static taskq_t *vdev_file_taskq;
  * impact the vdev_ashift setting which can only be set at vdev creation
  * time.
  */
-static unsigned long vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
-static unsigned long vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;
+static uint_t vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
+static uint_t vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;
 
 static void
 vdev_file_hold(vdev_t *vd)
@@ -242,7 +242,7 @@ vdev_file_io_start(zio_t *zio)
 	vdev_t *vd = zio->io_vd;
 	vdev_file_t *vf = vd->vdev_tsd;
 
-	if (zio->io_type == ZIO_TYPE_IOCTL) {
+	if (zio->io_type == ZIO_TYPE_FLUSH) {
 		/* XXPOLICY */
 		if (!vdev_readable(vd)) {
 			zio->io_error = SET_ERROR(ENXIO);
@@ -250,33 +250,27 @@ vdev_file_io_start(zio_t *zio)
 			return;
 		}
 
-		switch (zio->io_cmd) {
-		case DKIOCFLUSHWRITECACHE:
-
-			if (zfs_nocacheflush)
-				break;
-
-			/*
-			 * We cannot safely call vfs_fsync() when PF_FSTRANS
-			 * is set in the current context.  Filesystems like
-			 * XFS include sanity checks to verify it is not
-			 * already set, see xfs_vm_writepage().  Therefore
-			 * the sync must be dispatched to a different context.
-			 */
-			if (__spl_pf_fstrans_check()) {
-				VERIFY3U(taskq_dispatch(vdev_file_taskq,
-				    vdev_file_io_fsync, zio, TQ_SLEEP), !=,
-				    TASKQID_INVALID);
-				return;
-			}
-
-			zio->io_error = zfs_file_fsync(vf->vf_file,
-			    O_SYNC | O_DSYNC);
-			break;
-		default:
-			zio->io_error = SET_ERROR(ENOTSUP);
+		if (zfs_nocacheflush) {
+			zio_execute(zio);
+			return;
 		}
 
+		/*
+		 * We cannot safely call vfs_fsync() when PF_FSTRANS
+		 * is set in the current context.  Filesystems like
+		 * XFS include sanity checks to verify it is not
+		 * already set, see xfs_vm_writepage().  Therefore
+		 * the sync must be dispatched to a different context.
+		 */
+		if (__spl_pf_fstrans_check()) {
+			VERIFY3U(taskq_dispatch(vdev_file_taskq,
+			    vdev_file_io_fsync, zio, TQ_SLEEP), !=,
+			    TASKQID_INVALID);
+			return;
+		}
+
+		zio->io_error = zfs_file_fsync(vf->vf_file, O_SYNC | O_DSYNC);
+
 		zio_execute(zio);
 		return;
 	} else if (zio->io_type == ZIO_TYPE_TRIM) {
@@ -376,7 +370,7 @@ vdev_ops_t vdev_disk_ops = {
 
 #endif
 
-ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, UINT, ZMOD_RW,
 	"Logical ashift for file-based devices");
-ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, UINT, ZMOD_RW,
 	"Physical ashift for file-based devices");
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_label_os.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_label_os.c
new file mode 100644
index 000000000000..3d965b89a962
--- /dev/null
+++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_label_os.c
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2023 by iXsystems, Inc.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/vdev.h>
+#include <sys/vdev_impl.h>
+
+/*
+ * Check if the reserved boot area is in-use.
+ *
+ * This function always returns 0, as there are no known external uses
+ * of the reserved area on Linux.
+ */
+int
+vdev_check_boot_reserve(spa_t *spa, vdev_t *childvd)
+{
+	(void) spa;
+	(void) childvd;
+
+	return (0);
+}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
index b70691ab31c1..48abbc010917 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -493,10 +493,8 @@ zfs_acl_release_nodes(zfs_acl_t *aclp)
 {
 	zfs_acl_node_t *aclnode;
 
-	while ((aclnode = list_head(&aclp->z_acl))) {
-		list_remove(&aclp->z_acl, aclnode);
+	while ((aclnode = list_remove_head(&aclp->z_acl)))
 		zfs_acl_node_free(aclnode);
-	}
 	aclp->z_acl_count = 0;
 	aclp->z_acl_bytes = 0;
 }
@@ -525,7 +523,7 @@ zfs_acl_valid_ace_type(uint_t type, uint_t flags)
 		    entry_type == ACE_EVERYONE || entry_type == 0 ||
 		    entry_type == ACE_IDENTIFIER_GROUP);
 	default:
-		if (type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE)
+		if (type <= MAX_ACE_TYPE)
 			return (B_TRUE);
 	}
 	return (B_FALSE);
@@ -629,18 +627,18 @@ zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who,
 	return (NULL);
 }
 
-static uint64_t
-zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt,
+static uintptr_t
+zfs_ace_walk(void *datap, uintptr_t cookie, int aclcnt,
     uint16_t *flags, uint16_t *type, uint32_t *mask)
 {
 	(void) aclcnt;
 	zfs_acl_t *aclp = datap;
-	zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)(uintptr_t)cookie;
+	zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)cookie;
 	uint64_t who;
 
 	acep = zfs_acl_next_ace(aclp, acep, &who, mask,
 	    flags, type);
-	return ((uint64_t)(uintptr_t)acep);
+	return ((uintptr_t)acep);
 }
 
 /*
@@ -1163,6 +1161,7 @@ zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,
 		cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl,
 		    cb->cb_acl_node);
 	}
+	ASSERT3P(cb->cb_acl_node, !=, NULL);
 	*dataptr = cb->cb_acl_node->z_acldata;
 	*length = cb->cb_acl_node->z_size;
 }
@@ -1284,7 +1283,7 @@ acl_trivial_access_masks(mode_t mode, boolean_t isdir, trivial_acl_t *masks)
  */
 static int
 ace_trivial_common(void *acep, int aclcnt,
-    uint64_t (*walk)(void *, uint64_t, int aclcnt,
+    uintptr_t (*walk)(void *, uintptr_t, int,
     uint16_t *, uint16_t *, uint32_t *))
 {
 	uint16_t flags;
@@ -1801,7 +1800,7 @@ zfs_acl_inherit(zfsvfs_t *zfsvfs, umode_t va_mode, zfs_acl_t *paclp,
  */
 int
 zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
-    vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids)
+    vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids, zidmap_t *mnt_ns)
 {
 	int		error;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
@@ -1888,8 +1887,10 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
 		acl_ids->z_mode |= S_ISGID;
 	} else {
 		if ((acl_ids->z_mode & S_ISGID) &&
-		    secpolicy_vnode_setids_setgids(cr, gid) != 0)
+		    secpolicy_vnode_setids_setgids(cr, gid, mnt_ns,
+		    zfs_i_user_ns(ZTOI(dzp))) != 0) {
 			acl_ids->z_mode &= ~S_ISGID;
+		}
 	}
 
 	if (acl_ids->z_aclp == NULL) {
@@ -1920,8 +1921,8 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
 			    zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH &&
 			    zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH_X)
 				trim = B_TRUE;
-			zfs_acl_chmod(vap->va_mode, acl_ids->z_mode, B_FALSE,
-			    trim, acl_ids->z_aclp);
+			zfs_acl_chmod(S_ISDIR(vap->va_mode), acl_ids->z_mode,
+			    B_FALSE, trim, acl_ids->z_aclp);
 		}
 	}
 
@@ -1977,7 +1978,8 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
 	if (mask == 0)
 		return (SET_ERROR(ENOSYS));
 
-	if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr)))
+	if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr,
+	    zfs_init_idmap)))
 		return (error);
 
 	mutex_enter(&zp->z_acl_lock);
@@ -2136,7 +2138,8 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
 	if (zp->z_pflags & ZFS_IMMUTABLE)
 		return (SET_ERROR(EPERM));
 
-	if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)))
+	if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,
+	    zfs_init_idmap)))
 		return (error);
 
 	error = zfs_vsec_2_aclp(zfsvfs, ZTOI(zp)->i_mode, vsecp, cr, &fuidp,
@@ -2228,8 +2231,7 @@ static int
 zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
 {
 	if ((v4_mode & WRITE_MASK) && (zfs_is_readonly(ZTOZSB(zp))) &&
-	    (!Z_ISDEV(ZTOI(zp)->i_mode) ||
-	    (Z_ISDEV(ZTOI(zp)->i_mode) && (v4_mode & WRITE_MASK_ATTRS)))) {
+	    (!Z_ISDEV(ZTOI(zp)->i_mode) || (v4_mode & WRITE_MASK_ATTRS))) {
 		return (SET_ERROR(EROFS));
 	}
 
@@ -2282,7 +2284,7 @@ zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
  */
 static int
 zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
-    boolean_t anyaccess, cred_t *cr)
+    boolean_t anyaccess, cred_t *cr, zidmap_t *mnt_ns)
 {
 	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
 	zfs_acl_t	*aclp;
@@ -2298,7 +2300,13 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
 	uid_t		gowner;
 	uid_t		fowner;
 
-	zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
+	if (mnt_ns) {
+		fowner = zfs_uid_to_vfsuid(mnt_ns, zfs_i_user_ns(ZTOI(zp)),
+		    KUID_TO_SUID(ZTOI(zp)->i_uid));
+		gowner = zfs_gid_to_vfsgid(mnt_ns, zfs_i_user_ns(ZTOI(zp)),
+		    KGID_TO_SGID(ZTOI(zp)->i_gid));
+	} else
+		zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
 
 	mutex_enter(&zp->z_acl_lock);
 
@@ -2409,7 +2417,8 @@ zfs_has_access(znode_t *zp, cred_t *cr)
 {
 	uint32_t have = ACE_ALL_PERMS;
 
-	if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) {
+	if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr,
+	    zfs_init_idmap) != 0) {
 		uid_t owner;
 
 		owner = zfs_fuid_map_id(ZTOZSB(zp),
@@ -2439,7 +2448,8 @@ zfs_has_access(znode_t *zp, cred_t *cr)
  * we want to avoid that here.
  */
 static int
-zfs_zaccess_trivial(znode_t *zp, uint32_t *working_mode, cred_t *cr)
+zfs_zaccess_trivial(znode_t *zp, uint32_t *working_mode, cred_t *cr,
+    zidmap_t *mnt_ns)
 {
 	int err, mask;
 	int unmapped = 0;
@@ -2452,8 +2462,9 @@ zfs_zaccess_trivial(znode_t *zp, uint32_t *working_mode, cred_t *cr)
 		return (unmapped ? SET_ERROR(EPERM) : 0);
 	}
 
-#if defined(HAVE_IOPS_PERMISSION_USERNS)
-	err = generic_permission(cr->user_ns, ZTOI(zp), mask);
+#if (defined(HAVE_IOPS_PERMISSION_USERNS) || \
+	defined(HAVE_IOPS_PERMISSION_IDMAP))
+	err = generic_permission(mnt_ns, ZTOI(zp), mask);
 #else
 	err = generic_permission(ZTOI(zp), mask);
 #endif
@@ -2468,7 +2479,7 @@ zfs_zaccess_trivial(znode_t *zp, uint32_t *working_mode, cred_t *cr)
 
 static int
 zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
-    boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
+    boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr, zidmap_t *mnt_ns)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	int err;
@@ -2518,20 +2529,20 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
 	}
 
 	if (zp->z_pflags & ZFS_ACL_TRIVIAL)
-		return (zfs_zaccess_trivial(zp, working_mode, cr));
+		return (zfs_zaccess_trivial(zp, working_mode, cr, mnt_ns));
 
-	return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
+	return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr, mnt_ns));
 }
 
 static int
 zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
-    cred_t *cr)
+    cred_t *cr, zidmap_t *mnt_ns)
 {
 	if (*working_mode != ACE_WRITE_DATA)
 		return (SET_ERROR(EACCES));
 
 	return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,
-	    check_privs, B_FALSE, cr));
+	    check_privs, B_FALSE, cr, mnt_ns));
 }
 
 int
@@ -2566,7 +2577,6 @@ zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
 	}
 
 	if (uid == KUID_TO_SUID(ZTOI(zdp)->i_uid)) {
-		owner = B_TRUE;
 		if (zdp->z_mode & S_IXUSR) {
 			mutex_exit(&zdp->z_acl_lock);
 			return (0);
@@ -2576,7 +2586,6 @@ zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
 		}
 	}
 	if (groupmember(KGID_TO_SGID(ZTOI(zdp)->i_gid), cr)) {
-		groupmbr = B_TRUE;
 		if (zdp->z_mode & S_IXGRP) {
 			mutex_exit(&zdp->z_acl_lock);
 			return (0);
@@ -2596,9 +2605,11 @@ zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
 
 slow:
 	DTRACE_PROBE(zfs__fastpath__execute__access__miss);
-	ZFS_ENTER(ZTOZSB(zdp));
-	error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
-	ZFS_EXIT(ZTOZSB(zdp));
+	if ((error = zfs_enter(ZTOZSB(zdp), FTAG)) != 0)
+		return (error);
+	error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
+	    zfs_init_idmap);
+	zfs_exit(ZTOZSB(zdp), FTAG);
 	return (error);
 }
 
@@ -2609,7 +2620,8 @@ slow:
  * can define any form of access.
  */
 int
-zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
+zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr,
+    zidmap_t *mnt_ns)
 {
 	uint32_t	working_mode;
 	int		error;
@@ -2648,8 +2660,10 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
 		}
 	}
 
-	owner = zfs_fuid_map_id(ZTOZSB(zp), KUID_TO_SUID(ZTOI(zp)->i_uid),
-	    cr, ZFS_OWNER);
+	owner = zfs_uid_to_vfsuid(mnt_ns, zfs_i_user_ns(ZTOI(zp)),
+	    KUID_TO_SUID(ZTOI(zp)->i_uid));
+	owner = zfs_fuid_map_id(ZTOZSB(zp), owner, cr, ZFS_OWNER);
+
 	/*
 	 * Map the bits required to the standard inode flags
 	 * S_IRUSR|S_IWUSR|S_IXUSR in the needed_bits.  Map the bits
@@ -2674,7 +2688,7 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
 		needed_bits |= S_IXUSR;
 
 	if ((error = zfs_zaccess_common(check_zp, mode, &working_mode,
-	    &check_privs, skipaclchk, cr)) == 0) {
+	    &check_privs, skipaclchk, cr, mnt_ns)) == 0) {
 		if (is_attr)
 			zrele(xzp);
 		return (secpolicy_vnode_access2(cr, ZTOI(zp), owner,
@@ -2688,7 +2702,8 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
 	}
 
 	if (error && (flags & V_APPEND)) {
-		error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr);
+		error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr,
+		    mnt_ns);
 	}
 
 	if (error && check_privs) {
@@ -2699,7 +2714,6 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
 		 * read_acl/read_attributes
 		 */
 
-		error = 0;
 		ASSERT(working_mode != 0);
 
 		if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) &&
@@ -2755,20 +2769,22 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
  * NFSv4-style ZFS ACL format and call zfs_zaccess()
  */
 int
-zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr)
+zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr,
+    zidmap_t *mnt_ns)
 {
-	return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr));
+	return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr,
+	    mnt_ns));
 }
 
 /*
  * Access function for secpolicy_vnode_setattr
  */
 int
-zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr)
+zfs_zaccess_unix(void *zp, int mode, cred_t *cr)
 {
 	int v4_mode = zfs_unix_to_v4(mode >> 6);
 
-	return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr));
+	return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr, zfs_init_idmap));
 }
 
 /* See zfs_zaccess_delete() */
@@ -2845,7 +2861,7 @@ static const boolean_t zfs_write_implies_delete_child = B_TRUE;
  * zfs_write_implies_delete_child
  */
 int
-zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
+zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr, zidmap_t *mnt_ns)
 {
 	uint32_t wanted_dirperms;
 	uint32_t dzp_working_mode = 0;
@@ -2872,7 +2888,7 @@ zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
 	 * (This is part of why we're checking the target first.)
 	 */
 	zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode,
-	    &zpcheck_privs, B_FALSE, cr);
+	    &zpcheck_privs, B_FALSE, cr, mnt_ns);
 	if (zp_error == EACCES) {
 		/* We hit a DENY ACE. */
 		if (!zpcheck_privs)
@@ -2894,7 +2910,7 @@ zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
 	if (zfs_write_implies_delete_child)
 		wanted_dirperms |= ACE_WRITE_DATA;
 	dzp_error = zfs_zaccess_common(dzp, wanted_dirperms,
-	    &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr);
+	    &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr, mnt_ns);
 	if (dzp_error == EACCES) {
 		/* We hit a DENY ACE. */
 		if (!dzpcheck_privs)
@@ -2976,7 +2992,7 @@ zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
 
 int
 zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
-    znode_t *tzp, cred_t *cr)
+    znode_t *tzp, cred_t *cr, zidmap_t *mnt_ns)
 {
 	int add_perm;
 	int error;
@@ -2998,21 +3014,21 @@ zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
 	 * If that succeeds then check for add_file/add_subdir permissions
 	 */
 
-	if ((error = zfs_zaccess_delete(sdzp, szp, cr)))
+	if ((error = zfs_zaccess_delete(sdzp, szp, cr, mnt_ns)))
 		return (error);
 
 	/*
 	 * If we have a tzp, see if we can delete it?
 	 */
 	if (tzp) {
-		if ((error = zfs_zaccess_delete(tdzp, tzp, cr)))
+		if ((error = zfs_zaccess_delete(tdzp, tzp, cr, mnt_ns)))
 			return (error);
 	}
 
 	/*
 	 * Now check for add permissions
 	 */
-	error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr);
+	error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr, mnt_ns);
 
 	return (error);
 }
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
index aae19f6346fd..54ed70d0394f 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -392,7 +392,20 @@ zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
 
 	zfsctl_snapshot_hold(se);
 	rw_enter(&se->se_taskqid_lock, RW_WRITER);
-	ASSERT3S(se->se_taskqid, ==, TASKQID_INVALID);
+	/*
+	 * If this condition happens, we managed to:
+	 * - dispatch once
+	 * - want to dispatch _again_ before it returned
+	 *
+	 * So let's just return - if that task fails at unmounting,
+	 * we'll eventually dispatch again, and if it succeeds,
+	 * no problem.
+	 */
+	if (se->se_taskqid != TASKQID_INVALID) {
+		rw_exit(&se->se_taskqid_lock);
+		zfsctl_snapshot_rele(se);
+		return;
+	}
 	se->se_taskqid = taskq_dispatch_delay(system_delay_taskq,
 	    snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
 	rw_exit(&se->se_taskqid_lock);
@@ -465,17 +478,19 @@ zfsctl_is_snapdir(struct inode *ip)
  */
 static struct inode *
 zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
-    const struct file_operations *fops, const struct inode_operations *ops)
+    const struct file_operations *fops, const struct inode_operations *ops,
+    uint64_t creation)
 {
-	inode_timespec_t now;
 	struct inode *ip;
 	znode_t *zp;
+	inode_timespec_t now = {.tv_sec = creation};
 
 	ip = new_inode(zfsvfs->z_sb);
 	if (ip == NULL)
 		return (NULL);
 
-	now = current_time(ip);
+	if (!creation)
+		now = current_time(ip);
 	zp = ITOZ(ip);
 	ASSERT3P(zp->z_dirlocks, ==, NULL);
 	ASSERT3P(zp->z_acl_cached, ==, NULL);
@@ -485,9 +500,10 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
 	zp->z_atime_dirty = B_FALSE;
 	zp->z_zn_prefetch = B_FALSE;
 	zp->z_is_sa = B_FALSE;
+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
 	zp->z_is_mapped = B_FALSE;
+#endif
 	zp->z_is_ctldir = B_TRUE;
-	zp->z_is_stale = B_FALSE;
 	zp->z_sa_hdl = NULL;
 	zp->z_blksz = 0;
 	zp->z_seq = 0;
@@ -504,9 +520,9 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
 	ip->i_uid = SUID_TO_KUID(0);
 	ip->i_gid = SGID_TO_KGID(0);
 	ip->i_blkbits = SPA_MINBLOCKSHIFT;
-	ip->i_atime = now;
-	ip->i_mtime = now;
-	ip->i_ctime = now;
+	zpl_inode_set_atime_to_ts(ip, now);
+	zpl_inode_set_mtime_to_ts(ip, now);
+	zpl_inode_set_ctime_to_ts(ip, now);
 	ip->i_fop = fops;
 	ip->i_op = ops;
 #if defined(IOP_XATTR)
@@ -521,7 +537,6 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
 
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
-	zfsvfs->z_nr_znodes++;
 	membar_producer();
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
@@ -538,14 +553,28 @@ zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id,
     const struct file_operations *fops, const struct inode_operations *ops)
 {
 	struct inode *ip = NULL;
+	uint64_t creation = 0;
+	dsl_dataset_t *snap_ds;
+	dsl_pool_t *pool;
 
 	while (ip == NULL) {
 		ip = ilookup(zfsvfs->z_sb, (unsigned long)id);
 		if (ip)
 			break;
 
+		if (id <= ZFSCTL_INO_SNAPDIRS && !creation) {
+			pool = dmu_objset_pool(zfsvfs->z_os);
+			dsl_pool_config_enter(pool, FTAG);
+			if (!dsl_dataset_hold_obj(pool,
+			    ZFSCTL_INO_SNAPDIRS - id, FTAG, &snap_ds)) {
+				creation = dsl_get_creation(snap_ds);
+				dsl_dataset_rele(snap_ds, FTAG);
+			}
+			dsl_pool_config_exit(pool, FTAG);
+		}
+
 		/* May fail due to concurrent zfsctl_inode_alloc() */
-		ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops);
+		ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops, creation);
 	}
 
 	return (ip);
@@ -567,7 +596,7 @@ zfsctl_create(zfsvfs_t *zfsvfs)
 	ASSERT(zfsvfs->z_ctldir == NULL);
 
 	zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT,
-	    &zpl_fops_root, &zpl_ops_root);
+	    &zpl_fops_root, &zpl_ops_root, 0);
 	if (zfsvfs->z_ctldir == NULL)
 		return (SET_ERROR(ENOENT));
 
@@ -673,17 +702,19 @@ zfsctl_fid(struct inode *ip, fid_t *fidp)
 	uint64_t	object = zp->z_id;
 	zfid_short_t	*zfid;
 	int		i;
+	int		error;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	if (zfsctl_is_snapdir(ip)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (zfsctl_snapdir_fid(ip, fidp));
 	}
 
 	if (fidp->fid_len < SHORT_FID_LEN) {
 		fidp->fid_len = SHORT_FID_LEN;
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENOSPC));
 	}
 
@@ -698,7 +729,7 @@ zfsctl_fid(struct inode *ip, fid_t *fidp)
 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
 		zfid->zf_gen[i] = 0;
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -776,7 +807,8 @@ zfsctl_root_lookup(struct inode *dip, const char *name, struct inode **ipp,
 	zfsvfs_t *zfsvfs = ITOZSB(dip);
 	int error = 0;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	if (strcmp(name, "..") == 0) {
 		*ipp = dip->i_sb->s_root->d_inode;
@@ -793,7 +825,7 @@ zfsctl_root_lookup(struct inode *dip, const char *name, struct inode **ipp,
 	if (*ipp == NULL)
 		error = SET_ERROR(ENOENT);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -810,11 +842,12 @@ zfsctl_snapdir_lookup(struct inode *dip, const char *name, struct inode **ipp,
 	uint64_t id;
 	int error;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	error = dmu_snapshot_lookup(zfsvfs->z_os, name, &id);
 	if (error) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -823,7 +856,7 @@ zfsctl_snapdir_lookup(struct inode *dip, const char *name, struct inode **ipp,
 	if (*ipp == NULL)
 		error = SET_ERROR(ENOENT);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -844,7 +877,8 @@ zfsctl_snapdir_rename(struct inode *sdip, const char *snm,
 	if (!zfs_admin_snapshot)
 		return (SET_ERROR(EACCES));
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 	from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
@@ -902,7 +936,7 @@ out:
 	kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
 	kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -922,7 +956,8 @@ zfsctl_snapdir_remove(struct inode *dip, const char *name, cred_t *cr,
 	if (!zfs_admin_snapshot)
 		return (SET_ERROR(EACCES));
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 	real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
@@ -951,7 +986,7 @@ out:
 	kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
 	kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -1076,7 +1111,8 @@ zfsctl_snapshot_mount(struct path *path, int flags)
 		return (SET_ERROR(EISDIR));
 
 	zfsvfs = ITOZSB(ip);
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 	full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
@@ -1164,7 +1200,7 @@ error:
 	kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
 	kmem_free(full_path, MAXPATHLEN);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -1228,10 +1264,11 @@ zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
 	znode_t *dzp;
 	int error;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	if (zfsvfs->z_shares_dir == 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENOTSUP));
 	}
 
@@ -1240,7 +1277,7 @@ zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
 		zrele(dzp);
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c
index be65f0a2e245..f707959c9445 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -29,13 +29,13 @@
 typedef struct zfs_dbgmsg {
 	procfs_list_node_t	zdm_node;
 	uint64_t		zdm_timestamp;
-	int			zdm_size;
-	char			zdm_msg[1]; /* variable length allocation */
+	uint_t			zdm_size;
+	char			zdm_msg[]; /* variable length allocation */
 } zfs_dbgmsg_t;
 
 static procfs_list_t zfs_dbgmsgs;
-static int zfs_dbgmsg_size = 0;
-int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
+static uint_t zfs_dbgmsg_size = 0;
+static uint_t zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
 
 /*
  * Internal ZFS debug messages are enabled by default.
@@ -68,14 +68,14 @@ zfs_dbgmsg_show(struct seq_file *f, void *p)
 }
 
 static void
-zfs_dbgmsg_purge(int max_size)
+zfs_dbgmsg_purge(uint_t max_size)
 {
 	while (zfs_dbgmsg_size > max_size) {
 		zfs_dbgmsg_t *zdm = list_remove_head(&zfs_dbgmsgs.pl_list);
 		if (zdm == NULL)
 			return;
 
-		int size = zdm->zdm_size;
+		uint_t size = zdm->zdm_size;
 		kmem_free(zdm, size);
 		zfs_dbgmsg_size -= size;
 	}
@@ -135,7 +135,7 @@ __set_error(const char *file, const char *func, int line, int err)
 void
 __zfs_dbgmsg(char *buf)
 {
-	int size = sizeof (zfs_dbgmsg_t) + strlen(buf);
+	uint_t size = sizeof (zfs_dbgmsg_t) + strlen(buf) + 1;
 	zfs_dbgmsg_t *zdm = kmem_zalloc(size, KM_SLEEP);
 	zdm->zdm_size = size;
 	zdm->zdm_timestamp = gethrestime_sec();
@@ -144,7 +144,7 @@ __zfs_dbgmsg(char *buf)
 	mutex_enter(&zfs_dbgmsgs.pl_lock);
 	procfs_list_add(&zfs_dbgmsgs, zdm);
 	zfs_dbgmsg_size += size;
-	zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0));
+	zfs_dbgmsg_purge(zfs_dbgmsg_maxsize);
 	mutex_exit(&zfs_dbgmsgs.pl_lock);
 }
 
@@ -175,7 +175,8 @@ __dprintf(boolean_t dprint, const char *file, const char *func,
 		newfile = file;
 	}
 
-	i = snprintf(buf, size, "%s%s:%d:%s(): ", prefix, newfile, line, func);
+	i = snprintf(buf, size, "%px %s%s:%d:%s(): ",
+	    curthread, prefix, newfile, line, func);
 
 	if (i < size) {
 		va_start(adx, fmt);
@@ -252,6 +253,8 @@ zfs_dbgmsg_print(const char *tag)
 module_param(zfs_dbgmsg_enable, int, 0644);
 MODULE_PARM_DESC(zfs_dbgmsg_enable, "Enable ZFS debug message log");
 
-module_param(zfs_dbgmsg_maxsize, int, 0644);
+/* BEGIN CSTYLED */
+module_param(zfs_dbgmsg_maxsize, uint, 0644);
+/* END CSTYLED */
 MODULE_PARM_DESC(zfs_dbgmsg_maxsize, "Maximum ZFS debug log size");
 #endif
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c
index c5b3b5ce7fc0..1eeabe53d23c 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -649,6 +649,8 @@ zfs_rmnode(znode_t *zp)
 	objset_t	*os = zfsvfs->z_os;
 	znode_t		*xzp = NULL;
 	dmu_tx_t	*tx;
+	znode_hold_t	*zh;
+	uint64_t	z_id = zp->z_id;
 	uint64_t	acl_obj;
 	uint64_t	xattr_obj;
 	uint64_t	links;
@@ -666,8 +668,9 @@ zfs_rmnode(znode_t *zp)
 			 * Not enough space to delete some xattrs.
 			 * Leave it in the unlinked set.
 			 */
+			zh = zfs_znode_hold_enter(zfsvfs, z_id);
 			zfs_znode_dmu_fini(zp);
-
+			zfs_znode_hold_exit(zfsvfs, zh);
 			return;
 		}
 	}
@@ -686,7 +689,9 @@ zfs_rmnode(znode_t *zp)
 			 * Not enough space or we were interrupted by unmount.
 			 * Leave the file in the unlinked set.
 			 */
+			zh = zfs_znode_hold_enter(zfsvfs, z_id);
 			zfs_znode_dmu_fini(zp);
+			zfs_znode_hold_exit(zfsvfs, zh);
 			return;
 		}
 	}
@@ -726,7 +731,9 @@ zfs_rmnode(znode_t *zp)
 		 * which point we'll call zfs_unlinked_drain() to process it).
 		 */
 		dmu_tx_abort(tx);
+		zh = zfs_znode_hold_enter(zfsvfs, z_id);
 		zfs_znode_dmu_fini(zp);
+		zfs_znode_hold_exit(zfsvfs, zh);
 		goto out;
 	}
 
@@ -926,6 +933,74 @@ zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx,
 	return (error);
 }
 
+/*
+ * Drop one link from zp (caller holds z_lock) and write the new count via tx.
+ * A last-link drop sets *unlinkedp, or calls zfs_unlinked_add() if it is NULL.
+ */
+static int
+zfs_drop_nlink_locked(znode_t *zp, dmu_tx_t *tx, boolean_t *unlinkedp)
+{
+	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
+	int		zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
+	boolean_t	unlinked = B_FALSE;
+	sa_bulk_attr_t	bulk[3];
+	uint64_t	mtime[2], ctime[2], links;
+	int		count = 0, error;
+
+	if (zp_is_dir && !zfs_dirempty(zp))
+		return (SET_ERROR(ENOTEMPTY));
+
+	if (ZTOI(zp)->i_nlink <= zp_is_dir) {
+		zfs_panic_recover("zfs: link count on %llu is %u, "
+		    "should be at least %u", (unsigned long long)zp->z_id,
+		    (uint_t)ZTOI(zp)->i_nlink, (uint_t)(zp_is_dir + 1));
+		set_nlink(ZTOI(zp), zp_is_dir + 1);
+	}
+	drop_nlink(ZTOI(zp));
+	if (ZTOI(zp)->i_nlink == zp_is_dir) {
+		zp->z_unlinked = B_TRUE;
+		clear_nlink(ZTOI(zp));
+		unlinked = B_TRUE;
+	} else {
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
+		    NULL, &ctime, sizeof (ctime));
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
+		    NULL, &zp->z_pflags, sizeof (zp->z_pflags));
+		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime);
+	}
+	links = ZTOI(zp)->i_nlink;
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
+	    NULL, &links, sizeof (links));
+	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+	ASSERT3U(error, ==, 0);
+
+	if (unlinkedp != NULL)
+		*unlinkedp = unlinked;
+	else if (unlinked)
+		zfs_unlinked_add(zp, tx);
+	return (0);
+}
+
+/*
+ * Take z_lock and forcefully drop an nlink reference from (zp), marking it for
+ * deletion if it was the last link; that is reported in *unlinkedp, or the
+ * znode goes to zfs_unlinked_add() when unlinkedp is NULL. This *must* only be
+ * done to znodes which have already been zfs_link_destroy()'d with ZRENAMING.
+ * It is only used in the error path of zfs_rename(), to fix up the nlink count
+ * if we failed to link the target as well as to re-link the original znodes.
+ */
+int
+zfs_drop_nlink(znode_t *zp, dmu_tx_t *tx, boolean_t *unlinkedp)
+{
+	int error;
+
+	mutex_enter(&zp->z_lock);
+	error = zfs_drop_nlink_locked(zp, tx, unlinkedp);
+	mutex_exit(&zp->z_lock);
+
+	return (error);
+}
+
 /*
  * Unlink zp from dl, and mark zp for deletion if this was the last link. Can
  * fail if zp is a mount point (EBUSY) or a non-empty directory (ENOTEMPTY).
@@ -966,31 +1041,9 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
 			return (error);
 		}
 
-		if (ZTOI(zp)->i_nlink <= zp_is_dir) {
-			zfs_panic_recover("zfs: link count on %lu is %u, "
-			    "should be at least %u", zp->z_id,
-			    (int)ZTOI(zp)->i_nlink, zp_is_dir + 1);
-			set_nlink(ZTOI(zp), zp_is_dir + 1);
-		}
-		drop_nlink(ZTOI(zp));
-		if (ZTOI(zp)->i_nlink == zp_is_dir) {
-			zp->z_unlinked = B_TRUE;
-			clear_nlink(ZTOI(zp));
-			unlinked = B_TRUE;
-		} else {
-			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
-			    NULL, &ctime, sizeof (ctime));
-			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
-			    NULL, &zp->z_pflags, sizeof (zp->z_pflags));
-			zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
-			    ctime);
-		}
-		links = ZTOI(zp)->i_nlink;
-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
-		    NULL, &links, sizeof (links));
-		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
-		count = 0;
-		ASSERT(error == 0);
+		/* The only error is !zfs_dirempty() and we checked earlier. */
+		error = zfs_drop_nlink_locked(zp, tx, &unlinked);
+		ASSERT3U(error, ==, 0);
 		mutex_exit(&zp->z_lock);
 	} else {
 		error = zfs_dropname(dl, zp, dzp, tx, flag);
@@ -1066,11 +1119,8 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xzpp, cred_t *cr)
 
 	*xzpp = NULL;
 
-	if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)))
-		return (error);
-
 	if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
-	    &acl_ids)) != 0)
+	    &acl_ids, zfs_init_idmap)) != 0)
 		return (error);
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zp->z_projid)) {
 		zfs_acl_ids_free(&acl_ids);
@@ -1218,7 +1268,8 @@ zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
 	    cr, ZFS_OWNER);
 
 	if ((uid = crgetuid(cr)) == downer || uid == fowner ||
-	    zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)
+	    zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr,
+	    zfs_init_idmap) == 0)
 		return (0);
 	else
 		return (secpolicy_vnode_remove(cr));
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
index e12f7c3ced43..bc753614be27 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -246,7 +246,7 @@ zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
 {
 	loff_t rc;
 
-	if (*offp < 0 || *offp > MAXOFFSET_T)
+	if (*offp < 0)
 		return (EINVAL);
 
 	rc = vfs_llseek(fp, *offp, whence);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
index 67b864aa77a9..663474ea49ab 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -135,7 +135,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
 
 	vecnum = cmd - ZFS_IOC_FIRST;
 
-	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
+	zc = vmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
 
 	if (ddi_copyin((void *)(uintptr_t)arg, zc, sizeof (zfs_cmd_t), 0)) {
 		error = -SET_ERROR(EFAULT);
@@ -146,7 +146,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
 	if (error == 0 && rc != 0)
 		error = -SET_ERROR(EFAULT);
 out:
-	kmem_free(zc, sizeof (zfs_cmd_t));
+	vmem_free(zc, sizeof (zfs_cmd_t));
 	return (error);
 
 }
@@ -282,6 +282,8 @@ zfsdev_detach(void)
 #define	ZFS_DEBUG_STR	""
 #endif
 
+zidmap_t *zfs_init_idmap;
+
 static int
 openzfs_init_os(void)
 {
@@ -305,6 +307,8 @@ openzfs_init_os(void)
 	printk(KERN_NOTICE "ZFS: Posix ACLs disabled by kernel\n");
 #endif /* CONFIG_FS_POSIX_ACL */
 
+	zfs_init_idmap = (zidmap_t *)zfs_get_init_idmap();
+
 	return (0);
 }
 
@@ -369,8 +373,7 @@ MODULE_ALIAS("zcommon");
 MODULE_ALIAS("zzstd");
 MODULE_DESCRIPTION("ZFS");
 MODULE_AUTHOR(ZFS_META_AUTHOR);
-MODULE_LICENSE("Lua: MIT");
-MODULE_LICENSE("zstd: Dual BSD/GPL");
-MODULE_LICENSE("Dual BSD/GPL");
+MODULE_LICENSE("Dual MIT/GPL"); /* lua */
+MODULE_LICENSE("Dual BSD/GPL"); /* zstd / misc */
 MODULE_LICENSE(ZFS_META_LICENSE);
 MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c
index eb7c5f6166d2..e2431fe8a803 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -279,11 +279,11 @@ zprop_sysfs_show(const char *attr_name, const zprop_desc_t *property,
 
 		for (int i = 0; i < ARRAY_SIZE(type_map); i++) {
 			if (type_map[i].ztm_type & property->pd_types)  {
-				len += snprintf(buf + len, buflen - len, "%s ",
-				    type_map[i].ztm_name);
+				len += kmem_scnprintf(buf + len, buflen - len,
+				    "%s ", type_map[i].ztm_name);
 			}
 		}
-		len += snprintf(buf + len, buflen - len, "\n");
+		len += kmem_scnprintf(buf + len, buflen - len, "\n");
 		return (len);
 	}
 
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
index abb6dbe67cdf..c2ed67c438c6 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -204,22 +204,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
 	this_seg_start = orig_loffset;
 
 	rq_for_each_segment(bv, rq, iter) {
-		if (uio->iter.bio) {
-			/*
-			 * If uio->iter.bio is present, then we know we've saved
-			 * uio->iter from a previous call to this function, and
-			 * we can skip ahead in this rq_for_each_segment() loop
-			 * to where we last left off.  That way, we don't need
-			 * to iterate over tons of segments we've already
-			 * processed - we can just restore the "saved state".
-			 */
-			iter = uio->iter;
-			bv = uio->bv;
-			this_seg_start = uio->uio_loffset;
-			memset(&uio->iter, 0, sizeof (uio->iter));
-			continue;
-		}
-
 		/*
 		 * Lookup what the logical offset of the last byte of this
 		 * segment is.
@@ -260,19 +244,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
 			copied = 1;	/* We copied some data */
 		}
 
-		if (n == 0) {
-			/*
-			 * All done copying.  Save our 'iter' value to the uio.
-			 * This allows us to "save our state" and skip ahead in
-			 * the rq_for_each_segment() loop the next time we call
-			 * call zfs_uiomove_bvec_rq() on this uio (which we
-			 * will be doing for any remaining data in the uio).
-			 */
-			uio->iter = iter; /* make a copy of the struct data */
-			uio->bv = bv;
-			return (0);
-		}
-
 		this_seg_start = this_seg_end + 1;
 	}
 
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
index a67ba821d06f..2015c20d7340 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -56,7 +56,6 @@
 #include <sys/sunddi.h>
 #include <sys/dmu_objset.h>
 #include <sys/dsl_dir.h>
-#include <sys/spa_boot.h>
 #include <sys/objlist.h>
 #include <sys/zpl.h>
 #include <linux/vfs_compat.h>
@@ -274,8 +273,10 @@ zfs_sync(struct super_block *sb, int wait, cred_t *cr)
 		 * Sync a specific filesystem.
 		 */
 		dsl_pool_t *dp;
+		int error;
 
-		ZFS_ENTER(zfsvfs);
+		if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+			return (error);
 		dp = dmu_objset_pool(zfsvfs->z_os);
 
 		/*
@@ -283,14 +284,14 @@ zfs_sync(struct super_block *sb, int wait, cred_t *cr)
 		 * filesystems which may exist on a suspended pool.
 		 */
 		if (spa_suspended(dp->dp_spa)) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (0);
 		}
 
 		if (zfsvfs->z_log != NULL)
 			zil_commit(zfsvfs->z_log, 0);
 
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 	} else {
 		/*
 		 * Sync all ZFS filesystems.  This is what happens when you
@@ -607,7 +608,8 @@ zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
 	}
 
 	if (tmp != *val) {
-		(void) strcpy(setpoint, "temporary");
+		if (setpoint)
+			(void) strcpy(setpoint, "temporary");
 		*val = tmp;
 	}
 	return (0);
@@ -783,9 +785,7 @@ zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
 	}
 
 	error = zfsvfs_create_impl(zfvp, zfsvfs, os);
-	if (error != 0) {
-		dmu_objset_disown(os, B_TRUE, zfsvfs);
-	}
+
 	return (error);
 }
 
@@ -825,6 +825,7 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
 
 	error = zfsvfs_init(zfsvfs, os);
 	if (error != 0) {
+		dmu_objset_disown(os, B_TRUE, zfsvfs);
 		*zfvp = NULL;
 		zfsvfs_free(zfsvfs);
 		return (error);
@@ -848,8 +849,6 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
 	if (error)
 		return (error);
 
-	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
-
 	/*
 	 * If we are not mounting (ie: online recv), then we don't
 	 * have to worry about replaying the log as we blocked all
@@ -857,7 +856,11 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
 	 */
 	if (mounting) {
 		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
-		dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
+		error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
+		if (error)
+			return (error);
+		zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,
+		    &zfsvfs->z_kstat.dk_zil_sums);
 
 		/*
 		 * During replay we remove the read only flag to
@@ -921,6 +924,10 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
 		/* restore readonly bit */
 		if (readonly != 0)
 			readonly_changed_cb(zfsvfs, B_TRUE);
+	} else {
+		ASSERT3P(zfsvfs->z_kstat.dk_kstats, !=, NULL);
+		zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,
+		    &zfsvfs->z_kstat.dk_zil_sums);
 	}
 
 	/*
@@ -1087,7 +1094,8 @@ zfs_statvfs(struct inode *ip, struct kstatfs *statp)
 	uint64_t refdbytes, availbytes, usedobjs, availobjs;
 	int err = 0;
 
-	ZFS_ENTER(zfsvfs);
+	if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (err);
 
 	dmu_objset_space(zfsvfs->z_os,
 	    &refdbytes, &availbytes, &usedobjs, &availobjs);
@@ -1148,7 +1156,7 @@ zfs_statvfs(struct inode *ip, struct kstatfs *statp)
 			err = zfs_statfs_project(zfsvfs, zp, statp, bshift);
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (err);
 }
 
@@ -1158,13 +1166,14 @@ zfs_root(zfsvfs_t *zfsvfs, struct inode **ipp)
 	znode_t *rootzp;
 	int error;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
 	if (error == 0)
 		*ipp = ZTOI(rootzp);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -1185,7 +1194,7 @@ zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
 	int objects = 0;
 	int i = 0, j = 0;
 
-	zp_array = kmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
+	zp_array = vmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
 
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	while ((zp = list_head(&zfsvfs->z_all_znodes)) != NULL) {
@@ -1221,7 +1230,7 @@ zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
 		zrele(zp);
 	}
 
-	kmem_free(zp_array, max_array * sizeof (znode_t *));
+	vmem_free(zp_array, max_array * sizeof (znode_t *));
 
 	return (objects);
 }
@@ -1231,23 +1240,30 @@ zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
  * and inode caches.  This can occur when the ARC needs to free meta data
  * blocks but can't because they are all pinned by entries in these caches.
  */
+#if defined(HAVE_SUPER_BLOCK_S_SHRINK)
+#define	S_SHRINK(sb)	(&(sb)->s_shrink)
+#elif defined(HAVE_SUPER_BLOCK_S_SHRINK_PTR)
+#define	S_SHRINK(sb)	((sb)->s_shrink)
+#endif
+
 int
 zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
 {
 	zfsvfs_t *zfsvfs = sb->s_fs_info;
 	int error = 0;
-	struct shrinker *shrinker = &sb->s_shrink;
+	struct shrinker *shrinker = S_SHRINK(sb);
 	struct shrink_control sc = {
 		.nr_to_scan = nr_to_scan,
 		.gfp_mask = GFP_KERNEL,
 	};
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 #if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \
 	defined(SHRINK_CONTROL_HAS_NID) && \
 	defined(SHRINKER_NUMA_AWARE)
-	if (sb->s_shrink.flags & SHRINKER_NUMA_AWARE) {
+	if (shrinker->flags & SHRINKER_NUMA_AWARE) {
 		*objects = 0;
 		for_each_online_node(sc.nid) {
 			*objects += (*shrinker->scan_objects)(shrinker, &sc);
@@ -1283,7 +1299,7 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
 		*objects = zfs_prune_aliases(zfsvfs, nr_to_scan);
 #endif
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
 	    "pruning, nr_to_scan=%lu objects=%d error=%d\n",
@@ -1320,12 +1336,11 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
 		 * may add the parents of dir-based xattrs to the taskq
 		 * so we want to wait for these.
 		 *
-		 * We can safely read z_nr_znodes without locking because the
-		 * VFS has already blocked operations which add to the
-		 * z_all_znodes list and thus increment z_nr_znodes.
+		 * We can safely check z_all_znodes for being empty because the
+		 * VFS has already blocked operations which add to it.
 		 */
 		int round = 0;
-		while (zfsvfs->z_nr_znodes > 0) {
+		while (!list_is_empty(&zfsvfs->z_all_znodes)) {
 			taskq_wait_outstanding(dsl_pool_zrele_taskq(
 			    dmu_objset_pool(zfsvfs->z_os)), 0);
 			if (++round > 1 && !unmounting)
@@ -1479,7 +1494,7 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
 	 * read-only flag, pretend it was set, as done for snapshots.
 	 */
 	if (!canwrite)
-		vfs->vfs_readonly = true;
+		vfs->vfs_readonly = B_TRUE;
 
 	error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
 	if (error) {
@@ -1513,7 +1528,6 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
 	sb->s_op = &zpl_super_operations;
 	sb->s_xattr = zpl_xattr_handlers;
 	sb->s_export_op = &zpl_export_operations;
-	sb->s_d_op = &zpl_dentry_operations;
 
 	/* Set features for file system. */
 	zfs_set_fuid_feature(zfsvfs);
@@ -1547,6 +1561,7 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
 	error = zfs_root(zfsvfs, &root_inode);
 	if (error) {
 		(void) zfs_umount(sb);
+		zfsvfs = NULL; /* avoid double-free; first in zfs_umount */
 		goto out;
 	}
 
@@ -1554,6 +1569,7 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
 	sb->s_root = d_make_root(root_inode);
 	if (sb->s_root == NULL) {
 		(void) zfs_umount(sb);
+		zfsvfs = NULL; /* avoid double-free; first in zfs_umount */
 		error = SET_ERROR(ENOMEM);
 		goto out;
 	}
@@ -1651,6 +1667,7 @@ zfs_umount(struct super_block *sb)
 	}
 
 	zfsvfs_free(zfsvfs);
+	sb->s_fs_info = NULL;
 	return (0);
 }
 
@@ -1740,7 +1757,8 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 		return (zfsctl_snapdir_vget(sb, objsetid, fid_gen, ipp));
 	}
 
-	ZFS_ENTER(zfsvfs);
+	if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (err);
 	/* A zero fid_gen means we are in the .zfs control directories */
 	if (fid_gen == 0 &&
 	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
@@ -1756,7 +1774,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 			 */
 			VERIFY3P(igrab(*ipp), !=, NULL);
 		}
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
@@ -1764,14 +1782,14 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 
 	dprintf("getting %llu [%llu mask %llx]\n", object, fid_gen, gen_mask);
 	if ((err = zfs_zget(zfsvfs, object, &zp))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (err);
 	}
 
 	/* Don't export xattr stuff */
 	if (zp->z_pflags & ZFS_XATTR) {
 		zrele(zp);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENOENT));
 	}
 
@@ -1786,7 +1804,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 		dprintf("znode gen (%llu) != fid gen (%llu)\n", zp_gen,
 		    fid_gen);
 		zrele(zp);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENOENT));
 	}
 
@@ -1794,7 +1812,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 	if (*ipp)
 		zfs_znode_update_vfs(ITOZ(*ipp));
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -1869,8 +1887,8 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
 	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
 		err2 = zfs_rezget(zp);
 		if (err2) {
+			zpl_d_drop_aliases(ZTOI(zp));
 			remove_inode_hash(ZTOI(zp));
-			zp->z_is_stale = B_TRUE;
 		}
 
 		/* see comment in zfs_suspend_fs() */
@@ -2041,91 +2059,6 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
 }
 
 /*
- * Read a property stored within the master node.
- */
-int
-zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
-{
-	uint64_t *cached_copy = NULL;
-
-	/*
-	 * Figure out where in the objset_t the cached copy would live, if it
-	 * is available for the requested property.
-	 */
-	if (os != NULL) {
-		switch (prop) {
-		case ZFS_PROP_VERSION:
-			cached_copy = &os->os_version;
-			break;
-		case ZFS_PROP_NORMALIZE:
-			cached_copy = &os->os_normalization;
-			break;
-		case ZFS_PROP_UTF8ONLY:
-			cached_copy = &os->os_utf8only;
-			break;
-		case ZFS_PROP_CASE:
-			cached_copy = &os->os_casesensitivity;
-			break;
-		default:
-			break;
-		}
-	}
-	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
-		*value = *cached_copy;
-		return (0);
-	}
-
-	/*
-	 * If the property wasn't cached, look up the file system's value for
-	 * the property. For the version property, we look up a slightly
-	 * different string.
-	 */
-	const char *pname;
-	int error = ENOENT;
-	if (prop == ZFS_PROP_VERSION)
-		pname = ZPL_VERSION_STR;
-	else
-		pname = zfs_prop_to_name(prop);
-
-	if (os != NULL) {
-		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
-		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
-	}
-
-	if (error == ENOENT) {
-		/* No value set, use the default value */
-		switch (prop) {
-		case ZFS_PROP_VERSION:
-			*value = ZPL_VERSION;
-			break;
-		case ZFS_PROP_NORMALIZE:
-		case ZFS_PROP_UTF8ONLY:
-			*value = 0;
-			break;
-		case ZFS_PROP_CASE:
-			*value = ZFS_CASE_SENSITIVE;
-			break;
-		case ZFS_PROP_ACLTYPE:
-			*value = ZFS_ACLTYPE_OFF;
-			break;
-		default:
-			return (error);
-		}
-		error = 0;
-	}
-
-	/*
-	 * If one of the methods for getting the property value above worked,
-	 * copy it into the objset_t's cache.
-	 */
-	if (error == 0 && cached_copy != NULL) {
-		*cached_copy = *value;
-	}
-
-	return (error);
-}
-
-/*
  * Return true if the corresponding vfs's unmounted flag is set.
  * Otherwise return false.
  * If this function returns true we know VFS unmount has been initiated.
@@ -2164,6 +2097,9 @@ zfs_init(void)
 	zfs_znode_init();
 	dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
 	register_filesystem(&zpl_fs_type);
+#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
+	register_fo_extend(&zpl_file_operations);
+#endif
 }
 
 void
@@ -2174,6 +2110,9 @@ zfs_fini(void)
 	 */
 	taskq_wait(system_delay_taskq);
 	taskq_wait(system_taskq);
+#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
+	unregister_fo_extend(&zpl_file_operations);
+#endif
 	unregister_filesystem(&zpl_fs_type);
 	zfs_znode_fini();
 	zfsctl_fini();
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
index d6ff838806eb..1cecad9f7755 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -82,13 +82,13 @@
  * to freed memory.  The example below illustrates the following Big Rules:
  *
  *  (1) A check must be made in each zfs thread for a mounted file system.
- *	This is done avoiding races using ZFS_ENTER(zfsvfs).
- *      A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
- *      must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
+ *	This is done avoiding races using zfs_enter(zfsvfs).
+ *      A zfs_exit(zfsvfs) is needed before all returns.  Any znodes
+ *      must be checked with zfs_verify_zp(zp).  Both of these macros
  *      can return EIO from the calling function.
  *
  *  (2) zrele() should always be the last thing except for zil_commit() (if
- *	necessary) and ZFS_EXIT(). This is for 3 reasons: First, if it's the
+ *	necessary) and zfs_exit(). This is for 3 reasons: First, if it's the
  *	last reference, the vnode/znode can be freed, so the zp may point to
  *	freed memory.  Second, the last reference will call zfs_zinactive(),
  *	which may induce a lot of work -- pushing cached pages (which acquires
@@ -107,7 +107,7 @@
  *      dmu_tx_assign().  This is critical because we don't want to block
  *      while holding locks.
  *
- *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
+ *	If no ZPL locks are held (aside from zfs_enter()), use TXG_WAIT.  This
  *	reduces lock contention and CPU usage when we must wait (note that if
  *	throughput is constrained by the storage, nearly every transaction
  *	must wait).
@@ -142,7 +142,7 @@
  *
  * In general, this is how things should be ordered in each vnode op:
  *
- *	ZFS_ENTER(zfsvfs);		// exit if unmounted
+ *	zfs_enter(zfsvfs);		// exit if unmounted
  * top:
  *	zfs_dirent_lock(&dl, ...)	// lock directory entry (may igrab())
  *	rw_enter(...);			// grab any other locks you need
@@ -160,7 +160,7 @@
  *			goto top;
  *		}
  *		dmu_tx_abort(tx);	// abort DMU tx
- *		ZFS_EXIT(zfsvfs);	// finished in zfs
+ *		zfs_exit(zfsvfs);	// finished in zfs
  *		return (error);		// really out of space
  *	}
  *	error = do_real_work();		// do whatever this VOP does
@@ -171,7 +171,7 @@
  *	zfs_dirent_unlock(dl);		// unlock directory entry
  *	zrele(...);			// release held znodes
  *	zil_commit(zilog, foid);	// synchronous when necessary
- *	ZFS_EXIT(zfsvfs);		// finished in zfs
+ *	zfs_exit(zfsvfs);		// finished in zfs
  *	return (error);			// done, report error
  */
 int
@@ -180,22 +180,29 @@ zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
 	(void) cr;
 	znode_t	*zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	/* Honor ZFS_APPENDONLY file attribute */
-	if ((mode & FMODE_WRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
+	if (blk_mode_is_open_write(mode) && (zp->z_pflags & ZFS_APPENDONLY) &&
 	    ((flag & O_APPEND) == 0)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
-	/* Keep a count of the synchronous opens in the znode */
-	if (flag & O_SYNC)
-		atomic_inc_32(&zp->z_sync_cnt);
+	/*
+	 * Keep a count of the synchronous opens in the znode.  On first
+	 * synchronous open we must convert all previous async transactions
+	 * into sync to keep correct ordering.
+	 */
+	if (flag & O_SYNC) {
+		if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
+			zil_async_to_sync(zfsvfs->z_log, zp->z_id);
+	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -205,56 +212,60 @@ zfs_close(struct inode *ip, int flag, cred_t *cr)
 	(void) cr;
 	znode_t	*zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	/* Decrement the synchronous opens in the znode */
 	if (flag & O_SYNC)
 		atomic_dec_32(&zp->z_sync_cnt);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
 #if defined(_KERNEL)
+
+static int zfs_fillpage(struct inode *ip, struct page *pp);
+
 /*
  * When a file is memory mapped, we must keep the IO data synchronized
- * between the DMU cache and the memory mapped pages.  What this means:
- *
- * On Write:	If we find a memory mapped page, we write to *both*
- *		the page and the dmu buffer.
+ * between the DMU cache and the memory mapped pages.  Update all mapped
+ * pages with the contents of the corresponding dmu buffer.
  */
 void
 update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
 {
-	struct inode *ip = ZTOI(zp);
-	struct address_space *mp = ip->i_mapping;
-	struct page *pp;
-	uint64_t nbytes;
-	int64_t	off;
-	void *pb;
+	struct address_space *mp = ZTOI(zp)->i_mapping;
+	int64_t off = start & (PAGE_SIZE - 1);
 
-	off = start & (PAGE_SIZE-1);
 	for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
-		nbytes = MIN(PAGE_SIZE - off, len);
+		uint64_t nbytes = MIN(PAGE_SIZE - off, len);
 
-		pp = find_lock_page(mp, start >> PAGE_SHIFT);
+		struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
 		if (pp) {
 			if (mapping_writably_mapped(mp))
 				flush_dcache_page(pp);
 
-			pb = kmap(pp);
-			(void) dmu_read(os, zp->z_id, start + off, nbytes,
-			    pb + off, DMU_READ_PREFETCH);
+			void *pb = kmap(pp);
+			int error = dmu_read(os, zp->z_id, start + off,
+			    nbytes, pb + off, DMU_READ_PREFETCH);
 			kunmap(pp);
 
-			if (mapping_writably_mapped(mp))
-				flush_dcache_page(pp);
+			if (error) {
+				SetPageError(pp);
+				ClearPageUptodate(pp);
+			} else {
+				ClearPageError(pp);
+				SetPageUptodate(pp);
+
+				if (mapping_writably_mapped(mp))
+					flush_dcache_page(pp);
+
+				mark_page_accessed(pp);
+			}
 
-			mark_page_accessed(pp);
-			SetPageUptodate(pp);
-			ClearPageError(pp);
 			unlock_page(pp);
 			put_page(pp);
 		}
@@ -265,38 +276,44 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
 }
 
 /*
- * When a file is memory mapped, we must keep the IO data synchronized
- * between the DMU cache and the memory mapped pages.  What this means:
- *
- * On Read:	We "read" preferentially from memory mapped pages,
- *		else we default from the dmu buffer.
- *
- * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
- *	 the file is memory mapped.
+ * When a file is memory mapped, we must keep the I/O data synchronized
+ * between the DMU cache and the memory mapped pages.  Preferentially read
+ * from memory mapped pages, otherwise fall back to reading through the dmu.
  */
 int
 mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
 {
 	struct inode *ip = ZTOI(zp);
 	struct address_space *mp = ip->i_mapping;
-	struct page *pp;
-	int64_t	start, off;
-	uint64_t bytes;
+	int64_t start = uio->uio_loffset;
+	int64_t off = start & (PAGE_SIZE - 1);
 	int len = nbytes;
 	int error = 0;
-	void *pb;
 
-	start = uio->uio_loffset;
-	off = start & (PAGE_SIZE-1);
 	for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
-		bytes = MIN(PAGE_SIZE - off, len);
+		uint64_t bytes = MIN(PAGE_SIZE - off, len);
 
-		pp = find_lock_page(mp, start >> PAGE_SHIFT);
+		struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
 		if (pp) {
-			ASSERT(PageUptodate(pp));
+			/*
+			 * If filemap_fault() retries there exists a window
+			 * where the page will be unlocked and not up to date.
+			 * In this case we must try and fill the page.
+			 */
+			if (unlikely(!PageUptodate(pp))) {
+				error = zfs_fillpage(ip, pp);
+				if (error) {
+					unlock_page(pp);
+					put_page(pp);
+					return (error);
+				}
+			}
+
+			ASSERT(PageUptodate(pp) || PageDirty(pp));
+
 			unlock_page(pp);
 
-			pb = kmap(pp);
+			void *pb = kmap(pp);
 			error = zfs_uiomove(pb + off, bytes, UIO_READ, uio);
 			kunmap(pp);
 
@@ -312,9 +329,11 @@ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
 
 		len -= bytes;
 		off = 0;
+
 		if (error)
 			break;
 	}
+
 	return (error);
 }
 #endif /* _KERNEL */
@@ -449,8 +468,8 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
 		}
 	}
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zdp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zdp, FTAG)) != 0)
+		return (error);
 
 	*zpp = NULL;
 
@@ -460,12 +479,12 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
 		 * Maybe someday we will.
 		 */
 		if (zdp->z_pflags & ZFS_XATTR) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (SET_ERROR(EINVAL));
 		}
 
 		if ((error = zfs_get_xattrdir(zdp, zpp, cr, flags))) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 
@@ -474,17 +493,17 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
 		 */
 
 		if ((error = zfs_zaccess(*zpp, ACE_EXECUTE, 0,
-		    B_TRUE, cr))) {
+		    B_TRUE, cr, zfs_init_idmap))) {
 			zrele(*zpp);
 			*zpp = NULL;
 		}
 
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if (!S_ISDIR(ZTOI(zdp)->i_mode)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENOTDIR));
 	}
 
@@ -492,14 +511,15 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
 	 * Check accessibility of directory.
 	 */
 
-	if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
-		ZFS_EXIT(zfsvfs);
+	if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
+	    zfs_init_idmap))) {
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 
@@ -507,7 +527,7 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
 	if ((error == 0) && (*zpp))
 		zfs_znode_update_vfs(*zpp);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -524,6 +544,7 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
  *		cr	- credentials of caller.
  *		flag	- file flag.
  *		vsecp	- ACL to be set
+ *		mnt_ns	- user namespace of the mount
  *
  *	OUT:	zpp	- znode of created or trunc'd entry.
  *
@@ -535,7 +556,8 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
  */
 int
 zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
-    int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
+    int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp,
+    zidmap_t *mnt_ns)
 {
 	znode_t		*zp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
@@ -550,6 +572,7 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
 	boolean_t	fuid_dirtied;
 	boolean_t	have_acl = B_FALSE;
 	boolean_t	waited = B_FALSE;
+	boolean_t	skip_acl = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 
 	/*
 	 * If we have an ephemeral id, ACL, or XVATTR then
@@ -566,21 +589,21 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	os = zfsvfs->z_os;
 	zilog = zfsvfs->z_log;
 
 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	if (vap->va_mask & ATTR_XVATTR) {
 		if ((error = secpolicy_xvattr((xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_mode)) != 0) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
@@ -609,7 +632,7 @@ top:
 				zfs_acl_ids_free(&acl_ids);
 			if (strcmp(name, "..") == 0)
 				error = SET_ERROR(EISDIR);
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
@@ -622,7 +645,8 @@ top:
 		 * Create a new file object and update the directory
 		 * to reference it.
 		 */
-		if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+		if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, skip_acl, cr,
+		    mnt_ns))) {
 			if (have_acl)
 				zfs_acl_ids_free(&acl_ids);
 			goto out;
@@ -641,7 +665,7 @@ top:
 		}
 
 		if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
-		    cr, vsecp, &acl_ids)) != 0)
+		    cr, vsecp, &acl_ids, mnt_ns)) != 0)
 			goto out;
 		have_acl = B_TRUE;
 
@@ -681,7 +705,7 @@ top:
 			}
 			zfs_acl_ids_free(&acl_ids);
 			dmu_tx_abort(tx);
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 		zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
@@ -714,7 +738,6 @@ top:
 
 		if (have_acl)
 			zfs_acl_ids_free(&acl_ids);
-		have_acl = B_FALSE;
 
 		/*
 		 * A directory entry already exists for this name.
@@ -736,7 +759,8 @@ top:
 		/*
 		 * Verify requested access to file.
 		 */
-		if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
+		if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr,
+		    mnt_ns))) {
 			goto out;
 		}
 
@@ -774,13 +798,14 @@ out:
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 int
 zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
-    int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp)
+    int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp,
+    zidmap_t *mnt_ns)
 {
 	(void) excl, (void) mode, (void) flag;
 	znode_t		*zp = NULL, *dzp = ITOZ(dip);
@@ -808,14 +833,14 @@ zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
 	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	os = zfsvfs->z_os;
 
 	if (vap->va_mask & ATTR_XVATTR) {
 		if ((error = secpolicy_xvattr((xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_mode)) != 0) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
@@ -827,14 +852,14 @@ top:
 	 * Create a new file object and update the directory
 	 * to reference it.
 	 */
-	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
 		if (have_acl)
 			zfs_acl_ids_free(&acl_ids);
 		goto out;
 	}
 
 	if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
-	    cr, vsecp, &acl_ids)) != 0)
+	    cr, vsecp, &acl_ids, mnt_ns)) != 0)
 		goto out;
 	have_acl = B_TRUE;
 
@@ -870,7 +895,7 @@ top:
 		}
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	zfs_mknode(dzp, vap, tx, cr, IS_TMPFILE, &zp, &acl_ids);
@@ -894,7 +919,7 @@ out:
 		*ipp = ZTOI(zp);
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -941,8 +966,8 @@ zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags)
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
 	if (flags & FIGNORECASE) {
@@ -961,11 +986,11 @@ top:
 	    NULL, realnmp))) {
 		if (realnmp)
 			pn_free(realnmp);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
-	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
+	if ((error = zfs_zaccess_delete(dzp, zp, cr, zfs_init_idmap))) {
 		goto out;
 	}
 
@@ -979,7 +1004,7 @@ top:
 
 	mutex_enter(&zp->z_lock);
 	may_delete_now = atomic_read(&ZTOI(zp)->i_count) == 1 &&
-	    !(zp->z_is_mapped);
+	    !zn_has_cached_data(zp, 0, LLONG_MAX);
 	mutex_exit(&zp->z_lock);
 
 	/*
@@ -1042,7 +1067,7 @@ top:
 		zrele(zp);
 		if (xzp)
 			zrele(xzp);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -1067,8 +1092,10 @@ top:
 		    &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
 		delete_now = may_delete_now && !toobig &&
 		    atomic_read(&ZTOI(zp)->i_count) == 1 &&
-		    !(zp->z_is_mapped) && xattr_obj == xattr_obj_unlinked &&
+		    !zn_has_cached_data(zp, 0, LLONG_MAX) &&
+		    xattr_obj == xattr_obj_unlinked &&
 		    zfs_external_acl(zp) == acl_obj;
+		VERIFY_IMPLY(xattr_obj_unlinked, xzp);
 	}
 
 	if (delete_now) {
@@ -1131,7 +1158,7 @@ out:
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -1145,6 +1172,7 @@ out:
  *		cr	- credentials of caller.
  *		flags	- case flags.
  *		vsecp	- ACL to be set
+ *		mnt_ns	- user namespace of the mount
  *
  *	OUT:	zpp	- znode of created directory.
  *
@@ -1157,7 +1185,7 @@ out:
  */
 int
 zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
-    cred_t *cr, int flags, vsecattr_t *vsecp)
+    cred_t *cr, int flags, vsecattr_t *vsecp, zidmap_t *mnt_ns)
 {
 	znode_t		*zp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
@@ -1188,18 +1216,18 @@ zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
 	if (dirname == NULL)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
 	if (dzp->z_pflags & ZFS_XATTR) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(dirname,
 	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
@@ -1208,14 +1236,14 @@ zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
 	if (vap->va_mask & ATTR_XVATTR) {
 		if ((error = secpolicy_xvattr((xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_mode)) != 0) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
 
 	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
-	    vsecp, &acl_ids)) != 0) {
-		ZFS_EXIT(zfsvfs);
+	    vsecp, &acl_ids, mnt_ns)) != 0) {
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	/*
@@ -1231,21 +1259,22 @@ top:
 	if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
 	    NULL, NULL))) {
 		zfs_acl_ids_free(&acl_ids);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
-	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
+	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr,
+	    mnt_ns))) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EDQUOT));
 	}
 
@@ -1277,7 +1306,7 @@ top:
 		}
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -1323,7 +1352,7 @@ out:
 		zfs_znode_update_vfs(dzp);
 		zfs_znode_update_vfs(zp);
 	}
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -1359,8 +1388,8 @@ zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr,
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
 	if (flags & FIGNORECASE)
@@ -1373,11 +1402,11 @@ top:
 	 */
 	if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
 	    NULL, NULL))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
-	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
+	if ((error = zfs_zaccess_delete(dzp, zp, cr, zfs_init_idmap))) {
 		goto out;
 	}
 
@@ -1424,7 +1453,7 @@ top:
 		}
 		dmu_tx_abort(tx);
 		zrele(zp);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -1452,7 +1481,7 @@ out:
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -1491,8 +1520,8 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
 	uint64_t	parent;
 	uint64_t	offset; /* must be unsigned; checks for < 1 */
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
 	    &parent, sizeof (parent))) != 0)
@@ -1587,11 +1616,8 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
 		if (done)
 			break;
 
-		/* Prefetch znode */
-		if (prefetch) {
-			dmu_prefetch(os, objnum, 0, 0, 0,
-			    ZIO_PRIORITY_SYNC_READ);
-		}
+		if (prefetch)
+			dmu_prefetch_dnode(os, objnum, ZIO_PRIORITY_SYNC_READ);
 
 		/*
 		 * Move to the next entry, fill in the previous offset.
@@ -1611,7 +1637,7 @@ update:
 	if (error == ENOENT)
 		error = 0;
 out:
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -1629,20 +1655,29 @@ out:
  *	RETURN:	0 (always succeeds)
  */
 int
-zfs_getattr_fast(struct user_namespace *user_ns, struct inode *ip,
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
+zfs_getattr_fast(zidmap_t *user_ns, u32 request_mask, struct inode *ip,
     struct kstat *sp)
+#else
+zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
+#endif
 {
 	znode_t *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	uint32_t blksize;
 	u_longlong_t nblocks;
+	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	mutex_enter(&zp->z_lock);
 
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
+	zpl_generic_fillattr(user_ns, request_mask, ip, sp);
+#else
 	zpl_generic_fillattr(user_ns, ip, sp);
+#endif
 	/*
 	 * +1 link count for root inode with visible '.zfs' directory.
 	 */
@@ -1673,7 +1708,7 @@ zfs_getattr_fast(struct user_namespace *user_ns, struct inode *ip,
 			    dmu_objset_id(zfsvfs->z_os);
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (0);
 }
@@ -1808,6 +1843,7 @@ next:
  *		flags	- ATTR_UTIME set if non-default time values provided.
  *			- ATTR_NOACLCHECK (CIFS context only).
  *		cr	- credentials of caller.
+ *		mnt_ns	- user namespace of the mount
  *
  *	RETURN:	0 if success
  *		error code if failure
@@ -1816,11 +1852,11 @@ next:
  *	ip - ctime updated, mtime updated if size changed.
  */
 int
-zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
+zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)
 {
 	struct inode	*ip;
 	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
-	objset_t	*os = zfsvfs->z_os;
+	objset_t	*os;
 	zilog_t		*zilog;
 	dmu_tx_t	*tx;
 	vattr_t		oldva;
@@ -1849,9 +1885,10 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 	if (mask == 0)
 		return (0);
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (err);
 	ip = ZTOI(zp);
+	os = zfsvfs->z_os;
 
 	/*
 	 * If this is a xvattr_t, then get a pointer to the structure of
@@ -1862,13 +1899,13 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
 			if (!dmu_objset_projectquota_enabled(os) ||
 			    (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode))) {
-				ZFS_EXIT(zfsvfs);
+				zfs_exit(zfsvfs, FTAG);
 				return (SET_ERROR(ENOTSUP));
 			}
 
 			projid = xoap->xoa_projid;
 			if (unlikely(projid == ZFS_INVALID_PROJID)) {
-				ZFS_EXIT(zfsvfs);
+				zfs_exit(zfsvfs, FTAG);
 				return (SET_ERROR(EINVAL));
 			}
 
@@ -1883,7 +1920,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 		    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
 		    (!dmu_objset_projectquota_enabled(os) ||
 		    (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode)))) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (SET_ERROR(ENOTSUP));
 		}
 	}
@@ -1899,17 +1936,17 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 	    (((mask & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) ||
 	    ((mask & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) ||
 	    (mask & ATTR_XVATTR))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (mask & ATTR_SIZE && S_ISDIR(ip->i_mode)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EISDIR));
 	}
 
 	if (mask & ATTR_SIZE && !S_ISREG(ip->i_mode) && !S_ISFIFO(ip->i_mode)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -1965,7 +2002,8 @@ top:
 	 */
 
 	if (mask & ATTR_SIZE) {
-		err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr);
+		err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr,
+		    mnt_ns);
 		if (err)
 			goto out3;
 
@@ -1990,13 +2028,15 @@ top:
 	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
 	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
 		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
-		    skipaclchk, cr);
+		    skipaclchk, cr, mnt_ns);
 	}
 
 	if (mask & (ATTR_UID|ATTR_GID)) {
 		int	idmask = (mask & (ATTR_UID|ATTR_GID));
 		int	take_owner;
 		int	take_group;
+		uid_t	uid;
+		gid_t	gid;
 
 		/*
 		 * NOTE: even if a new mode is being set,
@@ -2010,9 +2050,13 @@ top:
 		 * Take ownership or chgrp to group we are a member of
 		 */
 
-		take_owner = (mask & ATTR_UID) && (vap->va_uid == crgetuid(cr));
+		uid = zfs_uid_to_vfsuid(mnt_ns, zfs_i_user_ns(ip),
+		    vap->va_uid);
+		gid = zfs_gid_to_vfsgid(mnt_ns, zfs_i_user_ns(ip),
+		    vap->va_gid);
+		take_owner = (mask & ATTR_UID) && (uid == crgetuid(cr));
 		take_group = (mask & ATTR_GID) &&
-		    zfs_groupmember(zfsvfs, vap->va_gid, cr);
+		    zfs_groupmember(zfsvfs, gid, cr);
 
 		/*
 		 * If both ATTR_UID and ATTR_GID are set then take_owner and
@@ -2028,7 +2072,7 @@ top:
 		    ((idmask == ATTR_UID) && take_owner) ||
 		    ((idmask == ATTR_GID) && take_group)) {
 			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
-			    skipaclchk, cr) == 0) {
+			    skipaclchk, cr, mnt_ns) == 0) {
 				/*
 				 * Remove setuid/setgid for non-privileged users
 				 */
@@ -2141,12 +2185,12 @@ top:
 	mutex_exit(&zp->z_lock);
 
 	if (mask & ATTR_MODE) {
-		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
+		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,
+		    mnt_ns) == 0) {
 			err = secpolicy_setid_setsticky_clear(ip, vap,
-			    &oldva, cr);
+			    &oldva, cr, mnt_ns, zfs_i_user_ns(ip));
 			if (err)
 				goto out3;
-
 			trim_mask |= ATTR_MODE;
 		} else {
 			need_policy = TRUE;
@@ -2167,7 +2211,7 @@ top:
 			vap->va_mask &= ~trim_mask;
 		}
 		err = secpolicy_vnode_setattr(cr, ip, vap, &oldva, flags,
-		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
+		    zfs_zaccess_unix, zp);
 		if (err)
 			goto out3;
 
@@ -2395,15 +2439,16 @@ top:
 
 	if ((mask & ATTR_ATIME) || zp->z_atime_dirty) {
 		zp->z_atime_dirty = B_FALSE;
-		ZFS_TIME_ENCODE(&ip->i_atime, atime);
+		inode_timespec_t tmp_atime = zpl_inode_get_atime(ip);
+		ZFS_TIME_ENCODE(&tmp_atime, atime);
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
 		    &atime, sizeof (atime));
 	}
 
 	if (mask & (ATTR_MTIME | ATTR_SIZE)) {
 		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
-		ZTOI(zp)->i_mtime = zpl_inode_timestamp_truncate(
-		    vap->va_mtime, ZTOI(zp));
+		zpl_inode_set_mtime_to_ts(ZTOI(zp),
+		    zpl_inode_timestamp_truncate(vap->va_mtime, ZTOI(zp)));
 
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
 		    mtime, sizeof (mtime));
@@ -2411,8 +2456,8 @@ top:
 
 	if (mask & (ATTR_CTIME | ATTR_SIZE)) {
 		ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
-		ZTOI(zp)->i_ctime = zpl_inode_timestamp_truncate(vap->va_ctime,
-		    ZTOI(zp));
+		zpl_inode_set_ctime_to_ts(ZTOI(zp),
+		    zpl_inode_timestamp_truncate(vap->va_ctime, ZTOI(zp)));
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
 		    ctime, sizeof (ctime));
 	}
@@ -2512,7 +2557,7 @@ out:
 		dmu_tx_commit(tx);
 		if (attrzp) {
 			if (err2 == 0 && handle_eadir)
-				err2 = zfs_setattr_dir(attrzp);
+				err = zfs_setattr_dir(attrzp);
 			zrele(attrzp);
 		}
 		zfs_znode_update_vfs(zp);
@@ -2526,7 +2571,7 @@ out3:
 	kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * bulks);
 	kmem_free(bulk, sizeof (sa_bulk_attr_t) * bulks);
 	kmem_free(tmpxvattr, sizeof (xvattr_t));
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (err);
 }
 
@@ -2637,6 +2682,9 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
  *		tnm	- New entry name.
  *		cr	- credentials of caller.
  *		flags	- case flags
+ *		rflags  - RENAME_* flags
+ *		wo_vap  - attributes for RENAME_WHITEOUT (must be a char 0:0).
+ *		mnt_ns	- user namespace of the mount
  *
  *	RETURN:	0 on success, error code on failure.
  *
@@ -2645,7 +2693,7 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
  */
 int
 zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
-    cred_t *cr, int flags)
+    cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zidmap_t *mnt_ns)
 {
 	znode_t		*szp, *tzp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(sdzp);
@@ -2657,15 +2705,41 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
 	int		error = 0;
 	int		zflg = 0;
 	boolean_t	waited = B_FALSE;
+	/* Needed for whiteout inode creation. */
+	boolean_t	fuid_dirtied;
+	zfs_acl_ids_t	acl_ids;
+	boolean_t	have_acl = B_FALSE;
+	znode_t		*wzp = NULL;
+
 
 	if (snm == NULL || tnm == NULL)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(sdzp);
+	if (rflags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
+		return (SET_ERROR(EINVAL));
+
+	/* Already checked by Linux VFS, but just to make sure. */
+	if (rflags & RENAME_EXCHANGE &&
+	    (rflags & (RENAME_NOREPLACE | RENAME_WHITEOUT)))
+		return (SET_ERROR(EINVAL));
+
+	/*
+	 * Make sure we get wo_vap if and only if RENAME_WHITEOUT is set, and
+	 * that it's the right kind of vattr_t for the whiteout file. These are
+	 * set internally by ZFS so should never be incorrect.
+	 */
+	VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL);
+	VERIFY_IMPLY(wo_vap, wo_vap->va_mode == S_IFCHR);
+	VERIFY_IMPLY(wo_vap, wo_vap->va_rdev == makedevice(0, 0));
+
+	if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
-	ZFS_VERIFY_ZP(tdzp);
+	if ((error = zfs_verify_zp(tdzp)) != 0) {
+		zfs_exit(zfsvfs, FTAG);
+		return (error);
+	}
 
 	/*
 	 * We check i_sb because snapshots and the ctldir must have different
@@ -2673,13 +2747,13 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
 	 */
 	if (ZTOI(tdzp)->i_sb != ZTOI(sdzp)->i_sb ||
 	    zfsctl_is_node(ZTOI(tdzp))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EXDEV));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(tnm,
 	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 
@@ -2697,7 +2771,7 @@ top:
 	 * See the comment in zfs_link() for why this is considered bad.
 	 */
 	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -2727,7 +2801,7 @@ top:
 			 * the rename() function shall return successfully
 			 * and perform no other action."
 			 */
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (0);
 		}
 		/*
@@ -2799,7 +2873,7 @@ top:
 
 		if (strcmp(snm, "..") == 0)
 			serr = EINVAL;
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (serr);
 	}
 	if (terr) {
@@ -2811,7 +2885,7 @@ top:
 
 		if (strcmp(tnm, "..") == 0)
 			terr = EINVAL;
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (terr);
 	}
 
@@ -2834,8 +2908,7 @@ top:
 	 * Note that if target and source are the same, this can be
 	 * done in a single check.
 	 */
-
-	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
+	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr, mnt_ns)))
 		goto out;
 
 	if (S_ISDIR(ZTOI(szp)->i_mode)) {
@@ -2851,17 +2924,19 @@ top:
 	 * Does target exist?
 	 */
 	if (tzp) {
+		if (rflags & RENAME_NOREPLACE) {
+			error = SET_ERROR(EEXIST);
+			goto out;
+		}
 		/*
-		 * Source and target must be the same type.
+		 * Source and target must be the same type (unless exchanging).
 		 */
-		if (S_ISDIR(ZTOI(szp)->i_mode)) {
-			if (!S_ISDIR(ZTOI(tzp)->i_mode)) {
-				error = SET_ERROR(ENOTDIR);
-				goto out;
-			}
-		} else {
-			if (S_ISDIR(ZTOI(tzp)->i_mode)) {
-				error = SET_ERROR(EISDIR);
+		if (!(rflags & RENAME_EXCHANGE)) {
+			boolean_t s_is_dir = S_ISDIR(ZTOI(szp)->i_mode) != 0;
+			boolean_t t_is_dir = S_ISDIR(ZTOI(tzp)->i_mode) != 0;
+
+			if (s_is_dir != t_is_dir) {
+				error = SET_ERROR(s_is_dir ? ENOTDIR : EISDIR);
 				goto out;
 			}
 		}
@@ -2874,12 +2949,43 @@ top:
 			error = 0;
 			goto out;
 		}
+	} else if (rflags & RENAME_EXCHANGE) {
+		/* Target must exist for RENAME_EXCHANGE. */
+		error = SET_ERROR(ENOENT);
+		goto out;
+	}
+
+	/* Set up inode creation for RENAME_WHITEOUT. */
+	if (rflags & RENAME_WHITEOUT) {
+		/*
+		 * Whiteout files are not regular files or directories, so to
+		 * match zfs_create() we do not inherit the project id.
+		 */
+		uint64_t wo_projid = ZFS_DEFAULT_PROJID;
+
+		error = zfs_zaccess(sdzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns);
+		if (error)
+			goto out;
+
+		if (!have_acl) {
+			error = zfs_acl_ids_create(sdzp, 0, wo_vap, cr, NULL,
+			    &acl_ids, mnt_ns);
+			if (error)
+				goto out;
+			have_acl = B_TRUE;
+		}
+
+		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, wo_projid)) {
+			error = SET_ERROR(EDQUOT);
+			goto out;
+		}
 	}
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
-	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
+	dmu_tx_hold_zap(tx, sdzp->z_id,
+	    (rflags & RENAME_EXCHANGE) ? TRUE : FALSE, snm);
 	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
 	if (sdzp != tdzp) {
 		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
@@ -2889,7 +2995,21 @@ top:
 		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
 		zfs_sa_upgrade_txholds(tx, tzp);
 	}
+	if (rflags & RENAME_WHITEOUT) {
+		dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+		    ZFS_SA_BASE_ATTR_SIZE);
 
+		dmu_tx_hold_zap(tx, sdzp->z_id, TRUE, snm);
+		dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
+		if (!zfsvfs->z_use_sa &&
+		    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+			    0, acl_ids.z_aclp->z_acl_bytes);
+		}
+	}
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
 	zfs_sa_upgrade_txholds(tx, szp);
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
@@ -2915,62 +3035,114 @@ top:
 		zrele(szp);
 		if (tzp)
 			zrele(tzp);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
-	if (tzp)	/* Attempt to remove the existing target */
-		error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);
+	/*
+	 * Unlink the source.
+	 */
+	szp->z_pflags |= ZFS_AV_MODIFIED;
+	if (tdzp->z_pflags & ZFS_PROJINHERIT)
+		szp->z_pflags |= ZFS_PROJINHERIT;
 
-	if (error == 0) {
-		error = zfs_link_create(tdl, szp, tx, ZRENAMING);
-		if (error == 0) {
-			szp->z_pflags |= ZFS_AV_MODIFIED;
-			if (tdzp->z_pflags & ZFS_PROJINHERIT)
-				szp->z_pflags |= ZFS_PROJINHERIT;
-
-			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
-			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
+	error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
+	    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
+	VERIFY0(error);
+
+	error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
+	if (error)
+		goto commit;
+
+	/*
+	 * Unlink the target.
+	 */
+	if (tzp) {
+		int tzflg = zflg;
+
+		if (rflags & RENAME_EXCHANGE) {
+			/* This inode will be re-linked soon. */
+			tzflg |= ZRENAMING;
+
+			tzp->z_pflags |= ZFS_AV_MODIFIED;
+			if (sdzp->z_pflags & ZFS_PROJINHERIT)
+				tzp->z_pflags |= ZFS_PROJINHERIT;
+
+			error = sa_update(tzp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
+			    (void *)&tzp->z_pflags, sizeof (uint64_t), tx);
 			ASSERT0(error);
+		}
+		error = zfs_link_destroy(tdl, tzp, tx, tzflg, NULL);
+		if (error)
+			goto commit_link_szp;
+	}
 
-			error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
-			if (error == 0) {
-				zfs_log_rename(zilog, tx, TX_RENAME |
-				    (flags & FIGNORECASE ? TX_CI : 0), sdzp,
-				    sdl->dl_name, tdzp, tdl->dl_name, szp);
-			} else {
-				/*
-				 * At this point, we have successfully created
-				 * the target name, but have failed to remove
-				 * the source name.  Since the create was done
-				 * with the ZRENAMING flag, there are
-				 * complications; for one, the link count is
-				 * wrong.  The easiest way to deal with this
-				 * is to remove the newly created target, and
-				 * return the original error.  This must
-				 * succeed; fortunately, it is very unlikely to
-				 * fail, since we just created it.
-				 */
-				VERIFY3U(zfs_link_destroy(tdl, szp, tx,
-				    ZRENAMING, NULL), ==, 0);
-			}
-		} else {
-			/*
-			 * If we had removed the existing target, subsequent
-			 * call to zfs_link_create() to add back the same entry
-			 * but, the new dnode (szp) should not fail.
-			 */
-			ASSERT(tzp == NULL);
+	/*
+	 * Create the new target links:
+	 *   * We always link the target.
+	 *   * RENAME_EXCHANGE: Link the old target to the source.
+	 *   * RENAME_WHITEOUT: Create a whiteout inode in place of the source.
+	 */
+	error = zfs_link_create(tdl, szp, tx, ZRENAMING);
+	if (error) {
+		/*
+		 * If we have removed the existing target, a subsequent call to
+		 * zfs_link_create() to add back the same entry, but with a new
+		 * dnode (szp), should not fail.
+		 */
+		ASSERT3P(tzp, ==, NULL);
+		goto commit_link_tzp;
+	}
+
+	switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) {
+	case RENAME_EXCHANGE:
+		error = zfs_link_create(sdl, tzp, tx, ZRENAMING);
+		/*
+		 * The same argument as zfs_link_create() failing for
+		 * szp applies here, since the source directory must
+		 * have had an entry we are replacing.
+		 */
+		ASSERT0(error);
+		if (error)
+			goto commit_unlink_td_szp;
+		break;
+	case RENAME_WHITEOUT:
+		zfs_mknode(sdzp, wo_vap, tx, cr, 0, &wzp, &acl_ids);
+		error = zfs_link_create(sdl, wzp, tx, ZNEW);
+		if (error) {
+			zfs_znode_delete(wzp, tx);
+			remove_inode_hash(ZTOI(wzp));
+			goto commit_unlink_td_szp;
 		}
+		break;
 	}
 
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+	switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) {
+	case RENAME_EXCHANGE:
+		zfs_log_rename_exchange(zilog, tx,
+		    (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name,
+		    tdzp, tdl->dl_name, szp);
+		break;
+	case RENAME_WHITEOUT:
+		zfs_log_rename_whiteout(zilog, tx,
+		    (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name,
+		    tdzp, tdl->dl_name, szp, wzp);
+		break;
+	default:
+		ASSERT0(rflags & ~RENAME_NOREPLACE);
+		zfs_log_rename(zilog, tx, (flags & FIGNORECASE ? TX_CI : 0),
+		    sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp);
+		break;
+	}
+
+commit:
 	dmu_tx_commit(tx);
 out:
-	if (zl != NULL)
-		zfs_rename_unlock(&zl);
-
-	zfs_dirent_unlock(sdl);
-	zfs_dirent_unlock(tdl);
+	if (have_acl)
+		zfs_acl_ids_free(&acl_ids);
 
 	zfs_znode_update_vfs(sdzp);
 	if (sdzp == tdzp)
@@ -2981,16 +3153,57 @@ out:
 
 	zfs_znode_update_vfs(szp);
 	zrele(szp);
+	if (wzp) {
+		zfs_znode_update_vfs(wzp);
+		zrele(wzp);
+	}
 	if (tzp) {
 		zfs_znode_update_vfs(tzp);
 		zrele(tzp);
 	}
 
+	if (zl != NULL)
+		zfs_rename_unlock(&zl);
+
+	zfs_dirent_unlock(sdl);
+	zfs_dirent_unlock(tdl);
+
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
+
+	/*
+	 * Clean-up path for broken link state.
+	 *
+	 * At this point we are in a (very) bad state, so we need to do our
+	 * best to correct the state. In particular, all of the nlinks are
+	 * wrong because we were destroying and creating links with ZRENAMING.
+	 *
+	 * In some form, all of these operations have to resolve the state:
+	 *
+	 *  * link_destroy() *must* succeed. Fortunately, this is very likely
+	 *    since we only just created it.
+	 *
+	 *  * link_create()s are allowed to fail (though they shouldn't because
+	 *    we only just unlinked them and are putting the entries back
+	 *    during clean-up). But if they fail, we can just forcefully drop
+	 *    the nlink value to (at the very least) avoid broken nlink values
+	 *    -- though in the case of non-empty directories we will have to
+	 *    panic (otherwise we'd have a leaked directory with a broken ..).
+	 */
+commit_unlink_td_szp:
+	VERIFY0(zfs_link_destroy(tdl, szp, tx, ZRENAMING, NULL));
+commit_link_tzp:
+	if (tzp) {
+		if (zfs_link_create(tdl, tzp, tx, ZRENAMING))
+			VERIFY0(zfs_drop_nlink(tzp, tx, NULL));
+	}
+commit_link_szp:
+	if (zfs_link_create(sdl, szp, tx, ZRENAMING))
+		VERIFY0(zfs_drop_nlink(szp, tx, NULL));
+	goto commit;
 }
 
 /*
@@ -3002,6 +3215,7 @@ out:
  *		link	- Name for new symlink entry.
  *		cr	- credentials of caller.
  *		flags	- case flags
+ *		mnt_ns	- user namespace of the mount
  *
  *	OUT:	zpp	- Znode for new symbolic link.
  *
@@ -3012,7 +3226,7 @@ out:
  */
 int
 zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
-    znode_t **zpp, cred_t *cr, int flags)
+    znode_t **zpp, cred_t *cr, int flags, zidmap_t *mnt_ns)
 {
 	znode_t		*zp;
 	zfs_dirlock_t	*dl;
@@ -3032,26 +3246,26 @@ zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
 		zflg |= ZCILOOK;
 
 	if (len > MAXPATHLEN) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENAMETOOLONG));
 	}
 
 	if ((error = zfs_acl_ids_create(dzp, 0,
-	    vap, cr, NULL, &acl_ids)) != 0) {
-		ZFS_EXIT(zfsvfs);
+	    vap, cr, NULL, &acl_ids, mnt_ns)) != 0) {
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 top:
@@ -3063,21 +3277,21 @@ top:
 	error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL);
 	if (error) {
 		zfs_acl_ids_free(&acl_ids);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
-	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, ZFS_DEFAULT_PROJID)) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EDQUOT));
 	}
 	tx = dmu_tx_create(zfsvfs->z_os);
@@ -3104,7 +3318,7 @@ top:
 		}
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3159,7 +3373,7 @@ top:
 		zrele(zp);
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3185,8 +3399,8 @@ zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr)
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
 	int		error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	mutex_enter(&zp->z_lock);
 	if (zp->z_is_sa)
@@ -3196,7 +3410,7 @@ zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr)
 		error = zfs_sa_readlink(zp, uio);
 	mutex_exit(&zp->z_lock);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3241,8 +3455,8 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(tdzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
 	/*
@@ -3250,11 +3464,14 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 	 * Better choices include ENOTSUP or EISDIR.
 	 */
 	if (S_ISDIR(sip->i_mode)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
-	ZFS_VERIFY_ZP(szp);
+	if ((error = zfs_verify_zp(szp)) != 0) {
+		zfs_exit(zfsvfs, FTAG);
+		return (error);
+	}
 
 	/*
 	 * If we are using project inheritance, means if the directory has
@@ -3265,7 +3482,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 	 */
 	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
 	    tdzp->z_projid != szp->z_projid) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EXDEV));
 	}
 
@@ -3274,7 +3491,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 	 * super blocks.
 	 */
 	if (sip->i_sb != ZTOI(tdzp)->i_sb || zfsctl_is_node(sip)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EXDEV));
 	}
 
@@ -3282,17 +3499,17 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 
 	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
 	    &parent, sizeof (uint64_t))) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	if (parent == zfsvfs->z_shares_dir) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(name,
 	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
@@ -3305,19 +3522,20 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 	 * imposed in attribute space.
 	 */
 	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	owner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(sip->i_uid),
 	    cr, ZFS_OWNER);
 	if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
-	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
-		ZFS_EXIT(zfsvfs);
+	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr,
+	    zfs_init_idmap))) {
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3327,7 +3545,7 @@ top:
 	 */
 	error = zfs_dirent_lock(&dl, tdzp, name, &tzp, zf, NULL, NULL);
 	if (error) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3349,7 +3567,7 @@ top:
 			goto top;
 		}
 		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	/* unmark z_unlinked so zfs_link_create will not reject */
@@ -3391,7 +3609,7 @@ top:
 
 	zfs_znode_update_vfs(tdzp);
 	zfs_znode_update_vfs(szp);
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3444,12 +3662,13 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	caddr_t		va;
 	int		err = 0;
 	uint64_t	mtime[2], ctime[2];
+	inode_timespec_t tmp_ts;
 	sa_bulk_attr_t	bulk[3];
 	int		cnt = 0;
 	struct address_space *mapping;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (err);
 
 	ASSERT(PageLocked(pp));
 
@@ -3461,7 +3680,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	/* Page is beyond end of file */
 	if (pgoff >= offset) {
 		unlock_page(pp);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
@@ -3521,7 +3740,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) {
 		unlock_page(pp);
 		zfs_rangelock_exit(lr);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
@@ -3549,7 +3768,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 #endif
 		}
 
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
@@ -3557,7 +3776,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	if (!clear_page_dirty_for_io(pp)) {
 		unlock_page(pp);
 		zfs_rangelock_exit(lr);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
@@ -3576,11 +3795,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 
-	err = dmu_tx_assign(tx, TXG_NOWAIT);
+	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err != 0) {
-		if (err == ERESTART)
-			dmu_tx_wait(tx);
-
 		dmu_tx_abort(tx);
 #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
 		filemap_dirty_folio(page_mapping(pp), page_folio(pp));
@@ -3592,7 +3808,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 		if (!for_sync)
 			atomic_dec_32(&zp->z_async_writes_cnt);
 		zfs_rangelock_exit(lr);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (err);
 	}
 
@@ -3607,28 +3823,23 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	    &zp->z_pflags, 8);
 
 	/* Preserve the mtime and ctime provided by the inode */
-	ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
-	ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
+	tmp_ts = zpl_inode_get_mtime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, mtime);
+	tmp_ts = zpl_inode_get_ctime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, ctime);
 	zp->z_atime_dirty = B_FALSE;
 	zp->z_seq++;
 
 	err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
 
-	zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
-	    for_sync ? zfs_putpage_sync_commit_cb :
-	    zfs_putpage_async_commit_cb, pp);
-
-	dmu_tx_commit(tx);
-
-	zfs_rangelock_exit(lr);
-
+	boolean_t commit = B_FALSE;
 	if (wbc->sync_mode != WB_SYNC_NONE) {
 		/*
 		 * Note that this is rarely called under writepages(), because
 		 * writepages() normally handles the entire commit for
 		 * performance reasons.
 		 */
-		zil_commit(zfsvfs->z_log, zp->z_id);
+		commit = B_TRUE;
 	} else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
 		/*
 		 * If the caller does not intend to wait synchronously
@@ -3638,12 +3849,23 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 		 * our writeback to complete. Refer to the comment in
 		 * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
 		 */
-		zil_commit(zfsvfs->z_log, zp->z_id);
+		commit = B_TRUE;
 	}
 
+	zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, commit,
+	    for_sync ? zfs_putpage_sync_commit_cb :
+	    zfs_putpage_async_commit_cb, pp);
+
+	dmu_tx_commit(tx);
+
+	zfs_rangelock_exit(lr);
+
+	if (commit)
+		zil_commit(zfsvfs->z_log, zp->z_id);
+
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (err);
 }
 
@@ -3658,6 +3880,7 @@ zfs_dirty_inode(struct inode *ip, int flags)
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
 	dmu_tx_t	*tx;
 	uint64_t	mode, atime[2], mtime[2], ctime[2];
+	inode_timespec_t tmp_ts;
 	sa_bulk_attr_t	bulk[4];
 	int		error = 0;
 	int		cnt = 0;
@@ -3665,8 +3888,8 @@ zfs_dirty_inode(struct inode *ip, int flags)
 	if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os))
 		return (0);
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 #ifdef I_DIRTY_TIME
 	/*
@@ -3702,9 +3925,12 @@ zfs_dirty_inode(struct inode *ip, int flags)
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
 
 	/* Preserve the mode, mtime and ctime provided by the inode */
-	ZFS_TIME_ENCODE(&ip->i_atime, atime);
-	ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
-	ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
+	tmp_ts = zpl_inode_get_atime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, atime);
+	tmp_ts = zpl_inode_get_mtime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, mtime);
+	tmp_ts = zpl_inode_get_ctime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, ctime);
 	mode = ip->i_mode;
 
 	zp->z_mode = mode;
@@ -3714,7 +3940,7 @@ zfs_dirty_inode(struct inode *ip, int flags)
 
 	dmu_tx_commit(tx);
 out:
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3747,7 +3973,9 @@ zfs_inactive(struct inode *ip)
 		if (error) {
 			dmu_tx_abort(tx);
 		} else {
-			ZFS_TIME_ENCODE(&ip->i_atime, atime);
+			inode_timespec_t tmp_atime;
+			tmp_atime = zpl_inode_get_atime(ip);
+			ZFS_TIME_ENCODE(&tmp_atime, atime);
 			mutex_enter(&zp->z_lock);
 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
 			    (void *)&atime, sizeof (atime), tx);
@@ -3766,55 +3994,45 @@ zfs_inactive(struct inode *ip)
  * Fill pages with data from the disk.
  */
 static int
-zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
+zfs_fillpage(struct inode *ip, struct page *pp)
 {
-	znode_t *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
-	objset_t *os;
-	struct page *cur_pp;
-	u_offset_t io_off, total;
-	size_t io_len;
-	loff_t i_size;
-	unsigned page_idx;
-	int err;
+	loff_t i_size = i_size_read(ip);
+	u_offset_t io_off = page_offset(pp);
+	size_t io_len = PAGE_SIZE;
 
-	os = zfsvfs->z_os;
-	io_len = nr_pages << PAGE_SHIFT;
-	i_size = i_size_read(ip);
-	io_off = page_offset(pl[0]);
+	ASSERT3U(io_off, <, i_size);
 
 	if (io_off + io_len > i_size)
 		io_len = i_size - io_off;
 
-	/*
-	 * Iterate over list of pages and read each page individually.
-	 */
-	page_idx = 0;
-	for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
-		caddr_t va;
+	void *va = kmap(pp);
+	int error = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off,
+	    io_len, va, DMU_READ_PREFETCH);
+	if (io_len != PAGE_SIZE)
+		memset((char *)va + io_len, 0, PAGE_SIZE - io_len);
+	kunmap(pp);
 
-		cur_pp = pl[page_idx++];
-		va = kmap(cur_pp);
-		err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
-		    DMU_READ_PREFETCH);
-		kunmap(cur_pp);
-		if (err) {
-			/* convert checksum errors into IO errors */
-			if (err == ECKSUM)
-				err = SET_ERROR(EIO);
-			return (err);
-		}
+	if (error) {
+		/* convert checksum errors into IO errors */
+		if (error == ECKSUM)
+			error = SET_ERROR(EIO);
+
+		SetPageError(pp);
+		ClearPageUptodate(pp);
+	} else {
+		ClearPageError(pp);
+		SetPageUptodate(pp);
 	}
 
-	return (0);
+	return (error);
 }
 
 /*
- * Uses zfs_fillpage to read data from the file and fill the pages.
+ * Uses zfs_fillpage to read data from the file and fill the page.
  *
  *	IN:	ip	 - inode of file to get data from.
- *		pl	 - list of pages to read
- *		nr_pages - number of pages to read
+ *		pp	 - page to read
  *
  *	RETURN:	0 on success, error code on failure.
  *
@@ -3822,24 +4040,22 @@ zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
  *	vp - atime updated
  */
 int
-zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
+zfs_getpage(struct inode *ip, struct page *pp)
 {
-	znode_t	 *zp  = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
-	int	 err;
-
-	if (pl == NULL)
-		return (0);
+	znode_t *zp = ITOZ(ip);
+	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
-	err = zfs_fillpage(ip, pl, nr_pages);
+	error = zfs_fillpage(ip, pp);
+	if (error == 0)
+		dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE);
 
-	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nr_pages*PAGESIZE);
+	zfs_exit(zfsvfs, FTAG);
 
-	ZFS_EXIT(zfsvfs);
-	return (err);
+	return (error);
 }
 
 /*
@@ -3861,28 +4077,29 @@ zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
 	(void) addrp;
 	znode_t  *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
-	if ((vm_flags & VM_WRITE) && (zp->z_pflags &
-	    (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
-		ZFS_EXIT(zfsvfs);
+	if ((vm_flags & VM_WRITE) && (vm_flags & VM_SHARED) &&
+	    (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
 	if ((vm_flags & (VM_READ | VM_EXEC)) &&
 	    (zp->z_pflags & ZFS_AV_QUARANTINED)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EACCES));
 	}
 
 	if (off < 0 || len > MAXOFFSET_T - off) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENXIO));
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -3913,11 +4130,11 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
 	uint64_t	off, len;
 	int		error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	if (cmd != F_FREESP) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -3926,12 +4143,12 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
 	 * so check it explicitly here.
 	 */
 	if (zfs_is_readonly(zfsvfs)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EROFS));
 	}
 
 	if (bfp->l_len < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -3941,8 +4158,9 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
 	 * On Linux we can get here through truncate_range() which
 	 * operates directly on inodes, so we need to check access rights.
 	 */
-	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
-		ZFS_EXIT(zfsvfs);
+	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr,
+	    zfs_init_idmap))) {
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3951,7 +4169,7 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
 
 	error = zfs_freesp(zp, off, len, flag, TRUE);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3966,19 +4184,23 @@ zfs_fid(struct inode *ip, fid_t *fidp)
 	zfid_short_t	*zfid;
 	int		size, i, error;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	if (fidp->fid_len < SHORT_FID_LEN) {
 		fidp->fid_len = SHORT_FID_LEN;
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENOSPC));
 	}
 
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_verify_zp(zp)) != 0) {
+		zfs_exit(zfsvfs, FTAG);
+		return (error);
+	}
 
 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
 	    &gen64, sizeof (uint64_t))) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3999,7 +4221,7 @@ zfs_fid(struct inode *ip, fid_t *fidp)
 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
 		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -4030,5 +4252,4 @@ EXPORT_SYMBOL(zfs_map);
 /* CSTYLED */
 module_param(zfs_delete_blocks, ulong, 0644);
 MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
-
 #endif
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
index dc504b1a120b..b99df188c64b 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -169,8 +169,7 @@ zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags)
 	znode_hold_t *zh = buf;
 
 	mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL);
-	zfs_refcount_create(&zh->zh_refcount);
-	zh->zh_obj = ZFS_NO_OBJECT;
+	zh->zh_refcount = 0;
 
 	return (0);
 }
@@ -182,7 +181,6 @@ zfs_znode_hold_cache_destructor(void *buf, void *arg)
 	znode_hold_t *zh = buf;
 
 	mutex_destroy(&zh->zh_lock);
-	zfs_refcount_destroy(&zh->zh_refcount);
 }
 
 void
@@ -273,7 +271,7 @@ zfs_znode_held(zfsvfs_t *zfsvfs, uint64_t obj)
 	return (held);
 }
 
-static znode_hold_t *
+znode_hold_t *
 zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj)
 {
 	znode_hold_t *zh, *zh_new, search;
@@ -281,43 +279,43 @@ zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj)
 	boolean_t found = B_FALSE;
 
 	zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP);
-	zh_new->zh_obj = obj;
 	search.zh_obj = obj;
 
 	mutex_enter(&zfsvfs->z_hold_locks[i]);
 	zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
 	if (likely(zh == NULL)) {
 		zh = zh_new;
+		zh->zh_obj = obj;
 		avl_add(&zfsvfs->z_hold_trees[i], zh);
 	} else {
 		ASSERT3U(zh->zh_obj, ==, obj);
 		found = B_TRUE;
 	}
-	zfs_refcount_add(&zh->zh_refcount, NULL);
+	zh->zh_refcount++;
+	ASSERT3S(zh->zh_refcount, >, 0);
 	mutex_exit(&zfsvfs->z_hold_locks[i]);
 
 	if (found == B_TRUE)
 		kmem_cache_free(znode_hold_cache, zh_new);
 
 	ASSERT(MUTEX_NOT_HELD(&zh->zh_lock));
-	ASSERT3S(zfs_refcount_count(&zh->zh_refcount), >, 0);
 	mutex_enter(&zh->zh_lock);
 
 	return (zh);
 }
 
-static void
+void
 zfs_znode_hold_exit(zfsvfs_t *zfsvfs, znode_hold_t *zh)
 {
 	int i = ZFS_OBJ_HASH(zfsvfs, zh->zh_obj);
 	boolean_t remove = B_FALSE;
 
 	ASSERT(zfs_znode_held(zfsvfs, zh->zh_obj));
-	ASSERT3S(zfs_refcount_count(&zh->zh_refcount), >, 0);
 	mutex_exit(&zh->zh_lock);
 
 	mutex_enter(&zfsvfs->z_hold_locks[i]);
-	if (zfs_refcount_remove(&zh->zh_refcount, NULL) == 0) {
+	ASSERT3S(zh->zh_refcount, >, 0);
+	if (--zh->zh_refcount == 0) {
 		avl_remove(&zfsvfs->z_hold_trees[i], zh);
 		remove = B_TRUE;
 	}
@@ -359,7 +357,7 @@ zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
 void
 zfs_znode_dmu_fini(znode_t *zp)
 {
-	ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || zp->z_unlinked ||
+	ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) ||
 	    RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock));
 
 	sa_handle_destroy(zp->z_sa_hdl);
@@ -392,7 +390,6 @@ zfs_inode_destroy(struct inode *ip)
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	if (list_link_active(&zp->z_link_node)) {
 		list_remove(&zfsvfs->z_all_znodes, zp);
-		zfsvfs->z_nr_znodes--;
 	}
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
@@ -417,12 +414,21 @@ zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip)
 	switch (ip->i_mode & S_IFMT) {
 	case S_IFREG:
 		ip->i_op = &zpl_inode_operations;
+#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
+		ip->i_fop = &zpl_file_operations.kabi_fops;
+#else
 		ip->i_fop = &zpl_file_operations;
+#endif
 		ip->i_mapping->a_ops = &zpl_address_space_operations;
 		break;
 
 	case S_IFDIR:
+#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER
+		ip->i_flags |= S_IOPS_WRAPPER;
+		ip->i_op = &zpl_dir_inode_operations.ops;
+#else
 		ip->i_op = &zpl_dir_inode_operations;
+#endif
 		ip->i_fop = &zpl_dir_file_operations;
 		ITOZ(ip)->z_zn_prefetch = B_TRUE;
 		break;
@@ -452,7 +458,11 @@ zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip)
 		/* Assume the inode is a file and attempt to continue */
 		ip->i_mode = S_IFREG | 0644;
 		ip->i_op = &zpl_inode_operations;
+#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
+		ip->i_fop = &zpl_file_operations.kabi_fops;
+#else
 		ip->i_fop = &zpl_file_operations;
+#endif
 		ip->i_mapping->a_ops = &zpl_address_space_operations;
 		break;
 	}
@@ -492,13 +502,11 @@ zfs_set_inode_flags(znode_t *zp, struct inode *ip)
 void
 zfs_znode_update_vfs(znode_t *zp)
 {
-	zfsvfs_t	*zfsvfs;
 	struct inode	*ip;
 	uint32_t	blksize;
 	u_longlong_t	i_blocks;
 
 	ASSERT(zp != NULL);
-	zfsvfs = ZTOZSB(zp);
 	ip = ZTOI(zp);
 
 	/* Skip .zfs control nodes which do not exist on disk. */
@@ -534,6 +542,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 	uint64_t links;
 	uint64_t z_uid, z_gid;
 	uint64_t atime[2], mtime[2], ctime[2], btime[2];
+	inode_timespec_t tmp_ts;
 	uint64_t projid = ZFS_DEFAULT_PROJID;
 	sa_bulk_attr_t bulk[12];
 	int count = 0;
@@ -550,9 +559,10 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 	ASSERT3P(zp->z_xattr_cached, ==, NULL);
 	zp->z_unlinked = B_FALSE;
 	zp->z_atime_dirty = B_FALSE;
+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
 	zp->z_is_mapped = B_FALSE;
+#endif
 	zp->z_is_ctldir = B_FALSE;
-	zp->z_is_stale = B_FALSE;
 	zp->z_suspended = B_FALSE;
 	zp->z_sa_hdl = NULL;
 	zp->z_mapcnt = 0;
@@ -604,9 +614,12 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 	if (zp->z_pflags & ZFS_XATTR)
 		zp->z_xattr_parent = parent;
 
-	ZFS_TIME_DECODE(&ip->i_atime, atime);
-	ZFS_TIME_DECODE(&ip->i_mtime, mtime);
-	ZFS_TIME_DECODE(&ip->i_ctime, ctime);
+	ZFS_TIME_DECODE(&tmp_ts, atime);
+	zpl_inode_set_atime_to_ts(ip, tmp_ts);
+	ZFS_TIME_DECODE(&tmp_ts, mtime);
+	zpl_inode_set_mtime_to_ts(ip, tmp_ts);
+	ZFS_TIME_DECODE(&tmp_ts, ctime);
+	zpl_inode_set_ctime_to_ts(ip, tmp_ts);
 	ZFS_TIME_DECODE(&zp->z_btime, btime);
 
 	ip->i_ino = zp->z_id;
@@ -631,7 +644,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
-	zfsvfs->z_nr_znodes++;
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
 	if (links > 0)
@@ -1187,6 +1199,7 @@ zfs_rezget(znode_t *zp)
 	uint64_t gen;
 	uint64_t z_uid, z_gid;
 	uint64_t atime[2], mtime[2], ctime[2], btime[2];
+	inode_timespec_t tmp_ts;
 	uint64_t projid = ZFS_DEFAULT_PROJID;
 	znode_hold_t *zh;
 
@@ -1279,9 +1292,12 @@ zfs_rezget(znode_t *zp)
 	zfs_uid_write(ZTOI(zp), z_uid);
 	zfs_gid_write(ZTOI(zp), z_gid);
 
-	ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime);
-	ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime);
-	ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime);
+	ZFS_TIME_DECODE(&tmp_ts, atime);
+	zpl_inode_set_atime_to_ts(ZTOI(zp), tmp_ts);
+	ZFS_TIME_DECODE(&tmp_ts, mtime);
+	zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts);
+	ZFS_TIME_DECODE(&tmp_ts, ctime);
+	zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts);
 	ZFS_TIME_DECODE(&zp->z_btime, btime);
 
 	if ((uint32_t)gen != ZTOI(zp)->i_generation) {
@@ -1389,21 +1405,24 @@ zfs_zinactive(znode_t *zp)
 boolean_t
 zfs_relatime_need_update(const struct inode *ip)
 {
-	inode_timespec_t now;
+	inode_timespec_t now, tmp_atime, tmp_ts;
 
 	gethrestime(&now);
+	tmp_atime = zpl_inode_get_atime(ip);
 	/*
 	 * In relatime mode, only update the atime if the previous atime
 	 * is earlier than either the ctime or mtime or if at least a day
 	 * has passed since the last update of atime.
 	 */
-	if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0)
+	tmp_ts = zpl_inode_get_mtime(ip);
+	if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0)
 		return (B_TRUE);
 
-	if (zfs_compare_timespec(&ip->i_ctime, &ip->i_atime) >= 0)
+	tmp_ts = zpl_inode_get_ctime(ip);
+	if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0)
 		return (B_TRUE);
 
-	if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60)
+	if ((hrtime_t)now.tv_sec - (hrtime_t)tmp_atime.tv_sec >= 24*60*60)
 		return (B_TRUE);
 
 	return (B_FALSE);
@@ -1426,7 +1445,7 @@ void
 zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
     uint64_t ctime[2])
 {
-	inode_timespec_t now;
+	inode_timespec_t now, tmp_ts;
 
 	gethrestime(&now);
 
@@ -1434,7 +1453,8 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
 
 	if (flag & ATTR_MTIME) {
 		ZFS_TIME_ENCODE(&now, mtime);
-		ZFS_TIME_DECODE(&(ZTOI(zp)->i_mtime), mtime);
+		ZFS_TIME_DECODE(&tmp_ts, mtime);
+		zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts);
 		if (ZTOZSB(zp)->z_use_fuids) {
 			zp->z_pflags |= (ZFS_ARCHIVE |
 			    ZFS_AV_MODIFIED);
@@ -1443,7 +1463,8 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
 
 	if (flag & ATTR_CTIME) {
 		ZFS_TIME_ENCODE(&now, ctime);
-		ZFS_TIME_DECODE(&(ZTOI(zp)->i_ctime), ctime);
+		ZFS_TIME_DECODE(&tmp_ts, ctime);
+		zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts);
 		if (ZTOZSB(zp)->z_use_fuids)
 			zp->z_pflags |= ZFS_ARCHIVE;
 	}
@@ -1641,7 +1662,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
 	 * Zero partial page cache entries.  This must be done under a
 	 * range lock in order to keep the ARC and page cache in sync.
 	 */
-	if (zp->z_is_mapped) {
+	if (zn_has_cached_data(zp, off, off + len - 1)) {
 		loff_t first_page, last_page, page_len;
 		loff_t first_page_offset, last_page_offset;
 
@@ -1864,7 +1885,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
 		/* For the moment we expect all zpl props to be uint64_ts */
 		uint64_t val;
-		char *name;
+		const char *name;
 
 		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
 		VERIFY(nvpair_value_uint64(elem, &val) == 0);
@@ -1883,6 +1904,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 	}
 	ASSERT(version != 0);
 	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
+	ASSERT(error == 0);
 
 	/*
 	 * Create zap object used for SA attribute registration
@@ -1960,7 +1982,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 	}
 
 	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
-	    cr, NULL, &acl_ids));
+	    cr, NULL, &acl_ids, zfs_init_idmap));
 	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
 	ASSERT3P(zp, ==, rootzp);
 	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
@@ -2136,7 +2158,6 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
 	} else if (error != ENOENT) {
 		return (error);
 	}
-	error = 0;
 
 	for (;;) {
 		uint64_t pobj = 0;
@@ -2252,6 +2273,91 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
 	return (error);
 }
 
+/*
+ * Read a property stored within the master node.
+ */
+int
+zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
+{
+	uint64_t *cached_copy = NULL;
+
+	/*
+	 * Figure out where in the objset_t the cached copy would live, if it
+	 * is available for the requested property.
+	 */
+	if (os != NULL) {
+		switch (prop) {
+		case ZFS_PROP_VERSION:
+			cached_copy = &os->os_version;
+			break;
+		case ZFS_PROP_NORMALIZE:
+			cached_copy = &os->os_normalization;
+			break;
+		case ZFS_PROP_UTF8ONLY:
+			cached_copy = &os->os_utf8only;
+			break;
+		case ZFS_PROP_CASE:
+			cached_copy = &os->os_casesensitivity;
+			break;
+		default:
+			break;
+		}
+	}
+	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
+		*value = *cached_copy;
+		return (0);
+	}
+
+	/*
+	 * If the property wasn't cached, look up the file system's value for
+	 * the property. For the version property, we look up a slightly
+	 * different string.
+	 */
+	const char *pname;
+	int error = ENOENT;
+	if (prop == ZFS_PROP_VERSION)
+		pname = ZPL_VERSION_STR;
+	else
+		pname = zfs_prop_to_name(prop);
+
+	if (os != NULL) {
+		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
+		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
+	}
+
+	if (error == ENOENT) {
+		/* No value set, use the default value */
+		switch (prop) {
+		case ZFS_PROP_VERSION:
+			*value = ZPL_VERSION;
+			break;
+		case ZFS_PROP_NORMALIZE:
+		case ZFS_PROP_UTF8ONLY:
+			*value = 0;
+			break;
+		case ZFS_PROP_CASE:
+			*value = ZFS_CASE_SENSITIVE;
+			break;
+		case ZFS_PROP_ACLTYPE:
+			*value = ZFS_ACLTYPE_OFF;
+			break;
+		default:
+			return (error);
+		}
+		error = 0;
+	}
+
+	/*
+	 * If one of the methods for getting the property value above worked,
+	 * copy it into the objset_t's cache.
+	 */
+	if (error == 0 && cached_copy != NULL) {
+		*cached_copy = *value;
+	}
+
+	return (error);
+}
+
 #if defined(_KERNEL)
 EXPORT_SYMBOL(zfs_create_fs);
 EXPORT_SYMBOL(zfs_obj_to_path);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c b/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c
index dcab02b07894..21f3740f6fe6 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c
@@ -223,14 +223,32 @@ int
 zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key)
 {
 	int ret;
-	crypto_mechanism_t mech;
+	crypto_mechanism_t mech = {0};
 	uint_t keydata_len;
 
 	ASSERT(key != NULL);
 	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
 
+/*
+ * Workaround for GCC 12+ with UBSan enabled deficiencies.
+ *
+ * GCC 12+ invoked with -fsanitize=undefined incorrectly reports the code
+ * below as violating -Warray-bounds.
+ */
+#if defined(__GNUC__) && !defined(__clang__) && \
+	((!defined(_KERNEL) && defined(ZFS_UBSAN_ENABLED)) || \
+	    defined(CONFIG_UBSAN))
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
 	keydata_len = zio_crypt_table[crypt].ci_keylen;
+#if defined(__GNUC__) && !defined(__clang__) && \
+	((!defined(_KERNEL) && defined(ZFS_UBSAN_ENABLED)) || \
+	    defined(CONFIG_UBSAN))
+#pragma GCC diagnostic pop
+#endif
 	memset(key, 0, sizeof (zio_crypt_key_t));
+	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
 
 	/* fill keydata buffers and salt with random data */
 	ret = random_get_bytes((uint8_t *)&key->zk_guid, sizeof (uint64_t));
@@ -282,7 +300,6 @@ zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key)
 	key->zk_crypt = crypt;
 	key->zk_version = ZIO_CRYPT_KEY_CURRENT_VERSION;
 	key->zk_salt_count = 0;
-	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
 
 	return (0);
 
@@ -1388,7 +1405,7 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
     boolean_t *no_crypt)
 {
 	int ret;
-	uint64_t txtype, lr_len;
+	uint64_t txtype, lr_len, nused;
 	uint_t nr_src, nr_dst, crypt_len;
 	uint_t aad_len = 0, nr_iovecs = 0, total_len = 0;
 	iovec_t *src_iovecs = NULL, *dst_iovecs = NULL;
@@ -1415,7 +1432,10 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
 	zilc = (zil_chain_t *)src;
 	slrp = src + sizeof (zil_chain_t);
 	aadp = aadbuf;
-	blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
+	nused = ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
+	ASSERT3U(nused, >=, sizeof (zil_chain_t));
+	ASSERT3U(nused, <=, datalen);
+	blkend = src + nused;
 
 	/* calculate the number of encrypted iovecs we will need */
 	for (; slrp < blkend; slrp += lr_len) {
@@ -1428,6 +1448,8 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
 			txtype = BSWAP_64(lr->lrc_txtype);
 			lr_len = BSWAP_64(lr->lrc_reclen);
 		}
+		ASSERT3U(lr_len, >=, sizeof (lr_t));
+		ASSERT3U(lr_len, <=, blkend - slrp);
 
 		nr_iovecs++;
 		if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t))
@@ -1496,20 +1518,16 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
 		 * authenticate it.
 		 */
 		if (txtype == TX_WRITE) {
-			crypt_len = sizeof (lr_write_t) -
-			    sizeof (lr_t) - sizeof (blkptr_t);
+			const size_t o = offsetof(lr_write_t, lr_blkptr);
+			crypt_len = o - sizeof (lr_t);
 			src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);
 			src_iovecs[nr_iovecs].iov_len = crypt_len;
 			dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t);
 			dst_iovecs[nr_iovecs].iov_len = crypt_len;
 
 			/* copy the bp now since it will not be encrypted */
-			memcpy(dlrp + sizeof (lr_write_t) - sizeof (blkptr_t),
-			    slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
-			    sizeof (blkptr_t));
-			memcpy(aadp,
-			    slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
-			    sizeof (blkptr_t));
+			memcpy(dlrp + o, slrp + o, sizeof (blkptr_t));
+			memcpy(aadp, slrp + o, sizeof (blkptr_t));
 			aadp += sizeof (blkptr_t);
 			aad_len += sizeof (blkptr_t);
 			nr_iovecs++;
@@ -1526,6 +1544,21 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
 				nr_iovecs++;
 				total_len += crypt_len;
 			}
+		} else if (txtype == TX_CLONE_RANGE) {
+			const size_t o = offsetof(lr_clone_range_t, lr_nbps);
+			crypt_len = o - sizeof (lr_t);
+			src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);
+			src_iovecs[nr_iovecs].iov_len = crypt_len;
+			dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t);
+			dst_iovecs[nr_iovecs].iov_len = crypt_len;
+
+			/* copy the bps now since they will not be encrypted */
+			memcpy(dlrp + o, slrp + o, lr_len - o);
+			memcpy(aadp, slrp + o, lr_len - o);
+			aadp += lr_len - o;
+			aad_len += lr_len - o;
+			nr_iovecs++;
+			total_len += crypt_len;
 		} else {
 			crypt_len = lr_len - sizeof (lr_t);
 			src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);
@@ -1891,6 +1924,9 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
 	crypto_ctx_template_t tmpl;
 	uint8_t *authbuf = NULL;
 
+	memset(&puio, 0, sizeof (puio));
+	memset(&cuio, 0, sizeof (cuio));
+
 	/*
 	 * If the needed key is the current one, just use it. Otherwise we
 	 * need to generate a temporary one from the given salt + master key.
@@ -1950,9 +1986,6 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
 		/* If the hardware implementation fails fall back to software */
 	}
 
-	memset(&puio, 0, sizeof (puio));
-	memset(&cuio, 0, sizeof (cuio));
-
 	/* create uios for encryption */
 	ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf,
 	    cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len,
@@ -1968,7 +2001,6 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
 
 	if (locked) {
 		rw_exit(&key->zk_salt_lock);
-		locked = B_FALSE;
 	}
 
 	if (authbuf != NULL)
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
index d5c222120a9d..8ee7fcecc7b7 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -42,7 +42,7 @@
 static int
 zpl_common_open(struct inode *ip, struct file *filp)
 {
-	if (filp->f_mode & FMODE_WRITE)
+	if (blk_mode_is_open_write(filp->f_mode))
 		return (-EACCES);
 
 	return (generic_file_open(ip, filp));
@@ -57,7 +57,8 @@ zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx)
 	zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp));
 	int error = 0;
 
-	ZPL_ENTER(zfsvfs);
+	if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	if (!zpl_dir_emit_dots(filp, ctx))
 		goto out;
@@ -78,7 +79,7 @@ zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx)
 		ctx->pos++;
 	}
 out:
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -102,7 +103,11 @@ zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
  * Get root directory attributes.
  */
 static int
-#ifdef HAVE_USERNS_IOPS_GETATTR
+#ifdef HAVE_IDMAP_IOPS_GETATTR
+zpl_root_getattr_impl(struct mnt_idmap *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#elif defined(HAVE_USERNS_IOPS_GETATTR)
 zpl_root_getattr_impl(struct user_namespace *user_ns,
     const struct path *path, struct kstat *stat, u32 request_mask,
     unsigned int query_flags)
@@ -114,9 +119,13 @@ zpl_root_getattr_impl(const struct path *path, struct kstat *stat,
 	(void) request_mask, (void) query_flags;
 	struct inode *ip = path->dentry->d_inode;
 
-#ifdef HAVE_USERNS_IOPS_GETATTR
+#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
 #ifdef HAVE_GENERIC_FILLATTR_USERNS
 	generic_fillattr(user_ns, ip, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
+	generic_fillattr(user_ns, ip, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
+	generic_fillattr(user_ns, request_mask, ip, stat);
 #else
 	(void) user_ns;
 #endif
@@ -207,7 +216,7 @@ zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags)
 	return (!!dentry->d_inode);
 }
 
-static const dentry_operations_t zpl_dops_snapdirs = {
+static dentry_operations_t zpl_dops_snapdirs = {
 /*
  * Auto mounting of snapshots is only supported for 2.6.37 and
  * newer kernels.  Prior to this kernel the ops->follow_link()
@@ -258,7 +267,8 @@ zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx)
 	uint64_t id, pos;
 	int error = 0;
 
-	ZPL_ENTER(zfsvfs);
+	if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 	cookie = spl_fstrans_mark();
 
 	if (!zpl_dir_emit_dots(filp, ctx))
@@ -282,7 +292,7 @@ zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx)
 	}
 out:
 	spl_fstrans_unmark(cookie);
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 
 	if (error == -ENOENT)
 		return (0);
@@ -310,6 +320,10 @@ static int
 zpl_snapdir_rename2(struct user_namespace *user_ns, struct inode *sdip,
     struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
     unsigned int flags)
+#elif defined(HAVE_IOPS_RENAME_IDMAP)
+zpl_snapdir_rename2(struct mnt_idmap *user_ns, struct inode *sdip,
+    struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
+    unsigned int flags)
 #else
 zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
     struct inode *tdip, struct dentry *tdentry, unsigned int flags)
@@ -331,7 +345,9 @@ zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
 	return (error);
 }
 
-#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS)
+#if (!defined(HAVE_RENAME_WANTS_FLAGS) && \
+	!defined(HAVE_IOPS_RENAME_USERNS) && \
+	!defined(HAVE_IOPS_RENAME_IDMAP))
 static int
 zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry,
     struct inode *tdip, struct dentry *tdentry)
@@ -358,6 +374,9 @@ static int
 #ifdef HAVE_IOPS_MKDIR_USERNS
 zpl_snapdir_mkdir(struct user_namespace *user_ns, struct inode *dip,
     struct dentry *dentry, umode_t mode)
+#elif defined(HAVE_IOPS_MKDIR_IDMAP)
+zpl_snapdir_mkdir(struct mnt_idmap *user_ns, struct inode *dip,
+    struct dentry *dentry, umode_t mode)
 #else
 zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
 #endif
@@ -369,7 +388,11 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
-	zpl_vap_init(vap, dip, mode | S_IFDIR, cr);
+#if (defined(HAVE_IOPS_MKDIR_USERNS) || defined(HAVE_IOPS_MKDIR_IDMAP))
+	zpl_vap_init(vap, dip, mode | S_IFDIR, cr, user_ns);
+#else
+	zpl_vap_init(vap, dip, mode | S_IFDIR, cr, zfs_init_idmap);
+#endif
 
 	error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0);
 	if (error == 0) {
@@ -389,7 +412,11 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
  * Get snapshot directory attributes.
  */
 static int
-#ifdef HAVE_USERNS_IOPS_GETATTR
+#ifdef HAVE_IDMAP_IOPS_GETATTR
+zpl_snapdir_getattr_impl(struct mnt_idmap *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#elif defined(HAVE_USERNS_IOPS_GETATTR)
 zpl_snapdir_getattr_impl(struct user_namespace *user_ns,
     const struct path *path, struct kstat *stat, u32 request_mask,
     unsigned int query_flags)
@@ -401,11 +428,17 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
 	(void) request_mask, (void) query_flags;
 	struct inode *ip = path->dentry->d_inode;
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	int error;
 
-	ZPL_ENTER(zfsvfs);
-#ifdef HAVE_USERNS_IOPS_GETATTR
+	if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
+		return (error);
+#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
 #ifdef HAVE_GENERIC_FILLATTR_USERNS
 	generic_fillattr(user_ns, ip, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
+	generic_fillattr(user_ns, ip, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
+	generic_fillattr(user_ns, request_mask, ip, stat);
 #else
 	(void) user_ns;
 #endif
@@ -422,7 +455,7 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
 		    dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
 		    dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
 		if (err != 0) {
-			ZPL_EXIT(zfsvfs);
+			zpl_exit(zfsvfs, FTAG);
 			return (-err);
 		}
 		stat->nlink += snap_count;
@@ -430,7 +463,7 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
 
 	stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
 	stat->atime = current_time(ip);
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 
 	return (0);
 }
@@ -463,7 +496,9 @@ const struct file_operations zpl_fops_snapdir = {
 const struct inode_operations zpl_ops_snapdir = {
 	.lookup		= zpl_snapdir_lookup,
 	.getattr	= zpl_snapdir_getattr,
-#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
+#if (defined(HAVE_RENAME_WANTS_FLAGS) || \
+	defined(HAVE_IOPS_RENAME_USERNS) || \
+	defined(HAVE_IOPS_RENAME_IDMAP))
 	.rename		= zpl_snapdir_rename2,
 #else
 	.rename		= zpl_snapdir_rename,
@@ -508,7 +543,8 @@ zpl_shares_iterate(struct file *filp, zpl_dir_context_t *ctx)
 	znode_t *dzp;
 	int error = 0;
 
-	ZPL_ENTER(zfsvfs);
+	if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 	cookie = spl_fstrans_mark();
 
 	if (zfsvfs->z_shares_dir == 0) {
@@ -527,7 +563,7 @@ zpl_shares_iterate(struct file *filp, zpl_dir_context_t *ctx)
 	iput(ZTOI(dzp));
 out:
 	spl_fstrans_unmark(cookie);
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
@@ -553,6 +589,10 @@ static int
 zpl_shares_getattr_impl(struct user_namespace *user_ns,
     const struct path *path, struct kstat *stat, u32 request_mask,
     unsigned int query_flags)
+#elif defined(HAVE_IDMAP_IOPS_GETATTR)
+zpl_shares_getattr_impl(struct mnt_idmap *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
 #else
 zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
     u32 request_mask, unsigned int query_flags)
@@ -564,12 +604,17 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
 	znode_t *dzp;
 	int error;
 
-	ZPL_ENTER(zfsvfs);
+	if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	if (zfsvfs->z_shares_dir == 0) {
-#ifdef HAVE_USERNS_IOPS_GETATTR
+#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
 #ifdef HAVE_GENERIC_FILLATTR_USERNS
 		generic_fillattr(user_ns, path->dentry->d_inode, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
+		generic_fillattr(user_ns, path->dentry->d_inode, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
+		generic_fillattr(user_ns, request_mask, ip, stat);
 #else
 		(void) user_ns;
 #endif
@@ -578,25 +623,24 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
 #endif
 		stat->nlink = stat->size = 2;
 		stat->atime = current_time(ip);
-		ZPL_EXIT(zfsvfs);
+		zpl_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
 	error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
 	if (error == 0) {
-#ifdef HAVE_USERNS_IOPS_GETATTR
-#ifdef HAVE_GENERIC_FILLATTR_USERNS
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
+		error = -zfs_getattr_fast(user_ns, request_mask, ZTOI(dzp),
+		    stat);
+#elif (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
 		error = -zfs_getattr_fast(user_ns, ZTOI(dzp), stat);
 #else
-		(void) user_ns;
-#endif
-#else
 		error = -zfs_getattr_fast(kcred->user_ns, ZTOI(dzp), stat);
 #endif
 		iput(ZTOI(dzp));
 	}
 
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_export.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_export.c
index 5be63532d329..aa80b72e2d7a 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_export.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_export.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
index 9a640fb40b67..9dec52215c7c 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -27,6 +27,7 @@
 #ifdef CONFIG_COMPAT
 #include <linux/compat.h>
 #endif
+#include <linux/fs.h>
 #include <sys/file.h>
 #include <sys/dmu_objset.h>
 #include <sys/zfs_znode.h>
@@ -37,6 +38,9 @@
     defined(HAVE_VFS_FILEMAP_DIRTY_FOLIO)
 #include <linux/pagemap.h>
 #endif
+#ifdef HAVE_FILE_FADVISE
+#include <linux/fadvise.h>
+#endif
 #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
 #include <linux/writeback.h>
 #endif
@@ -191,9 +195,12 @@ zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 	 * zfs_putpage() respectively.
 	 */
 	if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
-		ZPL_ENTER(zfsvfs);
+		if ((error = zpl_enter(zfsvfs, FTAG)) != 0) {
+			atomic_dec_32(&zp->z_sync_writes_cnt);
+			return (error);
+		}
 		zil_commit(zfsvfs->z_log, zp->z_id);
-		ZPL_EXIT(zfsvfs);
+		zpl_exit(zfsvfs, FTAG);
 	}
 
 	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
@@ -294,16 +301,11 @@ zpl_uio_init(zfs_uio_t *uio, struct kiocb *kiocb, struct iov_iter *to,
 #if defined(HAVE_VFS_IOV_ITER)
 	zfs_uio_iov_iter_init(uio, to, pos, count, skip);
 #else
-#ifdef HAVE_IOV_ITER_TYPE
-	zfs_uio_iovec_init(uio, to->iov, to->nr_segs, pos,
-	    iov_iter_type(to) & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE,
-	    count, skip);
-#else
-	zfs_uio_iovec_init(uio, to->iov, to->nr_segs, pos,
-	    to->type & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE,
+	zfs_uio_iovec_init(uio, zfs_uio_iter_iov(to), to->nr_segs, pos,
+	    zfs_uio_iov_iter_type(to) & ITER_KVEC ?
+	    UIO_SYSSPACE : UIO_USERSPACE,
 	    count, skip);
 #endif
-#endif
 }
 
 static ssize_t
@@ -618,7 +620,6 @@ static int
 zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct inode *ip = filp->f_mapping->host;
-	znode_t *zp = ITOZ(ip);
 	int error;
 	fstrans_cookie_t cookie;
 
@@ -633,9 +634,12 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 	if (error)
 		return (error);
 
+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+	znode_t *zp = ITOZ(ip);
 	mutex_enter(&zp->z_lock);
 	zp->z_is_mapped = B_TRUE;
 	mutex_exit(&zp->z_lock);
+#endif
 
 	return (error);
 }
@@ -648,29 +652,16 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 static inline int
 zpl_readpage_common(struct page *pp)
 {
-	struct inode *ip;
-	struct page *pl[1];
-	int error = 0;
 	fstrans_cookie_t cookie;
 
 	ASSERT(PageLocked(pp));
-	ip = pp->mapping->host;
-	pl[0] = pp;
 
 	cookie = spl_fstrans_mark();
-	error = -zfs_getpage(ip, pl, 1);
+	int error = -zfs_getpage(pp->mapping->host, pp);
 	spl_fstrans_unmark(cookie);
 
-	if (error) {
-		SetPageError(pp);
-		ClearPageUptodate(pp);
-	} else {
-		ClearPageError(pp);
-		SetPageUptodate(pp);
-		flush_dcache_page(pp);
-	}
-
 	unlock_page(pp);
+
 	return (error);
 }
 
@@ -729,15 +720,38 @@ zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
 {
 	boolean_t *for_sync = data;
 	fstrans_cookie_t cookie;
+	int ret;
 
 	ASSERT(PageLocked(pp));
 	ASSERT(!PageWriteback(pp));
 
 	cookie = spl_fstrans_mark();
-	(void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
+	ret = zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
 	spl_fstrans_unmark(cookie);
 
-	return (0);
+	return (ret);
+}
+
+#ifdef HAVE_WRITEPAGE_T_FOLIO
+static int
+zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data)
+{
+	return (zpl_putpage(&pp->page, wbc, data));
+}
+#endif
+
+static inline int
+zpl_write_cache_pages(struct address_space *mapping,
+    struct writeback_control *wbc, void *data)
+{
+	int result;
+
+#ifdef HAVE_WRITEPAGE_T_FOLIO
+	result = write_cache_pages(mapping, wbc, zpl_putfolio, data);
+#else
+	result = write_cache_pages(mapping, wbc, zpl_putpage, data);
+#endif
+	return (result);
 }
 
 static int
@@ -748,10 +762,11 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	enum writeback_sync_modes sync_mode;
 	int result;
 
-	ZPL_ENTER(zfsvfs);
+	if ((result = zpl_enter(zfsvfs, FTAG)) != 0)
+		return (result);
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		wbc->sync_mode = WB_SYNC_ALL;
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 	sync_mode = wbc->sync_mode;
 
 	/*
@@ -763,13 +778,13 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	 */
 	boolean_t for_sync = (sync_mode == WB_SYNC_ALL);
 	wbc->sync_mode = WB_SYNC_NONE;
-	result = write_cache_pages(mapping, wbc, zpl_putpage, &for_sync);
+	result = zpl_write_cache_pages(mapping, wbc, &for_sync);
 	if (sync_mode != wbc->sync_mode) {
-		ZPL_ENTER(zfsvfs);
-		ZPL_VERIFY_ZP(zp);
+		if ((result = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+			return (result);
 		if (zfsvfs->z_log != NULL)
 			zil_commit(zfsvfs->z_log, zp->z_id);
-		ZPL_EXIT(zfsvfs);
+		zpl_exit(zfsvfs, FTAG);
 
 		/*
 		 * We need to call write_cache_pages() again (we can't just
@@ -779,8 +794,7 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
 		 * details). That being said, this is a no-op in most cases.
 		 */
 		wbc->sync_mode = sync_mode;
-		result = write_cache_pages(mapping, wbc, zpl_putpage,
-		    &for_sync);
+		result = zpl_write_cache_pages(mapping, wbc, &for_sync);
 	}
 	return (result);
 }
@@ -906,6 +920,61 @@ zpl_ioctl_getversion(struct file *filp, void __user *arg)
 	return (copy_to_user(arg, &generation, sizeof (generation)));
 }
 
+#ifdef HAVE_FILE_FADVISE
+static int
+zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice)
+{
+	struct inode *ip = file_inode(filp);
+	znode_t *zp = ITOZ(ip);
+	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	objset_t *os = zfsvfs->z_os;
+	int error = 0;
+
+	if (S_ISFIFO(ip->i_mode))
+		return (-ESPIPE);
+
+	if (offset < 0 || len < 0)
+		return (-EINVAL);
+
+	if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
+
+	switch (advice) {
+	case POSIX_FADV_SEQUENTIAL:
+	case POSIX_FADV_WILLNEED:
+#ifdef HAVE_GENERIC_FADVISE
+		if (zn_has_cached_data(zp, offset, offset + len - 1))
+			error = generic_fadvise(filp, offset, len, advice);
+#endif
+		/*
+		 * Pass on the caller's size directly, but note that
+		 * dmu_prefetch_max will effectively cap it.  If there
+		 * really is a larger sequential access pattern, perhaps
+		 * dmu_zfetch will detect it.
+		 */
+		if (len == 0)
+			len = i_size_read(ip) - offset;
+
+		dmu_prefetch(os, zp->z_id, 0, offset, len,
+		    ZIO_PRIORITY_ASYNC_READ);
+		break;
+	case POSIX_FADV_NORMAL:
+	case POSIX_FADV_RANDOM:
+	case POSIX_FADV_DONTNEED:
+	case POSIX_FADV_NOREUSE:
+		/* ignored for now */
+		break;
+	default:
+		error = -EINVAL;
+		break;
+	}
+
+	zfs_exit(zfsvfs, FTAG);
+
+	return (error);
+}
+#endif /* HAVE_FILE_FADVISE */
+
 #define	ZFS_FL_USER_VISIBLE	(FS_FL_USER_VISIBLE | ZFS_PROJINHERIT_FL)
 #define	ZFS_FL_USER_MODIFIABLE	(FS_FL_USER_MODIFIABLE | ZFS_PROJINHERIT_FL)
 
@@ -975,7 +1044,7 @@ __zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva)
 	    !capable(CAP_LINUX_IMMUTABLE))
 		return (-EPERM);
 
-	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
+	if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
 		return (-EACCES);
 
 	xva_init(xva);
@@ -1022,7 +1091,7 @@ zpl_ioctl_setflags(struct file *filp, void __user *arg)
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
-	err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr);
+	err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr, zfs_init_idmap);
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 
@@ -1070,7 +1139,7 @@ zpl_ioctl_setxattr(struct file *filp, void __user *arg)
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
-	err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr);
+	err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr, zfs_init_idmap);
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 
@@ -1105,7 +1174,7 @@ __zpl_ioctl_setdosflags(struct inode *ip, uint64_t ioctl_flags, xvattr_t *xva)
 	    !capable(CAP_LINUX_IMMUTABLE))
 		return (-EPERM);
 
-	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
+	if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
 		return (-EACCES);
 
 	xva_init(xva);
@@ -1158,7 +1227,7 @@ zpl_ioctl_setdosflags(struct file *filp, void __user *arg)
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
-	err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr);
+	err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr, zfs_init_idmap);
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 
@@ -1183,6 +1252,12 @@ zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return (zpl_ioctl_getdosflags(filp, (void *)arg));
 	case ZFS_IOC_SETDOSFLAGS:
 		return (zpl_ioctl_setdosflags(filp, (void *)arg));
+	case ZFS_IOC_COMPAT_FICLONE:
+		return (zpl_ioctl_ficlone(filp, (void *)arg));
+	case ZFS_IOC_COMPAT_FICLONERANGE:
+		return (zpl_ioctl_ficlonerange(filp, (void *)arg));
+	case ZFS_IOC_COMPAT_FIDEDUPERANGE:
+		return (zpl_ioctl_fideduperange(filp, (void *)arg));
 	default:
 		return (-ENOTTY);
 	}
@@ -1209,7 +1284,6 @@ zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 }
 #endif /* CONFIG_COMPAT */
 
-
 const struct address_space_operations zpl_address_space_operations = {
 #ifdef HAVE_VFS_READPAGES
 	.readpages	= zpl_readpages,
@@ -1232,7 +1306,12 @@ const struct address_space_operations zpl_address_space_operations = {
 #endif
 };
 
+#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
+const struct file_operations_extend zpl_file_operations = {
+	.kabi_fops = {
+#else
 const struct file_operations zpl_file_operations = {
+#endif
 	.open		= zpl_open,
 	.release	= zpl_release,
 	.llseek		= zpl_llseek,
@@ -1244,7 +1323,11 @@ const struct file_operations zpl_file_operations = {
 	.read_iter	= zpl_iter_read,
 	.write_iter	= zpl_iter_write,
 #ifdef HAVE_VFS_IOV_ITER
+#ifdef HAVE_COPY_SPLICE_READ
+	.splice_read	= copy_splice_read,
+#else
 	.splice_read	= generic_file_splice_read,
+#endif
 	.splice_write	= iter_file_splice_write,
 #endif
 #else
@@ -1259,10 +1342,30 @@ const struct file_operations zpl_file_operations = {
 	.aio_fsync	= zpl_aio_fsync,
 #endif
 	.fallocate	= zpl_fallocate,
+#ifdef HAVE_VFS_COPY_FILE_RANGE
+	.copy_file_range	= zpl_copy_file_range,
+#endif
+#ifdef HAVE_VFS_CLONE_FILE_RANGE
+	.clone_file_range	= zpl_clone_file_range,
+#endif
+#ifdef HAVE_VFS_REMAP_FILE_RANGE
+	.remap_file_range	= zpl_remap_file_range,
+#endif
+#ifdef HAVE_VFS_DEDUPE_FILE_RANGE
+	.dedupe_file_range	= zpl_dedupe_file_range,
+#endif
+#ifdef HAVE_FILE_FADVISE
+	.fadvise	= zpl_fadvise,
+#endif
 	.unlocked_ioctl	= zpl_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= zpl_compat_ioctl,
 #endif
+#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
+	}, /* kabi_fops */
+	.copy_file_range	= zpl_copy_file_range,
+	.clone_file_range	= zpl_clone_file_range,
+#endif
 };
 
 const struct file_operations zpl_dir_file_operations = {
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c
new file mode 100644
index 000000000000..64728fdb1187
--- /dev/null
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c
@@ -0,0 +1,299 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2023, Klara Inc.
+ */
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <linux/fs.h>
+#ifdef HAVE_VFS_SPLICE_COPY_FILE_RANGE
+#include <linux/splice.h>
+#endif
+#include <sys/file.h>
+#include <sys/zfs_znode.h>
+#include <sys/zfs_vnops.h>
+#include <sys/zfeature.h>
+
+/*
+ * Clone part of a file via block cloning.
+ *
+ * Note that we are not required to update file offsets; the kernel will take
+ * care of that depending on how it was called.
+ */
+static ssize_t
+zpl_clone_file_range_impl(struct file *src_file, loff_t src_off,
+    struct file *dst_file, loff_t dst_off, size_t len)
+{
+	struct inode *src_i = file_inode(src_file);
+	struct inode *dst_i = file_inode(dst_file);
+	uint64_t src_off_o = (uint64_t)src_off;
+	uint64_t dst_off_o = (uint64_t)dst_off;
+	uint64_t len_o = (uint64_t)len;
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	int err;
+
+	if (!zfs_bclone_enabled)
+		return (-EOPNOTSUPP);
+
+	if (!spa_feature_is_enabled(
+	    dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
+		return (-EOPNOTSUPP);
+
+	if (src_i != dst_i)
+		spl_inode_lock_shared(src_i);
+	spl_inode_lock(dst_i);
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+
+	err = -zfs_clone_range(ITOZ(src_i), &src_off_o, ITOZ(dst_i),
+	    &dst_off_o, &len_o, cr);
+
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+
+	spl_inode_unlock(dst_i);
+	if (src_i != dst_i)
+		spl_inode_unlock_shared(src_i);
+
+	if (err < 0)
+		return (err);
+
+	return ((ssize_t)len_o);
+}
+
+#if defined(HAVE_VFS_COPY_FILE_RANGE) || \
+    defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
+/*
+ * Entry point for copy_file_range(). Copy len bytes from src_off in src_file
+ * to dst_off in dst_file. We are permitted to do this however we like, so we
+ * try to just clone the blocks, and if we can't support it, fall back to the
+ * kernel's generic byte copy function.
+ */
+ssize_t
+zpl_copy_file_range(struct file *src_file, loff_t src_off,
+    struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags)
+{
+	ssize_t ret;
+
+	/* Flags is reserved for future extensions and must be zero. */
+	if (flags != 0)
+		return (-EINVAL);
+
+	/* Try to do it via zfs_clone_range() and allow shortening. */
+	ret = zpl_clone_file_range_impl(src_file, src_off,
+	    dst_file, dst_off, len);
+
+#if defined(HAVE_VFS_GENERIC_COPY_FILE_RANGE)
+	/*
+	 * Since Linux 5.3 the filesystem driver is responsible for executing
+	 * an appropriate fallback, and a generic fallback function is provided.
+	 */
+	if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
+	    ret == -EAGAIN)
+		ret = generic_copy_file_range(src_file, src_off, dst_file,
+		    dst_off, len, flags);
+#elif defined(HAVE_VFS_SPLICE_COPY_FILE_RANGE)
+	/*
+	 * Since 6.8 the fallback function is called splice_copy_file_range
+	 * and has a slightly different signature.
+	 */
+	if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
+	    ret == -EAGAIN)
+		ret = splice_copy_file_range(src_file, src_off, dst_file,
+		    dst_off, len);
+#else
+	/*
+	 * Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal
+	 * to the kernel that it should fall back to a content copy.
+	 */
+	if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN)
+		ret = -EOPNOTSUPP;
+#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE || HAVE_VFS_SPLICE_COPY_FILE_RANGE */
+
+	return (ret);
+}
+#endif /* HAVE_VFS_COPY_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
+
+#ifdef HAVE_VFS_REMAP_FILE_RANGE
+/*
+ * Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE.
+ *
+ * FICLONE and FICLONERANGE are basically the same as copy_file_range(), except
+ * that they must clone - they cannot fall back to copying. FICLONE is exactly
+ * FICLONERANGE, for the entire file. We don't need to try to tell them apart;
+ * the kernel will sort that out for us.
+ *
+ * FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the
+ * range in both files and if they're the same, arrange for them to be backed
+ * by the same storage.
+ *
+ * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given range
+ * if we want. It's designed for filesystems that may need to shorten the
+ * length for alignment, EOF, or any other requirement. ZFS may shorten the
+ * request when there is outstanding dirty data which hasn't been written.
+ */
+loff_t
+zpl_remap_file_range(struct file *src_file, loff_t src_off,
+    struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags)
+{
+	if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN))
+		return (-EINVAL);
+
+	/* No support for dedup yet */
+	if (flags & REMAP_FILE_DEDUP)
+		return (-EOPNOTSUPP);
+
+	/* Zero length means to clone everything to the end of the file */
+	if (len == 0)
+		len = i_size_read(file_inode(src_file)) - src_off;
+
+	ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
+	    dst_file, dst_off, len);
+
+	if (!(flags & REMAP_FILE_CAN_SHORTEN) && ret >= 0 && ret != len)
+		ret = -EINVAL;
+
+	return (ret);
+}
+#endif /* HAVE_VFS_REMAP_FILE_RANGE */
+
+#if defined(HAVE_VFS_CLONE_FILE_RANGE) || \
+    defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
+/*
+ * Entry point for FICLONE and FICLONERANGE, before Linux 4.20. Returns 0 if
+ * the whole range was cloned, else a negative errno (never a byte count).
+ */
+int
+zpl_clone_file_range(struct file *src_file, loff_t src_off,
+    struct file *dst_file, loff_t dst_off, uint64_t len)
+{
+	/* Zero length means to clone everything to the end of the file */
+	if (len == 0)
+		len = i_size_read(file_inode(src_file)) - src_off;
+
+	ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
+	    dst_file, dst_off, len);
+
+	/* A short clone is an error; success is 0 since the op returns int. */
+	if (ret >= 0)
+		ret = (ret == len) ? 0 : -EINVAL;
+	return ((int)ret);
+}
+#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
+
+#ifdef HAVE_VFS_DEDUPE_FILE_RANGE
+/*
+ * Entry point for FIDEDUPERANGE, before Linux 4.20.
+ */
+int
+zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
+    struct file *dst_file, loff_t dst_off, uint64_t len)
+{
+	/* No support for dedup yet */
+	return (-EOPNOTSUPP);
+}
+#endif /* HAVE_VFS_DEDUPE_FILE_RANGE */
+
+/* Entry point for FICLONE, before Linux 4.5; arg is the source fd itself. */
+long
+zpl_ioctl_ficlone(struct file *dst_file, void *arg)
+{
+	unsigned long sfd = (unsigned long)arg;
+
+	struct file *src_file = fget(sfd);
+	if (src_file == NULL)
+		return (-EBADF);
+
+	if (dst_file->f_op != src_file->f_op) {
+		fput(src_file);
+		return (-EXDEV);
+	}
+
+	size_t len = i_size_read(file_inode(src_file));
+
+	ssize_t ret = zpl_clone_file_range_impl(src_file, 0, dst_file, 0, len);
+
+	fput(src_file);
+
+	if (ret < 0) {
+		if (ret == -EOPNOTSUPP)
+			return (-ENOTTY);
+		return (ret);
+	}
+
+	if (ret != len)
+		return (-EINVAL);
+
+	return (0);
+}
+
+/* Entry point for FICLONERANGE, before Linux 4.5. */
+long
+zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)
+{
+	zfs_ioc_compat_file_clone_range_t fcr;
+
+	if (copy_from_user(&fcr, arg, sizeof (fcr)))
+		return (-EFAULT);
+
+	struct file *src_file = fget(fcr.fcr_src_fd);
+	if (src_file == NULL)
+		return (-EBADF);
+
+	if (dst_file->f_op != src_file->f_op) {
+		fput(src_file);
+		return (-EXDEV);
+	}
+
+	size_t len = fcr.fcr_src_length;
+	if (len == 0)
+		len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset;
+
+	ssize_t ret = zpl_clone_file_range_impl(src_file, fcr.fcr_src_offset,
+	    dst_file, fcr.fcr_dest_offset, len);
+
+	fput(src_file);
+
+	if (ret < 0) {
+		if (ret == -EOPNOTSUPP)
+			return (-ENOTTY);
+		return (ret);
+	}
+
+	if (ret != len)
+		return (-EINVAL);
+
+	return (0);
+}
+
+/* Entry point for FIDEDUPERANGE, before Linux 4.5. */
+long
+zpl_ioctl_fideduperange(struct file *filp, void *arg)
+{
+	(void) arg;
+
+	/* No support for dedup yet */
+	return (-ENOTTY);
+}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
index 4f79265a0856..ad1753f7a071 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -24,6 +24,7 @@
  */
 
 
+#include <sys/sysmacros.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_vfsops.h>
 #include <sys/zfs_vnops.h>
@@ -33,7 +34,6 @@
 #include <sys/zpl.h>
 #include <sys/file.h>
 
-
 static struct dentry *
 zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
@@ -112,18 +112,22 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 }
 
 void
-zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr)
+zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr,
+    zidmap_t *mnt_ns)
 {
 	vap->va_mask = ATTR_MODE;
 	vap->va_mode = mode;
-	vap->va_uid = crgetuid(cr);
 
-	if (dir && dir->i_mode & S_ISGID) {
+	vap->va_uid = zfs_vfsuid_to_uid(mnt_ns,
+	    zfs_i_user_ns(dir), crgetuid(cr));
+
+	if (dir->i_mode & S_ISGID) {
 		vap->va_gid = KGID_TO_SGID(dir->i_gid);
 		if (S_ISDIR(mode))
 			vap->va_mode |= S_ISGID;
 	} else {
-		vap->va_gid = crgetgid(cr);
+		vap->va_gid = zfs_vfsgid_to_gid(mnt_ns,
+		    zfs_i_user_ns(dir), crgetgid(cr));
 	}
 }
 
@@ -131,6 +135,9 @@ static int
 #ifdef HAVE_IOPS_CREATE_USERNS
 zpl_create(struct user_namespace *user_ns, struct inode *dir,
     struct dentry *dentry, umode_t mode, bool flag)
+#elif defined(HAVE_IOPS_CREATE_IDMAP)
+zpl_create(struct mnt_idmap *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode, bool flag)
 #else
 zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag)
 #endif
@@ -140,14 +147,17 @@ zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag)
 	vattr_t *vap;
 	int error;
 	fstrans_cookie_t cookie;
+#if !(defined(HAVE_IOPS_CREATE_USERNS) || defined(HAVE_IOPS_CREATE_IDMAP))
+	zidmap_t *user_ns = kcred->user_ns;
+#endif
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
-	zpl_vap_init(vap, dir, mode, cr);
+	zpl_vap_init(vap, dir, mode, cr, user_ns);
 
 	cookie = spl_fstrans_mark();
 	error = -zfs_create(ITOZ(dir), dname(dentry), vap, 0,
-	    mode, &zp, cr, 0, NULL);
+	    mode, &zp, cr, 0, NULL, user_ns);
 	if (error == 0) {
 		error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
 		if (error == 0)
@@ -174,6 +184,9 @@ static int
 #ifdef HAVE_IOPS_MKNOD_USERNS
 zpl_mknod(struct user_namespace *user_ns, struct inode *dir,
     struct dentry *dentry, umode_t mode,
+#elif defined(HAVE_IOPS_MKNOD_IDMAP)
+zpl_mknod(struct mnt_idmap *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode,
 #else
 zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
 #endif
@@ -184,6 +197,9 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
 	vattr_t *vap;
 	int error;
 	fstrans_cookie_t cookie;
+#if !(defined(HAVE_IOPS_MKNOD_USERNS) || defined(HAVE_IOPS_MKNOD_IDMAP))
+	zidmap_t *user_ns = kcred->user_ns;
+#endif
 
 	/*
 	 * We currently expect Linux to supply rdev=0 for all sockets
@@ -194,12 +210,12 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
-	zpl_vap_init(vap, dir, mode, cr);
+	zpl_vap_init(vap, dir, mode, cr, user_ns);
 	vap->va_rdev = rdev;
 
 	cookie = spl_fstrans_mark();
 	error = -zfs_create(ITOZ(dir), dname(dentry), vap, 0,
-	    mode, &zp, cr, 0, NULL);
+	    mode, &zp, cr, 0, NULL, user_ns);
 	if (error == 0) {
 		error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
 		if (error == 0)
@@ -224,18 +240,29 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
 
 #ifdef HAVE_TMPFILE
 static int
+#ifdef HAVE_TMPFILE_IDMAP
+zpl_tmpfile(struct mnt_idmap *userns, struct inode *dir,
+    struct file *file, umode_t mode)
+#elif !defined(HAVE_TMPFILE_DENTRY)
+zpl_tmpfile(struct user_namespace *userns, struct inode *dir,
+    struct file *file, umode_t mode)
+#else
 #ifdef HAVE_TMPFILE_USERNS
 zpl_tmpfile(struct user_namespace *userns, struct inode *dir,
     struct dentry *dentry, umode_t mode)
 #else
 zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 #endif
+#endif
 {
 	cred_t *cr = CRED();
 	struct inode *ip;
 	vattr_t *vap;
 	int error;
 	fstrans_cookie_t cookie;
+#if !(defined(HAVE_TMPFILE_USERNS) || defined(HAVE_TMPFILE_IDMAP))
+	zidmap_t *userns = kcred->user_ns;
+#endif
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
@@ -245,18 +272,28 @@ zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 	 */
 	if (!IS_POSIXACL(dir))
 		mode &= ~current_umask();
-	zpl_vap_init(vap, dir, mode, cr);
+	zpl_vap_init(vap, dir, mode, cr, userns);
 
 	cookie = spl_fstrans_mark();
-	error = -zfs_tmpfile(dir, vap, 0, mode, &ip, cr, 0, NULL);
+	error = -zfs_tmpfile(dir, vap, 0, mode, &ip, cr, 0, NULL, userns);
 	if (error == 0) {
 		/* d_tmpfile will do drop_nlink, so we should set it first */
 		set_nlink(ip, 1);
+#ifndef HAVE_TMPFILE_DENTRY
+		d_tmpfile(file, ip);
+
+		error = zpl_xattr_security_init(ip, dir,
+		    &file->f_path.dentry->d_name);
+#else
 		d_tmpfile(dentry, ip);
 
 		error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
+#endif
 		if (error == 0)
 			error = zpl_init_acl(ip, dir);
+#ifndef HAVE_TMPFILE_DENTRY
+		error = finish_open_simple(file, error);
+#endif
 		/*
 		 * don't need to handle error here, file is already in
 		 * unlinked set.
@@ -302,6 +339,9 @@ static int
 #ifdef HAVE_IOPS_MKDIR_USERNS
 zpl_mkdir(struct user_namespace *user_ns, struct inode *dir,
     struct dentry *dentry, umode_t mode)
+#elif defined(HAVE_IOPS_MKDIR_IDMAP)
+zpl_mkdir(struct mnt_idmap *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode)
 #else
 zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 #endif
@@ -311,13 +351,17 @@ zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	znode_t *zp;
 	int error;
 	fstrans_cookie_t cookie;
+#if !(defined(HAVE_IOPS_MKDIR_USERNS) || defined(HAVE_IOPS_MKDIR_IDMAP))
+	zidmap_t *user_ns = kcred->user_ns;
+#endif
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
-	zpl_vap_init(vap, dir, mode | S_IFDIR, cr);
+	zpl_vap_init(vap, dir, mode | S_IFDIR, cr, user_ns);
 
 	cookie = spl_fstrans_mark();
-	error = -zfs_mkdir(ITOZ(dir), dname(dentry), vap, &zp, cr, 0, NULL);
+	error = -zfs_mkdir(ITOZ(dir), dname(dentry), vap, &zp, cr, 0, NULL,
+	    user_ns);
 	if (error == 0) {
 		error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
 		if (error == 0)
@@ -371,6 +415,10 @@ static int
 zpl_getattr_impl(struct user_namespace *user_ns,
     const struct path *path, struct kstat *stat, u32 request_mask,
     unsigned int query_flags)
+#elif defined(HAVE_IDMAP_IOPS_GETATTR)
+zpl_getattr_impl(struct mnt_idmap *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
 #else
 zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
     unsigned int query_flags)
@@ -387,7 +435,9 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
 	 * XXX query_flags currently ignored.
 	 */
 
-#ifdef HAVE_USERNS_IOPS_GETATTR
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
+	error = -zfs_getattr_fast(user_ns, request_mask, ip, stat);
+#elif (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
 	error = -zfs_getattr_fast(user_ns, ip, stat);
 #else
 	error = -zfs_getattr_fast(kcred->user_ns, ip, stat);
@@ -426,9 +476,12 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
 ZPL_GETATTR_WRAPPER(zpl_getattr);
 
 static int
-#ifdef HAVE_SETATTR_PREPARE_USERNS
+#ifdef HAVE_USERNS_IOPS_SETATTR
 zpl_setattr(struct user_namespace *user_ns, struct dentry *dentry,
     struct iattr *ia)
+#elif defined(HAVE_IDMAP_IOPS_SETATTR)
+zpl_setattr(struct mnt_idmap *user_ns, struct dentry *dentry,
+    struct iattr *ia)
 #else
 zpl_setattr(struct dentry *dentry, struct iattr *ia)
 #endif
@@ -439,7 +492,13 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
 	int error;
 	fstrans_cookie_t cookie;
 
-	error = zpl_setattr_prepare(kcred->user_ns, dentry, ia);
+#ifdef HAVE_SETATTR_PREPARE_USERNS
+	error = zpl_setattr_prepare(user_ns, dentry, ia);
+#elif defined(HAVE_SETATTR_PREPARE_IDMAP)
+	error = zpl_setattr_prepare(user_ns, dentry, ia);
+#else
+	error = zpl_setattr_prepare(zfs_init_idmap, dentry, ia);
+#endif
 	if (error)
 		return (error);
 
@@ -447,18 +506,37 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
 	vap->va_mask = ia->ia_valid & ATTR_IATTR_MASK;
 	vap->va_mode = ia->ia_mode;
-	vap->va_uid = KUID_TO_SUID(ia->ia_uid);
-	vap->va_gid = KGID_TO_SGID(ia->ia_gid);
+	if (ia->ia_valid & ATTR_UID)
+#ifdef HAVE_IATTR_VFSID
+		vap->va_uid = zfs_vfsuid_to_uid(user_ns, zfs_i_user_ns(ip),
+		    __vfsuid_val(ia->ia_vfsuid));
+#else
+		vap->va_uid = KUID_TO_SUID(ia->ia_uid);
+#endif
+	if (ia->ia_valid & ATTR_GID)
+#ifdef HAVE_IATTR_VFSID
+		vap->va_gid = zfs_vfsgid_to_gid(user_ns, zfs_i_user_ns(ip),
+		    __vfsgid_val(ia->ia_vfsgid));
+#else
+		vap->va_gid = KGID_TO_SGID(ia->ia_gid);
+#endif
 	vap->va_size = ia->ia_size;
 	vap->va_atime = ia->ia_atime;
 	vap->va_mtime = ia->ia_mtime;
 	vap->va_ctime = ia->ia_ctime;
 
 	if (vap->va_mask & ATTR_ATIME)
-		ip->i_atime = zpl_inode_timestamp_truncate(ia->ia_atime, ip);
+		zpl_inode_set_atime_to_ts(ip,
+		    zpl_inode_timestamp_truncate(ia->ia_atime, ip));
 
 	cookie = spl_fstrans_mark();
-	error = -zfs_setattr(ITOZ(ip), vap, 0, cr);
+#ifdef HAVE_USERNS_IOPS_SETATTR
+	error = -zfs_setattr(ITOZ(ip), vap, 0, cr, user_ns);
+#elif defined(HAVE_IDMAP_IOPS_SETATTR)
+	error = -zfs_setattr(ITOZ(ip), vap, 0, cr, user_ns);
+#else
+	error = -zfs_setattr(ITOZ(ip), vap, 0, cr, zfs_init_idmap);
+#endif
 	if (!error && (ia->ia_valid & ATTR_MODE))
 		error = zpl_chmod_acl(ip);
 
@@ -474,32 +552,47 @@ static int
 #ifdef HAVE_IOPS_RENAME_USERNS
 zpl_rename2(struct user_namespace *user_ns, struct inode *sdip,
     struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
-    unsigned int flags)
+    unsigned int rflags)
+#elif defined(HAVE_IOPS_RENAME_IDMAP)
+zpl_rename2(struct mnt_idmap *user_ns, struct inode *sdip,
+    struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
+    unsigned int rflags)
 #else
 zpl_rename2(struct inode *sdip, struct dentry *sdentry,
-    struct inode *tdip, struct dentry *tdentry, unsigned int flags)
+    struct inode *tdip, struct dentry *tdentry, unsigned int rflags)
 #endif
 {
 	cred_t *cr = CRED();
+	vattr_t *wo_vap = NULL;
 	int error;
 	fstrans_cookie_t cookie;
-
-	/* We don't have renameat2(2) support */
-	if (flags)
-		return (-EINVAL);
+#if !(defined(HAVE_IOPS_RENAME_USERNS) || defined(HAVE_IOPS_RENAME_IDMAP))
+	zidmap_t *user_ns = kcred->user_ns;
+#endif
 
 	crhold(cr);
+	if (rflags & RENAME_WHITEOUT) {
+		wo_vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
+		zpl_vap_init(wo_vap, sdip, S_IFCHR, cr, user_ns);
+		wo_vap->va_rdev = makedevice(0, 0);
+	}
+
 	cookie = spl_fstrans_mark();
 	error = -zfs_rename(ITOZ(sdip), dname(sdentry), ITOZ(tdip),
-	    dname(tdentry), cr, 0);
+	    dname(tdentry), cr, 0, rflags, wo_vap, user_ns);
 	spl_fstrans_unmark(cookie);
+	if (wo_vap)
+		kmem_free(wo_vap, sizeof (vattr_t));
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
 }
 
-#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS)
+#if !defined(HAVE_IOPS_RENAME_USERNS) && \
+	!defined(HAVE_RENAME_WANTS_FLAGS) && \
+	!defined(HAVE_RENAME2) && \
+	!defined(HAVE_IOPS_RENAME_IDMAP)
 static int
 zpl_rename(struct inode *sdip, struct dentry *sdentry,
     struct inode *tdip, struct dentry *tdentry)
@@ -512,6 +605,9 @@ static int
 #ifdef HAVE_IOPS_SYMLINK_USERNS
 zpl_symlink(struct user_namespace *user_ns, struct inode *dir,
     struct dentry *dentry, const char *name)
+#elif defined(HAVE_IOPS_SYMLINK_IDMAP)
+zpl_symlink(struct mnt_idmap *user_ns, struct inode *dir,
+    struct dentry *dentry, const char *name)
 #else
 zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
 #endif
@@ -521,14 +617,17 @@ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
 	znode_t *zp;
 	int error;
 	fstrans_cookie_t cookie;
+#if !(defined(HAVE_IOPS_SYMLINK_USERNS) || defined(HAVE_IOPS_SYMLINK_IDMAP))
+	zidmap_t *user_ns = kcred->user_ns;
+#endif
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
-	zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr);
+	zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr, user_ns);
 
 	cookie = spl_fstrans_mark();
 	error = -zfs_symlink(ITOZ(dir), dname(dentry), vap,
-	    (char *)name, &zp, cr, 0);
+	    (char *)name, &zp, cr, 0, user_ns);
 	if (error == 0) {
 		error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
 		if (error) {
@@ -678,7 +777,7 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 		return (-EMLINK);
 
 	crhold(cr);
-	ip->i_ctime = current_time(ip);
+	zpl_inode_set_ctime_to_ts(ip, current_time(ip));
 	/* Must have an existing ref, so igrab() cannot return NULL */
 	VERIFY3P(igrab(ip), !=, NULL);
 
@@ -698,46 +797,6 @@ out:
 	return (error);
 }
 
-static int
-#ifdef HAVE_D_REVALIDATE_NAMEIDATA
-zpl_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
-	unsigned int flags = (nd ? nd->flags : 0);
-#else
-zpl_revalidate(struct dentry *dentry, unsigned int flags)
-{
-#endif /* HAVE_D_REVALIDATE_NAMEIDATA */
-	/* CSTYLED */
-	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
-	int error;
-
-	if (flags & LOOKUP_RCU)
-		return (-ECHILD);
-
-	/*
-	 * After a rollback negative dentries created before the rollback
-	 * time must be invalidated.  Otherwise they can obscure files which
-	 * are only present in the rolled back dataset.
-	 */
-	if (dentry->d_inode == NULL) {
-		spin_lock(&dentry->d_lock);
-		error = time_before(dentry->d_time, zfsvfs->z_rollback_time);
-		spin_unlock(&dentry->d_lock);
-
-		if (error)
-			return (0);
-	}
-
-	/*
-	 * The dentry may reference a stale inode if a mounted file system
-	 * was rolled back to a point in time where the object didn't exist.
-	 */
-	if (dentry->d_inode && ITOZ(dentry->d_inode)->z_is_stale)
-		return (0);
-
-	return (1);
-}
-
 const struct inode_operations zpl_inode_operations = {
 	.setattr	= zpl_setattr,
 	.getattr	= zpl_getattr,
@@ -751,11 +810,20 @@ const struct inode_operations zpl_inode_operations = {
 #if defined(HAVE_SET_ACL)
 	.set_acl	= zpl_set_acl,
 #endif /* HAVE_SET_ACL */
+#if defined(HAVE_GET_INODE_ACL)
+	.get_inode_acl	= zpl_get_acl,
+#else
 	.get_acl	= zpl_get_acl,
+#endif /* HAVE_GET_INODE_ACL */
 #endif /* CONFIG_FS_POSIX_ACL */
 };
 
+#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER
+const struct inode_operations_wrapper zpl_dir_inode_operations = {
+	.ops = {
+#else
 const struct inode_operations zpl_dir_inode_operations = {
+#endif
 	.create		= zpl_create,
 	.lookup		= zpl_lookup,
 	.link		= zpl_link,
@@ -764,7 +832,11 @@ const struct inode_operations zpl_dir_inode_operations = {
 	.mkdir		= zpl_mkdir,
 	.rmdir		= zpl_rmdir,
 	.mknod		= zpl_mknod,
-#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
+#ifdef HAVE_RENAME2
+	.rename2	= zpl_rename2,
+#elif defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
+	.rename		= zpl_rename2,
+#elif defined(HAVE_IOPS_RENAME_IDMAP)
 	.rename		= zpl_rename2,
 #else
 	.rename		= zpl_rename,
@@ -784,8 +856,16 @@ const struct inode_operations zpl_dir_inode_operations = {
 #if defined(HAVE_SET_ACL)
 	.set_acl	= zpl_set_acl,
 #endif /* HAVE_SET_ACL */
+#if defined(HAVE_GET_INODE_ACL)
+	.get_inode_acl	= zpl_get_acl,
+#else
 	.get_acl	= zpl_get_acl,
+#endif /* HAVE_GET_INODE_ACL */
 #endif /* CONFIG_FS_POSIX_ACL */
+#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER
+	},
+	.rename2	= zpl_rename2,
+#endif
 };
 
 const struct inode_operations zpl_symlink_inode_operations = {
@@ -823,10 +903,10 @@ const struct inode_operations zpl_special_inode_operations = {
 #if defined(HAVE_SET_ACL)
 	.set_acl	= zpl_set_acl,
 #endif /* HAVE_SET_ACL */
+#if defined(HAVE_GET_INODE_ACL)
+	.get_inode_acl	= zpl_get_acl,
+#else
 	.get_acl	= zpl_get_acl,
+#endif /* HAVE_GET_INODE_ACL */
 #endif /* CONFIG_FS_POSIX_ACL */
 };
-
-dentry_operations_t zpl_dentry_operations = {
-	.d_revalidate	= zpl_revalidate,
-};
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
index b18efde9b18a..d98d32c1f9fb 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2023, Datto Inc. All rights reserved.
  */
 
 
@@ -185,7 +186,9 @@ zpl_remount_fs(struct super_block *sb, int *flags, char *data)
 static int
 __zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs)
 {
-	ZPL_ENTER(zfsvfs);
+	int error;
+	if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	char *fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 	dmu_objset_name(zfsvfs->z_os, fsname);
@@ -205,7 +208,7 @@ __zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs)
 
 	kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
 
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 
 	return (0);
 }
@@ -233,6 +236,18 @@ __zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs)
 	}
 #endif /* CONFIG_FS_POSIX_ACL */
 
+	switch (zfsvfs->z_case) {
+	case ZFS_CASE_SENSITIVE:
+		seq_puts(seq, ",casesensitive");
+		break;
+	case ZFS_CASE_INSENSITIVE:
+		seq_puts(seq, ",caseinsensitive");
+		break;
+	default:
+		seq_puts(seq, ",casemixed");
+		break;
+	}
+
 	return (0);
 }
 
@@ -262,11 +277,14 @@ zpl_test_super(struct super_block *s, void *data)
 {
 	zfsvfs_t *zfsvfs = s->s_fs_info;
 	objset_t *os = data;
-
-	if (zfsvfs == NULL)
-		return (0);
-
-	return (os == zfsvfs->z_os);
+	/*
+	 * If the os doesn't match the z_os in the super_block, assume it is
+	 * not a match. Matching would imply a multimount of a dataset. It is
+	 * possible that during a multimount, there is a simultaneous operation
+	 * that changes the z_os, e.g., rollback, where the match will be
+	 * missed, but in that case the user will get an EBUSY.
+	 */
+	return (zfsvfs != NULL && os == zfsvfs->z_os);
 }
 
 static struct super_block *
@@ -292,12 +310,35 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
 
 	s = sget(fs_type, zpl_test_super, set_anon_super, flags, os);
 
+	/*
+	 * Recheck with the lock held to prevent mounting the wrong dataset
+	 * since z_os can be stale when the teardown lock is held.
+	 *
+	 * We can't do this in zpl_test_super since it's under spinlock and
+	 * also s_umount lock is not held there so it would race with
+	 * zfs_umount and zfsvfs can be freed.
+	 */
+	if (!IS_ERR(s) && s->s_fs_info != NULL) {
+		zfsvfs_t *zfsvfs = s->s_fs_info;
+		if (zpl_enter(zfsvfs, FTAG) == 0) {
+			if (os != zfsvfs->z_os)
+				err = -SET_ERROR(EBUSY);
+			zpl_exit(zfsvfs, FTAG);
+		} else {
+			err = -SET_ERROR(EBUSY);
+		}
+	}
 	dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
 	dsl_dataset_rele(dmu_objset_ds(os), FTAG);
 
 	if (IS_ERR(s))
 		return (ERR_CAST(s));
 
+	if (err) {
+		deactivate_locked_super(s);
+		return (ERR_PTR(err));
+	}
+
 	if (s->s_root == NULL) {
 		err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0);
 		if (err) {
@@ -334,7 +375,7 @@ zpl_kill_sb(struct super_block *sb)
 }
 
 void
-zpl_prune_sb(int64_t nr_to_scan, void *arg)
+zpl_prune_sb(uint64_t nr_to_scan, void *arg)
 {
 	struct super_block *sb = (struct super_block *)arg;
 	int objects = 0;
@@ -360,7 +401,11 @@ const struct super_operations zpl_super_operations = {
 struct file_system_type zpl_fs_type = {
 	.owner			= THIS_MODULE,
 	.name			= ZFS_DRIVER,
+#if defined(HAVE_IDMAP_MNT_API)
+	.fs_flags		= FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
+#else
 	.fs_flags		= FS_USERNS_MOUNT,
+#endif
 	.mount			= zpl_mount,
 	.kill_sb		= zpl_kill_sb,
 };
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
index 98378109cb9a..4e4f5210f85d 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -246,8 +246,8 @@ zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
-	ZPL_ENTER(zfsvfs);
-	ZPL_VERIFY_ZP(zp);
+	if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		goto out1;
 	rw_enter(&zp->z_xattr_lock, RW_READER);
 
 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
@@ -264,7 +264,8 @@ zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 out:
 
 	rw_exit(&zp->z_xattr_lock);
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
+out1:
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 
@@ -435,12 +436,13 @@ zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
-	ZPL_ENTER(zfsvfs);
-	ZPL_VERIFY_ZP(zp);
+	if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		goto out;
 	rw_enter(&zp->z_xattr_lock, RW_READER);
 	error = __zpl_xattr_get(ip, name, value, size, cr);
 	rw_exit(&zp->z_xattr_lock);
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
+out:
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 
@@ -497,7 +499,7 @@ zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
 		vap->va_gid = crgetgid(cr);
 
 		error = -zfs_create(dxzp, (char *)name, vap, 0, 0644, &xzp,
-		    cr, 0, NULL);
+		    cr, ATTR_NOACLCHECK, NULL, zfs_init_idmap);
 		if (error)
 			goto out;
 	}
@@ -511,7 +513,7 @@ zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
 	error = -zfs_write_simple(xzp, value, size, pos, NULL);
 out:
 	if (error == 0) {
-		ip->i_ctime = current_time(ip);
+		zpl_inode_set_ctime_to_ts(ip, current_time(ip));
 		zfs_mark_inode_dirty(ip);
 	}
 
@@ -604,8 +606,8 @@ zpl_xattr_set(struct inode *ip, const char *name, const void *value,
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
-	ZPL_ENTER(zfsvfs);
-	ZPL_VERIFY_ZP(zp);
+	if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		goto out1;
 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
 
 	/*
@@ -658,7 +660,8 @@ zpl_xattr_set(struct inode *ip, const char *name, const void *value,
 		zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr);
 out:
 	rw_exit(&zp->z_xattr_lock);
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
+out1:
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
@@ -735,9 +738,11 @@ __zpl_xattr_user_get(struct inode *ip, const char *name,
 ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
 
 static int
-__zpl_xattr_user_set(struct inode *ip, const char *name,
+__zpl_xattr_user_set(zidmap_t *user_ns,
+    struct inode *ip, const char *name,
     const void *value, size_t size, int flags)
 {
+	(void) user_ns;
 	int error = 0;
 	/* xattr_resolve_name will do this for us if this is defined */
 #ifndef HAVE_XATTR_HANDLER_NAME
@@ -843,9 +848,11 @@ __zpl_xattr_trusted_get(struct inode *ip, const char *name,
 ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
 
 static int
-__zpl_xattr_trusted_set(struct inode *ip, const char *name,
+__zpl_xattr_trusted_set(zidmap_t *user_ns,
+    struct inode *ip, const char *name,
     const void *value, size_t size, int flags)
 {
+	(void) user_ns;
 	char *xattr_name;
 	int error;
 
@@ -911,9 +918,11 @@ __zpl_xattr_security_get(struct inode *ip, const char *name,
 ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
 
 static int
-__zpl_xattr_security_set(struct inode *ip, const char *name,
+__zpl_xattr_security_set(zidmap_t *user_ns,
+    struct inode *ip, const char *name,
     const void *value, size_t size, int flags)
 {
+	(void) user_ns;
 	char *xattr_name;
 	int error;
 	/* xattr_resolve_name will do this for us if this is defined */
@@ -937,7 +946,7 @@ zpl_xattr_security_init_impl(struct inode *ip, const struct xattr *xattrs,
 	int error = 0;
 
 	for (xattr = xattrs; xattr->name != NULL; xattr++) {
-		error = __zpl_xattr_security_set(ip,
+		error = __zpl_xattr_security_set(NULL, ip,
 		    xattr->name, xattr->value, xattr->value_len, 0);
 
 		if (error < 0)
@@ -1002,7 +1011,8 @@ zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type)
 				 */
 				if (ip->i_mode != mode) {
 					ip->i_mode = ITOZ(ip)->z_mode = mode;
-					ip->i_ctime = current_time(ip);
+					zpl_inode_set_ctime_to_ts(ip,
+					    current_time(ip));
 					zfs_mark_inode_dirty(ip);
 				}
 
@@ -1052,11 +1062,23 @@ int
 #ifdef HAVE_SET_ACL_USERNS
 zpl_set_acl(struct user_namespace *userns, struct inode *ip,
     struct posix_acl *acl, int type)
+#elif defined(HAVE_SET_ACL_IDMAP_DENTRY)
+zpl_set_acl(struct mnt_idmap *userns, struct dentry *dentry,
+    struct posix_acl *acl, int type)
+#elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)
+zpl_set_acl(struct user_namespace *userns, struct dentry *dentry,
+    struct posix_acl *acl, int type)
 #else
 zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type)
 #endif /* HAVE_SET_ACL_USERNS */
 {
+#ifdef HAVE_SET_ACL_USERNS_DENTRY_ARG2
+	return (zpl_set_acl_impl(d_inode(dentry), acl, type));
+#elif defined(HAVE_SET_ACL_IDMAP_DENTRY)
+	return (zpl_set_acl_impl(d_inode(dentry), acl, type));
+#else
 	return (zpl_set_acl_impl(ip, acl, type));
+#endif /* HAVE_SET_ACL_USERNS_DENTRY_ARG2 */
 }
 #endif /* HAVE_SET_ACL */
 
@@ -1115,7 +1137,7 @@ zpl_get_acl_impl(struct inode *ip, int type)
 	return (acl);
 }
 
-#if defined(HAVE_GET_ACL_RCU)
+#if defined(HAVE_GET_ACL_RCU) || defined(HAVE_GET_INODE_ACL)
 struct posix_acl *
 zpl_get_acl(struct inode *ip, int type, bool rcu)
 {
@@ -1149,7 +1171,7 @@ zpl_init_acl(struct inode *ip, struct inode *dir)
 			return (PTR_ERR(acl));
 		if (!acl) {
 			ITOZ(ip)->z_mode = (ip->i_mode &= ~current_umask());
-			ip->i_ctime = current_time(ip);
+			zpl_inode_set_ctime_to_ts(ip, current_time(ip));
 			zfs_mark_inode_dirty(ip);
 			return (0);
 		}
@@ -1297,7 +1319,8 @@ __zpl_xattr_acl_get_default(struct inode *ip, const char *name,
 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);
 
 static int
-__zpl_xattr_acl_set_access(struct inode *ip, const char *name,
+__zpl_xattr_acl_set_access(zidmap_t *mnt_ns,
+    struct inode *ip, const char *name,
     const void *value, size_t size, int flags)
 {
 	struct posix_acl *acl;
@@ -1311,8 +1334,14 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
 		return (-EOPNOTSUPP);
 
-	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
+#if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
+	if (!zpl_inode_owner_or_capable(mnt_ns, ip))
 		return (-EPERM);
+#else
+	(void) mnt_ns;
+	if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
+		return (-EPERM);
+#endif
 
 	if (value) {
 		acl = zpl_acl_from_xattr(value, size);
@@ -1336,7 +1365,8 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);
 
 static int
-__zpl_xattr_acl_set_default(struct inode *ip, const char *name,
+__zpl_xattr_acl_set_default(zidmap_t *mnt_ns,
+    struct inode *ip, const char *name,
     const void *value, size_t size, int flags)
 {
 	struct posix_acl *acl;
@@ -1350,8 +1380,14 @@ __zpl_xattr_acl_set_default(struct inode *ip, const char *name,
 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
 		return (-EOPNOTSUPP);
 
-	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
+#if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
+	if (!zpl_inode_owner_or_capable(mnt_ns, ip))
 		return (-EPERM);
+#else
+	(void) mnt_ns;
+	if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
+		return (-EPERM);
+#endif
 
 	if (value) {
 		acl = zpl_acl_from_xattr(value, size);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
index acbab55d03ef..4b960daf89ee 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -6,7 +6,7 @@
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
@@ -37,6 +37,7 @@
 #include <sys/spa_impl.h>
 #include <sys/zvol.h>
 #include <sys/zvol_impl.h>
+#include <cityhash.h>
 
 #include <linux/blkdev_compat.h>
 #include <linux/task_io_accounting_ops.h>
@@ -53,8 +54,14 @@ static unsigned int zvol_request_sync = 0;
 static unsigned int zvol_prefetch_bytes = (128 * 1024);
 static unsigned long zvol_max_discard_blocks = 16384;
 
+/*
+ * Switch taskq at multiple of 512 MB offset. This can be set to a lower value
+ * to utilize more threads for small files but may affect prefetch hits.
+ */
+#define	ZVOL_TASKQ_OFFSET_SHIFT 29
+
 #ifndef HAVE_BLKDEV_GET_ERESTARTSYS
-static const unsigned int zvol_open_timeout_ms = 1000;
+static unsigned int zvol_open_timeout_ms = 1000;
 #endif
 
 static unsigned int zvol_threads = 0;
@@ -76,6 +83,8 @@ static boolean_t zvol_use_blk_mq = B_FALSE;
 static unsigned int zvol_blk_mq_blocks_per_thread = 8;
 #endif
 
+static unsigned int zvol_num_taskqs = 0;
+
 #ifndef	BLKDEV_DEFAULT_RQ
 /* BLKDEV_MAX_RQ was renamed to BLKDEV_DEFAULT_RQ in the 5.16 kernel */
 #define	BLKDEV_DEFAULT_RQ BLKDEV_MAX_RQ
@@ -114,7 +123,11 @@ struct zvol_state_os {
 	boolean_t use_blk_mq;
 };
 
-taskq_t *zvol_taskq;
+typedef struct zv_taskq {
+	uint_t tqs_cnt;
+	taskq_t **tqs_taskq;
+} zv_taskq_t;
+static zv_taskq_t zvol_taskqs;
 static struct ida zvol_ida;
 
 typedef struct zv_request_stack {
@@ -342,8 +355,7 @@ zvol_discard(zv_request_t *zvr)
 	struct request_queue *q = zv->zv_zso->zvo_queue;
 	struct gendisk *disk = zv->zv_zso->zvo_disk;
 	unsigned long start_time = 0;
-
-	boolean_t acct = blk_queue_io_stat(q);
+	boolean_t acct = B_FALSE;
 
 	ASSERT3P(zv, !=, NULL);
 	ASSERT3U(zv->zv_open_count, >, 0);
@@ -388,7 +400,7 @@ zvol_discard(zv_request_t *zvr)
 	if (error != 0) {
 		dmu_tx_abort(tx);
 	} else {
-		zvol_log_truncate(zv, tx, start, size, B_TRUE);
+		zvol_log_truncate(zv, tx, start, size);
 		dmu_tx_commit(tx);
 		error = dmu_free_long_range(zv->zv_objset,
 		    ZVOL_OBJ, start, size);
@@ -513,7 +525,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
 	uint64_t size = io_size(bio, rq);
 	int rw = io_data_dir(bio, rq);
 
-	if (zvol_request_sync)
+	if (zvol_request_sync || zv->zv_threading == B_FALSE)
 		force_sync = 1;
 
 	zv_request_t zvr = {
@@ -533,6 +545,22 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
 	}
 
 	zv_request_task_t *task;
+	zv_taskq_t *ztqs = &zvol_taskqs;
+	uint_t blk_mq_hw_queue = 0;
+	uint_t tq_idx;
+	uint_t taskq_hash;
+#ifdef HAVE_BLK_MQ
+	if (rq)
+#ifdef HAVE_BLK_MQ_RQ_HCTX
+		blk_mq_hw_queue = rq->mq_hctx->queue_num;
+#else
+		blk_mq_hw_queue =
+		    rq->q->queue_hw_ctx[rq->q->mq_map[rq->cpu]]->queue_num;
+#endif
+#endif
+	taskq_hash = cityhash4((uintptr_t)zv, offset >> ZVOL_TASKQ_OFFSET_SHIFT,
+	    blk_mq_hw_queue, 0);
+	tq_idx = taskq_hash % ztqs->tqs_cnt;
 
 	if (rw == WRITE) {
 		if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
@@ -558,7 +586,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
 			rw_enter(&zv->zv_suspend_lock, RW_WRITER);
 			if (zv->zv_zilog == NULL) {
 				zv->zv_zilog = zil_open(zv->zv_objset,
-				    zvol_get_data);
+				    zvol_get_data, &zv->zv_kstat.dk_zil_sums);
 				zv->zv_flags |= ZVOL_WRITTEN_TO;
 				/* replay / destroy done in zvol_create_minor */
 				VERIFY0((zv->zv_zilog->zl_header->zh_flags &
@@ -602,7 +630,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
 				zvol_discard(&zvr);
 			} else {
 				task = zv_request_task_create(zvr);
-				taskq_dispatch_ent(zvol_taskq,
+				taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
 				    zvol_discard_task, task, 0, &task->ent);
 			}
 		} else {
@@ -610,7 +638,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
 				zvol_write(&zvr);
 			} else {
 				task = zv_request_task_create(zvr);
-				taskq_dispatch_ent(zvol_taskq,
+				taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
 				    zvol_write_task, task, 0, &task->ent);
 			}
 		}
@@ -632,7 +660,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
 			zvol_read(&zvr);
 		} else {
 			task = zv_request_task_create(zvr);
-			taskq_dispatch_ent(zvol_taskq,
+			taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
 			    zvol_read_task, task, 0, &task->ent);
 		}
 	}
@@ -672,7 +700,11 @@ zvol_request(struct request_queue *q, struct bio *bio)
 }
 
 static int
+#ifdef HAVE_BLK_MODE_T
+zvol_open(struct gendisk *disk, blk_mode_t flag)
+#else
 zvol_open(struct block_device *bdev, fmode_t flag)
+#endif
 {
 	zvol_state_t *zv;
 	int error = 0;
@@ -687,10 +719,14 @@ retry:
 	/*
 	 * Obtain a copy of private_data under the zvol_state_lock to make
 	 * sure that either the result of zvol free code path setting
-	 * bdev->bd_disk->private_data to NULL is observed, or zvol_os_free()
+	 * disk->private_data to NULL is observed, or zvol_os_free()
 	 * is not called on this zv because of the positive zv_open_count.
 	 */
+#ifdef HAVE_BLK_MODE_T
+	zv = disk->private_data;
+#else
 	zv = bdev->bd_disk->private_data;
+#endif
 	if (zv == NULL) {
 		rw_exit(&zvol_state_lock);
 		return (SET_ERROR(-ENXIO));
@@ -770,14 +806,15 @@ retry:
 			}
 		}
 
-		error = -zvol_first_open(zv, !(flag & FMODE_WRITE));
+		error = -zvol_first_open(zv, !(blk_mode_is_open_write(flag)));
 
 		if (drop_namespace)
 			mutex_exit(&spa_namespace_lock);
 	}
 
 	if (error == 0) {
-		if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
+		if ((blk_mode_is_open_write(flag)) &&
+		    (zv->zv_flags & ZVOL_RDONLY)) {
 			if (zv->zv_open_count == 0)
 				zvol_last_close(zv);
 
@@ -792,14 +829,25 @@ retry:
 		rw_exit(&zv->zv_suspend_lock);
 
 	if (error == 0)
+#ifdef HAVE_BLK_MODE_T
+		disk_check_media_change(disk);
+#else
 		zfs_check_media_change(bdev);
+#endif
 
 	return (error);
 }
 
 static void
-zvol_release(struct gendisk *disk, fmode_t mode)
+#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG
+zvol_release(struct gendisk *disk)
+#else
+zvol_release(struct gendisk *disk, fmode_t unused)
+#endif
 {
+#if !defined(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG)
+	(void) unused;
+#endif
 	zvol_state_t *zv;
 	boolean_t drop_suspend = B_TRUE;
 
@@ -854,7 +902,13 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode,
 
 	switch (cmd) {
 	case BLKFLSBUF:
+#ifdef HAVE_FSYNC_BDEV
 		fsync_bdev(bdev);
+#elif defined(HAVE_SYNC_BLOCKDEV)
+		sync_blockdev(bdev);
+#else
+#error "Neither fsync_bdev() nor sync_blockdev() found"
+#endif
 		invalidate_bdev(bdev);
 		rw_enter(&zv->zv_suspend_lock, RW_READER);
 
@@ -1030,6 +1084,16 @@ zvol_alloc_non_blk_mq(struct zvol_state_os *zso)
 
 	zso->zvo_disk->minors = ZVOL_MINORS;
 	zso->zvo_queue = zso->zvo_disk->queue;
+#elif defined(HAVE_BLK_ALLOC_DISK_2ARG)
+	struct gendisk *disk = blk_alloc_disk(NULL, NUMA_NO_NODE);
+	if (IS_ERR(disk)) {
+		zso->zvo_disk = NULL;
+		return (1);
+	}
+
+	zso->zvo_disk = disk;
+	zso->zvo_disk->minors = ZVOL_MINORS;
+	zso->zvo_queue = zso->zvo_disk->queue;
 #else
 	zso->zvo_queue = blk_alloc_queue(NUMA_NO_NODE);
 	if (zso->zvo_queue == NULL)
@@ -1078,6 +1142,17 @@ zvol_alloc_blk_mq(zvol_state_t *zv)
 	}
 	zso->zvo_queue = zso->zvo_disk->queue;
 	zso->zvo_disk->minors = ZVOL_MINORS;
+#elif defined(HAVE_BLK_ALLOC_DISK_2ARG)
+	struct gendisk *disk = blk_mq_alloc_disk(&zso->tag_set, NULL, zv);
+	if (IS_ERR(disk)) {
+		zso->zvo_disk = NULL;
+		blk_mq_free_tag_set(&zso->tag_set);
+		return (1);
+	}
+
+	zso->zvo_disk = disk;
+	zso->zvo_queue = zso->zvo_disk->queue;
+	zso->zvo_disk->minors = ZVOL_MINORS;
 #else
 	zso->zvo_disk = alloc_disk(ZVOL_MINORS);
 	if (zso->zvo_disk == NULL) {
@@ -1174,7 +1249,7 @@ zvol_alloc(dev_t dev, const char *name)
 	zso->zvo_queue->queuedata = zv;
 	zso->zvo_dev = dev;
 	zv->zv_open_count = 0;
-	strlcpy(zv->zv_name, name, MAXNAMELEN);
+	strlcpy(zv->zv_name, name, sizeof (zv->zv_name));
 
 	zfs_rangelock_init(&zv->zv_rangelock, NULL, NULL);
 	rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL);
@@ -1231,9 +1306,13 @@ zvol_os_free(zvol_state_t *zv)
 
 	del_gendisk(zv->zv_zso->zvo_disk);
 #if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \
-	defined(HAVE_BLK_ALLOC_DISK)
+	(defined(HAVE_BLK_ALLOC_DISK) || defined(HAVE_BLK_ALLOC_DISK_2ARG))
+#if defined(HAVE_BLK_CLEANUP_DISK)
 	blk_cleanup_disk(zv->zv_zso->zvo_disk);
 #else
+	put_disk(zv->zv_zso->zvo_disk);
+#endif
+#else
 	blk_cleanup_queue(zv->zv_zso->zvo_queue);
 	put_disk(zv->zv_zso->zvo_disk);
 #endif
@@ -1275,6 +1354,8 @@ zvol_os_create_minor(const char *name)
 	int error = 0;
 	int idx;
 	uint64_t hash = zvol_name_hash(name);
+	uint64_t volthreading;
+	bool replayed_zil = B_FALSE;
 
 	if (zvol_inhibit_dev)
 		return (0);
@@ -1283,6 +1364,13 @@ zvol_os_create_minor(const char *name)
 	if (idx < 0)
 		return (SET_ERROR(-idx));
 	minor = idx << ZVOL_MINOR_BITS;
+	if (MINOR(minor) != minor) {
+		/* too many partitions can cause an overflow */
+		zfs_dbgmsg("zvol: create minor overflow: %s, minor %u/%u",
+		    name, minor, MINOR(minor));
+		ida_simple_remove(&zvol_ida, idx);
+		return (SET_ERROR(EINVAL));
+	}
 
 	zv = zvol_find_by_name_hash(name, hash, RW_NONE);
 	if (zv) {
@@ -1320,6 +1408,12 @@ zvol_os_create_minor(const char *name)
 	zv->zv_volsize = volsize;
 	zv->zv_objset = os;
 
+	/* Default */
+	zv->zv_threading = B_TRUE;
+	if (dsl_prop_get_integer(name, "volthreading", &volthreading, NULL)
+	    == 0)
+		zv->zv_threading = volthreading;
+
 	set_capacity(zv->zv_zso->zvo_disk, zv->zv_volsize >> 9);
 
 	blk_queue_max_hw_sectors(zv->zv_zso->zvo_queue,
@@ -1408,18 +1502,21 @@ zvol_os_create_minor(const char *name)
 	blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, zv->zv_zso->zvo_queue);
 #endif
 
+	ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
+	error = dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
+	if (error)
+		goto out_dmu_objset_disown;
 	ASSERT3P(zv->zv_zilog, ==, NULL);
-	zv->zv_zilog = zil_open(os, zvol_get_data);
+	zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums);
 	if (spa_writeable(dmu_objset_spa(os))) {
 		if (zil_replay_disable)
-			zil_destroy(zv->zv_zilog, B_FALSE);
+			replayed_zil = zil_destroy(zv->zv_zilog, B_FALSE);
 		else
-			zil_replay(os, zv, zvol_replay_vector);
+			replayed_zil = zil_replay(os, zv, zvol_replay_vector);
 	}
-	zil_close(zv->zv_zilog);
+	if (replayed_zil)
+		zil_close(zv->zv_zilog);
 	zv->zv_zilog = NULL;
-	ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
-	dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
 
 	/*
 	 * When udev detects the addition of the device it will immediately
@@ -1427,7 +1524,7 @@ zvol_os_create_minor(const char *name)
 	 * Prefetching the blocks commonly scanned by blkid(8) will speed
 	 * up this process.
 	 */
-	len = MIN(MAX(zvol_prefetch_bytes, 0), SPA_MAXBLOCKSIZE);
+	len = MIN(zvol_prefetch_bytes, SPA_MAXBLOCKSIZE);
 	if (len > 0) {
 		dmu_prefetch(os, ZVOL_OBJ, 0, 0, len, ZIO_PRIORITY_SYNC_READ);
 		dmu_prefetch(os, ZVOL_OBJ, 0, volsize - len, len,
@@ -1488,6 +1585,8 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
 	 */
 	set_disk_ro(zv->zv_zso->zvo_disk, !readonly);
 	set_disk_ro(zv->zv_zso->zvo_disk, readonly);
+
+	dataset_kstats_rename(&zv->zv_kstat, newname);
 }
 
 void
@@ -1528,8 +1627,40 @@ zvol_init(void)
 		zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024);
 	}
 
+	/*
+	 * Use at least 32 zvol_threads, but for many-core systems,
+	 * prefer 6 threads per taskq, but no more taskqs
+	 * than threads in them on large systems.
+	 *
+	 *                 taskq   total
+	 * cpus    taskqs  threads threads
+	 * ------- ------- ------- -------
+	 * 1       1       32       32
+	 * 2       1       32       32
+	 * 4       1       32       32
+	 * 8       2       16       32
+	 * 16      3       11       33
+	 * 32      5       7        35
+	 * 64      8       8        64
+	 * 128     11      12       132
+	 * 256     16      16       256
+	 */
+	zv_taskq_t *ztqs = &zvol_taskqs;
+	uint_t num_tqs = MIN(num_online_cpus(), zvol_num_taskqs);
+	if (num_tqs == 0) {
+		num_tqs = 1 + num_online_cpus() / 6;
+		while (num_tqs * num_tqs > zvol_actual_threads)
+			num_tqs--;
+	}
+	uint_t per_tq_thread = zvol_actual_threads / num_tqs;
+	if (per_tq_thread * num_tqs < zvol_actual_threads)
+		per_tq_thread++;
+	ztqs->tqs_cnt = num_tqs;
+	ztqs->tqs_taskq = kmem_alloc(num_tqs * sizeof (taskq_t *), KM_SLEEP);
 	error = register_blkdev(zvol_major, ZVOL_DRIVER);
 	if (error) {
+		kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt * sizeof (taskq_t *));
+		ztqs->tqs_taskq = NULL;
 		printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
 		return (error);
 	}
@@ -1549,11 +1680,22 @@ zvol_init(void)
 		    1024);
 	}
 #endif
-	zvol_taskq = taskq_create(ZVOL_DRIVER, zvol_actual_threads, maxclsyspri,
-	    zvol_actual_threads, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
-	if (zvol_taskq == NULL) {
-		unregister_blkdev(zvol_major, ZVOL_DRIVER);
-		return (-ENOMEM);
+	for (uint_t i = 0; i < num_tqs; i++) {
+		char name[32];
+		(void) snprintf(name, sizeof (name), "%s_tq-%u",
+		    ZVOL_DRIVER, i);
+		ztqs->tqs_taskq[i] = taskq_create(name, per_tq_thread,
+		    maxclsyspri, per_tq_thread, INT_MAX,
+		    TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+		if (ztqs->tqs_taskq[i] == NULL) {
+			for (int j = i - 1; j >= 0; j--)
+				taskq_destroy(ztqs->tqs_taskq[j]);
+			unregister_blkdev(zvol_major, ZVOL_DRIVER);
+			kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt *
+			    sizeof (taskq_t *));
+			ztqs->tqs_taskq = NULL;
+			return (-ENOMEM);
+		}
 	}
 
 	zvol_init_impl();
@@ -1564,9 +1706,22 @@ zvol_init(void)
 void
 zvol_fini(void)
 {
+	zv_taskq_t *ztqs = &zvol_taskqs;
 	zvol_fini_impl();
 	unregister_blkdev(zvol_major, ZVOL_DRIVER);
-	taskq_destroy(zvol_taskq);
+
+	if (ztqs->tqs_taskq == NULL) {
+		ASSERT3U(ztqs->tqs_cnt, ==, 0);
+	} else {
+		for (uint_t i = 0; i < ztqs->tqs_cnt; i++) {
+			ASSERT3P(ztqs->tqs_taskq[i], !=, NULL);
+			taskq_destroy(ztqs->tqs_taskq[i]);
+		}
+		kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt *
+		    sizeof (taskq_t *));
+		ztqs->tqs_taskq = NULL;
+	}
+
 	ida_destroy(&zvol_ida);
 }
 
@@ -1587,6 +1742,9 @@ MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests");
 module_param(zvol_max_discard_blocks, ulong, 0444);
 MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard");
 
+module_param(zvol_num_taskqs, uint, 0444);
+MODULE_PARM_DESC(zvol_num_taskqs, "Number of zvol taskqs");
+
 module_param(zvol_prefetch_bytes, uint, 0644);
 MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
 
@@ -1605,4 +1763,9 @@ MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
     "Process volblocksize blocks per thread");
 #endif
 
+#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
+module_param(zvol_open_timeout_ms, uint, 0644);
+MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
+#endif
+
 /* END CSTYLED */