aboutsummaryrefslogtreecommitdiff
path: root/sys/contrib/openzfs/module/os/linux
diff options
context:
space:
mode:
Diffstat (limited to 'sys/contrib/openzfs/module/os/linux')
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-atomic.c36
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-generic.c258
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c8
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-kmem.c4
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c3
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-math-compat.c275
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-trace.c2
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-zone.c413
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/arc_os.c16
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c50
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c7
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c81
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c4
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c240
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c21
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_export.c87
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c44
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c26
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c681
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c2
20 files changed, 1554 insertions, 704 deletions
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-atomic.c b/sys/contrib/openzfs/module/os/linux/spl/spl-atomic.c
deleted file mode 100644
index b6d967108fed..000000000000
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-atomic.c
+++ /dev/null
@@ -1,36 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- * Solaris Porting Layer (SPL) Atomic Implementation.
- */
-
-#include <sys/atomic.h>
-
-#ifdef ATOMIC_SPINLOCK
-/* Global atomic lock declarations */
-DEFINE_SPINLOCK(atomic32_lock);
-DEFINE_SPINLOCK(atomic64_lock);
-
-EXPORT_SYMBOL(atomic32_lock);
-EXPORT_SYMBOL(atomic64_lock);
-#endif /* ATOMIC_SPINLOCK */
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
index 89ca4a648b2f..585ad7377b49 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
@@ -197,266 +197,8 @@ random_get_pseudo_bytes(uint8_t *ptr, size_t len)
return (0);
}
-
-
EXPORT_SYMBOL(random_get_pseudo_bytes);
-#if BITS_PER_LONG == 32
-
-/*
- * Support 64/64 => 64 division on a 32-bit platform. While the kernel
- * provides a div64_u64() function for this we do not use it because the
- * implementation is flawed. There are cases which return incorrect
- * results as late as linux-2.6.35. Until this is fixed upstream the
- * spl must provide its own implementation.
- *
- * This implementation is a slightly modified version of the algorithm
- * proposed by the book 'Hacker's Delight'. The original source can be
- * found here and is available for use without restriction.
- *
- * http://www.hackersdelight.org/HDcode/newCode/divDouble.c
- */
-
-/*
- * Calculate number of leading of zeros for a 64-bit value.
- */
-static int
-nlz64(uint64_t x)
-{
- register int n = 0;
-
- if (x == 0)
- return (64);
-
- if (x <= 0x00000000FFFFFFFFULL) { n = n + 32; x = x << 32; }
- if (x <= 0x0000FFFFFFFFFFFFULL) { n = n + 16; x = x << 16; }
- if (x <= 0x00FFFFFFFFFFFFFFULL) { n = n + 8; x = x << 8; }
- if (x <= 0x0FFFFFFFFFFFFFFFULL) { n = n + 4; x = x << 4; }
- if (x <= 0x3FFFFFFFFFFFFFFFULL) { n = n + 2; x = x << 2; }
- if (x <= 0x7FFFFFFFFFFFFFFFULL) { n = n + 1; }
-
- return (n);
-}
-
-/*
- * Newer kernels have a div_u64() function but we define our own
- * to simplify portability between kernel versions.
- */
-static inline uint64_t
-__div_u64(uint64_t u, uint32_t v)
-{
- (void) do_div(u, v);
- return (u);
-}
-
-/*
- * Turn off missing prototypes warning for these functions. They are
- * replacements for libgcc-provided functions and will never be called
- * directly.
- */
-#if defined(__GNUC__) && !defined(__clang__)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wmissing-prototypes"
-#endif
-
-/*
- * Implementation of 64-bit unsigned division for 32-bit machines.
- *
- * First the procedure takes care of the case in which the divisor is a
- * 32-bit quantity. There are two subcases: (1) If the left half of the
- * dividend is less than the divisor, one execution of do_div() is all that
- * is required (overflow is not possible). (2) Otherwise it does two
- * divisions, using the grade school method.
- */
-uint64_t
-__udivdi3(uint64_t u, uint64_t v)
-{
- uint64_t u0, u1, v1, q0, q1, k;
- int n;
-
- if (v >> 32 == 0) { // If v < 2**32:
- if (u >> 32 < v) { // If u/v cannot overflow,
- return (__div_u64(u, v)); // just do one division.
- } else { // If u/v would overflow:
- u1 = u >> 32; // Break u into two halves.
- u0 = u & 0xFFFFFFFF;
- q1 = __div_u64(u1, v); // First quotient digit.
- k = u1 - q1 * v; // First remainder, < v.
- u0 += (k << 32);
- q0 = __div_u64(u0, v); // Seconds quotient digit.
- return ((q1 << 32) + q0);
- }
- } else { // If v >= 2**32:
- n = nlz64(v); // 0 <= n <= 31.
- v1 = (v << n) >> 32; // Normalize divisor, MSB is 1.
- u1 = u >> 1; // To ensure no overflow.
- q1 = __div_u64(u1, v1); // Get quotient from
- q0 = (q1 << n) >> 31; // Undo normalization and
- // division of u by 2.
- if (q0 != 0) // Make q0 correct or
- q0 = q0 - 1; // too small by 1.
- if ((u - q0 * v) >= v)
- q0 = q0 + 1; // Now q0 is correct.
-
- return (q0);
- }
-}
-EXPORT_SYMBOL(__udivdi3);
-
-#ifndef abs64
-/* CSTYLED */
-#define abs64(x) ({ uint64_t t = (x) >> 63; ((x) ^ t) - t; })
-#endif
-
-/*
- * Implementation of 64-bit signed division for 32-bit machines.
- */
-int64_t
-__divdi3(int64_t u, int64_t v)
-{
- int64_t q, t;
- q = __udivdi3(abs64(u), abs64(v));
- t = (u ^ v) >> 63; // If u, v have different
- return ((q ^ t) - t); // signs, negate q.
-}
-EXPORT_SYMBOL(__divdi3);
-
-/*
- * Implementation of 64-bit unsigned modulo for 32-bit machines.
- */
-uint64_t
-__umoddi3(uint64_t dividend, uint64_t divisor)
-{
- return (dividend - (divisor * __udivdi3(dividend, divisor)));
-}
-EXPORT_SYMBOL(__umoddi3);
-
-/* 64-bit signed modulo for 32-bit machines. */
-int64_t
-__moddi3(int64_t n, int64_t d)
-{
- int64_t q;
- boolean_t nn = B_FALSE;
-
- if (n < 0) {
- nn = B_TRUE;
- n = -n;
- }
- if (d < 0)
- d = -d;
-
- q = __umoddi3(n, d);
-
- return (nn ? -q : q);
-}
-EXPORT_SYMBOL(__moddi3);
-
-/*
- * Implementation of 64-bit unsigned division/modulo for 32-bit machines.
- */
-uint64_t
-__udivmoddi4(uint64_t n, uint64_t d, uint64_t *r)
-{
- uint64_t q = __udivdi3(n, d);
- if (r)
- *r = n - d * q;
- return (q);
-}
-EXPORT_SYMBOL(__udivmoddi4);
-
-/*
- * Implementation of 64-bit signed division/modulo for 32-bit machines.
- */
-int64_t
-__divmoddi4(int64_t n, int64_t d, int64_t *r)
-{
- int64_t q, rr;
- boolean_t nn = B_FALSE;
- boolean_t nd = B_FALSE;
- if (n < 0) {
- nn = B_TRUE;
- n = -n;
- }
- if (d < 0) {
- nd = B_TRUE;
- d = -d;
- }
-
- q = __udivmoddi4(n, d, (uint64_t *)&rr);
-
- if (nn != nd)
- q = -q;
- if (nn)
- rr = -rr;
- if (r)
- *r = rr;
- return (q);
-}
-EXPORT_SYMBOL(__divmoddi4);
-
-#if defined(__arm) || defined(__arm__)
-/*
- * Implementation of 64-bit (un)signed division for 32-bit arm machines.
- *
- * Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned)
- * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1},
- * and the remainder in {r2, r3}. The return type is specifically left
- * set to 'void' to ensure the compiler does not overwrite these registers
- * during the return. All results are in registers as per ABI
- */
-void
-__aeabi_uldivmod(uint64_t u, uint64_t v)
-{
- uint64_t res;
- uint64_t mod;
-
- res = __udivdi3(u, v);
- mod = __umoddi3(u, v);
- {
- register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
- register uint32_t r1 asm("r1") = (res >> 32);
- register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
- register uint32_t r3 asm("r3") = (mod >> 32);
-
- asm volatile(""
- : "+r"(r0), "+r"(r1), "+r"(r2), "+r"(r3) /* output */
- : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */
-
- return; /* r0; */
- }
-}
-EXPORT_SYMBOL(__aeabi_uldivmod);
-
-void
-__aeabi_ldivmod(int64_t u, int64_t v)
-{
- int64_t res;
- uint64_t mod;
-
- res = __divdi3(u, v);
- mod = __umoddi3(u, v);
- {
- register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
- register uint32_t r1 asm("r1") = (res >> 32);
- register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
- register uint32_t r3 asm("r3") = (mod >> 32);
-
- asm volatile(""
- : "+r"(r0), "+r"(r1), "+r"(r2), "+r"(r3) /* output */
- : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */
-
- return; /* r0; */
- }
-}
-EXPORT_SYMBOL(__aeabi_ldivmod);
-#endif /* __arm || __arm__ */
-
-#if defined(__GNUC__) && !defined(__clang__)
-#pragma GCC diagnostic pop
-#endif
-
-#endif /* BITS_PER_LONG */
-
/*
* NOTE: The strtoxx behavior is solely based on my reading of the Solaris
* ddi_strtol(9F) man page. I have not verified the behavior of these
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
index 5594b2f80c02..6d496e68511e 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
@@ -139,12 +139,10 @@ static void spl_cache_shrink(spl_kmem_cache_t *skc, void *obj);
static void *
kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
{
- gfp_t lflags = kmem_flags_convert(flags);
+ gfp_t lflags = kmem_flags_convert(flags | KM_VMEM);
void *ptr;
- if (skc->skc_flags & KMC_RECLAIMABLE)
- lflags |= __GFP_RECLAIMABLE;
- ptr = spl_vmalloc(size, lflags | __GFP_HIGHMEM);
+ ptr = spl_vmalloc(size, lflags);
/* Resulting allocated memory will be page aligned */
ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE));
@@ -424,7 +422,7 @@ spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj)
if (!empty)
return (-EEXIST);
- if (skc->skc_flags & KMC_RECLAIMABLE)
+ if (skc->skc_flags & KMC_RECLAIMABLE && !(flags & KM_VMEM))
lflags |= __GFP_RECLAIMABLE;
ske = kmalloc(sizeof (*ske), lflags);
if (ske == NULL)
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem.c
index 9fe008cef868..9fe4042b5079 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem.c
@@ -188,7 +188,7 @@ spl_kvmalloc(size_t size, gfp_t lflags)
return (ptr);
}
- return (spl_vmalloc(size, lflags | __GFP_HIGHMEM));
+ return (spl_vmalloc(size, lflags));
}
/*
@@ -237,7 +237,7 @@ spl_kmem_alloc_impl(size_t size, int flags, int node)
*/
if (size > spl_kmem_alloc_max) {
if (flags & KM_VMEM) {
- ptr = spl_vmalloc(size, lflags | __GFP_HIGHMEM);
+ ptr = spl_vmalloc(size, lflags);
} else {
return (NULL);
}
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c
index 02c5b42bc4a0..154ab12e84f7 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c
@@ -531,7 +531,6 @@ kstat_proc_entry_init(kstat_proc_entry_t *kpep, const char *module,
strlcpy(kpep->kpe_module, module, sizeof (kpep->kpe_module));
strlcpy(kpep->kpe_name, name, sizeof (kpep->kpe_name));
}
-EXPORT_SYMBOL(kstat_proc_entry_init);
kstat_t *
__kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
@@ -702,7 +701,6 @@ out:
mutex_exit(&kstat_module_lock);
}
-EXPORT_SYMBOL(kstat_proc_entry_install);
void
__kstat_install(kstat_t *ksp)
@@ -739,7 +737,6 @@ kstat_proc_entry_delete(kstat_proc_entry_t *kpep)
mutex_exit(&kstat_module_lock);
}
-EXPORT_SYMBOL(kstat_proc_entry_delete);
void
__kstat_delete(kstat_t *ksp)
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-math-compat.c b/sys/contrib/openzfs/module/os/linux/spl/spl-math-compat.c
new file mode 100644
index 000000000000..3184db7f28b0
--- /dev/null
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-math-compat.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ * Copyright (C) 2007 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ * UCRL-CODE-235197
+ *
+ * This file is part of the SPL, Solaris Porting Layer.
+ *
+ * The SPL is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * The SPL is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with the SPL. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Solaris Porting Layer (SPL) Generic Implementation.
+ */
+
+#include <sys/isa_defs.h>
+#include <sys/sysmacros.h>
+
+/*
+ * 64-bit math support for 32-bit platforms. Compilers will generate
+ * references to the functions here if required.
+ */
+
+#if BITS_PER_LONG == 32
+
+/*
+ * Support 64/64 => 64 division on a 32-bit platform. While the kernel
+ * provides a div64_u64() function for this we do not use it because the
+ * implementation is flawed. There are cases which return incorrect
+ * results as late as linux-2.6.35. Until this is fixed upstream the
+ * spl must provide its own implementation.
+ *
+ * This implementation is a slightly modified version of the algorithm
+ * proposed by the book 'Hacker's Delight'. The original source can be
+ * found here and is available for use without restriction.
+ *
+ * http://www.hackersdelight.org/HDcode/newCode/divDouble.c
+ */
+
+/*
+ * Calculate the number of leading zeros for a 64-bit value.
+ */
+static int
+nlz64(uint64_t x)
+{
+ register int n = 0;
+
+ if (x == 0)
+ return (64);
+
+ if (x <= 0x00000000FFFFFFFFULL) { n = n + 32; x = x << 32; }
+ if (x <= 0x0000FFFFFFFFFFFFULL) { n = n + 16; x = x << 16; }
+ if (x <= 0x00FFFFFFFFFFFFFFULL) { n = n + 8; x = x << 8; }
+ if (x <= 0x0FFFFFFFFFFFFFFFULL) { n = n + 4; x = x << 4; }
+ if (x <= 0x3FFFFFFFFFFFFFFFULL) { n = n + 2; x = x << 2; }
+ if (x <= 0x7FFFFFFFFFFFFFFFULL) { n = n + 1; }
+
+ return (n);
+}
+
+/*
+ * Newer kernels have a div_u64() function but we define our own
+ * to simplify portability between kernel versions.
+ */
+static inline uint64_t
+__div_u64(uint64_t u, uint32_t v)
+{
+ (void) do_div(u, v);
+ return (u);
+}
+
+/*
+ * Implementation of 64-bit unsigned division for 32-bit machines.
+ *
+ * First the procedure takes care of the case in which the divisor is a
+ * 32-bit quantity. There are two subcases: (1) If the left half of the
+ * dividend is less than the divisor, one execution of do_div() is all that
+ * is required (overflow is not possible). (2) Otherwise it does two
+ * divisions, using the grade school method.
+ */
+uint64_t
+__udivdi3(uint64_t u, uint64_t v)
+{
+ uint64_t u0, u1, v1, q0, q1, k;
+ int n;
+
+ if (v >> 32 == 0) { // If v < 2**32:
+ if (u >> 32 < v) { // If u/v cannot overflow,
+ return (__div_u64(u, v)); // just do one division.
+ } else { // If u/v would overflow:
+ u1 = u >> 32; // Break u into two halves.
+ u0 = u & 0xFFFFFFFF;
+ q1 = __div_u64(u1, v); // First quotient digit.
+ k = u1 - q1 * v; // First remainder, < v.
+ u0 += (k << 32);
+ q0 = __div_u64(u0, v); // Second quotient digit.
+ return ((q1 << 32) + q0);
+ }
+ } else { // If v >= 2**32:
+ n = nlz64(v); // 0 <= n <= 31.
+ v1 = (v << n) >> 32; // Normalize divisor, MSB is 1.
+ u1 = u >> 1; // To ensure no overflow.
+ q1 = __div_u64(u1, v1); // Get quotient from
+ q0 = (q1 << n) >> 31; // Undo normalization and
+ // division of u by 2.
+ if (q0 != 0) // Make q0 correct or
+ q0 = q0 - 1; // too small by 1.
+ if ((u - q0 * v) >= v)
+ q0 = q0 + 1; // Now q0 is correct.
+
+ return (q0);
+ }
+}
+EXPORT_SYMBOL(__udivdi3);
+
+#ifndef abs64
+/* CSTYLED */
+#define abs64(x) ({ uint64_t t = (x) >> 63; ((x) ^ t) - t; })
+#endif
+
+/*
+ * Implementation of 64-bit signed division for 32-bit machines.
+ */
+int64_t
+__divdi3(int64_t u, int64_t v)
+{
+ int64_t q, t;
+ q = __udivdi3(abs64(u), abs64(v));
+ t = (u ^ v) >> 63; // If u, v have different
+ return ((q ^ t) - t); // signs, negate q.
+}
+EXPORT_SYMBOL(__divdi3);
+
+/*
+ * Implementation of 64-bit unsigned modulo for 32-bit machines.
+ */
+uint64_t
+__umoddi3(uint64_t dividend, uint64_t divisor)
+{
+ return (dividend - (divisor * __udivdi3(dividend, divisor)));
+}
+EXPORT_SYMBOL(__umoddi3);
+
+/* 64-bit signed modulo for 32-bit machines. */
+int64_t
+__moddi3(int64_t n, int64_t d)
+{
+ int64_t q;
+ boolean_t nn = B_FALSE;
+
+ if (n < 0) {
+ nn = B_TRUE;
+ n = -n;
+ }
+ if (d < 0)
+ d = -d;
+
+ q = __umoddi3(n, d);
+
+ return (nn ? -q : q);
+}
+EXPORT_SYMBOL(__moddi3);
+
+/*
+ * Implementation of 64-bit unsigned division/modulo for 32-bit machines.
+ */
+uint64_t
+__udivmoddi4(uint64_t n, uint64_t d, uint64_t *r)
+{
+ uint64_t q = __udivdi3(n, d);
+ if (r)
+ *r = n - d * q;
+ return (q);
+}
+EXPORT_SYMBOL(__udivmoddi4);
+
+/*
+ * Implementation of 64-bit signed division/modulo for 32-bit machines.
+ */
+int64_t
+__divmoddi4(int64_t n, int64_t d, int64_t *r)
+{
+ int64_t q, rr;
+ boolean_t nn = B_FALSE;
+ boolean_t nd = B_FALSE;
+ if (n < 0) {
+ nn = B_TRUE;
+ n = -n;
+ }
+ if (d < 0) {
+ nd = B_TRUE;
+ d = -d;
+ }
+
+ q = __udivmoddi4(n, d, (uint64_t *)&rr);
+
+ if (nn != nd)
+ q = -q;
+ if (nn)
+ rr = -rr;
+ if (r)
+ *r = rr;
+ return (q);
+}
+EXPORT_SYMBOL(__divmoddi4);
+
+#if defined(__arm) || defined(__arm__)
+/*
+ * Implementation of 64-bit (un)signed division for 32-bit arm machines.
+ *
+ * Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned)
+ * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1},
+ * and the remainder in {r2, r3}. The return type is specifically left
+ * set to 'void' to ensure the compiler does not overwrite these registers
+ * during the return. All results are in registers as per ABI
+ */
+void
+__aeabi_uldivmod(uint64_t u, uint64_t v)
+{
+ uint64_t res;
+ uint64_t mod;
+
+ res = __udivdi3(u, v);
+ mod = __umoddi3(u, v);
+ {
+ register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
+ register uint32_t r1 asm("r1") = (res >> 32);
+ register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
+ register uint32_t r3 asm("r3") = (mod >> 32);
+
+ asm volatile(""
+ : "+r"(r0), "+r"(r1), "+r"(r2), "+r"(r3) /* output */
+ : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */
+
+ return; /* r0; */
+ }
+}
+EXPORT_SYMBOL(__aeabi_uldivmod);
+
+void
+__aeabi_ldivmod(int64_t u, int64_t v)
+{
+ int64_t res;
+ uint64_t mod;
+
+ res = __divdi3(u, v);
+ mod = __umoddi3(u, v);
+ {
+ register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
+ register uint32_t r1 asm("r1") = (res >> 32);
+ register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
+ register uint32_t r3 asm("r3") = (mod >> 32);
+
+ asm volatile(""
+ : "+r"(r0), "+r"(r1), "+r"(r2), "+r"(r3) /* output */
+ : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */
+
+ return; /* r0; */
+ }
+}
+EXPORT_SYMBOL(__aeabi_ldivmod);
+#endif /* __arm || __arm__ */
+
+#endif /* BITS_PER_LONG */
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-trace.c b/sys/contrib/openzfs/module/os/linux/spl/spl-trace.c
index 1c984f221c7d..76ee71074cb5 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-trace.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-trace.c
@@ -27,8 +27,6 @@
#include <sys/taskq.h>
-#ifdef _KERNEL
#define CREATE_TRACE_POINTS
#include <sys/trace.h>
#include <sys/trace_taskq.h>
-#endif
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
index b2eae5d00b10..5992957280e4 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
@@ -59,6 +59,18 @@ typedef struct zone_dataset {
char zd_dsname[]; /* name of the member dataset */
} zone_dataset_t;
+/*
+ * UID-based dataset zoning: allows delegating datasets to all user
+ * namespaces owned by a specific UID, enabling rootless container support.
+ */
+typedef struct zone_uid_datasets {
+ struct list_head zuds_list; /* zone_uid_datasets linkage */
+ kuid_t zuds_owner; /* owner UID */
+ struct list_head zuds_datasets; /* datasets for this UID */
+} zone_uid_datasets_t;
+
+static struct list_head zone_uid_datasets;
+
#ifdef CONFIG_USER_NS
/*
@@ -138,6 +150,18 @@ zone_datasets_lookup(unsigned int nsinum)
}
#ifdef CONFIG_USER_NS
+static zone_uid_datasets_t *
+zone_uid_datasets_lookup(kuid_t owner)
+{
+ zone_uid_datasets_t *zuds;
+
+ list_for_each_entry(zuds, &zone_uid_datasets, zuds_list) {
+ if (uid_eq(zuds->zuds_owner, owner))
+ return (zuds);
+ }
+ return (NULL);
+}
+
static struct zone_dataset *
zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
{
@@ -232,6 +256,62 @@ zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
EXPORT_SYMBOL(zone_dataset_attach);
int
+zone_dataset_attach_uid(cred_t *cred, const char *dataset, uid_t owner_uid)
+{
+#ifdef CONFIG_USER_NS
+ zone_uid_datasets_t *zuds;
+ zone_dataset_t *zd;
+ int error;
+ size_t dsnamelen;
+ kuid_t kowner;
+
+ /* Only root can attach datasets to UIDs */
+ if ((error = zone_dataset_cred_check(cred)) != 0)
+ return (error);
+ if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
+ return (error);
+
+ kowner = make_kuid(current_user_ns(), owner_uid);
+ if (!uid_valid(kowner))
+ return (EINVAL);
+
+ mutex_enter(&zone_datasets_lock);
+
+ /* Find or create UID entry */
+ zuds = zone_uid_datasets_lookup(kowner);
+ if (zuds == NULL) {
+ zuds = kmem_alloc(sizeof (zone_uid_datasets_t), KM_SLEEP);
+ INIT_LIST_HEAD(&zuds->zuds_list);
+ INIT_LIST_HEAD(&zuds->zuds_datasets);
+ zuds->zuds_owner = kowner;
+ list_add_tail(&zuds->zuds_list, &zone_uid_datasets);
+ } else {
+ /* Check if dataset already attached */
+ list_for_each_entry(zd, &zuds->zuds_datasets, zd_list) {
+ if (zd->zd_dsnamelen == dsnamelen &&
+ strncmp(zd->zd_dsname, dataset, dsnamelen) == 0) {
+ mutex_exit(&zone_datasets_lock);
+ return (EEXIST);
+ }
+ }
+ }
+
+ /* Add dataset to UID's list */
+ zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
+ zd->zd_dsnamelen = dsnamelen;
+ strlcpy(zd->zd_dsname, dataset, dsnamelen + 1);
+ INIT_LIST_HEAD(&zd->zd_list);
+ list_add_tail(&zd->zd_list, &zuds->zuds_datasets);
+
+ mutex_exit(&zone_datasets_lock);
+ return (0);
+#else
+ return (ENXIO);
+#endif /* CONFIG_USER_NS */
+}
+EXPORT_SYMBOL(zone_dataset_attach_uid);
+
+int
zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
{
#ifdef CONFIG_USER_NS
@@ -280,6 +360,217 @@ zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
}
EXPORT_SYMBOL(zone_dataset_detach);
+int
+zone_dataset_detach_uid(cred_t *cred, const char *dataset, uid_t owner_uid)
+{
+#ifdef CONFIG_USER_NS
+ zone_uid_datasets_t *zuds;
+ zone_dataset_t *zd;
+ int error;
+ size_t dsnamelen;
+ kuid_t kowner;
+
+ if ((error = zone_dataset_cred_check(cred)) != 0)
+ return (error);
+ if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
+ return (error);
+
+ kowner = make_kuid(current_user_ns(), owner_uid);
+ if (!uid_valid(kowner))
+ return (EINVAL);
+
+ mutex_enter(&zone_datasets_lock);
+
+ zuds = zone_uid_datasets_lookup(kowner);
+ if (zuds == NULL) {
+ mutex_exit(&zone_datasets_lock);
+ return (ENOENT);
+ }
+
+ /* Find and remove dataset */
+ list_for_each_entry(zd, &zuds->zuds_datasets, zd_list) {
+ if (zd->zd_dsnamelen == dsnamelen &&
+ strncmp(zd->zd_dsname, dataset, dsnamelen) == 0) {
+ list_del(&zd->zd_list);
+ kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
+
+ /* Remove UID entry if no more datasets */
+ if (list_empty(&zuds->zuds_datasets)) {
+ list_del(&zuds->zuds_list);
+ kmem_free(zuds, sizeof (*zuds));
+ }
+
+ mutex_exit(&zone_datasets_lock);
+ return (0);
+ }
+ }
+
+ mutex_exit(&zone_datasets_lock);
+ return (ENOENT);
+#else
+ return (ENXIO);
+#endif /* CONFIG_USER_NS */
+}
+EXPORT_SYMBOL(zone_dataset_detach_uid);
+
+/*
+ * Callback for looking up zoned_uid property (registered by ZFS module).
+ */
+static zone_get_zoned_uid_fn_t zone_get_zoned_uid_fn = NULL;
+
+void
+zone_register_zoned_uid_callback(zone_get_zoned_uid_fn_t fn)
+{
+ zone_get_zoned_uid_fn = fn;
+}
+EXPORT_SYMBOL(zone_register_zoned_uid_callback);
+
+void
+zone_unregister_zoned_uid_callback(void)
+{
+ zone_get_zoned_uid_fn = NULL;
+}
+EXPORT_SYMBOL(zone_unregister_zoned_uid_callback);
+
+#ifdef CONFIG_USER_NS
+/*
+ * Check if a dataset is the delegation root (has zoned_uid set locally).
+ */
+static boolean_t
+zone_dataset_is_zoned_uid_root(const char *dataset, uid_t zoned_uid)
+{
+ char *root;
+ uid_t found_uid;
+ boolean_t is_root;
+
+ if (zone_get_zoned_uid_fn == NULL)
+ return (B_FALSE);
+
+ root = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ found_uid = zone_get_zoned_uid_fn(dataset, root, MAXPATHLEN);
+ is_root = (found_uid == zoned_uid && strcmp(root, dataset) == 0);
+ kmem_free(root, MAXPATHLEN);
+ return (is_root);
+}
+#endif /* CONFIG_USER_NS */
+
+/*
+ * Core authorization check for zoned_uid write delegation.
+ */
+zone_admin_result_t
+zone_dataset_admin_check(const char *dataset, zone_uid_op_t op,
+ const char *aux_dataset)
+{
+#ifdef CONFIG_USER_NS
+ struct user_namespace *user_ns;
+ char *delegation_root;
+ uid_t zoned_uid, ns_owner_uid;
+ int write_unused;
+ zone_admin_result_t result = ZONE_ADMIN_NOT_APPLICABLE;
+
+ /* Step 1: If in global zone, not applicable */
+ if (INGLOBALZONE(curproc))
+ return (ZONE_ADMIN_NOT_APPLICABLE);
+
+ /* Step 2: Need callback to be registered */
+ if (zone_get_zoned_uid_fn == NULL)
+ return (ZONE_ADMIN_NOT_APPLICABLE);
+
+ delegation_root = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ /* Step 3: Find delegation root */
+ zoned_uid = zone_get_zoned_uid_fn(dataset, delegation_root,
+ MAXPATHLEN);
+ if (zoned_uid == 0)
+ goto out;
+
+ /* Step 4: Verify namespace owner matches */
+ user_ns = current_user_ns();
+ ns_owner_uid = from_kuid(&init_user_ns, user_ns->owner);
+ if (ns_owner_uid != zoned_uid)
+ goto out;
+
+ /* Step 5: Tiered capability check based on operation class */
+ {
+ int required_cap;
+ switch (op) {
+ case ZONE_OP_DESTROY:
+ case ZONE_OP_RENAME:
+ case ZONE_OP_CLONE:
+ required_cap = CAP_SYS_ADMIN;
+ break;
+ case ZONE_OP_CREATE:
+ case ZONE_OP_SNAPSHOT:
+ case ZONE_OP_SETPROP:
+ required_cap = CAP_FOWNER;
+ break;
+ default:
+ required_cap = CAP_SYS_ADMIN;
+ break;
+ }
+ if (!ns_capable(user_ns, required_cap)) {
+ result = ZONE_ADMIN_DENIED;
+ goto out;
+ }
+ }
+
+ /* Step 6: Operation-specific constraints */
+ switch (op) {
+ case ZONE_OP_DESTROY:
+ /* Cannot destroy the delegation root itself */
+ if (zone_dataset_is_zoned_uid_root(dataset, zoned_uid)) {
+ result = ZONE_ADMIN_DENIED;
+ goto out;
+ }
+ break;
+
+ case ZONE_OP_RENAME:
+ /* Cannot rename outside delegation subtree */
+ if (aux_dataset != NULL) {
+ char *dst_root;
+ uid_t dst_uid;
+
+ dst_root = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ dst_uid = zone_get_zoned_uid_fn(aux_dataset,
+ dst_root, MAXPATHLEN);
+ if (dst_uid != zoned_uid ||
+ strcmp(dst_root, delegation_root) != 0) {
+ kmem_free(dst_root, MAXPATHLEN);
+ result = ZONE_ADMIN_DENIED;
+ goto out;
+ }
+ kmem_free(dst_root, MAXPATHLEN);
+ }
+ break;
+
+ case ZONE_OP_CLONE:
+ /* Clone source must be visible */
+ if (aux_dataset != NULL) {
+ if (!zone_dataset_visible(aux_dataset, &write_unused)) {
+ result = ZONE_ADMIN_DENIED;
+ goto out;
+ }
+ }
+ break;
+
+ case ZONE_OP_CREATE:
+ case ZONE_OP_SNAPSHOT:
+ case ZONE_OP_SETPROP:
+ /* No additional constraints */
+ break;
+ }
+
+ result = ZONE_ADMIN_ALLOWED;
+out:
+ kmem_free(delegation_root, MAXPATHLEN);
+ return (result);
+#else
+ (void) dataset, (void) op, (void) aux_dataset;
+ return (ZONE_ADMIN_NOT_APPLICABLE);
+#endif
+}
+EXPORT_SYMBOL(zone_dataset_admin_check);
+
/*
* A dataset is visible if:
* - It is a parent of a namespace entry.
@@ -293,34 +584,19 @@ EXPORT_SYMBOL(zone_dataset_detach);
* The parent datasets of namespace entries are visible and
* read-only to provide a path back to the root of the pool.
*/
-int
-zone_dataset_visible(const char *dataset, int *write)
+/*
+ * Helper function to check if a dataset matches against a list of
+ * delegated datasets. Returns visibility and sets write permission.
+ */
+static int
+zone_dataset_check_list(struct list_head *datasets, const char *dataset,
+ size_t dsnamelen, int *write)
{
- zone_datasets_t *zds;
zone_dataset_t *zd;
- size_t dsnamelen, zd_len;
- int visible;
-
- /* Default to read-only, in case visible is returned. */
- if (write != NULL)
- *write = 0;
- if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
- return (0);
- if (INGLOBALZONE(curproc)) {
- if (write != NULL)
- *write = 1;
- return (1);
- }
+ size_t zd_len;
+ int visible = 0;
- mutex_enter(&zone_datasets_lock);
- zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
- if (zds == NULL) {
- mutex_exit(&zone_datasets_lock);
- return (0);
- }
-
- visible = 0;
- list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
+ list_for_each_entry(zd, datasets, zd_list) {
zd_len = strlen(zd->zd_dsname);
if (zd_len > dsnamelen) {
/*
@@ -352,7 +628,8 @@ zone_dataset_visible(const char *dataset, int *write)
* the namespace entry.
*/
visible = memcmp(zd->zd_dsname, dataset,
- zd_len) == 0 && dataset[zd_len] == '/';
+ zd_len) == 0 && (dataset[zd_len] == '/' ||
+ dataset[zd_len] == '@' || dataset[zd_len] == '#');
if (visible) {
if (write != NULL)
*write = 1;
@@ -361,9 +638,70 @@ zone_dataset_visible(const char *dataset, int *write)
}
}
- mutex_exit(&zone_datasets_lock);
return (visible);
}
+
+#if defined(CONFIG_USER_NS)
+/*
+ * Check UID-based zoning visibility for the current process.
+ * Must be called with zone_datasets_lock held.
+ */
+static int
+zone_dataset_visible_uid(const char *dataset, size_t dsnamelen, int *write)
+{
+ zone_uid_datasets_t *zuds;
+
+ zuds = zone_uid_datasets_lookup(curproc->cred->user_ns->owner);
+ if (zuds != NULL)
+ return (zone_dataset_check_list(&zuds->zuds_datasets, dataset,
+ dsnamelen, write));
+ return (0);
+}
+#endif
+
+int
+zone_dataset_visible(const char *dataset, int *write)
+{
+ zone_datasets_t *zds;
+ size_t dsnamelen;
+ int visible;
+
+ /* Default to read-only, in case visible is returned. */
+ if (write != NULL)
+ *write = 0;
+ if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
+ return (0);
+ if (INGLOBALZONE(curproc)) {
+ if (write != NULL)
+ *write = 1;
+ return (1);
+ }
+
+ mutex_enter(&zone_datasets_lock);
+
+ /* First, check namespace-specific zoning (existing behavior) */
+ zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
+ if (zds != NULL) {
+ visible = zone_dataset_check_list(&zds->zds_datasets, dataset,
+ dsnamelen, write);
+ if (visible) {
+ mutex_exit(&zone_datasets_lock);
+ return (visible);
+ }
+ }
+
+ /* Second, check UID-based zoning */
+#if defined(CONFIG_USER_NS)
+ visible = zone_dataset_visible_uid(dataset, dsnamelen, write);
+ if (visible) {
+ mutex_exit(&zone_datasets_lock);
+ return (visible);
+ }
+#endif
+
+ mutex_exit(&zone_datasets_lock);
+ return (0);
+}
EXPORT_SYMBOL(zone_dataset_visible);
unsigned int
@@ -395,8 +733,9 @@ EXPORT_SYMBOL(crgetzoneid);
boolean_t
inglobalzone(proc_t *proc)
{
+ (void) proc;
#if defined(CONFIG_USER_NS)
- return (proc->cred->user_ns == &init_user_ns);
+ return (current_user_ns() == &init_user_ns);
#else
return (B_TRUE);
#endif
@@ -408,6 +747,7 @@ spl_zone_init(void)
{
mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
INIT_LIST_HEAD(&zone_datasets);
+ INIT_LIST_HEAD(&zone_uid_datasets);
return (0);
}
@@ -415,6 +755,7 @@ void
spl_zone_fini(void)
{
zone_datasets_t *zds;
+ zone_uid_datasets_t *zuds;
zone_dataset_t *zd;
/*
@@ -423,6 +764,22 @@ spl_zone_fini(void)
* namespace is destroyed, just do it here, since spl is about to go
* out of context.
*/
+
+ /* Clean up UID-based delegations */
+ while (!list_empty(&zone_uid_datasets)) {
+ zuds = list_entry(zone_uid_datasets.next,
+ zone_uid_datasets_t, zuds_list);
+ while (!list_empty(&zuds->zuds_datasets)) {
+ zd = list_entry(zuds->zuds_datasets.next,
+ zone_dataset_t, zd_list);
+ list_del(&zd->zd_list);
+ kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
+ }
+ list_del(&zuds->zuds_list);
+ kmem_free(zuds, sizeof (*zuds));
+ }
+
+ /* Clean up namespace-based delegations */
while (!list_empty(&zone_datasets)) {
zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
while (!list_empty(&zds->zds_datasets)) {
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
index 6478c834b7a5..dbc9aad936bf 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
@@ -410,6 +410,22 @@ param_set_arc_int(const char *buf, zfs_kernel_param_t *kp)
return (0);
}
+int
+param_set_l2arc_dwpd_limit(const char *buf, zfs_kernel_param_t *kp)
+{
+ uint64_t old_val = l2arc_dwpd_limit;
+ int error;
+
+ error = spl_param_set_u64(buf, kp);
+ if (error < 0)
+ return (SET_ERROR(error));
+
+ if (l2arc_dwpd_limit != old_val)
+ l2arc_dwpd_bump_reset();
+
+ return (0);
+}
+
#ifdef CONFIG_MEMORY_HOTPLUG
static int
arc_hotplug_callback(struct notifier_block *self, unsigned long action,
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
index d6323fd56a8f..91010bdf642a 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
@@ -39,8 +39,10 @@
#include <sys/dsl_prop.h>
#include <sys/fm/util.h>
#include <sys/dsl_scan.h>
+#include <sys/dmu.h>
#include <sys/fs/zfs.h>
#include <sys/kstat.h>
+#include <sys/zone.h>
#include "zfs_prop.h"
@@ -122,16 +124,60 @@ spa_history_zone(void)
return ("linux");
}
+static int
+spa_restore_zoned_uid_cb(const char *dsname, void *arg)
+{
+ (void) arg;
+ uint64_t zoned_uid = 0;
+
+ if (dsl_prop_get(dsname, "zoned_uid", 8, 1, &zoned_uid, NULL) != 0)
+ return (0);
+
+ if (zoned_uid != 0) {
+ int err = zone_dataset_attach_uid(kcred, dsname,
+ (uid_t)zoned_uid);
+ if (err != 0 && err != EEXIST) {
+ cmn_err(CE_WARN, "failed to restore zoned_uid for "
+ "'%s' (uid %llu): %d", dsname,
+ (unsigned long long)zoned_uid, err);
+ }
+ }
+ return (0);
+}
+
void
spa_import_os(spa_t *spa)
{
- (void) spa;
+ (void) dmu_objset_find(spa_name(spa),
+ spa_restore_zoned_uid_cb, NULL, DS_FIND_CHILDREN);
+}
+
+static int
+spa_cleanup_zoned_uid_cb(const char *dsname, void *arg)
+{
+ (void) arg;
+ uint64_t zoned_uid = 0;
+
+ if (dsl_prop_get(dsname, "zoned_uid", 8, 1, &zoned_uid, NULL) != 0)
+ return (0);
+
+ if (zoned_uid != 0) {
+ int err = zone_dataset_detach_uid(kcred, dsname,
+ (uid_t)zoned_uid);
+ if (err != 0 && err != ENOENT) {
+ cmn_err(CE_WARN, "failed to detach zoned_uid for "
+ "'%s' (uid %llu): %d", dsname,
+ (unsigned long long)zoned_uid, err);
+ }
+ }
+ return (0);
}
void
spa_export_os(spa_t *spa)
{
- (void) spa;
+ (void) dmu_objset_find(spa_name(spa),
+ spa_cleanup_zoned_uid_cb, NULL, DS_FIND_CHILDREN);
}
void
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
index 1bd3500e9f66..66e10584ab5e 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
@@ -445,7 +445,14 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
v->vdev_has_securetrim = bdev_secure_discard_supported(bdev);
/* Inform the ZIO pipeline that we are non-rotational */
+#ifdef HAVE_BLK_QUEUE_ROT
+ v->vdev_nonrot = !blk_queue_rot(bdev_get_queue(bdev));
+#else
v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(bdev));
+#endif
+
+ /* Is backed by a block device. */
+ v->vdev_is_blkdev = B_TRUE;
/* Physical volume size in bytes for the partition */
*psize = bdev_capacity(bdev);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
index 79fd8911102d..c73ef86df4dc 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
@@ -251,16 +251,7 @@ snapentry_compare_by_name(const void *a, const void *b)
{
const zfs_snapentry_t *se_a = a;
const zfs_snapentry_t *se_b = b;
- int ret;
-
- ret = strcmp(se_a->se_name, se_b->se_name);
-
- if (ret < 0)
- return (-1);
- else if (ret > 0)
- return (1);
- else
- return (0);
+ return (TREE_ISIGN(strcmp(se_a->se_name, se_b->se_name)));
}
/*
@@ -272,15 +263,10 @@ snapentry_compare_by_objsetid(const void *a, const void *b)
const zfs_snapentry_t *se_a = a;
const zfs_snapentry_t *se_b = b;
- if (se_a->se_spa != se_b->se_spa)
- return ((ulong_t)se_a->se_spa < (ulong_t)se_b->se_spa ? -1 : 1);
-
- if (se_a->se_objsetid < se_b->se_objsetid)
- return (-1);
- else if (se_a->se_objsetid > se_b->se_objsetid)
- return (1);
- else
- return (0);
+ int cmp = TREE_PCMP(se_a->se_spa, se_b->se_spa);
+ if (cmp != 0)
+ return (cmp);
+ return (TREE_CMP(se_a->se_objsetid, se_b->se_objsetid));
}
/*
@@ -1201,8 +1187,10 @@ zfsctl_snapshot_mount(struct path *path, int flags)
error = zfsctl_snapshot_name(zfsvfs, dname(dentry),
ZFS_MAX_DATASET_NAME_LEN, full_name);
- if (error)
+ if (error) {
+ zfs_exit(zfsvfs, FTAG);
goto error;
+ }
if (is_current_chrooted() == 0) {
/*
@@ -1220,6 +1208,7 @@ zfsctl_snapshot_mount(struct path *path, int flags)
error = get_root_path(&mnt_path, m, MAXPATHLEN);
if (error != 0) {
kmem_free(m, MAXPATHLEN);
+ zfs_exit(zfsvfs, FTAG);
goto error;
}
mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
@@ -1253,6 +1242,33 @@ zfsctl_snapshot_mount(struct path *path, int flags)
zfs_snapshot_no_setuid ? "nosuid" : "suid");
/*
+ * Release z_teardown_lock before potentially blocking operations
+ * (cv_wait for concurrent mounts, call_usermodehelper for the mount
+ * helper). Holding z_teardown_lock(R) across call_usermodehelper
+ * deadlocks with namespace_sem: the mount helper needs
+ * namespace_sem(W) via move_mount, while /proc/self/mountinfo
+ * readers hold namespace_sem(R) and need z_teardown_lock(R) via
+ * zpl_show_devname. A concurrent zfs_suspend_fs queuing
+ * z_teardown_lock(W) blocks new readers, completing the cycle.
+ * See https://github.com/openzfs/zfs/issues/18409
+ *
+ * Releasing the lock allows zfs_suspend_fs to proceed during
+ * the mount, so dmu_objset_hold in zpl_get_tree can transiently
+ * fail with ENOENT during the clone swap. The mount helper
+ * fails, this function returns EISDIR, and the VFS silently
+ * falls back to the ctldir stub (empty directory). The caller
+ * gets the stub inode instead of the real snapshot root until
+ * the next access retries the automount.
+ *
+ * Safe because everything below operates on local string copies
+ * (full_name, full_path) or uses its own synchronization
+ * (zfs_snapshot_lock, se_mtx). The parent zfsvfs pointer
+ * remains valid because we hold a path reference to the
+ * automount trigger dentry.
+ */
+ zfs_exit(zfsvfs, FTAG);
+
+ /*
* Check if snapshot is already being mounted. If found, wait for
* pending mount to complete before returning success.
*/
@@ -1366,8 +1382,7 @@ zfsctl_snapshot_mount(struct path *path, int flags)
error:
kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
kmem_free(full_path, MAXPATHLEN);
-
- zfs_exit(zfsvfs, FTAG);
+ kmem_free(options, 7);
return (error);
}
@@ -1379,17 +1394,31 @@ int
zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen,
struct inode **ipp)
{
+ zfsvfs_t *zfsvfs = sb->s_fs_info;
int error;
struct path path;
char *mnt;
struct dentry *dentry;
+ zfs_snapentry_t *se;
mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid,
- MAXPATHLEN, mnt);
- if (error)
- goto out;
+ /*
+ * Try the in-memory AVL tree first for previously mounted
+ * snapshots, falling back to the on-disk scan if not found.
+ */
+ rw_enter(&zfs_snapshot_lock, RW_READER);
+ se = zfsctl_snapshot_find_by_objsetid(zfsvfs->z_os->os_spa, objsetid);
+ rw_exit(&zfs_snapshot_lock);
+ if (se != NULL) {
+ strlcpy(mnt, se->se_path, MAXPATHLEN);
+ zfsctl_snapshot_rele(se);
+ } else {
+ error = zfsctl_snapshot_path_objset(zfsvfs, objsetid,
+ MAXPATHLEN, mnt);
+ if (error)
+ goto out;
+ }
/* Trigger automount */
error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
index 5421a441b323..ce6092be1da7 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
@@ -170,6 +170,8 @@ zfs_ioc_userns_attach(zfs_cmd_t *zc)
*/
if (error == ENOTTY)
error = ZFS_ERR_NOT_USER_NAMESPACE;
+ if (error == ENXIO)
+ error = ZFS_ERR_NO_USER_NS_SUPPORT;
return (error);
}
@@ -190,6 +192,8 @@ zfs_ioc_userns_detach(zfs_cmd_t *zc)
*/
if (error == ENOTTY)
error = ZFS_ERR_NOT_USER_NAMESPACE;
+ if (error == ENXIO)
+ error = ZFS_ERR_NO_USER_NS_SUPPORT;
return (error);
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
index 8a7d14ab6119..9c0d92551843 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2026, TrueNAS.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -64,53 +65,15 @@
#include <linux/fs.h>
#include "zfs_comutil.h"
-enum {
- TOKEN_RO,
- TOKEN_RW,
- TOKEN_SETUID,
- TOKEN_NOSETUID,
- TOKEN_EXEC,
- TOKEN_NOEXEC,
- TOKEN_DEVICES,
- TOKEN_NODEVICES,
- TOKEN_DIRXATTR,
- TOKEN_SAXATTR,
- TOKEN_XATTR,
- TOKEN_NOXATTR,
- TOKEN_ATIME,
- TOKEN_NOATIME,
- TOKEN_RELATIME,
- TOKEN_NORELATIME,
- TOKEN_NBMAND,
- TOKEN_NONBMAND,
- TOKEN_MNTPOINT,
- TOKEN_LAST,
-};
-
-static const match_table_t zpl_tokens = {
- { TOKEN_RO, MNTOPT_RO },
- { TOKEN_RW, MNTOPT_RW },
- { TOKEN_SETUID, MNTOPT_SETUID },
- { TOKEN_NOSETUID, MNTOPT_NOSETUID },
- { TOKEN_EXEC, MNTOPT_EXEC },
- { TOKEN_NOEXEC, MNTOPT_NOEXEC },
- { TOKEN_DEVICES, MNTOPT_DEVICES },
- { TOKEN_NODEVICES, MNTOPT_NODEVICES },
- { TOKEN_DIRXATTR, MNTOPT_DIRXATTR },
- { TOKEN_SAXATTR, MNTOPT_SAXATTR },
- { TOKEN_XATTR, MNTOPT_XATTR },
- { TOKEN_NOXATTR, MNTOPT_NOXATTR },
- { TOKEN_ATIME, MNTOPT_ATIME },
- { TOKEN_NOATIME, MNTOPT_NOATIME },
- { TOKEN_RELATIME, MNTOPT_RELATIME },
- { TOKEN_NORELATIME, MNTOPT_NORELATIME },
- { TOKEN_NBMAND, MNTOPT_NBMAND },
- { TOKEN_NONBMAND, MNTOPT_NONBMAND },
- { TOKEN_MNTPOINT, MNTOPT_MNTPOINT "=%s" },
- { TOKEN_LAST, NULL },
-};
+vfs_t *
+zfsvfs_vfs_alloc(void)
+{
+ vfs_t *vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP);
+ mutex_init(&vfsp->vfs_mntpt_lock, NULL, MUTEX_DEFAULT, NULL);
+ return (vfsp);
+}
-static void
+void
zfsvfs_vfs_free(vfs_t *vfsp)
{
if (vfsp != NULL) {
@@ -121,139 +84,6 @@ zfsvfs_vfs_free(vfs_t *vfsp)
}
}
-static int
-zfsvfs_parse_option(char *option, int token, substring_t *args, vfs_t *vfsp)
-{
- switch (token) {
- case TOKEN_RO:
- vfsp->vfs_readonly = B_TRUE;
- vfsp->vfs_do_readonly = B_TRUE;
- break;
- case TOKEN_RW:
- vfsp->vfs_readonly = B_FALSE;
- vfsp->vfs_do_readonly = B_TRUE;
- break;
- case TOKEN_SETUID:
- vfsp->vfs_setuid = B_TRUE;
- vfsp->vfs_do_setuid = B_TRUE;
- break;
- case TOKEN_NOSETUID:
- vfsp->vfs_setuid = B_FALSE;
- vfsp->vfs_do_setuid = B_TRUE;
- break;
- case TOKEN_EXEC:
- vfsp->vfs_exec = B_TRUE;
- vfsp->vfs_do_exec = B_TRUE;
- break;
- case TOKEN_NOEXEC:
- vfsp->vfs_exec = B_FALSE;
- vfsp->vfs_do_exec = B_TRUE;
- break;
- case TOKEN_DEVICES:
- vfsp->vfs_devices = B_TRUE;
- vfsp->vfs_do_devices = B_TRUE;
- break;
- case TOKEN_NODEVICES:
- vfsp->vfs_devices = B_FALSE;
- vfsp->vfs_do_devices = B_TRUE;
- break;
- case TOKEN_DIRXATTR:
- vfsp->vfs_xattr = ZFS_XATTR_DIR;
- vfsp->vfs_do_xattr = B_TRUE;
- break;
- case TOKEN_SAXATTR:
- vfsp->vfs_xattr = ZFS_XATTR_SA;
- vfsp->vfs_do_xattr = B_TRUE;
- break;
- case TOKEN_XATTR:
- vfsp->vfs_xattr = ZFS_XATTR_SA;
- vfsp->vfs_do_xattr = B_TRUE;
- break;
- case TOKEN_NOXATTR:
- vfsp->vfs_xattr = ZFS_XATTR_OFF;
- vfsp->vfs_do_xattr = B_TRUE;
- break;
- case TOKEN_ATIME:
- vfsp->vfs_atime = B_TRUE;
- vfsp->vfs_do_atime = B_TRUE;
- break;
- case TOKEN_NOATIME:
- vfsp->vfs_atime = B_FALSE;
- vfsp->vfs_do_atime = B_TRUE;
- break;
- case TOKEN_RELATIME:
- vfsp->vfs_relatime = B_TRUE;
- vfsp->vfs_do_relatime = B_TRUE;
- break;
- case TOKEN_NORELATIME:
- vfsp->vfs_relatime = B_FALSE;
- vfsp->vfs_do_relatime = B_TRUE;
- break;
- case TOKEN_NBMAND:
- vfsp->vfs_nbmand = B_TRUE;
- vfsp->vfs_do_nbmand = B_TRUE;
- break;
- case TOKEN_NONBMAND:
- vfsp->vfs_nbmand = B_FALSE;
- vfsp->vfs_do_nbmand = B_TRUE;
- break;
- case TOKEN_MNTPOINT:
- if (vfsp->vfs_mntpoint != NULL)
- kmem_strfree(vfsp->vfs_mntpoint);
- vfsp->vfs_mntpoint = match_strdup(&args[0]);
- if (vfsp->vfs_mntpoint == NULL)
- return (SET_ERROR(ENOMEM));
- break;
- default:
- break;
- }
-
- return (0);
-}
-
-/*
- * Parse the raw mntopts and return a vfs_t describing the options.
- */
-static int
-zfsvfs_parse_options(char *mntopts, vfs_t **vfsp)
-{
- vfs_t *tmp_vfsp;
- int error;
-
- tmp_vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP);
- mutex_init(&tmp_vfsp->vfs_mntpt_lock, NULL, MUTEX_DEFAULT, NULL);
-
- if (mntopts != NULL) {
- substring_t args[MAX_OPT_ARGS];
- char *tmp_mntopts, *p, *t;
- int token;
-
- tmp_mntopts = t = kmem_strdup(mntopts);
- if (tmp_mntopts == NULL)
- return (SET_ERROR(ENOMEM));
-
- while ((p = strsep(&t, ",")) != NULL) {
- if (!*p)
- continue;
-
- args[0].to = args[0].from = NULL;
- token = match_token(p, zpl_tokens, args);
- error = zfsvfs_parse_option(p, token, args, tmp_vfsp);
- if (error) {
- kmem_strfree(tmp_mntopts);
- zfsvfs_vfs_free(tmp_vfsp);
- return (error);
- }
- }
-
- kmem_strfree(tmp_mntopts);
- }
-
- *vfsp = tmp_vfsp;
-
- return (0);
-}
-
boolean_t
zfs_is_readonly(zfsvfs_t *zfsvfs)
{
@@ -1486,20 +1316,16 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
static atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0);
int
-zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
+zfs_domount(struct super_block *sb, const char *osname,
+ vfs_t *vfs, int silent)
{
- const char *osname = zm->mnt_osname;
struct inode *root_inode = NULL;
uint64_t recordsize;
int error = 0;
zfsvfs_t *zfsvfs = NULL;
- vfs_t *vfs = NULL;
int canwrite;
int dataset_visible_zone;
- ASSERT(zm);
- ASSERT(osname);
-
dataset_visible_zone = zone_dataset_visible(osname, &canwrite);
/*
@@ -1511,10 +1337,6 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
return (SET_ERROR(EPERM));
}
- error = zfsvfs_parse_options(zm->mnt_data, &vfs);
- if (error)
- return (error);
-
/*
* If a non-writable filesystem is being mounted without the
* read-only flag, pretend it was set, as done for snapshots.
@@ -1523,16 +1345,12 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
vfs->vfs_readonly = B_TRUE;
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
- if (error) {
- zfsvfs_vfs_free(vfs);
+ if (error)
goto out;
- }
if ((error = dsl_prop_get_integer(osname, "recordsize",
- &recordsize, NULL))) {
- zfsvfs_vfs_free(vfs);
+ &recordsize, NULL)))
goto out;
- }
vfs->vfs_data = zfsvfs;
zfsvfs->z_vfs = vfs;
@@ -1614,6 +1432,13 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
out:
if (error) {
if (zfsvfs != NULL) {
+ /*
+ * We're returning error, so the caller still owns
+ * the mount options vfs_t. Remove them from zfsvfs
+ * so we don't try to free them.
+ */
+ zfsvfs->z_vfs = NULL;
+
dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
zfsvfs_free(zfsvfs);
}
@@ -1704,24 +1529,16 @@ zfs_umount(struct super_block *sb)
}
int
-zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm)
+zfs_remount(struct super_block *sb, vfs_t *vfsp, int flags)
{
zfsvfs_t *zfsvfs = sb->s_fs_info;
- vfs_t *vfsp;
boolean_t issnap = dmu_objset_is_snapshot(zfsvfs->z_os);
- int error;
if ((issnap || !spa_writeable(dmu_objset_spa(zfsvfs->z_os))) &&
- !(*flags & SB_RDONLY)) {
- *flags |= SB_RDONLY;
+ !(flags & SB_RDONLY))
return (EROFS);
- }
- error = zfsvfs_parse_options(zm->mnt_data, &vfsp);
- if (error)
- return (error);
-
- if (!zfs_is_readonly(zfsvfs) && (*flags & SB_RDONLY))
+ if (!zfs_is_readonly(zfsvfs) && (flags & SB_RDONLY))
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
zfs_unregister_callbacks(zfsvfs);
@@ -1732,7 +1549,7 @@ zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm)
if (!issnap)
(void) zfs_register_callbacks(vfsp);
- return (error);
+ return (0);
}
int
@@ -1963,15 +1780,6 @@ bail:
/* release the VFS ops */
rw_exit(&zfsvfs->z_teardown_inactive_lock);
ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
-
- if (err != 0) {
- /*
- * Since we couldn't setup the sa framework, try to force
- * unmount this file system.
- */
- if (zfsvfs->z_os)
- (void) zfs_umount(zfsvfs->z_sb);
- }
return (err);
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
index a788e3fd4862..e65f81230124 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
@@ -200,8 +200,9 @@ zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
* Keep a count of the synchronous opens in the znode. On first
* synchronous open we must convert all previous async transactions
* into sync to keep correct ordering.
+ * Skip it for snapshot, as it won't have any transactions.
*/
- if (flag & O_SYNC) {
+ if (!zfsvfs->z_issnap && (flag & O_SYNC)) {
if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
zil_async_to_sync(zfsvfs->z_log, zp->z_id);
}
@@ -222,7 +223,7 @@ zfs_close(struct inode *ip, int flag, cred_t *cr)
return (error);
/* Decrement the synchronous opens in the znode */
- if (flag & O_SYNC)
+ if (!zfsvfs->z_issnap && (flag & O_SYNC))
atomic_dec_32(&zp->z_sync_cnt);
zfs_exit(zfsvfs, FTAG);
@@ -2581,8 +2582,19 @@ top:
if (fuid_dirtied)
zfs_fuid_sync(zfsvfs, tx);
- if (mask != 0)
+ if (mask != 0) {
zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
+ /*
+ * Ensure that the z_seq is always incremented on setattr
+ * operation. This is required for change accounting for
+ * NFS clients.
+ *
+ * ATTR_MODE already increments via zfs_acl_chmod_setattr.
+ * ATTR_SIZE already increments via zfs_freesp.
+ */
+ if (!(mask & (ATTR_MODE | ATTR_SIZE)))
+ zp->z_seq++;
+ }
mutex_exit(&zp->z_lock);
if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
@@ -3513,7 +3525,8 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
boolean_t is_tmpfile = 0;
uint64_t txg;
- is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE));
+ is_tmpfile = (sip->i_nlink == 0 &&
+ (inode_state_read_once(sip) & I_LINKABLE));
ASSERT(S_ISDIR(ZTOI(tdzp)->i_mode));
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_export.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_export.c
index 711da151f65e..0568bb63c75e 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_export.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_export.c
@@ -37,8 +37,8 @@ zpl_encode_fh(struct inode *ip, __u32 *fh, int *max_len, struct inode *parent)
{
fstrans_cookie_t cookie;
ushort_t empty_fid = 0;
- fid_t *fid;
- int len_bytes, rc;
+ fid_t *fid, *pfid;
+ int len_bytes, required_len, parent_len, rc, prc, fh_type;
len_bytes = *max_len * sizeof (__u32);
@@ -56,11 +56,44 @@ zpl_encode_fh(struct inode *ip, __u32 *fh, int *max_len, struct inode *parent)
else
rc = zfs_fid(ip, fid);
+ required_len = offsetof(fid_t, fid_data) + fid->fid_len;
+
+ /*
+ * Kernel has requested that the resulting file handle contain
+ * a reference to the provided parent. This typically would happen
+ * if the NFS export has subtree checking enabled.
+ */
+ if (parent != NULL) {
+ if ((rc == 0) && (len_bytes >
+ required_len + offsetof(fid_t, fid_data))) {
+ parent_len = len_bytes - required_len;
+ pfid = (fid_t *)((char *)fh + required_len);
+ pfid->fid_len = parent_len - offsetof(fid_t, fid_data);
+ } else {
+ empty_fid = 0;
+ pfid = (fid_t *)&empty_fid;
+ }
+
+ if (zfsctl_is_node(parent))
+ prc = zfsctl_fid(parent, pfid);
+ else
+ prc = zfs_fid(parent, pfid);
+
+ if (rc == 0 && prc != 0)
+ rc = prc;
+
+ required_len += offsetof(fid_t, fid_data) +
+ pfid->fid_len;
+ fh_type = FILEID_INO32_GEN_PARENT;
+ } else {
+ fh_type = FILEID_INO32_GEN;
+ }
+
spl_fstrans_unmark(cookie);
- len_bytes = offsetof(fid_t, fid_data) + fid->fid_len;
- *max_len = roundup(len_bytes, sizeof (__u32)) / sizeof (__u32);
- return (rc == 0 ? FILEID_INO32_GEN : 255);
+ *max_len = roundup(required_len, sizeof (__u32)) / sizeof (__u32);
+
+ return (rc == 0 ? fh_type : FILEID_INVALID);
}
static struct dentry *
@@ -74,7 +107,8 @@ zpl_fh_to_dentry(struct super_block *sb, struct fid *fh,
len_bytes = fh_len * sizeof (__u32);
- if (fh_type != FILEID_INO32_GEN ||
+ if ((fh_type != FILEID_INO32_GEN &&
+ fh_type != FILEID_INO32_GEN_PARENT) ||
len_bytes < offsetof(fid_t, fid_data) ||
len_bytes < offsetof(fid_t, fid_data) + fid->fid_len)
return (ERR_PTR(-EINVAL));
@@ -104,6 +138,46 @@ zpl_fh_to_dentry(struct super_block *sb, struct fid *fh,
return (d_obtain_alias(ip));
}
+static struct dentry *
+zpl_fh_to_parent(struct super_block *sb, struct fid *fh,
+ int fh_len, int fh_type)
+{
+ /*
+ * Convert the provided struct fid to a dentry for the parent
+ * This is possible only if it was created with the parent,
+ * e.g. type is FILEID_INO32_GEN_PARENT. When this type of
+ * filehandle is created we simply pack the parent fid_t
+ * after the entry's fid_t. So this function will adjust
+ * offset in the provided buffer to the begining of the
+ * parent fid_t and call zpl_fh_to_dentry() on it.
+ */
+ fid_t *fid = (fid_t *)fh;
+ fid_t *pfid;
+ int len_bytes, parent_len_bytes, child_fid_bytes, parent_fh_len;
+
+ len_bytes = fh_len * sizeof (__u32);
+
+ if ((fh_type != FILEID_INO32_GEN_PARENT) ||
+ len_bytes < offsetof(fid_t, fid_data) ||
+ len_bytes < offsetof(fid_t, fid_data) + fid->fid_len)
+ return (ERR_PTR(-EINVAL));
+
+ child_fid_bytes = offsetof(fid_t, fid_data) + fid->fid_len;
+ parent_len_bytes = len_bytes - child_fid_bytes;
+
+ if (parent_len_bytes < offsetof(fid_t, fid_data))
+ return (ERR_PTR(-EINVAL));
+
+ pfid = (fid_t *)((char *)fh + child_fid_bytes);
+
+ if (parent_len_bytes < offsetof(fid_t, fid_data) + pfid->fid_len)
+ return (ERR_PTR(-EINVAL));
+
+ parent_fh_len = parent_len_bytes / sizeof (__u32);
+ return (zpl_fh_to_dentry(sb, (struct fid *)pfid, parent_fh_len,
+ FILEID_INO32_GEN));
+}
+
/*
* In case the filesystem contains name longer than 255, we need to override
* the default get_name so we don't get buffer overflow. Unfortunately, since
@@ -177,6 +251,7 @@ zpl_commit_metadata(struct inode *inode)
const struct export_operations zpl_export_operations = {
.encode_fh = zpl_encode_fh,
.fh_to_dentry = zpl_fh_to_dentry,
+ .fh_to_parent = zpl_fh_to_parent,
.get_name = zpl_get_name,
.get_parent = zpl_get_parent,
.commit_metadata = zpl_commit_metadata,
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
index f7691c02d163..ffe227796f0a 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
@@ -43,6 +43,9 @@
#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
#include <linux/writeback.h>
#endif
+#ifdef HAVE_FILELOCK_HEADER
+#include <linux/filelock.h>
+#endif
/*
* When using fallocate(2) to preallocate space, inflate the requested
@@ -776,34 +779,23 @@ zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice)
if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
return (error);
- switch (advice) {
- case POSIX_FADV_SEQUENTIAL:
- case POSIX_FADV_WILLNEED:
+ if (advice == POSIX_FADV_WILLNEED) {
+ loff_t rlen = len ? len : i_size_read(ip) - offset;
+ dmu_prefetch(os, zp->z_id, 0, offset, rlen,
+ ZIO_PRIORITY_ASYNC_READ);
+ if (!zn_has_cached_data(zp, offset, offset + rlen - 1)) {
+ zfs_exit(zfsvfs, FTAG);
+ return (error);
+ }
+ }
+
#ifdef HAVE_GENERIC_FADVISE
- if (zn_has_cached_data(zp, offset, offset + len - 1))
- error = generic_fadvise(filp, offset, len, advice);
+ error = generic_fadvise(filp, offset, len, advice);
#endif
- /*
- * Pass on the caller's size directly, but note that
- * dmu_prefetch_max will effectively cap it. If there
- * really is a larger sequential access pattern, perhaps
- * dmu_zfetch will detect it.
- */
- if (len == 0)
- len = i_size_read(ip) - offset;
- dmu_prefetch(os, zp->z_id, 0, offset, len,
- ZIO_PRIORITY_ASYNC_READ);
- break;
- case POSIX_FADV_NORMAL:
- case POSIX_FADV_RANDOM:
- case POSIX_FADV_DONTNEED:
- case POSIX_FADV_NOREUSE:
- /* ignored for now */
- break;
- default:
- error = -EINVAL;
- break;
+ if (error == 0 && advice == POSIX_FADV_DONTNEED) {
+ loff_t rlen = len ? len : i_size_read(ip) - offset;
+ dmu_evict_range(os, zp->z_id, offset, rlen);
}
zfs_exit(zfsvfs, FTAG);
@@ -1242,6 +1234,7 @@ const struct file_operations zpl_file_operations = {
.mmap = zpl_mmap,
.fsync = zpl_fsync,
.fallocate = zpl_fallocate,
+ .setlease = generic_setlease,
.copy_file_range = zpl_copy_file_range,
#ifdef HAVE_VFS_CLONE_FILE_RANGE
.clone_file_range = zpl_clone_file_range,
@@ -1264,6 +1257,7 @@ const struct file_operations zpl_dir_file_operations = {
.read = generic_read_dir,
.iterate_shared = zpl_iterate,
.fsync = zpl_fsync,
+ .setlease = generic_setlease,
.unlocked_ioctl = zpl_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = zpl_compat_ioctl,
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
index f97662d052c7..e4e15c824f4b 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
@@ -506,6 +506,32 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
}
#endif
+#ifdef STATX_CHANGE_COOKIE
+ if (request_mask & STATX_CHANGE_COOKIE) {
+ /*
+ * knfsd uses the STATX_CHANGE_COOKIE to surface to clients
+ * change_info4 data, which is used to implement NFS client
+ * name caching (see RFC 8881 Section 10.8). This number
+ * should always increase with changes and should not be
+ * reused. We cannot simply present ctime here because
+ * ZFS uses a coarse timer to set them, which may cause
+ * clients to fail to detect changes and invalidate cache.
+ *
+ * ZFS always increments znode z_seq number, but this is
+ * uint_t and so we mask in ctime to upper bits.
+ *
+ * STATX_ATTR_CHANGE_MONOTONIC is advertised
+ * to prevent knfsd from generating the change cookie
+ * based on ctime. C.f. nfsd4_change_attribute in
+ * fs/nfsd/nfsfh.c.
+ */
+ stat->change_cookie =
+ ((u64)stat->ctime.tv_sec << 32) | zp->z_seq;
+ stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC;
+ stat->result_mask |= STATX_CHANGE_COOKIE;
+ }
+#endif
+
#ifdef STATX_DIOALIGN
if (request_mask & STATX_DIOALIGN) {
uint64_t align;
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
index 347b352506e5..2cd0f17c860f 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
@@ -24,6 +24,7 @@
* Copyright (c) 2023, Datto Inc. All rights reserved.
* Copyright (c) 2025, Klara, Inc.
* Copyright (c) 2025, Rob Norris <robn@despairlabs.com>
+ * Copyright (c) 2026, TrueNAS.
*/
@@ -35,6 +36,8 @@
#include <linux/iversion.h>
#include <linux/version.h>
#include <linux/vfs_compat.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
/*
* What to do when the last reference to an inode is released. If 0, the kernel
@@ -265,21 +268,6 @@ zpl_statfs(struct dentry *dentry, struct kstatfs *statp)
}
static int
-zpl_remount_fs(struct super_block *sb, int *flags, char *data)
-{
- zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data };
- fstrans_cookie_t cookie;
- int error;
-
- cookie = spl_fstrans_mark();
- error = -zfs_remount(sb, flags, &zm);
- spl_fstrans_unmark(cookie);
- ASSERT3S(error, <=, 0);
-
- return (error);
-}
-
-static int
__zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs)
{
int error;
@@ -354,21 +342,6 @@ zpl_show_options(struct seq_file *seq, struct dentry *root)
}
static int
-zpl_fill_super(struct super_block *sb, void *data, int silent)
-{
- zfs_mnt_t *zm = (zfs_mnt_t *)data;
- fstrans_cookie_t cookie;
- int error;
-
- cookie = spl_fstrans_mark();
- error = -zfs_domount(sb, zm, silent);
- spl_fstrans_unmark(cookie);
- ASSERT3S(error, <=, 0);
-
- return (error);
-}
-
-static int
zpl_test_super(struct super_block *s, void *data)
{
zfsvfs_t *zfsvfs = s->s_fs_info;
@@ -383,17 +356,477 @@ zpl_test_super(struct super_block *s, void *data)
return (zfsvfs != NULL && os == zfsvfs->z_os);
}
-static struct super_block *
-zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
+static void
+zpl_kill_sb(struct super_block *sb)
+{
+ zfs_preumount(sb);
+ kill_anon_super(sb);
+}
+
+void
+zpl_prune_sb(uint64_t nr_to_scan, void *arg)
+{
+ struct super_block *sb = (struct super_block *)arg;
+ int objects = 0;
+
+ /*
+ * Ensure the superblock is not in the process of being torn down.
+ */
+#ifdef HAVE_SB_DYING
+ if (down_read_trylock(&sb->s_umount)) {
+ if (!(sb->s_flags & SB_DYING) && sb->s_root &&
+ (sb->s_flags & SB_BORN)) {
+ (void) zfs_prune(sb, nr_to_scan, &objects);
+ }
+ up_read(&sb->s_umount);
+ }
+#else
+ if (down_read_trylock(&sb->s_umount)) {
+ if (!hlist_unhashed(&sb->s_instances) &&
+ sb->s_root && (sb->s_flags & SB_BORN)) {
+ (void) zfs_prune(sb, nr_to_scan, &objects);
+ }
+ up_read(&sb->s_umount);
+ }
+#endif
+}
+
+/*
+ * Mount option parsing.
+ *
+ * The kernel receives a set of "stringy" mount options, typically a
+ * comma-separated list through mount(2) or fsconfig(2). These are split into a
+ * set of struct fs_parameter, and then vfs_parse_fs_param() is called for
+ * each. That function will handle (and consume) some options directly, and
+ * other subsystems (mainly security modules) are given the opportunity to
+ * consume them too. Any left over are passed to zpl_parse_param(). Our job is
+ * to use them to fill in the vfs_t we've attached previously to
+ * fc->fs_private, ready for the mount or remount call when it comes.
+ *
+ * Historically, mount options have been generated, removed, modified and
+ * otherwise complicated by multiple different actors over a long time: the
+ * kernel itself, the original mount(8) utility and later libmount,
+ * mount.zfs(8), libzfs and the ZFS tools that use it, and any program using
+ * the various mount APIs that have come and gone over the years. This is
+ * further complicated by cross-pollination between OpenSolaris/illumos, Linux
+ * and FreeBSD. Long story short: we could see all sorts of things, and we need
+ * to at least try not to break old userspace programs.
+ *
+ * At time of writing, this is my best understanding of all the options we
+ * might reasonably see, and where and how they're handled.
+ *
+ *
+ * These are common options for all filesystems that are processed by the
+ * kernel directly, without zpl_parse_param() being called. They're a bit of a
+ * mixed bag, but are ultimately all available to us via either sb->s_flags or
+ * fc->sb_flags:
+ *
+ * dirsync: set SB_DIRSYNC
+ * lazytime: set SB_LAZYTIME
+ * mand: set SB_MANDLOCK
+ * ro: set SB_RDONLY
+ * sync: set SB_SYNCHRONOUS
+ *
+ * async: clear SB_SYNCHRONOUS
+ * nolazytime: clear SB_LAZYTIME
+ * nomand: clear SB_MANDLOCK
+ * rw: clear SB_RDONLY
+ *
+ * Fortunately, almost all of these are handled directly by the kernel. 'mand'
+ * and 'nomand' are swallowed by the kernel ('mand' emits a warning in the
+ * kernel log), but it and the corresponding dataset property have been a no-op
+ * in OpenZFS for years, so there's nothing for us to do there.
+ *
+ * The only tricky one is SB_RDONLY ('ro'/'rw'), which can be both a mount and
+ * a superblock option. While we won't receive the "stringy" options, the
+ * kernel will set it for us in fc->sb_flags, and we've always had special
+ * handling for it at mount and remount time (eg handling snapshot mounts), so
+ * it's not a problem to do nothing here because we will sort it out later.
+ *
+ *
+ * These are options that we may receive as "stringy" options but also as mount
+ * flags.
+ *
+ * exec: clear MS_NOEXEC
+ * noexec: set MS_NOEXEC
+ * suid: clear MS_NOSUID
+ * nosuid: set MS_NOSUID
+ * dev: clear MS_NODEV
+ * nodev: set MS_NODEV
+ * atime: clear MS_NOATIME
+ * noatime: set MS_NOATIME
+ * relatime: set MS_RELATIME
+ * norelatime: clear MS_RELATIME
+ *
+ * In testing, it appears that recent libmount will convert them, but our own
+ * mount code (libzfs_mount) may not. We will be called for the stringy
+ * versions, but not for the flags. The flags will later be available on
+ * vfsmount->mnt_flags, not set on the vfs_t. This tends not to matter in
+ * practice, as almost all mounts come through libzfs (via zfs-mount(8) or
+ * mount.zfs(8)) and so as strings, and when they do come through flags, they
+ * will still be reported correctly via mountinfo and by zfs-get(8), which has
+ * special handling for "temporary" properties. Also, we never use these
+ * internally for any decisions; 'exec', 'suid' and 'dev' are handled in the
+ * kernel, and the kernel provides helpers for 'atime' and 'relatime'. The
+ * only place the difference is observable is through zfs_get_temporary_prop(),
+ * which is only used by the zfs.get_prop() Lua call.
+ *
+ * This is fixable by getting at vfsmount->mnt_flags, but this is not readily
+ * available until after the mount operation is completed, and with some
+ * effort. This is all very low impact, so it's left for future improvement.
+ *
+ *
+ * These are true OpenZFS-specific mount options. They give the equivalent
+ * of temporarily setting the pool properties as follows:
+ *
+ * strictatime: atime=on, relatime=off
+ *
+ * xattr: xattr=sa
+ * saxattr: xattr=sa
+ * dirxattr: xattr=dir
+ * noxattr: xattr=off
+ *
+ *
+ * mntpoint= provides the canonical mount point for a snapshot mount. This
+ * is an assist for the snapshot automounter call out to userspace, to
+ * understand where the snapshot is mounted even when triggered from an
+ * alternate mount namespace (eg inside a chroot).
+ *
+ * mntpoint= vfs->vfs_mntpoint=...
+ *
+ *
+ * These are used for coordination inside libzfs, and should not make it
+ * to the kernel, but it does not strip them, so we handle them and ignore
+ * them.
+ *
+ * defaults
+ * zfsutil
+ * remount
+ *
+ *
+ * These are specific to SELinux. When that security module is running, it
+ * will consume them, but if not, they will be passed through to us. libzfs
+ * adds them unconditionally, so we will always see them when SELinux is not
+ * running, and ignore them.
+ *
+ * fscontext
+ * defcontext
+ * rootcontext
+ * context
+ *
+ *
+ * When preparing a remount, libmount will read /proc/self/mountinfo and add
+ * any unrecognised flags it finds there to the options. So, we have to accept
+ * anything that __zpl_show_options() can produce.
+ *
+ * posixacl
+ * noacl
+ * casesensitive
+ * caseinsensitive
+ * casemixed
+ *
+ *
+ * mount(8) has a notion of "sloppy" options. According to the documentation,
+ * when the -s switch is provided, unrecognised mount options will be ignored.
+ * Only the Linux NFS and SMB filesystems support it, and traditionally
+ * OpenZFS has too. However, it appears massively underspecified and
+ * inconsistent. Depending on the interplay between mount(8), the mount helper
+ * (eg mount.zfs(8)) and libmount, -s may cause unknown options to be filtered
+ * in userspace, _or_ an additional option 'sloppy' to be passed to the kernel
+ * either before or after the "unknown" option, _or_ nothing at all happens
+ * and the unknown option to be passed through to the kernel as-is. The
+ * kernel NFS and SMB filesystems both expect to see an explicit option
+ * 'sloppy' and use this to either ignore or reject unknown options, but as
+ * described, it's very easy for that option to not appear, or appear too late.
+ *
+ * OpenZFS has a test for this in the test suite, and it's documented in
+ * mount.zfs(8), so to support it we accept 'sloppy' and ignore it, and all
+ * other unknown options produce a notice in the kernel log, and are also
+ * ignored. This allows the "feature" to continue to work, while avoiding
+ * the additional housekeeping for the 'sloppy' option.
+ *
+ * sloppy
+ *
+ *
+ * Finally, all filesystems get automatic handling for the 'source' option,
+ * that is, the "name" of the filesystem (the first column of df(1)'s output).
+ * However, this only happens if the handler does not otherwise handle
+ * the 'source' option. Since we handle _all_ options because of 'sloppy', we
+ * deal with this explicitly by calling into the kernel's helper for this,
+ * vfs_parse_fs_param_source(), which sets up fc->source.
+ *
+ * source
+ *
+ *
+ * Thank you for reading this far. I hope you find what you are looking for,
+ * in this life or the next.
+ *
+ * -- robn, 2026-03-26
+ */
+
+enum {
+ Opt_exec, Opt_suid, Opt_dev,
+ Opt_atime, Opt_relatime, Opt_strictatime,
+ Opt_saxattr, Opt_dirxattr, Opt_noxattr,
+ Opt_mntpoint,
+
+ Opt_ignore, Opt_warn,
+};
+
+static const struct fs_parameter_spec zpl_param_spec[] = {
+ fsparam_flag_no("exec", Opt_exec),
+ fsparam_flag_no("suid", Opt_suid),
+ fsparam_flag_no("dev", Opt_dev),
+
+ fsparam_flag_no("atime", Opt_atime),
+ fsparam_flag_no("relatime", Opt_relatime),
+ fsparam_flag("strictatime", Opt_strictatime),
+
+ fsparam_flag("xattr", Opt_saxattr),
+ fsparam_flag("saxattr", Opt_saxattr),
+ fsparam_flag("dirxattr", Opt_dirxattr),
+ fsparam_flag("noxattr", Opt_noxattr),
+
+ fsparam_string("mntpoint", Opt_mntpoint),
+
+ fsparam_flag("defaults", Opt_ignore),
+ fsparam_flag("zfsutil", Opt_ignore),
+ fsparam_flag("remount", Opt_ignore),
+
+ fsparam_string("fscontext", Opt_ignore),
+ fsparam_string("defcontext", Opt_ignore),
+ fsparam_string("rootcontext", Opt_ignore),
+ fsparam_string("context", Opt_ignore),
+
+ fsparam_flag("posixacl", Opt_ignore),
+ fsparam_flag("noacl", Opt_ignore),
+ fsparam_flag("casesensitive", Opt_ignore),
+ fsparam_flag("caseinsensitive", Opt_ignore),
+ fsparam_flag("casemixed", Opt_ignore),
+
+ fsparam_flag("sloppy", Opt_ignore),
+
+ {}
+};
+
+static int
+zpl_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- struct super_block *s;
+ vfs_t *vfs = fc->fs_private;
+
+ /* Handle 'source' explicitly so we don't trip on it as an unknown. */
+ int opt = vfs_parse_fs_param_source(fc, param);
+ if (opt != -ENOPARAM)
+ return (opt);
+
+ struct fs_parse_result result;
+ opt = fs_parse(fc, zpl_param_spec, param, &result);
+ if (opt == -ENOPARAM) {
+ /*
+ * Convert unknowns to warnings, to work around the whole
+ * "sloppy option" mess.
+ */
+ opt = Opt_warn;
+ }
+ if (opt < 0)
+ return (opt);
+
+ switch (opt) {
+ case Opt_exec:
+ vfs->vfs_exec = !result.negated;
+ vfs->vfs_do_exec = B_TRUE;
+ break;
+ case Opt_suid:
+ vfs->vfs_setuid = !result.negated;
+ vfs->vfs_do_setuid = B_TRUE;
+ break;
+ case Opt_dev:
+ vfs->vfs_devices = !result.negated;
+ vfs->vfs_do_devices = B_TRUE;
+ break;
+
+ case Opt_atime:
+ vfs->vfs_atime = !result.negated;
+ vfs->vfs_do_atime = B_TRUE;
+ break;
+ case Opt_relatime:
+ vfs->vfs_relatime = !result.negated;
+ vfs->vfs_do_relatime = B_TRUE;
+ break;
+ case Opt_strictatime:
+ vfs->vfs_atime = B_TRUE;
+ vfs->vfs_do_atime = B_TRUE;
+ vfs->vfs_relatime = B_FALSE;
+ vfs->vfs_do_relatime = B_TRUE;
+ break;
+
+ case Opt_saxattr:
+ vfs->vfs_xattr = ZFS_XATTR_SA;
+ vfs->vfs_do_xattr = B_TRUE;
+ break;
+ case Opt_dirxattr:
+ vfs->vfs_xattr = ZFS_XATTR_DIR;
+ vfs->vfs_do_xattr = B_TRUE;
+ break;
+ case Opt_noxattr:
+ vfs->vfs_xattr = ZFS_XATTR_OFF;
+ vfs->vfs_do_xattr = B_TRUE;
+ break;
+
+ case Opt_mntpoint:
+ if (vfs->vfs_mntpoint != NULL)
+ kmem_strfree(vfs->vfs_mntpoint);
+ vfs->vfs_mntpoint = kmem_strdup(param->string);
+ break;
+
+ case Opt_ignore:
+ break;
+
+ case Opt_warn:
+ cmn_err(CE_NOTE,
+ "ZFS: ignoring unknown mount option: %s", param->key);
+ break;
+
+ default:
+ return (-SET_ERROR(EINVAL));
+ }
+
+ return (0);
+}
+
+/*
+ * Before Linux 5.8, the kernel's individual parameter parsing had a list of
+ * "forbidden" options that would always be rejected early. These were options
+ * that should be specified by MS_* flags, to be set on the superblock
+ * directly. However, it was inconsistently applied (eg it had various "*atime"
+ * options but not "atime"), and also caused problems when it was not in sync
+ * with the version of libmount in use. It was deemed needlessly restrictive
+ * and was dropped in torvalds/linux@9193ae87a8af.
+ *
+ * Unfortunately, some of the options on this list are used by OpenZFS, so
+ * we need to see them. These include the aforementioned "*atime", "dev",
+ * "exec" and "suid".
+ *
+ * There is no easy compile-time check available to detect this, so we use
+ * a simple version check that should make it available everywhere needed,
+ * most notably RHEL8's 4.18+extras, which has backported fs_context support
+ * but does not include the 5.8 commit.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
+#define HAVE_FORBIDDEN_SB_FLAGS 1
+#endif
+
+#ifdef HAVE_FORBIDDEN_SB_FLAGS
+/*
+ * The typical path for options parsing through mount(2) is:
+ *
+ * ksys_mount
+ * do_mount
+ * generic_parse_monolithic
+ * vfs_parse_fs_string
+ * vfs_parse_fs_param
+ * zpl_parse_param
+ *
+ * vfs_parse_fs_param() calls the internal vfs_parse_sb_flag(), which is
+ * where the "forbidden" flags are applied. If it makes it through there,
+ * it will later call fc->parse_param() ie zpl_parse_param(). We can't
+ * intercept this chain in the middle anywhere; the earliest thing we can
+ * override is generic_parse_monolithic(), substituting our own by setting
+ * fc->parse_monolithic and doing the parsing work ourselves.
+ *
+ * Fortunately, generic_parse_monolithic() is almost entirely splitting the
+ * incoming parameter string on comma and handing off to the rest of the
+ * pipeline. This is easily replaced (almost entirely by reviving a few bits
+ * of our old options parser).
+ *
+ * To keep the change as narrow as possible, we reuse zpl_param_spec and
+ * zpl_parse_param() as much as possible. Once we've parsed the option, we call
+ * fs_parse(zpl_param_spec) to find out if the option is actually one we
+ * explicitly care about. If it is, we call zpl_parse_param() directly,
+ * avoiding vfs_parse_fs_param() and so the risk of being rejected. If it is
+ * not one we explicitly care about, we call vfs_parse_fs_param() as normal,
+ * letting the kernel reject it if it wishes. If it doesn't, it will end up
+ * back in zpl_parse_param() via fc->parse_param, and we can ignore or warn
+ * about it as we normally would.
+ */
+static int
+zpl_parse_monolithic(struct fs_context *fc, void *data)
+{
+ char *mntopts = data;
+
+ if (mntopts == NULL)
+ return (0);
+
+ /*
+ * Because we supply a .parse_monolithic callback, the kernel does
+ * no consideration of the options blob at all. Because of this, we
+ * have to give LSMs a first look at it. They will remove any options
+ * of interest to them (eg the SELinux *context= options).
+ */
+ int err = security_sb_eat_lsm_opts(mntopts, &fc->security);
+ if (err)
+ return (err);
+
+ char *key;
+ while ((key = strsep(&mntopts, ",")) != NULL) {
+ if (!*key)
+ continue;
+
+ struct fs_parameter param = {
+ .key = key,
+ };
+
+ char *value = strchr(key, '=');
+ if (value != NULL) {
+ /* Key starts with '='. Kernel ignores, we will too. */
+ if (value == key)
+ continue;
+ *value++ = '\0';
+
+ /* key=value is a "string" type, set up for that */
+ param.string = value;
+ param.type = fs_value_is_string;
+ param.size = strlen(value);
+ } else {
+ /* unadorned key is a "flag" type */
+ param.type = fs_value_is_flag;
+ }
+
+ /* Check if this is one of our options. */
+ struct fs_parse_result result;
+ int opt = fs_parse(fc, zpl_param_spec, &param, &result);
+ if (opt >= 0) {
+ /*
+ * We already know this is one of our options, so a
+ * failure here would be nonsensical.
+ */
+ VERIFY0(zpl_parse_param(fc, &param));
+ } else {
+ /*
+ * Not one of our options, send it through the kernel's
+ * standard parameter handling.
+ */
+ err = vfs_parse_fs_param(fc, &param);
+ if (err < 0)
+ return (err);
+ }
+ }
+
+ return (0);
+}
+#endif /* HAVE_FORBIDDEN_SB_FLAGS */
+
+static int
+zpl_get_tree(struct fs_context *fc)
+{
+ struct super_block *sb;
objset_t *os;
boolean_t issnap = B_FALSE;
int err;
- err = dmu_objset_hold(zm->mnt_osname, FTAG, &os);
+ err = dmu_objset_hold(fc->source, FTAG, &os);
if (err)
- return (ERR_PTR(-err));
+ return (-err);
/*
* The dsl pool lock must be released prior to calling sget().
@@ -405,7 +838,8 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
dsl_pool_rele(dmu_objset_pool(os), FTAG);
- s = sget(fs_type, zpl_test_super, set_anon_super, flags, os);
+ sb = sget(fc->fs_type, zpl_test_super, set_anon_super,
+ fc->sb_flags, os);
/*
* Recheck with the lock held to prevent mounting the wrong dataset
@@ -415,93 +849,161 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
* also s_umount lock is not held there so it would race with
* zfs_umount and zfsvfs can be freed.
*/
- if (!IS_ERR(s) && s->s_fs_info != NULL) {
- zfsvfs_t *zfsvfs = s->s_fs_info;
+ if (!IS_ERR(sb) && sb->s_fs_info != NULL) {
+ zfsvfs_t *zfsvfs = sb->s_fs_info;
if (zpl_enter(zfsvfs, FTAG) == 0) {
if (os != zfsvfs->z_os)
- err = -SET_ERROR(EBUSY);
+ err = SET_ERROR(EBUSY);
issnap = zfsvfs->z_issnap;
zpl_exit(zfsvfs, FTAG);
} else {
- err = -SET_ERROR(EBUSY);
+ err = SET_ERROR(EBUSY);
}
}
dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
dsl_dataset_rele(dmu_objset_ds(os), FTAG);
- if (IS_ERR(s))
- return (ERR_CAST(s));
+ if (IS_ERR(sb))
+ return (PTR_ERR(sb));
if (err) {
- deactivate_locked_super(s);
- return (ERR_PTR(err));
+ deactivate_locked_super(sb);
+ return (-err);
}
- if (s->s_root == NULL) {
- err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0);
+ if (sb->s_root == NULL) {
+ vfs_t *vfs = fc->fs_private;
+
+ /* Apply readonly flag as mount option */
+ if (fc->sb_flags & SB_RDONLY) {
+ vfs->vfs_readonly = B_TRUE;
+ vfs->vfs_do_readonly = B_TRUE;
+ }
+
+ fstrans_cookie_t cookie = spl_fstrans_mark();
+ err = zfs_domount(sb, fc->source, vfs,
+ fc->sb_flags & SB_SILENT ? 1 : 0);
+ spl_fstrans_unmark(cookie);
+
if (err) {
- deactivate_locked_super(s);
- return (ERR_PTR(err));
+ deactivate_locked_super(sb);
+ return (-err);
}
- s->s_flags |= SB_ACTIVE;
- } else if (!issnap && ((flags ^ s->s_flags) & SB_RDONLY)) {
+
+ /*
+ * zfsvfs has taken ownership of the mount options, so we
+ * need to ensure we don't free them.
+ */
+ fc->fs_private = NULL;
+
+ sb->s_flags |= SB_ACTIVE;
+ } else if (!issnap && ((fc->sb_flags ^ sb->s_flags) & SB_RDONLY)) {
/*
* Skip ro check for snap since snap is always ro regardless
* ro flag is passed by mount or not.
*/
- deactivate_locked_super(s);
- return (ERR_PTR(-EBUSY));
+ deactivate_locked_super(sb);
+ return (-SET_ERROR(EBUSY));
}
- return (s);
+ struct dentry *root = dget(sb->s_root);
+ if (IS_ERR(root))
+ return (PTR_ERR(root));
+
+ fc->root = root;
+ return (0);
}
-static struct dentry *
-zpl_mount(struct file_system_type *fs_type, int flags,
- const char *osname, void *data)
+static int
+zpl_reconfigure(struct fs_context *fc)
{
- zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data };
+ fstrans_cookie_t cookie;
+ int error;
- struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm);
- if (IS_ERR(sb))
- return (ERR_CAST(sb));
+ cookie = spl_fstrans_mark();
+ error = -zfs_remount(fc->root->d_sb, fc->fs_private, fc->sb_flags);
+ spl_fstrans_unmark(cookie);
+ ASSERT3S(error, <=, 0);
- return (dget(sb->s_root));
-}
+ if (error == 0) {
+ /*
+ * zfsvfs has taken ownership of the mount options, so we
+ * need to ensure we don't free them.
+ */
+ fc->fs_private = NULL;
+ }
-static void
-zpl_kill_sb(struct super_block *sb)
-{
- zfs_preumount(sb);
- kill_anon_super(sb);
+ return (error);
}
-void
-zpl_prune_sb(uint64_t nr_to_scan, void *arg)
+static int
+zpl_dup_fc(struct fs_context *fc, struct fs_context *src_fc)
{
- struct super_block *sb = (struct super_block *)arg;
- int objects = 0;
+ vfs_t *src_vfs = src_fc->fs_private;
+ if (src_vfs == NULL)
+ return (0);
+
+ vfs_t *vfs = zfsvfs_vfs_alloc();
+ if (vfs == NULL)
+ return (-SET_ERROR(ENOMEM));
/*
- * Ensure the superblock is not in the process of being torn down.
+ * This is annoying, but a straight memcpy() would require us to
+ * reinitialise the lock.
*/
-#ifdef HAVE_SB_DYING
- if (down_read_trylock(&sb->s_umount)) {
- if (!(sb->s_flags & SB_DYING) && sb->s_root &&
- (sb->s_flags & SB_BORN)) {
- (void) zfs_prune(sb, nr_to_scan, &objects);
- }
- up_read(&sb->s_umount);
- }
-#else
- if (down_read_trylock(&sb->s_umount)) {
- if (!hlist_unhashed(&sb->s_instances) &&
- sb->s_root && (sb->s_flags & SB_BORN)) {
- (void) zfs_prune(sb, nr_to_scan, &objects);
- }
- up_read(&sb->s_umount);
- }
+ vfs->vfs_xattr = src_vfs->vfs_xattr;
+ vfs->vfs_readonly = src_vfs->vfs_readonly;
+ vfs->vfs_do_readonly = src_vfs->vfs_do_readonly;
+ vfs->vfs_setuid = src_vfs->vfs_setuid;
+ vfs->vfs_do_setuid = src_vfs->vfs_do_setuid;
+ vfs->vfs_exec = src_vfs->vfs_exec;
+ vfs->vfs_do_exec = src_vfs->vfs_do_exec;
+ vfs->vfs_devices = src_vfs->vfs_devices;
+ vfs->vfs_do_devices = src_vfs->vfs_do_devices;
+ vfs->vfs_do_xattr = src_vfs->vfs_do_xattr;
+ vfs->vfs_atime = src_vfs->vfs_atime;
+ vfs->vfs_do_atime = src_vfs->vfs_do_atime;
+ vfs->vfs_relatime = src_vfs->vfs_relatime;
+ vfs->vfs_do_relatime = src_vfs->vfs_do_relatime;
+ vfs->vfs_nbmand = src_vfs->vfs_nbmand;
+ vfs->vfs_do_nbmand = src_vfs->vfs_do_nbmand;
+
+ mutex_enter(&src_vfs->vfs_mntpt_lock);
+ if (src_vfs->vfs_mntpoint != NULL)
+ vfs->vfs_mntpoint = kmem_strdup(src_vfs->vfs_mntpoint);
+ mutex_exit(&src_vfs->vfs_mntpt_lock);
+
+ fc->fs_private = vfs;
+ return (0);
+}
+
+static void
+zpl_free_fc(struct fs_context *fc)
+{
+ zfsvfs_vfs_free(fc->fs_private);
+}
+
+const struct fs_context_operations zpl_fs_context_operations = {
+#ifdef HAVE_FORBIDDEN_SB_FLAGS
+ .parse_monolithic = zpl_parse_monolithic,
#endif
+ .parse_param = zpl_parse_param,
+ .get_tree = zpl_get_tree,
+ .reconfigure = zpl_reconfigure,
+ .dup = zpl_dup_fc,
+ .free = zpl_free_fc,
+};
+
+static int
+zpl_init_fs_context(struct fs_context *fc)
+{
+ fc->fs_private = zfsvfs_vfs_alloc();
+ if (fc->fs_private == NULL)
+ return (-SET_ERROR(ENOMEM));
+
+ fc->ops = &zpl_fs_context_operations;
+
+ return (0);
}
const struct super_operations zpl_super_operations = {
@@ -517,7 +1019,6 @@ const struct super_operations zpl_super_operations = {
.put_super = zpl_put_super,
.sync_fs = zpl_sync_fs,
.statfs = zpl_statfs,
- .remount_fs = zpl_remount_fs,
.show_devname = zpl_show_devname,
.show_options = zpl_show_options,
.show_stats = NULL,
@@ -560,7 +1061,7 @@ struct file_system_type zpl_fs_type = {
#else
.fs_flags = FS_USERNS_MOUNT,
#endif
- .mount = zpl_mount,
+ .init_fs_context = zpl_init_fs_context,
.kill_sb = zpl_kill_sb,
};
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
index 89f9bc555fcf..dc47ff20fd74 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -1796,7 +1796,7 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
{
int readonly = get_disk_ro(zv->zv_zso->zvo_disk);
- ASSERT(RW_LOCK_HELD(&zvol_state_lock));
+ ASSERT(RW_WRITE_HELD(&zvol_state_lock));
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));