aboutsummaryrefslogtreecommitdiff
path: root/sys/contrib/openzfs
diff options
context:
space:
mode:
authorMartin Matuska <mm@FreeBSD.org>2021-08-17 19:10:18 +0000
committerMartin Matuska <mm@FreeBSD.org>2021-08-17 19:10:44 +0000
commit2faf504d1ab821fe2b9df9d2afb49bb35e1334f4 (patch)
treec02e10f9ad7338483c1fe65fe08b017ee5206789 /sys/contrib/openzfs
parenta3ddd7c92b9947ac4518fb0b55c2275422d0d627 (diff)
parent8ae86e2edc736483c1530fd689525aa2460eaec8 (diff)
downloadsrc-2faf504d1ab821fe2b9df9d2afb49bb35e1334f4.tar.gz
src-2faf504d1ab821fe2b9df9d2afb49bb35e1334f4.zip
zfs: merge openzfs/zfs@8ae86e2ed (master) into main
Notable upstream pull request merges: #12422 Fix/improve dbuf hits accounting #12406 Increase default volblocksize from 8KB to 16KB #12398 Remove b_pabd/b_rabd allocation from arc_hdr_alloc() #12397 Run arc_evict thread at higher priority #12297 Avoid vq_lock drop in vdev_queue_aggregate() #12161 Restore FreeBSD sysctl processing for arc.min and arc.max Obtained from: OpenZFS OpenZFS commit: 8ae86e2edc736483c1530fd689525aa2460eaec8
Diffstat (limited to 'sys/contrib/openzfs')
-rw-r--r--sys/contrib/openzfs/configure.ac1
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h6
-rw-r--r--sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h4
-rw-r--r--sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h4
-rw-r--r--sys/contrib/openzfs/include/sys/arc.h7
-rw-r--r--sys/contrib/openzfs/include/sys/arc_impl.h14
-rw-r--r--sys/contrib/openzfs/include/sys/fs/zfs.h2
-rw-r--r--sys/contrib/openzfs/include/sys/zfs_refcount.h8
-rw-r--r--sys/contrib/openzfs/include/sys/zthr.h5
-rw-r--r--sys/contrib/openzfs/man/man4/zfs.42
-rw-r--r--sys/contrib/openzfs/man/man7/zfsprops.72
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c2
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c53
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/arc_os.c14
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c12
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c34
-rw-r--r--sys/contrib/openzfs/module/zfs/arc.c192
-rw-r--r--sys/contrib/openzfs/module/zfs/refcount.c51
-rw-r--r--sys/contrib/openzfs/module/zfs/spa.c7
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_indirect.c2
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_queue.c63
-rw-r--r--sys/contrib/openzfs/module/zfs/zthr.c19
-rwxr-xr-xsys/contrib/openzfs/scripts/zfs.sh12
-rw-r--r--sys/contrib/openzfs/tests/Makefile.am2
-rw-r--r--sys/contrib/openzfs/tests/runfiles/common.run4
-rwxr-xr-xsys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in7
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib28
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/functional/Makefile.am1
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create.cfg2
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_timestamp.ksh6
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/Makefile.am5
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/cleanup.ksh34
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/crtime_001_pos.ksh71
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/setup.ksh35
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_volume.ksh4
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/redacted_panic.ksh10
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/functional/reservation/reservation.shlib6
37 files changed, 532 insertions, 199 deletions
diff --git a/sys/contrib/openzfs/configure.ac b/sys/contrib/openzfs/configure.ac
index 27409c82f396..6f34b210d2b7 100644
--- a/sys/contrib/openzfs/configure.ac
+++ b/sys/contrib/openzfs/configure.ac
@@ -327,6 +327,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile
tests/zfs-tests/tests/functional/compression/Makefile
tests/zfs-tests/tests/functional/cp_files/Makefile
+ tests/zfs-tests/tests/functional/crtime/Makefile
tests/zfs-tests/tests/functional/ctime/Makefile
tests/zfs-tests/tests/functional/deadman/Makefile
tests/zfs-tests/tests/functional/delegate/Makefile
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h
index 5b3b3271e39e..5695abee7b85 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h
@@ -62,6 +62,12 @@
#define param_set_arc_long_args(var) \
CTLTYPE_ULONG, &var, 0, param_set_arc_long, "LU"
+#define param_set_arc_min_args(var) \
+ CTLTYPE_ULONG, &var, 0, param_set_arc_min, "LU"
+
+#define param_set_arc_max_args(var) \
+ CTLTYPE_ULONG, &var, 0, param_set_arc_max, "LU"
+
#define param_set_arc_int_args(var) \
CTLTYPE_INT, &var, 0, param_set_arc_int, "I"
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h
index 3df491f8b392..d3410bc07a32 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h
@@ -80,7 +80,7 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
__entry->hdr_mru_ghost_hits = ab->b_l1hdr.b_mru_ghost_hits;
__entry->hdr_mfu_hits = ab->b_l1hdr.b_mfu_hits;
__entry->hdr_mfu_ghost_hits = ab->b_l1hdr.b_mfu_ghost_hits;
- __entry->hdr_l2_hits = ab->b_l1hdr.b_l2_hits;
+ __entry->hdr_l2_hits = ab->b_l2hdr.b_hits;
__entry->hdr_refcount = ab->b_l1hdr.b_refcnt.rc_count;
),
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
@@ -238,7 +238,7 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
__entry->hdr_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits;
__entry->hdr_mfu_hits = hdr->b_l1hdr.b_mfu_hits;
__entry->hdr_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits;
- __entry->hdr_l2_hits = hdr->b_l1hdr.b_l2_hits;
+ __entry->hdr_l2_hits = hdr->b_l2hdr.b_hits;
__entry->hdr_refcount = hdr->b_l1hdr.b_refcnt.rc_count;
__entry->bp_dva0[0] = bp->blk_dva[0].dva_word[0];
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h
index be211c5b51da..0a6273442b71 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h
@@ -36,21 +36,21 @@
#include <sys/list.h>
#include <sys/dmu.h>
#include <sys/sa.h>
+#include <sys/time.h>
#include <sys/zfs_vfsops.h>
#include <sys/rrwlock.h>
#include <sys/zfs_sa.h>
#include <sys/zfs_stat.h>
#include <sys/zfs_rlock.h>
-
#ifdef __cplusplus
extern "C" {
#endif
#define ZNODE_OS_FIELDS \
+ inode_timespec_t z_btime; /* creation/birth time (cached) */ \
struct inode z_inode;
-
/*
* Convert between znode pointers and inode pointers
*/
diff --git a/sys/contrib/openzfs/include/sys/arc.h b/sys/contrib/openzfs/include/sys/arc.h
index 20fa47bd9564..afbe65bb1c97 100644
--- a/sys/contrib/openzfs/include/sys/arc.h
+++ b/sys/contrib/openzfs/include/sys/arc.h
@@ -46,6 +46,13 @@ extern "C" {
*/
#define ARC_EVICT_ALL UINT64_MAX
+/*
+ * ZFS gets very unhappy when the maximum ARC size is smaller than the maximum
+ * block size and a larger block is written. To leave some safety margin, we
+ * limit the minimum for zfs_arc_max to the maximum transaction size.
+ */
+#define MIN_ARC_MAX DMU_MAX_ACCESS
+
#define HDR_SET_LSIZE(hdr, x) do { \
ASSERT(IS_P2ALIGNED(x, 1U << SPA_MINBLOCKSHIFT)); \
(hdr)->b_lsize = ((x) >> SPA_MINBLOCKSHIFT); \
diff --git a/sys/contrib/openzfs/include/sys/arc_impl.h b/sys/contrib/openzfs/include/sys/arc_impl.h
index f99d2911b5df..3c5af9d86340 100644
--- a/sys/contrib/openzfs/include/sys/arc_impl.h
+++ b/sys/contrib/openzfs/include/sys/arc_impl.h
@@ -153,24 +153,22 @@ typedef struct l1arc_buf_hdr {
kmutex_t b_freeze_lock;
zio_cksum_t *b_freeze_cksum;
- arc_buf_t *b_buf;
- uint32_t b_bufcnt;
- /* for waiting on writes to complete */
+ /* for waiting on reads to complete */
kcondvar_t b_cv;
uint8_t b_byteswap;
-
/* protected by arc state mutex */
arc_state_t *b_state;
multilist_node_t b_arc_node;
- /* updated atomically */
+ /* protected by hash lock */
clock_t b_arc_access;
uint32_t b_mru_hits;
uint32_t b_mru_ghost_hits;
uint32_t b_mfu_hits;
uint32_t b_mfu_ghost_hits;
- uint32_t b_l2_hits;
+ uint32_t b_bufcnt;
+ arc_buf_t *b_buf;
/* self protecting */
zfs_refcount_t b_refcnt;
@@ -990,7 +988,7 @@ extern unsigned long zfs_arc_max;
extern void arc_reduce_target_size(int64_t to_free);
extern boolean_t arc_reclaim_needed(void);
extern void arc_kmem_reap_soon(void);
-extern void arc_wait_for_eviction(uint64_t);
+extern void arc_wait_for_eviction(uint64_t, boolean_t);
extern void arc_lowmem_init(void);
extern void arc_lowmem_fini(void);
@@ -1004,6 +1002,8 @@ extern void arc_unregister_hotplug(void);
extern int param_set_arc_long(ZFS_MODULE_PARAM_ARGS);
extern int param_set_arc_int(ZFS_MODULE_PARAM_ARGS);
+extern int param_set_arc_min(ZFS_MODULE_PARAM_ARGS);
+extern int param_set_arc_max(ZFS_MODULE_PARAM_ARGS);
/* used in zdb.c */
boolean_t l2arc_log_blkptr_valid(l2arc_dev_t *dev,
diff --git a/sys/contrib/openzfs/include/sys/fs/zfs.h b/sys/contrib/openzfs/include/sys/fs/zfs.h
index 5d43750594cd..a6b704ec87a0 100644
--- a/sys/contrib/openzfs/include/sys/fs/zfs.h
+++ b/sys/contrib/openzfs/include/sys/fs/zfs.h
@@ -1225,7 +1225,7 @@ typedef struct ddt_histogram {
#define ZVOL_DEV_NAME "zd"
#define ZVOL_PROP_NAME "name"
-#define ZVOL_DEFAULT_BLOCKSIZE 8192
+#define ZVOL_DEFAULT_BLOCKSIZE 16384
typedef enum {
VDEV_INITIALIZE_NONE,
diff --git a/sys/contrib/openzfs/include/sys/zfs_refcount.h b/sys/contrib/openzfs/include/sys/zfs_refcount.h
index fc0cbea1cf7c..1e6449472e38 100644
--- a/sys/contrib/openzfs/include/sys/zfs_refcount.h
+++ b/sys/contrib/openzfs/include/sys/zfs_refcount.h
@@ -96,8 +96,8 @@ typedef struct refcount {
#define zfs_refcount_create_tracked(rc) ((rc)->rc_count = 0)
#define zfs_refcount_destroy(rc) ((rc)->rc_count = 0)
#define zfs_refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
-#define zfs_refcount_is_zero(rc) ((rc)->rc_count == 0)
-#define zfs_refcount_count(rc) ((rc)->rc_count)
+#define zfs_refcount_is_zero(rc) (zfs_refcount_count(rc) == 0)
+#define zfs_refcount_count(rc) atomic_load_64(&(rc)->rc_count)
#define zfs_refcount_add(rc, holder) atomic_inc_64_nv(&(rc)->rc_count)
#define zfs_refcount_remove(rc, holder) atomic_dec_64_nv(&(rc)->rc_count)
#define zfs_refcount_add_many(rc, number, holder) \
@@ -105,13 +105,13 @@ typedef struct refcount {
#define zfs_refcount_remove_many(rc, number, holder) \
atomic_add_64_nv(&(rc)->rc_count, -number)
#define zfs_refcount_transfer(dst, src) { \
- uint64_t __tmp = (src)->rc_count; \
+ uint64_t __tmp = zfs_refcount_count(src); \
atomic_add_64(&(src)->rc_count, -__tmp); \
atomic_add_64(&(dst)->rc_count, __tmp); \
}
#define zfs_refcount_transfer_ownership(rc, ch, nh) ((void)0)
#define zfs_refcount_transfer_ownership_many(rc, nr, ch, nh) ((void)0)
-#define zfs_refcount_held(rc, holder) ((rc)->rc_count > 0)
+#define zfs_refcount_held(rc, holder) (zfs_refcount_count(rc) > 0)
#define zfs_refcount_not_held(rc, holder) (B_TRUE)
#define zfs_refcount_init()
diff --git a/sys/contrib/openzfs/include/sys/zthr.h b/sys/contrib/openzfs/include/sys/zthr.h
index ae8c57e9eea2..19be89eeebe5 100644
--- a/sys/contrib/openzfs/include/sys/zthr.h
+++ b/sys/contrib/openzfs/include/sys/zthr.h
@@ -25,10 +25,11 @@ typedef void (zthr_func_t)(void *, zthr_t *);
typedef boolean_t (zthr_checkfunc_t)(void *, zthr_t *);
extern zthr_t *zthr_create(const char *zthr_name,
- zthr_checkfunc_t checkfunc, zthr_func_t *func, void *arg);
+ zthr_checkfunc_t checkfunc, zthr_func_t *func, void *arg,
+ pri_t pri);
extern zthr_t *zthr_create_timer(const char *zthr_name,
zthr_checkfunc_t *checkfunc, zthr_func_t *func, void *arg,
- hrtime_t nano_wait);
+ hrtime_t nano_wait, pri_t pri);
extern void zthr_destroy(zthr_t *t);
extern void zthr_wakeup(zthr_t *t);
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
index 9a1dec3e67ca..d7fc31bfde10 100644
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -2034,7 +2034,7 @@ powerpc_altivec Altivec PowerPC
.
.It Sy zfs_vdev_scheduler Pq charp
.Sy DEPRECATED .
-Prints warning to kernel log for compatiblity.
+Prints warning to kernel log for compatibility.
.
.It Sy zfs_zevent_len_max Ns = Ns Sy 512 Pq int
Max event queue length.
diff --git a/sys/contrib/openzfs/man/man7/zfsprops.7 b/sys/contrib/openzfs/man/man7/zfsprops.7
index 3f3ddcebf320..fcf086f0c271 100644
--- a/sys/contrib/openzfs/man/man7/zfsprops.7
+++ b/sys/contrib/openzfs/man/man7/zfsprops.7
@@ -527,7 +527,7 @@ cannot be changed once the volume has been written, so it should be set at
volume creation time.
The default
.Sy blocksize
-for volumes is 8 Kbytes.
+for volumes is 16 Kbytes.
Any power of 2 from 512 bytes to 128 Kbytes is valid.
.Pp
This property can also be referred to by its shortened column name,
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
index 3b8b11cff0c2..fddb1f0e87cb 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
@@ -233,7 +233,7 @@ arc_lowmem(void *arg __unused, int howto __unused)
* with ARC reclaim thread.
*/
if (curproc == pageproc)
- arc_wait_for_eviction(to_free);
+ arc_wait_for_eviction(to_free, B_FALSE);
}
void
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
index 94124fdcf6c3..5315b60982df 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
@@ -144,6 +144,55 @@ extern arc_state_t ARC_l2c_only;
/* arc.c */
+/*
+ * Sysctl handler for the ARC maximum size tunable (zfs_arc_max).
+ *
+ * A read reports the current zfs_arc_max.  A write accepts either 0 or a
+ * value that is at least MIN_ARC_MAX, greater than arc_c_min and less than
+ * arc_all_memory(); any other value is rejected with EINVAL.  After an
+ * accepted non-zero write, arc_tuning_update() is run and zfs_arc_max is
+ * set to the resulting arc_c_max.
+ *
+ * Returns 0 on success or an errno value.
+ */
+int
+param_set_arc_max(SYSCTL_HANDLER_ARGS)
+{
+	/*
+	 * sysctl_handle_long() reads and writes its argument as a long, so
+	 * the scratch variable must be long-sized rather than uint64_t.
+	 */
+	unsigned long val;
+	int err;
+
+	val = zfs_arc_max;
+	err = sysctl_handle_long(oidp, &val, 0, req);
+	/* Nothing more to do on error or when this was only a read. */
+	if (err != 0 || req->newptr == NULL)
+		return (SET_ERROR(err));
+
+	if (val != 0 && (val < MIN_ARC_MAX || val <= arc_c_min ||
+	    val >= arc_all_memory()))
+		return (SET_ERROR(EINVAL));
+
+	zfs_arc_max = val;
+	arc_tuning_update(B_TRUE);
+
+	/* Update the sysctl to the tuned value */
+	if (val != 0)
+		zfs_arc_max = arc_c_max;
+
+	return (0);
+}
+
+/*
+ * Sysctl handler for the ARC minimum size tunable (zfs_arc_min).
+ *
+ * A read reports the current zfs_arc_min.  A write accepts either 0 or a
+ * value that is at least 2 << SPA_MAXBLOCKSHIFT and no greater than
+ * arc_c_max; any other value is rejected with EINVAL.  After an accepted
+ * non-zero write, arc_tuning_update() is run and zfs_arc_min is set to the
+ * resulting arc_c_min.
+ *
+ * Returns 0 on success or an errno value.
+ */
+int
+param_set_arc_min(SYSCTL_HANDLER_ARGS)
+{
+	/*
+	 * The OID is registered as CTLTYPE_ULONG ("LU"), so transfer a
+	 * long-sized value, the same way param_set_arc_max() does.
+	 */
+	unsigned long val;
+	int err;
+
+	val = zfs_arc_min;
+	err = sysctl_handle_long(oidp, &val, 0, req);
+	/* Nothing more to do on error or when this was only a read. */
+	if (err != 0 || req->newptr == NULL)
+		return (SET_ERROR(err));
+
+	if (val != 0 && (val < 2ULL << SPA_MAXBLOCKSHIFT || val > arc_c_max))
+		return (SET_ERROR(EINVAL));
+
+	zfs_arc_min = val;
+	arc_tuning_update(B_TRUE);
+
+	/* Update the sysctl to the tuned value */
+	if (val != 0)
+		zfs_arc_min = arc_c_min;
+
+	return (0);
+}
+
/* legacy compat */
extern uint64_t l2arc_write_max; /* def max write size */
extern uint64_t l2arc_write_boost; /* extra warmup write */
@@ -278,11 +327,11 @@ param_set_arc_int(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min,
CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- &zfs_arc_min, sizeof (zfs_arc_min), param_set_arc_long, "LU",
+ &zfs_arc_min, sizeof (zfs_arc_min), param_set_arc_min, "LU",
"min arc size (LEGACY)");
SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max,
CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- &zfs_arc_max, sizeof (zfs_arc_max), param_set_arc_long, "LU",
+ &zfs_arc_max, sizeof (zfs_arc_max), param_set_arc_max, "LU",
"max arc size (LEGACY)");
/* dbuf.c */
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
index 415cfc281ae8..f96cd1271ee5 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
@@ -217,7 +217,7 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
* for the requested amount of data to be evicted.
*/
arc_reduce_target_size(ptob(sc->nr_to_scan));
- arc_wait_for_eviction(ptob(sc->nr_to_scan));
+ arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE);
if (current->reclaim_state != NULL)
current->reclaim_state->reclaimed_slab += sc->nr_to_scan;
@@ -372,6 +372,18 @@ param_set_arc_long(const char *buf, zfs_kernel_param_t *kp)
}
+/*
+ * Linux module-parameter setter for the ARC minimum size.  It performs no
+ * checks of its own and simply forwards to param_set_arc_long().
+ */
int
+param_set_arc_min(const char *buf, zfs_kernel_param_t *kp)
+{
+ return (param_set_arc_long(buf, kp));
+}
+
+/*
+ * Linux module-parameter setter for the ARC maximum size.  It performs no
+ * checks of its own and simply forwards to param_set_arc_long().
+ */
+int
+param_set_arc_max(const char *buf, zfs_kernel_param_t *kp)
+{
+ return (param_set_arc_long(buf, kp));
+}
+
+int
param_set_arc_int(const char *buf, zfs_kernel_param_t *kp)
{
int error;
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
index 8cc454468a3f..6859832ab81c 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
@@ -525,9 +525,9 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
uint64_t tmp_gen;
uint64_t links;
uint64_t z_uid, z_gid;
- uint64_t atime[2], mtime[2], ctime[2];
+ uint64_t atime[2], mtime[2], ctime[2], btime[2];
uint64_t projid = ZFS_DEFAULT_PROJID;
- sa_bulk_attr_t bulk[11];
+ sa_bulk_attr_t bulk[12];
int count = 0;
ASSERT(zfsvfs != NULL);
@@ -569,6 +569,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &btime, 16);
if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || tmp_gen == 0 ||
(dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
@@ -596,6 +597,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
ZFS_TIME_DECODE(&ip->i_atime, atime);
ZFS_TIME_DECODE(&ip->i_mtime, mtime);
ZFS_TIME_DECODE(&ip->i_ctime, ctime);
+ ZFS_TIME_DECODE(&zp->z_btime, btime);
ip->i_ino = zp->z_id;
zfs_znode_update_vfs(zp);
@@ -1169,12 +1171,12 @@ zfs_rezget(znode_t *zp)
uint64_t obj_num = zp->z_id;
uint64_t mode;
uint64_t links;
- sa_bulk_attr_t bulk[10];
+ sa_bulk_attr_t bulk[11];
int err;
int count = 0;
uint64_t gen;
uint64_t z_uid, z_gid;
- uint64_t atime[2], mtime[2], ctime[2];
+ uint64_t atime[2], mtime[2], ctime[2], btime[2];
uint64_t projid = ZFS_DEFAULT_PROJID;
znode_hold_t *zh;
@@ -1244,6 +1246,7 @@ zfs_rezget(znode_t *zp)
&mtime, 16);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
&ctime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &btime, 16);
if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
zfs_znode_dmu_fini(zp);
@@ -1269,6 +1272,7 @@ zfs_rezget(znode_t *zp)
ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime);
ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime);
ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime);
+ ZFS_TIME_DECODE(&zp->z_btime, btime);
if ((uint32_t)gen != ZTOI(zp)->i_generation) {
zfs_znode_dmu_fini(zp);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
index 98c2fb3a0c92..24a8b036bf0f 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
@@ -378,18 +378,46 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
{
int error;
fstrans_cookie_t cookie;
+ struct inode *ip = path->dentry->d_inode;
+ znode_t *zp __maybe_unused = ITOZ(ip);
cookie = spl_fstrans_mark();
/*
- * XXX request_mask and query_flags currently ignored.
+ * XXX query_flags currently ignored.
*/
#ifdef HAVE_USERNS_IOPS_GETATTR
- error = -zfs_getattr_fast(user_ns, path->dentry->d_inode, stat);
+ error = -zfs_getattr_fast(user_ns, ip, stat);
#else
- error = -zfs_getattr_fast(kcred->user_ns, path->dentry->d_inode, stat);
+ error = -zfs_getattr_fast(kcred->user_ns, ip, stat);
#endif
+
+#ifdef STATX_BTIME
+ if (request_mask & STATX_BTIME) {
+ stat->btime = zp->z_btime;
+ stat->result_mask |= STATX_BTIME;
+ }
+#endif
+
+#ifdef STATX_ATTR_IMMUTABLE
+ if (zp->z_pflags & ZFS_IMMUTABLE)
+ stat->attributes |= STATX_ATTR_IMMUTABLE;
+ stat->attributes_mask |= STATX_ATTR_IMMUTABLE;
+#endif
+
+#ifdef STATX_ATTR_APPEND
+ if (zp->z_pflags & ZFS_APPENDONLY)
+ stat->attributes |= STATX_ATTR_APPEND;
+ stat->attributes_mask |= STATX_ATTR_APPEND;
+#endif
+
+#ifdef STATX_ATTR_NODUMP
+ if (zp->z_pflags & ZFS_NODUMP)
+ stat->attributes |= STATX_ATTR_NODUMP;
+ stat->attributes_mask |= STATX_ATTR_NODUMP;
+#endif
+
spl_fstrans_unmark(cookie);
ASSERT3S(error, <=, 0);
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index bd64a4b24a2c..227d0417c765 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -834,12 +834,13 @@ static kcondvar_t l2arc_rebuild_thr_cv;
enum arc_hdr_alloc_flags {
ARC_HDR_ALLOC_RDATA = 0x1,
ARC_HDR_DO_ADAPT = 0x2,
+ ARC_HDR_USE_RESERVE = 0x4,
};
-static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *, boolean_t);
+static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *, int);
static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *);
-static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *, boolean_t);
+static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *, int);
static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *);
static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *);
static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag);
@@ -1854,7 +1855,8 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb)
* and then loan a buffer from it, rather than allocating a
* linear buffer and wrapping it in an abd later.
*/
- cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr, B_TRUE);
+ cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
+ ARC_HDR_DO_ADAPT);
tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
@@ -2578,13 +2580,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
l2arc_hdr_arcstats_increment_state(hdr);
}
}
-
- /*
- * L2 headers should never be on the L2 state list since they don't
- * have L1 headers allocated.
- */
- ASSERT(multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
- multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
}
void
@@ -2740,12 +2735,6 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb,
ASSERT3P(*ret, ==, NULL);
IMPLY(encrypted, compressed);
- hdr->b_l1hdr.b_mru_hits = 0;
- hdr->b_l1hdr.b_mru_ghost_hits = 0;
- hdr->b_l1hdr.b_mfu_hits = 0;
- hdr->b_l1hdr.b_mfu_ghost_hits = 0;
- hdr->b_l1hdr.b_l2_hits = 0;
-
buf = *ret = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
buf->b_hdr = hdr;
buf->b_data = NULL;
@@ -3182,7 +3171,6 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, int alloc_flags)
{
uint64_t size;
boolean_t alloc_rdata = ((alloc_flags & ARC_HDR_ALLOC_RDATA) != 0);
- boolean_t do_adapt = ((alloc_flags & ARC_HDR_DO_ADAPT) != 0);
ASSERT3U(HDR_GET_LSIZE(hdr), >, 0);
ASSERT(HDR_HAS_L1HDR(hdr));
@@ -3193,14 +3181,14 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, int alloc_flags)
size = HDR_GET_PSIZE(hdr);
ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL);
hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr,
- do_adapt);
+ alloc_flags);
ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL);
ARCSTAT_INCR(arcstat_raw_size, size);
} else {
size = arc_hdr_size(hdr);
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr,
- do_adapt);
+ alloc_flags);
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
}
@@ -3246,13 +3234,34 @@ arc_hdr_free_abd(arc_buf_hdr_t *hdr, boolean_t free_rdata)
ARCSTAT_INCR(arcstat_uncompressed_size, -HDR_GET_LSIZE(hdr));
}
+/*
+ * Allocate empty anonymous ARC header. The header will get its identity
+ * assigned and buffers attached later as part of read or write operations.
+ *
+ * In case of read arc_read() assigns header its identity (b_dva + b_birth),
+ * inserts it into ARC hash to become globally visible and allocates physical
+ * (b_pabd) or raw (b_rabd) ABD buffer to read into from disk. On disk read
+ * completion arc_read_done() allocates ARC buffer(s) as needed, potentially
+ * sharing one of them with the physical ABD buffer.
+ *
+ * In case of write arc_alloc_buf() allocates ARC buffer to be filled with
+ * data. Then after compression and/or encryption arc_write_ready() allocates
+ * and fills (or potentially shares) physical (b_pabd) or raw (b_rabd) ABD
+ * buffer. On disk write completion arc_write_done() assigns the header its
+ * new identity (b_dva + b_birth) and inserts into ARC hash.
+ *
+ * In case of partial overwrite the old data is read first as described. Then
+ * arc_release() either allocates new anonymous ARC header and moves the ARC
+ * buffer to it, or reuses the old ARC header by discarding its identity and
+ * removing it from ARC hash. After buffer modification normal write process
+ * follows as described.
+ */
static arc_buf_hdr_t *
arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
boolean_t protected, enum zio_compress compression_type, uint8_t complevel,
- arc_buf_contents_t type, boolean_t alloc_rdata)
+ arc_buf_contents_t type)
{
arc_buf_hdr_t *hdr;
- int flags = ARC_HDR_DO_ADAPT;
VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
if (protected) {
@@ -3260,7 +3269,6 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
} else {
hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
}
- flags |= alloc_rdata ? ARC_HDR_ALLOC_RDATA : 0;
ASSERT(HDR_EMPTY(hdr));
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
@@ -3277,15 +3285,13 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
hdr->b_l1hdr.b_state = arc_anon;
hdr->b_l1hdr.b_arc_access = 0;
+ hdr->b_l1hdr.b_mru_hits = 0;
+ hdr->b_l1hdr.b_mru_ghost_hits = 0;
+ hdr->b_l1hdr.b_mfu_hits = 0;
+ hdr->b_l1hdr.b_mfu_ghost_hits = 0;
hdr->b_l1hdr.b_bufcnt = 0;
hdr->b_l1hdr.b_buf = NULL;
- /*
- * Allocate the hdr's buffer. This will contain either
- * the compressed or uncompressed data depending on the block
- * it references and compressed arc enablement.
- */
- arc_hdr_alloc_abd(hdr, flags);
ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
return (hdr);
@@ -3460,7 +3466,6 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
nhdr->b_l1hdr.b_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits;
nhdr->b_l1hdr.b_mfu_hits = hdr->b_l1hdr.b_mfu_hits;
nhdr->b_l1hdr.b_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits;
- nhdr->b_l1hdr.b_l2_hits = hdr->b_l1hdr.b_l2_hits;
nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb;
nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd;
@@ -3505,7 +3510,6 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
hdr->b_l1hdr.b_mru_ghost_hits = 0;
hdr->b_l1hdr.b_mfu_hits = 0;
hdr->b_l1hdr.b_mfu_ghost_hits = 0;
- hdr->b_l1hdr.b_l2_hits = 0;
hdr->b_l1hdr.b_acb = NULL;
hdr->b_l1hdr.b_pabd = NULL;
@@ -3569,7 +3573,7 @@ arc_buf_t *
arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size)
{
arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), size, size,
- B_FALSE, ZIO_COMPRESS_OFF, 0, type, B_FALSE);
+ B_FALSE, ZIO_COMPRESS_OFF, 0, type);
arc_buf_t *buf = NULL;
VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE, B_FALSE,
@@ -3593,7 +3597,7 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
- B_FALSE, compression_type, complevel, ARC_BUFC_DATA, B_FALSE);
+ B_FALSE, compression_type, complevel, ARC_BUFC_DATA);
arc_buf_t *buf = NULL;
VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE,
@@ -3601,16 +3605,12 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
arc_buf_thaw(buf);
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
- if (!arc_buf_is_shared(buf)) {
- /*
- * To ensure that the hdr has the correct data in it if we call
- * arc_untransform() on this buf before it's been written to
- * disk, it's easiest if we just set up sharing between the
- * buf and the hdr.
- */
- arc_hdr_free_abd(hdr, B_FALSE);
- arc_share_buf(hdr, buf);
- }
+ /*
+ * To ensure that the hdr has the correct data in it if we call
+ * arc_untransform() on this buf before it's been written to disk,
+ * it's easiest if we just set up sharing between the buf and the hdr.
+ */
+ arc_share_buf(hdr, buf);
return (buf);
}
@@ -3632,7 +3632,7 @@ arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder,
ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, B_TRUE,
- compression_type, complevel, type, B_TRUE);
+ compression_type, complevel, type);
hdr->b_crypt_hdr.b_dsobj = dsobj;
hdr->b_crypt_hdr.b_ot = ot;
@@ -5130,7 +5130,7 @@ arc_adapt(int bytes, arc_state_t *state)
* zfs_arc_overflow_shift.
*/
static arc_ovf_level_t
-arc_is_overflowing(void)
+arc_is_overflowing(boolean_t use_reserve)
{
/* Always allow at least one block of overflow */
int64_t overflow = MAX(SPA_MAXBLOCKSIZE,
@@ -5147,17 +5147,19 @@ arc_is_overflowing(void)
*/
int64_t over = aggsum_lower_bound(&arc_sums.arcstat_size) -
arc_c - overflow / 2;
+ if (!use_reserve)
+ overflow /= 2;
return (over < 0 ? ARC_OVF_NONE :
over < overflow ? ARC_OVF_SOME : ARC_OVF_SEVERE);
}
static abd_t *
arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
- boolean_t do_adapt)
+ int alloc_flags)
{
arc_buf_contents_t type = arc_buf_type(hdr);
- arc_get_data_impl(hdr, size, tag, do_adapt);
+ arc_get_data_impl(hdr, size, tag, alloc_flags);
if (type == ARC_BUFC_METADATA) {
return (abd_alloc(size, B_TRUE));
} else {
@@ -5171,7 +5173,7 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
{
arc_buf_contents_t type = arc_buf_type(hdr);
- arc_get_data_impl(hdr, size, tag, B_TRUE);
+ arc_get_data_impl(hdr, size, tag, ARC_HDR_DO_ADAPT);
if (type == ARC_BUFC_METADATA) {
return (zio_buf_alloc(size));
} else {
@@ -5188,9 +5190,9 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
* of ARC behavior and settings. See arc_lowmem_init().
*/
void
-arc_wait_for_eviction(uint64_t amount)
+arc_wait_for_eviction(uint64_t amount, boolean_t use_reserve)
{
- switch (arc_is_overflowing()) {
+ switch (arc_is_overflowing(use_reserve)) {
case ARC_OVF_NONE:
return;
case ARC_OVF_SOME:
@@ -5267,12 +5269,12 @@ arc_wait_for_eviction(uint64_t amount)
*/
static void
arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
- boolean_t do_adapt)
+ int alloc_flags)
{
arc_state_t *state = hdr->b_l1hdr.b_state;
arc_buf_contents_t type = arc_buf_type(hdr);
- if (do_adapt)
+ if (alloc_flags & ARC_HDR_DO_ADAPT)
arc_adapt(size, state);
/*
@@ -5288,7 +5290,8 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
* ensure that that progress is also made towards getting arc_size
* under arc_c. See the comment above zfs_arc_eviction_pct.
*/
- arc_wait_for_eviction(size * zfs_arc_eviction_pct / 100);
+ arc_wait_for_eviction(size * zfs_arc_eviction_pct / 100,
+ alloc_flags & ARC_HDR_USE_RESERVE);
VERIFY3U(hdr->b_type, ==, type);
if (type == ARC_BUFC_METADATA) {
@@ -5427,7 +5430,7 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
arc_hdr_clear_flags(hdr,
ARC_FLAG_PREFETCH |
ARC_FLAG_PRESCIENT_PREFETCH);
- atomic_inc_32(&hdr->b_l1hdr.b_mru_hits);
+ hdr->b_l1hdr.b_mru_hits++;
ARCSTAT_BUMP(arcstat_mru_hits);
if (HDR_HAS_L2HDR(hdr))
l2arc_hdr_arcstats_increment_state(hdr);
@@ -5452,7 +5455,7 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
arc_change_state(arc_mfu, hdr, hash_lock);
}
- atomic_inc_32(&hdr->b_l1hdr.b_mru_hits);
+ hdr->b_l1hdr.b_mru_hits++;
ARCSTAT_BUMP(arcstat_mru_hits);
} else if (hdr->b_l1hdr.b_state == arc_mru_ghost) {
arc_state_t *new_state;
@@ -5481,7 +5484,7 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
arc_change_state(new_state, hdr, hash_lock);
- atomic_inc_32(&hdr->b_l1hdr.b_mru_ghost_hits);
+ hdr->b_l1hdr.b_mru_ghost_hits++;
ARCSTAT_BUMP(arcstat_mru_ghost_hits);
} else if (hdr->b_l1hdr.b_state == arc_mfu) {
/*
@@ -5494,7 +5497,7 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
* the head of the list now.
*/
- atomic_inc_32(&hdr->b_l1hdr.b_mfu_hits);
+ hdr->b_l1hdr.b_mfu_hits++;
ARCSTAT_BUMP(arcstat_mfu_hits);
hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
} else if (hdr->b_l1hdr.b_state == arc_mfu_ghost) {
@@ -5517,7 +5520,7 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
arc_change_state(new_state, hdr, hash_lock);
- atomic_inc_32(&hdr->b_l1hdr.b_mfu_ghost_hits);
+ hdr->b_l1hdr.b_mfu_ghost_hits++;
ARCSTAT_BUMP(arcstat_mfu_ghost_hits);
} else if (hdr->b_l1hdr.b_state == arc_l2c_only) {
/*
@@ -6098,8 +6101,7 @@ top:
arc_buf_hdr_t *exists = NULL;
arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
- BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), 0, type,
- encrypted_read);
+ BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), 0, type);
if (!embedded_bp) {
hdr->b_dva = *BP_IDENTITY(bp);
@@ -6113,6 +6115,7 @@ top:
arc_hdr_destroy(hdr);
goto top; /* restart the IO request */
}
+ alloc_flags |= ARC_HDR_DO_ADAPT;
} else {
/*
* This block is in the ghost cache or encrypted data
@@ -6160,9 +6163,9 @@ top:
*/
arc_adapt(arc_hdr_size(hdr), hdr->b_l1hdr.b_state);
arc_access(hdr, hash_lock);
- arc_hdr_alloc_abd(hdr, alloc_flags);
}
+ arc_hdr_alloc_abd(hdr, alloc_flags);
if (encrypted_read) {
ASSERT(HDR_HAS_RABD(hdr));
size = HDR_GET_PSIZE(hdr);
@@ -6288,7 +6291,7 @@ top:
DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
ARCSTAT_BUMP(arcstat_l2_hits);
- atomic_inc_32(&hdr->b_l2hdr.b_hits);
+ hdr->b_l2hdr.b_hits++;
cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
KM_SLEEP);
@@ -6684,7 +6687,7 @@ arc_release(arc_buf_t *buf, void *tag)
* buffer which will be freed in arc_write().
*/
nhdr = arc_hdr_alloc(spa, psize, lsize, protected,
- compress, hdr->b_complevel, type, HDR_HAS_RABD(hdr));
+ compress, hdr->b_complevel, type);
ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
ASSERT0(nhdr->b_l1hdr.b_bufcnt);
ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt));
@@ -6695,11 +6698,6 @@ arc_release(arc_buf_t *buf, void *tag)
nhdr->b_l1hdr.b_bufcnt = 1;
if (ARC_BUF_ENCRYPTED(buf))
nhdr->b_crypt_hdr.b_ebufcnt = 1;
- nhdr->b_l1hdr.b_mru_hits = 0;
- nhdr->b_l1hdr.b_mru_ghost_hits = 0;
- nhdr->b_l1hdr.b_mfu_hits = 0;
- nhdr->b_l1hdr.b_mfu_ghost_hits = 0;
- nhdr->b_l1hdr.b_l2_hits = 0;
(void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, tag);
buf->b_hdr = nhdr;
@@ -6716,7 +6714,6 @@ arc_release(arc_buf_t *buf, void *tag)
hdr->b_l1hdr.b_mru_ghost_hits = 0;
hdr->b_l1hdr.b_mfu_hits = 0;
hdr->b_l1hdr.b_mfu_ghost_hits = 0;
- hdr->b_l1hdr.b_l2_hits = 0;
arc_change_state(arc_anon, hdr, hash_lock);
hdr->b_l1hdr.b_arc_access = 0;
@@ -6870,7 +6867,8 @@ arc_write_ready(zio_t *zio)
if (ARC_BUF_ENCRYPTED(buf)) {
ASSERT3U(psize, >, 0);
ASSERT(ARC_BUF_COMPRESSED(buf));
- arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA);
+ arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT | ARC_HDR_ALLOC_RDATA |
+ ARC_HDR_USE_RESERVE);
abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
} else if (!abd_size_alloc_linear(arc_buf_size(buf)) ||
!arc_can_share(hdr, buf)) {
@@ -6881,17 +6879,19 @@ arc_write_ready(zio_t *zio)
*/
if (BP_IS_ENCRYPTED(bp)) {
ASSERT3U(psize, >, 0);
- arc_hdr_alloc_abd(hdr,
- ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA);
+ arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+ ARC_HDR_ALLOC_RDATA | ARC_HDR_USE_RESERVE);
abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
} else if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF &&
!ARC_BUF_COMPRESSED(buf)) {
ASSERT3U(psize, >, 0);
- arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
+ arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+ ARC_HDR_USE_RESERVE);
abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize);
} else {
ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr));
- arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
+ arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+ ARC_HDR_USE_RESERVE);
abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data,
arc_buf_size(buf));
}
@@ -7466,6 +7466,12 @@ arc_state_multilist_index_func(multilist_t *ml, void *obj)
multilist_get_num_sublists(ml));
}
+static unsigned int
+arc_state_l2c_multilist_index_func(multilist_t *ml, void *obj)
+{
+ panic("Header %p insert into arc_l2c_only %p", obj, ml);
+}
+
#define WARN_IF_TUNING_IGNORED(tuning, value, do_warn) do { \
if ((do_warn) && (tuning) && ((tuning) != (value))) { \
cmn_err(CE_WARN, \
@@ -7498,7 +7504,7 @@ arc_tuning_update(boolean_t verbose)
/* Valid range: 64M - <all physical memory> */
if ((zfs_arc_max) && (zfs_arc_max != arc_c_max) &&
- (zfs_arc_max >= 64 << 20) && (zfs_arc_max < allmem) &&
+ (zfs_arc_max >= MIN_ARC_MAX) && (zfs_arc_max < allmem) &&
(zfs_arc_max > arc_c_min)) {
arc_c_max = zfs_arc_max;
arc_c = MIN(arc_c, arc_c_max);
@@ -7613,14 +7619,18 @@ arc_state_init(void)
sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func);
+ /*
+ * L2 headers should never be on the L2 state list since they don't
+ * have L1 headers allocated. The index function panics on any insert.
+ */
multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA],
sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- arc_state_multilist_index_func);
+ arc_state_l2c_multilist_index_func);
multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- arc_state_multilist_index_func);
+ arc_state_l2c_multilist_index_func);
zfs_refcount_create(&arc_anon->arcs_esize[ARC_BUFC_METADATA]);
zfs_refcount_create(&arc_anon->arcs_esize[ARC_BUFC_DATA]);
@@ -7893,7 +7903,23 @@ arc_init(void)
arc_set_limits(allmem);
-#ifndef _KERNEL
+#ifdef _KERNEL
+ /*
+ * If zfs_arc_max is non-zero at init, it was set in the kernel
+ * environment before the module was loaded. In that case, don't
+ * reject the maximum just because it does not exceed arc_c_min;
+ * reset arc_c_min to a lower value instead.
+ * zfs_arc_min will be handled by arc_tuning_update().
+ */
+ if (zfs_arc_max != 0 && zfs_arc_max >= MIN_ARC_MAX &&
+ zfs_arc_max < allmem) {
+ arc_c_max = zfs_arc_max;
+ if (arc_c_min >= arc_c_max) {
+ arc_c_min = MAX(zfs_arc_max / 2,
+ 2ULL << SPA_MAXBLOCKSHIFT);
+ }
+ }
+#else
/*
* In userland, there's only the memory pressure that we artificially
* create (see arc_available_memory()). Don't let arc_c get too
@@ -7950,9 +7976,9 @@ arc_init(void)
}
arc_evict_zthr = zthr_create("arc_evict",
- arc_evict_cb_check, arc_evict_cb, NULL);
+ arc_evict_cb_check, arc_evict_cb, NULL, defclsyspri);
arc_reap_zthr = zthr_create_timer("arc_reap",
- arc_reap_cb_check, arc_reap_cb, NULL, SEC2NSEC(1));
+ arc_reap_cb_check, arc_reap_cb, NULL, SEC2NSEC(1), minclsyspri);
arc_warm = B_FALSE;
@@ -8687,7 +8713,7 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
*/
if (BP_IS_ENCRYPTED(bp)) {
abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
- B_TRUE);
+ ARC_HDR_DO_ADAPT | ARC_HDR_USE_RESERVE);
zio_crypt_decode_params_bp(bp, salt, iv);
zio_crypt_decode_mac_bp(bp, mac);
@@ -8724,7 +8750,7 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
!HDR_COMPRESSION_ENABLED(hdr)) {
abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
- B_TRUE);
+ ARC_HDR_DO_ADAPT | ARC_HDR_USE_RESERVE);
void *tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
@@ -10965,10 +10991,10 @@ EXPORT_SYMBOL(arc_add_prune_callback);
EXPORT_SYMBOL(arc_remove_prune_callback);
/* BEGIN CSTYLED */
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min, param_set_arc_long,
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min, param_set_arc_min,
param_get_long, ZMOD_RW, "Min arc size");
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, max, param_set_arc_long,
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, max, param_set_arc_max,
param_get_long, ZMOD_RW, "Max arc size");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit, param_set_arc_long,
diff --git a/sys/contrib/openzfs/module/zfs/refcount.c b/sys/contrib/openzfs/module/zfs/refcount.c
index a3877b8d15f6..354e021d9d26 100644
--- a/sys/contrib/openzfs/module/zfs/refcount.c
+++ b/sys/contrib/openzfs/module/zfs/refcount.c
@@ -112,13 +112,13 @@ zfs_refcount_destroy(zfs_refcount_t *rc)
int
zfs_refcount_is_zero(zfs_refcount_t *rc)
{
- return (rc->rc_count == 0);
+ return (zfs_refcount_count(rc) == 0);
}
int64_t
zfs_refcount_count(zfs_refcount_t *rc)
{
- return (rc->rc_count);
+ return (atomic_load_64(&rc->rc_count));
}
int64_t
@@ -127,15 +127,18 @@ zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, const void *holder)
reference_t *ref = NULL;
int64_t count;
- if (rc->rc_tracked) {
- ref = kmem_cache_alloc(reference_cache, KM_SLEEP);
- ref->ref_holder = holder;
- ref->ref_number = number;
+ if (!rc->rc_tracked) {
+ count = atomic_add_64_nv(&(rc)->rc_count, number);
+ ASSERT3U(count, >=, number);
+ return (count);
}
+
+ ref = kmem_cache_alloc(reference_cache, KM_SLEEP);
+ ref->ref_holder = holder;
+ ref->ref_number = number;
mutex_enter(&rc->rc_mtx);
ASSERT3U(rc->rc_count, >=, 0);
- if (rc->rc_tracked)
- list_insert_head(&rc->rc_list, ref);
+ list_insert_head(&rc->rc_list, ref);
rc->rc_count += number;
count = rc->rc_count;
mutex_exit(&rc->rc_mtx);
@@ -156,16 +159,14 @@ zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number,
reference_t *ref;
int64_t count;
- mutex_enter(&rc->rc_mtx);
- ASSERT3U(rc->rc_count, >=, number);
-
if (!rc->rc_tracked) {
- rc->rc_count -= number;
- count = rc->rc_count;
- mutex_exit(&rc->rc_mtx);
+ count = atomic_add_64_nv(&(rc)->rc_count, -number);
+ ASSERT3S(count, >=, 0);
return (count);
}
+ mutex_enter(&rc->rc_mtx);
+ ASSERT3U(rc->rc_count, >=, number);
for (ref = list_head(&rc->rc_list); ref;
ref = list_next(&rc->rc_list, ref)) {
if (ref->ref_holder == holder && ref->ref_number == number) {
@@ -242,12 +243,10 @@ zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number,
reference_t *ref;
boolean_t found = B_FALSE;
- mutex_enter(&rc->rc_mtx);
- if (!rc->rc_tracked) {
- mutex_exit(&rc->rc_mtx);
+ if (!rc->rc_tracked)
return;
- }
+ mutex_enter(&rc->rc_mtx);
for (ref = list_head(&rc->rc_list); ref;
ref = list_next(&rc->rc_list, ref)) {
if (ref->ref_holder == current_holder &&
@@ -279,13 +278,10 @@ zfs_refcount_held(zfs_refcount_t *rc, const void *holder)
{
reference_t *ref;
- mutex_enter(&rc->rc_mtx);
-
- if (!rc->rc_tracked) {
- mutex_exit(&rc->rc_mtx);
- return (rc->rc_count > 0);
- }
+ if (!rc->rc_tracked)
+ return (zfs_refcount_count(rc) > 0);
+ mutex_enter(&rc->rc_mtx);
for (ref = list_head(&rc->rc_list); ref;
ref = list_next(&rc->rc_list, ref)) {
if (ref->ref_holder == holder) {
@@ -307,13 +303,10 @@ zfs_refcount_not_held(zfs_refcount_t *rc, const void *holder)
{
reference_t *ref;
- mutex_enter(&rc->rc_mtx);
-
- if (!rc->rc_tracked) {
- mutex_exit(&rc->rc_mtx);
+ if (!rc->rc_tracked)
return (B_TRUE);
- }
+ mutex_enter(&rc->rc_mtx);
for (ref = list_head(&rc->rc_list); ref;
ref = list_next(&rc->rc_list, ref)) {
if (ref->ref_holder == holder) {
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index 8ca9b49ba3b3..55870bee47fb 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -2610,7 +2610,8 @@ spa_start_livelist_destroy_thread(spa_t *spa)
ASSERT3P(spa->spa_livelist_delete_zthr, ==, NULL);
spa->spa_livelist_delete_zthr =
zthr_create("z_livelist_destroy",
- spa_livelist_delete_cb_check, spa_livelist_delete_cb, spa);
+ spa_livelist_delete_cb_check, spa_livelist_delete_cb, spa,
+ minclsyspri);
}
typedef struct livelist_new_arg {
@@ -2820,7 +2821,7 @@ spa_start_livelist_condensing_thread(spa_t *spa)
spa->spa_livelist_condense_zthr =
zthr_create("z_livelist_condense",
spa_livelist_condense_cb_check,
- spa_livelist_condense_cb, spa);
+ spa_livelist_condense_cb, spa, minclsyspri);
}
static void
@@ -2838,7 +2839,7 @@ spa_spawn_aux_threads(spa_t *spa)
spa->spa_checkpoint_discard_zthr =
zthr_create("z_checkpoint_discard",
spa_checkpoint_discard_thread_check,
- spa_checkpoint_discard_thread, spa);
+ spa_checkpoint_discard_thread, spa, minclsyspri);
}
/*
diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
index e476663ab582..14ebf5514676 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
@@ -885,7 +885,7 @@ spa_start_indirect_condensing_thread(spa_t *spa)
ASSERT3P(spa->spa_condense_zthr, ==, NULL);
spa->spa_condense_zthr = zthr_create("z_indirect_condense",
spa_condense_indirect_thread_check,
- spa_condense_indirect_thread, spa);
+ spa_condense_indirect_thread, spa, minclsyspri);
}
/*
diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c
index 06d22f6df4c5..cc5b15b8c028 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_queue.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c
@@ -599,7 +599,6 @@ static zio_t *
vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
{
zio_t *first, *last, *aio, *dio, *mandatory, *nio;
- zio_link_t *zl = NULL;
uint64_t maxgap = 0;
uint64_t size;
uint64_t limit;
@@ -797,19 +796,12 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
ASSERT3U(abd_get_size(aio->io_abd), ==, aio->io_size);
/*
- * We need to drop the vdev queue's lock during zio_execute() to
- * avoid a deadlock that we could encounter due to lock order
- * reversal between vq_lock and io_lock in zio_change_priority().
+ * Callers must call zio_vdev_io_bypass() and zio_execute() on the
+ * parents of the aggregated I/O themselves. Doing that here would
+ * require dropping the queue's lock to avoid a deadlock caused by
+ * the lock order reversal between vq_lock and io_lock in
+ * zio_change_priority().
*/
- mutex_exit(&vq->vq_lock);
- while ((dio = zio_walk_parents(aio, &zl)) != NULL) {
- ASSERT3U(dio->io_type, ==, aio->io_type);
-
- zio_vdev_io_bypass(dio);
- zio_execute(dio);
- }
- mutex_enter(&vq->vq_lock);
-
return (aio);
}
@@ -847,23 +839,24 @@ again:
ASSERT3U(zio->io_priority, ==, p);
aio = vdev_queue_aggregate(vq, zio);
- if (aio != NULL)
+ if (aio != NULL) {
zio = aio;
- else
+ } else {
vdev_queue_io_remove(vq, zio);
- /*
- * If the I/O is or was optional and therefore has no data, we need to
- * simply discard it. We need to drop the vdev queue's lock to avoid a
- * deadlock that we could encounter since this I/O will complete
- * immediately.
- */
- if (zio->io_flags & ZIO_FLAG_NODATA) {
- mutex_exit(&vq->vq_lock);
- zio_vdev_io_bypass(zio);
- zio_execute(zio);
- mutex_enter(&vq->vq_lock);
- goto again;
+ /*
+ * If the I/O is or was optional and therefore has no data, we
+ * need to simply discard it. We need to drop the vdev queue's
+ * lock to avoid a deadlock that we could encounter since this
+ * I/O will complete immediately.
+ */
+ if (zio->io_flags & ZIO_FLAG_NODATA) {
+ mutex_exit(&vq->vq_lock);
+ zio_vdev_io_bypass(zio);
+ zio_execute(zio);
+ mutex_enter(&vq->vq_lock);
+ goto again;
+ }
}
vdev_queue_pending_add(vq, zio);
@@ -876,7 +869,8 @@ zio_t *
vdev_queue_io(zio_t *zio)
{
vdev_queue_t *vq = &zio->io_vd->vdev_queue;
- zio_t *nio;
+ zio_t *dio, *nio;
+ zio_link_t *zl = NULL;
if (zio->io_flags & ZIO_FLAG_DONT_QUEUE)
return (zio);
@@ -923,6 +917,11 @@ vdev_queue_io(zio_t *zio)
return (NULL);
if (nio->io_done == vdev_queue_agg_io_done) {
+ while ((dio = zio_walk_parents(nio, &zl)) != NULL) {
+ ASSERT3U(dio->io_type, ==, nio->io_type);
+ zio_vdev_io_bypass(dio);
+ zio_execute(dio);
+ }
zio_nowait(nio);
return (NULL);
}
@@ -934,7 +933,8 @@ void
vdev_queue_io_done(zio_t *zio)
{
vdev_queue_t *vq = &zio->io_vd->vdev_queue;
- zio_t *nio;
+ zio_t *dio, *nio;
+ zio_link_t *zl = NULL;
hrtime_t now = gethrtime();
vq->vq_io_complete_ts = now;
@@ -946,6 +946,11 @@ vdev_queue_io_done(zio_t *zio)
while ((nio = vdev_queue_io_to_issue(vq)) != NULL) {
mutex_exit(&vq->vq_lock);
if (nio->io_done == vdev_queue_agg_io_done) {
+ while ((dio = zio_walk_parents(nio, &zl)) != NULL) {
+ ASSERT3U(dio->io_type, ==, nio->io_type);
+ zio_vdev_io_bypass(dio);
+ zio_execute(dio);
+ }
zio_nowait(nio);
} else {
zio_vdev_io_reissue(nio);
diff --git a/sys/contrib/openzfs/module/zfs/zthr.c b/sys/contrib/openzfs/module/zfs/zthr.c
index 5ac2e30467e3..33fdda7b68d1 100644
--- a/sys/contrib/openzfs/module/zfs/zthr.c
+++ b/sys/contrib/openzfs/module/zfs/zthr.c
@@ -83,10 +83,11 @@
* can be cancelled while doing work and not while checking for work.
*
* To start a zthr:
- * zthr_t *zthr_pointer = zthr_create(checkfunc, func, args);
+ * zthr_t *zthr_pointer = zthr_create(checkfunc, func, args,
+ * pri);
* or
* zthr_t *zthr_pointer = zthr_create_timer(checkfunc, func,
- * args, max_sleep);
+ * args, max_sleep, pri);
*
* After that you should be able to wakeup, cancel, and resume the
* zthr from another thread using the zthr_pointer.
@@ -220,6 +221,9 @@ struct zthr {
*/
hrtime_t zthr_sleep_timeout;
+ /* Thread priority */
+ pri_t zthr_pri;
+
/* consumer-provided callbacks & data */
zthr_checkfunc_t *zthr_checkfunc;
zthr_func_t *zthr_func;
@@ -269,10 +273,10 @@ zthr_procedure(void *arg)
zthr_t *
zthr_create(const char *zthr_name, zthr_checkfunc_t *checkfunc,
- zthr_func_t *func, void *arg)
+ zthr_func_t *func, void *arg, pri_t pri)
{
return (zthr_create_timer(zthr_name, checkfunc,
- func, arg, (hrtime_t)0));
+ func, arg, (hrtime_t)0, pri));
}
/*
@@ -282,7 +286,7 @@ zthr_create(const char *zthr_name, zthr_checkfunc_t *checkfunc,
*/
zthr_t *
zthr_create_timer(const char *zthr_name, zthr_checkfunc_t *checkfunc,
- zthr_func_t *func, void *arg, hrtime_t max_sleep)
+ zthr_func_t *func, void *arg, hrtime_t max_sleep, pri_t pri)
{
zthr_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);
mutex_init(&t->zthr_state_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -296,9 +300,10 @@ zthr_create_timer(const char *zthr_name, zthr_checkfunc_t *checkfunc,
t->zthr_arg = arg;
t->zthr_sleep_timeout = max_sleep;
t->zthr_name = zthr_name;
+ t->zthr_pri = pri;
t->zthr_thread = thread_create_named(zthr_name, NULL, 0,
- zthr_procedure, t, 0, &p0, TS_RUN, minclsyspri);
+ zthr_procedure, t, 0, &p0, TS_RUN, pri);
mutex_exit(&t->zthr_state_lock);
@@ -423,7 +428,7 @@ zthr_resume(zthr_t *t)
*/
if (t->zthr_thread == NULL) {
t->zthr_thread = thread_create_named(t->zthr_name, NULL, 0,
- zthr_procedure, t, 0, &p0, TS_RUN, minclsyspri);
+ zthr_procedure, t, 0, &p0, TS_RUN, t->zthr_pri);
}
mutex_exit(&t->zthr_state_lock);
diff --git a/sys/contrib/openzfs/scripts/zfs.sh b/sys/contrib/openzfs/scripts/zfs.sh
index 39c49d71e59f..7870b8930cab 100755
--- a/sys/contrib/openzfs/scripts/zfs.sh
+++ b/sys/contrib/openzfs/scripts/zfs.sh
@@ -14,6 +14,7 @@ fi
PROG=zfs.sh
VERBOSE="no"
UNLOAD="no"
+LOAD="yes"
STACK_TRACER="no"
ZED_PIDFILE=${ZED_PIDFILE:-/var/run/zed.pid}
@@ -44,12 +45,13 @@ DESCRIPTION:
OPTIONS:
-h Show this message
-v Verbose
+ -r Reload modules
-u Unload modules
-S Enable kernel stack tracer
EOF
}
-while getopts 'hvuS' OPTION; do
+while getopts 'hvruS' OPTION; do
case $OPTION in
h)
usage
@@ -58,8 +60,13 @@ while getopts 'hvuS' OPTION; do
v)
VERBOSE="yes"
;;
+ r)
+ UNLOAD="yes"
+ LOAD="yes"
+ ;;
u)
UNLOAD="yes"
+ LOAD="no"
;;
S)
STACK_TRACER="yes"
@@ -262,7 +269,8 @@ if [ "$UNLOAD" = "yes" ]; then
unload_modules_linux
;;
esac
-else
+fi
+if [ "$LOAD" = "yes" ]; then
case $UNAME in
FreeBSD)
load_modules_freebsd
diff --git a/sys/contrib/openzfs/tests/Makefile.am b/sys/contrib/openzfs/tests/Makefile.am
index 4bdde9c4508a..1dfc2cc5f518 100644
--- a/sys/contrib/openzfs/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/Makefile.am
@@ -4,5 +4,5 @@ SUBDIRS = runfiles test-runner zfs-tests
EXTRA_DIST = README.md
-SHELLCHECKSCRIPTS = $$(find -name '*.sh')
+SHELLCHECKSCRIPTS = $$(find . -name '*.sh')
.PHONY: $(SHELLCHECKSCRIPTS)
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
index 536788f2eeed..895e705525fa 100644
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -575,6 +575,10 @@ tags = ['functional', 'compression']
tests = ['cp_files_001_pos']
tags = ['functional', 'cp_files']
+[tests/functional/crtime]
+tests = ['crtime_001_pos' ]
+tags = ['functional', 'crtime']
+
[tests/functional/ctime]
tests = ['ctime_001_pos' ]
tags = ['functional', 'ctime']
diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
index 4661a47f55a9..f5a43c66fe83 100755
--- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
+++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
@@ -76,6 +76,12 @@ python_deps_reason = 'Python modules missing: python-cffi'
tmpfile_reason = 'Kernel O_TMPFILE support required'
#
+# Some tests require the statx(2) system call on Linux which was first
+# introduced in the 4.11 kernel.
+#
+statx_reason = 'Kernel statx(2) system call required on Linux'
+
+#
# Some tests require that the NFS client and server utilities be installed.
#
share_reason = 'NFS client and server utilities required'
@@ -193,6 +199,7 @@ elif sys.platform.startswith('linux'):
#
maybe = {
'chattr/setup': ['SKIP', exec_reason],
+ 'crtime/crtime_001_pos': ['SKIP', statx_reason],
'cli_root/zdb/zdb_006_pos': ['FAIL', known_reason],
'cli_root/zfs_destroy/zfs_destroy_dev_removal_condense':
['FAIL', known_reason],
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
index 5a360bd5e705..1dc6881b6d1a 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
@@ -4024,6 +4024,34 @@ function stat_size #<path>
esac
}
+function stat_ctime #<path>
+{
+ typeset path=$1
+
+ case $(uname) in
+ FreeBSD)
+ stat -f %c "$path"
+ ;;
+ *)
+ stat -c %Z "$path"
+ ;;
+ esac
+}
+
+function stat_crtime #<path>
+{
+ typeset path=$1
+
+ case $(uname) in
+ FreeBSD)
+ stat -f %B "$path"
+ ;;
+ *)
+ stat -c %W "$path"
+ ;;
+ esac
+}
+
# Run a command as if it was being run in a TTY.
#
# Usage:
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/Makefile.am
index 3a5b7b0b9747..137cddd5f784 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/Makefile.am
@@ -16,6 +16,7 @@ SUBDIRS = \
cli_user \
compression \
cp_files \
+ crtime \
ctime \
deadman \
delegate \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create.cfg b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create.cfg
index 9bf25327ef8d..785d5a001603 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create.cfg
@@ -62,4 +62,4 @@ set -A size "8k" "8K" "35K" "1m" "1M" "1mb" "1mB" "1Mb" "1MB" "1g" "1G" \
# explicitly check that its size has been rounded up to the nearest multiple
# The volume with the exact size must exist in the "size" array above
set -A explicit_size_check "35K"
-set -A expected_rounded_size "40960"
+set -A expected_rounded_size "49152"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_timestamp.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_timestamp.ksh
index a4cedca49ce8..62c4e768c0a1 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_timestamp.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_timestamp.ksh
@@ -84,11 +84,7 @@ do
continue;
fi
- if is_freebsd; then
- filetime="$(stat -f "%c" $file)"
- else
- filetime="$(stat -c '%Z' $file)"
- fi
+ filetime=$(stat_ctime $file)
if [[ "$filetime" != "$ctime" ]]; then
log_fail "Unexpected ctime for file $file ($filetime != $ctime)"
else
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/Makefile.am
new file mode 100644
index 000000000000..13e1c2dde31b
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/Makefile.am
@@ -0,0 +1,5 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/crtime
+dist_pkgdata_SCRIPTS = \
+ cleanup.ksh \
+ setup.ksh \
+ crtime_001_pos.ksh
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/cleanup.ksh
new file mode 100755
index 000000000000..3166bd6ec16e
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/cleanup.ksh
@@ -0,0 +1,34 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/crtime_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/crtime_001_pos.ksh
new file mode 100755
index 000000000000..4f9810553fa6
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/crtime_001_pos.ksh
@@ -0,0 +1,71 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Portions Copyright 2021 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#
+# Verify crtime is functional with xattr=on|sa
+
+verify_runnable "both"
+
+#
+# The statx system call was first added in the 4.11 Linux kernel. Prior to this
+# change there was no mechanism to obtain birth time on Linux. Therefore, this
+# test is expected to fail on older kernels and is skipped.
+#
+if is_linux; then
+ if [[ $(linux_version) -lt $(linux_version "4.11") ]]; then
+ log_unsupported "Requires statx(2) system call on Linux"
+ fi
+ typeset stat_version=$(stat --version | awk '{ print $NF; exit }')
+ if compare_version_gte "8.30" "${stat_version}"; then
+ log_unsupported "Requires coreutils stat(1) > 8.30 on Linux"
+ fi
+fi
+
+log_assert "Verify crtime is functional."
+
+set -A args "sa" "on"
+typeset TESTFILE=$TESTDIR/testfile
+
+for arg in ${args[*]}; do
+ log_note "Testing with xattr set to $arg"
+ log_must zfs set xattr=$arg $TESTPOOL
+ rm -f $TESTFILE
+ log_must touch $TESTFILE
+ typeset -i crtime=$(stat_crtime $TESTFILE)
+ typeset -i ctime=$(stat_ctime $TESTFILE)
+ if (( crtime != ctime )); then
+ log_fail "Incorrect crtime ($crtime != $ctime)"
+ fi
+ log_must touch $TESTFILE
+ typeset -i crtime1=$(stat_crtime $TESTFILE)
+ if (( crtime1 != crtime )); then
+ log_fail "touch modified crtime ($crtime1 != $crtime)"
+ fi
+done
+
+log_pass "Verified crtime is functional."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/setup.ksh
new file mode 100755
index 000000000000..fc5cec3063a6
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/crtime/setup.ksh
@@ -0,0 +1,35 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+DISK=${DISKS%% *}
+default_setup $DISK
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_volume.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_volume.ksh
index a118b982240c..66ac29277ef0 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_volume.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_volume.ksh
@@ -45,14 +45,14 @@ log_assert "nopwrite works on volumes"
log_must zfs set compress=on $origin
log_must zfs set checksum=sha256 $origin
-dd if=/dev/urandom of=$vol bs=8192 count=4096 conv=notrunc >/dev/null \
+dd if=/dev/urandom of=$vol bs=16384 count=2048 conv=notrunc >/dev/null \
2>&1 || log_fail "dd into $origin failed."
zfs snapshot $origin@a || log_fail "zfs snap failed"
log_must zfs clone $origin@a $clone
log_must zfs set compress=on $clone
log_must zfs set checksum=sha256 $clone
block_device_wait
-dd if=$vol of=$volclone bs=8192 count=4096 conv=notrunc >/dev/null 2>&1 || \
+dd if=$vol of=$volclone bs=16384 count=2048 conv=notrunc >/dev/null 2>&1 || \
log_fail "dd into $clone failed."
log_must verify_nopwrite $origin $origin@a $clone
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/redacted_panic.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/redacted_panic.ksh
index bf3b17f35804..032d1fb91a2e 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/redacted_panic.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/redacted_panic.ksh
@@ -28,9 +28,15 @@ typeset ds_name="panic"
typeset sendfs="$POOL/$ds_name"
typeset recvfs="$POOL2/$ds_name"
typeset clone="$POOL/${ds_name}_clone"
-typeset stream=$(mktemp $tmpdir/stream.XXXX)
+typeset stream=$(mktemp $TEST_BASE_DIR/stream.XXXX)
-log_onexit redacted_cleanup $sendfs $recvfs
+function cleanup
+{
+ redacted_cleanup $sendfs $recvfs
+ rm -f $stream
+}
+
+log_onexit cleanup
log_must zfs create -o recsize=8k $sendfs
log_must dd if=/dev/urandom of=/$sendfs/file bs=1024k count=2048
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/reservation/reservation.shlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/reservation/reservation.shlib
index 49ee3b992dcc..47bd70f7cbcc 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/reservation/reservation.shlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/reservation/reservation.shlib
@@ -108,7 +108,7 @@ function create_multiple_fs # num_fs base_fs_name base_mnt_name
#
# This function compute the largest volume size which is multiple of volume
-# block size (default 8K) and not greater than the largest expected volsize.
+# block size (default 16K) and not greater than the largest expected volsize.
#
# $1 The largest expected volume size.
# $2 The volume block size
@@ -116,7 +116,7 @@ function create_multiple_fs # num_fs base_fs_name base_mnt_name
function floor_volsize #<largest_volsize> [volblksize]
{
typeset largest_volsize=$1
- typeset volblksize=${2:-8192}
+ typeset volblksize=${2:-16384}
if ((largest_volsize < volblksize)); then
log_fail "The largest_volsize must be greater than volblksize."
@@ -157,7 +157,7 @@ function volsize_to_reservation
typeset volblocksize=$(get_prop volblocksize $vol)
else
typeset ncopies=1
- typeset volblocksize=8192
+ typeset volblocksize=16384
fi
typeset nblocks=$((volsize / volblocksize))