aboutsummaryrefslogtreecommitdiff
path: root/include/sys
diff options
context:
space:
mode:
Diffstat (limited to 'include/sys')
-rw-r--r--include/sys/abd.h22
-rw-r--r--include/sys/abd_impl.h28
-rw-r--r--include/sys/arc.h20
-rw-r--r--include/sys/arc_impl.h135
-rw-r--r--include/sys/asm_linkage.h (renamed from include/sys/spa_boot.h)34
-rw-r--r--include/sys/avl.h2
-rw-r--r--include/sys/avl_impl.h2
-rw-r--r--include/sys/bitmap.h93
-rw-r--r--include/sys/bitops.h2
-rw-r--r--include/sys/blake3.h33
-rw-r--r--include/sys/bplist.h2
-rw-r--r--include/sys/bpobj.h5
-rw-r--r--include/sys/bptree.h2
-rw-r--r--include/sys/bqueue.h19
-rw-r--r--include/sys/brt.h63
-rw-r--r--include/sys/brt_impl.h199
-rw-r--r--include/sys/btree.h79
-rw-r--r--include/sys/crypto/api.h2
-rw-r--r--include/sys/crypto/common.h2
-rw-r--r--include/sys/crypto/icp.h2
-rw-r--r--include/sys/dataset_kstats.h11
-rw-r--r--include/sys/dbuf.h53
-rw-r--r--include/sys/ddt.h201
-rw-r--r--include/sys/ddt_impl.h95
-rw-r--r--include/sys/dmu.h52
-rw-r--r--include/sys/dmu_impl.h4
-rw-r--r--include/sys/dmu_objset.h10
-rw-r--r--include/sys/dmu_recv.h8
-rw-r--r--include/sys/dmu_redact.h2
-rw-r--r--include/sys/dmu_send.h2
-rw-r--r--include/sys/dmu_traverse.h2
-rw-r--r--include/sys/dmu_tx.h4
-rw-r--r--include/sys/dmu_zfetch.h20
-rw-r--r--include/sys/dnode.h55
-rw-r--r--include/sys/dsl_bookmark.h1
-rw-r--r--include/sys/dsl_crypt.h2
-rw-r--r--include/sys/dsl_dataset.h14
-rw-r--r--include/sys/dsl_deadlist.h4
-rw-r--r--include/sys/dsl_deleg.h2
-rw-r--r--include/sys/dsl_destroy.h2
-rw-r--r--include/sys/dsl_dir.h7
-rw-r--r--include/sys/dsl_pool.h16
-rw-r--r--include/sys/dsl_prop.h2
-rw-r--r--include/sys/dsl_scan.h31
-rw-r--r--include/sys/dsl_synctask.h2
-rw-r--r--include/sys/dsl_userhold.h2
-rw-r--r--include/sys/edonr.h41
-rw-r--r--include/sys/efi_partition.h2
-rw-r--r--include/sys/fm/fs/zfs.h12
-rw-r--r--include/sys/fm/protocol.h2
-rw-r--r--include/sys/fm/util.h4
-rw-r--r--include/sys/fs/zfs.h95
-rw-r--r--include/sys/metaslab.h6
-rw-r--r--include/sys/metaslab_impl.h9
-rw-r--r--include/sys/mmp.h2
-rw-r--r--include/sys/mntent.h5
-rw-r--r--include/sys/multilist.h5
-rw-r--r--include/sys/nvpair.h23
-rw-r--r--include/sys/nvpair_impl.h2
-rw-r--r--include/sys/pathname.h2
-rw-r--r--include/sys/qat.h2
-rw-r--r--include/sys/range_tree.h2
-rw-r--r--include/sys/rrwlock.h2
-rw-r--r--include/sys/sa.h2
-rw-r--r--include/sys/sa_impl.h2
-rw-r--r--include/sys/sha2.h127
-rw-r--r--include/sys/spa.h133
-rw-r--r--include/sys/spa_checkpoint.h2
-rw-r--r--include/sys/spa_checksum.h2
-rw-r--r--include/sys/spa_impl.h50
-rw-r--r--include/sys/spa_log_spacemap.h2
-rw-r--r--include/sys/space_map.h2
-rw-r--r--include/sys/space_reftree.h2
-rw-r--r--include/sys/sysevent.h2
-rw-r--r--include/sys/sysevent/dev.h5
-rw-r--r--include/sys/sysevent/eventdefs.h7
-rw-r--r--include/sys/txg.h4
-rw-r--r--include/sys/txg_impl.h5
-rw-r--r--include/sys/u8_textprep.h2
-rw-r--r--include/sys/u8_textprep_data.h2
-rw-r--r--include/sys/uberblock.h2
-rw-r--r--include/sys/uberblock_impl.h55
-rw-r--r--include/sys/uio_impl.h2
-rw-r--r--include/sys/unique.h2
-rw-r--r--include/sys/uuid.h2
-rw-r--r--include/sys/vdev.h29
-rw-r--r--include/sys/vdev_disk.h2
-rw-r--r--include/sys/vdev_draid.h2
-rw-r--r--include/sys/vdev_file.h2
-rw-r--r--include/sys/vdev_impl.h81
-rw-r--r--include/sys/vdev_initialize.h3
-rw-r--r--include/sys/vdev_raidz.h103
-rw-r--r--include/sys/vdev_raidz_impl.h55
-rw-r--r--include/sys/vdev_rebuild.h3
-rw-r--r--include/sys/vdev_removal.h2
-rw-r--r--include/sys/vdev_trim.h3
-rw-r--r--include/sys/xvattr.h2
-rw-r--r--include/sys/zap.h10
-rw-r--r--include/sys/zap_impl.h14
-rw-r--r--include/sys/zap_leaf.h12
-rw-r--r--include/sys/zcp.h4
-rw-r--r--include/sys/zcp_iter.h2
-rw-r--r--include/sys/zfeature.h2
-rw-r--r--include/sys/zfs_acl.h15
-rw-r--r--include/sys/zfs_chksum.h2
-rw-r--r--include/sys/zfs_context.h37
-rw-r--r--include/sys/zfs_debug.h4
-rw-r--r--include/sys/zfs_delay.h2
-rw-r--r--include/sys/zfs_file.h2
-rw-r--r--include/sys/zfs_fuid.h2
-rw-r--r--include/sys/zfs_impl.h69
-rw-r--r--include/sys/zfs_ioctl.h17
-rw-r--r--include/sys/zfs_ioctl_impl.h4
-rw-r--r--include/sys/zfs_onexit.h4
-rw-r--r--include/sys/zfs_project.h2
-rw-r--r--include/sys/zfs_quota.h2
-rw-r--r--include/sys/zfs_racct.h2
-rw-r--r--include/sys/zfs_refcount.h30
-rw-r--r--include/sys/zfs_rlock.h2
-rw-r--r--include/sys/zfs_sa.h2
-rw-r--r--include/sys/zfs_stat.h2
-rw-r--r--include/sys/zfs_sysfs.h2
-rw-r--r--include/sys/zfs_vfsops.h2
-rw-r--r--include/sys/zfs_vnops.h9
-rw-r--r--include/sys/zfs_znode.h46
-rw-r--r--include/sys/zil.h101
-rw-r--r--include/sys/zil_impl.h62
-rw-r--r--include/sys/zio.h157
-rw-r--r--include/sys/zio_checksum.h12
-rw-r--r--include/sys/zio_compress.h6
-rw-r--r--include/sys/zio_impl.h75
-rw-r--r--include/sys/zrlock.h5
-rw-r--r--include/sys/zvol.h7
-rw-r--r--include/sys/zvol_impl.h7
134 files changed, 2223 insertions, 779 deletions
diff --git a/include/sys/abd.h b/include/sys/abd.h
index 5c6bd0c271d4..19fe96292d5f 100644
--- a/include/sys/abd.h
+++ b/include/sys/abd.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -79,6 +79,9 @@ typedef struct abd {
typedef int abd_iter_func_t(void *buf, size_t len, void *priv);
typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *priv);
+#if defined(__linux__) && defined(_KERNEL)
+typedef int abd_iter_page_func_t(struct page *, size_t, size_t, void *);
+#endif
extern int zfs_abd_scatter_enabled;
@@ -86,10 +89,15 @@ extern int zfs_abd_scatter_enabled;
* Allocations and deallocations
*/
+__attribute__((malloc))
abd_t *abd_alloc(size_t, boolean_t);
+__attribute__((malloc))
abd_t *abd_alloc_linear(size_t, boolean_t);
+__attribute__((malloc))
abd_t *abd_alloc_gang(void);
+__attribute__((malloc))
abd_t *abd_alloc_for_io(size_t, boolean_t);
+__attribute__((malloc))
abd_t *abd_alloc_sametype(abd_t *, size_t);
boolean_t abd_size_alloc_linear(size_t);
void abd_gang_add(abd_t *, abd_t *, boolean_t);
@@ -120,6 +128,10 @@ void abd_release_ownership_of_buf(abd_t *);
int abd_iterate_func(abd_t *, size_t, size_t, abd_iter_func_t *, void *);
int abd_iterate_func2(abd_t *, abd_t *, size_t, size_t, size_t,
abd_iter_func2_t *, void *);
+#if defined(__linux__) && defined(_KERNEL)
+int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *,
+ void *);
+#endif
void abd_copy_off(abd_t *, abd_t *, size_t, size_t, size_t);
void abd_copy_from_buf_off(abd_t *, const void *, size_t, size_t);
void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t);
@@ -128,11 +140,11 @@ int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t);
void abd_zero_off(abd_t *, size_t, size_t);
void abd_verify(abd_t *);
-void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
- ssize_t csize, ssize_t dsize, const unsigned parity,
+void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
+ size_t csize, size_t dsize, const unsigned parity,
void (*func_raidz_gen)(void **, const void *, size_t, size_t));
void abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
- ssize_t tsize, const unsigned parity,
+ size_t tsize, const unsigned parity,
void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
const unsigned *mul),
const unsigned *mul);
@@ -208,6 +220,8 @@ void abd_fini(void);
/*
* Linux ABD bio functions
+ * Note: these are only needed to support vdev_classic. See comment in
+ * vdev_disk.c.
*/
#if defined(__linux__) && defined(_KERNEL)
unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t);
diff --git a/include/sys/abd_impl.h b/include/sys/abd_impl.h
index e96f1edfc8ce..f88ea25e245d 100644
--- a/include/sys/abd_impl.h
+++ b/include/sys/abd_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
* Copyright (c) 2016, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2023, 2024, Klara Inc.
*/
#ifndef _ABD_IMPL_H
@@ -38,12 +39,30 @@ typedef enum abd_stats_op {
ABDSTAT_DECR /* Decrease abdstat values */
} abd_stats_op_t;
-struct scatterlist; /* forward declaration */
+/* forward declarations */
+struct scatterlist;
+struct page;
struct abd_iter {
/* public interface */
- void *iter_mapaddr; /* addr corresponding to iter_pos */
- size_t iter_mapsize; /* length of data valid at mapaddr */
+ union {
+ /* for abd_iter_map()/abd_iter_unmap() */
+ struct {
+ /* addr corresponding to iter_pos */
+ void *iter_mapaddr;
+ /* length of data valid at mapaddr */
+ size_t iter_mapsize;
+ };
+ /* for abd_iter_page() */
+ struct {
+ /* current page */
+ struct page *iter_page;
+ /* offset of data in page */
+ size_t iter_page_doff;
+ /* size of data in page */
+ size_t iter_page_dsize;
+ };
+ };
/* private */
abd_t *iter_abd; /* ABD being iterated through */
@@ -78,6 +97,7 @@ boolean_t abd_iter_at_end(struct abd_iter *);
void abd_iter_advance(struct abd_iter *, size_t);
void abd_iter_map(struct abd_iter *);
void abd_iter_unmap(struct abd_iter *);
+void abd_iter_page(struct abd_iter *);
/*
* Helper macros
diff --git a/include/sys/arc.h b/include/sys/arc.h
index 8cee8be4bc93..05307aab99e3 100644
--- a/include/sys/arc.h
+++ b/include/sys/arc.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -81,10 +81,10 @@ typedef struct arc_prune arc_prune_t;
typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb,
const blkptr_t *bp, arc_buf_t *buf, void *priv);
typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
-typedef void arc_prune_func_t(int64_t bytes, void *priv);
+typedef void arc_prune_func_t(uint64_t bytes, void *priv);
/* Shared module parameters */
-extern int zfs_arc_average_blocksize;
+extern uint_t zfs_arc_average_blocksize;
extern int l2arc_exclude_special;
/* generic arc_done_func_t's which you can use */
@@ -115,7 +115,7 @@ typedef enum arc_flags
ARC_FLAG_PREFETCH = 1 << 2, /* I/O is a prefetch */
ARC_FLAG_CACHED = 1 << 3, /* I/O was in cache */
ARC_FLAG_L2CACHE = 1 << 4, /* cache in L2ARC */
- ARC_FLAG_PREDICTIVE_PREFETCH = 1 << 5, /* I/O from zfetch */
+ ARC_FLAG_UNCACHED = 1 << 5, /* evict after use */
ARC_FLAG_PRESCIENT_PREFETCH = 1 << 6, /* long min lifespan */
/*
@@ -195,13 +195,11 @@ typedef enum arc_buf_flags {
struct arc_buf {
arc_buf_hdr_t *b_hdr;
arc_buf_t *b_next;
- kmutex_t b_evict_lock;
void *b_data;
arc_buf_flags_t b_flags;
};
typedef enum arc_buf_contents {
- ARC_BUFC_INVALID, /* invalid type */
ARC_BUFC_DATA, /* buffer contains data */
ARC_BUFC_METADATA, /* buffer contains metadata */
ARC_BUFC_NUMTYPES
@@ -229,6 +227,7 @@ typedef enum arc_state_type {
ARC_STATE_MFU,
ARC_STATE_MFU_GHOST,
ARC_STATE_L2C_ONLY,
+ ARC_STATE_UNCACHED,
ARC_STATE_NUMTYPES
} arc_state_type_t;
@@ -302,12 +301,11 @@ int arc_referenced(arc_buf_t *buf);
int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
arc_read_done_func_t *done, void *priv, zio_priority_t priority,
int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
-zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
- blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp,
+zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
+ arc_buf_t *buf, boolean_t uncached, boolean_t l2arc, const zio_prop_t *zp,
arc_write_done_func_t *ready, arc_write_done_func_t *child_ready,
- arc_write_done_func_t *physdone, arc_write_done_func_t *done,
- void *priv, zio_priority_t priority, int zio_flags,
- const zbookmark_phys_t *zb);
+ arc_write_done_func_t *done, void *priv, zio_priority_t priority,
+ int zio_flags, const zbookmark_phys_t *zb);
arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *priv);
void arc_remove_prune_callback(arc_prune_t *p);
diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h
index 755e87fe6e0e..defebe3b2fbb 100644
--- a/include/sys/arc_impl.h
+++ b/include/sys/arc_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -30,6 +30,7 @@
#define _SYS_ARC_IMPL_H
#include <sys/arc.h>
+#include <sys/multilist.h>
#include <sys/zio_crypt.h>
#include <sys/zthr.h>
#include <sys/aggsum.h>
@@ -46,6 +47,7 @@ extern "C" {
* ARC_mru_ghost - recently used, no longer in cache
* ARC_mfu - frequently used, currently cached
* ARC_mfu_ghost - frequently used, no longer in cache
+ * ARC_uncached - uncacheable prefetch, to be evicted
* ARC_l2c_only - exists in L2ARC but not other states
* When there are no active references to the buffer, they are
* are linked onto a list in one of these arc states. These are
@@ -81,14 +83,17 @@ typedef struct arc_state {
*/
arc_state_type_t arcs_state;
/*
+ * total amount of data in this state.
+ */
+ zfs_refcount_t arcs_size[ARC_BUFC_NUMTYPES] ____cacheline_aligned;
+ /*
* total amount of evictable data in this state
*/
- zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES] ____cacheline_aligned;
+ zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES];
/*
- * total amount of data in this state; this includes: evictable,
- * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
+ * amount of hit bytes for this state (counted only for ghost states)
*/
- zfs_refcount_t arcs_size;
+ wmsum_t arcs_hits[ARC_BUFC_NUMTYPES];
} arc_state_t;
typedef struct arc_callback arc_callback_t;
@@ -101,9 +106,14 @@ struct arc_callback {
boolean_t acb_compressed;
boolean_t acb_noauth;
boolean_t acb_nobuf;
+ boolean_t acb_wait;
+ int acb_wait_error;
+ kmutex_t acb_wait_lock;
+ kcondvar_t acb_wait_cv;
zbookmark_phys_t acb_zb;
zio_t *acb_zio_dummy;
zio_t *acb_zio_head;
+ arc_callback_t *acb_prev;
arc_callback_t *acb_next;
};
@@ -113,7 +123,6 @@ struct arc_write_callback {
void *awcb_private;
arc_write_done_func_t *awcb_ready;
arc_write_done_func_t *awcb_children_ready;
- arc_write_done_func_t *awcb_physdone;
arc_write_done_func_t *awcb_done;
arc_buf_t *awcb_buf;
};
@@ -150,13 +159,6 @@ struct arc_write_callback {
* these two allocation states.
*/
typedef struct l1arc_buf_hdr {
- kmutex_t b_freeze_lock;
- zio_cksum_t *b_freeze_cksum;
-
- /* for waiting on reads to complete */
- kcondvar_t b_cv;
- uint8_t b_byteswap;
-
/* protected by arc state mutex */
arc_state_t *b_state;
multilist_node_t b_arc_node;
@@ -167,7 +169,7 @@ typedef struct l1arc_buf_hdr {
uint32_t b_mru_ghost_hits;
uint32_t b_mfu_hits;
uint32_t b_mfu_ghost_hits;
- uint32_t b_bufcnt;
+ uint8_t b_byteswap;
arc_buf_t *b_buf;
/* self protecting */
@@ -175,6 +177,11 @@ typedef struct l1arc_buf_hdr {
arc_callback_t *b_acb;
abd_t *b_pabd;
+
+#ifdef ZFS_DEBUG
+ zio_cksum_t *b_freeze_cksum;
+ kmutex_t b_freeze_lock;
+#endif
} l1arc_buf_hdr_t;
typedef enum l2arc_dev_hdr_flags_t {
@@ -349,8 +356,9 @@ typedef struct l2arc_lb_ptr_buf {
#define L2BLK_SET_PREFETCH(field, x) BF64_SET((field), 39, 1, x)
#define L2BLK_GET_CHECKSUM(field) BF64_GET((field), 40, 8)
#define L2BLK_SET_CHECKSUM(field, x) BF64_SET((field), 40, 8, x)
-#define L2BLK_GET_TYPE(field) BF64_GET((field), 48, 8)
-#define L2BLK_SET_TYPE(field, x) BF64_SET((field), 48, 8, x)
+/* +/- 1 here are to keep compatibility after ARC_BUFC_INVALID removal. */
+#define L2BLK_GET_TYPE(field) (BF64_GET((field), 48, 8) - 1)
+#define L2BLK_SET_TYPE(field, x) BF64_SET((field), 48, 8, (x) + 1)
#define L2BLK_GET_PROTECTED(field) BF64_GET((field), 56, 1)
#define L2BLK_SET_PROTECTED(field, x) BF64_SET((field), 56, 1, x)
#define L2BLK_GET_STATE(field) BF64_GET((field), 57, 4)
@@ -424,12 +432,12 @@ typedef struct l2arc_dev {
*/
typedef struct arc_buf_hdr_crypt {
abd_t *b_rabd; /* raw encrypted data */
- dmu_object_type_t b_ot; /* object type */
- uint32_t b_ebufcnt; /* count of encrypted buffers */
/* dsobj for looking up encryption key for l2arc encryption */
uint64_t b_dsobj;
+ dmu_object_type_t b_ot; /* object type */
+
/* encryption parameters */
uint8_t b_salt[ZIO_DATA_SALT_LEN];
uint8_t b_iv[ZIO_DATA_IV_LEN];
@@ -511,20 +519,33 @@ struct arc_buf_hdr {
};
typedef struct arc_stats {
+ /* Number of requests that were satisfied without I/O. */
kstat_named_t arcstat_hits;
+ /* Number of requests for which I/O was already running. */
+ kstat_named_t arcstat_iohits;
+ /* Number of requests for which I/O has to be issued. */
kstat_named_t arcstat_misses;
+ /* Same three, but specifically for demand data. */
kstat_named_t arcstat_demand_data_hits;
+ kstat_named_t arcstat_demand_data_iohits;
kstat_named_t arcstat_demand_data_misses;
+ /* Same three, but specifically for demand metadata. */
kstat_named_t arcstat_demand_metadata_hits;
+ kstat_named_t arcstat_demand_metadata_iohits;
kstat_named_t arcstat_demand_metadata_misses;
+ /* Same three, but specifically for prefetch data. */
kstat_named_t arcstat_prefetch_data_hits;
+ kstat_named_t arcstat_prefetch_data_iohits;
kstat_named_t arcstat_prefetch_data_misses;
+ /* Same three, but specifically for prefetch metadata. */
kstat_named_t arcstat_prefetch_metadata_hits;
+ kstat_named_t arcstat_prefetch_metadata_iohits;
kstat_named_t arcstat_prefetch_metadata_misses;
kstat_named_t arcstat_mru_hits;
kstat_named_t arcstat_mru_ghost_hits;
kstat_named_t arcstat_mfu_hits;
kstat_named_t arcstat_mfu_ghost_hits;
+ kstat_named_t arcstat_uncached_hits;
kstat_named_t arcstat_deleted;
/*
* Number of buffers that could not be evicted because the hash lock
@@ -560,7 +581,9 @@ typedef struct arc_stats {
kstat_named_t arcstat_hash_collisions;
kstat_named_t arcstat_hash_chains;
kstat_named_t arcstat_hash_chain_max;
- kstat_named_t arcstat_p;
+ kstat_named_t arcstat_meta;
+ kstat_named_t arcstat_pd;
+ kstat_named_t arcstat_pm;
kstat_named_t arcstat_c;
kstat_named_t arcstat_c_min;
kstat_named_t arcstat_c_max;
@@ -633,6 +656,8 @@ typedef struct arc_stats {
* are all included in this value.
*/
kstat_named_t arcstat_anon_size;
+ kstat_named_t arcstat_anon_data;
+ kstat_named_t arcstat_anon_metadata;
/*
* Number of bytes consumed by ARC buffers that meet the
* following criteria: backing buffers of type ARC_BUFC_DATA,
@@ -654,6 +679,8 @@ typedef struct arc_stats {
* are all included in this value.
*/
kstat_named_t arcstat_mru_size;
+ kstat_named_t arcstat_mru_data;
+ kstat_named_t arcstat_mru_metadata;
/*
* Number of bytes consumed by ARC buffers that meet the
* following criteria: backing buffers of type ARC_BUFC_DATA,
@@ -678,6 +705,8 @@ typedef struct arc_stats {
* buffers *would have* consumed this number of bytes.
*/
kstat_named_t arcstat_mru_ghost_size;
+ kstat_named_t arcstat_mru_ghost_data;
+ kstat_named_t arcstat_mru_ghost_metadata;
/*
* Number of bytes that *would have been* consumed by ARC
* buffers that are eligible for eviction, of type
@@ -697,6 +726,8 @@ typedef struct arc_stats {
* are all included in this value.
*/
kstat_named_t arcstat_mfu_size;
+ kstat_named_t arcstat_mfu_data;
+ kstat_named_t arcstat_mfu_metadata;
/*
* Number of bytes consumed by ARC buffers that are eligible for
* eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu
@@ -715,6 +746,8 @@ typedef struct arc_stats {
* arcstat_mru_ghost_size for more details.
*/
kstat_named_t arcstat_mfu_ghost_size;
+ kstat_named_t arcstat_mfu_ghost_data;
+ kstat_named_t arcstat_mfu_ghost_metadata;
/*
* Number of bytes that *would have been* consumed by ARC
* buffers that are eligible for eviction, of type
@@ -727,6 +760,23 @@ typedef struct arc_stats {
* ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
*/
kstat_named_t arcstat_mfu_ghost_evictable_metadata;
+ /*
+ * Total number of bytes that are going to be evicted from ARC due to
+ * ARC_FLAG_UNCACHED being set.
+ */
+ kstat_named_t arcstat_uncached_size;
+ kstat_named_t arcstat_uncached_data;
+ kstat_named_t arcstat_uncached_metadata;
+ /*
+ * Number of data bytes that are going to be evicted from ARC due to
+ * ARC_FLAG_UNCACHED being set.
+ */
+ kstat_named_t arcstat_uncached_evictable_data;
+ /*
+ * Number of metadata bytes that that are going to be evicted from ARC
+ * due to ARC_FLAG_UNCACHED being set.
+ */
+ kstat_named_t arcstat_uncached_evictable_metadata;
kstat_named_t arcstat_l2_hits;
kstat_named_t arcstat_l2_misses;
/*
@@ -839,13 +889,20 @@ typedef struct arc_stats {
kstat_named_t arcstat_loaned_bytes;
kstat_named_t arcstat_prune;
kstat_named_t arcstat_meta_used;
- kstat_named_t arcstat_meta_limit;
kstat_named_t arcstat_dnode_limit;
- kstat_named_t arcstat_meta_max;
- kstat_named_t arcstat_meta_min;
kstat_named_t arcstat_async_upgrade_sync;
+ /* Number of predictive prefetch requests. */
+ kstat_named_t arcstat_predictive_prefetch;
+ /* Number of requests for which predictive prefetch has completed. */
kstat_named_t arcstat_demand_hit_predictive_prefetch;
+ /* Number of requests for which predictive prefetch was running. */
+ kstat_named_t arcstat_demand_iohit_predictive_prefetch;
+ /* Number of prescient prefetch requests. */
+ kstat_named_t arcstat_prescient_prefetch;
+ /* Number of requests for which prescient prefetch has completed. */
kstat_named_t arcstat_demand_hit_prescient_prefetch;
+ /* Number of requests for which prescient prefetch was running. */
+ kstat_named_t arcstat_demand_iohit_prescient_prefetch;
kstat_named_t arcstat_need_free;
kstat_named_t arcstat_sys_free;
kstat_named_t arcstat_raw_size;
@@ -855,19 +912,25 @@ typedef struct arc_stats {
typedef struct arc_sums {
wmsum_t arcstat_hits;
+ wmsum_t arcstat_iohits;
wmsum_t arcstat_misses;
wmsum_t arcstat_demand_data_hits;
+ wmsum_t arcstat_demand_data_iohits;
wmsum_t arcstat_demand_data_misses;
wmsum_t arcstat_demand_metadata_hits;
+ wmsum_t arcstat_demand_metadata_iohits;
wmsum_t arcstat_demand_metadata_misses;
wmsum_t arcstat_prefetch_data_hits;
+ wmsum_t arcstat_prefetch_data_iohits;
wmsum_t arcstat_prefetch_data_misses;
wmsum_t arcstat_prefetch_metadata_hits;
+ wmsum_t arcstat_prefetch_metadata_iohits;
wmsum_t arcstat_prefetch_metadata_misses;
wmsum_t arcstat_mru_hits;
wmsum_t arcstat_mru_ghost_hits;
wmsum_t arcstat_mfu_hits;
wmsum_t arcstat_mfu_ghost_hits;
+ wmsum_t arcstat_uncached_hits;
wmsum_t arcstat_deleted;
wmsum_t arcstat_mutex_miss;
wmsum_t arcstat_access_skip;
@@ -889,7 +952,7 @@ typedef struct arc_sums {
wmsum_t arcstat_data_size;
wmsum_t arcstat_metadata_size;
wmsum_t arcstat_dbuf_size;
- aggsum_t arcstat_dnode_size;
+ wmsum_t arcstat_dnode_size;
wmsum_t arcstat_bonus_size;
wmsum_t arcstat_l2_hits;
wmsum_t arcstat_l2_misses;
@@ -934,10 +997,14 @@ typedef struct arc_sums {
wmsum_t arcstat_memory_direct_count;
wmsum_t arcstat_memory_indirect_count;
wmsum_t arcstat_prune;
- aggsum_t arcstat_meta_used;
+ wmsum_t arcstat_meta_used;
wmsum_t arcstat_async_upgrade_sync;
+ wmsum_t arcstat_predictive_prefetch;
wmsum_t arcstat_demand_hit_predictive_prefetch;
+ wmsum_t arcstat_demand_iohit_predictive_prefetch;
+ wmsum_t arcstat_prescient_prefetch;
wmsum_t arcstat_demand_hit_prescient_prefetch;
+ wmsum_t arcstat_demand_iohit_prescient_prefetch;
wmsum_t arcstat_raw_size;
wmsum_t arcstat_cached_only_in_progress;
wmsum_t arcstat_abd_chunk_waste_size;
@@ -958,7 +1025,9 @@ typedef struct arc_evict_waiter {
#define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1)
#define arc_no_grow ARCSTAT(arcstat_no_grow) /* do not grow cache size */
-#define arc_p ARCSTAT(arcstat_p) /* target size of MRU */
+#define arc_meta ARCSTAT(arcstat_meta) /* target frac of metadata */
+#define arc_pd ARCSTAT(arcstat_pd) /* target frac of data MRU */
+#define arc_pm ARCSTAT(arcstat_pm) /* target frac of meta MRU */
#define arc_c ARCSTAT(arcstat_c) /* target size of cache */
#define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */
#define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */
@@ -970,23 +1039,24 @@ typedef struct arc_evict_waiter {
#define arc_mfu (&ARC_mfu)
#define arc_mfu_ghost (&ARC_mfu_ghost)
#define arc_l2c_only (&ARC_l2c_only)
+#define arc_uncached (&ARC_uncached)
extern taskq_t *arc_prune_taskq;
extern arc_stats_t arc_stats;
extern arc_sums_t arc_sums;
extern hrtime_t arc_growtime;
extern boolean_t arc_warm;
-extern int arc_grow_retry;
-extern int arc_no_grow_shift;
-extern int arc_shrink_shift;
+extern uint_t arc_grow_retry;
+extern uint_t arc_no_grow_shift;
+extern uint_t arc_shrink_shift;
extern kmutex_t arc_prune_mtx;
extern list_t arc_prune_list;
extern arc_state_t ARC_mfu;
extern arc_state_t ARC_mru;
extern uint_t zfs_arc_pc_percent;
-extern int arc_lotsfree_percent;
-extern unsigned long zfs_arc_min;
-extern unsigned long zfs_arc_max;
+extern uint_t arc_lotsfree_percent;
+extern uint64_t zfs_arc_min;
+extern uint64_t zfs_arc_max;
extern void arc_reduce_target_size(int64_t to_free);
extern boolean_t arc_reclaim_needed(void);
@@ -995,7 +1065,6 @@ extern void arc_wait_for_eviction(uint64_t, boolean_t);
extern void arc_lowmem_init(void);
extern void arc_lowmem_fini(void);
-extern void arc_prune_async(int64_t);
extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg);
extern uint64_t arc_free_memory(void);
extern int64_t arc_available_memory(void);
@@ -1003,7 +1072,7 @@ extern void arc_tuning_update(boolean_t);
extern void arc_register_hotplug(void);
extern void arc_unregister_hotplug(void);
-extern int param_set_arc_long(ZFS_MODULE_PARAM_ARGS);
+extern int param_set_arc_u64(ZFS_MODULE_PARAM_ARGS);
extern int param_set_arc_int(ZFS_MODULE_PARAM_ARGS);
extern int param_set_arc_min(ZFS_MODULE_PARAM_ARGS);
extern int param_set_arc_max(ZFS_MODULE_PARAM_ARGS);
diff --git a/include/sys/spa_boot.h b/include/sys/asm_linkage.h
index 1d3622f5a108..749157d4c3db 100644
--- a/include/sys/spa_boot.h
+++ b/include/sys/asm_linkage.h
@@ -2,11 +2,12 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -19,24 +20,29 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#ifndef _SYS_SPA_BOOT_H
-#define _SYS_SPA_BOOT_H
+#ifndef _SYS_ASM_LINKAGE_H
+#define _SYS_ASM_LINKAGE_H
-#include <sys/nvpair.h>
+#define ASMABI
+
+#if defined(__i386) || defined(__amd64)
+
+#include <sys/ia32/asm_linkage.h> /* XX64 x86/sys/asm_linkage.h */
-#ifdef __cplusplus
-extern "C" {
#endif
-extern char *spa_get_bootprop(char *prop);
-extern void spa_free_bootprop(char *prop);
+#if defined(_KERNEL) && defined(HAVE_KERNEL_OBJTOOL)
-#ifdef __cplusplus
-}
+#include <asm/frame.h>
+
+#else /* userspace */
+#define FRAME_BEGIN
+#define FRAME_END
#endif
-#endif /* _SYS_SPA_BOOT_H */
+
+#endif /* _SYS_ASM_LINKAGE_H */
diff --git a/include/sys/avl.h b/include/sys/avl.h
index 20e88f2a6b06..8818e3edb292 100644
--- a/include/sys/avl.h
+++ b/include/sys/avl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/avl_impl.h b/include/sys/avl_impl.h
index c464a62a1ca6..85277b42b471 100644
--- a/include/sys/avl_impl.h
+++ b/include/sys/avl_impl.h
@@ -7,7 +7,7 @@
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/bitmap.h b/include/sys/bitmap.h
new file mode 100644
index 000000000000..71eeba592cfd
--- /dev/null
+++ b/include/sys/bitmap.h
@@ -0,0 +1,93 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
+
+
+#ifndef _SYS_BITMAP_H
+#define _SYS_BITMAP_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Operations on bitmaps of arbitrary size
+ * A bitmap is a vector of 1 or more ulong_t's.
+ * The user of the package is responsible for range checks and keeping
+ * track of sizes.
+ */
+
+#ifdef _LP64
+#define BT_ULSHIFT 6 /* log base 2 of BT_NBIPUL, to extract word index */
+#define BT_ULSHIFT32 5 /* log base 2 of BT_NBIPUL, to extract word index */
+#else
+#define BT_ULSHIFT 5 /* log base 2 of BT_NBIPUL, to extract word index */
+#endif
+
+#define BT_NBIPUL (1 << BT_ULSHIFT) /* n bits per ulong_t */
+#define BT_ULMASK (BT_NBIPUL - 1) /* to extract bit index */
+
+/*
+ * bitmap is a ulong_t *, bitindex an index_t
+ *
+ * The macros BT_WIM and BT_BIW internal; there is no need
+ * for users of this package to use them.
+ */
+
+/*
+ * word in map
+ */
+#define BT_WIM(bitmap, bitindex) \
+ ((bitmap)[(bitindex) >> BT_ULSHIFT])
+/*
+ * bit in word
+ */
+#define BT_BIW(bitindex) \
+ (1UL << ((bitindex) & BT_ULMASK))
+
+/*
+ * These are public macros
+ *
+ * BT_BITOUL == n bits to n ulong_t's
+ */
+#define BT_BITOUL(nbits) \
+ (((nbits) + BT_NBIPUL - 1l) / BT_NBIPUL)
+#define BT_SIZEOFMAP(nbits) \
+ (BT_BITOUL(nbits) * sizeof (ulong_t))
+#define BT_TEST(bitmap, bitindex) \
+ ((BT_WIM((bitmap), (bitindex)) & BT_BIW(bitindex)) ? 1 : 0)
+#define BT_SET(bitmap, bitindex) \
+ { BT_WIM((bitmap), (bitindex)) |= BT_BIW(bitindex); }
+#define BT_CLEAR(bitmap, bitindex) \
+ { BT_WIM((bitmap), (bitindex)) &= ~BT_BIW(bitindex); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_BITMAP_H */
diff --git a/include/sys/bitops.h b/include/sys/bitops.h
index 69d07d76552a..5c477b38b205 100644
--- a/include/sys/bitops.h
+++ b/include/sys/bitops.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/blake3.h b/include/sys/blake3.h
index b3391c5f2349..b981b18db943 100644
--- a/include/sys/blake3.h
+++ b/include/sys/blake3.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -22,11 +22,11 @@
/*
* Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
* Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
- * Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
+ * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
*/
-#ifndef BLAKE3_H
-#define BLAKE3_H
+#ifndef _SYS_BLAKE3_H
+#define _SYS_BLAKE3_H
#ifdef _KERNEL
#include <sys/types.h>
@@ -72,7 +72,7 @@ typedef struct {
*/
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
- /* const blake3_impl_ops_t *ops */
+ /* const blake3_ops_t *ops */
const void *ops;
} BLAKE3_CTX;
@@ -97,29 +97,8 @@ extern void **blake3_per_cpu_ctx;
extern void blake3_per_cpu_ctx_init(void);
extern void blake3_per_cpu_ctx_fini(void);
-/* return number of supported implementations */
-extern int blake3_get_impl_count(void);
-
-/* return id of selected implementation */
-extern int blake3_get_impl_id(void);
-
-/* return name of selected implementation */
-extern const char *blake3_get_impl_name(void);
-
-/* setup id as fastest implementation */
-extern void blake3_set_impl_fastest(uint32_t id);
-
-/* set implementation by id */
-extern void blake3_set_impl_id(uint32_t id);
-
-/* set implementation by name */
-extern int blake3_set_impl_name(const char *name);
-
-/* set startup implementation */
-extern void blake3_setup_impl(void);
-
#ifdef __cplusplus
}
#endif
-#endif /* BLAKE3_H */
+#endif /* _SYS_BLAKE3_H */
diff --git a/include/sys/bplist.h b/include/sys/bplist.h
index f8deaf8437e6..53dd346767fe 100644
--- a/include/sys/bplist.h
+++ b/include/sys/bplist.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/bpobj.h b/include/sys/bpobj.h
index 16e403526cff..81bc0fe21086 100644
--- a/include/sys/bpobj.h
+++ b/include/sys/bpobj.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -60,7 +60,7 @@ typedef struct bpobj {
kmutex_t bpo_lock;
objset_t *bpo_os;
uint64_t bpo_object;
- int bpo_epb;
+ uint32_t bpo_epb;
uint8_t bpo_havecomp;
uint8_t bpo_havesubobj;
uint8_t bpo_havefreed;
@@ -87,6 +87,7 @@ int livelist_bpobj_iterate_from_nofree(bpobj_t *bpo, bpobj_itor_t func,
void *arg, int64_t start);
void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx);
+void bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj);
void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed,
dmu_tx_t *tx);
diff --git a/include/sys/bptree.h b/include/sys/bptree.h
index 327c128bf493..9d189446ab69 100644
--- a/include/sys/bptree.h
+++ b/include/sys/bptree.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/bqueue.h b/include/sys/bqueue.h
index 797aecd791a3..edcee16227ec 100644
--- a/include/sys/bqueue.h
+++ b/include/sys/bqueue.h
@@ -27,27 +27,30 @@ extern "C" {
typedef struct bqueue {
list_t bq_list;
+ size_t bq_size;
+ list_t bq_dequeuing_list;
+ size_t bq_dequeuing_size;
+ list_t bq_enqueuing_list;
+ size_t bq_enqueuing_size;
kmutex_t bq_lock;
kcondvar_t bq_add_cv;
kcondvar_t bq_pop_cv;
- uint64_t bq_size;
- uint64_t bq_maxsize;
- uint64_t bq_fill_fraction;
+ size_t bq_maxsize;
+ uint_t bq_fill_fraction;
size_t bq_node_offset;
} bqueue_t;
typedef struct bqueue_node {
list_node_t bqn_node;
- uint64_t bqn_size;
+ size_t bqn_size;
} bqueue_node_t;
-int bqueue_init(bqueue_t *, uint64_t, uint64_t, size_t);
+int bqueue_init(bqueue_t *, uint_t, size_t, size_t);
void bqueue_destroy(bqueue_t *);
-void bqueue_enqueue(bqueue_t *, void *, uint64_t);
-void bqueue_enqueue_flush(bqueue_t *, void *, uint64_t);
+void bqueue_enqueue(bqueue_t *, void *, size_t);
+void bqueue_enqueue_flush(bqueue_t *, void *, size_t);
void *bqueue_dequeue(bqueue_t *);
-boolean_t bqueue_empty(bqueue_t *);
#ifdef __cplusplus
}
diff --git a/include/sys/brt.h b/include/sys/brt.h
new file mode 100644
index 000000000000..f73df95058d9
--- /dev/null
+++ b/include/sys/brt.h
@@ -0,0 +1,63 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2020, 2021, 2022 by Pawel Jakub Dawidek
+ */
+
+#ifndef _SYS_BRT_H
+#define _SYS_BRT_H
+
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/fs/zfs.h>
+#include <sys/zio.h>
+#include <sys/dmu.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern boolean_t brt_entry_decref(spa_t *spa, const blkptr_t *bp);
+extern uint64_t brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp);
+
+extern uint64_t brt_get_dspace(spa_t *spa);
+extern uint64_t brt_get_used(spa_t *spa);
+extern uint64_t brt_get_saved(spa_t *spa);
+extern uint64_t brt_get_ratio(spa_t *spa);
+
+extern boolean_t brt_maybe_exists(spa_t *spa, const blkptr_t *bp);
+extern void brt_init(void);
+extern void brt_fini(void);
+
+extern void brt_pending_add(spa_t *spa, const blkptr_t *bp, dmu_tx_t *tx);
+extern void brt_pending_remove(spa_t *spa, const blkptr_t *bp, dmu_tx_t *tx);
+extern void brt_pending_apply(spa_t *spa, uint64_t txg);
+
+extern void brt_create(spa_t *spa);
+extern int brt_load(spa_t *spa);
+extern void brt_unload(spa_t *spa);
+extern void brt_sync(spa_t *spa, uint64_t txg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_BRT_H */
diff --git a/include/sys/brt_impl.h b/include/sys/brt_impl.h
new file mode 100644
index 000000000000..9cc06fbb2c3a
--- /dev/null
+++ b/include/sys/brt_impl.h
@@ -0,0 +1,199 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2020, 2021, 2022 by Pawel Jakub Dawidek
+ */
+
+#ifndef _SYS_BRT_IMPL_H
+#define _SYS_BRT_IMPL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * BRT - Block Reference Table.
+ */
+#define BRT_OBJECT_VDEV_PREFIX "com.fudosecurity:brt:vdev:"
+
+/*
+ * We divide each VDEV into 16MB chunks. Each chunk is represented in memory
+ * by a 16bit counter, thus 1TB VDEV requires 128kB of memory: (1TB / 16MB) * 2B
+ * Each element in this array represents how many BRT entries do we have in this
+ * chunk of storage. We always load this entire array into memory and update as
+ * needed. By having it in memory we can quickly tell (during zio_free()) if
+ * there are any BRT entries that we might need to update.
+ *
+ * This value cannot be larger than 16MB, at least as long as we support
+ * 512 byte block sizes. With 512 byte block size we can have exactly
+ * 32768 blocks in 16MB. In 32MB we could have 65536 blocks, which is one too
+ * many for a 16bit counter.
+ */
+#define BRT_RANGESIZE (16 * 1024 * 1024)
+_Static_assert(BRT_RANGESIZE / SPA_MINBLOCKSIZE <= UINT16_MAX,
+ "BRT_RANGESIZE is too large.");
+/*
+ * We don't want to update the whole structure every time. Maintain bitmap
+ * of dirty blocks within the regions, so that a single bit represents a
+ * block size of entcounts. For example if we have a 1PB vdev then all
+ * entcounts take 128MB of memory ((64TB / 16MB) * 2B). We can divide this
+ * 128MB array of entcounts into 32kB disk blocks, as we don't want to update
+ * the whole 128MB on disk when we have updated only a single entcount.
+ * We maintain a bitmap where each 32kB disk block within 128MB entcounts array
+ * is represented by a single bit. This gives us 4096 bits. A set bit in the
+ * bitmap means that we had a change in at least one of the 16384 entcounts
+ * that reside on a 32kB disk block (32kB / sizeof (uint16_t)).
+ */
+#define BRT_BLOCKSIZE (32 * 1024)
+#define BRT_RANGESIZE_TO_NBLOCKS(size) \
+ (((size) - 1) / BRT_BLOCKSIZE / sizeof (uint16_t) + 1)
+
+#define BRT_LITTLE_ENDIAN 0
+#define BRT_BIG_ENDIAN 1
+#ifdef _ZFS_LITTLE_ENDIAN
+#define BRT_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN
+#define BRT_NON_NATIVE_BYTEORDER BRT_BIG_ENDIAN
+#else
+#define BRT_NATIVE_BYTEORDER BRT_BIG_ENDIAN
+#define BRT_NON_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN
+#endif
+
+typedef struct brt_vdev_phys {
+ uint64_t bvp_mos_entries;
+ uint64_t bvp_size;
+ uint64_t bvp_byteorder;
+ uint64_t bvp_totalcount;
+ uint64_t bvp_rangesize;
+ uint64_t bvp_usedspace;
+ uint64_t bvp_savedspace;
+} brt_vdev_phys_t;
+
+typedef struct brt_vdev {
+ /*
+ * VDEV id.
+ */
+ uint64_t bv_vdevid;
+ /*
+ * Is the structure initiated?
+ * (bv_entcount and bv_bitmap are allocated?)
+ */
+ boolean_t bv_initiated;
+ /*
+ * Object number in the MOS for the entcount array and brt_vdev_phys.
+ */
+ uint64_t bv_mos_brtvdev;
+ /*
+ * Object number in the MOS for the entries table.
+ */
+ uint64_t bv_mos_entries;
+ /*
+ * Entries to sync.
+ */
+ avl_tree_t bv_tree;
+ /*
+ * Does the bv_entcount[] array needs byte swapping?
+ */
+ boolean_t bv_need_byteswap;
+ /*
+ * Number of entries in the bv_entcount[] array.
+ */
+ uint64_t bv_size;
+ /*
+ * This is the array with BRT entry count per BRT_RANGESIZE.
+ */
+ uint16_t *bv_entcount;
+ /*
+ * Sum of all bv_entcount[]s.
+ */
+ uint64_t bv_totalcount;
+ /*
+ * Space on disk occupied by cloned blocks (without compression).
+ */
+ uint64_t bv_usedspace;
+ /*
+ * How much additional space would be occupied without block cloning.
+ */
+ uint64_t bv_savedspace;
+ /*
+ * brt_vdev_phys needs updating on disk.
+ */
+ boolean_t bv_meta_dirty;
+ /*
+ * bv_entcount[] needs updating on disk.
+ */
+ boolean_t bv_entcount_dirty;
+ /*
+ * bv_entcount[] potentially can be a bit too big to sychronize it all
+ * when we just changed few entcounts. The fields below allow us to
+ * track updates to bv_entcount[] array since the last sync.
+ * A single bit in the bv_bitmap represents as many entcounts as can
+ * fit into a single BRT_BLOCKSIZE.
+ * For example we have 65536 entcounts in the bv_entcount array
+ * (so the whole array is 128kB). We updated bv_entcount[2] and
+ * bv_entcount[5]. In that case only first bit in the bv_bitmap will
+ * be set and we will write only first BRT_BLOCKSIZE out of 128kB.
+ */
+ ulong_t *bv_bitmap;
+ uint64_t bv_nblocks;
+} brt_vdev_t;
+
+/*
+ * In-core brt
+ */
+typedef struct brt {
+ krwlock_t brt_lock;
+ spa_t *brt_spa;
+#define brt_mos brt_spa->spa_meta_objset
+ uint64_t brt_rangesize;
+ uint64_t brt_usedspace;
+ uint64_t brt_savedspace;
+ avl_tree_t brt_pending_tree[TXG_SIZE];
+ kmutex_t brt_pending_lock[TXG_SIZE];
+ /* Sum of all entries across all bv_trees. */
+ uint64_t brt_nentries;
+ brt_vdev_t *brt_vdevs;
+ uint64_t brt_nvdevs;
+} brt_t;
+
+/* Size of bre_offset / sizeof (uint64_t). */
+#define BRT_KEY_WORDS (1)
+
+/*
+ * In-core brt entry.
+ * On-disk we use bre_offset as the key and bre_refcount as the value.
+ */
+typedef struct brt_entry {
+ uint64_t bre_offset;
+ uint64_t bre_refcount;
+ avl_node_t bre_node;
+} brt_entry_t;
+
+typedef struct brt_pending_entry {
+ blkptr_t bpe_bp;
+ int bpe_count;
+ avl_node_t bpe_node;
+} brt_pending_entry_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_BRT_IMPL_H */
diff --git a/include/sys/btree.h b/include/sys/btree.h
index a901d654ef1c..6e05eee8f01d 100644
--- a/include/sys/btree.h
+++ b/include/sys/btree.h
@@ -65,7 +65,7 @@ extern "C" {
* them, and increased memory overhead. Increasing these values results in
* higher variance in operation time, and reduces memory overhead.
*/
-#define BTREE_CORE_ELEMS 128
+#define BTREE_CORE_ELEMS 126
#define BTREE_LEAF_SIZE 4096
extern kmem_cache_t *zfs_btree_leaf_cache;
@@ -95,9 +95,6 @@ typedef struct zfs_btree_leaf {
uint8_t btl_elems[];
} zfs_btree_leaf_t;
-#define BTREE_LEAF_ESIZE (BTREE_LEAF_SIZE - \
- offsetof(zfs_btree_leaf_t, btl_elems))
-
typedef struct zfs_btree_index {
zfs_btree_hdr_t *bti_node;
uint32_t bti_offset;
@@ -108,16 +105,69 @@ typedef struct zfs_btree_index {
boolean_t bti_before;
} zfs_btree_index_t;
-typedef struct btree {
- zfs_btree_hdr_t *bt_root;
- int64_t bt_height;
+typedef struct btree zfs_btree_t;
+typedef void * (*bt_find_in_buf_f) (zfs_btree_t *, uint8_t *, uint32_t,
+ const void *, zfs_btree_index_t *);
+
+struct btree {
+ int (*bt_compar) (const void *, const void *);
+ bt_find_in_buf_f bt_find_in_buf;
size_t bt_elem_size;
+ size_t bt_leaf_size;
uint32_t bt_leaf_cap;
+ int32_t bt_height;
uint64_t bt_num_elems;
uint64_t bt_num_nodes;
+ zfs_btree_hdr_t *bt_root;
zfs_btree_leaf_t *bt_bulk; // non-null if bulk loading
- int (*bt_compar) (const void *, const void *);
-} zfs_btree_t;
+};
+
+/*
+ * Implementation of Shar's algorithm designed to accelerate binary search by
+ * eliminating impossible to predict branches.
+ *
+ * For optimality, this should be used to generate the search function in the
+ * same file as the comparator and the comparator should be marked
+ * `__attribute__((always_inline) inline` so that the compiler will inline it.
+ *
+ * Arguments are:
+ *
+ * NAME - The function name for this instance of the search function. Use it
+ * in a subsequent call to zfs_btree_create().
+ * T - The element type stored inside the B-Tree.
+ * COMP - A comparator to compare two nodes, it must return exactly: -1, 0,
+ * or +1 -1 for <, 0 for ==, and +1 for >. For trivial comparisons,
+ * TREE_CMP() from avl.h can be used in a boilerplate function.
+ */
+/* BEGIN CSTYLED */
+#define ZFS_BTREE_FIND_IN_BUF_FUNC(NAME, T, COMP) \
+_Pragma("GCC diagnostic push") \
+_Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") \
+static void * \
+NAME(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems, \
+ const void *value, zfs_btree_index_t *where) \
+{ \
+ T *i = (T *)buf; \
+ (void) tree; \
+ _Pragma("GCC unroll 9") \
+ while (nelems > 1) { \
+ uint32_t half = nelems / 2; \
+ nelems -= half; \
+ i += (COMP(&i[half - 1], value) < 0) * half; \
+ } \
+ \
+ int comp = COMP(i, value); \
+ where->bti_offset = (i - (T *)buf) + (comp < 0); \
+ where->bti_before = (comp != 0); \
+ \
+ if (comp == 0) { \
+ return (i); \
+ } \
+ \
+ return (NULL); \
+} \
+_Pragma("GCC diagnostic pop")
+/* END CSTYLED */
/*
* Allocate and deallocate caches for btree nodes.
@@ -131,10 +181,19 @@ void zfs_btree_fini(void);
* tree - the tree to be initialized
* compar - function to compare two nodes, it must return exactly: -1, 0, or +1
* -1 for <, 0 for ==, and +1 for >
+ * find - optional function to accelerate searches inside B-Tree nodes
+ * through Shar's algorithm and comparator inlining. Setting this to
+ * NULL will use a generic function. The function should be created
+ * using ZFS_BTREE_FIND_IN_BUF_FUNC() in the same file as compar.
+ * compar should be marked `__attribute__((always_inline)) inline` or
+ * performance is unlikely to improve very much.
* size - the value of sizeof(struct my_type)
+ * lsize - custom leaf size
*/
void zfs_btree_create(zfs_btree_t *, int (*) (const void *, const void *),
- size_t);
+ bt_find_in_buf_f, size_t);
+void zfs_btree_create_custom(zfs_btree_t *, int (*)(const void *, const void *),
+ bt_find_in_buf_f, size_t, size_t);
/*
* Find a node with a matching value in the tree. Returns the matching node
diff --git a/include/sys/crypto/api.h b/include/sys/crypto/api.h
index b3d6c9c071b9..88e0ac4d9699 100644
--- a/include/sys/crypto/api.h
+++ b/include/sys/crypto/api.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/crypto/common.h b/include/sys/crypto/common.h
index 45a95d7eed71..261e88eceeea 100644
--- a/include/sys/crypto/common.h
+++ b/include/sys/crypto/common.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/crypto/icp.h b/include/sys/crypto/icp.h
index ae7f7eae529e..8c3f19886fd8 100644
--- a/include/sys/crypto/icp.h
+++ b/include/sys/crypto/icp.h
@@ -7,7 +7,7 @@
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/dataset_kstats.h b/include/sys/dataset_kstats.h
index b165b98576dd..c81a07f0c116 100644
--- a/include/sys/dataset_kstats.h
+++ b/include/sys/dataset_kstats.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -30,6 +30,7 @@
#include <sys/wmsum.h>
#include <sys/dmu.h>
#include <sys/kstat.h>
+#include <sys/zil.h>
typedef struct dataset_sum_stats_t {
wmsum_t dss_writes;
@@ -56,15 +57,21 @@ typedef struct dataset_kstat_values {
* entry is removed from the unlinked set
*/
kstat_named_t dkv_nunlinked;
+ /*
+ * Per dataset zil kstats
+ */
+ zil_kstat_values_t dkv_zil_stats;
} dataset_kstat_values_t;
typedef struct dataset_kstats {
dataset_sum_stats_t dk_sums;
+ zil_sums_t dk_zil_sums;
kstat_t *dk_kstats;
} dataset_kstats_t;
-void dataset_kstats_create(dataset_kstats_t *, objset_t *);
+int dataset_kstats_create(dataset_kstats_t *, objset_t *);
void dataset_kstats_destroy(dataset_kstats_t *);
+void dataset_kstats_rename(dataset_kstats_t *dk, const char *);
void dataset_kstats_update_write_kstats(dataset_kstats_t *, int64_t);
void dataset_kstats_update_read_kstats(dataset_kstats_t *, int64_t);
diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h
index 959e111cee7a..3808a04cba80 100644
--- a/include/sys/dbuf.h
+++ b/include/sys/dbuf.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -55,26 +55,31 @@ extern "C" {
#define DB_RF_NEVERWAIT (1 << 4)
#define DB_RF_CACHED (1 << 5)
#define DB_RF_NO_DECRYPT (1 << 6)
+#define DB_RF_PARTIAL_FIRST (1 << 7)
+#define DB_RF_PARTIAL_MORE (1 << 8)
/*
* The simplified state transition diagram for dbufs looks like:
*
- * +----> READ ----+
- * | |
- * | V
- * (alloc)-->UNCACHED CACHED-->EVICTING-->(free)
- * | ^ ^
- * | | |
- * +----> FILL ----+ |
- * | |
- * | |
- * +--------> NOFILL -------+
+ * +--> READ --+
+ * | |
+ * | V
+ * (alloc)-->UNCACHED CACHED-->EVICTING-->(free)
+ * ^ | ^ ^
+ * | | | |
+ * | +--> FILL --+ |
+ * | | |
+ * | | |
+ * | +------> NOFILL -----+
+ * | |
+ * +---------------+
*
* DB_SEARCH is an invalid state for a dbuf. It is used by dbuf_free_range
* to find all dbufs in a range of a dnode and must be less than any other
* dbuf_states_t (see comment on dn_dbufs in dnode.h).
*/
typedef enum dbuf_states {
+ DB_MARKER = -2,
DB_SEARCH = -1,
DB_UNCACHED,
DB_FILL,
@@ -170,6 +175,7 @@ typedef struct dbuf_dirty_record {
override_states_t dr_override_state;
uint8_t dr_copies;
boolean_t dr_nopwrite;
+ boolean_t dr_brtwrite;
boolean_t dr_has_raw_params;
/*
@@ -190,7 +196,7 @@ typedef struct dbuf_dirty_record {
uint64_t dr_blkid;
abd_t *dr_abd;
zio_prop_t dr_props;
- enum zio_flag dr_flags;
+ zio_flag_t dr_flags;
} dll;
} dt;
} dbuf_dirty_record_t;
@@ -294,6 +300,8 @@ typedef struct dmu_buf_impl {
/* Tells us which dbuf cache this dbuf is in, if any */
dbuf_cached_state_t db_caching_status;
+ uint64_t db_hash;
+
/* Data which is unique to data (leaf) blocks: */
/* User callback information. */
@@ -319,14 +327,19 @@ typedef struct dmu_buf_impl {
uint8_t db_pending_evict;
uint8_t db_dirtycnt;
+
+ /* The buffer was partially read. More reads may follow. */
+ uint8_t db_partial_read;
} dmu_buf_impl_t;
-#define DBUF_RWLOCKS 8192
-#define DBUF_HASH_RWLOCK(h, idx) (&(h)->hash_rwlocks[(idx) & (DBUF_RWLOCKS-1)])
+#define DBUF_HASH_MUTEX(h, idx) \
+ (&(h)->hash_mutexes[(idx) & ((h)->hash_mutex_mask)])
+
typedef struct dbuf_hash_table {
uint64_t hash_table_mask;
+ uint64_t hash_mutex_mask;
dmu_buf_impl_t **hash_table;
- krwlock_t hash_rwlocks[DBUF_RWLOCKS] ____cacheline_aligned;
+ kmutex_t *hash_mutexes;
} dbuf_hash_table_t;
typedef void (*dbuf_prefetch_fn)(void *, uint64_t, uint64_t, boolean_t);
@@ -362,23 +375,25 @@ void dbuf_rele_and_unlock(dmu_buf_impl_t *db, const void *tag,
boolean_t evicting);
dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level,
- uint64_t blkid);
+ uint64_t blkid, uint64_t *hash_out);
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
+void dmu_buf_will_clone(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
-void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
-void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
+void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail);
+boolean_t dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx, boolean_t failed);
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid,
dmu_tx_t *tx);
+boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db);
void dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
bp_embedded_type_t etype, enum zio_compress comp,
int uncompressed_size, int compressed_size, int byteorder, dmu_tx_t *tx);
int dmu_lightweight_write_by_dnode(dnode_t *dn, uint64_t offset, abd_t *abd,
- const struct zio_prop *zp, enum zio_flag flags, dmu_tx_t *tx);
+ const struct zio_prop *zp, zio_flag_t flags, dmu_tx_t *tx);
void dmu_buf_redact(dmu_buf_t *dbuf, dmu_tx_t *tx);
void dbuf_destroy(dmu_buf_impl_t *db);
diff --git a/include/sys/ddt.h b/include/sys/ddt.h
index 25be6f56dddc..726f1a3902eb 100644
--- a/include/sys/ddt.h
+++ b/include/sys/ddt.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2023, Klara Inc.
*/
#ifndef _SYS_DDT_H
@@ -39,32 +40,50 @@ extern "C" {
struct abd;
/*
- * On-disk DDT formats, in the desired search order (newest version first).
+ * DDT on-disk storage object types. Each one corresponds to specific
+ * implementation, see ddt_ops_t. The value itself is not stored on disk.
+ *
+ * When searching for an entry, objects types will be searched in this order.
+ *
+ * Note that DDT_TYPES is used as the "no type" for new entries that have not
+ * yet been written to a storage object.
*/
-enum ddt_type {
- DDT_TYPE_ZAP = 0,
+typedef enum {
+ DDT_TYPE_ZAP = 0, /* ZAP storage object, ddt_zap */
DDT_TYPES
-};
+} ddt_type_t;
+
+_Static_assert(DDT_TYPES <= UINT8_MAX,
+ "ddt_type_t must fit in a uint8_t");
+
+/* New and updated entries recieve this type, see ddt_sync_entry() */
+#define DDT_TYPE_DEFAULT (DDT_TYPE_ZAP)
/*
- * DDT classes, in the desired search order (highest replication level first).
+ * DDT storage classes. Each class has a separate storage object for each type.
+ * The value itself is not stored on disk.
+ *
+ * When search for an entry, object classes will be searched in this order.
+ *
+ * Note that DDT_CLASSES is used as the "no class" for new entries that have not
+ * yet been written to a storage object.
*/
-enum ddt_class {
- DDT_CLASS_DITTO = 0,
- DDT_CLASS_DUPLICATE,
- DDT_CLASS_UNIQUE,
+typedef enum {
+ DDT_CLASS_DITTO = 0, /* entry has ditto blocks (obsolete) */
+ DDT_CLASS_DUPLICATE, /* entry has multiple references */
+ DDT_CLASS_UNIQUE, /* entry has a single reference */
DDT_CLASSES
-};
-
-#define DDT_TYPE_CURRENT 0
+} ddt_class_t;
-#define DDT_COMPRESS_BYTEORDER_MASK 0x80
-#define DDT_COMPRESS_FUNCTION_MASK 0x7f
+_Static_assert(DDT_CLASSES < UINT8_MAX,
+ "ddt_class_t must fit in a uint8_t");
/*
- * On-disk ddt entry: key (name) and physical storage (value).
+ * The "key" part of an on-disk entry. This is the unique "name" for a block,
+ * that is, that parts of the block pointer that will always be the same for
+ * the same data.
*/
-typedef struct ddt_key {
+typedef struct {
zio_cksum_t ddk_cksum; /* 256-bit block checksum */
/*
* Encoded with logical & physical size, encryption, and compression,
@@ -76,6 +95,10 @@ typedef struct ddt_key {
uint64_t ddk_prop;
} ddt_key_t;
+/*
+ * Macros for accessing parts of a ddt_key_t. These are similar to their BP_*
+ * counterparts.
+ */
#define DDK_GET_LSIZE(ddk) \
BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
#define DDK_SET_LSIZE(ddk, x) \
@@ -92,18 +115,25 @@ typedef struct ddt_key {
#define DDK_GET_CRYPT(ddk) BF64_GET((ddk)->ddk_prop, 39, 1)
#define DDK_SET_CRYPT(ddk, x) BF64_SET((ddk)->ddk_prop, 39, 1, x)
-#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))
-
-#define DDE_GET_NDVAS(dde) (DDK_GET_CRYPT(&dde->dde_key) \
- ? SPA_DVAS_PER_BP - 1 : SPA_DVAS_PER_BP)
-
-typedef struct ddt_phys {
+/*
+ * The "value" part for an on-disk entry. These are the "physical"
+ * characteristics of the stored block, such as its location on disk (DVAs),
+ * birth txg and ref count.
+ *
+ * Note that an entry has an array of four ddt_phys_t, one for each number of
+ * DVAs (copies= property) and another for additional "ditto" copies. Most
+ * users of ddt_phys_t will handle indexing into or counting the phys they
+ * want.
+ */
+typedef struct {
dva_t ddp_dva[SPA_DVAS_PER_BP];
uint64_t ddp_refcnt;
uint64_t ddp_phys_birth;
} ddt_phys_t;
/*
+ * Named indexes into the ddt_phys_t array in each entry.
+ *
* Note, we no longer generate new DDT_PHYS_DITTO-type blocks. However,
* we maintain the ability to free existing dedup-ditto blocks.
*/
@@ -116,99 +146,83 @@ enum ddt_phys_type {
};
/*
- * In-core ddt entry
+ * A "live" entry, holding changes to an entry made this txg, and other data to
+ * support loading, updating and repairing the entry.
*/
-struct ddt_entry {
- ddt_key_t dde_key;
- ddt_phys_t dde_phys[DDT_PHYS_TYPES];
+
+/* State flags for dde_flags */
+#define DDE_FLAG_LOADED (1 << 0) /* entry ready for use */
+
+typedef struct {
+ /* key must be first for ddt_key_compare */
+ ddt_key_t dde_key; /* ddt_tree key */
+ ddt_phys_t dde_phys[DDT_PHYS_TYPES]; /* on-disk data */
+
+ /* in-flight update IOs */
zio_t *dde_lead_zio[DDT_PHYS_TYPES];
+
+ /* copy of data after a repair read, to be rewritten */
struct abd *dde_repair_abd;
- enum ddt_type dde_type;
- enum ddt_class dde_class;
- uint8_t dde_loading;
- uint8_t dde_loaded;
- kcondvar_t dde_cv;
- avl_node_t dde_node;
-};
+
+ /* storage type and class the entry was loaded from */
+ ddt_type_t dde_type;
+ ddt_class_t dde_class;
+
+ uint8_t dde_flags; /* load state flags */
+ kcondvar_t dde_cv; /* signaled when load completes */
+
+ avl_node_t dde_node; /* ddt_tree node */
+} ddt_entry_t;
/*
- * In-core ddt
+ * In-core DDT object. This covers all entries and stats for a the whole pool
+ * for a given checksum type.
*/
-struct ddt {
- kmutex_t ddt_lock;
- avl_tree_t ddt_tree;
- avl_tree_t ddt_repair_tree;
- enum zio_checksum ddt_checksum;
- spa_t *ddt_spa;
- objset_t *ddt_os;
- uint64_t ddt_stat_object;
+typedef struct {
+ kmutex_t ddt_lock; /* protects changes to all fields */
+
+ avl_tree_t ddt_tree; /* "live" (changed) entries this txg */
+
+ avl_tree_t ddt_repair_tree; /* entries being repaired */
+
+ enum zio_checksum ddt_checksum; /* checksum algorithm in use */
+ spa_t *ddt_spa; /* pool this ddt is on */
+ objset_t *ddt_os; /* ddt objset (always MOS) */
+
+ /* per-type/per-class entry store objects */
uint64_t ddt_object[DDT_TYPES][DDT_CLASSES];
+
+ /* object ids for whole-ddt and per-type/per-class stats */
+ uint64_t ddt_stat_object;
+ ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES];
+
+ /* type/class stats by power-2-sized referenced blocks */
ddt_histogram_t ddt_histogram[DDT_TYPES][DDT_CLASSES];
ddt_histogram_t ddt_histogram_cache[DDT_TYPES][DDT_CLASSES];
- ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES];
- avl_node_t ddt_node;
-};
+} ddt_t;
/*
- * In-core and on-disk bookmark for DDT walks
+ * In-core and on-disk bookmark for DDT walks. This is a cursor for ddt_walk(),
+ * and is stable across calls, even if the DDT is updated, the pool is
+ * restarted or loaded on another system, or OpenZFS is upgraded.
*/
-typedef struct ddt_bookmark {
+typedef struct {
uint64_t ddb_class;
uint64_t ddb_type;
uint64_t ddb_checksum;
uint64_t ddb_cursor;
} ddt_bookmark_t;
-/*
- * Ops vector to access a specific DDT object type.
- */
-typedef struct ddt_ops {
- char ddt_op_name[32];
- int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
- boolean_t prehash);
- int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
- int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
- void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
- ddt_entry_t *dde);
- int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
- dmu_tx_t *tx);
- int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
- dmu_tx_t *tx);
- int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde,
- uint64_t *walk);
- int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
-} ddt_ops_t;
-
-#define DDT_NAMELEN 107
-
-extern void ddt_object_name(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, char *name);
-extern int ddt_object_walk(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, uint64_t *walk, ddt_entry_t *dde);
-extern int ddt_object_count(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, uint64_t *count);
-extern int ddt_object_info(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, dmu_object_info_t *);
-extern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz);
-
extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp,
uint64_t txg);
extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk,
const ddt_phys_t *ddp, blkptr_t *bp);
-extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
-
extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp);
extern void ddt_phys_clear(ddt_phys_t *ddp);
extern void ddt_phys_addref(ddt_phys_t *ddp);
extern void ddt_phys_decref(ddt_phys_t *ddp);
-extern void ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp,
- uint64_t txg);
extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp);
-extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);
-
-extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
@@ -220,9 +234,6 @@ extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);
extern uint64_t ddt_get_dedup_dspace(spa_t *spa);
extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
-extern size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len);
-extern void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len);
-
extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
extern void ddt_enter(ddt_t *ddt);
extern void ddt_exit(ddt_t *ddt);
@@ -232,23 +243,21 @@ extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
-extern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class,
+extern boolean_t ddt_class_contains(spa_t *spa, ddt_class_t max_class,
const blkptr_t *bp);
extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp);
extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde);
-extern int ddt_entry_compare(const void *x1, const void *x2);
+extern int ddt_key_compare(const void *x1, const void *x2);
extern void ddt_create(spa_t *spa);
extern int ddt_load(spa_t *spa);
extern void ddt_unload(spa_t *spa);
extern void ddt_sync(spa_t *spa, uint64_t txg);
extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
-extern int ddt_object_update(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, ddt_entry_t *dde, dmu_tx_t *tx);
-extern const ddt_ops_t ddt_zap_ops;
+extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp);
#ifdef __cplusplus
}
diff --git a/include/sys/ddt_impl.h b/include/sys/ddt_impl.h
new file mode 100644
index 000000000000..52b927b7519d
--- /dev/null
+++ b/include/sys/ddt_impl.h
@@ -0,0 +1,95 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2023, Klara Inc.
+ */
+
+#ifndef _SYS_DDT_IMPL_H
+#define _SYS_DDT_IMPL_H
+
+#include <sys/ddt.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Ops vector to access a specific DDT object type.
+ */
+typedef struct {
+ char ddt_op_name[32];
+ int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
+ boolean_t prehash);
+ int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
+ int (*ddt_op_lookup)(objset_t *os, uint64_t object,
+ const ddt_key_t *ddk, ddt_phys_t *phys, size_t psize);
+ int (*ddt_op_contains)(objset_t *os, uint64_t object,
+ const ddt_key_t *ddk);
+ void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
+ const ddt_key_t *ddk);
+ int (*ddt_op_update)(objset_t *os, uint64_t object,
+ const ddt_key_t *ddk, const ddt_phys_t *phys, size_t psize,
+ dmu_tx_t *tx);
+ int (*ddt_op_remove)(objset_t *os, uint64_t object,
+ const ddt_key_t *ddk, dmu_tx_t *tx);
+ int (*ddt_op_walk)(objset_t *os, uint64_t object, uint64_t *walk,
+ ddt_key_t *ddk, ddt_phys_t *phys, size_t psize);
+ int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
+} ddt_ops_t;
+
+extern const ddt_ops_t ddt_zap_ops;
+
+extern void ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg);
+
+/*
+ * These are only exposed so that zdb can access them. Try not to use them
+ * outside of the DDT implementation proper, and if you do, consider moving
+ * them up.
+ */
+
+/*
+ * Enough room to expand DMU_POOL_DDT format for all possible DDT
+ * checksum/class/type combinations.
+ */
+#define DDT_NAMELEN 32
+
+extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);
+
+extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
+
+extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
+
+extern void ddt_object_name(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
+ char *name);
+extern int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
+ uint64_t *walk, ddt_entry_t *dde);
+extern int ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
+ uint64_t *count);
+extern int ddt_object_info(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
+ dmu_object_info_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DDT_H */
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index 7c55a5c26189..b5fed64da4ad 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -27,6 +27,7 @@
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -136,18 +137,24 @@ typedef enum dmu_object_byteswap {
#endif
#define DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \
- ((ot) & DMU_OT_METADATA) : \
+ (((ot) & DMU_OT_METADATA) != 0) : \
DMU_OT_IS_METADATA_IMPL(ot))
#define DMU_OT_IS_DDT(ot) \
((ot) == DMU_OT_DDT_ZAP)
+#define DMU_OT_IS_CRITICAL(ot) \
+ (DMU_OT_IS_METADATA(ot) && \
+ (ot) != DMU_OT_DNODE && \
+ (ot) != DMU_OT_DIRECTORY_CONTENTS && \
+ (ot) != DMU_OT_SA)
+
/* Note: ztest uses DMU_OT_UINT64_OTHER as a proxy for file blocks */
#define DMU_OT_IS_FILE(ot) \
((ot) == DMU_OT_PLAIN_FILE_CONTENTS || (ot) == DMU_OT_UINT64_OTHER)
#define DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \
- ((ot) & DMU_OT_ENCRYPTED) : \
+ (((ot) & DMU_OT_ENCRYPTED) != 0) : \
DMU_OT_IS_ENCRYPTED_IMPL(ot))
/*
@@ -371,6 +378,7 @@ typedef struct dmu_buf {
#define DMU_POOL_DDT_STATS "DDT-statistics"
#define DMU_POOL_CREATION_VERSION "creation_version"
#define DMU_POOL_SCAN "scan"
+#define DMU_POOL_ERRORSCRUB "error_scrub"
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
#define DMU_POOL_EMPTY_BPOBJ "empty_bpobj"
@@ -564,11 +572,15 @@ int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
int dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
uint64_t length, int read, const void *tag, int *numbufsp,
dmu_buf_t ***dbpp);
+int dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
+ const void *tag, dmu_buf_t **dbp);
int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
const void *tag, dmu_buf_t **dbp, int flags);
int dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
uint64_t length, boolean_t read, const void *tag, int *numbufsp,
dmu_buf_t ***dbpp, uint32_t flags);
+int dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset, const void *tag,
+ dmu_buf_t **dbp);
/*
* Add a reference to a dmu buffer that has already been held via
* dmu_buf_hold() in the current context.
@@ -640,6 +652,9 @@ typedef struct dmu_buf_user {
*/
taskq_ent_t dbu_tqent;
+ /* Size of user data, for inclusion in dbuf_cache accounting. */
+ uint64_t dbu_size;
+
/*
* This instance's eviction function pointers.
*
@@ -722,13 +737,21 @@ void *dmu_buf_replace_user(dmu_buf_t *db,
void *dmu_buf_remove_user(dmu_buf_t *db, dmu_buf_user_t *user);
/*
+ * User data size accounting. This can be used to artifically inflate the size
+ * of the dbuf during cache accounting, so that dbuf_evict_thread evicts enough
+ * to satisfy memory reclaim requests. It's not used for anything else, and
+ * defaults to 0.
+ */
+uint64_t dmu_buf_user_size(dmu_buf_t *db);
+void dmu_buf_add_user_size(dmu_buf_t *db, uint64_t nadd);
+void dmu_buf_sub_user_size(dmu_buf_t *db, uint64_t nsub);
+
+/*
* Returns the user data (dmu_buf_user_t *) associated with this dbuf.
*/
void *dmu_buf_get_user(dmu_buf_t *db);
objset_t *dmu_buf_get_objset(dmu_buf_t *db);
-dnode_t *dmu_buf_dnode_enter(dmu_buf_t *db);
-void dmu_buf_dnode_exit(dmu_buf_t *db);
/* Block until any in-progress dmu buf user evictions complete. */
void dmu_buf_user_evict_wait(void);
@@ -775,6 +798,11 @@ dmu_tx_t *dmu_tx_create(objset_t *os);
void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
void dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
int len);
+void dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
+void dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
+ int len);
+void dmu_tx_hold_clone_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
+ int len);
void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
uint64_t len);
void dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
@@ -865,13 +893,16 @@ int dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset,
int dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
struct arc_buf *buf, dmu_tx_t *tx);
#define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf
-extern int zfs_max_recordsize;
+extern uint_t zfs_max_recordsize;
/*
* Asynchronously try to read in the data.
*/
void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
uint64_t len, enum zio_priority pri);
+void dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
+ uint64_t len, enum zio_priority pri);
+void dmu_prefetch_dnode(objset_t *os, uint64_t object, enum zio_priority pri);
typedef struct dmu_object_info {
/* All sizes are in bytes unless otherwise indicated. */
@@ -907,7 +938,7 @@ typedef const struct dmu_object_byteswap_info {
} dmu_object_byteswap_info_t;
extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES];
-extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS];
+extern dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS];
/*
* Get information on a DMU object.
@@ -1052,6 +1083,11 @@ int dmu_sync(struct zio *zio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd);
int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole,
uint64_t *off);
+int dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset,
+ uint64_t length, struct blkptr *bps, size_t *nbpsp);
+int dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset,
+ uint64_t length, dmu_tx_t *tx, const struct blkptr *bps, size_t nbps);
+
/*
* Initial setup and final teardown.
*/
@@ -1070,7 +1106,7 @@ int dmu_diff(const char *tosnap_name, const char *fromsnap_name,
#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */
extern uint64_t zfs_crc64_table[256];
-extern int dmu_prefetch_max;
+extern uint_t dmu_prefetch_max;
#ifdef __cplusplus
}
diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h
index 95ec11ce2400..83ae2b76ba1f 100644
--- a/include/sys/dmu_impl.h
+++ b/include/sys/dmu_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -247,8 +247,6 @@ typedef struct dmu_sendstatus {
void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);
-int dmu_buf_hold_noread(objset_t *, uint64_t, uint64_t,
- const void *, dmu_buf_t **);
#ifdef __cplusplus
}
diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h
index 782338fd210a..a9123e862af7 100644
--- a/include/sys/dmu_objset.h
+++ b/include/sys/dmu_objset.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -72,6 +72,10 @@ struct dmu_tx;
*/
#define OBJSET_CRYPT_PORTABLE_FLAGS_MASK (0)
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wgnu-variable-sized-type-not-at-end"
+#endif
typedef struct objset_phys {
dnode_phys_t os_meta_dnode;
zil_header_t os_zil_header;
@@ -88,6 +92,9 @@ typedef struct objset_phys {
char os_pad1[OBJSET_PHYS_SIZE_V3 - OBJSET_PHYS_SIZE_V2 -
sizeof (dnode_phys_t)];
} objset_phys_t;
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
typedef int (*dmu_objset_upgrade_cb_t)(objset_t *);
@@ -125,6 +132,7 @@ struct objset {
zfs_logbias_op_t os_logbias;
zfs_cache_type_t os_primary_cache;
zfs_cache_type_t os_secondary_cache;
+ zfs_prefetch_type_t os_prefetch;
zfs_sync_type_t os_sync;
zfs_redundant_metadata_type_t os_redundant_metadata;
uint64_t os_recordsize;
diff --git a/include/sys/dmu_recv.h b/include/sys/dmu_recv.h
index 1fdb986e2ed6..3390ca1089f8 100644
--- a/include/sys/dmu_recv.h
+++ b/include/sys/dmu_recv.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -24,6 +24,7 @@
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2019 Datto Inc.
*/
#ifndef _DMU_RECV_H
@@ -47,6 +48,7 @@ typedef struct dmu_recv_cookie {
boolean_t drc_byteswap;
uint64_t drc_featureflags;
boolean_t drc_force;
+ boolean_t drc_heal;
boolean_t drc_resumable;
boolean_t drc_should_save;
boolean_t drc_raw;
@@ -77,8 +79,8 @@ typedef struct dmu_recv_cookie {
objlist_t *drc_ignore_objlist;
} dmu_recv_cookie_t;
-int dmu_recv_begin(char *, char *, dmu_replay_record_t *,
- boolean_t, boolean_t, nvlist_t *, nvlist_t *, char *,
+int dmu_recv_begin(const char *, const char *, dmu_replay_record_t *,
+ boolean_t, boolean_t, boolean_t, nvlist_t *, nvlist_t *, const char *,
dmu_recv_cookie_t *, zfs_file_t *, offset_t *);
int dmu_recv_stream(dmu_recv_cookie_t *, offset_t *);
int dmu_recv_end(dmu_recv_cookie_t *, void *);
diff --git a/include/sys/dmu_redact.h b/include/sys/dmu_redact.h
index 85f4b0522891..c18e2be103b7 100644
--- a/include/sys/dmu_redact.h
+++ b/include/sys/dmu_redact.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/dmu_send.h b/include/sys/dmu_send.h
index d150f816c945..061b81532fb1 100644
--- a/include/sys/dmu_send.h
+++ b/include/sys/dmu_send.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/dmu_traverse.h b/include/sys/dmu_traverse.h
index d76bfe3c9af3..7a0b38da7302 100644
--- a/include/sys/dmu_traverse.h
+++ b/include/sys/dmu_traverse.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h
index ad3f1b0e47ca..aa55da626149 100644
--- a/include/sys/dmu_tx.h
+++ b/include/sys/dmu_tx.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -90,6 +90,8 @@ enum dmu_tx_hold_type {
THT_ZAP,
THT_SPACE,
THT_SPILL,
+ THT_CLONE,
+ THT_APPEND,
THT_NUMTYPES
};
diff --git a/include/sys/dmu_zfetch.h b/include/sys/dmu_zfetch.h
index cd1b79eb8e44..322472fb1ae2 100644
--- a/include/sys/dmu_zfetch.h
+++ b/include/sys/dmu_zfetch.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -36,8 +36,6 @@
extern "C" {
#endif
-extern unsigned long zfetch_array_rd_sz;
-
struct dnode; /* so we can reference dnode */
typedef struct zfetch {
@@ -47,18 +45,24 @@ typedef struct zfetch {
int zf_numstreams; /* number of zstream_t's */
} zfetch_t;
+typedef struct zsrange {
+ uint16_t start;
+ uint16_t end;
+} zsrange_t;
+
+#define ZFETCH_RANGES 9 /* Fits zstream_t into 128 bytes */
+
typedef struct zstream {
+ list_node_t zs_node; /* link for zf_stream */
uint64_t zs_blkid; /* expect next access at this blkid */
+ uint_t zs_atime; /* time last prefetch issued */
+ zsrange_t zs_ranges[ZFETCH_RANGES]; /* ranges from future */
unsigned int zs_pf_dist; /* data prefetch distance in bytes */
unsigned int zs_ipf_dist; /* L1 prefetch distance in bytes */
uint64_t zs_pf_start; /* first data block to prefetch */
uint64_t zs_pf_end; /* data block to prefetch up to */
uint64_t zs_ipf_start; /* first data block to prefetch L1 */
uint64_t zs_ipf_end; /* data block to prefetch L1 up to */
-
- list_node_t zs_node; /* link for zf_stream */
- hrtime_t zs_atime; /* time last prefetch issued */
- zfetch_t *zs_fetch; /* parent fetch */
boolean_t zs_missed; /* stream saw cache misses */
boolean_t zs_more; /* need more distant prefetch */
zfs_refcount_t zs_callers; /* number of pending callers */
@@ -76,7 +80,7 @@ void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_fini(zfetch_t *);
zstream_t *dmu_zfetch_prepare(zfetch_t *, uint64_t, uint64_t, boolean_t,
boolean_t);
-void dmu_zfetch_run(zstream_t *, boolean_t, boolean_t);
+void dmu_zfetch_run(zfetch_t *, zstream_t *, boolean_t, boolean_t);
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
boolean_t);
diff --git a/include/sys/dnode.h b/include/sys/dnode.h
index 9745ae5bb651..dbe7350d4da7 100644
--- a/include/sys/dnode.h
+++ b/include/sys/dnode.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -36,6 +36,7 @@
#include <sys/dmu_zfetch.h>
#include <sys/zrlock.h>
#include <sys/multilist.h>
+#include <sys/wmsum.h>
#ifdef __cplusplus
extern "C" {
@@ -119,7 +120,11 @@ extern "C" {
#define DN_MAX_LEVELS (DIV_ROUND_UP(DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT, \
DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT) + 1)
-#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
+/*
+ * Use the flexible array instead of the fixed length one dn_bonus
+ * to address memcpy/memmove fortify error
+ */
+#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus_flexible + \
(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
#define DN_MAX_BONUS_LEN(dnp) \
((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? \
@@ -265,6 +270,10 @@ typedef struct dnode_phys {
sizeof (blkptr_t)];
blkptr_t dn_spill;
};
+ struct {
+ blkptr_t __dn_ignore4;
+ uint8_t dn_bonus_flexible[];
+ };
};
} dnode_phys_t;
@@ -456,15 +465,11 @@ void dnode_free_interior_slots(dnode_t *dn);
#define DNODE_IS_DIRTY(_dn) \
((_dn)->dn_dirty_txg >= spa_syncing_txg((_dn)->dn_objset->os_spa))
-#define DNODE_IS_CACHEABLE(_dn) \
+#define DNODE_LEVEL_IS_CACHEABLE(_dn, _level) \
((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \
- (DMU_OT_IS_METADATA((_dn)->dn_type) && \
+ (((_level) > 0 || DMU_OT_IS_METADATA((_dn)->dn_type)) && \
(_dn)->dn_objset->os_primary_cache == ZFS_CACHE_METADATA))
-#define DNODE_META_IS_CACHEABLE(_dn) \
- ((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \
- (_dn)->dn_objset->os_primary_cache == ZFS_CACHE_METADATA)
-
/*
* Used for dnodestats kstat.
*/
@@ -587,10 +592,42 @@ typedef struct dnode_stats {
kstat_named_t dnode_move_active;
} dnode_stats_t;
+typedef struct dnode_sums {
+ wmsum_t dnode_hold_dbuf_hold;
+ wmsum_t dnode_hold_dbuf_read;
+ wmsum_t dnode_hold_alloc_hits;
+ wmsum_t dnode_hold_alloc_misses;
+ wmsum_t dnode_hold_alloc_interior;
+ wmsum_t dnode_hold_alloc_lock_retry;
+ wmsum_t dnode_hold_alloc_lock_misses;
+ wmsum_t dnode_hold_alloc_type_none;
+ wmsum_t dnode_hold_free_hits;
+ wmsum_t dnode_hold_free_misses;
+ wmsum_t dnode_hold_free_lock_misses;
+ wmsum_t dnode_hold_free_lock_retry;
+ wmsum_t dnode_hold_free_refcount;
+ wmsum_t dnode_hold_free_overflow;
+ wmsum_t dnode_free_interior_lock_retry;
+ wmsum_t dnode_allocate;
+ wmsum_t dnode_reallocate;
+ wmsum_t dnode_buf_evict;
+ wmsum_t dnode_alloc_next_chunk;
+ wmsum_t dnode_alloc_race;
+ wmsum_t dnode_alloc_next_block;
+ wmsum_t dnode_move_invalid;
+ wmsum_t dnode_move_recheck1;
+ wmsum_t dnode_move_recheck2;
+ wmsum_t dnode_move_special;
+ wmsum_t dnode_move_handle;
+ wmsum_t dnode_move_rwlock;
+ wmsum_t dnode_move_active;
+} dnode_sums_t;
+
extern dnode_stats_t dnode_stats;
+extern dnode_sums_t dnode_sums;
#define DNODE_STAT_INCR(stat, val) \
- atomic_add_64(&dnode_stats.stat.value.ui64, (val));
+ wmsum_add(&dnode_sums.stat, (val))
#define DNODE_STAT_BUMP(stat) \
DNODE_STAT_INCR(stat, 1);
diff --git a/include/sys/dsl_bookmark.h b/include/sys/dsl_bookmark.h
index 353c5c2d260f..d4e559a09037 100644
--- a/include/sys/dsl_bookmark.h
+++ b/include/sys/dsl_bookmark.h
@@ -72,6 +72,7 @@ typedef struct redaction_list_phys {
typedef struct redaction_list {
dmu_buf_user_t rl_dbu;
redaction_list_phys_t *rl_phys;
+ dmu_buf_t *rl_bonus;
dmu_buf_t *rl_dbuf;
uint64_t rl_object;
zfs_refcount_t rl_longholds;
diff --git a/include/sys/dsl_crypt.h b/include/sys/dsl_crypt.h
index db594eece1c3..fbcae3715355 100644
--- a/include/sys/dsl_crypt.h
+++ b/include/sys/dsl_crypt.h
@@ -206,6 +206,7 @@ void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin,
dmu_tx_t *tx);
int dmu_objset_create_crypt_check(dsl_dir_t *parentdd,
dsl_crypto_params_t *dcp, boolean_t *will_encrypt);
+boolean_t dmu_objset_crypto_key_equal(objset_t *osa, objset_t *osb);
void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd,
struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx);
uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey,
@@ -222,5 +223,6 @@ int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, const zbookmark_phys_t *zb,
dmu_object_type_t ot, boolean_t dedup, boolean_t bswap, uint8_t *salt,
uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd,
boolean_t *no_crypt);
+zfs_keystatus_t dsl_dataset_get_keystatus(dsl_dir_t *dd);
#endif
diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h
index 25f86bce2e63..3450527af7e0 100644
--- a/include/sys/dsl_dataset.h
+++ b/include/sys/dsl_dataset.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -301,6 +301,14 @@ typedef struct dsl_dataset_snapshot_arg {
proc_t *ddsa_proc;
} dsl_dataset_snapshot_arg_t;
+typedef struct dsl_dataset_rename_snapshot_arg {
+ const char *ddrsa_fsname;
+ const char *ddrsa_oldsnapname;
+ const char *ddrsa_newsnapname;
+ boolean_t ddrsa_recursive;
+ dmu_tx_t *ddrsa_tx;
+} dsl_dataset_rename_snapshot_arg_t;
+
/*
* The max length of a temporary tag prefix is the number of hex digits
* required to express UINT64_MAX plus one for the hyphen.
@@ -375,7 +383,6 @@ boolean_t dsl_dataset_modified_since_snap(dsl_dataset_t *ds,
void dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx);
void dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx);
-void dsl_dataset_feature_set_activation(const blkptr_t *bp, dsl_dataset_t *ds);
void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp,
dmu_tx_t *tx);
int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp,
@@ -474,6 +481,9 @@ void dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx);
int dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner,
nvlist_t *result);
+int dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx);
+void dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx);
+
uint64_t dsl_dataset_get_remap_deadlist_object(dsl_dataset_t *ds);
void dsl_dataset_create_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx);
boolean_t dsl_dataset_remap_deadlist_exists(dsl_dataset_t *ds);
diff --git a/include/sys/dsl_deadlist.h b/include/sys/dsl_deadlist.h
index 64358bb5fc0b..3feb3bbf062f 100644
--- a/include/sys/dsl_deadlist.h
+++ b/include/sys/dsl_deadlist.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -84,7 +84,7 @@ typedef struct livelist_condense_entry {
boolean_t cancelled;
} livelist_condense_entry_t;
-extern unsigned long zfs_livelist_max_entries;
+extern uint64_t zfs_livelist_max_entries;
extern int zfs_livelist_min_percent_shared;
typedef int deadlist_iter_t(void *args, dsl_deadlist_entry_t *dle);
diff --git a/include/sys/dsl_deleg.h b/include/sys/dsl_deleg.h
index 7f46233a889b..d6abac90bbcc 100644
--- a/include/sys/dsl_deleg.h
+++ b/include/sys/dsl_deleg.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/dsl_destroy.h b/include/sys/dsl_destroy.h
index 208d75bacffa..1a9b672a260b 100644
--- a/include/sys/dsl_destroy.h
+++ b/include/sys/dsl_destroy.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h
index 6c097e372e44..f7c0d9acd10d 100644
--- a/include/sys/dsl_dir.h
+++ b/include/sys/dsl_dir.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -52,6 +52,7 @@ struct zthr;
#define DD_FIELD_SNAPSHOT_COUNT "com.joyent:snapshot_count"
#define DD_FIELD_CRYPTO_KEY_OBJ "com.datto:crypto_key_obj"
#define DD_FIELD_LIVELIST "com.delphix:livelist"
+#define DD_FIELD_SNAPSHOTS_CHANGED "com.ixsystems:snapshots_changed"
typedef enum dd_used {
DD_USED_HEAD,
@@ -115,7 +116,7 @@ struct dsl_dir {
/* gross estimate of space used by in-flight tx's */
uint64_t dd_tempreserved[TXG_SIZE];
/* amount of space we expect to write; == amount of dirty data */
- int64_t dd_space_towrite[TXG_SIZE];
+ uint64_t dd_space_towrite[TXG_SIZE];
dsl_deadlist_t dd_livelist;
bplist_t dd_pending_frees;
@@ -191,7 +192,7 @@ int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd,
boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
-void dsl_dir_snap_cmtime_update(dsl_dir_t *dd);
+void dsl_dir_snap_cmtime_update(dsl_dir_t *dd, dmu_tx_t *tx);
inode_timespec_t dsl_dir_snap_cmtime(dsl_dir_t *dd);
void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value,
dmu_tx_t *tx);
diff --git a/include/sys/dsl_pool.h b/include/sys/dsl_pool.h
index 5bb5ef20d5b1..abcdc77a4b96 100644
--- a/include/sys/dsl_pool.h
+++ b/include/sys/dsl_pool.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -57,13 +57,13 @@ struct dsl_scan;
struct dsl_crypto_params;
struct dsl_deadlist;
-extern unsigned long zfs_dirty_data_max;
-extern unsigned long zfs_dirty_data_max_max;
-extern unsigned long zfs_wrlog_data_max;
-extern int zfs_dirty_data_max_percent;
-extern int zfs_dirty_data_max_max_percent;
-extern int zfs_delay_min_dirty_percent;
-extern unsigned long zfs_delay_scale;
+extern uint64_t zfs_dirty_data_max;
+extern uint64_t zfs_dirty_data_max_max;
+extern uint64_t zfs_wrlog_data_max;
+extern uint_t zfs_dirty_data_max_percent;
+extern uint_t zfs_dirty_data_max_max_percent;
+extern uint_t zfs_delay_min_dirty_percent;
+extern uint64_t zfs_delay_scale;
/* These macros are for indexing into the zfs_all_blkstats_t. */
#define DMU_OT_DEFERRED DMU_OT_NONE
diff --git a/include/sys/dsl_prop.h b/include/sys/dsl_prop.h
index fba8f908dc9e..7a84f2b6922a 100644
--- a/include/sys/dsl_prop.h
+++ b/include/sys/dsl_prop.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/dsl_scan.h b/include/sys/dsl_scan.h
index d716510f879d..2e3452e5ebaa 100644
--- a/include/sys/dsl_scan.h
+++ b/include/sys/dsl_scan.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -29,6 +29,7 @@
#include <sys/zfs_context.h>
#include <sys/zio.h>
+#include <sys/zap.h>
#include <sys/ddt.h>
#include <sys/bplist.h>
@@ -60,7 +61,7 @@ typedef struct dsl_scan_phys {
uint64_t scn_end_time;
uint64_t scn_to_examine; /* total bytes to be scanned */
uint64_t scn_examined; /* bytes scanned so far */
- uint64_t scn_to_process;
+ uint64_t scn_skipped; /* bytes skipped by scanner */
uint64_t scn_processed;
uint64_t scn_errors; /* scan I/O error count */
uint64_t scn_ddt_class_max;
@@ -78,6 +79,21 @@ typedef enum dsl_scan_flags {
#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)
+typedef struct dsl_errorscrub_phys {
+ uint64_t dep_func; /* pool_scan_func_t */
+ uint64_t dep_state; /* dsl_scan_state_t */
+ uint64_t dep_cursor; /* serialized zap cursor for tracing progress */
+ uint64_t dep_start_time; /* error scrub start time, unix timestamp */
+ uint64_t dep_end_time; /* error scrub end time, unix timestamp */
+ uint64_t dep_to_examine; /* total error blocks to be scrubbed */
+ uint64_t dep_examined; /* blocks scrubbed so far */
+ uint64_t dep_errors; /* error scrub I/O error count */
+ uint64_t dep_paused_flags; /* flag for paused */
+} dsl_errorscrub_phys_t;
+
+#define ERRORSCRUB_PHYS_NUMINTS (sizeof (dsl_errorscrub_phys_t) \
+ / sizeof (uint64_t))
+
/*
* Every pool will have one dsl_scan_t and this structure will contain
* in-memory information about the scan and a pointer to the on-disk
@@ -151,11 +167,15 @@ typedef struct dsl_scan {
uint64_t scn_avg_zio_size_this_txg;
uint64_t scn_zios_this_txg;
+ /* zap cursor for tracing error scrub progress */
+ zap_cursor_t errorscrub_cursor;
/* members needed for syncing scan status to disk */
dsl_scan_phys_t scn_phys; /* on disk representation of scan */
dsl_scan_phys_t scn_phys_cached;
avl_tree_t scn_queue; /* queue of datasets to scan */
uint64_t scn_queues_pending; /* outstanding data to issue */
+ /* members needed for syncing error scrub status to disk */
+ dsl_errorscrub_phys_t errorscrub_phys;
} dsl_scan_t;
typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
@@ -171,8 +191,12 @@ int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd);
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
-int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
+boolean_t dsl_errorscrubbing(const struct dsl_pool *dp);
+boolean_t dsl_errorscrub_active(dsl_scan_t *scn);
void dsl_scan_restart_resilver(struct dsl_pool *, uint64_t txg);
+int dsl_scrub_set_pause_resume(const struct dsl_pool *dp,
+ pool_scrub_cmd_t cmd);
+void dsl_errorscrub_sync(struct dsl_pool *, dmu_tx_t *);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
@@ -184,6 +208,7 @@ void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
boolean_t dsl_scan_active(dsl_scan_t *scn);
boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);
+boolean_t dsl_errorscrub_is_paused(const dsl_scan_t *scn);
void dsl_scan_freed(spa_t *spa, const blkptr_t *bp);
void dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue);
void dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd);
diff --git a/include/sys/dsl_synctask.h b/include/sys/dsl_synctask.h
index 5a5b306419f1..cbdb20ec1d3a 100644
--- a/include/sys/dsl_synctask.h
+++ b/include/sys/dsl_synctask.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/dsl_userhold.h b/include/sys/dsl_userhold.h
index 071aeb86d1f1..c165edab3c53 100644
--- a/include/sys/dsl_userhold.h
+++ b/include/sys/dsl_userhold.h
@@ -7,7 +7,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/edonr.h b/include/sys/edonr.h
index 79b7cd8c75b8..b19b5eb42c29 100644
--- a/include/sys/edonr.h
+++ b/include/sys/edonr.h
@@ -1,6 +1,4 @@
/*
- * IDI,NTNU
- *
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
@@ -19,15 +17,13 @@
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
- *
- * Copyright (C) 2009, 2010, Jorn Amundsen <jorn.amundsen@ntnu.no>
- *
- * Tweaked Edon-R implementation for SUPERCOP, based on NIST API.
- *
- * $Id: edonr.h 517 2013-02-17 20:34:39Z joern $
*/
+
/*
- * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved
+ * Based on Edon-R implementation for SUPERCOP, based on NIST API.
+ * Copyright (c) 2009, 2010 Jørn Amundsen <jorn.amundsen@ntnu.no>
+ * Copyright (c) 2013 Saso Kiselkov, All rights reserved
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
*/
#ifndef _SYS_EDONR_H_
@@ -40,8 +36,8 @@ extern "C" {
#ifdef _KERNEL
#include <sys/types.h>
#else
-#include <stdint.h> /* uint32_t... */
-#include <stdlib.h> /* size_t ... */
+#include <stdint.h>
+#include <stdlib.h>
#endif
/*
@@ -52,44 +48,27 @@ extern "C" {
*/
/* Specific algorithm definitions */
-#define EdonR224_DIGEST_SIZE 28
-#define EdonR224_BLOCK_SIZE 64
-#define EdonR256_DIGEST_SIZE 32
-#define EdonR256_BLOCK_SIZE 64
-#define EdonR384_DIGEST_SIZE 48
-#define EdonR384_BLOCK_SIZE 128
#define EdonR512_DIGEST_SIZE 64
#define EdonR512_BLOCK_SIZE 128
-
-#define EdonR256_BLOCK_BITSIZE 512
#define EdonR512_BLOCK_BITSIZE 1024
typedef struct {
- uint32_t DoublePipe[16];
- uint8_t LastPart[EdonR256_BLOCK_SIZE * 2];
-} EdonRData256;
-typedef struct {
uint64_t DoublePipe[16];
uint8_t LastPart[EdonR512_BLOCK_SIZE * 2];
} EdonRData512;
typedef struct {
- size_t hashbitlen;
-
- /* + algorithm specific parameters */
- int unprocessed_bits;
uint64_t bits_processed;
+ int unprocessed_bits;
union {
- EdonRData256 p256[1];
EdonRData512 p512[1];
} pipe[1];
} EdonRState;
-void EdonRInit(EdonRState *state, size_t hashbitlen);
+void EdonRInit(EdonRState *state);
void EdonRUpdate(EdonRState *state, const uint8_t *data, size_t databitlen);
void EdonRFinal(EdonRState *state, uint8_t *hashval);
-void EdonRHash(size_t hashbitlen, const uint8_t *data, size_t databitlen,
- uint8_t *hashval);
+void EdonRHash(const uint8_t *data, size_t databitlen, uint8_t *hashval);
#ifdef __cplusplus
}
diff --git a/include/sys/efi_partition.h b/include/sys/efi_partition.h
index 7d5e42e945ad..c4d7fd5088b5 100644
--- a/include/sys/efi_partition.h
+++ b/include/sys/efi_partition.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h
index cd080c8ee667..c746600cd2d5 100644
--- a/include/sys/fm/fs/zfs.h
+++ b/include/sys/fm/fs/zfs.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -78,6 +78,12 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_READ_ERRORS "vdev_read_errors"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_WRITE_ERRORS "vdev_write_errors"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_ERRORS "vdev_cksum_errors"
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_N "vdev_cksum_n"
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_T "vdev_cksum_t"
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_N "vdev_io_n"
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_T "vdev_io_t"
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_N "vdev_slow_io_n"
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_T "vdev_slow_io_t"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DELAYS "vdev_delays"
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid"
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type"
@@ -98,8 +104,6 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP "zio_timestamp"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA "zio_delta"
#define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state"
-#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED "cksum_expected"
-#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL "cksum_actual"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO "cksum_algorithm"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP "cksum_byteswap"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES "bad_ranges"
@@ -108,8 +112,6 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS "bad_range_clears"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS "bad_set_bits"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS "bad_cleared_bits"
-#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM "bad_set_histogram"
-#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM "bad_cleared_histogram"
#define FM_EREPORT_PAYLOAD_ZFS_SNAPSHOT_NAME "snapshot_name"
#define FM_EREPORT_PAYLOAD_ZFS_DEVICE_NAME "device_name"
#define FM_EREPORT_PAYLOAD_ZFS_RAW_DEVICE_NAME "raw_name"
diff --git a/include/sys/fm/protocol.h b/include/sys/fm/protocol.h
index 78031f7c15ec..d4a9751c8aeb 100644
--- a/include/sys/fm/protocol.h
+++ b/include/sys/fm/protocol.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/fm/util.h b/include/sys/fm/util.h
index 5fb6d1d6072b..038162ab7524 100644
--- a/include/sys/fm/util.h
+++ b/include/sys/fm/util.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -95,7 +95,7 @@ extern void fm_init(void);
extern void fm_fini(void);
extern void zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector);
extern int zfs_zevent_post(nvlist_t *, nvlist_t *, zevent_cb_t *);
-extern void zfs_zevent_drain_all(int *);
+extern void zfs_zevent_drain_all(uint_t *);
extern zfs_file_t *zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **);
extern void zfs_zevent_fd_rele(zfs_file_t *);
extern int zfs_zevent_next(zfs_zevent_t *, nvlist_t **, uint64_t *, uint64_t *);
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index f013e6b20603..e191420f2d2d 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, 2017 Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
@@ -29,6 +29,7 @@
* Copyright (c) 2019 Datto Inc.
* Portions Copyright 2010 Robert Milkowski
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
#ifndef _SYS_FS_ZFS_H
@@ -189,6 +190,9 @@ typedef enum {
ZFS_PROP_IVSET_GUID, /* not exposed to the user */
ZFS_PROP_REDACTED,
ZFS_PROP_REDACT_SNAPS,
+ ZFS_PROP_SNAPSHOTS_CHANGED,
+ ZFS_PROP_PREFETCH,
+ ZFS_PROP_VOLTHREADING,
ZFS_NUM_PROPS
} zfs_prop_t;
@@ -251,6 +255,9 @@ typedef enum {
ZPOOL_PROP_LOAD_GUID,
ZPOOL_PROP_AUTOTRIM,
ZPOOL_PROP_COMPATIBILITY,
+ ZPOOL_PROP_BCLONEUSED,
+ ZPOOL_PROP_BCLONESAVED,
+ ZPOOL_PROP_BCLONERATIO,
ZPOOL_NUM_PROPS
} zpool_prop_t;
@@ -353,6 +360,14 @@ typedef enum {
VDEV_PROP_BYTES_TRIM,
VDEV_PROP_REMOVING,
VDEV_PROP_ALLOCATING,
+ VDEV_PROP_FAILFAST,
+ VDEV_PROP_CHECKSUM_N,
+ VDEV_PROP_CHECKSUM_T,
+ VDEV_PROP_IO_N,
+ VDEV_PROP_IO_T,
+ VDEV_PROP_RAIDZ_EXPANDING,
+ VDEV_PROP_SLOW_IO_N,
+ VDEV_PROP_SLOW_IO_T,
VDEV_NUM_PROPS
} vdev_prop_t;
@@ -500,7 +515,9 @@ typedef enum {
typedef enum {
ZFS_REDUNDANT_METADATA_ALL,
- ZFS_REDUNDANT_METADATA_MOST
+ ZFS_REDUNDANT_METADATA_MOST,
+ ZFS_REDUNDANT_METADATA_SOME,
+ ZFS_REDUNDANT_METADATA_NONE
} zfs_redundant_metadata_type_t;
typedef enum {
@@ -531,6 +548,12 @@ typedef enum zfs_key_location {
ZFS_KEYLOCATION_LOCATIONS
} zfs_keylocation_t;
+typedef enum {
+ ZFS_PREFETCH_NONE = 0,
+ ZFS_PREFETCH_METADATA = 1,
+ ZFS_PREFETCH_ALL = 2
+} zfs_prefetch_type_t;
+
#define DEFAULT_PBKDF2_ITERATIONS 350000
#define MIN_PBKDF2_ITERATIONS 100000
@@ -704,6 +727,7 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */
#define ZPOOL_CONFIG_REMOVAL_STATS "removal_stats" /* not stored on disk */
#define ZPOOL_CONFIG_CHECKPOINT_STATS "checkpoint_stats" /* not on disk */
+#define ZPOOL_CONFIG_RAIDZ_EXPAND_STATS "raidz_expand_stats" /* not on disk */
#define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */
#define ZPOOL_CONFIG_INDIRECT_SIZE "indirect_size" /* not stored on disk */
@@ -769,6 +793,8 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_SPARES "spares"
#define ZPOOL_CONFIG_IS_SPARE "is_spare"
#define ZPOOL_CONFIG_NPARITY "nparity"
+#define ZPOOL_CONFIG_RAIDZ_EXPANDING "raidz_expanding"
+#define ZPOOL_CONFIG_RAIDZ_EXPAND_TXGS "raidz_expand_txgs"
#define ZPOOL_CONFIG_HOSTID "hostid"
#define ZPOOL_CONFIG_HOSTNAME "hostname"
#define ZPOOL_CONFIG_LOADED_TIME "initial_load_time"
@@ -804,6 +830,7 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read"
#define ZPOOL_CONFIG_FEATURE_STATS "feature_stats" /* not stored on disk */
#define ZPOOL_CONFIG_ERRATA "errata" /* not stored on disk */
+#define ZPOOL_CONFIG_VDEV_ROOT_ZAP "com.klarasystems:vdev_zap_root"
#define ZPOOL_CONFIG_VDEV_TOP_ZAP "com.delphix:vdev_zap_top"
#define ZPOOL_CONFIG_VDEV_LEAF_ZAP "com.delphix:vdev_zap_leaf"
#define ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS "com.delphix:has_per_vdev_zaps"
@@ -886,6 +913,15 @@ typedef struct zpool_load_policy {
#define VDEV_TOP_ZAP_ALLOCATION_BIAS \
"org.zfsonlinux:allocation_bias"
+#define VDEV_TOP_ZAP_RAIDZ_EXPAND_STATE \
+ "org.openzfs:raidz_expand_state"
+#define VDEV_TOP_ZAP_RAIDZ_EXPAND_START_TIME \
+ "org.openzfs:raidz_expand_start_time"
+#define VDEV_TOP_ZAP_RAIDZ_EXPAND_END_TIME \
+ "org.openzfs:raidz_expand_end_time"
+#define VDEV_TOP_ZAP_RAIDZ_EXPAND_BYTES_COPIED \
+ "org.openzfs:raidz_expand_bytes_copied"
+
/* vdev metaslab allocation bias */
#define VDEV_ALLOC_BIAS_LOG "log"
#define VDEV_ALLOC_BIAS_SPECIAL "special"
@@ -1023,6 +1059,7 @@ typedef enum pool_scan_func {
POOL_SCAN_NONE,
POOL_SCAN_SCRUB,
POOL_SCAN_RESILVER,
+ POOL_SCAN_ERRORSCRUB,
POOL_SCAN_FUNCS
} pool_scan_func_t;
@@ -1057,12 +1094,18 @@ typedef enum zio_type {
ZIO_TYPE_WRITE,
ZIO_TYPE_FREE,
ZIO_TYPE_CLAIM,
- ZIO_TYPE_IOCTL,
+ ZIO_TYPE_FLUSH,
ZIO_TYPE_TRIM,
ZIO_TYPES
} zio_type_t;
/*
+ * Compatibility: _IOCTL was renamed to _FLUSH; keep the old name available to
+ * user programs.
+ */
+#define ZIO_TYPE_IOCTL ZIO_TYPE_FLUSH
+
+/*
* Pool statistics. Note: all fields should be 64-bit because this
* is passed between kernel and userland as an nvlist uint64 array.
*/
@@ -1074,7 +1117,7 @@ typedef struct pool_scan_stat {
uint64_t pss_end_time; /* scan end time */
uint64_t pss_to_examine; /* total bytes to scan */
uint64_t pss_examined; /* total bytes located by scanner */
- uint64_t pss_to_process; /* total bytes to process */
+ uint64_t pss_skipped; /* total bytes skipped by scanner */
uint64_t pss_processed; /* total processed bytes */
uint64_t pss_errors; /* scan errors */
@@ -1086,6 +1129,20 @@ typedef struct pool_scan_stat {
uint64_t pss_pass_scrub_spent_paused;
uint64_t pss_pass_issued; /* issued bytes per scan pass */
uint64_t pss_issued; /* total bytes checked by scanner */
+
+ /* error scrub values stored on disk */
+ uint64_t pss_error_scrub_func; /* pool_scan_func_t */
+ uint64_t pss_error_scrub_state; /* dsl_scan_state_t */
+ uint64_t pss_error_scrub_start; /* error scrub start time */
+ uint64_t pss_error_scrub_end; /* error scrub end time */
+ uint64_t pss_error_scrub_examined; /* error blocks issued I/O */
+ /* error blocks to be issued I/O */
+ uint64_t pss_error_scrub_to_be_examined;
+
+ /* error scrub values not stored on disk */
+ /* error scrub pause time in milliseconds */
+ uint64_t pss_pass_error_scrub_pause;
+
} pool_scan_stat_t;
typedef struct pool_removal_stat {
@@ -1102,11 +1159,22 @@ typedef struct pool_removal_stat {
uint64_t prs_mapping_memory;
} pool_removal_stat_t;
+typedef struct pool_raidz_expand_stat {
+ uint64_t pres_state; /* dsl_scan_state_t */
+ uint64_t pres_expanding_vdev;
+ uint64_t pres_start_time;
+ uint64_t pres_end_time;
+ uint64_t pres_to_reflow; /* bytes that need to be moved */
+ uint64_t pres_reflowed; /* bytes moved so far */
+ uint64_t pres_waiting_for_resilver;
+} pool_raidz_expand_stat_t;
+
typedef enum dsl_scan_state {
DSS_NONE,
DSS_SCANNING,
DSS_FINISHED,
DSS_CANCELED,
+ DSS_ERRORSCRUBBING,
DSS_NUM_STATES
} dsl_scan_state_t;
@@ -1123,6 +1191,7 @@ typedef struct vdev_rebuild_stat {
uint64_t vrs_pass_time_ms; /* pass run time (millisecs) */
uint64_t vrs_pass_bytes_scanned; /* bytes scanned since start/resume */
uint64_t vrs_pass_bytes_issued; /* bytes rebuilt since start/resume */
+ uint64_t vrs_pass_bytes_skipped; /* bytes skipped since start/resume */
} vdev_rebuild_stat_t;
/*
@@ -1252,6 +1321,7 @@ typedef enum pool_initialize_func {
POOL_INITIALIZE_START,
POOL_INITIALIZE_CANCEL,
POOL_INITIALIZE_SUSPEND,
+ POOL_INITIALIZE_UNINIT,
POOL_INITIALIZE_FUNCS
} pool_initialize_func_t;
@@ -1346,7 +1416,7 @@ typedef enum {
*/
typedef enum zfs_ioc {
/*
- * Core features - 81/128 numbers reserved.
+ * Core features - 88/128 numbers reserved.
*/
#ifdef __FreeBSD__
ZFS_IOC_FIRST = 0,
@@ -1441,6 +1511,7 @@ typedef enum zfs_ioc {
ZFS_IOC_WAIT_FS, /* 0x5a54 */
ZFS_IOC_VDEV_GET_PROPS, /* 0x5a55 */
ZFS_IOC_VDEV_SET_PROPS, /* 0x5a56 */
+ ZFS_IOC_POOL_SCRUB, /* 0x5a57 */
/*
* Per-platform (Optional) - 8/128 numbers reserved.
@@ -1535,6 +1606,10 @@ typedef enum {
ZFS_ERR_BADPROP,
ZFS_ERR_VDEV_NOTSUP,
ZFS_ERR_NOT_USER_NAMESPACE,
+ ZFS_ERR_RESUME_EXISTS,
+ ZFS_ERR_CRYPTO_NOTSUP,
+ ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS,
+ ZFS_ERR_ASHIFT_MISMATCH,
} zfs_errno_t;
/*
@@ -1559,6 +1634,7 @@ typedef enum {
ZPOOL_WAIT_RESILVER,
ZPOOL_WAIT_SCRUB,
ZPOOL_WAIT_TRIM,
+ ZPOOL_WAIT_RAIDZ_EXPAND,
ZPOOL_WAIT_NUM_ACTIVITIES
} zpool_wait_activity_t;
@@ -1651,6 +1727,7 @@ typedef enum {
#define ZFS_ONLINE_UNSPARE 0x2
#define ZFS_ONLINE_FORCEFAULT 0x4
#define ZFS_ONLINE_EXPAND 0x8
+#define ZFS_ONLINE_SPARE 0x10
#define ZFS_OFFLINE_TEMPORARY 0x1
/*
@@ -1757,9 +1834,9 @@ typedef enum {
* against the cost of COWing a giant block to modify one byte, and the
* large latency of reading or writing a large block.
*
- * Note that although blocks up to 16MB are supported, the recordsize
- * property can not be set larger than zfs_max_recordsize (default 1MB).
- * See the comment near zfs_max_recordsize in dsl_dataset.c for details.
+ * The recordsize property can not be set larger than zfs_max_recordsize
+ * (default 16MB on 64-bit and 1MB on 32-bit). See the comment near
+ * zfs_max_recordsize in dsl_dataset.c for details.
*
* Note that although the LSIZE field of the blkptr_t can store sizes up
* to 32MB, the dnode's dn_datablkszsec can only store sizes up to
diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h
index 140739adb562..815b5d0c9cf1 100644
--- a/include/sys/metaslab.h
+++ b/include/sys/metaslab.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -39,6 +39,7 @@ extern "C" {
typedef struct metaslab_ops {
+ const char *msop_name;
uint64_t (*msop_alloc)(metaslab_t *, uint64_t);
} metaslab_ops_t;
@@ -80,7 +81,6 @@ uint64_t metaslab_largest_allocatable(metaslab_t *);
#define METASLAB_ASYNC_ALLOC 0x8
#define METASLAB_DONT_THROTTLE 0x10
#define METASLAB_MUST_RESERVE 0x20
-#define METASLAB_FASTWRITE 0x40
#define METASLAB_ZIL 0x80
int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t,
@@ -96,8 +96,6 @@ void metaslab_unalloc_dva(spa_t *, const dva_t *, uint64_t);
int metaslab_claim(spa_t *, const blkptr_t *, uint64_t);
int metaslab_claim_impl(vdev_t *, uint64_t, uint64_t, uint64_t);
void metaslab_check_free(spa_t *, const blkptr_t *);
-void metaslab_fastwrite_mark(spa_t *, const blkptr_t *);
-void metaslab_fastwrite_unmark(spa_t *, const blkptr_t *);
void metaslab_stat_init(void);
void metaslab_stat_fini(void);
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h
index 820c61a252e2..4f434291ddbf 100644
--- a/include/sys/metaslab_impl.h
+++ b/include/sys/metaslab_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -250,7 +250,6 @@ struct metaslab_group {
int64_t mg_activation_count;
metaslab_class_t *mg_class;
vdev_t *mg_vd;
- taskq_t *mg_taskq;
metaslab_group_t *mg_prev;
metaslab_group_t *mg_next;
@@ -313,7 +312,7 @@ struct metaslab_group {
* Each metaslab maintains a set of in-core trees to track metaslab
* operations. The in-core free tree (ms_allocatable) contains the list of
* free segments which are eligible for allocation. As blocks are
- * allocated, the allocated segment are removed from the ms_allocatable and
+ * allocated, the allocated segments are removed from the ms_allocatable and
* added to a per txg allocation tree (ms_allocating). As blocks are
* freed, they are added to the free tree (ms_freeing). These trees
* allow us to process all allocations and frees in syncing context
@@ -366,9 +365,9 @@ struct metaslab_group {
struct metaslab {
/*
* This is the main lock of the metaslab and its purpose is to
- * coordinate our allocations and frees [e.g metaslab_block_alloc(),
+ * coordinate our allocations and frees [e.g., metaslab_block_alloc(),
* metaslab_free_concrete(), ..etc] with our various syncing
- * procedures [e.g. metaslab_sync(), metaslab_sync_done(), ..etc].
+ * procedures [e.g., metaslab_sync(), metaslab_sync_done(), ..etc].
*
* The lock is also used during some miscellaneous operations like
* using the metaslab's histogram for the metaslab group's histogram
diff --git a/include/sys/mmp.h b/include/sys/mmp.h
index ce9c4496a04f..1023334098d8 100644
--- a/include/sys/mmp.h
+++ b/include/sys/mmp.h
@@ -64,7 +64,7 @@ extern void mmp_signal_all_threads(void);
/* Global tuning */
extern int param_set_multihost_interval(ZFS_MODULE_PARAM_ARGS);
-extern ulong_t zfs_multihost_interval;
+extern uint64_t zfs_multihost_interval;
extern uint_t zfs_multihost_fail_intervals;
extern uint_t zfs_multihost_import_intervals;
diff --git a/include/sys/mntent.h b/include/sys/mntent.h
index 8d578f67b8a7..5bb7e080cda8 100644
--- a/include/sys/mntent.h
+++ b/include/sys/mntent.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -108,5 +108,8 @@
#define MNTOPT_NOACL "noacl" /* likewise */
#define MNTOPT_POSIXACL "posixacl" /* likewise */
#define MNTOPT_MNTPOINT "mntpoint" /* mount point hint */
+#define MNTOPT_CASESENSITIVE "casesensitive" /* case sensitivity */
+#define MNTOPT_CASEINSENSITIVE "caseinsensitive" /* case insensitivity */
+#define MNTOPT_CASEMIXED "casemixed" /* case mixed */
#endif /* _SYS_MNTENT_H */
diff --git a/include/sys/multilist.h b/include/sys/multilist.h
index 26f37c37ab38..e7de86f2379b 100644
--- a/include/sys/multilist.h
+++ b/include/sys/multilist.h
@@ -82,12 +82,15 @@ int multilist_is_empty(multilist_t *);
unsigned int multilist_get_num_sublists(multilist_t *);
unsigned int multilist_get_random_index(multilist_t *);
-multilist_sublist_t *multilist_sublist_lock(multilist_t *, unsigned int);
+void multilist_sublist_lock(multilist_sublist_t *);
+multilist_sublist_t *multilist_sublist_lock_idx(multilist_t *, unsigned int);
multilist_sublist_t *multilist_sublist_lock_obj(multilist_t *, void *);
void multilist_sublist_unlock(multilist_sublist_t *);
void multilist_sublist_insert_head(multilist_sublist_t *, void *);
void multilist_sublist_insert_tail(multilist_sublist_t *, void *);
+void multilist_sublist_insert_after(multilist_sublist_t *, void *, void *);
+void multilist_sublist_insert_before(multilist_sublist_t *, void *, void *);
void multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj);
void multilist_sublist_remove(multilist_sublist_t *, void *);
int multilist_sublist_is_empty(multilist_sublist_t *);
diff --git a/include/sys/nvpair.h b/include/sys/nvpair.h
index 81494b62d7ec..2dbd9e3eaf46 100644
--- a/include/sys/nvpair.h
+++ b/include/sys/nvpair.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -76,7 +76,7 @@ typedef struct nvpair {
int16_t nvp_reserve; /* not used */
int32_t nvp_value_elem; /* number of elements for array types */
data_type_t nvp_type; /* type of value */
- /* name string */
+ char nvp_name[]; /* name string */
/* aligned ptr array for string arrays */
/* aligned array of data for value */
} nvpair_t;
@@ -109,7 +109,7 @@ typedef struct nvlist {
#define NV_ALIGN4(x) (((x) + 3) & ~3)
#define NVP_SIZE(nvp) ((nvp)->nvp_size)
-#define NVP_NAME(nvp) ((char *)(nvp) + sizeof (nvpair_t))
+#define NVP_NAME(nvp) ((nvp)->nvp_name)
#define NVP_TYPE(nvp) ((nvp)->nvp_type)
#define NVP_NELEM(nvp) ((nvp)->nvp_value_elem)
#define NVP_VALUE(nvp) ((char *)(nvp) + NV_ALIGN(sizeof (nvpair_t) \
@@ -232,7 +232,8 @@ _SYS_NVPAIR_H int nvlist_lookup_int64(const nvlist_t *, const char *,
int64_t *);
_SYS_NVPAIR_H int nvlist_lookup_uint64(const nvlist_t *, const char *,
uint64_t *);
-_SYS_NVPAIR_H int nvlist_lookup_string(nvlist_t *, const char *, char **);
+_SYS_NVPAIR_H int nvlist_lookup_string(const nvlist_t *, const char *,
+ const char **);
_SYS_NVPAIR_H int nvlist_lookup_nvlist(nvlist_t *, const char *, nvlist_t **);
_SYS_NVPAIR_H int nvlist_lookup_boolean_array(nvlist_t *, const char *,
boolean_t **, uint_t *);
@@ -267,14 +268,14 @@ _SYS_NVPAIR_H int nvlist_lookup_double(const nvlist_t *, const char *,
_SYS_NVPAIR_H int nvlist_lookup_nvpair(nvlist_t *, const char *, nvpair_t **);
_SYS_NVPAIR_H int nvlist_lookup_nvpair_embedded_index(nvlist_t *, const char *,
- nvpair_t **, int *, char **);
+ nvpair_t **, int *, const char **);
_SYS_NVPAIR_H boolean_t nvlist_exists(const nvlist_t *, const char *);
_SYS_NVPAIR_H boolean_t nvlist_empty(const nvlist_t *);
/* processing nvpair */
_SYS_NVPAIR_H nvpair_t *nvlist_next_nvpair(nvlist_t *, const nvpair_t *);
_SYS_NVPAIR_H nvpair_t *nvlist_prev_nvpair(nvlist_t *, const nvpair_t *);
-_SYS_NVPAIR_H char *nvpair_name(const nvpair_t *);
+_SYS_NVPAIR_H const char *nvpair_name(const nvpair_t *);
_SYS_NVPAIR_H data_type_t nvpair_type(const nvpair_t *);
_SYS_NVPAIR_H int nvpair_type_is_array(const nvpair_t *);
_SYS_NVPAIR_H int nvpair_value_boolean_value(const nvpair_t *, boolean_t *);
@@ -287,7 +288,7 @@ _SYS_NVPAIR_H int nvpair_value_int32(const nvpair_t *, int32_t *);
_SYS_NVPAIR_H int nvpair_value_uint32(const nvpair_t *, uint32_t *);
_SYS_NVPAIR_H int nvpair_value_int64(const nvpair_t *, int64_t *);
_SYS_NVPAIR_H int nvpair_value_uint64(const nvpair_t *, uint64_t *);
-_SYS_NVPAIR_H int nvpair_value_string(nvpair_t *, char **);
+_SYS_NVPAIR_H int nvpair_value_string(const nvpair_t *, const char **);
_SYS_NVPAIR_H int nvpair_value_nvlist(nvpair_t *, nvlist_t **);
_SYS_NVPAIR_H int nvpair_value_boolean_array(nvpair_t *, boolean_t **,
uint_t *);
@@ -300,7 +301,8 @@ _SYS_NVPAIR_H int nvpair_value_int32_array(nvpair_t *, int32_t **, uint_t *);
_SYS_NVPAIR_H int nvpair_value_uint32_array(nvpair_t *, uint32_t **, uint_t *);
_SYS_NVPAIR_H int nvpair_value_int64_array(nvpair_t *, int64_t **, uint_t *);
_SYS_NVPAIR_H int nvpair_value_uint64_array(nvpair_t *, uint64_t **, uint_t *);
-_SYS_NVPAIR_H int nvpair_value_string_array(nvpair_t *, char ***, uint_t *);
+_SYS_NVPAIR_H int nvpair_value_string_array(nvpair_t *, const char ***,
+ uint_t *);
_SYS_NVPAIR_H int nvpair_value_nvlist_array(nvpair_t *, nvlist_t ***, uint_t *);
_SYS_NVPAIR_H int nvpair_value_hrtime(nvpair_t *, hrtime_t *);
#if !defined(_KERNEL) && !defined(_STANDALONE)
@@ -373,7 +375,8 @@ _SYS_NVPAIR_H uint8_t fnvlist_lookup_uint8(const nvlist_t *, const char *);
_SYS_NVPAIR_H uint16_t fnvlist_lookup_uint16(const nvlist_t *, const char *);
_SYS_NVPAIR_H uint32_t fnvlist_lookup_uint32(const nvlist_t *, const char *);
_SYS_NVPAIR_H uint64_t fnvlist_lookup_uint64(const nvlist_t *, const char *);
-_SYS_NVPAIR_H char *fnvlist_lookup_string(nvlist_t *, const char *);
+_SYS_NVPAIR_H const char *fnvlist_lookup_string(const nvlist_t *,
+ const char *);
_SYS_NVPAIR_H nvlist_t *fnvlist_lookup_nvlist(nvlist_t *, const char *);
_SYS_NVPAIR_H boolean_t *fnvlist_lookup_boolean_array(nvlist_t *, const char *,
uint_t *);
@@ -406,7 +409,7 @@ _SYS_NVPAIR_H uint8_t fnvpair_value_uint8(const nvpair_t *nvp);
_SYS_NVPAIR_H uint16_t fnvpair_value_uint16(const nvpair_t *nvp);
_SYS_NVPAIR_H uint32_t fnvpair_value_uint32(const nvpair_t *nvp);
_SYS_NVPAIR_H uint64_t fnvpair_value_uint64(const nvpair_t *nvp);
-_SYS_NVPAIR_H char *fnvpair_value_string(nvpair_t *nvp);
+_SYS_NVPAIR_H const char *fnvpair_value_string(const nvpair_t *nvp);
_SYS_NVPAIR_H nvlist_t *fnvpair_value_nvlist(nvpair_t *nvp);
#ifdef __cplusplus
diff --git a/include/sys/nvpair_impl.h b/include/sys/nvpair_impl.h
index 809e5c454712..6cae256285e5 100644
--- a/include/sys/nvpair_impl.h
+++ b/include/sys/nvpair_impl.h
@@ -7,7 +7,7 @@
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/pathname.h b/include/sys/pathname.h
index 52f21316c23d..054223170db1 100644
--- a/include/sys/pathname.h
+++ b/include/sys/pathname.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/qat.h b/include/sys/qat.h
index fe0f2c672f97..76360ba99042 100644
--- a/include/sys/qat.h
+++ b/include/sys/qat.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/range_tree.h b/include/sys/range_tree.h
index daa39e20dbd6..d6f60e795288 100644
--- a/include/sys/range_tree.h
+++ b/include/sys/range_tree.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/rrwlock.h b/include/sys/rrwlock.h
index 51ac364af519..367732a8391c 100644
--- a/include/sys/rrwlock.h
+++ b/include/sys/rrwlock.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/sa.h b/include/sys/sa.h
index 42479652ab2c..c551acecab30 100644
--- a/include/sys/sa.h
+++ b/include/sys/sa.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/sa_impl.h b/include/sys/sa_impl.h
index fa10aff8a306..744c8dcb7dfb 100644
--- a/include/sys/sa_impl.h
+++ b/include/sys/sa_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/sha2.h b/include/sys/sha2.h
new file mode 100644
index 000000000000..81dfbbb8cea9
--- /dev/null
+++ b/include/sys/sha2.h
@@ -0,0 +1,127 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#ifndef _SYS_SHA2_H
+#define _SYS_SHA2_H
+
+#ifdef _KERNEL
+#include <sys/types.h>
+#else
+#include <stdint.h>
+#include <stdlib.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SHA224_BLOCK_LENGTH 64
+#define SHA256_BLOCK_LENGTH 64
+#define SHA384_BLOCK_LENGTH 128
+#define SHA512_BLOCK_LENGTH 128
+
+#define SHA224_DIGEST_LENGTH 28
+#define SHA256_DIGEST_LENGTH 32
+#define SHA384_DIGEST_LENGTH 48
+#define SHA512_DIGEST_LENGTH 64
+
+#define SHA512_224_DIGEST_LENGTH 28
+#define SHA512_256_DIGEST_LENGTH 32
+
+#define SHA256_HMAC_BLOCK_SIZE 64
+#define SHA512_HMAC_BLOCK_SIZE 128
+
+/* sha256 context */
+typedef struct {
+ uint32_t state[8];
+ uint64_t count[2];
+ uint8_t wbuf[64];
+
+ /* const sha256_ops_t *ops */
+ const void *ops;
+} sha256_ctx;
+
+/* sha512 context */
+typedef struct {
+ uint64_t state[8];
+ uint64_t count[2];
+ uint8_t wbuf[128];
+
+ /* const sha256_ops_t *ops */
+ const void *ops;
+} sha512_ctx;
+
+/* SHA2 context */
+typedef struct {
+ union {
+ sha256_ctx sha256;
+ sha512_ctx sha512;
+ };
+
+ /* algorithm type */
+ int algotype;
+} SHA2_CTX;
+
+/* SHA2 algorithm types */
+typedef enum sha2_mech_type {
+ SHA256_MECH_INFO_TYPE, /* SUN_CKM_SHA256 */
+ SHA256_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC */
+ SHA256_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC_GENERAL */
+ SHA384_MECH_INFO_TYPE, /* SUN_CKM_SHA384 */
+ SHA384_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC */
+ SHA384_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC_GENERAL */
+ SHA512_MECH_INFO_TYPE, /* SUN_CKM_SHA512 */
+ SHA512_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC */
+ SHA512_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC_GENERAL */
+ SHA512_224_MECH_INFO_TYPE, /* SUN_CKM_SHA512_224 */
+ SHA512_256_MECH_INFO_TYPE /* SUN_CKM_SHA512_256 */
+} sha2_mech_type_t;
+
+#define SHA256 0
+#define SHA256_HMAC 1
+#define SHA256_HMAC_GEN 2
+#define SHA384 3
+#define SHA384_HMAC 4
+#define SHA384_HMAC_GEN 5
+#define SHA512 6
+#define SHA512_HMAC 7
+#define SHA512_HMAC_GEN 8
+#define SHA512_224 9
+#define SHA512_256 10
+
+/* SHA2 Init function */
+extern void SHA2Init(int algotype, SHA2_CTX *ctx);
+
+/* SHA2 Update function */
+extern void SHA2Update(SHA2_CTX *ctx, const void *data, size_t len);
+
+/* SHA2 Final function */
+extern void SHA2Final(void *digest, SHA2_CTX *ctx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SYS_SHA2_H */
diff --git a/include/sys/spa.h b/include/sys/spa.h
index cd2499b30b40..3073c4d1b937 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -20,16 +20,16 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2021 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2017 Joyent, Inc.
- * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, Allan Jude
* Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Datto Inc.
*/
#ifndef _SYS_SPA_H
@@ -62,9 +62,8 @@ typedef struct metaslab_class metaslab_class_t;
typedef struct zio zio_t;
typedef struct zilog zilog_t;
typedef struct spa_aux_vdev spa_aux_vdev_t;
-typedef struct ddt ddt_t;
-typedef struct ddt_entry ddt_entry_t;
typedef struct zbookmark_phys zbookmark_phys_t;
+typedef struct zbookmark_err_phys zbookmark_err_phys_t;
struct bpobj;
struct bplist;
@@ -126,15 +125,15 @@ typedef struct zio_cksum_salt {
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 0 | pad | vdev1 | GRID | ASIZE |
+ * 0 | pad | vdev1 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 2 | pad | vdev2 | GRID | ASIZE |
+ * 2 | pad | vdev2 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 4 | pad | vdev3 | GRID | ASIZE |
+ * 4 | pad | vdev3 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 |G| offset3 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
@@ -166,7 +165,6 @@ typedef struct zio_cksum_salt {
* LSIZE logical size
* PSIZE physical size (after compression)
* ASIZE allocated size (including RAID-Z parity and gang block headers)
- * GRID RAID-Z layout information (reserved for future use)
* cksum checksum function
* comp compression function
* G gang block indicator
@@ -191,11 +189,11 @@ typedef struct zio_cksum_salt {
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 0 | vdev1 | GRID | ASIZE |
+ * 0 | vdev1 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 2 | vdev2 | GRID | ASIZE |
+ * 2 | vdev2 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
@@ -356,7 +354,7 @@ typedef enum bp_embedded_type {
#define BPE_NUM_WORDS 14
#define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t))
#define BPE_IS_PAYLOADWORD(bp, wp) \
- ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth)
+ ((wp) != &(bp)->blk_prop && (wp) != (&(bp)->blk_birth_word[1]))
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
@@ -375,8 +373,7 @@ typedef struct blkptr {
dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
uint64_t blk_prop; /* size, compression, type, etc */
uint64_t blk_pad[2]; /* Extra space for the future */
- uint64_t blk_phys_birth; /* txg when block was allocated */
- uint64_t blk_birth; /* transaction group at birth */
+ uint64_t blk_birth_word[2];
uint64_t blk_fill; /* fill count */
zio_cksum_t blk_cksum; /* 256-bit checksum */
} blkptr_t;
@@ -396,9 +393,6 @@ typedef struct blkptr {
BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, \
SPA_MINBLOCKSHIFT, 0, x)
-#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8)
-#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x)
-
#define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, SPA_VDEVBITS)
#define DVA_SET_VDEV(dva, x) \
BF64_SET((dva)->dva_word[0], 32, SPA_VDEVBITS, x)
@@ -481,15 +475,23 @@ typedef struct blkptr {
#define BP_GET_FREE(bp) BF64_GET((bp)->blk_fill, 0, 1)
#define BP_SET_FREE(bp, x) BF64_SET((bp)->blk_fill, 0, 1, x)
-#define BP_PHYSICAL_BIRTH(bp) \
- (BP_IS_EMBEDDED(bp) ? 0 : \
- (bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)
+#define BP_GET_LOGICAL_BIRTH(bp) (bp)->blk_birth_word[1]
+#define BP_SET_LOGICAL_BIRTH(bp, x) ((bp)->blk_birth_word[1] = (x))
+
+#define BP_GET_PHYSICAL_BIRTH(bp) (bp)->blk_birth_word[0]
+#define BP_SET_PHYSICAL_BIRTH(bp, x) ((bp)->blk_birth_word[0] = (x))
+
+#define BP_GET_BIRTH(bp) \
+ (BP_IS_EMBEDDED(bp) ? 0 : \
+ BP_GET_PHYSICAL_BIRTH(bp) ? BP_GET_PHYSICAL_BIRTH(bp) : \
+ BP_GET_LOGICAL_BIRTH(bp))
#define BP_SET_BIRTH(bp, logical, physical) \
{ \
ASSERT(!BP_IS_EMBEDDED(bp)); \
- (bp)->blk_birth = (logical); \
- (bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
+ BP_SET_LOGICAL_BIRTH(bp, logical); \
+ BP_SET_PHYSICAL_BIRTH(bp, \
+ ((logical) == (physical) ? 0 : (physical))); \
}
#define BP_GET_FILL(bp) \
@@ -542,8 +544,8 @@ typedef struct blkptr {
(dva1)->dva_word[0] == (dva2)->dva_word[0])
#define BP_EQUAL(bp1, bp2) \
- (BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) && \
- (bp1)->blk_birth == (bp2)->blk_birth && \
+ (BP_GET_BIRTH(bp1) == BP_GET_BIRTH(bp2) && \
+ BP_GET_LOGICAL_BIRTH(bp1) == BP_GET_LOGICAL_BIRTH(bp2) && \
DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \
DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \
DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))
@@ -582,8 +584,8 @@ typedef struct blkptr {
(bp)->blk_prop = 0; \
(bp)->blk_pad[0] = 0; \
(bp)->blk_pad[1] = 0; \
- (bp)->blk_phys_birth = 0; \
- (bp)->blk_birth = 0; \
+ (bp)->blk_birth_word[0] = 0; \
+ (bp)->blk_birth_word[1] = 0; \
(bp)->blk_fill = 0; \
ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
}
@@ -600,7 +602,7 @@ typedef struct blkptr {
/*
* This macro allows code sharing between zfs, libzpool, and mdb.
- * 'func' is either snprintf() or mdb_snprintf().
+ * 'func' is either kmem_scnprintf() or mdb_snprintf().
* 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
*/
@@ -632,7 +634,7 @@ typedef struct blkptr {
(u_longlong_t)BP_GET_LEVEL(bp), \
type, \
(u_longlong_t)BP_GET_LSIZE(bp), \
- (u_longlong_t)bp->blk_birth); \
+ (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp)); \
} else if (BP_IS_EMBEDDED(bp)) { \
len = func(buf + len, size - len, \
"EMBEDDED [L%llu %s] et=%u %s " \
@@ -643,14 +645,14 @@ typedef struct blkptr {
compress, \
(u_longlong_t)BPE_GET_LSIZE(bp), \
(u_longlong_t)BPE_GET_PSIZE(bp), \
- (u_longlong_t)bp->blk_birth); \
+ (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp)); \
} else if (BP_IS_REDACTED(bp)) { \
len += func(buf + len, size - len, \
"REDACTED [L%llu %s] size=%llxL birth=%lluL", \
(u_longlong_t)BP_GET_LEVEL(bp), \
type, \
(u_longlong_t)BP_GET_LSIZE(bp), \
- (u_longlong_t)bp->blk_birth); \
+ (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp)); \
} else { \
for (int d = 0; d < BP_GET_NDVAS(bp); d++) { \
const dva_t *dva = &bp->blk_dva[d]; \
@@ -663,6 +665,7 @@ typedef struct blkptr {
(u_longlong_t)DVA_GET_ASIZE(dva), \
ws); \
} \
+ ASSERT3S(copies, >, 0); \
if (BP_IS_ENCRYPTED(bp)) { \
len += func(buf + len, size - len, \
"salt=%llx iv=%llx:%llx%c", \
@@ -678,7 +681,7 @@ typedef struct blkptr {
len += func(buf + len, size - len, \
"[L%llu %s] %s %s %s %s %s %s %s%c" \
"size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \
- "cksum=%llx:%llx:%llx:%llx", \
+ "cksum=%016llx:%016llx:%016llx:%016llx", \
(u_longlong_t)BP_GET_LEVEL(bp), \
type, \
checksum, \
@@ -691,8 +694,8 @@ typedef struct blkptr {
ws, \
(u_longlong_t)BP_GET_LSIZE(bp), \
(u_longlong_t)BP_GET_PSIZE(bp), \
- (u_longlong_t)bp->blk_birth, \
- (u_longlong_t)BP_PHYSICAL_BIRTH(bp), \
+ (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp), \
+ (u_longlong_t)BP_GET_BIRTH(bp), \
(u_longlong_t)BP_GET_FILL(bp), \
ws, \
(u_longlong_t)bp->blk_cksum.zc_word[0], \
@@ -721,16 +724,10 @@ typedef enum spa_mode {
* Send TRIM commands in-line during normal pool operation while deleting.
* OFF: no
* ON: yes
- * NB: IN_FREEBSD_BASE is defined within the FreeBSD sources.
*/
typedef enum {
SPA_AUTOTRIM_OFF = 0, /* default */
SPA_AUTOTRIM_ON,
-#ifdef IN_FREEBSD_BASE
- SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_ON,
-#else
- SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_OFF,
-#endif
} spa_autotrim_t;
/*
@@ -773,7 +770,7 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
#define SPA_ASYNC_CONFIG_UPDATE 0x01
#define SPA_ASYNC_REMOVE 0x02
-#define SPA_ASYNC_PROBE 0x04
+#define SPA_ASYNC_FAULT_VDEV 0x04
#define SPA_ASYNC_RESILVER_DONE 0x08
#define SPA_ASYNC_RESILVER 0x10
#define SPA_ASYNC_AUTOEXPAND 0x20
@@ -785,9 +782,10 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
#define SPA_ASYNC_L2CACHE_REBUILD 0x800
#define SPA_ASYNC_L2CACHE_TRIM 0x1000
#define SPA_ASYNC_REBUILD_DONE 0x2000
+#define SPA_ASYNC_DETACH_SPARE 0x4000
/* device manipulation */
-extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
+extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t ashift_check);
extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
int replacing, int rebuild);
extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
@@ -826,10 +824,19 @@ extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
extern void spa_sync_allpools(void);
-extern int zfs_sync_pass_deferred_free;
+extern uint_t zfs_sync_pass_deferred_free;
+
+/* spa sync taskqueues */
+taskq_t *spa_sync_tq_create(spa_t *spa, const char *name);
+void spa_sync_tq_destroy(spa_t *spa);
+uint_t spa_acq_allocator(spa_t *spa);
+void spa_rel_allocator(spa_t *spa, uint_t allocator);
+void spa_select_allocator(zio_t *zio);
/* spa namespace global mutex */
extern kmutex_t spa_namespace_lock;
+extern avl_tree_t spa_namespace_avl;
+extern kcondvar_t spa_namespace_cv;
/*
* SPA configuration functions in spa_config.c
@@ -838,9 +845,9 @@ extern kmutex_t spa_namespace_lock;
#define SPA_CONFIG_UPDATE_POOL 0
#define SPA_CONFIG_UPDATE_VDEVS 1
-extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t);
+extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t);
extern void spa_config_load(void);
-extern nvlist_t *spa_all_configs(uint64_t *);
+extern int spa_all_configs(uint64_t *generation, nvlist_t **pools);
extern void spa_config_set(spa_t *spa, nvlist_t *config);
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
int getstats);
@@ -969,11 +976,17 @@ extern int spa_import_progress_set_max_txg(uint64_t pool_guid,
uint64_t max_txg);
extern int spa_import_progress_set_state(uint64_t pool_guid,
spa_load_state_t spa_load_state);
+extern void spa_import_progress_set_notes(spa_t *spa,
+ const char *fmt, ...) __printflike(2, 3);
+extern void spa_import_progress_set_notes_nolog(spa_t *spa,
+ const char *fmt, ...) __printflike(2, 3);
/* Pool configuration locks */
extern int spa_config_tryenter(spa_t *spa, int locks, const void *tag,
krw_t rw);
extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw);
+extern void spa_config_enter_mmp(spa_t *spa, int locks, const void *tag,
+ krw_t rw);
extern void spa_config_exit(spa_t *spa, int locks, const void *tag);
extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
@@ -1013,7 +1026,7 @@ extern boolean_t spa_indirect_vdevs_loaded(spa_t *spa);
extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
extern void spa_altroot(spa_t *, char *, size_t);
-extern int spa_sync_pass(spa_t *spa);
+extern uint32_t spa_sync_pass(spa_t *spa);
extern char *spa_name(spa_t *spa);
extern uint64_t spa_guid(spa_t *spa);
extern uint64_t spa_load_guid(spa_t *spa);
@@ -1057,6 +1070,8 @@ extern uint64_t spa_deadman_synctime(spa_t *spa);
extern uint64_t spa_deadman_ziotime(spa_t *spa);
extern uint64_t spa_dirty_data(spa_t *spa);
extern spa_autotrim_t spa_get_autotrim(spa_t *spa);
+extern int spa_get_allocator(spa_t *spa);
+extern void spa_set_allocator(spa_t *spa, const char *allocator);
/* Miscellaneous support routines */
extern void spa_load_failed(spa_t *spa, const char *fmt, ...)
@@ -1110,6 +1125,8 @@ extern uint32_t spa_get_hostid(spa_t *spa);
extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *);
extern boolean_t spa_livelist_delete_check(spa_t *spa);
+extern boolean_t spa_mmp_remote_host_activity(spa_t *spa);
+
extern spa_mode_t spa_mode(spa_t *spa);
extern uint64_t zfs_strtonum(const char *str, char **nptr);
@@ -1133,7 +1150,10 @@ extern const char *spa_state_to_name(spa_t *spa);
/* error handling */
struct zbookmark_phys;
-extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb);
+extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb,
+ const uint64_t birth);
+extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb,
+ uint64_t birth);
extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd,
const zbookmark_phys_t *zb, zio_t *zio, uint64_t state);
extern boolean_t zfs_ereport_is_valid(const char *clazz, spa_t *spa, vdev_t *vd,
@@ -1145,8 +1165,9 @@ extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
-extern uint64_t spa_get_errlog_size(spa_t *spa);
+extern uint64_t spa_approx_errlog_size(spa_t *spa);
extern int spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count);
+extern uint64_t spa_get_last_errlog_size(spa_t *spa);
extern void spa_errlog_rotate(spa_t *spa);
extern void spa_errlog_drain(spa_t *spa);
extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
@@ -1157,10 +1178,13 @@ extern void spa_swap_errlog(spa_t *spa, uint64_t new_head_ds,
extern void sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj,
dmu_tx_t *tx);
extern void spa_upgrade_errlog(spa_t *spa, dmu_tx_t *tx);
-
-/* vdev cache */
-extern void vdev_cache_stat_init(void);
-extern void vdev_cache_stat_fini(void);
+extern int find_top_affected_fs(spa_t *spa, uint64_t head_ds,
+ zbookmark_err_phys_t *zep, uint64_t *top_affected_fs);
+extern int find_birth_txg(struct dsl_dataset *ds, zbookmark_err_phys_t *zep,
+ uint64_t *birth_txg);
+extern void zep_to_zb(uint64_t dataset, zbookmark_err_phys_t *zep,
+ zbookmark_phys_t *zb);
+extern void name_to_errphys(char *buf, zbookmark_err_phys_t *zep);
/* vdev mirror */
extern void vdev_mirror_stat_init(void);
@@ -1201,6 +1225,7 @@ int param_set_deadman_ziotime(ZFS_MODULE_PARAM_ARGS);
int param_set_deadman_synctime(ZFS_MODULE_PARAM_ARGS);
int param_set_slop_shift(ZFS_MODULE_PARAM_ARGS);
int param_set_deadman_failmode(ZFS_MODULE_PARAM_ARGS);
+int param_set_active_allocator(ZFS_MODULE_PARAM_ARGS);
#ifdef ZFS_DEBUG
#define dprintf_bp(bp, fmt, ...) do { \
@@ -1217,9 +1242,9 @@ int param_set_deadman_failmode(ZFS_MODULE_PARAM_ARGS);
extern spa_mode_t spa_mode_global;
extern int zfs_deadman_enabled;
-extern unsigned long zfs_deadman_synctime_ms;
-extern unsigned long zfs_deadman_ziotime_ms;
-extern unsigned long zfs_deadman_checktime_ms;
+extern uint64_t zfs_deadman_synctime_ms;
+extern uint64_t zfs_deadman_ziotime_ms;
+extern uint64_t zfs_deadman_checktime_ms;
extern kmem_cache_t *zio_buf_cache[];
extern kmem_cache_t *zio_data_buf_cache[];
diff --git a/include/sys/spa_checkpoint.h b/include/sys/spa_checkpoint.h
index 9be2b6eeab3c..e4475ff35f44 100644
--- a/include/sys/spa_checkpoint.h
+++ b/include/sys/spa_checkpoint.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/spa_checksum.h b/include/sys/spa_checksum.h
index b87990105a71..2202afdeb8da 100644
--- a/include/sys/spa_checksum.h
+++ b/include/sys/spa_checksum.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index 9946c4e3c316..a40914ec5fcb 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -20,13 +20,13 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
- * Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019 Datto Inc.
*/
#ifndef _SYS_SPA_IMPL_H
@@ -38,6 +38,7 @@
#include <sys/vdev.h>
#include <sys/vdev_rebuild.h>
#include <sys/vdev_removal.h>
+#include <sys/vdev_raidz.h>
#include <sys/metaslab.h>
#include <sys/dmu.h>
#include <sys/dsl_pool.h>
@@ -62,10 +63,17 @@ typedef struct spa_alloc {
avl_tree_t spaa_tree;
} ____cacheline_aligned spa_alloc_t;
+typedef struct spa_allocs_use {
+ kmutex_t sau_lock;
+ uint_t sau_rotor;
+ boolean_t sau_inuse[];
+} spa_allocs_use_t;
+
typedef struct spa_error_entry {
zbookmark_phys_t se_bookmark;
char *se_name;
avl_node_t se_avl;
+ zbookmark_err_phys_t se_zep; /* not accounted in avl_find */
} spa_error_entry_t;
typedef struct spa_history_phys {
@@ -187,6 +195,12 @@ typedef struct spa_taskqs {
taskq_t **stqs_taskq;
} spa_taskqs_t;
+/* one for each thread in the spa sync taskq */
+typedef struct spa_syncthread_info {
+ kthread_t *sti_thread;
+ uint_t sti_allocator;
+} spa_syncthread_info_t;
+
typedef enum spa_all_vdev_zap_action {
AVZ_ACTION_NONE = 0,
AVZ_ACTION_DESTROY, /* Destroy all per-vdev ZAPs and the AVZ. */
@@ -215,7 +229,7 @@ struct spa {
nvlist_t *spa_config_splitting; /* config for splitting */
nvlist_t *spa_load_info; /* info and errors from load */
uint64_t spa_config_txg; /* txg of last config change */
- int spa_sync_pass; /* iterate-to-convergence */
+ uint32_t spa_sync_pass; /* iterate-to-convergence */
pool_state_t spa_state; /* pool state */
int spa_inject_ref; /* injection references */
uint8_t spa_sync_on; /* sync threads are running */
@@ -229,6 +243,7 @@ struct spa {
dsl_pool_t *spa_dsl_pool;
boolean_t spa_is_initializing; /* true while opening pool */
boolean_t spa_is_exporting; /* true while exporting pool */
+ kthread_t *spa_load_thread; /* loading, no namespace lock */
metaslab_class_t *spa_normal_class; /* normal data class */
metaslab_class_t *spa_log_class; /* intent log data class */
metaslab_class_t *spa_embedded_log_class; /* log on normal vdevs */
@@ -249,6 +264,7 @@ struct spa {
uint64_t spa_min_ashift; /* of vdevs in normal class */
uint64_t spa_max_ashift; /* of vdevs in normal class */
uint64_t spa_min_alloc; /* of vdevs in normal class */
+ uint64_t spa_gcd_alloc; /* of vdevs in normal class */
uint64_t spa_config_guid; /* config pool guid */
uint64_t spa_load_guid; /* spa_load initialized guid */
uint64_t spa_last_synced_guid; /* last synced guid */
@@ -260,10 +276,17 @@ struct spa {
* allocation performance in write-heavy workloads.
*/
spa_alloc_t *spa_allocs;
+ spa_allocs_use_t *spa_allocs_use;
int spa_alloc_count;
+ int spa_active_allocator; /* selectable allocator */
+
+ /* per-allocator sync thread taskqs */
+ taskq_t *spa_sync_tq;
+ spa_syncthread_info_t *spa_syncthreads;
spa_aux_vdev_t spa_spares; /* hot spares */
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
+ boolean_t spa_aux_sync_uber; /* need to sync aux uber */
nvlist_t *spa_label_features; /* Features for reading MOS */
uint64_t spa_config_object; /* MOS object for pool config */
uint64_t spa_config_generation; /* config generation number */
@@ -294,6 +317,10 @@ struct spa {
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
uint64_t spa_scan_pass_issued; /* issued bytes per pass */
+ /* error scrub pause time in milliseconds */
+ uint64_t spa_scan_pass_errorscrub_pause;
+ /* total error scrub paused time in milliseconds */
+ uint64_t spa_scan_pass_errorscrub_spent_paused;
/*
* We are in the middle of a resilver, and another resilver
* is needed once this one completes. This is set iff any
@@ -316,6 +343,9 @@ struct spa {
spa_condensing_indirect_t *spa_condensing_indirect;
zthr_t *spa_condense_zthr; /* zthr doing condense. */
+ vdev_raidz_expand_t *spa_raidz_expand;
+ zthr_t *spa_raidz_expand_zthr;
+
uint64_t spa_checkpoint_txg; /* the txg of the checkpoint */
spa_checkpoint_info_t spa_checkpoint_info; /* checkpoint accounting */
zthr_t *spa_checkpoint_discard_zthr;
@@ -349,6 +379,7 @@ struct spa {
kmutex_t spa_errlist_lock; /* error list/ereport lock */
avl_tree_t spa_errlist_last; /* last error list */
avl_tree_t spa_errlist_scrub; /* scrub error list */
+ avl_tree_t spa_errlist_healed; /* list of healed blocks */
uint64_t spa_deflate; /* should we deflate? */
uint64_t spa_history; /* history object */
kmutex_t spa_history_lock; /* history lock */
@@ -379,6 +410,7 @@ struct spa {
uint64_t spa_dedup_dspace; /* Cache get_dedup_dspace() */
uint64_t spa_dedup_checksum; /* default dedup checksum */
uint64_t spa_dspace; /* dspace in normal class */
+ struct brt *spa_brt; /* in-core BRT */
kmutex_t spa_vdev_top_lock; /* dueling offline/remove */
kmutex_t spa_proc_lock; /* protects spa_proc* */
kcondvar_t spa_proc_cv; /* spa_proc_state transitions */
@@ -415,7 +447,9 @@ struct spa {
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
taskq_t *spa_zvol_taskq; /* Taskq for minor management */
+ taskq_t *spa_metaslab_taskq; /* Taskq for metaslab preload */
taskq_t *spa_prefetch_taskq; /* Taskq for prefetch threads */
+ taskq_t *spa_upgrade_taskq; /* Taskq for upgrade jobs */
uint64_t spa_multihost; /* multihost aware (mmp) */
mmp_thread_t spa_mmp; /* multihost mmp thread */
list_t spa_leaf_list; /* list of leaf vdevs */
@@ -439,15 +473,13 @@ struct spa {
*/
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
zfs_refcount_t spa_refcount; /* number of opens */
-
- taskq_t *spa_upgrade_taskq; /* taskq for upgrade jobs */
};
extern char *spa_config_path;
extern const char *zfs_deadman_failmode;
-extern int spa_slop_shift;
+extern uint_t spa_slop_shift;
extern void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
- task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent);
+ task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent, zio_t *zio);
extern void spa_taskq_dispatch_sync(spa_t *, zio_type_t t, zio_taskq_type_t q,
task_func_t *func, void *arg, uint_t flags);
extern void spa_load_spares(spa_t *spa);
@@ -459,6 +491,8 @@ extern int param_set_deadman_failmode_common(const char *val);
extern void spa_set_deadman_synctime(hrtime_t ns);
extern void spa_set_deadman_ziotime(hrtime_t ns);
extern const char *spa_history_zone(void);
+extern const char *zfs_active_allocator;
+extern int param_set_active_allocator_common(const char *val);
#ifdef __cplusplus
}
diff --git a/include/sys/spa_log_spacemap.h b/include/sys/spa_log_spacemap.h
index 72229df6cd16..f59e69917833 100644
--- a/include/sys/spa_log_spacemap.h
+++ b/include/sys/spa_log_spacemap.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/space_map.h b/include/sys/space_map.h
index cb81e710bd1e..14c5beccee55 100644
--- a/include/sys/space_map.h
+++ b/include/sys/space_map.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/space_reftree.h b/include/sys/space_reftree.h
index ca9d41dc1388..b7a846aec624 100644
--- a/include/sys/space_reftree.h
+++ b/include/sys/space_reftree.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/sysevent.h b/include/sys/sysevent.h
index 6510297d601f..f8ae17497366 100644
--- a/include/sys/sysevent.h
+++ b/include/sys/sysevent.h
@@ -7,7 +7,7 @@
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/sysevent/dev.h b/include/sys/sysevent/dev.h
index 1117538d822d..0783d0073162 100644
--- a/include/sys/sysevent/dev.h
+++ b/include/sys/sysevent/dev.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -244,6 +244,9 @@ extern "C" {
#define DEV_PATH "path"
#define DEV_IS_PART "is_slice"
#define DEV_SIZE "dev_size"
+
+/* Size of the whole parent block device (if dev is a partition) */
+#define DEV_PARENT_SIZE "dev_parent_size"
#endif /* __linux__ */
#define EV_V1 1
diff --git a/include/sys/sysevent/eventdefs.h b/include/sys/sysevent/eventdefs.h
index 2067b355afb4..a21085257967 100644
--- a/include/sys/sysevent/eventdefs.h
+++ b/include/sys/sysevent/eventdefs.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -123,6 +123,11 @@ extern "C" {
#define ESC_ZFS_TRIM_CANCEL "trim_cancel"
#define ESC_ZFS_TRIM_RESUME "trim_resume"
#define ESC_ZFS_TRIM_SUSPEND "trim_suspend"
+#define ESC_ZFS_ERRORSCRUB_START "errorscrub_start"
+#define ESC_ZFS_ERRORSCRUB_FINISH "errorscrub_finish"
+#define ESC_ZFS_ERRORSCRUB_ABORT "errorscrub_abort"
+#define ESC_ZFS_ERRORSCRUB_RESUME "errorscrub_resume"
+#define ESC_ZFS_ERRORSCRUB_PAUSED "errorscrub_paused"
/*
* datalink subclass definitions.
diff --git a/include/sys/txg.h b/include/sys/txg.h
index f38f0006c040..46945210cdb5 100644
--- a/include/sys/txg.h
+++ b/include/sys/txg.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -138,7 +138,7 @@ extern void *txg_list_head(txg_list_t *tl, uint64_t txg);
extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg);
/* Global tuning */
-extern int zfs_txg_timeout;
+extern uint_t zfs_txg_timeout;
#ifdef ZFS_DEBUG
diff --git a/include/sys/txg_impl.h b/include/sys/txg_impl.h
index 047d51b94c66..8ab7969b25be 100644
--- a/include/sys/txg_impl.h
+++ b/include/sys/txg_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -73,8 +73,7 @@ struct tx_cpu {
kcondvar_t tc_cv[TXG_SIZE];
uint64_t tc_count[TXG_SIZE]; /* tx hold count on each txg */
list_t tc_callbacks[TXG_SIZE]; /* commit cb list */
- char tc_pad[8]; /* pad to fill 3 cache lines */
-};
+} ____cacheline_aligned;
/*
* The tx_state structure maintains the state information about the different
diff --git a/include/sys/u8_textprep.h b/include/sys/u8_textprep.h
index 09ab13af268c..e82037de4fe4 100644
--- a/include/sys/u8_textprep.h
+++ b/include/sys/u8_textprep.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/u8_textprep_data.h b/include/sys/u8_textprep_data.h
index 03f71f26c9e1..2a97966ee56e 100644
--- a/include/sys/u8_textprep_data.h
+++ b/include/sys/u8_textprep_data.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/uberblock.h b/include/sys/uberblock.h
index 044e438387c0..ff3a8c81232a 100644
--- a/include/sys/uberblock.h
+++ b/include/sys/uberblock.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/uberblock_impl.h b/include/sys/uberblock_impl.h
index 91699e65131a..e480a4bac0b9 100644
--- a/include/sys/uberblock_impl.h
+++ b/include/sys/uberblock_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -50,20 +50,20 @@ extern "C" {
#define MMP_SEQ_VALID_BIT 0x02
#define MMP_FAIL_INT_VALID_BIT 0x04
-#define MMP_VALID(ubp) (ubp->ub_magic == UBERBLOCK_MAGIC && \
- ubp->ub_mmp_magic == MMP_MAGIC)
-#define MMP_INTERVAL_VALID(ubp) (MMP_VALID(ubp) && (ubp->ub_mmp_config & \
+#define MMP_VALID(ubp) ((ubp)->ub_magic == UBERBLOCK_MAGIC && \
+ (ubp)->ub_mmp_magic == MMP_MAGIC)
+#define MMP_INTERVAL_VALID(ubp) (MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
MMP_INTERVAL_VALID_BIT))
-#define MMP_SEQ_VALID(ubp) (MMP_VALID(ubp) && (ubp->ub_mmp_config & \
+#define MMP_SEQ_VALID(ubp) (MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
MMP_SEQ_VALID_BIT))
-#define MMP_FAIL_INT_VALID(ubp) (MMP_VALID(ubp) && (ubp->ub_mmp_config & \
+#define MMP_FAIL_INT_VALID(ubp) (MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
MMP_FAIL_INT_VALID_BIT))
-#define MMP_INTERVAL(ubp) ((ubp->ub_mmp_config & 0x00000000FFFFFF00) \
+#define MMP_INTERVAL(ubp) (((ubp)->ub_mmp_config & 0x00000000FFFFFF00) \
>> 8)
-#define MMP_SEQ(ubp) ((ubp->ub_mmp_config & 0x0000FFFF00000000) \
+#define MMP_SEQ(ubp) (((ubp)->ub_mmp_config & 0x0000FFFF00000000) \
>> 32)
-#define MMP_FAIL_INT(ubp) ((ubp->ub_mmp_config & 0xFFFF000000000000) \
+#define MMP_FAIL_INT(ubp) (((ubp)->ub_mmp_config & 0xFFFF000000000000) \
>> 48)
#define MMP_INTERVAL_SET(write) \
@@ -75,6 +75,39 @@ extern "C" {
#define MMP_FAIL_INT_SET(fail) \
(((uint64_t)(fail & 0xFFFF) << 48) | MMP_FAIL_INT_VALID_BIT)
+/*
+ * RAIDZ expansion reflow information.
+ *
+ * 64 56 48 40 32 24 16 8 0
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * |Scratch | Reflow |
+ * | State | Offset |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ */
+typedef enum raidz_reflow_scratch_state {
+ RRSS_SCRATCH_NOT_IN_USE = 0,
+ RRSS_SCRATCH_VALID,
+ RRSS_SCRATCH_INVALID_SYNCED,
+ RRSS_SCRATCH_INVALID_SYNCED_ON_IMPORT,
+ RRSS_SCRATCH_INVALID_SYNCED_REFLOW
+} raidz_reflow_scratch_state_t;
+
+#define RRSS_GET_OFFSET(ub) \
+ BF64_GET_SB((ub)->ub_raidz_reflow_info, 0, 55, SPA_MINBLOCKSHIFT, 0)
+#define RRSS_SET_OFFSET(ub, x) \
+ BF64_SET_SB((ub)->ub_raidz_reflow_info, 0, 55, SPA_MINBLOCKSHIFT, 0, x)
+
+#define RRSS_GET_STATE(ub) \
+ BF64_GET((ub)->ub_raidz_reflow_info, 55, 9)
+#define RRSS_SET_STATE(ub, x) \
+ BF64_SET((ub)->ub_raidz_reflow_info, 55, 9, x)
+
+#define RAIDZ_REFLOW_SET(ub, state, offset) do { \
+ (ub)->ub_raidz_reflow_info = 0; \
+ RRSS_SET_OFFSET(ub, offset); \
+ RRSS_SET_STATE(ub, state); \
+} while (0)
+
struct uberblock {
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
uint64_t ub_version; /* SPA_VERSION */
@@ -132,10 +165,12 @@ struct uberblock {
* pool from a checkpointed uberblock [see spa_ld_select_uberblock()],
* the value of the field is used to determine which ZIL blocks have
* been allocated according to the ms_sm when we are rewinding to a
- * checkpoint. Specifically, if blk_birth > ub_checkpoint_txg, then
+ * checkpoint. Specifically, if logical birth > ub_checkpoint_txg,then
* the ZIL block is not allocated [see uses of spa_min_claim_txg()].
*/
uint64_t ub_checkpoint_txg;
+
+ uint64_t ub_raidz_reflow_info;
};
#ifdef __cplusplus
diff --git a/include/sys/uio_impl.h b/include/sys/uio_impl.h
index cde3ef40485b..aa34edda5f6a 100644
--- a/include/sys/uio_impl.h
+++ b/include/sys/uio_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/unique.h b/include/sys/unique.h
index d4ba32e5c642..bc7944657521 100644
--- a/include/sys/unique.h
+++ b/include/sys/unique.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/uuid.h b/include/sys/uuid.h
index eab4622a6d9a..19f1baa4432e 100644
--- a/include/sys/uuid.h
+++ b/include/sys/uuid.h
@@ -7,7 +7,7 @@
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/vdev.h b/include/sys/vdev.h
index 8a526d0bf511..38f62b07dc59 100644
--- a/include/sys/vdev.h
+++ b/include/sys/vdev.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -132,15 +132,19 @@ extern void vdev_space_update(vdev_t *vd,
extern int64_t vdev_deflated_space(vdev_t *vd, int64_t space);
+extern uint64_t vdev_psize_to_asize_txg(vdev_t *vd, uint64_t psize,
+ uint64_t txg);
extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
/*
- * Return the amount of space allocated for a gang block header.
+ * Return the amount of space allocated for a gang block header. Note that
+ * since the physical birth txg is not provided, this must be constant for
+ * a given vdev. (e.g. raidz expansion can't change this)
*/
static inline uint64_t
vdev_gang_header_asize(vdev_t *vd)
{
- return (vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE));
+ return (vdev_psize_to_asize_txg(vd, SPA_GANGBLOCKSIZE, 0));
}
extern int vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux);
@@ -148,6 +152,7 @@ extern int vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux);
extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags,
vdev_state_t *);
extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags);
+extern int vdev_remove_wanted(spa_t *spa, uint64_t guid);
extern void vdev_clear(spa_t *spa, vdev_t *vd);
extern boolean_t vdev_is_dead(vdev_t *vd);
@@ -157,20 +162,15 @@ extern boolean_t vdev_allocatable(vdev_t *vd);
extern boolean_t vdev_accessible(vdev_t *vd, zio_t *zio);
extern boolean_t vdev_is_spacemap_addressable(vdev_t *vd);
-extern void vdev_cache_init(vdev_t *vd);
-extern void vdev_cache_fini(vdev_t *vd);
-extern boolean_t vdev_cache_read(zio_t *zio);
-extern void vdev_cache_write(zio_t *zio);
-extern void vdev_cache_purge(vdev_t *vd);
-
extern void vdev_queue_init(vdev_t *vd);
extern void vdev_queue_fini(vdev_t *vd);
extern zio_t *vdev_queue_io(zio_t *zio);
extern void vdev_queue_io_done(zio_t *zio);
extern void vdev_queue_change_io_priority(zio_t *zio, zio_priority_t priority);
-extern int vdev_queue_length(vdev_t *vd);
+extern uint32_t vdev_queue_length(vdev_t *vd);
extern uint64_t vdev_queue_last_offset(vdev_t *vd);
+extern uint64_t vdev_queue_class_length(vdev_t *vq, zio_priority_t p);
extern void vdev_config_dirty(vdev_t *vd);
extern void vdev_config_clean(vdev_t *vd);
@@ -185,11 +185,12 @@ extern boolean_t vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx);
typedef enum vdev_config_flag {
VDEV_CONFIG_SPARE = 1 << 0,
VDEV_CONFIG_L2CACHE = 1 << 1,
- VDEV_CONFIG_REMOVING = 1 << 2,
- VDEV_CONFIG_MOS = 1 << 3,
- VDEV_CONFIG_MISSING = 1 << 4
+ VDEV_CONFIG_MOS = 1 << 2,
+ VDEV_CONFIG_MISSING = 1 << 3
} vdev_config_flag_t;
+extern void vdev_post_kobj_evt(vdev_t *vd);
+extern void vdev_clear_kobj_evt(vdev_t *vd);
extern void vdev_top_config_generate(spa_t *spa, nvlist_t *config);
extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
boolean_t getstats, vdev_config_flag_t flags);
@@ -207,6 +208,8 @@ extern void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t
offset, uint64_t size, zio_done_func_t *done, void *priv, int flags);
extern int vdev_label_read_bootenv(vdev_t *, nvlist_t *);
extern int vdev_label_write_bootenv(vdev_t *, nvlist_t *);
+extern int vdev_uberblock_sync_list(vdev_t **, int, struct uberblock *, int);
+extern int vdev_check_boot_reserve(spa_t *, vdev_t *);
typedef enum {
VDEV_LABEL_CREATE, /* create/add a new device */
diff --git a/include/sys/vdev_disk.h b/include/sys/vdev_disk.h
index a7e19fbf0c4b..02c583777ebc 100644
--- a/include/sys/vdev_disk.h
+++ b/include/sys/vdev_disk.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/vdev_draid.h b/include/sys/vdev_draid.h
index dd334acbacf1..a204f1e3c74a 100644
--- a/include/sys/vdev_draid.h
+++ b/include/sys/vdev_draid.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/vdev_file.h b/include/sys/vdev_file.h
index 1514a44fcabb..fddecbfe1ab5 100644
--- a/include/sys/vdev_file.h
+++ b/include/sys/vdev_file.h
@@ -7,7 +7,7 @@
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index db8fbdeb06df..57ff31e89eb9 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2020 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2023, Klara Inc.
*/
#ifndef _SYS_VDEV_IMPL_H
@@ -34,7 +35,6 @@
#include <sys/nvpair.h>
#include <sys/space_map.h>
#include <sys/vdev.h>
-#include <sys/dkio.h>
#include <sys/uberblock_impl.h>
#include <sys/vdev_indirect_mapping.h>
#include <sys/vdev_indirect_births.h>
@@ -57,23 +57,22 @@ extern "C" {
* Forward declarations that lots of things need.
*/
typedef struct vdev_queue vdev_queue_t;
-typedef struct vdev_cache vdev_cache_t;
-typedef struct vdev_cache_entry vdev_cache_entry_t;
struct abd;
-extern int zfs_vdev_queue_depth_pct;
-extern int zfs_vdev_def_queue_depth;
-extern uint32_t zfs_vdev_async_write_max_active;
+extern uint_t zfs_vdev_queue_depth_pct;
+extern uint_t zfs_vdev_def_queue_depth;
+extern uint_t zfs_vdev_async_write_max_active;
/*
* Virtual device operations
*/
typedef int vdev_init_func_t(spa_t *spa, nvlist_t *nv, void **tsd);
+typedef void vdev_kobj_post_evt_func_t(vdev_t *vd);
typedef void vdev_fini_func_t(vdev_t *vd);
typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size,
uint64_t *ashift, uint64_t *pshift);
typedef void vdev_close_func_t(vdev_t *vd);
-typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
+typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize, uint64_t txg);
typedef uint64_t vdev_min_asize_func_t(vdev_t *vd);
typedef uint64_t vdev_min_alloc_func_t(vdev_t *vd);
typedef void vdev_io_start_func_t(zio_t *zio);
@@ -123,6 +122,7 @@ typedef const struct vdev_ops {
vdev_config_generate_func_t *vdev_op_config_generate;
vdev_nparity_func_t *vdev_op_nparity;
vdev_ndisks_func_t *vdev_op_ndisks;
+ vdev_kobj_post_evt_func_t *vdev_op_kobj_evt_post;
char vdev_op_type[16];
boolean_t vdev_op_leaf;
} vdev_ops_t;
@@ -130,44 +130,27 @@ typedef const struct vdev_ops {
/*
* Virtual device properties
*/
-struct vdev_cache_entry {
- struct abd *ve_abd;
- uint64_t ve_offset;
- clock_t ve_lastused;
- avl_node_t ve_offset_node;
- avl_node_t ve_lastused_node;
- uint32_t ve_hits;
- uint16_t ve_missed_update;
- zio_t *ve_fill_io;
-};
-
-struct vdev_cache {
- avl_tree_t vc_offset_tree;
- avl_tree_t vc_lastused_tree;
- kmutex_t vc_lock;
-};
-
-typedef struct vdev_queue_class {
- uint32_t vqc_active;
-
- /*
- * Sorted by offset or timestamp, depending on if the queue is
- * LBA-ordered vs FIFO.
- */
- avl_tree_t vqc_queued_tree;
+typedef union vdev_queue_class {
+ struct {
+ ulong_t vqc_list_numnodes;
+ list_t vqc_list;
+ };
+ avl_tree_t vqc_tree;
} vdev_queue_class_t;
struct vdev_queue {
vdev_t *vq_vdev;
vdev_queue_class_t vq_class[ZIO_PRIORITY_NUM_QUEUEABLE];
- avl_tree_t vq_active_tree;
avl_tree_t vq_read_offset_tree;
avl_tree_t vq_write_offset_tree;
- avl_tree_t vq_trim_offset_tree;
uint64_t vq_last_offset;
zio_priority_t vq_last_prio; /* Last sent I/O priority. */
+ uint32_t vq_cqueued; /* Classes with queued I/Os. */
+ uint32_t vq_cactive[ZIO_PRIORITY_NUM_QUEUEABLE];
+ uint32_t vq_active; /* Number of active I/Os. */
uint32_t vq_ia_active; /* Active interactive I/Os. */
uint32_t vq_nia_credit; /* Non-interactive I/Os credit. */
+ list_t vq_active_list; /* List of active I/Os. */
hrtime_t vq_io_complete_ts; /* time last i/o completed */
hrtime_t vq_io_delta_ts;
zio_t vq_io_search; /* used as local for stack reduction */
@@ -275,6 +258,7 @@ struct vdev {
kthread_t *vdev_open_thread; /* thread opening children */
kthread_t *vdev_validate_thread; /* thread validating children */
uint64_t vdev_crtxg; /* txg when top-level was added */
+ uint64_t vdev_root_zap;
/*
* Top-level vdev state.
@@ -285,18 +269,19 @@ struct vdev {
metaslab_group_t *vdev_mg; /* metaslab group */
metaslab_group_t *vdev_log_mg; /* embedded slog metaslab group */
metaslab_t **vdev_ms; /* metaslab array */
- uint64_t vdev_pending_fastwrite; /* allocated fastwrites */
txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */
txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */
txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */
boolean_t vdev_remove_wanted; /* async remove wanted? */
- boolean_t vdev_probe_wanted; /* async probe wanted? */
+ boolean_t vdev_fault_wanted; /* async faulted wanted? */
list_node_t vdev_config_dirty_node; /* config dirty list */
list_node_t vdev_state_dirty_node; /* state dirty list */
uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */
uint64_t vdev_islog; /* is an intent log device */
uint64_t vdev_noalloc; /* device is passivated? */
uint64_t vdev_removing; /* device is being removed? */
+ uint64_t vdev_failfast; /* device failfast setting */
+ boolean_t vdev_rz_expanding; /* raidz is being expanded? */
boolean_t vdev_ishole; /* is a hole in the namespace */
uint64_t vdev_top_zap;
vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias */
@@ -326,6 +311,7 @@ struct vdev {
list_node_t vdev_trim_node;
kmutex_t vdev_autotrim_lock;
kcondvar_t vdev_autotrim_cv;
+ kcondvar_t vdev_autotrim_kick_cv;
kthread_t *vdev_autotrim_thread;
/* Protects vdev_trim_thread and vdev_trim_state. */
kmutex_t vdev_trim_lock;
@@ -436,8 +422,9 @@ struct vdev {
boolean_t vdev_isl2cache; /* was a l2cache device */
boolean_t vdev_copy_uberblocks; /* post expand copy uberblocks */
boolean_t vdev_resilver_deferred; /* resilver deferred */
+ boolean_t vdev_kobj_flag; /* kobj event record */
+ boolean_t vdev_attaching; /* vdev attach ashift handling */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
- vdev_cache_t vdev_cache; /* physical block cache */
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */
zio_t *vdev_probe_zio; /* root of current probe */
vdev_aux_t vdev_label_aux; /* on-disk aux state */
@@ -465,6 +452,16 @@ struct vdev {
zfs_ratelimit_t vdev_delay_rl;
zfs_ratelimit_t vdev_deadman_rl;
zfs_ratelimit_t vdev_checksum_rl;
+
+ /*
+ * Vdev properties for tuning ZED or zfsd
+ */
+ uint64_t vdev_checksum_n;
+ uint64_t vdev_checksum_t;
+ uint64_t vdev_io_n;
+ uint64_t vdev_io_t;
+ uint64_t vdev_slow_io_n;
+ uint64_t vdev_slow_io_t;
};
#define VDEV_PAD_SIZE (8 << 10)
@@ -542,6 +539,7 @@ typedef struct vdev_label {
/*
* Size of embedded boot loader region on each label.
* The total size of the first two labels plus the boot area is 4MB.
+ * On RAIDZ, this space is overwritten during RAIDZ expansion.
*/
#define VDEV_BOOT_SIZE (7ULL << 19) /* 3.5M */
@@ -614,7 +612,7 @@ extern vdev_ops_t vdev_indirect_ops;
*/
extern void vdev_default_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
range_seg64_t *physical_rs, range_seg64_t *remain_rs);
-extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize);
+extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize, uint64_t txg);
extern uint64_t vdev_default_min_asize(vdev_t *vd);
extern uint64_t vdev_get_min_asize(vdev_t *vd);
extern void vdev_set_min_asize(vdev_t *vd);
@@ -641,12 +639,13 @@ extern int vdev_obsolete_counts_are_precise(vdev_t *vd, boolean_t *are_precise);
*/
int vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj);
void vdev_metaslab_group_create(vdev_t *vd);
+uint64_t vdev_best_ashift(uint64_t logical, uint64_t a, uint64_t b);
/*
* Vdev ashift optimization tunables
*/
-extern uint64_t zfs_vdev_min_auto_ashift;
-extern uint64_t zfs_vdev_max_auto_ashift;
+extern uint_t zfs_vdev_min_auto_ashift;
+extern uint_t zfs_vdev_max_auto_ashift;
int param_set_min_auto_ashift(ZFS_MODULE_PARAM_ARGS);
int param_set_max_auto_ashift(ZFS_MODULE_PARAM_ARGS);
diff --git a/include/sys/vdev_initialize.h b/include/sys/vdev_initialize.h
index 81d39ebebcb2..78702b7325a0 100644
--- a/include/sys/vdev_initialize.h
+++ b/include/sys/vdev_initialize.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -33,6 +33,7 @@ extern "C" {
#endif
extern void vdev_initialize(vdev_t *vd);
+extern void vdev_uninitialize(vdev_t *vd);
extern void vdev_initialize_stop(vdev_t *vd,
vdev_initializing_state_t tgt_state, list_t *vd_list);
extern void vdev_initialize_stop_all(vdev_t *vd,
diff --git a/include/sys/vdev_raidz.h b/include/sys/vdev_raidz.h
index c7cf0af6d945..a34bc00ca4df 100644
--- a/include/sys/vdev_raidz.h
+++ b/include/sys/vdev_raidz.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -26,6 +26,7 @@
#define _SYS_VDEV_RAIDZ_H
#include <sys/types.h>
+#include <sys/zfs_rlock.h>
#ifdef __cplusplus
extern "C" {
@@ -35,6 +36,8 @@ struct zio;
struct raidz_col;
struct raidz_row;
struct raidz_map;
+struct vdev_raidz;
+struct uberblock;
#if !defined(_KERNEL)
struct kernel_param {};
#endif
@@ -44,13 +47,19 @@ struct kernel_param {};
*/
struct raidz_map *vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t,
uint64_t);
+struct raidz_map *vdev_raidz_map_alloc_expanded(struct zio *,
+ uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, boolean_t);
void vdev_raidz_map_free(struct raidz_map *);
+void vdev_raidz_free(struct vdev_raidz *);
void vdev_raidz_generate_parity_row(struct raidz_map *, struct raidz_row *);
void vdev_raidz_generate_parity(struct raidz_map *);
void vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
void vdev_raidz_child_done(zio_t *);
void vdev_raidz_io_done(zio_t *);
void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);
+struct raidz_row *vdev_raidz_row_alloc(int);
+void vdev_raidz_reflow_copy_scratch(spa_t *);
+void raidz_dtl_reassessed(vdev_t *);
extern const zio_vsd_ops_t vdev_raidz_vsd_ops;
@@ -65,11 +74,101 @@ int vdev_raidz_math_reconstruct(struct raidz_map *, struct raidz_row *,
const int *, const int *, const int);
int vdev_raidz_impl_set(const char *);
+typedef struct vdev_raidz_expand {
+ uint64_t vre_vdev_id;
+
+ kmutex_t vre_lock;
+ kcondvar_t vre_cv;
+
+ /*
+ * How much i/o is outstanding (issued and not completed).
+ */
+ uint64_t vre_outstanding_bytes;
+
+ /*
+ * Next offset to issue i/o for.
+ */
+ uint64_t vre_offset;
+
+ /*
+ * Lowest offset of a failed expansion i/o. The expansion will retry
+ * from here. Once the expansion thread notices the failure and exits,
+ * vre_failed_offset is reset back to UINT64_MAX, and
+ * vre_waiting_for_resilver will be set.
+ */
+ uint64_t vre_failed_offset;
+ boolean_t vre_waiting_for_resilver;
+
+ /*
+ * Offset that is completing each txg
+ */
+ uint64_t vre_offset_pertxg[TXG_SIZE];
+
+ /*
+ * Bytes copied in each txg.
+ */
+ uint64_t vre_bytes_copied_pertxg[TXG_SIZE];
+
+ /*
+ * The rangelock prevents normal read/write zio's from happening while
+ * there are expansion (reflow) i/os in progress to the same offsets.
+ */
+ zfs_rangelock_t vre_rangelock;
+
+ /*
+ * These fields are stored on-disk in the vdev_top_zap:
+ */
+ dsl_scan_state_t vre_state;
+ uint64_t vre_start_time;
+ uint64_t vre_end_time;
+ uint64_t vre_bytes_copied;
+} vdev_raidz_expand_t;
+
typedef struct vdev_raidz {
- int vd_logical_width;
+ /*
+ * Number of child vdevs when this raidz vdev was created (i.e. before
+ * any raidz expansions).
+ */
+ int vd_original_width;
+
+ /*
+ * The current number of child vdevs, which may be more than the
+ * original width if an expansion is in progress or has completed.
+ */
+ int vd_physical_width;
+
int vd_nparity;
+
+ /*
+ * Tree of reflow_node_t's. The lock protects the avl tree only.
+ * The reflow_node_t's describe completed expansions, and are used
+ * to determine the logical width given a block's birth time.
+ */
+ avl_tree_t vd_expand_txgs;
+ kmutex_t vd_expand_lock;
+
+ /*
+ * If this vdev is being expanded, spa_raidz_expand is set to this
+ */
+ vdev_raidz_expand_t vn_vre;
} vdev_raidz_t;
+extern int vdev_raidz_attach_check(vdev_t *);
+extern void vdev_raidz_attach_sync(void *, dmu_tx_t *);
+extern void spa_start_raidz_expansion_thread(spa_t *);
+extern int spa_raidz_expand_get_stats(spa_t *, pool_raidz_expand_stat_t *);
+extern int vdev_raidz_load(vdev_t *);
+
+/* RAIDZ scratch area pause points (for testing) */
+#define RAIDZ_EXPAND_PAUSE_NONE 0
+#define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_1 1
+#define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_2 2
+#define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_3 3
+#define RAIDZ_EXPAND_PAUSE_SCRATCH_VALID 4
+#define RAIDZ_EXPAND_PAUSE_SCRATCH_REFLOWED 5
+#define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_1 6
+#define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2 7
+
#ifdef __cplusplus
}
#endif
diff --git a/include/sys/vdev_raidz_impl.h b/include/sys/vdev_raidz_impl.h
index 890e725e18d8..45cb5864a22b 100644
--- a/include/sys/vdev_raidz_impl.h
+++ b/include/sys/vdev_raidz_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -30,6 +30,8 @@
#include <sys/kstat.h>
#include <sys/abd.h>
#include <sys/vdev_impl.h>
+#include <sys/abd_impl.h>
+#include <sys/zfs_rlock.h>
#ifdef __cplusplus
extern "C" {
@@ -102,35 +104,39 @@ typedef struct raidz_impl_ops {
char name[RAIDZ_IMPL_NAME_MAX]; /* Name of the implementation */
} raidz_impl_ops_t;
+
typedef struct raidz_col {
- uint64_t rc_devidx; /* child device index for I/O */
+ int rc_devidx; /* child device index for I/O */
+ uint32_t rc_size; /* I/O size */
uint64_t rc_offset; /* device offset */
- uint64_t rc_size; /* I/O size */
abd_t rc_abdstruct; /* rc_abd probably points here */
abd_t *rc_abd; /* I/O data */
abd_t *rc_orig_data; /* pre-reconstruction */
int rc_error; /* I/O error for this device */
- uint8_t rc_tried; /* Did we attempt this I/O column? */
- uint8_t rc_skipped; /* Did we skip this I/O column? */
- uint8_t rc_need_orig_restore; /* need to restore from orig_data? */
- uint8_t rc_force_repair; /* Write good data to this column */
- uint8_t rc_allow_repair; /* Allow repair I/O to this column */
+ uint8_t rc_tried:1; /* Did we attempt this I/O column? */
+ uint8_t rc_skipped:1; /* Did we skip this I/O column? */
+ uint8_t rc_need_orig_restore:1; /* need to restore from orig_data? */
+ uint8_t rc_force_repair:1; /* Write good data to this column */
+ uint8_t rc_allow_repair:1; /* Allow repair I/O to this column */
+ int rc_shadow_devidx; /* for double write during expansion */
+ int rc_shadow_error; /* for double write during expansion */
+ uint64_t rc_shadow_offset; /* for double write during expansion */
} raidz_col_t;
typedef struct raidz_row {
- uint64_t rr_cols; /* Regular column count */
- uint64_t rr_scols; /* Count including skipped columns */
- uint64_t rr_bigcols; /* Remainder data column count */
- uint64_t rr_missingdata; /* Count of missing data devices */
- uint64_t rr_missingparity; /* Count of missing parity devices */
- uint64_t rr_firstdatacol; /* First data column/parity count */
+ int rr_cols; /* Regular column count */
+ int rr_scols; /* Count including skipped columns */
+ int rr_bigcols; /* Remainder data column count */
+ int rr_missingdata; /* Count of missing data devices */
+ int rr_missingparity; /* Count of missing parity devices */
+ int rr_firstdatacol; /* First data column/parity count */
abd_t *rr_abd_empty; /* dRAID empty sector buffer */
int rr_nempty; /* empty sectors included in parity */
#ifdef ZFS_DEBUG
uint64_t rr_offset; /* Logical offset for *_io_verify() */
uint64_t rr_size; /* Physical size for *_io_verify() */
#endif
- raidz_col_t rr_col[0]; /* Flexible array of I/O columns */
+ raidz_col_t rr_col[]; /* Flexible array of I/O columns */
} raidz_row_t;
typedef struct raidz_map {
@@ -138,10 +144,25 @@ typedef struct raidz_map {
int rm_nrows; /* Regular row count */
int rm_nskip; /* RAIDZ sectors skipped for padding */
int rm_skipstart; /* Column index of padding start */
+ int rm_original_width; /* pre-expansion width of raidz vdev */
+ int rm_nphys_cols; /* num entries in rm_phys_col[] */
+ zfs_locked_range_t *rm_lr;
const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
- raidz_row_t *rm_row[0]; /* flexible array of rows */
+ raidz_col_t *rm_phys_col; /* if non-NULL, read i/o aggregation */
+ raidz_row_t *rm_row[]; /* flexible array of rows */
} raidz_map_t;
+/*
+ * Nodes in vdev_raidz_t:vd_expand_txgs.
+ * Blocks with physical birth time of re_txg or later have the specified
+ * logical width (until the next node).
+ */
+typedef struct reflow_node {
+ uint64_t re_txg;
+ uint64_t re_logical_width;
+ avl_node_t re_link;
+} reflow_node_t;
+
#define RAIDZ_ORIGINAL_IMPL (INT_MAX)
@@ -321,7 +342,7 @@ vdev_raidz_exp2(const uint8_t a, const unsigned exp)
* Galois Field operations.
*
* gf_exp2 - computes 2 raised to the given power
- * gf_exp2 - computes 4 raised to the given power
+ * gf_exp4 - computes 4 raised to the given power
* gf_mul - multiplication
* gf_div - division
* gf_inv - multiplicative inverse
diff --git a/include/sys/vdev_rebuild.h b/include/sys/vdev_rebuild.h
index b59fbe153903..55ec6c570316 100644
--- a/include/sys/vdev_rebuild.h
+++ b/include/sys/vdev_rebuild.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -79,6 +79,7 @@ typedef struct vdev_rebuild {
uint64_t vr_pass_start_time;
uint64_t vr_pass_bytes_scanned;
uint64_t vr_pass_bytes_issued;
+ uint64_t vr_pass_bytes_skipped;
/* On-disk state updated by vdev_rebuild_zap_update_sync() */
vdev_rebuild_phys_t vr_rebuild_phys;
diff --git a/include/sys/vdev_removal.h b/include/sys/vdev_removal.h
index e3bab0658d62..70b743f4ec6b 100644
--- a/include/sys/vdev_removal.h
+++ b/include/sys/vdev_removal.h
@@ -87,7 +87,7 @@ extern int spa_vdev_remove_cancel(spa_t *);
extern void spa_vdev_removal_destroy(spa_vdev_removal_t *);
extern uint64_t spa_remove_max_segment(spa_t *);
-extern int vdev_removal_max_span;
+extern uint_t vdev_removal_max_span;
#ifdef __cplusplus
}
diff --git a/include/sys/vdev_trim.h b/include/sys/vdev_trim.h
index 16f4be2a41f8..7a94d4af098f 100644
--- a/include/sys/vdev_trim.h
+++ b/include/sys/vdev_trim.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -41,6 +41,7 @@ extern void vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state);
extern void vdev_trim_stop_wait(spa_t *spa, list_t *vd_list);
extern void vdev_trim_restart(vdev_t *vd);
extern void vdev_autotrim(spa_t *spa);
+extern void vdev_autotrim_kick(spa_t *spa);
extern void vdev_autotrim_stop_all(spa_t *spa);
extern void vdev_autotrim_stop_wait(vdev_t *vd);
extern void vdev_autotrim_restart(spa_t *spa);
diff --git a/include/sys/xvattr.h b/include/sys/xvattr.h
index 277c4694069d..a7994db894b9 100644
--- a/include/sys/xvattr.h
+++ b/include/sys/xvattr.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zap.h b/include/sys/zap.h
index dc2f661fb065..96ddcc324b65 100644
--- a/include/sys/zap.h
+++ b/include/sys/zap.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -253,6 +253,9 @@ int zap_add_by_dnode(dnode_t *dn, const char *key,
int zap_add_uint64(objset_t *ds, uint64_t zapobj, const uint64_t *key,
int key_numints, int integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx);
+int zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
+ int key_numints, int integer_size, uint64_t num_integers,
+ const void *val, dmu_tx_t *tx);
/*
* Set the attribute with the given name to the given value. If an
@@ -267,6 +270,9 @@ int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints,
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
+int zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
+ int key_numints,
+ int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
/*
* Get the length (in integers) and the integer size of the specified
@@ -292,6 +298,8 @@ int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
int zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx);
int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, dmu_tx_t *tx);
+int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
+ int key_numints, dmu_tx_t *tx);
/*
* Returns (in *count) the number of attributes in the specified zap
diff --git a/include/sys/zap_impl.h b/include/sys/zap_impl.h
index 4549a9bd1177..2959aa9b2ca4 100644
--- a/include/sys/zap_impl.h
+++ b/include/sys/zap_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -66,10 +66,9 @@ typedef struct mzap_phys {
} mzap_phys_t;
typedef struct mzap_ent {
- avl_node_t mze_node;
- int mze_chunkid;
- uint64_t mze_hash;
- uint32_t mze_cd; /* copy from mze_phys->mze_cd */
+ uint32_t mze_hash;
+ uint16_t mze_cd; /* copy from mze_phys->mze_cd */
+ uint16_t mze_chunkid;
} mzap_ent_t;
#define MZE_PHYS(zap, mze) \
@@ -146,6 +145,7 @@ typedef struct zap {
dmu_buf_user_t zap_dbu;
objset_t *zap_objset;
uint64_t zap_object;
+ dnode_t *zap_dnode;
struct dmu_buf *zap_dbuf;
krwlock_t zap_rwlock;
boolean_t zap_ismicro;
@@ -164,7 +164,7 @@ typedef struct zap {
int16_t zap_num_entries;
int16_t zap_num_chunks;
int16_t zap_alloc_next;
- avl_tree_t zap_avl;
+ zfs_btree_t zap_tree;
} zap_micro;
} zap_u;
} zap_t;
@@ -203,7 +203,7 @@ int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
zap_t **zapp);
void zap_unlockdir(zap_t *zap, const void *tag);
void zap_evict_sync(void *dbu);
-zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt);
+zap_name_t *zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt);
void zap_name_free(zap_name_t *zn);
int zap_hashbits(zap_t *zap);
uint32_t zap_maxcd(zap_t *zap);
diff --git a/include/sys/zap_leaf.h b/include/sys/zap_leaf.h
index a3da1036a5ee..e54456d3472b 100644
--- a/include/sys/zap_leaf.h
+++ b/include/sys/zap_leaf.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -47,7 +47,7 @@ struct zap_stats;
* entries - header space (2*chunksize)
*/
#define ZAP_LEAF_NUMCHUNKS_BS(bs) \
- (((1<<(bs)) - 2*ZAP_LEAF_HASH_NUMENTRIES_BS(bs)) / \
+ (((1U << (bs)) - 2 * ZAP_LEAF_HASH_NUMENTRIES_BS(bs)) / \
ZAP_LEAF_CHUNKSIZE - 2)
#define ZAP_LEAF_NUMCHUNKS(l) (ZAP_LEAF_NUMCHUNKS_BS(((l)->l_bs)))
@@ -80,7 +80,7 @@ struct zap_stats;
* chunks per entry (3).
*/
#define ZAP_LEAF_HASH_SHIFT_BS(bs) ((bs) - 5)
-#define ZAP_LEAF_HASH_NUMENTRIES_BS(bs) (1 << ZAP_LEAF_HASH_SHIFT_BS(bs))
+#define ZAP_LEAF_HASH_NUMENTRIES_BS(bs) (1U << ZAP_LEAF_HASH_SHIFT_BS(bs))
#define ZAP_LEAF_HASH_SHIFT(l) (ZAP_LEAF_HASH_SHIFT_BS(((l)->l_bs)))
#define ZAP_LEAF_HASH_NUMENTRIES(l) (ZAP_LEAF_HASH_NUMENTRIES_BS(((l)->l_bs)))
@@ -132,7 +132,7 @@ typedef struct zap_leaf_phys {
* with the ZAP_LEAF_CHUNK() macro.
*/
- uint16_t l_hash[1];
+ uint16_t l_hash[];
} zap_leaf_phys_t;
typedef union zap_leaf_chunk {
@@ -163,7 +163,7 @@ typedef struct zap_leaf {
dmu_buf_user_t l_dbu;
krwlock_t l_rwlock;
uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */
- int l_bs; /* block size shift */
+ uint_t l_bs; /* block size shift */
dmu_buf_t *l_dbuf;
} zap_leaf_t;
@@ -243,7 +243,7 @@ extern boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh,
*/
extern void zap_leaf_init(zap_leaf_t *l, boolean_t sort);
-extern void zap_leaf_byteswap(zap_leaf_phys_t *buf, int len);
+extern void zap_leaf_byteswap(zap_leaf_phys_t *buf, size_t len);
extern void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort);
extern void zap_leaf_stats(struct zap *zap, zap_leaf_t *l,
struct zap_stats *zs);
diff --git a/include/sys/zcp.h b/include/sys/zcp.h
index f0a78f9cb5c4..6301cc08e7ea 100644
--- a/include/sys/zcp.h
+++ b/include/sys/zcp.h
@@ -33,8 +33,8 @@ extern "C" {
#define ZCP_RUN_INFO_KEY "runinfo"
-extern unsigned long zfs_lua_max_instrlimit;
-extern unsigned long zfs_lua_max_memlimit;
+extern uint64_t zfs_lua_max_instrlimit;
+extern uint64_t zfs_lua_max_memlimit;
int zcp_argerror(lua_State *, int, const char *, ...);
diff --git a/include/sys/zcp_iter.h b/include/sys/zcp_iter.h
index 1d92d0c6d10c..fa6eeef25edd 100644
--- a/include/sys/zcp_iter.h
+++ b/include/sys/zcp_iter.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zfeature.h b/include/sys/zfeature.h
index 5abde149a615..bf9361374d33 100644
--- a/include/sys/zfeature.h
+++ b/include/sys/zfeature.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zfs_acl.h b/include/sys/zfs_acl.h
index 98387a49adbe..e19288528849 100644
--- a/include/sys/zfs_acl.h
+++ b/include/sys/zfs_acl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -206,7 +206,7 @@ struct zfsvfs;
#ifdef _KERNEL
int zfs_acl_ids_create(struct znode *, int, vattr_t *,
- cred_t *, vsecattr_t *, zfs_acl_ids_t *);
+ cred_t *, vsecattr_t *, zfs_acl_ids_t *, zidmap_t *);
void zfs_acl_ids_free(zfs_acl_ids_t *);
boolean_t zfs_acl_ids_overquota(struct zfsvfs *, zfs_acl_ids_t *, uint64_t);
int zfs_getacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
@@ -215,15 +215,16 @@ void zfs_acl_rele(void *);
void zfs_oldace_byteswap(ace_t *, int);
void zfs_ace_byteswap(void *, size_t, boolean_t);
extern boolean_t zfs_has_access(struct znode *zp, cred_t *cr);
-extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *);
+extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *,
+ zidmap_t *);
int zfs_fastaccesschk_execute(struct znode *, cred_t *);
-extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *);
-extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *);
+extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *, zidmap_t *);
+extern int zfs_zaccess_unix(void *, int, cred_t *);
extern int zfs_acl_access(struct znode *, int, cred_t *);
int zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t);
-int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *);
+int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *, zidmap_t *);
int zfs_zaccess_rename(struct znode *, struct znode *,
- struct znode *, struct znode *, cred_t *cr);
+ struct znode *, struct znode *, cred_t *cr, zidmap_t *mnt_ns);
void zfs_acl_free(zfs_acl_t *);
int zfs_vsec_2_aclp(struct zfsvfs *, umode_t, vsecattr_t *, cred_t *,
struct zfs_fuid_info **, zfs_acl_t **);
diff --git a/include/sys/zfs_chksum.h b/include/sys/zfs_chksum.h
index cfd07bd0ffe7..a0e1b35189bb 100644
--- a/include/sys/zfs_chksum.h
+++ b/include/sys/zfs_chksum.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
index 50257f1d6fdd..8f264b50e995 100644
--- a/include/sys/zfs_context.h
+++ b/include/sys/zfs_context.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -50,6 +50,7 @@ extern "C" {
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/vmem.h>
+#include <sys/misc.h>
#include <sys/taskq.h>
#include <sys/param.h>
#include <sys/disp.h>
@@ -150,10 +151,14 @@ extern "C" {
extern void dprintf_setup(int *argc, char **argv);
-extern void cmn_err(int, const char *, ...);
-extern void vcmn_err(int, const char *, va_list);
-extern __attribute__((noreturn)) void panic(const char *, ...);
-extern __attribute__((noreturn)) void vpanic(const char *, va_list);
+extern void cmn_err(int, const char *, ...)
+ __attribute__((format(printf, 2, 3)));
+extern void vcmn_err(int, const char *, va_list)
+ __attribute__((format(printf, 2, 0)));
+extern void panic(const char *, ...)
+ __attribute__((format(printf, 1, 2), noreturn));
+extern void vpanic(const char *, va_list)
+ __attribute__((format(printf, 1, 0), noreturn));
#define fm_panic panic
@@ -219,14 +224,13 @@ typedef pthread_t kthread_t;
#define TS_JOINABLE 0x00000004
#define curthread ((void *)(uintptr_t)pthread_self())
-#define kpreempt(x) yield()
#define getcomm() "unknown"
#define thread_create_named(name, stk, stksize, func, arg, len, \
pp, state, pri) \
- zk_thread_create(func, arg, stksize, state)
+ zk_thread_create(name, func, arg, stksize, state)
#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \
- zk_thread_create(func, arg, stksize, state)
+ zk_thread_create(#func, func, arg, stksize, state)
#define thread_exit() pthread_exit(NULL)
#define thread_join(t) pthread_join((pthread_t)(t), NULL)
@@ -242,15 +246,17 @@ extern struct proc p0;
#define PS_NONE -1
-extern kthread_t *zk_thread_create(void (*func)(void *), void *arg,
- size_t stksize, int state);
+extern kthread_t *zk_thread_create(const char *name, void (*func)(void *),
+ void *arg, size_t stksize, int state);
#define issig(why) (FALSE)
#define ISSIG(thr, why) (FALSE)
+#define KPREEMPT_SYNC (-1)
+
+#define kpreempt(x) sched_yield()
#define kpreempt_disable() ((void)0)
#define kpreempt_enable() ((void)0)
-#define cond_resched() sched_yield()
/*
* Mutexes
@@ -268,11 +274,13 @@ typedef struct kmutex {
extern void mutex_init(kmutex_t *mp, char *name, int type, void *cookie);
extern void mutex_destroy(kmutex_t *mp);
extern void mutex_enter(kmutex_t *mp);
+extern int mutex_enter_check_return(kmutex_t *mp);
extern void mutex_exit(kmutex_t *mp);
extern int mutex_tryenter(kmutex_t *mp);
#define NESTED_SINGLE 1
#define mutex_enter_nested(mp, class) mutex_enter(mp)
+#define mutex_enter_interruptible(mp) mutex_enter_check_return(mp)
/*
* RW locks
*/
@@ -488,6 +496,8 @@ extern taskq_t *system_taskq;
extern taskq_t *system_delay_taskq;
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
+extern taskq_t *taskq_create_synced(const char *, int, pri_t, int, int, uint_t,
+ kthread_t ***);
#define taskq_create_proc(a, b, c, d, e, p, f) \
(taskq_create(a, b, c, d, e, f))
#define taskq_create_sysdc(a, b, d, e, p, dc, f) \
@@ -689,6 +699,11 @@ extern char *kmem_asprintf(const char *fmt, ...);
#define kmem_strfree(str) kmem_free((str), strlen(str) + 1)
#define kmem_strdup(s) strdup(s)
+#ifndef __cplusplus
+extern int kmem_scnprintf(char *restrict str, size_t size,
+ const char *restrict fmt, ...);
+#endif
+
/*
* Hostname information
*/
diff --git a/include/sys/zfs_debug.h b/include/sys/zfs_debug.h
index 7b103510dd07..8d94557a5882 100644
--- a/include/sys/zfs_debug.h
+++ b/include/sys/zfs_debug.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -57,6 +57,8 @@ extern int zfs_dbgmsg_enable;
#define ZFS_DEBUG_TRIM (1 << 11)
#define ZFS_DEBUG_LOG_SPACEMAP (1 << 12)
#define ZFS_DEBUG_METASLAB_ALLOC (1 << 13)
+#define ZFS_DEBUG_BRT (1 << 14)
+#define ZFS_DEBUG_RAIDZ_RECONSTRUCT (1 << 15)
extern void __set_error(const char *file, const char *func, int line, int err);
extern void __zfs_dbgmsg(char *buf);
diff --git a/include/sys/zfs_delay.h b/include/sys/zfs_delay.h
index 40e617dba961..56ac1f3c439b 100644
--- a/include/sys/zfs_delay.h
+++ b/include/sys/zfs_delay.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zfs_file.h b/include/sys/zfs_file.h
index 02cd1a6f041a..e944165adc40 100644
--- a/include/sys/zfs_file.h
+++ b/include/sys/zfs_file.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zfs_fuid.h b/include/sys/zfs_fuid.h
index 1975e57cf62b..d6b2942d1bec 100644
--- a/include/sys/zfs_fuid.h
+++ b/include/sys/zfs_fuid.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zfs_impl.h b/include/sys/zfs_impl.h
new file mode 100644
index 000000000000..df4899f132b8
--- /dev/null
+++ b/include/sys/zfs_impl.h
@@ -0,0 +1,69 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#ifndef _SYS_ZFS_IMPL_H
+#define _SYS_ZFS_IMPL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* generic implementation backends */
+typedef struct
+{
+ /* algorithm name */
+ const char *name;
+
+ /* get number of supported implementations */
+ uint32_t (*getcnt)(void);
+
+ /* get id of selected implementation */
+ uint32_t (*getid)(void);
+
+ /* get name of selected implementation */
+ const char *(*getname)(void);
+
+ /* setup id as fastest implementation */
+ void (*set_fastest)(uint32_t id);
+
+ /* set implementation by id */
+ void (*setid)(uint32_t id);
+
+ /* set implementation by name */
+ int (*setname)(const char *val);
+} zfs_impl_t;
+
+/* return some set of function pointer */
+extern const zfs_impl_t *zfs_impl_get_ops(const char *algo);
+
+extern const zfs_impl_t zfs_blake3_ops;
+extern const zfs_impl_t zfs_sha256_ops;
+extern const zfs_impl_t zfs_sha512_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_ZFS_IMPL_H */
diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h
index 94522179676a..525d40759fdd 100644
--- a/include/sys/zfs_ioctl.h
+++ b/include/sys/zfs_ioctl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2024 by Delphix. All rights reserved.
* Copyright 2016 RackTop Systems.
* Copyright (c) 2017, Intel Corporation.
*/
@@ -124,7 +124,13 @@ typedef enum drr_headertype {
* default use of "zfs send" won't encounter the bug mentioned above.
*/
#define DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS (1 << 27)
-#define DMU_BACKUP_FEATURE_BLAKE3 (1 << 28)
+/* flag #28 is reserved for a Nutanix feature */
+/*
+ * flag #29 is the last unused bit. It is reserved to indicate a to-be-designed
+ * extension to the stream format which will accomodate more feature flags.
+ * If you need to add another feature flag, please reach out to the OpenZFS
+ * community, e.g., on GitHub or Slack.
+ */
/*
* Mask of all supported backup features
@@ -135,7 +141,7 @@ typedef enum drr_headertype {
DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS | \
DMU_BACKUP_FEATURE_REDACTED | DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS | \
- DMU_BACKUP_FEATURE_ZSTD | DMU_BACKUP_FEATURE_BLAKE3)
+ DMU_BACKUP_FEATURE_ZSTD)
/* Are all features in the given flag word currently supported? */
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
@@ -448,6 +454,8 @@ typedef enum zinject_type {
ZINJECT_PANIC,
ZINJECT_DELAY_IO,
ZINJECT_DECRYPT_FAULT,
+ ZINJECT_DELAY_IMPORT,
+ ZINJECT_DELAY_EXPORT,
} zinject_type_t;
typedef struct zfs_share {
@@ -569,7 +577,6 @@ typedef struct zfsdev_state {
extern void *zfsdev_get_state(minor_t minor, enum zfsdev_state_type which);
extern int zfsdev_getminor(zfs_file_t *fp, minor_t *minorp);
-extern uint_t zfs_fsyncer_key;
extern uint_t zfs_allow_log_key;
#endif /* _KERNEL */
diff --git a/include/sys/zfs_ioctl_impl.h b/include/sys/zfs_ioctl_impl.h
index f9e4f6e6c4b2..cb852c5577fd 100644
--- a/include/sys/zfs_ioctl_impl.h
+++ b/include/sys/zfs_ioctl_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -24,7 +24,7 @@
#define _ZFS_IOCTL_IMPL_H_
extern kmutex_t zfsdev_state_lock;
-extern unsigned long zfs_max_nvlist_src_size;
+extern uint64_t zfs_max_nvlist_src_size;
typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
diff --git a/include/sys/zfs_onexit.h b/include/sys/zfs_onexit.h
index fd3030e3ac2d..91f49d4cc5a3 100644
--- a/include/sys/zfs_onexit.h
+++ b/include/sys/zfs_onexit.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -54,7 +54,7 @@ extern void zfs_onexit_destroy(zfs_onexit_t *zo);
extern zfs_file_t *zfs_onexit_fd_hold(int fd, minor_t *minorp);
extern void zfs_onexit_fd_rele(zfs_file_t *);
extern int zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
- uint64_t *action_handle);
+ uintptr_t *action_handle);
#ifdef __cplusplus
}
diff --git a/include/sys/zfs_project.h b/include/sys/zfs_project.h
index 81a238905225..8a46e5e068db 100644
--- a/include/sys/zfs_project.h
+++ b/include/sys/zfs_project.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zfs_quota.h b/include/sys/zfs_quota.h
index b215b8dd0013..4567cc651afb 100644
--- a/include/sys/zfs_quota.h
+++ b/include/sys/zfs_quota.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zfs_racct.h b/include/sys/zfs_racct.h
index cfcdd336ea42..0e8bd04c1a13 100644
--- a/include/sys/zfs_racct.h
+++ b/include/sys/zfs_racct.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zfs_refcount.h b/include/sys/zfs_refcount.h
index 2f59ebb32b07..77965a0aa580 100644
--- a/include/sys/zfs_refcount.h
+++ b/include/sys/zfs_refcount.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -27,6 +27,7 @@
#define _SYS_ZFS_REFCOUNT_H
#include <sys/inttypes.h>
+#include <sys/avl.h>
#include <sys/list.h>
#include <sys/zfs_context.h>
@@ -43,19 +44,22 @@ extern "C" {
#ifdef ZFS_DEBUG
typedef struct reference {
- list_node_t ref_link;
+ union {
+ avl_node_t a;
+ list_node_t l;
+ } ref_link;
const void *ref_holder;
uint64_t ref_number;
- uint8_t *ref_removed;
+ boolean_t ref_search;
} reference_t;
typedef struct refcount {
+ uint64_t rc_count;
kmutex_t rc_mtx;
- boolean_t rc_tracked;
- list_t rc_list;
+ avl_tree_t rc_tree;
list_t rc_removed;
- uint64_t rc_count;
- uint64_t rc_removed_count;
+ uint_t rc_removed_count;
+ boolean_t rc_tracked;
} zfs_refcount_t;
/*
@@ -73,13 +77,15 @@ int64_t zfs_refcount_count(zfs_refcount_t *);
int64_t zfs_refcount_add(zfs_refcount_t *, const void *);
int64_t zfs_refcount_remove(zfs_refcount_t *, const void *);
/*
- * Note that (add|remove)_many add/remove one reference with "number" N,
- * _not_ make N references with "number" 1, which is what vanilla
- * zfs_refcount_(add|remove) would do if called N times.
+ * Note that (add|remove)_many adds/removes one reference with "number" N,
+ * _not_ N references with "number" 1, which is what (add|remove)_few does,
+ * or what vanilla zfs_refcount_(add|remove) called N times would do.
*
* Attempting to remove a reference with number N when none exists is a
* panic on debug kernels with reference_tracking enabled.
*/
+void zfs_refcount_add_few(zfs_refcount_t *, uint64_t, const void *);
+void zfs_refcount_remove_few(zfs_refcount_t *, uint64_t, const void *);
int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, const void *);
int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, const void *);
void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *);
@@ -108,6 +114,10 @@ typedef struct refcount {
#define zfs_refcount_count(rc) atomic_load_64(&(rc)->rc_count)
#define zfs_refcount_add(rc, holder) atomic_inc_64_nv(&(rc)->rc_count)
#define zfs_refcount_remove(rc, holder) atomic_dec_64_nv(&(rc)->rc_count)
+#define zfs_refcount_add_few(rc, number, holder) \
+ atomic_add_64(&(rc)->rc_count, number)
+#define zfs_refcount_remove_few(rc, number, holder) \
+ atomic_add_64(&(rc)->rc_count, -number)
#define zfs_refcount_add_many(rc, number, holder) \
atomic_add_64_nv(&(rc)->rc_count, number)
#define zfs_refcount_remove_many(rc, number, holder) \
diff --git a/include/sys/zfs_rlock.h b/include/sys/zfs_rlock.h
index 2302abb37337..5e5d6d68d6c5 100644
--- a/include/sys/zfs_rlock.h
+++ b/include/sys/zfs_rlock.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zfs_sa.h b/include/sys/zfs_sa.h
index 6b0336997c20..1b4b8abf0244 100644
--- a/include/sys/zfs_sa.h
+++ b/include/sys/zfs_sa.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zfs_stat.h b/include/sys/zfs_stat.h
index 465aefaa2063..1589f945cbd7 100644
--- a/include/sys/zfs_stat.h
+++ b/include/sys/zfs_stat.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zfs_sysfs.h b/include/sys/zfs_sysfs.h
index d1cb2ef4321c..6fe9b7a9cd2c 100644
--- a/include/sys/zfs_sysfs.h
+++ b/include/sys/zfs_sysfs.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h
index a438c86f0a0c..19ae7b77b459 100644
--- a/include/sys/zfs_vfsops.h
+++ b/include/sys/zfs_vfsops.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/include/sys/zfs_vnops.h b/include/sys/zfs_vnops.h
index 18259f0dc9b5..e60b99bed192 100644
--- a/include/sys/zfs_vnops.h
+++ b/include/sys/zfs_vnops.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -24,13 +24,20 @@
#ifndef _SYS_FS_ZFS_VNOPS_H
#define _SYS_FS_ZFS_VNOPS_H
+
#include <sys/zfs_vnops_os.h>
+extern int zfs_bclone_enabled;
+
extern int zfs_fsync(znode_t *, int, cred_t *);
extern int zfs_read(znode_t *, zfs_uio_t *, int, cred_t *);
extern int zfs_write(znode_t *, zfs_uio_t *, int, cred_t *);
extern int zfs_holey(znode_t *, ulong_t, loff_t *);
extern int zfs_access(znode_t *, int, int, cred_t *);
+extern int zfs_clone_range(znode_t *, uint64_t *, znode_t *, uint64_t *,
+ uint64_t *, cred_t *);
+extern int zfs_clone_range_replay(znode_t *, uint64_t, uint64_t, uint64_t,
+ const blkptr_t *, size_t);
extern int zfs_getsecattr(znode_t *, vsecattr_t *, int, cred_t *);
extern int zfs_setsecattr(znode_t *, vsecattr_t *, int, cred_t *);
diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h
index 098cf9dbc16f..d71144807f47 100644
--- a/include/sys/zfs_znode.h
+++ b/include/sys/zfs_znode.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -158,6 +158,7 @@ extern "C" {
#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
+extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
#ifdef _KERNEL
#include <sys/zfs_znode_impl.h>
@@ -188,9 +189,7 @@ typedef struct znode {
boolean_t z_atime_dirty; /* atime needs to be synced */
boolean_t z_zn_prefetch; /* Prefetch znodes? */
boolean_t z_is_sa; /* are we native sa? */
- boolean_t z_is_mapped; /* are we mmap'ed */
boolean_t z_is_ctldir; /* are we .zfs entry */
- boolean_t z_is_stale; /* are we stale due to rollback? */
boolean_t z_suspended; /* extra ref from a suspend? */
uint_t z_blksz; /* block size in bytes */
uint_t z_seq; /* modification sequence number */
@@ -218,11 +217,34 @@ typedef struct znode {
ZNODE_OS_FIELDS;
} znode_t;
+/* Verifies the znode is valid. */
+static inline int
+zfs_verify_zp(znode_t *zp)
+{
+ if (unlikely(zp->z_sa_hdl == NULL))
+ return (SET_ERROR(EIO));
+ return (0);
+}
+
+/* zfs_enter and zfs_verify_zp together */
+static inline int
+zfs_enter_verify_zp(zfsvfs_t *zfsvfs, znode_t *zp, const char *tag)
+{
+ int error;
+ if ((error = zfs_enter(zfsvfs, tag)) != 0)
+ return (error);
+ if ((error = zfs_verify_zp(zp)) != 0) {
+ zfs_exit(zfsvfs, tag);
+ return (error);
+ }
+ return (0);
+}
+
typedef struct znode_hold {
uint64_t zh_obj; /* object id */
- kmutex_t zh_lock; /* lock serializing object access */
avl_node_t zh_node; /* avl tree linkage */
- zfs_refcount_t zh_refcount; /* active consumer reference count */
+ kmutex_t zh_lock; /* lock serializing object access */
+ int zh_refcount; /* active consumer reference count */
} znode_hold_t;
static inline uint64_t
@@ -250,6 +272,8 @@ extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
extern void zfs_znode_init(void);
extern void zfs_znode_fini(void);
extern int zfs_znode_hold_compare(const void *, const void *);
+extern znode_hold_t *zfs_znode_hold_enter(zfsvfs_t *, uint64_t);
+extern void zfs_znode_hold_exit(zfsvfs_t *, znode_hold_t *);
extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
extern int zfs_rezget(znode_t *);
extern void zfs_zinactive(znode_t *);
@@ -257,7 +281,6 @@ extern void zfs_znode_delete(znode_t *, dmu_tx_t *);
extern void zfs_remove_op_tables(void);
extern int zfs_create_op_tables(void);
extern dev_t zfs_cmpldev(uint64_t);
-extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
extern int zfs_get_stats(objset_t *os, nvlist_t *nv);
extern boolean_t zfs_get_vfs_flag_unmounted(objset_t *os);
extern void zfs_znode_dmu_fini(znode_t *);
@@ -277,8 +300,14 @@ extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
znode_t *szp);
+extern void zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx,
+ uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp,
+ const char *dname, znode_t *szp);
+extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx,
+ uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp,
+ const char *dname, znode_t *szp, znode_t *wzp);
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, offset_t off, ssize_t len, int ioflag,
+ znode_t *zp, offset_t off, ssize_t len, boolean_t commit,
zil_callback_t callback, void *callback_data);
extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, uint64_t off, uint64_t len);
@@ -286,6 +315,9 @@ extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp);
extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
+extern void zfs_log_clone_range(zilog_t *zilog, dmu_tx_t *tx, int txtype,
+ znode_t *zp, uint64_t offset, uint64_t length, uint64_t blksz,
+ const blkptr_t *bps, size_t nbps);
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx);
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
extern void zfs_log_setsaxattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
diff --git a/include/sys/zil.h b/include/sys/zil.h
index 2a7381f016ab..4747ecc067a9 100644
--- a/include/sys/zil.h
+++ b/include/sys/zil.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -33,6 +33,7 @@
#include <sys/zio.h>
#include <sys/dmu.h>
#include <sys/zio_crypt.h>
+#include <sys/wmsum.h>
#ifdef __cplusplus
extern "C" {
@@ -163,7 +164,10 @@ typedef enum zil_create {
#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */
#define TX_WRITE2 20 /* dmu_sync EALREADY write */
#define TX_SETSAXATTR 21 /* Set sa xattrs on file */
-#define TX_MAX_TYPE 22 /* Max transaction type */
+#define TX_RENAME_EXCHANGE 22 /* Atomic swap via renameat2 */
+#define TX_RENAME_WHITEOUT 23 /* Atomic whiteout via renameat2 */
+#define TX_CLONE_RANGE 24 /* Clone a file range */
+#define TX_MAX_TYPE 25 /* Max transaction type */
/*
* The transactions for mkdir, symlink, remove, rmdir, link, and rename
@@ -173,9 +177,9 @@ typedef enum zil_create {
#define TX_CI ((uint64_t)0x1 << 63) /* case-insensitive behavior requested */
/*
- * Transactions for write, truncate, setattr, acl_v0, and acl can be logged
- * out of order. For convenience in the code, all such records must have
- * lr_foid at the same offset.
+ * Transactions for operations below can be logged out of order.
+ * For convenience in the code, all such records must have lr_foid
+ * at the same offset.
*/
#define TX_OOO(txtype) \
((txtype) == TX_WRITE || \
@@ -184,7 +188,8 @@ typedef enum zil_create {
(txtype) == TX_ACL_V0 || \
(txtype) == TX_ACL || \
(txtype) == TX_WRITE2 || \
- (txtype) == TX_SETSAXATTR)
+ (txtype) == TX_SETSAXATTR || \
+ (txtype) == TX_CLONE_RANGE)
/*
* The number of dnode slots consumed by the object is stored in the 8
@@ -317,6 +322,19 @@ typedef struct {
} lr_rename_t;
typedef struct {
+ lr_rename_t lr_rename; /* common rename portion */
+ /* members related to the whiteout file (based on lr_create_t) */
+ uint64_t lr_wfoid; /* obj id of the new whiteout file */
+ uint64_t lr_wmode; /* mode of object */
+ uint64_t lr_wuid; /* uid of whiteout */
+ uint64_t lr_wgid; /* gid of whiteout */
+ uint64_t lr_wgen; /* generation (txg of creation) */
+ uint64_t lr_wcrtime[2]; /* creation time */
+ uint64_t lr_wrdev; /* always makedev(0, 0) */
+ /* 2 strings: names of source and destination follow this */
+} lr_rename_whiteout_t;
+
+typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* file object to write */
uint64_t lr_offset; /* offset to write to */
@@ -371,6 +389,17 @@ typedef struct {
/* lr_acl_bytes number of variable sized ace's follows */
} lr_acl_t;
+typedef struct {
+ lr_t lr_common; /* common portion of log record */
+ uint64_t lr_foid; /* file object to clone into */
+ uint64_t lr_offset; /* offset to clone to */
+ uint64_t lr_length; /* length of the blocks to clone */
+ uint64_t lr_blksz; /* file's block size */
+ uint64_t lr_nbps; /* number of block pointers */
+ blkptr_t lr_bps[];
+ /* block pointers of the blocks to clone follows */
+} lr_clone_range_t;
+
/*
* ZIL structure definitions, interface function prototype and globals.
*/
@@ -460,24 +489,54 @@ typedef struct zil_stats {
* Transactions which have been allocated to the "normal"
* (i.e. not slog) storage pool. Note that "bytes" accumulate
* the actual log record sizes - which do not include the actual
- * data in case of indirect writes.
+ * data in case of indirect writes. bytes <= write <= alloc.
*/
kstat_named_t zil_itx_metaslab_normal_count;
kstat_named_t zil_itx_metaslab_normal_bytes;
+ kstat_named_t zil_itx_metaslab_normal_write;
+ kstat_named_t zil_itx_metaslab_normal_alloc;
/*
* Transactions which have been allocated to the "slog" storage pool.
* If there are no separate log devices, this is the same as the
- * "normal" pool.
+ * "normal" pool. bytes <= write <= alloc.
*/
kstat_named_t zil_itx_metaslab_slog_count;
kstat_named_t zil_itx_metaslab_slog_bytes;
-} zil_stats_t;
-
-#define ZIL_STAT_INCR(stat, val) \
- atomic_add_64(&zil_stats.stat.value.ui64, (val));
-#define ZIL_STAT_BUMP(stat) \
- ZIL_STAT_INCR(stat, 1);
+ kstat_named_t zil_itx_metaslab_slog_write;
+ kstat_named_t zil_itx_metaslab_slog_alloc;
+} zil_kstat_values_t;
+
+typedef struct zil_sums {
+ wmsum_t zil_commit_count;
+ wmsum_t zil_commit_writer_count;
+ wmsum_t zil_itx_count;
+ wmsum_t zil_itx_indirect_count;
+ wmsum_t zil_itx_indirect_bytes;
+ wmsum_t zil_itx_copied_count;
+ wmsum_t zil_itx_copied_bytes;
+ wmsum_t zil_itx_needcopy_count;
+ wmsum_t zil_itx_needcopy_bytes;
+ wmsum_t zil_itx_metaslab_normal_count;
+ wmsum_t zil_itx_metaslab_normal_bytes;
+ wmsum_t zil_itx_metaslab_normal_write;
+ wmsum_t zil_itx_metaslab_normal_alloc;
+ wmsum_t zil_itx_metaslab_slog_count;
+ wmsum_t zil_itx_metaslab_slog_bytes;
+ wmsum_t zil_itx_metaslab_slog_write;
+ wmsum_t zil_itx_metaslab_slog_alloc;
+} zil_sums_t;
+
+#define ZIL_STAT_INCR(zil, stat, val) \
+ do { \
+ int64_t tmpval = (val); \
+ wmsum_add(&(zil_sums_global.stat), tmpval); \
+ if ((zil)->zl_sums) \
+ wmsum_add(&((zil)->zl_sums->stat), tmpval); \
+ } while (0)
+
+#define ZIL_STAT_BUMP(zil, stat) \
+ ZIL_STAT_INCR(zil, stat, 1);
typedef int zil_parse_blk_func_t(zilog_t *zilog, const blkptr_t *bp, void *arg,
uint64_t txg);
@@ -497,13 +556,14 @@ extern void zil_fini(void);
extern zilog_t *zil_alloc(objset_t *os, zil_header_t *zh_phys);
extern void zil_free(zilog_t *zilog);
-extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data);
+extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data,
+ zil_sums_t *zil_sums);
extern void zil_close(zilog_t *zilog);
-extern void zil_replay(objset_t *os, void *arg,
+extern boolean_t zil_replay(objset_t *os, void *arg,
zil_replay_func_t *const replay_func[TX_MAX_TYPE]);
extern boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx);
-extern void zil_destroy(zilog_t *zilog, boolean_t keep_first);
+extern boolean_t zil_destroy(zilog_t *zilog, boolean_t keep_first);
extern void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx);
extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize);
@@ -535,7 +595,12 @@ extern void zil_set_sync(zilog_t *zilog, uint64_t syncval);
extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval);
extern uint64_t zil_max_copied_data(zilog_t *zilog);
-extern uint64_t zil_max_log_data(zilog_t *zilog);
+extern uint64_t zil_max_log_data(zilog_t *zilog, size_t hdrsize);
+
+extern void zil_sums_init(zil_sums_t *zs);
+extern void zil_sums_fini(zil_sums_t *zs);
+extern void zil_kstat_values_update(zil_kstat_values_t *zs,
+ zil_sums_t *zil_sums);
extern int zil_replay_disable;
diff --git a/include/sys/zil_impl.h b/include/sys/zil_impl.h
index 8409ce864e90..9a34bafc1c77 100644
--- a/include/sys/zil_impl.h
+++ b/include/sys/zil_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -38,14 +38,22 @@ extern "C" {
/*
* Possible states for a given lwb structure.
*
- * An lwb will start out in the "closed" state, and then transition to
- * the "opened" state via a call to zil_lwb_write_open(). When
- * transitioning from "closed" to "opened" the zilog's "zl_issuer_lock"
- * must be held.
+ * An lwb will start out in the "new" state, and transition to the "opened"
+ * state via a call to zil_lwb_write_open() on first itx assignment. When
+ * transitioning from "new" to "opened" the zilog's "zl_issuer_lock" must be
+ * held.
*
- * After the lwb is "opened", it can transition into the "issued" state
- * via zil_lwb_write_issue(). Again, the zilog's "zl_issuer_lock" must
- * be held when making this transition.
+ * After the lwb is "opened", it can be assigned number of itxs and transition
+ * into the "closed" state via zil_lwb_write_close() when full or on timeout.
+ * When transitioning from "opened" to "closed" the zilog's "zl_issuer_lock"
+ * must be held. New lwb allocation also takes "zl_lock" to protect the list.
+ *
+ * After the lwb is "closed", it can transition into the "ready" state via
+ * zil_lwb_write_issue(). "zl_lock" must be held when making this transition.
+ * Since it is done by the same thread, "zl_issuer_lock" is not needed.
+ *
+ * When lwb in "ready" state receives its block pointer, it can transition to
+ * "issued". "zl_lock" must be held when making this transition.
*
* After the lwb's write zio completes, it transitions into the "write
* done" state via zil_lwb_write_done(); and then into the "flush done"
@@ -62,17 +70,20 @@ extern "C" {
*
* Additionally, correctness when reading an lwb's state is often
* achieved by exploiting the fact that these state transitions occur in
- * this specific order; i.e. "closed" to "opened" to "issued" to "done".
+ * this specific order; i.e. "new" to "opened" to "closed" to "ready" to
+ * "issued" to "write_done" and finally "flush_done".
*
- * Thus, if an lwb is in the "closed" or "opened" state, holding the
+ * Thus, if an lwb is in the "new" or "opened" state, holding the
* "zl_issuer_lock" will prevent a concurrent thread from transitioning
- * that lwb to the "issued" state. Likewise, if an lwb is already in the
- * "issued" state, holding the "zl_lock" will prevent a concurrent
- * thread from transitioning that lwb to the "write done" state.
+ * that lwb to the "closed" state. Likewise, if an lwb is already in the
+ * "ready" state, holding the "zl_lock" will prevent a concurrent thread
+ * from transitioning that lwb to the "issued" state.
*/
typedef enum {
- LWB_STATE_CLOSED,
+ LWB_STATE_NEW,
LWB_STATE_OPENED,
+ LWB_STATE_CLOSED,
+ LWB_STATE_READY,
LWB_STATE_ISSUED,
LWB_STATE_WRITE_DONE,
LWB_STATE_FLUSH_DONE,
@@ -91,22 +102,28 @@ typedef enum {
typedef struct lwb {
zilog_t *lwb_zilog; /* back pointer to log struct */
blkptr_t lwb_blk; /* on disk address of this log blk */
- boolean_t lwb_fastwrite; /* is blk marked for fastwrite? */
+ boolean_t lwb_slim; /* log block has slim format */
boolean_t lwb_slog; /* lwb_blk is on SLOG device */
+ int lwb_error; /* log block allocation error */
+ int lwb_nmax; /* max bytes in the buffer */
int lwb_nused; /* # used bytes in buffer */
+ int lwb_nfilled; /* # filled bytes in buffer */
int lwb_sz; /* size of block and buffer */
lwb_state_t lwb_state; /* the state of this lwb */
char *lwb_buf; /* log write buffer */
+ zio_t *lwb_child_zio; /* parent zio for children */
zio_t *lwb_write_zio; /* zio for the lwb buffer */
zio_t *lwb_root_zio; /* root zio for lwb write and flushes */
+ hrtime_t lwb_issued_timestamp; /* when was the lwb issued? */
uint64_t lwb_issued_txg; /* the txg when the write is issued */
+ uint64_t lwb_alloc_txg; /* the txg when lwb_blk is allocated */
uint64_t lwb_max_txg; /* highest txg in this lwb */
list_node_t lwb_node; /* zilog->zl_lwb_list linkage */
+ list_node_t lwb_issue_node; /* linkage of lwbs ready for issue */
list_t lwb_itxs; /* list of itx's */
list_t lwb_waiters; /* list of zil_commit_waiter's */
avl_tree_t lwb_vdev_tree; /* vdevs to flush after lwb write */
kmutex_t lwb_vdev_lock; /* protects lwb_vdev_tree */
- hrtime_t lwb_issued_timestamp; /* when was the lwb issued? */
} lwb_t;
/*
@@ -164,7 +181,7 @@ typedef struct zil_vdev_node {
avl_node_t zv_node; /* AVL tree linkage */
} zil_vdev_node_t;
-#define ZIL_PREV_BLKS 16
+#define ZIL_BURSTS 8
/*
* Stable storage intent log management structure. One per dataset.
@@ -199,14 +216,18 @@ struct zilog {
uint64_t zl_parse_lr_count; /* number of log records parsed */
itxg_t zl_itxg[TXG_SIZE]; /* intent log txg chains */
list_t zl_itx_commit_list; /* itx list to be committed */
- uint64_t zl_cur_used; /* current commit log size used */
+ uint64_t zl_cur_size; /* current burst full size */
+ uint64_t zl_cur_left; /* current burst remaining size */
+ uint64_t zl_cur_max; /* biggest record in current burst */
list_t zl_lwb_list; /* in-flight log write list */
avl_tree_t zl_bp_tree; /* track bps during log parse */
clock_t zl_replay_time; /* lbolt of when replay started */
uint64_t zl_replay_blks; /* number of log blocks replayed */
zil_header_t zl_old_header; /* debugging aid */
- uint_t zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded */
+ uint_t zl_parallel; /* workload is multi-threaded */
uint_t zl_prev_rotor; /* rotor for zl_prev[] */
+ uint_t zl_prev_opt[ZIL_BURSTS]; /* optimal block size */
+ uint_t zl_prev_min[ZIL_BURSTS]; /* minimal first block size */
txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */
uint64_t zl_dirty_max_txg; /* highest txg used to dirty zilog */
@@ -222,6 +243,9 @@ struct zilog {
* (see zil_max_copied_data()).
*/
uint64_t zl_max_block_size;
+
+ /* Pointer for per dataset zil sums */
+ zil_sums_t *zl_sums;
};
typedef struct zil_bp_node {
diff --git a/include/sys/zio.h b/include/sys/zio.h
index 9bee7cc9b9fd..77c70b9b481c 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -22,12 +22,12 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2024 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright (c) 2019, Allan Jude
- * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, 2023, 2024, Klara Inc.
* Copyright (c) 2019-2020, Michael Niewöhner
*/
@@ -163,32 +163,36 @@ typedef enum zio_suspend_reason {
ZIO_SUSPEND_MMP,
} zio_suspend_reason_t;
-enum zio_flag {
+/*
+ * This was originally an enum type. However, those are 32-bit and there is no
+ * way to make a 64-bit enum type. Since we ran out of bits for flags, we were
+ * forced to upgrade it to a uint64_t.
+ */
+typedef uint64_t zio_flag_t;
/*
* Flags inherited by gang, ddt, and vdev children,
* and that must be equal for two zios to aggregate
*/
- ZIO_FLAG_DONT_AGGREGATE = 1U << 0,
- ZIO_FLAG_IO_REPAIR = 1U << 1,
- ZIO_FLAG_SELF_HEAL = 1U << 2,
- ZIO_FLAG_RESILVER = 1U << 3,
- ZIO_FLAG_SCRUB = 1U << 4,
- ZIO_FLAG_SCAN_THREAD = 1U << 5,
- ZIO_FLAG_PHYSICAL = 1U << 6,
+#define ZIO_FLAG_DONT_AGGREGATE (1ULL << 0)
+#define ZIO_FLAG_IO_REPAIR (1ULL << 1)
+#define ZIO_FLAG_SELF_HEAL (1ULL << 2)
+#define ZIO_FLAG_RESILVER (1ULL << 3)
+#define ZIO_FLAG_SCRUB (1ULL << 4)
+#define ZIO_FLAG_SCAN_THREAD (1ULL << 5)
+#define ZIO_FLAG_PHYSICAL (1ULL << 6)
#define ZIO_FLAG_AGG_INHERIT (ZIO_FLAG_CANFAIL - 1)
/*
* Flags inherited by ddt, gang, and vdev children.
*/
- ZIO_FLAG_CANFAIL = 1U << 7, /* must be first for INHERIT */
- ZIO_FLAG_SPECULATIVE = 1U << 8,
- ZIO_FLAG_CONFIG_WRITER = 1U << 9,
- ZIO_FLAG_DONT_RETRY = 1U << 10,
- ZIO_FLAG_DONT_CACHE = 1U << 11,
- ZIO_FLAG_NODATA = 1U << 12,
- ZIO_FLAG_INDUCE_DAMAGE = 1U << 13,
- ZIO_FLAG_IO_ALLOCATING = 1U << 14,
+#define ZIO_FLAG_CANFAIL (1ULL << 7) /* must be first for INHERIT */
+#define ZIO_FLAG_SPECULATIVE (1ULL << 8)
+#define ZIO_FLAG_CONFIG_WRITER (1ULL << 9)
+#define ZIO_FLAG_DONT_RETRY (1ULL << 10)
+#define ZIO_FLAG_NODATA (1ULL << 12)
+#define ZIO_FLAG_INDUCE_DAMAGE (1ULL << 13)
+#define ZIO_FLAG_IO_ALLOCATING (1ULL << 14)
#define ZIO_FLAG_DDT_INHERIT (ZIO_FLAG_IO_RETRY - 1)
#define ZIO_FLAG_GANG_INHERIT (ZIO_FLAG_IO_RETRY - 1)
@@ -196,30 +200,31 @@ enum zio_flag {
/*
* Flags inherited by vdev children.
*/
- ZIO_FLAG_IO_RETRY = 1U << 15, /* must be first for INHERIT */
- ZIO_FLAG_PROBE = 1U << 16,
- ZIO_FLAG_TRYHARD = 1U << 17,
- ZIO_FLAG_OPTIONAL = 1U << 18,
+#define ZIO_FLAG_IO_RETRY (1ULL << 15) /* must be first for INHERIT */
+#define ZIO_FLAG_PROBE (1ULL << 16)
+#define ZIO_FLAG_TRYHARD (1ULL << 17)
+#define ZIO_FLAG_OPTIONAL (1ULL << 18)
#define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1)
/*
* Flags not inherited by any children.
*/
- ZIO_FLAG_DONT_QUEUE = 1U << 19, /* must be first for INHERIT */
- ZIO_FLAG_DONT_PROPAGATE = 1U << 20,
- ZIO_FLAG_IO_BYPASS = 1U << 21,
- ZIO_FLAG_IO_REWRITE = 1U << 22,
- ZIO_FLAG_RAW_COMPRESS = 1U << 23,
- ZIO_FLAG_RAW_ENCRYPT = 1U << 24,
- ZIO_FLAG_GANG_CHILD = 1U << 25,
- ZIO_FLAG_DDT_CHILD = 1U << 26,
- ZIO_FLAG_GODFATHER = 1U << 27,
- ZIO_FLAG_NOPWRITE = 1U << 28,
- ZIO_FLAG_REEXECUTED = 1U << 29,
- ZIO_FLAG_DELEGATED = 1U << 30,
- ZIO_FLAG_FASTWRITE = 1U << 31,
-};
+#define ZIO_FLAG_DONT_QUEUE (1ULL << 19) /* must be first for INHERIT */
+#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 20)
+#define ZIO_FLAG_IO_BYPASS (1ULL << 21)
+#define ZIO_FLAG_IO_REWRITE (1ULL << 22)
+#define ZIO_FLAG_RAW_COMPRESS (1ULL << 23)
+#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 24)
+#define ZIO_FLAG_GANG_CHILD (1ULL << 25)
+#define ZIO_FLAG_DDT_CHILD (1ULL << 26)
+#define ZIO_FLAG_GODFATHER (1ULL << 27)
+#define ZIO_FLAG_NOPWRITE (1ULL << 28)
+#define ZIO_FLAG_REEXECUTED (1ULL << 29)
+#define ZIO_FLAG_DELEGATED (1ULL << 30)
+
+#define ZIO_ALLOCATOR_NONE (-1)
+#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)
#define ZIO_FLAG_MUSTSUCCEED 0
#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
@@ -299,12 +304,12 @@ struct zbookmark_phys {
uint64_t zb_blkid;
};
-typedef struct zbookmark_err_phys {
+struct zbookmark_err_phys {
uint64_t zb_object;
int64_t zb_level;
uint64_t zb_blkid;
uint64_t zb_birth;
-} zbookmark_err_phys_t;
+};
#define SET_BOOKMARK(zb, objset, object, level, blkid) \
{ \
@@ -338,12 +343,13 @@ typedef struct zio_prop {
enum zio_checksum zp_checksum;
enum zio_compress zp_compress;
uint8_t zp_complevel;
- dmu_object_type_t zp_type;
uint8_t zp_level;
uint8_t zp_copies;
+ dmu_object_type_t zp_type;
boolean_t zp_dedup;
boolean_t zp_dedup_verify;
boolean_t zp_nopwrite;
+ boolean_t zp_brtwrite;
boolean_t zp_encrypt;
boolean_t zp_byteorder;
uint8_t zp_salt[ZIO_DATA_SALT_LEN];
@@ -432,6 +438,12 @@ typedef struct zio_link {
list_node_t zl_child_node;
} zio_link_t;
+enum zio_qstate {
+ ZIO_QS_NONE = 0,
+ ZIO_QS_QUEUED,
+ ZIO_QS_ACTIVE,
+};
+
struct zio {
/* Core information about this I/O */
zbookmark_phys_t io_bookmark;
@@ -439,7 +451,6 @@ struct zio {
zio_type_t io_type;
enum zio_child io_child_type;
enum trim_flag io_trim_flags;
- int io_cmd;
zio_priority_t io_priority;
uint8_t io_reexecute;
uint8_t io_state[ZIO_WAIT_TYPES];
@@ -456,7 +467,6 @@ struct zio {
/* Callback info */
zio_done_func_t *io_ready;
zio_done_func_t *io_children_ready;
- zio_done_func_t *io_physdone;
zio_done_func_t *io_done;
void *io_private;
int64_t io_prev_space_delta; /* DMU private */
@@ -476,6 +486,12 @@ struct zio {
const zio_vsd_ops_t *io_vsd_ops;
metaslab_class_t *io_metaslab_class; /* dva throttle class */
+ enum zio_qstate io_queue_state; /* vdev queue state */
+ union {
+ list_node_t l;
+ avl_node_t a;
+ } io_queue_node ____cacheline_aligned; /* allocator and vdev queues */
+ avl_node_t io_offset_node; /* vdev offset queues */
uint64_t io_offset;
hrtime_t io_timestamp; /* submitted at */
hrtime_t io_queued_timestamp;
@@ -483,25 +499,19 @@ struct zio {
hrtime_t io_delta; /* vdev queue service delta */
hrtime_t io_delay; /* Device access time (disk or */
/* file). */
- avl_node_t io_queue_node;
- avl_node_t io_offset_node;
- avl_node_t io_alloc_node;
zio_alloc_list_t io_alloc_list;
/* Internal pipeline state */
- enum zio_flag io_flags;
+ zio_flag_t io_flags;
enum zio_stage io_stage;
enum zio_stage io_pipeline;
- enum zio_flag io_orig_flags;
+ zio_flag_t io_orig_flags;
enum zio_stage io_orig_stage;
enum zio_stage io_orig_pipeline;
enum zio_stage io_pipeline_trace;
int io_error;
int io_child_error[ZIO_CHILD_TYPES];
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
- uint64_t io_child_count;
- uint64_t io_phys_children;
- uint64_t io_parent_count;
uint64_t *io_stall;
zio_t *io_gang_leader;
zio_gang_node_t *io_gang_tree;
@@ -526,57 +536,61 @@ enum blk_verify_flag {
BLK_VERIFY_HALT
};
+enum blk_config_flag {
+ BLK_CONFIG_HELD, // SCL_VDEV held for writer
+ BLK_CONFIG_NEEDED, // SCL_VDEV should be obtained for reader
+ BLK_CONFIG_SKIP, // skip checks which require SCL_VDEV
+};
+
extern int zio_bookmark_compare(const void *, const void *);
extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
- zio_done_func_t *done, void *priv, enum zio_flag flags);
+ zio_done_func_t *done, void *priv, zio_flag_t flags);
extern zio_t *zio_root(spa_t *spa,
- zio_done_func_t *done, void *priv, enum zio_flag flags);
+ zio_done_func_t *done, void *priv, zio_flag_t flags);
+
+extern void zio_destroy(zio_t *zio);
extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
struct abd *data, uint64_t lsize, zio_done_func_t *done, void *priv,
- zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb);
+ zio_priority_t priority, zio_flag_t flags, const zbookmark_phys_t *zb);
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
struct abd *data, uint64_t size, uint64_t psize, const zio_prop_t *zp,
zio_done_func_t *ready, zio_done_func_t *children_ready,
- zio_done_func_t *physdone, zio_done_func_t *done,
- void *priv, zio_priority_t priority, enum zio_flag flags,
- const zbookmark_phys_t *zb);
+ zio_done_func_t *done, void *priv, zio_priority_t priority,
+ zio_flag_t flags, const zbookmark_phys_t *zb);
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
struct abd *data, uint64_t size, zio_done_func_t *done, void *priv,
- zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb);
+ zio_priority_t priority, zio_flag_t flags, zbookmark_phys_t *zb);
extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies,
- boolean_t nopwrite);
+ boolean_t nopwrite, boolean_t brtwrite);
extern void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp);
extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp,
- zio_done_func_t *done, void *priv, enum zio_flag flags);
-
-extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
- zio_done_func_t *done, void *priv, enum zio_flag flags);
+ zio_done_func_t *done, void *priv, zio_flag_t flags);
extern zio_t *zio_trim(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
zio_done_func_t *done, void *priv, zio_priority_t priority,
- enum zio_flag flags, enum trim_flag trim_flags);
+ zio_flag_t flags, enum trim_flag trim_flags);
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, struct abd *data, int checksum,
zio_done_func_t *done, void *priv, zio_priority_t priority,
- enum zio_flag flags, boolean_t labels);
+ zio_flag_t flags, boolean_t labels);
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, struct abd *data, int checksum,
zio_done_func_t *done, void *priv, zio_priority_t priority,
- enum zio_flag flags, boolean_t labels);
+ zio_flag_t flags, boolean_t labels);
extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
- const blkptr_t *bp, enum zio_flag flags);
+ const blkptr_t *bp, zio_flag_t flags);
extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg,
blkptr_t *new_bp, uint64_t size, boolean_t *slog);
@@ -595,6 +609,7 @@ extern zio_t *zio_walk_parents(zio_t *cio, zio_link_t **);
extern zio_t *zio_walk_children(zio_t *pio, zio_link_t **);
extern zio_t *zio_unique_parent(zio_t *cio);
extern void zio_add_child(zio_t *pio, zio_t *cio);
+extern void zio_add_child_first(zio_t *pio, zio_t *cio);
extern void *zio_buf_alloc(size_t size);
extern void zio_buf_free(void *buf, size_t size);
@@ -609,12 +624,12 @@ extern void zio_resubmit_stage_async(void *);
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
uint64_t offset, struct abd *data, uint64_t size, int type,
- zio_priority_t priority, enum zio_flag flags,
+ zio_priority_t priority, zio_flag_t flags,
zio_done_func_t *done, void *priv);
extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
struct abd *data, uint64_t size, zio_type_t type, zio_priority_t priority,
- enum zio_flag flags, zio_done_func_t *done, void *priv);
+ zio_flag_t flags, zio_done_func_t *done, void *priv);
extern void zio_vdev_io_bypass(zio_t *zio);
extern void zio_vdev_io_reissue(zio_t *zio);
@@ -639,7 +654,7 @@ extern int zio_resume(spa_t *spa);
extern void zio_resume_wait(spa_t *spa);
extern boolean_t zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
- boolean_t config_held, enum blk_verify_flag blk_verify);
+ enum blk_config_flag blk_config, enum blk_verify_flag blk_verify);
/*
* Initial setup and teardown.
@@ -668,6 +683,8 @@ extern int zio_handle_device_injections(vdev_t *vd, zio_t *zio, int err1,
extern int zio_handle_label_injection(zio_t *zio, int error);
extern void zio_handle_ignored_writes(zio_t *zio);
extern hrtime_t zio_handle_io_delay(zio_t *zio);
+extern void zio_handle_import_delay(spa_t *spa, hrtime_t elapsed);
+extern void zio_handle_export_delay(spa_t *spa, hrtime_t elapsed);
/*
* Checksum ereport functions
@@ -696,6 +713,8 @@ extern void spa_handle_ignored_writes(spa_t *spa);
/* zbookmark_phys functions */
boolean_t zbookmark_subtree_completed(const struct dnode_phys *dnp,
const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block);
+boolean_t zbookmark_subtree_tbd(const struct dnode_phys *dnp,
+ const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block);
int zbookmark_compare(uint16_t dbss1, uint8_t ibs1, uint16_t dbss2,
uint8_t ibs2, const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2);
diff --git a/include/sys/zio_checksum.h b/include/sys/zio_checksum.h
index 989f125e6afd..37fd65b7cb3e 100644
--- a/include/sys/zio_checksum.h
+++ b/include/sys/zio_checksum.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -94,15 +94,13 @@ typedef const struct zio_checksum_info {
} zio_checksum_info_t;
typedef struct zio_bad_cksum {
- zio_cksum_t zbc_expected;
- zio_cksum_t zbc_actual;
const char *zbc_checksum_name;
uint8_t zbc_byteswapped;
uint8_t zbc_injected;
uint8_t zbc_has_cksum; /* expected/actual valid */
} zio_bad_cksum_t;
-_SYS_ZIO_CHECKSUM_H const zio_checksum_info_t
+_SYS_ZIO_CHECKSUM_H zio_checksum_info_t
zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS];
/*
@@ -110,9 +108,9 @@ _SYS_ZIO_CHECKSUM_H const zio_checksum_info_t
*/
/* SHA2 */
-extern zio_checksum_t abd_checksum_SHA256;
-extern zio_checksum_t abd_checksum_SHA512_native;
-extern zio_checksum_t abd_checksum_SHA512_byteswap;
+extern zio_checksum_t abd_checksum_sha256;
+extern zio_checksum_t abd_checksum_sha512_native;
+extern zio_checksum_t abd_checksum_sha512_byteswap;
/* Skein */
extern zio_checksum_t abd_checksum_skein_native;
diff --git a/include/sys/zio_compress.h b/include/sys/zio_compress.h
index 26600b43bb49..691d7b624488 100644
--- a/include/sys/zio_compress.h
+++ b/include/sys/zio_compress.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -152,7 +152,7 @@ typedef const struct zio_compress_info {
zio_decompresslevel_func_t *ci_decompress_level;
} zio_compress_info_t;
-extern const zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
+extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
/*
* lz4 compression init & free
@@ -183,7 +183,7 @@ extern int lz4_decompress_zfs(void *src, void *dst, size_t s_len, size_t d_len,
/*
* Compress and decompress data if necessary.
*/
-extern size_t zio_compress_data(enum zio_compress c, abd_t *src, void *dst,
+extern size_t zio_compress_data(enum zio_compress c, abd_t *src, void **dst,
size_t s_len, uint8_t level);
extern int zio_decompress_data(enum zio_compress c, abd_t *src, void *dst,
size_t s_len, size_t d_len, uint8_t *level);
diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h
index 4c998571653a..2b026d48675a 100644
--- a/include/sys/zio_impl.h
+++ b/include/sys/zio_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -25,6 +25,7 @@
/*
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2024, Klara Inc.
*/
#ifndef _ZIO_IMPL_H
@@ -39,7 +40,7 @@ extern "C" {
*
* The ZFS I/O pipeline is comprised of various stages which are defined
* in the zio_stage enum below. The individual stages are used to construct
- * these basic I/O operations: Read, Write, Free, Claim, and Ioctl.
+ * these basic I/O operations: Read, Write, Free, Claim, Flush and Trim.
*
* I/O operations: (XXX - provide detail for each of the operations)
*
@@ -47,7 +48,8 @@ extern "C" {
* Write:
* Free:
* Claim:
- * Ioctl:
+ * Flush:
+ * Trim:
*
* Although the most common pipeline are used by the basic I/O operations
* above, there are some helper pipelines (one could consider them
@@ -77,6 +79,12 @@ extern "C" {
* and zstd. Compression occurs as part of the write pipeline and is
* performed in the ZIO_STAGE_WRITE_BP_INIT stage.
*
+ * Block cloning:
+ * The block cloning functionality introduces ZIO_STAGE_BRT_FREE stage which
+ * is called during a free pipeline. If the block is referenced in the
+ * Block Cloning Table (BRT) we will just decrease its reference counter
+ * instead of actually freeing the block.
+ *
* Dedup:
* Dedup reads are handled by the ZIO_STAGE_DDT_READ_START and
* ZIO_STAGE_DDT_READ_DONE stages. These stages are added to an existing
@@ -114,43 +122,48 @@ extern "C" {
* zio pipeline stage definitions
*/
enum zio_stage {
- ZIO_STAGE_OPEN = 1 << 0, /* RWFCI */
+ ZIO_STAGE_OPEN = 1 << 0, /* RWFCXT */
+
+ ZIO_STAGE_READ_BP_INIT = 1 << 1, /* R----- */
+ ZIO_STAGE_WRITE_BP_INIT = 1 << 2, /* -W---- */
+ ZIO_STAGE_FREE_BP_INIT = 1 << 3, /* --F--- */
+ ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* -WF--T */
+ ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W---- */
- ZIO_STAGE_READ_BP_INIT = 1 << 1, /* R---- */
- ZIO_STAGE_WRITE_BP_INIT = 1 << 2, /* -W--- */
- ZIO_STAGE_FREE_BP_INIT = 1 << 3, /* --F-- */
- ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */
- ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */
+ ZIO_STAGE_ENCRYPT = 1 << 6, /* -W---- */
+ ZIO_STAGE_CHECKSUM_GENERATE = 1 << 7, /* -W---- */
- ZIO_STAGE_ENCRYPT = 1 << 6, /* -W--- */
- ZIO_STAGE_CHECKSUM_GENERATE = 1 << 7, /* -W--- */
+ ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W---- */
- ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W--- */
+ ZIO_STAGE_BRT_FREE = 1 << 9, /* --F--- */
- ZIO_STAGE_DDT_READ_START = 1 << 9, /* R---- */
- ZIO_STAGE_DDT_READ_DONE = 1 << 10, /* R---- */
- ZIO_STAGE_DDT_WRITE = 1 << 11, /* -W--- */
- ZIO_STAGE_DDT_FREE = 1 << 12, /* --F-- */
+ ZIO_STAGE_DDT_READ_START = 1 << 10, /* R----- */
+ ZIO_STAGE_DDT_READ_DONE = 1 << 11, /* R----- */
+ ZIO_STAGE_DDT_WRITE = 1 << 12, /* -W---- */
+ ZIO_STAGE_DDT_FREE = 1 << 13, /* --F--- */
- ZIO_STAGE_GANG_ASSEMBLE = 1 << 13, /* RWFC- */
- ZIO_STAGE_GANG_ISSUE = 1 << 14, /* RWFC- */
+ ZIO_STAGE_GANG_ASSEMBLE = 1 << 14, /* RWFC-- */
+ ZIO_STAGE_GANG_ISSUE = 1 << 15, /* RWFC-- */
- ZIO_STAGE_DVA_THROTTLE = 1 << 15, /* -W--- */
- ZIO_STAGE_DVA_ALLOCATE = 1 << 16, /* -W--- */
- ZIO_STAGE_DVA_FREE = 1 << 17, /* --F-- */
- ZIO_STAGE_DVA_CLAIM = 1 << 18, /* ---C- */
+ ZIO_STAGE_DVA_THROTTLE = 1 << 16, /* -W---- */
+ ZIO_STAGE_DVA_ALLOCATE = 1 << 17, /* -W---- */
+ ZIO_STAGE_DVA_FREE = 1 << 18, /* --F--- */
+ ZIO_STAGE_DVA_CLAIM = 1 << 19, /* ---C-- */
- ZIO_STAGE_READY = 1 << 19, /* RWFCI */
+ ZIO_STAGE_READY = 1 << 20, /* RWFCXT */
- ZIO_STAGE_VDEV_IO_START = 1 << 20, /* RW--I */
- ZIO_STAGE_VDEV_IO_DONE = 1 << 21, /* RW--I */
- ZIO_STAGE_VDEV_IO_ASSESS = 1 << 22, /* RW--I */
+ ZIO_STAGE_VDEV_IO_START = 1 << 21, /* RW--XT */
+ ZIO_STAGE_VDEV_IO_DONE = 1 << 22, /* RW--XT */
+ ZIO_STAGE_VDEV_IO_ASSESS = 1 << 23, /* RW--XT */
- ZIO_STAGE_CHECKSUM_VERIFY = 1 << 23, /* R---- */
+ ZIO_STAGE_CHECKSUM_VERIFY = 1 << 24, /* R----- */
- ZIO_STAGE_DONE = 1 << 24 /* RWFCI */
+ ZIO_STAGE_DONE = 1 << 25 /* RWFCXT */
};
+#define ZIO_ROOT_PIPELINE \
+ ZIO_STAGE_DONE
+
#define ZIO_INTERLOCK_STAGES \
(ZIO_STAGE_READY | \
ZIO_STAGE_DONE)
@@ -233,6 +246,7 @@ enum zio_stage {
#define ZIO_FREE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
ZIO_STAGE_FREE_BP_INIT | \
+ ZIO_STAGE_BRT_FREE | \
ZIO_STAGE_DVA_FREE)
#define ZIO_DDT_FREE_PIPELINE \
@@ -245,10 +259,9 @@ enum zio_stage {
(ZIO_INTERLOCK_STAGES | \
ZIO_STAGE_DVA_CLAIM)
-#define ZIO_IOCTL_PIPELINE \
+#define ZIO_FLUSH_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
- ZIO_STAGE_VDEV_IO_START | \
- ZIO_STAGE_VDEV_IO_ASSESS)
+ ZIO_VDEV_IO_STAGES)
#define ZIO_TRIM_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
diff --git a/include/sys/zrlock.h b/include/sys/zrlock.h
index b6eba1a18ff4..e2a7a254a6e0 100644
--- a/include/sys/zrlock.h
+++ b/include/sys/zrlock.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -34,9 +34,8 @@ extern "C" {
typedef struct zrlock {
kmutex_t zr_mtx;
- volatile int32_t zr_refcount;
kcondvar_t zr_cv;
- uint16_t zr_pad;
+ volatile int32_t zr_refcount;
#ifdef ZFS_DEBUG
kthread_t *zr_owner;
const char *zr_caller;
diff --git a/include/sys/zvol.h b/include/sys/zvol.h
index a0f18001304e..c79fe1d9ad22 100644
--- a/include/sys/zvol.h
+++ b/include/sys/zvol.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -50,8 +50,9 @@ extern int zvol_get_stats(objset_t *, nvlist_t *);
extern boolean_t zvol_is_zvol(const char *);
extern void zvol_create_cb(objset_t *, void *, cred_t *, dmu_tx_t *);
extern int zvol_set_volsize(const char *, uint64_t);
-extern int zvol_set_snapdev(const char *, zprop_source_t, uint64_t);
-extern int zvol_set_volmode(const char *, zprop_source_t, uint64_t);
+extern int zvol_set_volthreading(const char *, boolean_t);
+extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t);
+extern int zvol_set_ro(const char *, boolean_t);
extern zvol_state_handle_t *zvol_suspend(const char *);
extern int zvol_resume(zvol_state_handle_t *);
extern void *zvol_tag(zvol_state_handle_t *);
diff --git a/include/sys/zvol_impl.h b/include/sys/zvol_impl.h
index 94203347066b..6c15c84b6bf4 100644
--- a/include/sys/zvol_impl.h
+++ b/include/sys/zvol_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -58,6 +58,7 @@ typedef struct zvol_state {
atomic_t zv_suspend_ref; /* refcount for suspend */
krwlock_t zv_suspend_lock; /* suspend lock */
struct zvol_state_os *zv_zso; /* private platform state */
+ boolean_t zv_threading; /* volthreading property */
} zvol_state_t;
@@ -81,9 +82,9 @@ void zvol_remove_minors_impl(const char *name);
void zvol_last_close(zvol_state_t *zv);
void zvol_insert(zvol_state_t *zv);
void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off,
- uint64_t len, boolean_t sync);
+ uint64_t len);
void zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
- uint64_t size, int sync);
+ uint64_t size, boolean_t commit);
int zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
struct lwb *lwb, zio_t *zio);
int zvol_init_impl(void);