aboutsummaryrefslogtreecommitdiff
path: root/include/sys/ddt.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/sys/ddt.h')
-rw-r--r--include/sys/ddt.h201
1 files changed, 105 insertions, 96 deletions
diff --git a/include/sys/ddt.h b/include/sys/ddt.h
index 25be6f56dddc..726f1a3902eb 100644
--- a/include/sys/ddt.h
+++ b/include/sys/ddt.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2023, Klara Inc.
*/
#ifndef _SYS_DDT_H
@@ -39,32 +40,50 @@ extern "C" {
struct abd;
/*
- * On-disk DDT formats, in the desired search order (newest version first).
+ * DDT on-disk storage object types. Each one corresponds to a specific
+ * implementation, see ddt_ops_t. The value itself is not stored on disk.
+ *
+ * When searching for an entry, object types will be searched in this order.
+ *
+ * Note that DDT_TYPES is used as the "no type" for new entries that have not
+ * yet been written to a storage object.
*/
-enum ddt_type {
- DDT_TYPE_ZAP = 0,
+typedef enum {
+ DDT_TYPE_ZAP = 0, /* ZAP storage object, ddt_zap */
DDT_TYPES
-};
+} ddt_type_t;
+
+_Static_assert(DDT_TYPES <= UINT8_MAX,
+ "ddt_type_t must fit in a uint8_t");
+
+/* New and updated entries receive this type, see ddt_sync_entry() */
+#define DDT_TYPE_DEFAULT (DDT_TYPE_ZAP)
/*
- * DDT classes, in the desired search order (highest replication level first).
+ * DDT storage classes. Each class has a separate storage object for each type.
+ * The value itself is not stored on disk.
+ *
+ * When searching for an entry, object classes will be searched in this order.
+ *
+ * Note that DDT_CLASSES is used as the "no class" for new entries that have not
+ * yet been written to a storage object.
*/
-enum ddt_class {
- DDT_CLASS_DITTO = 0,
- DDT_CLASS_DUPLICATE,
- DDT_CLASS_UNIQUE,
+typedef enum {
+ DDT_CLASS_DITTO = 0, /* entry has ditto blocks (obsolete) */
+ DDT_CLASS_DUPLICATE, /* entry has multiple references */
+ DDT_CLASS_UNIQUE, /* entry has a single reference */
DDT_CLASSES
-};
-
-#define DDT_TYPE_CURRENT 0
+} ddt_class_t;
-#define DDT_COMPRESS_BYTEORDER_MASK 0x80
-#define DDT_COMPRESS_FUNCTION_MASK 0x7f
+_Static_assert(DDT_CLASSES < UINT8_MAX,
+ "ddt_class_t must fit in a uint8_t");
/*
- * On-disk ddt entry: key (name) and physical storage (value).
+ * The "key" part of an on-disk entry. This is the unique "name" for a block,
+ * that is, the parts of the block pointer that will always be the same for
+ * the same data.
*/
-typedef struct ddt_key {
+typedef struct {
zio_cksum_t ddk_cksum; /* 256-bit block checksum */
/*
* Encoded with logical & physical size, encryption, and compression,
@@ -76,6 +95,10 @@ typedef struct ddt_key {
uint64_t ddk_prop;
} ddt_key_t;
+/*
+ * Macros for accessing parts of a ddt_key_t. These are similar to their BP_*
+ * counterparts.
+ */
#define DDK_GET_LSIZE(ddk) \
BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
#define DDK_SET_LSIZE(ddk, x) \
@@ -92,18 +115,25 @@ typedef struct ddt_key {
#define DDK_GET_CRYPT(ddk) BF64_GET((ddk)->ddk_prop, 39, 1)
#define DDK_SET_CRYPT(ddk, x) BF64_SET((ddk)->ddk_prop, 39, 1, x)
-#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))
-
-#define DDE_GET_NDVAS(dde) (DDK_GET_CRYPT(&dde->dde_key) \
- ? SPA_DVAS_PER_BP - 1 : SPA_DVAS_PER_BP)
-
-typedef struct ddt_phys {
+/*
+ * The "value" part for an on-disk entry. These are the "physical"
+ * characteristics of the stored block, such as its location on disk (DVAs),
+ * birth txg and ref count.
+ *
+ * Note that an entry has an array of four ddt_phys_t, one for each number of
+ * DVAs (copies= property) and another for additional "ditto" copies. Most
+ * users of ddt_phys_t will handle indexing into or counting the phys they
+ * want.
+ */
+typedef struct {
dva_t ddp_dva[SPA_DVAS_PER_BP];
uint64_t ddp_refcnt;
uint64_t ddp_phys_birth;
} ddt_phys_t;
/*
+ * Named indexes into the ddt_phys_t array in each entry.
+ *
* Note, we no longer generate new DDT_PHYS_DITTO-type blocks. However,
* we maintain the ability to free existing dedup-ditto blocks.
*/
@@ -116,99 +146,83 @@ enum ddt_phys_type {
};
/*
- * In-core ddt entry
+ * A "live" entry, holding changes to an entry made this txg, and other data to
+ * support loading, updating and repairing the entry.
*/
-struct ddt_entry {
- ddt_key_t dde_key;
- ddt_phys_t dde_phys[DDT_PHYS_TYPES];
+
+/* State flags for dde_flags */
+#define DDE_FLAG_LOADED (1 << 0) /* entry ready for use */
+
+typedef struct {
+ /* key must be first for ddt_key_compare */
+ ddt_key_t dde_key; /* ddt_tree key */
+ ddt_phys_t dde_phys[DDT_PHYS_TYPES]; /* on-disk data */
+
+ /* in-flight update IOs */
zio_t *dde_lead_zio[DDT_PHYS_TYPES];
+
+ /* copy of data after a repair read, to be rewritten */
struct abd *dde_repair_abd;
- enum ddt_type dde_type;
- enum ddt_class dde_class;
- uint8_t dde_loading;
- uint8_t dde_loaded;
- kcondvar_t dde_cv;
- avl_node_t dde_node;
-};
+
+ /* storage type and class the entry was loaded from */
+ ddt_type_t dde_type;
+ ddt_class_t dde_class;
+
+ uint8_t dde_flags; /* load state flags */
+ kcondvar_t dde_cv; /* signaled when load completes */
+
+ avl_node_t dde_node; /* ddt_tree node */
+} ddt_entry_t;
/*
- * In-core ddt
+ * In-core DDT object. This covers all entries and stats for the whole pool
+ * for a given checksum type.
*/
-struct ddt {
- kmutex_t ddt_lock;
- avl_tree_t ddt_tree;
- avl_tree_t ddt_repair_tree;
- enum zio_checksum ddt_checksum;
- spa_t *ddt_spa;
- objset_t *ddt_os;
- uint64_t ddt_stat_object;
+typedef struct {
+ kmutex_t ddt_lock; /* protects changes to all fields */
+
+ avl_tree_t ddt_tree; /* "live" (changed) entries this txg */
+
+ avl_tree_t ddt_repair_tree; /* entries being repaired */
+
+ enum zio_checksum ddt_checksum; /* checksum algorithm in use */
+ spa_t *ddt_spa; /* pool this ddt is on */
+ objset_t *ddt_os; /* ddt objset (always MOS) */
+
+ /* per-type/per-class entry store objects */
uint64_t ddt_object[DDT_TYPES][DDT_CLASSES];
+
+ /* object ids for whole-ddt and per-type/per-class stats */
+ uint64_t ddt_stat_object;
+ ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES];
+
+ /* type/class stats by power-2-sized referenced blocks */
ddt_histogram_t ddt_histogram[DDT_TYPES][DDT_CLASSES];
ddt_histogram_t ddt_histogram_cache[DDT_TYPES][DDT_CLASSES];
- ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES];
- avl_node_t ddt_node;
-};
+} ddt_t;
/*
- * In-core and on-disk bookmark for DDT walks
+ * In-core and on-disk bookmark for DDT walks. This is a cursor for ddt_walk(),
+ * and is stable across calls, even if the DDT is updated, the pool is
+ * restarted or loaded on another system, or OpenZFS is upgraded.
*/
-typedef struct ddt_bookmark {
+typedef struct {
uint64_t ddb_class;
uint64_t ddb_type;
uint64_t ddb_checksum;
uint64_t ddb_cursor;
} ddt_bookmark_t;
-/*
- * Ops vector to access a specific DDT object type.
- */
-typedef struct ddt_ops {
- char ddt_op_name[32];
- int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
- boolean_t prehash);
- int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
- int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
- void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
- ddt_entry_t *dde);
- int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
- dmu_tx_t *tx);
- int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
- dmu_tx_t *tx);
- int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde,
- uint64_t *walk);
- int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
-} ddt_ops_t;
-
-#define DDT_NAMELEN 107
-
-extern void ddt_object_name(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, char *name);
-extern int ddt_object_walk(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, uint64_t *walk, ddt_entry_t *dde);
-extern int ddt_object_count(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, uint64_t *count);
-extern int ddt_object_info(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, dmu_object_info_t *);
-extern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz);
-
extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp,
uint64_t txg);
extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk,
const ddt_phys_t *ddp, blkptr_t *bp);
-extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
-
extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp);
extern void ddt_phys_clear(ddt_phys_t *ddp);
extern void ddt_phys_addref(ddt_phys_t *ddp);
extern void ddt_phys_decref(ddt_phys_t *ddp);
-extern void ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp,
- uint64_t txg);
extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp);
-extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);
-
-extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
@@ -220,9 +234,6 @@ extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);
extern uint64_t ddt_get_dedup_dspace(spa_t *spa);
extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
-extern size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len);
-extern void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len);
-
extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
extern void ddt_enter(ddt_t *ddt);
extern void ddt_exit(ddt_t *ddt);
@@ -232,23 +243,21 @@ extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
-extern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class,
+extern boolean_t ddt_class_contains(spa_t *spa, ddt_class_t max_class,
const blkptr_t *bp);
extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp);
extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde);
-extern int ddt_entry_compare(const void *x1, const void *x2);
+extern int ddt_key_compare(const void *x1, const void *x2);
extern void ddt_create(spa_t *spa);
extern int ddt_load(spa_t *spa);
extern void ddt_unload(spa_t *spa);
extern void ddt_sync(spa_t *spa, uint64_t txg);
extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
-extern int ddt_object_update(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, ddt_entry_t *dde, dmu_tx_t *tx);
-extern const ddt_ops_t ddt_zap_ops;
+extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp);
#ifdef __cplusplus
}