diff options
Diffstat (limited to 'sys/contrib/openzfs/include')
44 files changed, 388 insertions, 202 deletions
diff --git a/sys/contrib/openzfs/include/Makefile.am b/sys/contrib/openzfs/include/Makefile.am index a9258deabfd7..7588cd0aedc9 100644 --- a/sys/contrib/openzfs/include/Makefile.am +++ b/sys/contrib/openzfs/include/Makefile.am @@ -10,6 +10,7 @@ COMMON_H = \ cityhash.h \ zfeature_common.h \ zfs_comutil.h \ + zfs_crrd.h \ zfs_deleg.h \ zfs_fletcher.h \ zfs_namecheck.h \ @@ -69,7 +70,6 @@ COMMON_H = \ sys/metaslab_impl.h \ sys/mmp.h \ sys/mntent.h \ - sys/mod.h \ sys/multilist.h \ sys/nvpair.h \ sys/nvpair_impl.h \ diff --git a/sys/contrib/openzfs/include/libzfs.h b/sys/contrib/openzfs/include/libzfs.h index 485af793862c..3fcdc176a621 100644 --- a/sys/contrib/openzfs/include/libzfs.h +++ b/sys/contrib/openzfs/include/libzfs.h @@ -30,6 +30,7 @@ * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2019 Datto Inc. * Copyright (c) 2021, Colm Buckley <colm@tuatha.org> + * Copyright (c) 2025 Hewlett Packard Enterprise Development LP. */ #ifndef _LIBZFS_H @@ -288,10 +289,22 @@ typedef struct trimflags { uint64_t rate; } trimflags_t; +typedef struct trim_cbdata { + trimflags_t trim_flags; + pool_trim_func_t cmd_type; +} trim_cbdata_t; + +typedef struct initialize_cbdata { + boolean_t wait; + pool_initialize_func_t cmd_type; +} initialize_cbdata_t; /* * Functions to manipulate pool and vdev state */ _LIBZFS_H int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t); +_LIBZFS_H int zpool_scan_range(zpool_handle_t *, pool_scan_func_t, + pool_scrub_cmd_t, time_t, time_t); +_LIBZFS_H int zpool_initialize_one(zpool_handle_t *, void *); _LIBZFS_H int zpool_initialize(zpool_handle_t *, pool_initialize_func_t, nvlist_t *); _LIBZFS_H int zpool_initialize_wait(zpool_handle_t *, pool_initialize_func_t, @@ -304,7 +317,9 @@ _LIBZFS_H int zpool_reguid(zpool_handle_t *); _LIBZFS_H int zpool_set_guid(zpool_handle_t *, const uint64_t *); _LIBZFS_H int zpool_reopen_one(zpool_handle_t *, void *); +_LIBZFS_H void zpool_collect_leaves(zpool_handle_t *, nvlist_t *, 
nvlist_t *); _LIBZFS_H int zpool_sync_one(zpool_handle_t *, void *); +_LIBZFS_H int zpool_trim_one(zpool_handle_t *, void *); _LIBZFS_H int zpool_ddt_prune(zpool_handle_t *, zpool_ddt_prune_unit_t, uint64_t); diff --git a/sys/contrib/openzfs/include/os/freebsd/Makefile.am b/sys/contrib/openzfs/include/os/freebsd/Makefile.am index d975c4fe69fa..d6b6923d033f 100644 --- a/sys/contrib/openzfs/include/os/freebsd/Makefile.am +++ b/sys/contrib/openzfs/include/os/freebsd/Makefile.am @@ -33,7 +33,7 @@ noinst_HEADERS = \ %D%/spl/sys/list_impl.h \ %D%/spl/sys/lock.h \ %D%/spl/sys/misc.h \ - %D%/spl/sys/mod_os.h \ + %D%/spl/sys/mod.h \ %D%/spl/sys/mode.h \ %D%/spl/sys/mount.h \ %D%/spl/sys/mutex.h \ diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/debug.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/debug.h index 974704e92bbd..32bc02f3dc86 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/debug.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/debug.h @@ -69,6 +69,10 @@ #define __maybe_unused __attribute__((unused)) #endif +#ifndef __must_check +#define __must_check __attribute__((__warn_unused_result__)) +#endif + /* * Without this, we see warnings from objtool during normal Linux builds when * the kernel is built with CONFIG_STACK_VALIDATION=y: diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/misc.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/misc.h index 091ebe772810..acce8734b2c5 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/misc.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/misc.h @@ -56,4 +56,9 @@ struct opensolaris_utsname { #define task_io_account_read(n) #define task_io_account_write(n) +/* + * Check if the current thread is a memory reclaim thread. 
+ */ +extern int current_is_reclaim_thread(void); + #endif /* _OPENSOLARIS_SYS_MISC_H_ */ diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod.h index 4214189c32df..4214189c32df 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod.h diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h index c6bc10d6babe..1cbd79ec893f 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h @@ -77,8 +77,8 @@ do_thread_create(caddr_t stk, size_t stksize, void (*proc)(void *), void *arg, /* * Be sure there are no surprises. */ - ASSERT(stk == NULL); - ASSERT(len == 0); + ASSERT0P(stk); + ASSERT0(len); ASSERT(state == TS_RUN); if (pp == &p0) diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/vm.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/vm.h index 454078f0fe79..d36bee881d0b 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/vm.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/vm.h @@ -35,6 +35,7 @@ extern const int zfs_vm_pagerret_bad; extern const int zfs_vm_pagerret_error; extern const int zfs_vm_pagerret_ok; +extern const int zfs_vm_pagerret_pend; extern const int zfs_vm_pagerput_sync; extern const int zfs_vm_pagerput_inval; diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode_impl.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode_impl.h index 0df3378c23e7..b18836aa563e 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode_impl.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode_impl.h @@ -227,6 +227,7 @@ struct taskq; #define LOOKUP_XATTR 0x02 /* lookup up extended attr dir */ #define CREATE_XATTR_DIR 0x04 /* Create extended attr dir */ #define LOOKUP_HAVE_SYSATTR_DIR 0x08 /* Already created virtual GFS dir */ +#define 
LOOKUP_NAMED_ATTR 0x10 /* Lookup a named attribute */ /* * Public vnode manipulation functions. diff --git a/sys/contrib/openzfs/include/os/linux/Makefile.am b/sys/contrib/openzfs/include/os/linux/Makefile.am index 4fe6705defe5..e156ca183dbd 100644 --- a/sys/contrib/openzfs/include/os/linux/Makefile.am +++ b/sys/contrib/openzfs/include/os/linux/Makefile.am @@ -75,7 +75,7 @@ kernel_spl_sys_HEADERS = \ %D%/spl/sys/kstat.h \ %D%/spl/sys/list.h \ %D%/spl/sys/misc.h \ - %D%/spl/sys/mod_os.h \ + %D%/spl/sys/mod.h \ %D%/spl/sys/mutex.h \ %D%/spl/sys/param.h \ %D%/spl/sys/proc.h \ diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h index cd245a5f0135..e8004e18c4a4 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h @@ -139,15 +139,6 @@ */ #if defined(HAVE_KERNEL_FPU_INTERNAL) -/* - * For kernels not exporting *kfpu_{begin,end} we have to use inline assembly - * with the XSAVE{,OPT,S} instructions, so we need the toolchain to support at - * least XSAVE. 
- */ -#if !defined(HAVE_XSAVE) -#error "Toolchain needs to support the XSAVE assembler instruction" -#endif - #ifndef XFEATURE_MASK_XTILE /* * For kernels where this doesn't exist yet, we still don't want to break @@ -335,9 +326,13 @@ kfpu_begin(void) return; } #endif +#if defined(HAVE_XSAVE) if (static_cpu_has(X86_FEATURE_XSAVE)) { kfpu_do_xsave("xsave", state, ~XFEATURE_MASK_XTILE); - } else if (static_cpu_has(X86_FEATURE_FXSR)) { + return; + } +#endif + if (static_cpu_has(X86_FEATURE_FXSR)) { kfpu_save_fxsr(state); } else { kfpu_save_fsave(state); @@ -390,9 +385,13 @@ kfpu_end(void) goto out; } #endif +#if defined(HAVE_XSAVE) if (static_cpu_has(X86_FEATURE_XSAVE)) { kfpu_do_xrstor("xrstor", state, ~XFEATURE_MASK_XTILE); - } else if (static_cpu_has(X86_FEATURE_FXSR)) { + goto out; + } +#endif + if (static_cpu_has(X86_FEATURE_FXSR)) { kfpu_restore_fxsr(state); } else { kfpu_restore_fsave(state); diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/atomic.h b/sys/contrib/openzfs/include/os/linux/spl/sys/atomic.h index b2a39d7d6cbf..f4bcd58bd281 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/atomic.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/atomic.h @@ -71,6 +71,22 @@ atomic_cas_ptr(volatile void *target, void *cmp, void *newval) return ((void *)atomic_cas_64((volatile uint64_t *)target, (uint64_t)cmp, (uint64_t)newval)); } +static __inline__ void * +atomic_swap_ptr(volatile void *target, void *newval) +{ + return ((void *)atomic_swap_64((volatile uint64_t *)target, + (uint64_t)newval)); +} +static __inline__ void * +atomic_load_ptr(volatile void *target) +{ + return ((void *)atomic_load_64((volatile uint64_t *)target)); +} +static __inline__ void +atomic_store_ptr(volatile void *target, void *newval) +{ + atomic_store_64((volatile uint64_t *)target, (uint64_t)newval); +} #else /* _LP64 */ static __inline__ void * atomic_cas_ptr(volatile void *target, void *cmp, void *newval) @@ -78,6 +94,22 @@ atomic_cas_ptr(volatile void *target, void 
*cmp, void *newval) return ((void *)atomic_cas_32((volatile uint32_t *)target, (uint32_t)cmp, (uint32_t)newval)); } +static __inline__ void * +atomic_swap_ptr(volatile void *target, void *newval) +{ + return ((void *)atomic_swap_32((volatile uint32_t *)target, + (uint32_t)newval)); +} +static __inline__ void * +atomic_load_ptr(volatile void *target) +{ + return ((void *)atomic_load_32((volatile uint32_t *)target)); +} +static __inline__ void +atomic_store_ptr(volatile void *target, void *newval) +{ + atomic_store_32((volatile uint32_t *)target, (uint32_t)newval); +} #endif /* _LP64 */ #endif /* _SPL_ATOMIC_H */ diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/debug.h b/sys/contrib/openzfs/include/os/linux/spl/sys/debug.h index 1671ba4074da..85b96e1e23a7 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/debug.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/debug.h @@ -69,6 +69,10 @@ #define __maybe_unused __attribute__((unused)) #endif +#ifndef __must_check +#define __must_check __attribute__((__warn_unused_result__)) +#endif + /* * Without this, we see warnings from objtool during normal Linux builds when * the kernel is built with CONFIG_STACK_VALIDATION=y: diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h index 995236117dd4..fe34de9c179e 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h @@ -61,7 +61,7 @@ void *spl_kvmalloc(size_t size, gfp_t flags); /* * Convert a KM_* flags mask to its Linux GFP_* counterpart. The conversion * function is context aware which means that KM_SLEEP allocations can be - * safely used in syncing contexts which have set PF_FSTRANS. + * safely used in syncing contexts which have set SPL_FSTRANS. 
*/ static inline gfp_t kmem_flags_convert(int flags) @@ -91,25 +91,11 @@ typedef struct { } fstrans_cookie_t; /* - * Introduced in Linux 3.9, however this cannot be solely relied on before - * Linux 3.18 as it doesn't turn off __GFP_FS as it should. + * SPL_FSTRANS is the set of flags that indicate that the task is in a + * filesystem or IO codepath, and so any allocation must not call back into + * those codepaths (eg to swap). */ -#ifdef PF_MEMALLOC_NOIO -#define __SPL_PF_MEMALLOC_NOIO (PF_MEMALLOC_NOIO) -#else -#define __SPL_PF_MEMALLOC_NOIO (0) -#endif - -/* - * PF_FSTRANS is removed from Linux 4.12 - */ -#ifdef PF_FSTRANS -#define __SPL_PF_FSTRANS (PF_FSTRANS) -#else -#define __SPL_PF_FSTRANS (0) -#endif - -#define SPL_FSTRANS (__SPL_PF_FSTRANS|__SPL_PF_MEMALLOC_NOIO) +#define SPL_FSTRANS (PF_MEMALLOC_NOIO) static inline fstrans_cookie_t spl_fstrans_mark(void) @@ -141,43 +127,8 @@ spl_fstrans_check(void) return (current->flags & SPL_FSTRANS); } -/* - * specifically used to check PF_FSTRANS flag, cannot be relied on for - * checking spl_fstrans_mark(). 
- */ -static inline int -__spl_pf_fstrans_check(void) -{ - return (current->flags & __SPL_PF_FSTRANS); -} - -/* - * Kernel compatibility for GFP flags - */ -/* < 4.13 */ -#ifndef __GFP_RETRY_MAYFAIL -#define __GFP_RETRY_MAYFAIL __GFP_REPEAT -#endif -/* < 4.4 */ -#ifndef __GFP_RECLAIM -#define __GFP_RECLAIM __GFP_WAIT -#endif - -#ifdef HAVE_ATOMIC64_T -#define kmem_alloc_used_add(size) atomic64_add(size, &kmem_alloc_used) -#define kmem_alloc_used_sub(size) atomic64_sub(size, &kmem_alloc_used) -#define kmem_alloc_used_read() atomic64_read(&kmem_alloc_used) -#define kmem_alloc_used_set(size) atomic64_set(&kmem_alloc_used, size) extern atomic64_t kmem_alloc_used; -extern unsigned long long kmem_alloc_max; -#else /* HAVE_ATOMIC64_T */ -#define kmem_alloc_used_add(size) atomic_add(size, &kmem_alloc_used) -#define kmem_alloc_used_sub(size) atomic_sub(size, &kmem_alloc_used) -#define kmem_alloc_used_read() atomic_read(&kmem_alloc_used) -#define kmem_alloc_used_set(size) atomic_set(&kmem_alloc_used, size) -extern atomic_t kmem_alloc_used; -extern unsigned long long kmem_alloc_max; -#endif /* HAVE_ATOMIC64_T */ +extern uint64_t kmem_alloc_max; extern unsigned int spl_kmem_alloc_warn; extern unsigned int spl_kmem_alloc_max; diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/misc.h b/sys/contrib/openzfs/include/os/linux/spl/sys/misc.h index 0b44786f8a6e..fbaaf229bd1a 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/misc.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/misc.h @@ -24,7 +24,13 @@ #define _OS_LINUX_SPL_MISC_H #include <linux/kobject.h> +#include <linux/swap.h> extern void spl_signal_kobj_evt(struct block_device *bdev); +/* + * Check if the current thread is a memory reclaim thread. 
+ */ +extern int current_is_reclaim_thread(void); + #endif diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/mod_os.h b/sys/contrib/openzfs/include/os/linux/spl/sys/mod.h index eaeb9255039e..eaeb9255039e 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/mod_os.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/mod.h diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h b/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h index f000f53ab9b6..4eca2414fc5b 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h @@ -111,7 +111,7 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \ #undef mutex_destroy #define mutex_destroy(mp) \ { \ - VERIFY3P(mutex_owner(mp), ==, NULL); \ + VERIFY0P(mutex_owner(mp)); \ } #define mutex_tryenter(mp) \ diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h index 8923657daf02..d88b4937ef08 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h @@ -59,8 +59,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __field(uint64_t, z_size) __field(uint64_t, z_pflags) __field(uint32_t, z_sync_cnt) - __field(uint32_t, z_sync_writes_cnt) - __field(uint32_t, z_async_writes_cnt) __field(mode_t, z_mode) __field(boolean_t, z_is_sa) __field(boolean_t, z_is_ctldir) @@ -92,8 +90,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __entry->z_size = zn->z_size; __entry->z_pflags = zn->z_pflags; __entry->z_sync_cnt = zn->z_sync_cnt; - __entry->z_sync_writes_cnt = zn->z_sync_writes_cnt; - __entry->z_async_writes_cnt = zn->z_async_writes_cnt; __entry->z_mode = zn->z_mode; __entry->z_is_sa = zn->z_is_sa; __entry->z_is_ctldir = zn->z_is_ctldir; @@ -117,7 +113,7 @@ DECLARE_EVENT_CLASS(zfs_ace_class, TP_printk("zn { id %llu unlinked %u atime_dirty %u " "zn_prefetch %u blksz %u seq %u " "mapcnt %llu size %llu pflags %llu " - "sync_cnt %u 
sync_writes_cnt %u async_writes_cnt %u " + "sync_cnt %u " "mode 0x%x is_sa %d is_ctldir %d " "inode { uid %u gid %u ino %lu nlink %u size %lli " "blkbits %u bytes %u mode 0x%x generation %x } } " @@ -126,7 +122,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __entry->z_zn_prefetch, __entry->z_blksz, __entry->z_seq, __entry->z_mapcnt, __entry->z_size, __entry->z_pflags, __entry->z_sync_cnt, - __entry->z_sync_writes_cnt, __entry->z_async_writes_cnt, __entry->z_mode, __entry->z_is_sa, __entry->z_is_ctldir, __entry->i_uid, __entry->i_gid, __entry->i_ino, __entry->i_nlink, __entry->i_size, __entry->i_blkbits, diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_common.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_common.h index 85cf8cc20b09..e1b6d61099b9 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_common.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_common.h @@ -45,7 +45,7 @@ __field(zio_flag_t, zio_orig_flags) \ __field(enum zio_stage, zio_orig_stage) \ __field(enum zio_stage, zio_orig_pipeline) \ - __field(uint8_t, zio_reexecute) \ + __field(uint8_t, zio_post) \ __field(uint64_t, zio_txg) \ __field(int, zio_error) \ __field(uint64_t, zio_ena) \ @@ -74,7 +74,7 @@ __entry->zio_orig_flags = zio->io_orig_flags; \ __entry->zio_orig_stage = zio->io_orig_stage; \ __entry->zio_orig_pipeline = zio->io_orig_pipeline; \ - __entry->zio_reexecute = zio->io_reexecute; \ + __entry->zio_post = zio->io_post; \ __entry->zio_txg = zio->io_txg; \ __entry->zio_error = zio->io_error; \ __entry->zio_ena = zio->io_ena; \ @@ -92,7 +92,7 @@ "zio { type %u prio %u size %llu orig_size %llu " \ "offset %llu timestamp %llu delta %llu delay %llu " \ "flags 0x%llx stage 0x%x pipeline 0x%x orig_flags 0x%llx " \ - "orig_stage 0x%x orig_pipeline 0x%x reexecute %u " \ + "orig_stage 0x%x orig_pipeline 0x%x post %u " \ "txg %llu error %d ena %llu prop { checksum %u compress %u " \ "type %u level %u copies %u dedup %u dedup_verify %u nopwrite %u } }" @@ -102,7 
+102,7 @@ __entry->zio_timestamp, __entry->zio_delta, __entry->zio_delay, \ __entry->zio_flags, __entry->zio_stage, __entry->zio_pipeline, \ __entry->zio_orig_flags, __entry->zio_orig_stage, \ - __entry->zio_orig_pipeline, __entry->zio_reexecute, \ + __entry->zio_orig_pipeline, __entry->zio_post, \ __entry->zio_txg, __entry->zio_error, __entry->zio_ena, \ __entry->zp_checksum, __entry->zp_compress, __entry->zp_type, \ __entry->zp_level, __entry->zp_copies, __entry->zp_dedup, \ diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h index b38847b20462..6a77e40abe10 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h @@ -157,6 +157,7 @@ struct znode; extern int zfs_sync(struct super_block *, int, cred_t *); extern int zfs_inode_alloc(struct super_block *, struct inode **ip); +extern void zfs_inode_free(struct inode *); extern void zfs_inode_destroy(struct inode *); extern void zfs_mark_inode_dirty(struct inode *); extern boolean_t zfs_relatime_need_update(const struct inode *); diff --git a/sys/contrib/openzfs/include/sys/arc_impl.h b/sys/contrib/openzfs/include/sys/arc_impl.h index 1b30389107c5..b55d5da3378c 100644 --- a/sys/contrib/openzfs/include/sys/arc_impl.h +++ b/sys/contrib/openzfs/include/sys/arc_impl.h @@ -954,7 +954,7 @@ typedef struct arc_sums { wmsum_t arcstat_data_size; wmsum_t arcstat_metadata_size; wmsum_t arcstat_dbuf_size; - wmsum_t arcstat_dnode_size; + aggsum_t arcstat_dnode_size; wmsum_t arcstat_bonus_size; wmsum_t arcstat_l2_hits; wmsum_t arcstat_l2_misses; diff --git a/sys/contrib/openzfs/include/sys/dbuf.h b/sys/contrib/openzfs/include/sys/dbuf.h index 756459b2fbb5..baf3b1508335 100644 --- a/sys/contrib/openzfs/include/sys/dbuf.h +++ b/sys/contrib/openzfs/include/sys/dbuf.h @@ -164,6 +164,7 @@ typedef struct dbuf_dirty_record { boolean_t dr_nopwrite; boolean_t dr_brtwrite; 
boolean_t dr_diowrite; + boolean_t dr_rewrite; boolean_t dr_has_raw_params; /* Override and raw params are mutually exclusive. */ diff --git a/sys/contrib/openzfs/include/sys/dmu.h b/sys/contrib/openzfs/include/sys/dmu.h index 0b2e443a433a..b18961be1282 100644 --- a/sys/contrib/openzfs/include/sys/dmu.h +++ b/sys/contrib/openzfs/include/sys/dmu.h @@ -414,6 +414,9 @@ typedef struct dmu_buf { #define DMU_POOL_ZPOOL_CHECKPOINT "com.delphix:zpool_checkpoint" #define DMU_POOL_LOG_SPACEMAP_ZAP "com.delphix:log_spacemap_zap" #define DMU_POOL_DELETED_CLONES "com.delphix:deleted_clones" +#define DMU_POOL_TXG_LOG_TIME_MINUTES "com.klaraystems:txg_log_time:minutes" +#define DMU_POOL_TXG_LOG_TIME_DAYS "com.klaraystems:txg_log_time:days" +#define DMU_POOL_TXG_LOG_TIME_MONTHS "com.klaraystems:txg_log_time:months" /* * Allocate an object from this objset. The range of object numbers @@ -739,8 +742,8 @@ dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func_sync, dmu_buf_evict_func_t *evict_func_async, dmu_buf_t **clear_on_evict_dbufp __maybe_unused) { - ASSERT(dbu->dbu_evict_func_sync == NULL); - ASSERT(dbu->dbu_evict_func_async == NULL); + ASSERT0P(dbu->dbu_evict_func_sync); + ASSERT0P(dbu->dbu_evict_func_async); /* must have at least one evict func */ IMPLY(evict_func_sync == NULL, evict_func_async != NULL); @@ -822,6 +825,7 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db); */ void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx); void dmu_buf_will_dirty_flags(dmu_buf_t *db, dmu_tx_t *tx, dmu_flags_t flags); +void dmu_buf_will_rewrite(dmu_buf_t *db, dmu_tx_t *tx); boolean_t dmu_buf_is_dirty(dmu_buf_t *db, dmu_tx_t *tx); void dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder, const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx); diff --git a/sys/contrib/openzfs/include/sys/dmu_objset.h b/sys/contrib/openzfs/include/sys/dmu_objset.h index 288ad30166df..492be29200e4 100644 --- a/sys/contrib/openzfs/include/sys/dmu_objset.h +++ 
b/sys/contrib/openzfs/include/sys/dmu_objset.h @@ -152,7 +152,7 @@ struct objset { * The largest zpl file block allowed in special class. * cached here instead of zfsvfs for easier access. */ - int os_zpl_special_smallblock; + uint64_t os_zpl_special_smallblock; /* * Pointer is constant; the blkptr it points to is protected by diff --git a/sys/contrib/openzfs/include/sys/dmu_traverse.h b/sys/contrib/openzfs/include/sys/dmu_traverse.h index 3196b2addeee..70cafa4c74f1 100644 --- a/sys/contrib/openzfs/include/sys/dmu_traverse.h +++ b/sys/contrib/openzfs/include/sys/dmu_traverse.h @@ -59,6 +59,13 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, */ #define TRAVERSE_NO_DECRYPT (1<<5) +/* + * Always use logical birth time for birth time comparisons. This is useful + * for operations that care about user data changes rather than physical + * block rewrites (e.g., incremental replication). + */ +#define TRAVERSE_LOGICAL (1<<6) + /* Special traverse error return value to indicate skipping of children */ #define TRAVERSE_VISIT_NO_CHILDREN -1 diff --git a/sys/contrib/openzfs/include/sys/fs/zfs.h b/sys/contrib/openzfs/include/sys/fs/zfs.h index c8deb5be419e..fc359c10365a 100644 --- a/sys/contrib/openzfs/include/sys/fs/zfs.h +++ b/sys/contrib/openzfs/include/sys/fs/zfs.h @@ -1627,6 +1627,9 @@ typedef struct zfs_rewrite_args { uint64_t arg; } zfs_rewrite_args_t; +/* zfs_rewrite_args flags */ +#define ZFS_REWRITE_PHYSICAL 0x1 /* Preserve logical birth time. 
*/ + #define ZFS_IOC_REWRITE _IOW(0x83, 3, zfs_rewrite_args_t) /* diff --git a/sys/contrib/openzfs/include/sys/metaslab.h b/sys/contrib/openzfs/include/sys/metaslab.h index 4d57e52e8468..36cbe06bacce 100644 --- a/sys/contrib/openzfs/include/sys/metaslab.h +++ b/sys/contrib/openzfs/include/sys/metaslab.h @@ -110,9 +110,10 @@ void metaslab_class_balance(metaslab_class_t *mc, boolean_t onsync); void metaslab_class_histogram_verify(metaslab_class_t *); uint64_t metaslab_class_fragmentation(metaslab_class_t *); uint64_t metaslab_class_expandable_space(metaslab_class_t *); -boolean_t metaslab_class_throttle_reserve(metaslab_class_t *, int, zio_t *, - boolean_t, boolean_t *); -boolean_t metaslab_class_throttle_unreserve(metaslab_class_t *, int, zio_t *); +boolean_t metaslab_class_throttle_reserve(metaslab_class_t *, int, int, + uint64_t, boolean_t, boolean_t *); +boolean_t metaslab_class_throttle_unreserve(metaslab_class_t *, int, int, + uint64_t); void metaslab_class_evict_old(metaslab_class_t *, uint64_t); const char *metaslab_class_get_name(metaslab_class_t *); uint64_t metaslab_class_get_alloc(metaslab_class_t *); diff --git a/sys/contrib/openzfs/include/sys/metaslab_impl.h b/sys/contrib/openzfs/include/sys/metaslab_impl.h index 83fbe620fe37..6ce995d0a086 100644 --- a/sys/contrib/openzfs/include/sys/metaslab_impl.h +++ b/sys/contrib/openzfs/include/sys/metaslab_impl.h @@ -539,6 +539,8 @@ typedef struct metaslab_unflushed_phys { uint64_t msp_unflushed_txg; } metaslab_unflushed_phys_t; +char *metaslab_rt_name(metaslab_group_t *, metaslab_t *, const char *); + #ifdef __cplusplus } #endif diff --git a/sys/contrib/openzfs/include/sys/mod.h b/sys/contrib/openzfs/include/sys/mod.h deleted file mode 100644 index 4122889ab758..000000000000 --- a/sys/contrib/openzfs/include/sys/mod.h +++ /dev/null @@ -1,36 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. 
- * Copyright (C) 2007 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf <behlendorf1@llnl.gov>. - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. - * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef _SYS_MOD_H -#define _SYS_MOD_H - -#ifdef _KERNEL -#include <sys/mod_os.h> -#else -/* - * Exported symbols - */ -#define EXPORT_SYMBOL(x) -#endif - -#endif /* SYS_MOD_H */ diff --git a/sys/contrib/openzfs/include/sys/range_tree.h b/sys/contrib/openzfs/include/sys/range_tree.h index 23e80f64284b..0f6884682459 100644 --- a/sys/contrib/openzfs/include/sys/range_tree.h +++ b/sys/contrib/openzfs/include/sys/range_tree.h @@ -49,6 +49,9 @@ typedef enum zfs_range_seg_type { ZFS_RANGE_SEG_NUM_TYPES, } zfs_range_seg_type_t; +#define ZFS_RT_NAME(rt) (((rt)->rt_name != NULL) ? (rt)->rt_name : "") +#define ZFS_RT_F_DYN_NAME (1ULL << 0) /* if rt_name must be freed */ + /* * Note: the range_tree may not be accessed concurrently; consumers * must provide external locking if required. @@ -68,6 +71,9 @@ typedef struct zfs_range_tree { void *rt_arg; uint64_t rt_gap; /* allowable inter-segment gap */ + uint64_t rt_flags; + const char *rt_name; /* details for debugging */ + /* * The rt_histogram maintains a histogram of ranges. 
Each bucket, * rt_histogram[i], contains the number of ranges whose size is: @@ -281,6 +287,9 @@ zfs_range_tree_t *zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops, uint64_t gap); zfs_range_tree_t *zfs_range_tree_create(const zfs_range_tree_ops_t *ops, zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift); +zfs_range_tree_t *zfs_range_tree_create_flags(const zfs_range_tree_ops_t *ops, + zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift, + uint64_t flags, const char *name); void zfs_range_tree_destroy(zfs_range_tree_t *rt); boolean_t zfs_range_tree_contains(zfs_range_tree_t *rt, uint64_t start, uint64_t size); diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h index a3e36c1f59ae..db6de332ae67 100644 --- a/sys/contrib/openzfs/include/sys/spa.h +++ b/sys/contrib/openzfs/include/sys/spa.h @@ -140,7 +140,7 @@ typedef struct zio_cksum_salt { * +-------+-------+-------+-------+-------+-------+-------+-------+ * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ - * 7 | padding | + * 7 |R| padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 8 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ @@ -175,6 +175,7 @@ typedef struct zio_cksum_salt { * E blkptr_t contains embedded data (see below) * lvl level of indirection * type DMU object type + * R rewrite (reallocated/rewritten at phys birth TXG) * phys birth txg when dva[0] was written; zero if same as logical birth txg * note that typically all the dva's would be written in this * txg, but they could be different if they were moved by @@ -190,11 +191,11 @@ typedef struct zio_cksum_salt { * * 64 56 48 40 32 24 16 8 0 * +-------+-------+-------+-------+-------+-------+-------+-------+ - * 0 | vdev1 | pad | ASIZE | + * 0 | pad | vdev1 | pad | ASIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 1 
|G| offset1 | * +-------+-------+-------+-------+-------+-------+-------+-------+ - * 2 | vdev2 | pad | ASIZE | + * 2 | pad | vdev2 | pad | ASIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 3 |G| offset2 | * +-------+-------+-------+-------+-------+-------+-------+-------+ @@ -204,7 +205,7 @@ typedef struct zio_cksum_salt { * +-------+-------+-------+-------+-------+-------+-------+-------+ * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ - * 7 | padding | + * 7 |R| padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 8 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ @@ -373,7 +374,8 @@ typedef enum bp_embedded_type { typedef struct blkptr { dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */ uint64_t blk_prop; /* size, compression, type, etc */ - uint64_t blk_pad[2]; /* Extra space for the future */ + uint64_t blk_prop2; /* additional properties */ + uint64_t blk_pad; /* Extra space for the future */ uint64_t blk_birth_word[2]; uint64_t blk_fill; /* fill count */ zio_cksum_t blk_cksum; /* 256-bit checksum */ @@ -476,32 +478,51 @@ typedef struct blkptr { #define BP_GET_FREE(bp) BF64_GET((bp)->blk_fill, 0, 1) #define BP_SET_FREE(bp, x) BF64_SET((bp)->blk_fill, 0, 1, x) +/* + * Block birth time macros for different use cases: + * - BP_GET_LOGICAL_BIRTH(): When the block was logically modified by user. + * To be used with a focus on user data, like incremental replication. + * - BP_GET_PHYSICAL_BIRTH(): When the block was physically written to disks. + * For regular writes is equal to logical birth. For dedup and block cloning + * can be smaller than logical birth. For remapped and rewritten blocks can + * be bigger. To be used with focus on physical disk content: ARC, DDT, scrub. + * - BP_GET_RAW_PHYSICAL_BIRTH(): Raw physical birth value. Zero if equal + * to logical birth. 
Should only be used for BP copying and debugging. + * - BP_GET_BIRTH(): When the block was allocated, which is a physical birth + * for rewritten blocks (rewrite flag set) or logical birth otherwise. + */ #define BP_GET_LOGICAL_BIRTH(bp) (bp)->blk_birth_word[1] #define BP_SET_LOGICAL_BIRTH(bp, x) ((bp)->blk_birth_word[1] = (x)) -#define BP_GET_PHYSICAL_BIRTH(bp) (bp)->blk_birth_word[0] +#define BP_GET_RAW_PHYSICAL_BIRTH(bp) (bp)->blk_birth_word[0] #define BP_SET_PHYSICAL_BIRTH(bp, x) ((bp)->blk_birth_word[0] = (x)) -#define BP_GET_BIRTH(bp) \ - (BP_IS_EMBEDDED(bp) ? 0 : \ - BP_GET_PHYSICAL_BIRTH(bp) ? BP_GET_PHYSICAL_BIRTH(bp) : \ +#define BP_GET_PHYSICAL_BIRTH(bp) \ + (BP_IS_EMBEDDED(bp) ? 0 : \ + BP_GET_RAW_PHYSICAL_BIRTH(bp) ? BP_GET_RAW_PHYSICAL_BIRTH(bp) : \ BP_GET_LOGICAL_BIRTH(bp)) -#define BP_SET_BIRTH(bp, logical, physical) \ -{ \ - ASSERT(!BP_IS_EMBEDDED(bp)); \ - BP_SET_LOGICAL_BIRTH(bp, logical); \ - BP_SET_PHYSICAL_BIRTH(bp, \ - ((logical) == (physical) ? 0 : (physical))); \ +#define BP_GET_BIRTH(bp) \ + ((BP_IS_EMBEDDED(bp) || !BP_GET_REWRITE(bp)) ? \ + BP_GET_LOGICAL_BIRTH(bp) : BP_GET_PHYSICAL_BIRTH(bp)) + +#define BP_SET_BIRTH(bp, logical, physical) \ +{ \ + ASSERT(!BP_IS_EMBEDDED(bp)); \ + BP_SET_LOGICAL_BIRTH(bp, logical); \ + BP_SET_PHYSICAL_BIRTH(bp, \ + ((logical) == (physical) ? 0 : (physical))); \ } #define BP_GET_FILL(bp) \ - ((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \ - ((BP_IS_EMBEDDED(bp)) ? 1 : (bp)->blk_fill)) + (BP_IS_EMBEDDED(bp) ? 1 : \ + BP_IS_ENCRYPTED(bp) ? BF64_GET((bp)->blk_fill, 0, 32) : \ + (bp)->blk_fill) #define BP_SET_FILL(bp, fill) \ { \ - if (BP_IS_ENCRYPTED(bp)) \ + ASSERT(!BP_IS_EMBEDDED(bp)); \ + if (BP_IS_ENCRYPTED(bp)) \ BF64_SET((bp)->blk_fill, 0, 32, fill); \ else \ (bp)->blk_fill = fill; \ @@ -516,6 +537,15 @@ typedef struct blkptr { BF64_SET((bp)->blk_fill, 32, 32, iv2); \ } +#define BP_GET_REWRITE(bp) \ + (BP_IS_EMBEDDED(bp) ? 
0 : BF64_GET((bp)->blk_prop2, 63, 1)) + +#define BP_SET_REWRITE(bp, x) \ +{ \ + ASSERT(!BP_IS_EMBEDDED(bp)); \ + BF64_SET((bp)->blk_prop2, 63, 1, x); \ +} + #define BP_IS_METADATA(bp) \ (BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) @@ -545,7 +575,7 @@ typedef struct blkptr { (dva1)->dva_word[0] == (dva2)->dva_word[0]) #define BP_EQUAL(bp1, bp2) \ - (BP_GET_BIRTH(bp1) == BP_GET_BIRTH(bp2) && \ + (BP_GET_PHYSICAL_BIRTH(bp1) == BP_GET_PHYSICAL_BIRTH(bp2) && \ BP_GET_LOGICAL_BIRTH(bp1) == BP_GET_LOGICAL_BIRTH(bp2) && \ DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \ DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \ @@ -588,8 +618,8 @@ typedef struct blkptr { { \ BP_ZERO_DVAS(bp); \ (bp)->blk_prop = 0; \ - (bp)->blk_pad[0] = 0; \ - (bp)->blk_pad[1] = 0; \ + (bp)->blk_prop2 = 0; \ + (bp)->blk_pad = 0; \ (bp)->blk_birth_word[0] = 0; \ (bp)->blk_birth_word[1] = 0; \ (bp)->blk_fill = 0; \ @@ -696,7 +726,7 @@ typedef struct blkptr { (u_longlong_t)BP_GET_LSIZE(bp), \ (u_longlong_t)BP_GET_PSIZE(bp), \ (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp), \ - (u_longlong_t)BP_GET_BIRTH(bp), \ + (u_longlong_t)BP_GET_PHYSICAL_BIRTH(bp), \ (u_longlong_t)BP_GET_FILL(bp), \ ws, \ (u_longlong_t)bp->blk_cksum.zc_word[0], \ @@ -1065,6 +1095,7 @@ extern metaslab_class_t *spa_normal_class(spa_t *spa); extern metaslab_class_t *spa_log_class(spa_t *spa); extern metaslab_class_t *spa_embedded_log_class(spa_t *spa); extern metaslab_class_t *spa_special_class(spa_t *spa); +extern metaslab_class_t *spa_special_embedded_log_class(spa_t *spa); extern metaslab_class_t *spa_dedup_class(spa_t *spa); extern metaslab_class_t *spa_preferred_class(spa_t *spa, const zio_t *zio); extern boolean_t spa_special_has_ddt(spa_t *spa); diff --git a/sys/contrib/openzfs/include/sys/spa_impl.h b/sys/contrib/openzfs/include/sys/spa_impl.h index 8c52f751a819..07a959db3447 100644 --- a/sys/contrib/openzfs/include/sys/spa_impl.h +++ b/sys/contrib/openzfs/include/sys/spa_impl.h @@ -55,6 +55,8 @@ #include 
<sys/dsl_deadlist.h> #include <zfeature_common.h> +#include "zfs_crrd.h" + #ifdef __cplusplus extern "C" { #endif @@ -246,6 +248,7 @@ struct spa { metaslab_class_t *spa_log_class; /* intent log data class */ metaslab_class_t *spa_embedded_log_class; /* log on normal vdevs */ metaslab_class_t *spa_special_class; /* special allocation class */ + metaslab_class_t *spa_special_embedded_log_class; /* log on special */ metaslab_class_t *spa_dedup_class; /* dedup allocation class */ uint64_t spa_first_txg; /* first txg after spa_open() */ uint64_t spa_final_txg; /* txg of export/destroy */ @@ -343,6 +346,12 @@ struct spa { spa_checkpoint_info_t spa_checkpoint_info; /* checkpoint accounting */ zthr_t *spa_checkpoint_discard_zthr; + kmutex_t spa_txg_log_time_lock; /* for spa_txg_log_time */ + dbrrd_t spa_txg_log_time; + uint64_t spa_last_noted_txg; + uint64_t spa_last_noted_txg_time; + uint64_t spa_last_flush_txg_time; + space_map_t *spa_syncing_log_sm; /* current log space map */ avl_tree_t spa_sm_logs_by_txg; kmutex_t spa_flushed_ms_lock; /* for metaslabs_by_flushed */ diff --git a/sys/contrib/openzfs/include/sys/vdev.h b/sys/contrib/openzfs/include/sys/vdev.h index 7f457c3a0b76..510474d6c085 100644 --- a/sys/contrib/openzfs/include/sys/vdev.h +++ b/sys/contrib/openzfs/include/sys/vdev.h @@ -139,6 +139,7 @@ extern uint64_t vdev_asize_to_psize_txg(vdev_t *vd, uint64_t asize, extern uint64_t vdev_psize_to_asize_txg(vdev_t *vd, uint64_t psize, uint64_t txg); extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize); +extern uint64_t vdev_get_min_alloc(vdev_t *vd); /* * Return the amount of space allocated for a gang block header. 
Note that @@ -148,7 +149,20 @@ extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize); static inline uint64_t vdev_gang_header_asize(vdev_t *vd) { - return (vdev_psize_to_asize_txg(vd, SPA_GANGBLOCKSIZE, 0)); + return (vdev_psize_to_asize_txg(vd, SPA_OLD_GANGBLOCKSIZE, 0)); +} + +/* + * Return the amount of data that can be stored in a gang header. Because we + * need to ensure gang headers can always be allocated (as long as there is + * space available), this is the minimum allocatable size on the vdev. Note that + * since the physical birth txg is not provided, this must be constant for + * a given vdev. (e.g. raidz expansion can't change this) + */ +static inline uint64_t +vdev_gang_header_psize(vdev_t *vd) +{ + return (vdev_get_min_alloc(vd)); } extern int vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux); diff --git a/sys/contrib/openzfs/include/sys/vdev_impl.h b/sys/contrib/openzfs/include/sys/vdev_impl.h index 385d7224f2c5..4ab472bd6742 100644 --- a/sys/contrib/openzfs/include/sys/vdev_impl.h +++ b/sys/contrib/openzfs/include/sys/vdev_impl.h @@ -621,7 +621,6 @@ extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize, uint64_t txg); extern uint64_t vdev_default_min_asize(vdev_t *vd); extern uint64_t vdev_get_min_asize(vdev_t *vd); extern void vdev_set_min_asize(vdev_t *vd); -extern uint64_t vdev_get_min_alloc(vdev_t *vd); extern uint64_t vdev_get_nparity(vdev_t *vd); extern uint64_t vdev_get_ndisks(vdev_t *vd); @@ -645,10 +644,11 @@ extern int vdev_obsolete_counts_are_precise(vdev_t *vd, boolean_t *are_precise); int vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj); void vdev_metaslab_group_create(vdev_t *vd); uint64_t vdev_best_ashift(uint64_t logical, uint64_t a, uint64_t b); -#if defined(__linux__) +#if defined(__linux__) && defined(_KERNEL) int param_get_raidz_impl(char *buf, zfs_kernel_param_t *kp); #endif int param_set_raidz_impl(ZFS_MODULE_PARAM_ARGS); +char *vdev_rt_name(vdev_t *vd, const char *name); /* * Vdev ashift 
optimization tunables diff --git a/sys/contrib/openzfs/include/sys/xvattr.h b/sys/contrib/openzfs/include/sys/xvattr.h index 447842d269b3..5dadbdb4c619 100644 --- a/sys/contrib/openzfs/include/sys/xvattr.h +++ b/sys/contrib/openzfs/include/sys/xvattr.h @@ -311,6 +311,7 @@ xva_getxoptattr(xvattr_t *xvap) */ #define V_ACE_MASK 0x1 /* mask represents NFSv4 ACE permissions */ #define V_APPEND 0x2 /* want to do append only check */ +#define V_NAMEDATTR 0x4 /* is a named attribute check */ /* * Structure used on VOP_GETSECATTR and VOP_SETSECATTR operations diff --git a/sys/contrib/openzfs/include/sys/zfs_context.h b/sys/contrib/openzfs/include/sys/zfs_context.h index 256c9c2cc2d3..7112d3ef5c99 100644 --- a/sys/contrib/openzfs/include/sys/zfs_context.h +++ b/sys/contrib/openzfs/include/sys/zfs_context.h @@ -205,18 +205,6 @@ extern void vpanic(const char *, va_list) #define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) /* - * Tunables. - */ -typedef struct zfs_kernel_param { - const char *name; /* unused stub */ -} zfs_kernel_param_t; - -#define ZFS_MODULE_PARAM(scope_prefix, name_prefix, name, type, perm, desc) -#define ZFS_MODULE_PARAM_ARGS void -#define ZFS_MODULE_PARAM_CALL(scope_prefix, name_prefix, name, setfunc, \ - getfunc, perm, desc) - -/* * Threads. */ typedef pthread_t kthread_t; @@ -236,6 +224,11 @@ typedef pthread_t kthread_t; #define thread_join(t) pthread_join((pthread_t)(t), NULL) #define newproc(f, a, cid, pri, ctp, pid) (ENOSYS) +/* + * Check if the current thread is a memory reclaim thread. + * Always returns false in userspace (no memory reclaim thread). 
+ */ +#define current_is_reclaim_thread() (0) /* in libzpool, p0 exists only to have its address taken */ typedef struct proc { @@ -673,7 +666,7 @@ extern void random_fini(void); struct spa; extern void show_pool_stats(struct spa *); -extern int set_global_var(char const *arg); +extern int handle_tunable_option(const char *, boolean_t); typedef struct callb_cpr { kmutex_t *cc_lockp; @@ -778,7 +771,6 @@ typedef int fstrans_cookie_t; extern fstrans_cookie_t spl_fstrans_mark(void); extern void spl_fstrans_unmark(fstrans_cookie_t); -extern int __spl_pf_fstrans_check(void); extern int kmem_cache_reap_active(void); diff --git a/sys/contrib/openzfs/include/sys/zfs_znode.h b/sys/contrib/openzfs/include/sys/zfs_znode.h index 2fedaff78534..79b845a672a8 100644 --- a/sys/contrib/openzfs/include/sys/zfs_znode.h +++ b/sys/contrib/openzfs/include/sys/zfs_znode.h @@ -73,7 +73,7 @@ extern "C" { pflags |= attr; \ else \ pflags &= ~attr; \ - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(ZTOZSB(zp)), \ + VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(ZTOZSB(zp)), \ &pflags, sizeof (pflags), tx)); \ } @@ -202,8 +202,6 @@ typedef struct znode { uint64_t z_size; /* file size (cached) */ uint64_t z_pflags; /* pflags (cached) */ uint32_t z_sync_cnt; /* synchronous open count */ - uint32_t z_sync_writes_cnt; /* synchronous write count */ - uint32_t z_async_writes_cnt; /* asynchronous write count */ mode_t z_mode; /* mode (cached) */ kmutex_t z_acl_lock; /* acl data lock */ zfs_acl_t *z_acl_cached; /* cached acl */ diff --git a/sys/contrib/openzfs/include/sys/zil.h b/sys/contrib/openzfs/include/sys/zil.h index fa7945d8ab8b..da085998879b 100644 --- a/sys/contrib/openzfs/include/sys/zil.h +++ b/sys/contrib/openzfs/include/sys/zil.h @@ -456,7 +456,7 @@ typedef enum { WR_NUM_STATES /* number of states */ } itx_wr_state_t; -typedef void (*zil_callback_t)(void *data); +typedef void (*zil_callback_t)(void *data, int err); typedef struct itx { list_node_t itx_node; /* linkage on zl_itx_list */ @@ 
-498,10 +498,13 @@ typedef struct zil_stats { * (see zil_commit_writer_stall()) * - suspend: ZIL suspended * (see zil_commit(), zil_get_commit_list()) + * - crash: ZIL crashed + * (see zil_crash(), zil_commit(), ...) */ kstat_named_t zil_commit_error_count; kstat_named_t zil_commit_stall_count; kstat_named_t zil_commit_suspend_count; + kstat_named_t zil_commit_crash_count; /* * Number of transactions (reads, writes, renames, etc.) @@ -549,6 +552,7 @@ typedef struct zil_sums { wmsum_t zil_commit_error_count; wmsum_t zil_commit_stall_count; wmsum_t zil_commit_suspend_count; + wmsum_t zil_commit_crash_count; wmsum_t zil_itx_count; wmsum_t zil_itx_indirect_count; wmsum_t zil_itx_indirect_bytes; @@ -577,6 +581,25 @@ typedef struct zil_sums { #define ZIL_STAT_BUMP(zil, stat) \ ZIL_STAT_INCR(zil, stat, 1); +/* + * Flags for zil_commit_flags(). zil_commit() is a shortcut for + * zil_commit_flags(ZIL_COMMIT_FAILMODE), which is the most common use. + */ +typedef enum { + /* + * Try to commit the ZIL. If it fails, fall back to txg_wait_synced(). + * If that fails, return EIO. + */ + ZIL_COMMIT_NOW = 0, + + /* + * Like ZIL_COMMIT_NOW, but if the ZIL commit fails because the pool + * suspended, act according to the pool's failmode= setting (wait for + * the pool to resume, or return EIO). 
+ */ + ZIL_COMMIT_FAILMODE = (1 << 1), +} zil_commit_flag_t; + typedef int zil_parse_blk_func_t(zilog_t *zilog, const blkptr_t *bp, void *arg, uint64_t txg); typedef int zil_parse_lr_func_t(zilog_t *zilog, const lr_t *lr, void *arg, @@ -606,14 +629,16 @@ extern boolean_t zil_destroy(zilog_t *zilog, boolean_t keep_first); extern void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx); extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize); -extern void zil_itx_destroy(itx_t *itx); +extern void zil_itx_destroy(itx_t *itx, int err); extern void zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx); extern void zil_async_to_sync(zilog_t *zilog, uint64_t oid); -extern void zil_commit(zilog_t *zilog, uint64_t oid); -extern void zil_commit_impl(zilog_t *zilog, uint64_t oid); extern void zil_remove_async(zilog_t *zilog, uint64_t oid); +extern int zil_commit_flags(zilog_t *zilog, uint64_t oid, + zil_commit_flag_t flags); +extern int __must_check zil_commit(zilog_t *zilog, uint64_t oid); + extern int zil_reset(const char *osname, void *txarg); extern int zil_claim(struct dsl_pool *dp, struct dsl_dataset *ds, void *txarg); @@ -635,6 +660,8 @@ extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval); extern uint64_t zil_max_copied_data(zilog_t *zilog); extern uint64_t zil_max_log_data(zilog_t *zilog, size_t hdrsize); +extern itx_wr_state_t zil_write_state(zilog_t *zilog, uint64_t size, + uint32_t blocksize, boolean_t o_direct, boolean_t commit); extern void zil_sums_init(zil_sums_t *zs); extern void zil_sums_fini(zil_sums_t *zs); @@ -642,6 +669,8 @@ extern void zil_kstat_values_update(zil_kstat_values_t *zs, zil_sums_t *zil_sums); extern int zil_replay_disable; +extern uint_t zfs_immediate_write_sz; +extern int zil_special_is_slog; #ifdef __cplusplus } diff --git a/sys/contrib/openzfs/include/sys/zil_impl.h b/sys/contrib/openzfs/include/sys/zil_impl.h index 252264b9eae9..44b776e16b52 100644 --- a/sys/contrib/openzfs/include/sys/zil_impl.h +++ 
b/sys/contrib/openzfs/include/sys/zil_impl.h @@ -221,6 +221,7 @@ struct zilog { uint64_t zl_cur_left; /* current burst remaining size */ uint64_t zl_cur_max; /* biggest record in current burst */ list_t zl_lwb_list; /* in-flight log write list */ + list_t zl_lwb_crash_list; /* log writes in-flight at crash */ avl_tree_t zl_bp_tree; /* track bps during log parse */ clock_t zl_replay_time; /* lbolt of when replay started */ uint64_t zl_replay_blks; /* number of log blocks replayed */ @@ -245,6 +246,9 @@ struct zilog { */ uint64_t zl_max_block_size; + /* After crash, txg to restart zil */ + uint64_t zl_restart_txg; + /* Pointer for per dataset zil sums */ zil_sums_t *zl_sums; }; diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h index d91a4eb1e998..4f46eab3db89 100644 --- a/sys/contrib/openzfs/include/sys/zio.h +++ b/sys/contrib/openzfs/include/sys/zio.h @@ -59,21 +59,36 @@ typedef struct zio_eck { /* * Gang block headers are self-checksumming and contain an array - * of block pointers. + * of block pointers. The old gang block size has enough room for 3 blkptrs, + * while new gang blocks can store more. + * + * Layout: + * +--------+--------+--------+-----+---------+-----------+ + * | | | | | | | + * | blkptr | blkptr | blkptr | ... 
| padding | zio_eck_t | + * | 1 | 2 | 3 | | | | + * +--------+--------+--------+-----+---------+-----------+ + * 128B 128B 128B 88B 40B */ -#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE -#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \ - sizeof (zio_eck_t)) / sizeof (blkptr_t)) -#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \ - sizeof (zio_eck_t) - \ - (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\ - sizeof (uint64_t)) - -typedef struct zio_gbh { - blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS]; - uint64_t zg_filler[SPA_GBH_FILLER]; - zio_eck_t zg_tail; -} zio_gbh_phys_t; +#define SPA_OLD_GANGBLOCKSIZE SPA_MINBLOCKSIZE +typedef void zio_gbh_phys_t; + +static inline uint64_t +gbh_nblkptrs(uint64_t size) { + ASSERT(IS_P2ALIGNED(size, sizeof (blkptr_t))); + return ((size - sizeof (zio_eck_t)) / sizeof (blkptr_t)); +} + +static inline zio_eck_t * +gbh_eck(zio_gbh_phys_t *gbh, uint64_t size) { + ASSERT(IS_P2ALIGNED(size, sizeof (blkptr_t))); + return ((zio_eck_t *)((uintptr_t)gbh + (size_t)size - sizeof (zio_eck_t))); +} + +static inline blkptr_t * +gbh_bp(zio_gbh_phys_t *gbh, int bp) { + return (&((blkptr_t *)gbh)[bp]); +} enum zio_checksum { ZIO_CHECKSUM_INHERIT = 0, @@ -196,7 +211,7 @@ typedef uint64_t zio_flag_t; #define ZIO_FLAG_DONT_RETRY (1ULL << 10) #define ZIO_FLAG_NODATA (1ULL << 12) #define ZIO_FLAG_INDUCE_DAMAGE (1ULL << 13) -#define ZIO_FLAG_IO_ALLOCATING (1ULL << 14) +#define ZIO_FLAG_ALLOC_THROTTLED (1ULL << 14) #define ZIO_FLAG_DDT_INHERIT (ZIO_FLAG_IO_RETRY - 1) #define ZIO_FLAG_GANG_INHERIT (ZIO_FLAG_IO_RETRY - 1) @@ -226,8 +241,7 @@ typedef uint64_t zio_flag_t; #define ZIO_FLAG_NOPWRITE (1ULL << 29) #define ZIO_FLAG_REEXECUTED (1ULL << 30) #define ZIO_FLAG_DELEGATED (1ULL << 31) -#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 32) -#define ZIO_FLAG_PREALLOCATED (1ULL << 33) +#define ZIO_FLAG_PREALLOCATED (1ULL << 32) #define ZIO_ALLOCATOR_NONE (-1) #define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE) @@ -360,6 +374,7 @@ typedef struct zio_prop { 
boolean_t zp_encrypt; boolean_t zp_byteorder; boolean_t zp_direct_write; + boolean_t zp_rewrite; uint8_t zp_salt[ZIO_DATA_SALT_LEN]; uint8_t zp_iv[ZIO_DATA_IV_LEN]; uint8_t zp_mac[ZIO_DATA_MAC_LEN]; @@ -399,7 +414,9 @@ typedef struct zio_vsd_ops { typedef struct zio_gang_node { zio_gbh_phys_t *gn_gbh; - struct zio_gang_node *gn_child[SPA_GBH_NBLKPTRS]; + uint64_t gn_gangblocksize; + uint64_t gn_allocsize; + struct zio_gang_node *gn_child[]; } zio_gang_node_t; typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp, @@ -418,14 +435,16 @@ typedef struct zio_transform { typedef zio_t *zio_pipe_stage_t(zio_t *zio); /* - * The io_reexecute flags are distinct from io_flags because the child must - * be able to propagate them to the parent. The normal io_flags are local - * to the zio, not protected by any lock, and not modifiable by children; - * the reexecute flags are protected by io_lock, modifiable by children, - * and always propagated -- even when ZIO_FLAG_DONT_PROPAGATE is set. + * The io_post flags describe additional actions that a parent IO should + * consider or perform on behalf of a child. They are distinct from io_flags + * because the child must be able to propagate them to the parent. The normal + * io_flags are local to the zio, not protected by any lock, and not modifiable + * by children; the io_post flags are protected by io_lock, modifiable by + * children, and always propagated -- even when ZIO_FLAG_DONT_PROPAGATE is set. */ -#define ZIO_REEXECUTE_NOW 0x01 -#define ZIO_REEXECUTE_SUSPEND 0x02 +#define ZIO_POST_REEXECUTE (1 << 0) +#define ZIO_POST_SUSPEND (1 << 1) +#define ZIO_POST_DIO_CHKSUM_ERR (1 << 2) /* * The io_trim flags are used to specify the type of TRIM to perform.
They @@ -461,7 +480,7 @@ struct zio { enum zio_child io_child_type; enum trim_flag io_trim_flags; zio_priority_t io_priority; - uint8_t io_reexecute; + uint8_t io_post; uint8_t io_state[ZIO_WAIT_TYPES]; uint64_t io_txg; spa_t *io_spa; diff --git a/sys/contrib/openzfs/include/sys/zvol.h b/sys/contrib/openzfs/include/sys/zvol.h index 32e703650935..cdc9dba2a28d 100644 --- a/sys/contrib/openzfs/include/sys/zvol.h +++ b/sys/contrib/openzfs/include/sys/zvol.h @@ -36,8 +36,7 @@ #define SPEC_MAXOFFSET_T ((1LL << ((NBBY * sizeof (daddr32_t)) + \ DEV_BSHIFT - 1)) - 1) -extern void zvol_create_minor(const char *); -extern void zvol_create_minors_recursive(const char *); +extern void zvol_create_minors(const char *); extern void zvol_remove_minors(spa_t *, const char *, boolean_t); extern void zvol_rename_minors(spa_t *, const char *, const char *, boolean_t); diff --git a/sys/contrib/openzfs/include/sys/zvol_impl.h b/sys/contrib/openzfs/include/sys/zvol_impl.h index 038d4cb48f97..f3dd9f26f23c 100644 --- a/sys/contrib/openzfs/include/sys/zvol_impl.h +++ b/sys/contrib/openzfs/include/sys/zvol_impl.h @@ -108,7 +108,6 @@ zvol_state_t *zvol_find_by_name_hash(const char *name, uint64_t hash, int mode); int zvol_first_open(zvol_state_t *zv, boolean_t readonly); uint64_t zvol_name_hash(const char *name); -void zvol_remove_minors_impl(const char *name); void zvol_last_close(zvol_state_t *zv); void zvol_insert(zvol_state_t *zv); void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, @@ -132,7 +131,7 @@ void zv_request_task_free(zv_request_task_t *task); * platform dependent functions exported to platform independent code */ void zvol_os_free(zvol_state_t *zv); -void zvol_os_rename_minor(zvol_state_t *zv, const char *newname); +int zvol_os_rename_minor(zvol_state_t *zv, const char *newname); int zvol_os_create_minor(const char *name); int zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize); boolean_t zvol_os_is_zvol(const char *path); diff --git 
a/sys/contrib/openzfs/include/zfeature_common.h b/sys/contrib/openzfs/include/zfeature_common.h index 85537c1ae96e..56382ca85b55 100644 --- a/sys/contrib/openzfs/include/zfeature_common.h +++ b/sys/contrib/openzfs/include/zfeature_common.h @@ -87,6 +87,9 @@ typedef enum spa_feature { SPA_FEATURE_FAST_DEDUP, SPA_FEATURE_LONGNAME, SPA_FEATURE_LARGE_MICROZAP, + SPA_FEATURE_DYNAMIC_GANG_HEADER, + SPA_FEATURE_BLOCK_CLONING_ENDIAN, + SPA_FEATURE_PHYSICAL_REWRITE, SPA_FEATURES } spa_feature_t; @@ -103,7 +106,15 @@ typedef enum zfeature_flags { /* Activate this feature at the same time it is enabled. */ ZFEATURE_FLAG_ACTIVATE_ON_ENABLE = (1 << 2), /* Each dataset has a field set if it has ever used this feature. */ - ZFEATURE_FLAG_PER_DATASET = (1 << 3) + ZFEATURE_FLAG_PER_DATASET = (1 << 3), + /* + * This feature isn't enabled by zpool upgrade; it must be explicitly + * listed to be enabled. It will also be applied if listed in an + * explicitly provided compatibility list. This flag can be removed + * from a given feature once support is sufficiently widespread, or + * worries about backwards compatibility are no longer relevant. + */ + ZFEATURE_FLAG_NO_UPGRADE = (1 << 4) } zfeature_flags_t; typedef enum zfeature_type { diff --git a/sys/contrib/openzfs/include/zfs_crrd.h b/sys/contrib/openzfs/include/zfs_crrd.h new file mode 100644 index 000000000000..ba192a2062ea --- /dev/null +++ b/sys/contrib/openzfs/include/zfs_crrd.h @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: CDDL-1.0 +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2024 Klara Inc. + * + * This software was developed by + * Mariusz Zaborski <mariusz.zaborski@klarasystems.com> + * Fred Weigel <fred.weigel@klarasystems.com> + * under sponsorship from Wasabi Technology, Inc. and Klara Inc. + */ + +#ifndef _CRRD_H_ +#define _CRRD_H_ + +#define RRD_MAX_ENTRIES 256 + +#define RRD_ENTRY_SIZE sizeof (uint64_t) +#define RRD_STRUCT_ELEM (sizeof (rrd_t) / RRD_ENTRY_SIZE) + +typedef enum { + DBRRD_FLOOR, + DBRRD_CEILING +} dbrrd_rounding_t; + +typedef struct { + uint64_t rrdd_time; + uint64_t rrdd_txg; +} rrd_data_t; + +typedef struct { + uint64_t rrd_head; /* head (beginning) */ + uint64_t rrd_tail; /* tail (end) */ + uint64_t rrd_length; + + rrd_data_t rrd_entries[RRD_MAX_ENTRIES]; +} rrd_t; + +typedef struct { + rrd_t dbr_minutes; + rrd_t dbr_days; + rrd_t dbr_months; +} dbrrd_t; + +size_t rrd_len(rrd_t *rrd); + +const rrd_data_t *rrd_entry(rrd_t *r, size_t i); +rrd_data_t *rrd_tail_entry(rrd_t *rrd); +uint64_t rrd_tail(rrd_t *rrd); +uint64_t rrd_get(rrd_t *rrd, size_t i); + +void rrd_add(rrd_t *rrd, hrtime_t time, uint64_t txg); + +void dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg); +uint64_t dbrrd_query(dbrrd_t *r, hrtime_t tv, dbrrd_rounding_t rounding); + +#endif |