Diffstat (limited to 'sys/contrib/openzfs/module/zfs/zfs_ioctl.c')
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_ioctl.c  294
1 file changed, 242 insertions(+), 52 deletions(-)
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index 2ac1e34dccec..53366ad49781 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -38,7 +38,7 @@
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
* Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
- * Copyright (c) 2019, 2021, Klara Inc.
+ * Copyright (c) 2019, 2021, 2023, 2024, Klara Inc.
* Copyright (c) 2019, Allan Jude
* Copyright 2024 Oxide Computer Company
*/
@@ -1794,17 +1794,45 @@ zfs_ioc_pool_get_history(zfs_cmd_t *zc)
return (error);
}
+/*
+ * inputs:
+ * zc_nvlist_src nvlist optionally containing ZPOOL_REGUID_GUID
+ * zc_nvlist_src_size size of the nvlist
+ */
static int
zfs_ioc_pool_reguid(zfs_cmd_t *zc)
{
+ uint64_t *guidp = NULL;
+ nvlist_t *props = NULL;
spa_t *spa;
+ uint64_t guid;
int error;
+ if (zc->zc_nvlist_src_size != 0) {
+ error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+ zc->zc_iflags, &props);
+ if (error != 0)
+ return (error);
+
+ error = nvlist_lookup_uint64(props, ZPOOL_REGUID_GUID, &guid);
+ if (error == 0)
+ guidp = &guid;
+ else if (error == ENOENT)
+ guidp = NULL;
+ else
+ goto out;
+ }
+
error = spa_open(zc->zc_name, &spa, FTAG);
if (error == 0) {
- error = spa_change_guid(spa);
+ error = spa_change_guid(spa, guidp);
spa_close(spa, FTAG);
}
+
+out:
+ if (props != NULL)
+ nvlist_free(props);
+
return (error);
}
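For context, a minimal sketch of how a caller might build the optional zc_nvlist_src payload that the handler above consumes. It assumes the libnvpair fnvlist_* helpers and the ZPOOL_REGUID_GUID key from sys/fs/zfs.h; the helper name is illustrative only and not part of this change.

#include <libnvpair.h>
#include <sys/fs/zfs.h>

/*
 * Illustrative only: build the nvlist placed in zc_nvlist_src for
 * ZFS_IOC_POOL_REGUID.  Omitting ZPOOL_REGUID_GUID (or passing no
 * nvlist at all) keeps the old behaviour of picking a random GUID.
 */
static nvlist_t *
reguid_args(uint64_t new_guid)
{
	nvlist_t *args = fnvlist_alloc();
	fnvlist_add_uint64(args, ZPOOL_REGUID_GUID, new_guid);
	return (args);
}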
@@ -2336,7 +2364,7 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
}
while (error == 0) {
- if (issig(JUSTLOOKING) && issig(FORREAL)) {
+ if (issig()) {
error = SET_ERROR(EINTR);
break;
}
@@ -3009,34 +3037,51 @@ zfs_ioc_pool_set_props(zfs_cmd_t *zc)
return (error);
}
+/*
+ * innvl: {
+ * "get_props_names": [ "prop1", "prop2", ..., "propN" ]
+ * }
+ */
+
+static const zfs_ioc_key_t zfs_keys_get_props[] = {
+ { ZPOOL_GET_PROPS_NAMES, DATA_TYPE_STRING_ARRAY, ZK_OPTIONAL },
+};
+
static int
-zfs_ioc_pool_get_props(zfs_cmd_t *zc)
+zfs_ioc_pool_get_props(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
{
spa_t *spa;
+ char **props = NULL;
+ unsigned int n_props = 0;
int error;
- nvlist_t *nvp = NULL;
- if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
+ if (nvlist_lookup_string_array(innvl, ZPOOL_GET_PROPS_NAMES,
+ &props, &n_props) != 0) {
+ props = NULL;
+ }
+
+ if ((error = spa_open(pool, &spa, FTAG)) != 0) {
/*
* If the pool is faulted, there may be properties we can still
* get (such as altroot and cachefile), so attempt to get them
* anyway.
*/
mutex_enter(&spa_namespace_lock);
- if ((spa = spa_lookup(zc->zc_name)) != NULL)
- error = spa_prop_get(spa, &nvp);
+ if ((spa = spa_lookup(pool)) != NULL) {
+ error = spa_prop_get(spa, outnvl);
+ if (error == 0 && props != NULL)
+ error = spa_prop_get_nvlist(spa, props, n_props,
+ outnvl);
+ }
mutex_exit(&spa_namespace_lock);
} else {
- error = spa_prop_get(spa, &nvp);
+ error = spa_prop_get(spa, outnvl);
+ if (error == 0 && props != NULL)
+ error = spa_prop_get_nvlist(spa, props, n_props,
+ outnvl);
spa_close(spa, FTAG);
}
- if (error == 0 && zc->zc_nvlist_dst != 0)
- error = put_nvlist(zc, nvp);
- else
- error = SET_ERROR(EFAULT);
-
- nvlist_free(nvp);
return (error);
}
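A sketch of the innvl a caller could pass to the new get_props handler, assuming the same headers as the earlier sketch. ZPOOL_GET_PROPS_NAMES is the key looked up above; the property names and helper name here are examples only.

/*
 * Illustrative only: innvl for ZFS_IOC_POOL_GET_PROPS restricting the
 * result to specific properties.  Leaving the key out (it is ZK_OPTIONAL)
 * returns all pool properties, as before.
 */
static nvlist_t *
get_props_args(void)
{
	const char *names[] = { "size", "free", "ashift" };
	nvlist_t *innvl = fnvlist_alloc();
	fnvlist_add_string_array(innvl, ZPOOL_GET_PROPS_NAMES, names, 3);
	return (innvl);
}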
@@ -4032,6 +4077,52 @@ zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
}
/*
+ * Loads specific types of data for the given pool
+ *
+ * innvl: {
+ * "prefetch_type" -> int32_t
+ * }
+ *
+ * outnvl: empty
+ */
+static const zfs_ioc_key_t zfs_keys_pool_prefetch[] = {
+ {ZPOOL_PREFETCH_TYPE, DATA_TYPE_INT32, 0},
+};
+
+static int
+zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+ (void) outnvl;
+
+ int error;
+ spa_t *spa;
+ int32_t type;
+
+ /*
+ * Currently, only ZPOOL_PREFETCH_DDT is supported
+ */
+ if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0 ||
+ type != ZPOOL_PREFETCH_DDT) {
+ return (EINVAL);
+ }
+
+ error = spa_open(poolname, &spa, FTAG);
+ if (error != 0)
+ return (error);
+
+ hrtime_t start_time = gethrtime();
+
+ ddt_prefetch_all(spa);
+
+ zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms", spa->spa_name,
+ (u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
+
+ spa_close(spa, FTAG);
+
+ return (error);
+}
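A sketch of the innvl a caller might pass to this handler, assuming the same headers as the earlier sketches; ZPOOL_PREFETCH_TYPE and ZPOOL_PREFETCH_DDT come from sys/fs/zfs.h, and ZPOOL_PREFETCH_DDT is the only value the code above accepts.

/*
 * Illustrative only: request a DDT prefetch via ZFS_IOC_POOL_PREFETCH.
 * Any other prefetch type is rejected with EINVAL by the handler above.
 */
static nvlist_t *
prefetch_args(void)
{
	nvlist_t *innvl = fnvlist_alloc();
	fnvlist_add_int32(innvl, ZPOOL_PREFETCH_TYPE, ZPOOL_PREFETCH_DDT);
	return (innvl);
}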
+
+/*
* inputs:
* zc_name name of dataset to destroy
* zc_defer_destroy mark for deferred destroy
@@ -4251,6 +4342,51 @@ zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
}
+#define DDT_PRUNE_UNIT "ddt_prune_unit"
+#define DDT_PRUNE_AMOUNT "ddt_prune_amount"
+
+/*
+ * innvl: {
+ * "ddt_prune_unit" -> uint32_t
+ * "ddt_prune_amount" -> uint64_t
+ * }
+ *
+ * outnvl: empty
+ */
+static const zfs_ioc_key_t zfs_keys_ddt_prune[] = {
+ {DDT_PRUNE_UNIT, DATA_TYPE_INT32, 0},
+ {DDT_PRUNE_AMOUNT, DATA_TYPE_UINT64, 0},
+};
+
+static int
+zfs_ioc_ddt_prune(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+ int32_t unit;
+ uint64_t amount;
+
+ if (nvlist_lookup_int32(innvl, DDT_PRUNE_UNIT, &unit) != 0 ||
+ nvlist_lookup_uint64(innvl, DDT_PRUNE_AMOUNT, &amount) != 0) {
+ return (EINVAL);
+ }
+
+ spa_t *spa;
+ int error = spa_open(poolname, &spa, FTAG);
+ if (error != 0)
+ return (error);
+
+ if (!spa_feature_is_enabled(spa, SPA_FEATURE_FAST_DEDUP)) {
+ spa_close(spa, FTAG);
+ return (SET_ERROR(ENOTSUP));
+ }
+
+ error = ddt_prune_unique_entries(spa, (zpool_ddt_prune_unit_t)unit,
+ amount);
+
+ spa_close(spa, FTAG);
+
+ return (error);
+}
+
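For reference, a minimal sketch of the innvl this handler expects, assuming the same headers as the earlier sketches. The key strings match the #defines above (which are local to this file, so a caller uses the literal strings), and the unit value assumes the zpool_ddt_prune_unit_t values introduced alongside SPA_FEATURE_FAST_DEDUP.

/*
 * Illustrative only: ask ZFS_IOC_DDT_PRUNE to prune unique DDT entries
 * older than 30 days (unit/amount semantics per the comment above).
 */
static nvlist_t *
ddt_prune_args(void)
{
	nvlist_t *innvl = fnvlist_alloc();
	fnvlist_add_int32(innvl, "ddt_prune_unit", ZPOOL_DDT_PRUNE_AGE);
	fnvlist_add_uint64(innvl, "ddt_prune_amount", 30ULL * 24 * 60 * 60);
	return (innvl);
}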
/*
* This ioctl waits for activity of a particular type to complete. If there is
* no activity of that type in progress, it returns immediately, and the
@@ -5514,6 +5650,14 @@ out:
return (error);
}
+/*
+ * When stack space is limited, we write replication stream data to the target
+ * on a separate taskq thread, to make sure there's enough stack space.
+ */
+#ifndef HAVE_LARGE_STACKS
+#define USE_SEND_TASKQ 1
+#endif
+
typedef struct dump_bytes_io {
zfs_file_t *dbi_fp;
caddr_t dbi_buf;
@@ -5534,31 +5678,65 @@ dump_bytes_cb(void *arg)
dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL);
}
+typedef struct dump_bytes_arg {
+ zfs_file_t *dba_fp;
+#ifdef USE_SEND_TASKQ
+ taskq_t *dba_tq;
+ taskq_ent_t dba_tqent;
+#endif
+} dump_bytes_arg_t;
+
static int
dump_bytes(objset_t *os, void *buf, int len, void *arg)
{
+ dump_bytes_arg_t *dba = (dump_bytes_arg_t *)arg;
dump_bytes_io_t dbi;
- dbi.dbi_fp = arg;
+ dbi.dbi_fp = dba->dba_fp;
dbi.dbi_buf = buf;
dbi.dbi_len = len;
-#if defined(HAVE_LARGE_STACKS)
- dump_bytes_cb(&dbi);
+#ifdef USE_SEND_TASKQ
+ taskq_dispatch_ent(dba->dba_tq, dump_bytes_cb, &dbi, TQ_SLEEP,
+ &dba->dba_tqent);
+ taskq_wait(dba->dba_tq);
#else
- /*
- * The vn_rdwr() call is performed in a taskq to ensure that there is
- * always enough stack space to write safely to the target filesystem.
- * The ZIO_TYPE_FREE threads are used because there can be a lot of
- * them and they are used in vdev_file.c for a similar purpose.
- */
- spa_taskq_dispatch_sync(dmu_objset_spa(os), ZIO_TYPE_FREE,
- ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP);
-#endif /* HAVE_LARGE_STACKS */
+ dump_bytes_cb(&dbi);
+#endif
return (dbi.dbi_err);
}
+static int
+dump_bytes_init(dump_bytes_arg_t *dba, int fd, dmu_send_outparams_t *out)
+{
+ zfs_file_t *fp = zfs_file_get(fd);
+ if (fp == NULL)
+ return (SET_ERROR(EBADF));
+
+ dba->dba_fp = fp;
+#ifdef USE_SEND_TASKQ
+ dba->dba_tq = taskq_create("z_send", 1, defclsyspri, 0, 0, 0);
+ taskq_init_ent(&dba->dba_tqent);
+#endif
+
+ memset(out, 0, sizeof (dmu_send_outparams_t));
+ out->dso_outfunc = dump_bytes;
+ out->dso_arg = dba;
+ out->dso_dryrun = B_FALSE;
+
+ return (0);
+}
+
+static void
+dump_bytes_fini(dump_bytes_arg_t *dba)
+{
+ zfs_file_put(dba->dba_fp);
+#ifdef USE_SEND_TASKQ
+ taskq_destroy(dba->dba_tq);
+#endif
+}
+
/*
* inputs:
* zc_name name of snapshot to send
@@ -5643,21 +5821,18 @@ zfs_ioc_send(zfs_cmd_t *zc)
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
} else {
- zfs_file_t *fp;
- dmu_send_outparams_t out = {0};
-
- if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
- return (SET_ERROR(EBADF));
+ dump_bytes_arg_t dba;
+ dmu_send_outparams_t out;
+ error = dump_bytes_init(&dba, zc->zc_cookie, &out);
+ if (error)
+ return (error);
- off = zfs_file_off(fp);
- out.dso_outfunc = dump_bytes;
- out.dso_arg = fp;
- out.dso_dryrun = B_FALSE;
+ off = zfs_file_off(dba.dba_fp);
error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
zc->zc_fromobj, embedok, large_block_ok, compressok,
rawok, savedok, zc->zc_cookie, &off, &out);
- zfs_file_put(fp);
+ dump_bytes_fini(&dba);
}
return (error);
}
@@ -5823,10 +5998,13 @@ zfs_ioc_clear(zfs_cmd_t *zc)
/*
* If multihost is enabled, resuming I/O is unsafe as another
- * host may have imported the pool.
+ * host may have imported the pool. Check for remote activity.
*/
- if (spa_multihost(spa) && spa_suspended(spa))
- return (SET_ERROR(EINVAL));
+ if (spa_multihost(spa) && spa_suspended(spa) &&
+ spa_mmp_remote_host_activity(spa)) {
+ spa_close(spa, FTAG);
+ return (SET_ERROR(EREMOTEIO));
+ }
spa_vdev_state_enter(spa, SCL_NONE);
@@ -6601,7 +6779,6 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
offset_t off;
const char *fromname = NULL;
int fd;
- zfs_file_t *fp;
boolean_t largeblockok;
boolean_t embedok;
boolean_t compressok;
@@ -6626,20 +6803,19 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
(void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
- if ((fp = zfs_file_get(fd)) == NULL)
- return (SET_ERROR(EBADF));
-
- off = zfs_file_off(fp);
+ dump_bytes_arg_t dba;
+ dmu_send_outparams_t out;
+ error = dump_bytes_init(&dba, fd, &out);
+ if (error)
+ return (error);
- dmu_send_outparams_t out = {0};
- out.dso_outfunc = dump_bytes;
- out.dso_arg = fp;
- out.dso_dryrun = B_FALSE;
+ off = zfs_file_off(dba.dba_fp);
error = dmu_send(snapname, fromname, embedok, largeblockok,
compressok, rawok, savedok, resumeobj, resumeoff,
redactbook, fd, &off, &out);
- zfs_file_put(fp);
+ dump_bytes_fini(&dba);
+
return (error);
}
@@ -7243,6 +7419,12 @@ zfs_ioctl_init(void)
zfs_keys_pool_discard_checkpoint,
ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));
+ zfs_ioctl_register("zpool_prefetch",
+ ZFS_IOC_POOL_PREFETCH, zfs_ioc_pool_prefetch,
+ zfs_secpolicy_config, POOL_NAME,
+ POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
+ zfs_keys_pool_prefetch, ARRAY_SIZE(zfs_keys_pool_prefetch));
+
zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
@@ -7288,6 +7470,16 @@ zfs_ioctl_init(void)
POOL_CHECK_NONE, B_TRUE, B_TRUE,
zfs_keys_pool_scrub, ARRAY_SIZE(zfs_keys_pool_scrub));
+ zfs_ioctl_register("get_props", ZFS_IOC_POOL_GET_PROPS,
+ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME,
+ POOL_CHECK_NONE, B_FALSE, B_FALSE,
+ zfs_keys_get_props, ARRAY_SIZE(zfs_keys_get_props));
+
+ zfs_ioctl_register("zpool_ddt_prune", ZFS_IOC_DDT_PRUNE,
+ zfs_ioc_ddt_prune, zfs_secpolicy_config, POOL_NAME,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
+ zfs_keys_ddt_prune, ARRAY_SIZE(zfs_keys_ddt_prune));
+
/* IOCTLS that use the legacy function signature */
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
@@ -7343,8 +7535,6 @@ zfs_ioctl_init(void)
zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
- zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
- zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);