diff options
Diffstat (limited to 'sys/contrib/openzfs/module/zfs/zfs_ioctl.c')
-rw-r--r-- | sys/contrib/openzfs/module/zfs/zfs_ioctl.c | 294 |
1 files changed, 242 insertions, 52 deletions
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c index 2ac1e34dccec..53366ad49781 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c +++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c @@ -38,7 +38,7 @@ * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2019 Datto Inc. * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved. - * Copyright (c) 2019, 2021, Klara Inc. + * Copyright (c) 2019, 2021, 2023, 2024, Klara Inc. * Copyright (c) 2019, Allan Jude * Copyright 2024 Oxide Computer Company */ @@ -1794,17 +1794,45 @@ zfs_ioc_pool_get_history(zfs_cmd_t *zc) return (error); } +/* + * inputs: + * zc_nvlist_src nvlist optionally containing ZPOOL_REGUID_GUID + * zc_nvlist_src_size size of the nvlist + */ static int zfs_ioc_pool_reguid(zfs_cmd_t *zc) { + uint64_t *guidp = NULL; + nvlist_t *props = NULL; spa_t *spa; + uint64_t guid; int error; + if (zc->zc_nvlist_src_size != 0) { + error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &props); + if (error != 0) + return (error); + + error = nvlist_lookup_uint64(props, ZPOOL_REGUID_GUID, &guid); + if (error == 0) + guidp = &guid; + else if (error == ENOENT) + guidp = NULL; + else + goto out; + } + error = spa_open(zc->zc_name, &spa, FTAG); if (error == 0) { - error = spa_change_guid(spa); + error = spa_change_guid(spa, guidp); spa_close(spa, FTAG); } + +out: + if (props != NULL) + nvlist_free(props); + return (error); } @@ -2336,7 +2364,7 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) } while (error == 0) { - if (issig(JUSTLOOKING) && issig(FORREAL)) { + if (issig()) { error = SET_ERROR(EINTR); break; } @@ -3009,34 +3037,51 @@ zfs_ioc_pool_set_props(zfs_cmd_t *zc) return (error); } +/* + * innvl: { + * "get_props_names": [ "prop1", "prop2", ..., "propN" ] + * } + */ + +static const zfs_ioc_key_t zfs_keys_get_props[] = { + { ZPOOL_GET_PROPS_NAMES, DATA_TYPE_STRING_ARRAY, ZK_OPTIONAL }, +}; + static int 
-zfs_ioc_pool_get_props(zfs_cmd_t *zc) +zfs_ioc_pool_get_props(const char *pool, nvlist_t *innvl, nvlist_t *outnvl) { spa_t *spa; + char **props = NULL; + unsigned int n_props = 0; int error; - nvlist_t *nvp = NULL; - if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) { + if (nvlist_lookup_string_array(innvl, ZPOOL_GET_PROPS_NAMES, + &props, &n_props) != 0) { + props = NULL; + } + + if ((error = spa_open(pool, &spa, FTAG)) != 0) { /* * If the pool is faulted, there may be properties we can still * get (such as altroot and cachefile), so attempt to get them * anyway. */ mutex_enter(&spa_namespace_lock); - if ((spa = spa_lookup(zc->zc_name)) != NULL) - error = spa_prop_get(spa, &nvp); + if ((spa = spa_lookup(pool)) != NULL) { + error = spa_prop_get(spa, outnvl); + if (error == 0 && props != NULL) + error = spa_prop_get_nvlist(spa, props, n_props, + outnvl); + } mutex_exit(&spa_namespace_lock); } else { - error = spa_prop_get(spa, &nvp); + error = spa_prop_get(spa, outnvl); + if (error == 0 && props != NULL) + error = spa_prop_get_nvlist(spa, props, n_props, + outnvl); spa_close(spa, FTAG); } - if (error == 0 && zc->zc_nvlist_dst != 0) - error = put_nvlist(zc, nvp); - else - error = SET_ERROR(EFAULT); - - nvlist_free(nvp); return (error); } @@ -4032,6 +4077,52 @@ zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl, } /* + * Loads specific types of data for the given pool + * + * innvl: { + * "prefetch_type" -> int32_t + * } + * + * outnvl: empty + */ +static const zfs_ioc_key_t zfs_keys_pool_prefetch[] = { + {ZPOOL_PREFETCH_TYPE, DATA_TYPE_INT32, 0}, +}; + +static int +zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) +{ + (void) outnvl; + + int error; + spa_t *spa; + int32_t type; + + /* + * Currently, only ZPOOL_PREFETCH_DDT is supported + */ + if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0 || + type != ZPOOL_PREFETCH_DDT) { + return (EINVAL); + } + + error = spa_open(poolname, &spa, FTAG); + if 
(error != 0) + return (error); + + hrtime_t start_time = gethrtime(); + + ddt_prefetch_all(spa); + + zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms", spa->spa_name, + (u_longlong_t)NSEC2MSEC(gethrtime() - start_time)); + + spa_close(spa, FTAG); + + return (error); +} + +/* * inputs: * zc_name name of dataset to destroy * zc_defer_destroy mark for deferred destroy @@ -4251,6 +4342,51 @@ zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) return (total_errors > 0 ? SET_ERROR(EINVAL) : 0); } +#define DDT_PRUNE_UNIT "ddt_prune_unit" +#define DDT_PRUNE_AMOUNT "ddt_prune_amount" + +/* + * innvl: { + * "ddt_prune_unit" -> int32_t + * "ddt_prune_amount" -> uint64_t + * } + * + * outnvl: empty + */ +static const zfs_ioc_key_t zfs_keys_ddt_prune[] = { + {DDT_PRUNE_UNIT, DATA_TYPE_INT32, 0}, + {DDT_PRUNE_AMOUNT, DATA_TYPE_UINT64, 0}, +}; + +static int +zfs_ioc_ddt_prune(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) +{ + int32_t unit; + uint64_t amount; + + if (nvlist_lookup_int32(innvl, DDT_PRUNE_UNIT, &unit) != 0 || + nvlist_lookup_uint64(innvl, DDT_PRUNE_AMOUNT, &amount) != 0) { + return (EINVAL); + } + + spa_t *spa; + int error = spa_open(poolname, &spa, FTAG); + if (error != 0) + return (error); + + if (!spa_feature_is_enabled(spa, SPA_FEATURE_FAST_DEDUP)) { + spa_close(spa, FTAG); + return (SET_ERROR(ENOTSUP)); + } + + error = ddt_prune_unique_entries(spa, (zpool_ddt_prune_unit_t)unit, + amount); + + spa_close(spa, FTAG); + + return (error); +} + /* * This ioctl waits for activity of a particular type to complete. If there is * no activity of that type in progress, it returns immediately, and the @@ -5514,6 +5650,14 @@ out: return (error); } +/* + * When stack space is limited, we write replication stream data to the target + * on a separate taskq thread, to make sure there's enough stack space. 
+ */ +#ifndef HAVE_LARGE_STACKS +#define USE_SEND_TASKQ 1 +#endif + typedef struct dump_bytes_io { zfs_file_t *dbi_fp; caddr_t dbi_buf; @@ -5534,31 +5678,65 @@ dump_bytes_cb(void *arg) dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL); } +typedef struct dump_bytes_arg { + zfs_file_t *dba_fp; +#ifdef USE_SEND_TASKQ + taskq_t *dba_tq; + taskq_ent_t dba_tqent; +#endif +} dump_bytes_arg_t; + static int dump_bytes(objset_t *os, void *buf, int len, void *arg) { + dump_bytes_arg_t *dba = (dump_bytes_arg_t *)arg; dump_bytes_io_t dbi; - dbi.dbi_fp = arg; + dbi.dbi_fp = dba->dba_fp; dbi.dbi_buf = buf; dbi.dbi_len = len; -#if defined(HAVE_LARGE_STACKS) - dump_bytes_cb(&dbi); +#ifdef USE_SEND_TASKQ + taskq_dispatch_ent(dba->dba_tq, dump_bytes_cb, &dbi, TQ_SLEEP, + &dba->dba_tqent); + taskq_wait(dba->dba_tq); #else - /* - * The vn_rdwr() call is performed in a taskq to ensure that there is - * always enough stack space to write safely to the target filesystem. - * The ZIO_TYPE_FREE threads are used because there can be a lot of - * them and they are used in vdev_file.c for a similar purpose. 
- */ - spa_taskq_dispatch_sync(dmu_objset_spa(os), ZIO_TYPE_FREE, - ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP); -#endif /* HAVE_LARGE_STACKS */ + dump_bytes_cb(&dbi); +#endif return (dbi.dbi_err); } +static int +dump_bytes_init(dump_bytes_arg_t *dba, int fd, dmu_send_outparams_t *out) +{ + zfs_file_t *fp = zfs_file_get(fd); + if (fp == NULL) + return (SET_ERROR(EBADF)); + + dba->dba_fp = fp; +#ifdef USE_SEND_TASKQ + dba->dba_tq = taskq_create("z_send", 1, defclsyspri, 0, 0, 0); + taskq_init_ent(&dba->dba_tqent); +#endif + + memset(out, 0, sizeof (dmu_send_outparams_t)); + out->dso_outfunc = dump_bytes; + out->dso_arg = dba; + out->dso_dryrun = B_FALSE; + + return (0); +} + +static void +dump_bytes_fini(dump_bytes_arg_t *dba) +{ + zfs_file_put(dba->dba_fp); +#ifdef USE_SEND_TASKQ + taskq_destroy(dba->dba_tq); +#endif +} + /* * inputs: * zc_name name of snapshot to send @@ -5643,21 +5821,18 @@ zfs_ioc_send(zfs_cmd_t *zc) dsl_dataset_rele(tosnap, FTAG); dsl_pool_rele(dp, FTAG); } else { - zfs_file_t *fp; - dmu_send_outparams_t out = {0}; - - if ((fp = zfs_file_get(zc->zc_cookie)) == NULL) - return (SET_ERROR(EBADF)); + dump_bytes_arg_t dba; + dmu_send_outparams_t out; + error = dump_bytes_init(&dba, zc->zc_cookie, &out); + if (error) + return (error); - off = zfs_file_off(fp); - out.dso_outfunc = dump_bytes; - out.dso_arg = fp; - out.dso_dryrun = B_FALSE; + off = zfs_file_off(dba.dba_fp); error = dmu_send_obj(zc->zc_name, zc->zc_sendobj, zc->zc_fromobj, embedok, large_block_ok, compressok, rawok, savedok, zc->zc_cookie, &off, &out); - zfs_file_put(fp); + dump_bytes_fini(&dba); } return (error); } @@ -5823,10 +5998,13 @@ zfs_ioc_clear(zfs_cmd_t *zc) /* * If multihost is enabled, resuming I/O is unsafe as another - * host may have imported the pool. + * host may have imported the pool. Check for remote activity. 
*/ - if (spa_multihost(spa) && spa_suspended(spa)) - return (SET_ERROR(EINVAL)); + if (spa_multihost(spa) && spa_suspended(spa) && + spa_mmp_remote_host_activity(spa)) { + spa_close(spa, FTAG); + return (SET_ERROR(EREMOTEIO)); + } spa_vdev_state_enter(spa, SCL_NONE); @@ -6601,7 +6779,6 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) offset_t off; const char *fromname = NULL; int fd; - zfs_file_t *fp; boolean_t largeblockok; boolean_t embedok; boolean_t compressok; @@ -6626,20 +6803,19 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) (void) nvlist_lookup_string(innvl, "redactbook", &redactbook); - if ((fp = zfs_file_get(fd)) == NULL) - return (SET_ERROR(EBADF)); - - off = zfs_file_off(fp); + dump_bytes_arg_t dba; + dmu_send_outparams_t out; + error = dump_bytes_init(&dba, fd, &out); + if (error) + return (error); - dmu_send_outparams_t out = {0}; - out.dso_outfunc = dump_bytes; - out.dso_arg = fp; - out.dso_dryrun = B_FALSE; + off = zfs_file_off(dba.dba_fp); error = dmu_send(snapname, fromname, embedok, largeblockok, compressok, rawok, savedok, resumeobj, resumeoff, redactbook, fd, &off, &out); - zfs_file_put(fp); + dump_bytes_fini(&dba); + return (error); } @@ -7243,6 +7419,12 @@ zfs_ioctl_init(void) zfs_keys_pool_discard_checkpoint, ARRAY_SIZE(zfs_keys_pool_discard_checkpoint)); + zfs_ioctl_register("zpool_prefetch", + ZFS_IOC_POOL_PREFETCH, zfs_ioc_pool_prefetch, + zfs_secpolicy_config, POOL_NAME, + POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE, + zfs_keys_pool_prefetch, ARRAY_SIZE(zfs_keys_pool_prefetch)); + zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE, zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, @@ -7288,6 +7470,16 @@ zfs_ioctl_init(void) POOL_CHECK_NONE, B_TRUE, B_TRUE, zfs_keys_pool_scrub, ARRAY_SIZE(zfs_keys_pool_scrub)); + zfs_ioctl_register("get_props", ZFS_IOC_POOL_GET_PROPS, + zfs_ioc_pool_get_props, zfs_secpolicy_read, 
POOL_NAME, + POOL_CHECK_NONE, B_FALSE, B_FALSE, + zfs_keys_get_props, ARRAY_SIZE(zfs_keys_get_props)); + + zfs_ioctl_register("zpool_ddt_prune", ZFS_IOC_DDT_PRUNE, + zfs_ioc_ddt_prune, zfs_secpolicy_config, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, + zfs_keys_ddt_prune, ARRAY_SIZE(zfs_keys_ddt_prune)); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, @@ -7343,8 +7535,6 @@ zfs_ioctl_init(void) zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats, zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); - zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props, - zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log, zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED); |