diff options
Diffstat (limited to 'sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c')
-rw-r--r-- | sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c | 432 |
1 files changed, 175 insertions, 257 deletions
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index c8a04539258f..967a018640e1 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2024, Rob Norris <robn@despairlabs.com> - * Copyright (c) 2024, Klara, Inc. + * Copyright (c) 2024, 2025, Klara, Inc. */ #include <sys/dataset_kstats.h> @@ -51,21 +51,12 @@ static void zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, boolean_t force_sync); static unsigned int zvol_major = ZVOL_MAJOR; -static unsigned int zvol_request_sync = 0; -static unsigned int zvol_prefetch_bytes = (128 * 1024); static unsigned long zvol_max_discard_blocks = 16384; -/* - * Switch taskq at multiple of 512 MB offset. This can be set to a lower value - * to utilize more threads for small files but may affect prefetch hits. - */ -#define ZVOL_TASKQ_OFFSET_SHIFT 29 - #ifndef HAVE_BLKDEV_GET_ERESTARTSYS static unsigned int zvol_open_timeout_ms = 1000; #endif -static unsigned int zvol_threads = 0; static unsigned int zvol_blk_mq_threads = 0; static unsigned int zvol_blk_mq_actual_threads; static boolean_t zvol_use_blk_mq = B_FALSE; @@ -82,8 +73,6 @@ static boolean_t zvol_use_blk_mq = B_FALSE; */ static unsigned int zvol_blk_mq_blocks_per_thread = 8; -static unsigned int zvol_num_taskqs = 0; - #ifndef BLKDEV_DEFAULT_RQ /* BLKDEV_MAX_RQ was renamed to BLKDEV_DEFAULT_RQ in the 5.16 kernel */ #define BLKDEV_DEFAULT_RQ BLKDEV_MAX_RQ @@ -95,8 +84,9 @@ static unsigned int zvol_num_taskqs = 0; static inline void zvol_end_io(struct bio *bio, struct request *rq, int error) { + ASSERT3U(error, >=, 0); if (bio) { - bio->bi_status = errno_to_bi_status(-error); + bio->bi_status = errno_to_bi_status(error); bio_endio(bio); } else { blk_mq_end_request(rq, errno_to_bi_status(error)); @@ -117,45 +107,8 @@ struct zvol_state_os { boolean_t use_blk_mq; }; -typedef struct zv_taskq { - uint_t tqs_cnt; - taskq_t **tqs_taskq; -} zv_taskq_t; -static zv_taskq_t zvol_taskqs; static struct ida zvol_ida; -typedef struct zv_request_stack { - zvol_state_t *zv; - struct bio *bio; - struct request *rq; -} zv_request_t; - -typedef struct zv_work { - struct request *rq; - struct work_struct work; -} zv_work_t; - -typedef struct zv_request_task { - zv_request_t zvr; - taskq_ent_t ent; -} zv_request_task_t; - -static zv_request_task_t * -zv_request_task_create(zv_request_t zvr) -{ - zv_request_task_t *task; - task = kmem_alloc(sizeof (zv_request_task_t), KM_SLEEP); - taskq_init_ent(&task->ent); - task->zvr = zvr; - return (task); -} - -static void -zv_request_task_free(zv_request_task_t *task) -{ - kmem_free(task, sizeof (*task)); -} - /* * This is called when a new block multiqueue request comes in. A request * contains one or more BIOs. @@ -256,8 +209,14 @@ zvol_write(zv_request_t *zvr) disk = zv->zv_zso->zvo_disk; /* bio marked as FLUSH need to flush before write */ - if (io_is_flush(bio, rq)) - zil_commit(zv->zv_zilog, ZVOL_OBJ); + if (io_is_flush(bio, rq)) { + error = zil_commit(zv->zv_zilog, ZVOL_OBJ); + if (error != 0) { + rw_exit(&zv->zv_suspend_lock); + zvol_end_io(bio, rq, -error); + return; + } + } /* Some requests are just for flush and nothing else. */ if (io_size(bio, rq) == 0) { @@ -305,7 +264,8 @@ zvol_write(zv_request_t *zvr) dmu_tx_abort(tx); break; } - error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx); + error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx, + DMU_READ_PREFETCH); if (error == 0) { zvol_log_write(zv, tx, off, bytes, sync); } @@ -320,8 +280,8 @@ zvol_write(zv_request_t *zvr) dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten); task_io_account_write(nwritten); - if (sync) - zil_commit(zv->zv_zilog, ZVOL_OBJ); + if (error == 0 && sync) + error = zil_commit(zv->zv_zilog, ZVOL_OBJ); rw_exit(&zv->zv_suspend_lock); @@ -329,7 +289,7 @@ zvol_write(zv_request_t *zvr) blk_generic_end_io_acct(q, disk, WRITE, bio, start_time); } - zvol_end_io(bio, rq, -error); + zvol_end_io(bio, rq, error); } static void @@ -408,7 +368,7 @@ zvol_discard(zv_request_t *zvr) zfs_rangelock_exit(lr); if (error == 0 && sync) - zil_commit(zv->zv_zilog, ZVOL_OBJ); + error = zil_commit(zv->zv_zilog, ZVOL_OBJ); unlock: rw_exit(&zv->zv_suspend_lock); @@ -418,7 +378,7 @@ unlock: start_time); } - zvol_end_io(bio, rq, -error); + zvol_end_io(bio, rq, error); } static void @@ -475,7 +435,8 @@ zvol_read(zv_request_t *zvr) if (bytes > volsize - uio.uio_loffset) bytes = volsize - uio.uio_loffset; - error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes); + error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes, + DMU_READ_PREFETCH); if (error) { /* convert checksum errors into IO errors */ if (error == ECKSUM) @@ -495,7 +456,7 @@ zvol_read(zv_request_t *zvr) blk_generic_end_io_acct(q, disk, READ, bio, start_time); } - zvol_end_io(bio, rq, -error); + zvol_end_io(bio, rq, error); } static void @@ -523,10 +484,31 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, fstrans_cookie_t cookie = spl_fstrans_mark(); uint64_t offset = io_offset(bio, rq); uint64_t size = io_size(bio, rq); - int rw = io_data_dir(bio, rq); + int rw; + + if (rq != NULL) { + /* + * Flush & trim requests go down the zvol_write codepath. Or + * more specifically: + * + * If request is a write, or if it's op_is_sync() and not a + * read, or if it's a flush, or if it's a discard, then send the + * request down the write path. + */ + if (op_is_write(rq->cmd_flags) || + (op_is_sync(rq->cmd_flags) && req_op(rq) != REQ_OP_READ) || + req_op(rq) == REQ_OP_FLUSH || + op_is_discard(rq->cmd_flags)) { + rw = WRITE; + } else { + rw = READ; + } + } else { + rw = bio_data_dir(bio); + } if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { - zvol_end_io(bio, rq, -SET_ERROR(ENXIO)); + zvol_end_io(bio, rq, SET_ERROR(ENXIO)); goto out; } @@ -545,7 +527,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, (long long unsigned)offset, (long unsigned)size); - zvol_end_io(bio, rq, -SET_ERROR(EIO)); + zvol_end_io(bio, rq, SET_ERROR(EIO)); goto out; } @@ -558,8 +540,8 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, #ifdef HAVE_BLK_MQ_RQ_HCTX blk_mq_hw_queue = rq->mq_hctx->queue_num; #else - blk_mq_hw_queue = - rq->q->queue_hw_ctx[rq->q->mq_map[rq->cpu]]->queue_num; + blk_mq_hw_queue = rq->q->queue_hw_ctx[ + rq->q->mq_map[raw_smp_processor_id()]]->queue_num; #endif taskq_hash = cityhash3((uintptr_t)zv, offset >> ZVOL_TASKQ_OFFSET_SHIFT, blk_mq_hw_queue); @@ -567,7 +549,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, if (rw == WRITE) { if (unlikely(zv->zv_flags & ZVOL_RDONLY)) { - zvol_end_io(bio, rq, -SET_ERROR(EROFS)); + zvol_end_io(bio, rq, SET_ERROR(EROFS)); goto out; } @@ -718,28 +700,19 @@ zvol_open(struct block_device *bdev, fmode_t flag) retry: #endif - rw_enter(&zvol_state_lock, RW_READER); - /* - * Obtain a copy of private_data under the zvol_state_lock to make - * sure that either the result of zvol free code path setting - * disk->private_data to NULL is observed, or zvol_os_free() - * is not called on this zv because of the positive zv_open_count. - */ + #ifdef HAVE_BLK_MODE_T - zv = disk->private_data; + zv = atomic_load_ptr(&disk->private_data); #else - zv = bdev->bd_disk->private_data; + zv = atomic_load_ptr(&bdev->bd_disk->private_data); #endif if (zv == NULL) { - rw_exit(&zvol_state_lock); return (-SET_ERROR(ENXIO)); } mutex_enter(&zv->zv_state_lock); - if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { mutex_exit(&zv->zv_state_lock); - rw_exit(&zvol_state_lock); return (-SET_ERROR(ENXIO)); } @@ -751,8 +724,28 @@ retry: if (zv->zv_open_count == 0) { if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) { mutex_exit(&zv->zv_state_lock); + + /* + * Removal may happen while the locks are down, so + * we can't trust zv any longer; we have to start over. + */ +#ifdef HAVE_BLK_MODE_T + zv = atomic_load_ptr(&disk->private_data); +#else + zv = atomic_load_ptr(&bdev->bd_disk->private_data); +#endif + if (zv == NULL) + return (-SET_ERROR(ENXIO)); + rw_enter(&zv->zv_suspend_lock, RW_READER); mutex_enter(&zv->zv_state_lock); + + if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { + mutex_exit(&zv->zv_state_lock); + rw_exit(&zv->zv_suspend_lock); + return (-SET_ERROR(ENXIO)); + } + /* check to see if zv_suspend_lock is needed */ if (zv->zv_open_count != 0) { rw_exit(&zv->zv_suspend_lock); @@ -763,7 +756,6 @@ retry: drop_suspend = B_TRUE; } } - rw_exit(&zvol_state_lock); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -860,11 +852,11 @@ zvol_release(struct gendisk *disk, fmode_t unused) #if !defined(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG) (void) unused; #endif - zvol_state_t *zv; boolean_t drop_suspend = B_TRUE; - rw_enter(&zvol_state_lock, RW_READER); - zv = disk->private_data; + zvol_state_t *zv = atomic_load_ptr(&disk->private_data); + if (zv == NULL) + return; mutex_enter(&zv->zv_state_lock); ASSERT3U(zv->zv_open_count, >, 0); @@ -878,6 +870,15 @@ zvol_release(struct gendisk *disk, fmode_t unused) mutex_exit(&zv->zv_state_lock); rw_enter(&zv->zv_suspend_lock, RW_READER); mutex_enter(&zv->zv_state_lock); + + /* + * Unlike in zvol_open(), we don't check if removal + * started here, because we might be one of the openers + * that needs to be thrown out! If we're the last, we + * need to call zvol_last_close() below to finish + * cleanup. So, no special treatment for us. + */ + /* check to see if zv_suspend_lock is needed */ if (zv->zv_open_count != 1) { rw_exit(&zv->zv_suspend_lock); @@ -887,7 +888,6 @@ zvol_release(struct gendisk *disk, fmode_t unused) } else { drop_suspend = B_FALSE; } - rw_exit(&zvol_state_lock); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -907,9 +907,10 @@ static int zvol_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - zvol_state_t *zv = bdev->bd_disk->private_data; int error = 0; + zvol_state_t *zv = atomic_load_ptr(&bdev->bd_disk->private_data); + ASSERT3P(zv, !=, NULL); ASSERT3U(zv->zv_open_count, >, 0); switch (cmd) { @@ -932,16 +933,18 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode, case BLKZNAME: mutex_enter(&zv->zv_state_lock); - error = copy_to_user((void *)arg, zv->zv_name, MAXNAMELEN); + error = -copy_to_user((void *)arg, zv->zv_name, MAXNAMELEN); mutex_exit(&zv->zv_state_lock); + if (error) + error = SET_ERROR(error); break; default: - error = -ENOTTY; + error = SET_ERROR(ENOTTY); break; } - return (SET_ERROR(error)); + return (-error); } #ifdef CONFIG_COMPAT @@ -960,9 +963,8 @@ zvol_check_events(struct gendisk *disk, unsigned int clearing) { unsigned int mask = 0; - rw_enter(&zvol_state_lock, RW_READER); + zvol_state_t *zv = atomic_load_ptr(&disk->private_data); - zvol_state_t *zv = disk->private_data; if (zv != NULL) { mutex_enter(&zv->zv_state_lock); mask = zv->zv_changed ? DISK_EVENT_MEDIA_CHANGE : 0; @@ -970,17 +972,14 @@ zvol_check_events(struct gendisk *disk, unsigned int clearing) mutex_exit(&zv->zv_state_lock); } - rw_exit(&zvol_state_lock); - return (mask); } static int zvol_revalidate_disk(struct gendisk *disk) { - rw_enter(&zvol_state_lock, RW_READER); + zvol_state_t *zv = atomic_load_ptr(&disk->private_data); - zvol_state_t *zv = disk->private_data; if (zv != NULL) { mutex_enter(&zv->zv_state_lock); set_capacity(zv->zv_zso->zvo_disk, @@ -988,8 +987,6 @@ zvol_revalidate_disk(struct gendisk *disk) mutex_exit(&zv->zv_state_lock); } - rw_exit(&zvol_state_lock); - return (0); } @@ -1008,16 +1005,6 @@ zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize) return (0); } -void -zvol_os_clear_private(zvol_state_t *zv) -{ - /* - * Cleared while holding zvol_state_lock as a writer - * which will prevent zvol_open() from opening it. - */ - zv->zv_zso->zvo_disk->private_data = NULL; -} - /* * Provide a simple virtual geometry for legacy compatibility. For devices * smaller than 1 MiB a small head and sector count is used to allow very @@ -1027,9 +1014,10 @@ zvol_os_clear_private(zvol_state_t *zv) static int zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - zvol_state_t *zv = bdev->bd_disk->private_data; sector_t sectors; + zvol_state_t *zv = atomic_load_ptr(&bdev->bd_disk->private_data); + ASSERT3P(zv, !=, NULL); ASSERT3U(zv->zv_open_count, >, 0); sectors = get_capacity(zv->zv_zso->zvo_disk); @@ -1348,27 +1336,30 @@ zvol_alloc_blk_mq(zvol_state_t *zv, zvol_queue_limits_t *limits) * Allocate memory for a new zvol_state_t and setup the required * request queue and generic disk structures for the block device. */ -static zvol_state_t * -zvol_alloc(dev_t dev, const char *name, uint64_t volblocksize) +static int +zvol_alloc(dev_t dev, const char *name, uint64_t volsize, uint64_t volblocksize, + zvol_state_t **zvp) { zvol_state_t *zv; struct zvol_state_os *zso; uint64_t volmode; int ret; - if (dsl_prop_get_integer(name, "volmode", &volmode, NULL) != 0) - return (NULL); + ret = dsl_prop_get_integer(name, "volmode", &volmode, NULL); + if (ret) + return (ret); if (volmode == ZFS_VOLMODE_DEFAULT) volmode = zvol_volmode; if (volmode == ZFS_VOLMODE_NONE) - return (NULL); + return (0); zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); zso = kmem_zalloc(sizeof (struct zvol_state_os), KM_SLEEP); zv->zv_zso = zso; zv->zv_volmode = volmode; + zv->zv_volsize = volsize; zv->zv_volblocksize = volblocksize; list_link_init(&zv->zv_next); @@ -1397,13 +1388,15 @@ zvol_alloc(dev_t dev, const char *name, uint64_t volblocksize) */ if (zv->zv_zso->use_blk_mq) { ret = zvol_alloc_blk_mq(zv, &limits); + if (ret != 0) + goto out_kmem; zso->zvo_disk->fops = &zvol_ops_blk_mq; } else { ret = zvol_alloc_non_blk_mq(zso, &limits); + if (ret != 0) + goto out_kmem; zso->zvo_disk->fops = &zvol_ops; } - if (ret != 0) - goto out_kmem; /* Limit read-ahead to a single page to prevent over-prefetching. */ blk_queue_set_read_ahead(zso->zvo_queue, 1); @@ -1440,61 +1433,79 @@ zvol_alloc(dev_t dev, const char *name, uint64_t volblocksize) snprintf(zso->zvo_disk->disk_name, DISK_NAME_LEN, "%s%d", ZVOL_DEV_NAME, (dev & MINORMASK)); - return (zv); + *zvp = zv; + return (ret); out_kmem: kmem_free(zso, sizeof (struct zvol_state_os)); kmem_free(zv, sizeof (zvol_state_t)); - return (NULL); + return (ret); } -/* - * Cleanup then free a zvol_state_t which was created by zvol_alloc(). - * At this time, the structure is not opened by anyone, is taken off - * the zvol_state_list, and has its private data set to NULL. - * The zvol_state_lock is dropped. - * - * This function may take many milliseconds to complete (e.g. we've seen - * it take over 256ms), due to the calls to "blk_cleanup_queue" and - * "del_gendisk". Thus, consumers need to be careful to account for this - * latency when calling this function. - */ void -zvol_os_free(zvol_state_t *zv) +zvol_os_remove_minor(zvol_state_t *zv) { - - ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); - ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); ASSERT0(zv->zv_open_count); - ASSERT3P(zv->zv_zso->zvo_disk->private_data, ==, NULL); + ASSERT0(atomic_read(&zv->zv_suspend_ref)); + ASSERT(zv->zv_flags & ZVOL_REMOVING); - rw_destroy(&zv->zv_suspend_lock); - zfs_rangelock_fini(&zv->zv_rangelock); + struct zvol_state_os *zso = zv->zv_zso; + zv->zv_zso = NULL; + + /* Clearing private_data will make new callers return immediately. */ + atomic_store_ptr(&zso->zvo_disk->private_data, NULL); + + /* + * Drop the state lock before calling del_gendisk(). There may be + * callers waiting to acquire it, but del_gendisk() will block until + * they exit, which would deadlock. + */ + mutex_exit(&zv->zv_state_lock); - del_gendisk(zv->zv_zso->zvo_disk); + del_gendisk(zso->zvo_disk); #if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \ (defined(HAVE_BLK_ALLOC_DISK) || defined(HAVE_BLK_ALLOC_DISK_2ARG)) #if defined(HAVE_BLK_CLEANUP_DISK) - blk_cleanup_disk(zv->zv_zso->zvo_disk); + blk_cleanup_disk(zso->zvo_disk); #else - put_disk(zv->zv_zso->zvo_disk); + put_disk(zso->zvo_disk); #endif #else - blk_cleanup_queue(zv->zv_zso->zvo_queue); - put_disk(zv->zv_zso->zvo_disk); + blk_cleanup_queue(zso->zvo_queue); + put_disk(zso->zvo_disk); #endif - if (zv->zv_zso->use_blk_mq) - blk_mq_free_tag_set(&zv->zv_zso->tag_set); + if (zso->use_blk_mq) + blk_mq_free_tag_set(&zso->tag_set); - ida_simple_remove(&zvol_ida, - MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS); + ida_simple_remove(&zvol_ida, MINOR(zso->zvo_dev) >> ZVOL_MINOR_BITS); + + kmem_free(zso, sizeof (struct zvol_state_os)); + + mutex_enter(&zv->zv_state_lock); +} + +void +zvol_os_free(zvol_state_t *zv) +{ + + ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); + ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); + ASSERT0(zv->zv_open_count); + ASSERT0P(zv->zv_zso); + + ASSERT0P(zv->zv_objset); + ASSERT0P(zv->zv_zilog); + ASSERT0P(zv->zv_dn); + + rw_destroy(&zv->zv_suspend_lock); + zfs_rangelock_fini(&zv->zv_rangelock); cv_destroy(&zv->zv_removing_cv); mutex_destroy(&zv->zv_state_lock); dataset_kstats_destroy(&zv->zv_kstat); - kmem_free(zv->zv_zso, sizeof (struct zvol_state_os)); kmem_free(zv, sizeof (zvol_state_t)); } @@ -1514,7 +1525,9 @@ __zvol_os_add_disk(struct gendisk *disk) { int error = 0; #ifdef HAVE_ADD_DISK_RET - error = add_disk(disk); + error = -add_disk(disk); + if (error) + error = SET_ERROR(error); #else add_disk(disk); #endif @@ -1606,7 +1619,7 @@ zvol_os_add_disk(struct gendisk *disk) int zvol_os_create_minor(const char *name) { - zvol_state_t *zv; + zvol_state_t *zv = NULL; objset_t *os; dmu_object_info_t *doi; uint64_t volsize; @@ -1655,18 +1668,16 @@ zvol_os_create_minor(const char *name) if (error) goto out_dmu_objset_disown; - zv = zvol_alloc(MKDEV(zvol_major, minor), name, - doi->doi_data_block_size); - if (zv == NULL) { - error = SET_ERROR(EAGAIN); + error = zvol_alloc(MKDEV(zvol_major, minor), name, + volsize, doi->doi_data_block_size, &zv); + if (error || zv == NULL) goto out_dmu_objset_disown; - } + zv->zv_hash = hash; if (dmu_objset_is_snapshot(os)) zv->zv_flags |= ZVOL_RDONLY; - zv->zv_volsize = volsize; zv->zv_objset = os; /* Default */ @@ -1691,11 +1702,11 @@ zvol_os_create_minor(const char *name) blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, zv->zv_zso->zvo_queue); #endif - ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL); + ASSERT0P(zv->zv_kstat.dk_kstats); error = dataset_kstats_create(&zv->zv_kstat, zv->zv_objset); if (error) goto out_dmu_objset_disown; - ASSERT3P(zv->zv_zilog, ==, NULL); + ASSERT0P(zv->zv_zilog); zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums); if (spa_writeable(dmu_objset_spa(os))) { if (zil_replay_disable) @@ -1733,7 +1744,7 @@ out_doi: * zvol_open()->zvol_first_open() and zvol_release()->zvol_last_close() * directly as well. */ - if (error == 0) { + if (error == 0 && zv) { rw_enter(&zvol_state_lock, RW_WRITER); zvol_insert(zv); rw_exit(&zvol_state_lock); @@ -1745,7 +1756,7 @@ out_doi: return (error); } -void +int zvol_os_rename_minor(zvol_state_t *zv, const char *newname) { int readonly = get_disk_ro(zv->zv_zso->zvo_disk); @@ -1772,6 +1783,8 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname) set_disk_ro(zv->zv_zso->zvo_disk, readonly); dataset_kstats_rename(&zv->zv_kstat, newname); + + return (0); } void @@ -1793,61 +1806,16 @@ zvol_init(void) { int error; - /* - * zvol_threads is the module param the user passes in. - * - * zvol_actual_threads is what we use internally, since the user can - * pass zvol_thread = 0 to mean "use all the CPUs" (the default). - */ - static unsigned int zvol_actual_threads; - - if (zvol_threads == 0) { - /* - * See dde9380a1 for why 32 was chosen here. This should - * probably be refined to be some multiple of the number - * of CPUs. - */ - zvol_actual_threads = MAX(num_online_cpus(), 32); - } else { - zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024); + error = zvol_init_impl(); + if (error) { + printk(KERN_INFO "ZFS: zvol_init_impl() failed %d\n", error); + return (error); } - /* - * Use atleast 32 zvol_threads but for many core system, - * prefer 6 threads per taskq, but no more taskqs - * than threads in them on large systems. - * - * taskq total - * cpus taskqs threads threads - * ------- ------- ------- ------- - * 1 1 32 32 - * 2 1 32 32 - * 4 1 32 32 - * 8 2 16 32 - * 16 3 11 33 - * 32 5 7 35 - * 64 8 8 64 - * 128 11 12 132 - * 256 16 16 256 - */ - zv_taskq_t *ztqs = &zvol_taskqs; - uint_t num_tqs = MIN(num_online_cpus(), zvol_num_taskqs); - if (num_tqs == 0) { - num_tqs = 1 + num_online_cpus() / 6; - while (num_tqs * num_tqs > zvol_actual_threads) - num_tqs--; - } - uint_t per_tq_thread = zvol_actual_threads / num_tqs; - if (per_tq_thread * num_tqs < zvol_actual_threads) - per_tq_thread++; - ztqs->tqs_cnt = num_tqs; - ztqs->tqs_taskq = kmem_alloc(num_tqs * sizeof (taskq_t *), KM_SLEEP); - error = register_blkdev(zvol_major, ZVOL_DRIVER); + error = -register_blkdev(zvol_major, ZVOL_DRIVER); if (error) { - kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt * sizeof (taskq_t *)); - ztqs->tqs_taskq = NULL; printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error); - return (error); + return (SET_ERROR(error)); } if (zvol_blk_mq_queue_depth == 0) { @@ -1864,25 +1832,6 @@ zvol_init(void) 1024); } - for (uint_t i = 0; i < num_tqs; i++) { - char name[32]; - (void) snprintf(name, sizeof (name), "%s_tq-%u", - ZVOL_DRIVER, i); - ztqs->tqs_taskq[i] = taskq_create(name, per_tq_thread, - maxclsyspri, per_tq_thread, INT_MAX, - TASKQ_PREPOPULATE | TASKQ_DYNAMIC); - if (ztqs->tqs_taskq[i] == NULL) { - for (int j = i - 1; j >= 0; j--) - taskq_destroy(ztqs->tqs_taskq[j]); - unregister_blkdev(zvol_major, ZVOL_DRIVER); - kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt * - sizeof (taskq_t *)); - ztqs->tqs_taskq = NULL; - return (-ENOMEM); - } - } - - zvol_init_impl(); ida_init(&zvol_ida); return (0); } @@ -1890,50 +1839,19 @@ zvol_init(void) void zvol_fini(void) { - zv_taskq_t *ztqs = &zvol_taskqs; - zvol_fini_impl(); unregister_blkdev(zvol_major, ZVOL_DRIVER); - if (ztqs->tqs_taskq == NULL) { - ASSERT3U(ztqs->tqs_cnt, ==, 0); - } else { - for (uint_t i = 0; i < ztqs->tqs_cnt; i++) { - ASSERT3P(ztqs->tqs_taskq[i], !=, NULL); - taskq_destroy(ztqs->tqs_taskq[i]); - } - kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt * - sizeof (taskq_t *)); - ztqs->tqs_taskq = NULL; - } + zvol_fini_impl(); ida_destroy(&zvol_ida); } -module_param(zvol_inhibit_dev, uint, 0644); -MODULE_PARM_DESC(zvol_inhibit_dev, "Do not create zvol device nodes"); - module_param(zvol_major, uint, 0444); MODULE_PARM_DESC(zvol_major, "Major number for zvol device"); -module_param(zvol_threads, uint, 0444); -MODULE_PARM_DESC(zvol_threads, "Number of threads to handle I/O requests. Set" - "to 0 to use all active CPUs"); - -module_param(zvol_request_sync, uint, 0644); -MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests"); - module_param(zvol_max_discard_blocks, ulong, 0444); MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard"); -module_param(zvol_num_taskqs, uint, 0444); -MODULE_PARM_DESC(zvol_num_taskqs, "Number of zvol taskqs"); - -module_param(zvol_prefetch_bytes, uint, 0644); -MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end"); - -module_param(zvol_volmode, uint, 0644); -MODULE_PARM_DESC(zvol_volmode, "Default volmode property value"); - module_param(zvol_blk_mq_queue_depth, uint, 0644); MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth"); |