diff options
Diffstat (limited to 'module/os/freebsd/zfs/zvol_os.c')
-rw-r--r-- | module/os/freebsd/zfs/zvol_os.c | 150 |
1 files changed, 106 insertions, 44 deletions
diff --git a/module/os/freebsd/zfs/zvol_os.c b/module/os/freebsd/zfs/zvol_os.c index 1011aaf68ac6..ddb20b031448 100644 --- a/module/os/freebsd/zfs/zvol_os.c +++ b/module/os/freebsd/zfs/zvol_os.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -30,6 +30,7 @@ * Copyright (c) 2012, 2017 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2014 Integros [integros.com] + * Copyright (c) 2024, Klara, Inc. */ /* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */ @@ -92,6 +93,7 @@ #include <sys/zio_checksum.h> #include <sys/zil_impl.h> #include <sys/filio.h> +#include <sys/freebsd_event.h> #include <geom/geom.h> #include <sys/zvol.h> @@ -122,7 +124,7 @@ struct zvol_state_os { /* volmode=dev */ struct zvol_state_dev { struct cdev *zsd_cdev; - uint64_t zsd_sync_cnt; + struct selinfo zsd_selinfo; } _zso_dev; /* volmode=geom */ @@ -167,6 +169,7 @@ static d_ioctl_t zvol_cdev_ioctl; static d_read_t zvol_cdev_read; static d_write_t zvol_cdev_write; static d_strategy_t zvol_geom_bio_strategy; +static d_kqfilter_t zvol_cdev_kqfilter; static struct cdevsw zvol_cdevsw = { .d_name = "zvol", @@ -178,6 +181,16 @@ static struct cdevsw zvol_cdevsw = { .d_read = zvol_cdev_read, .d_write = zvol_cdev_write, .d_strategy = zvol_geom_bio_strategy, + .d_kqfilter = zvol_cdev_kqfilter, +}; + +static void zvol_filter_detach(struct knote *kn); +static int zvol_filter_vnode(struct knote *kn, long hint); + +static struct filterops zvol_filterops_vnode = { + .f_isfd = 1, + .f_detach = zvol_filter_detach, + .f_event = zvol_filter_vnode, }; extern uint_t zfs_geom_probe_vdev_key; @@ -238,7 +251,7 @@ retry: } mutex_enter(&zv->zv_state_lock); - if (zv->zv_zso->zso_dying) { + if (zv->zv_zso->zso_dying || zv->zv_flags & ZVOL_REMOVING) { rw_exit(&zvol_state_lock); err = SET_ERROR(ENXIO); goto out_zv_locked; @@ -280,6 +293,7 @@ retry: if (!mutex_tryenter(&spa_namespace_lock)) { mutex_exit(&zv->zv_state_lock); rw_exit(&zv->zv_suspend_lock); + drop_suspend = B_FALSE; kern_yield(PRI_USER); goto retry; } else { @@ -602,6 +616,49 @@ zvol_geom_bio_getattr(struct bio *bp) } static void +zvol_filter_detach(struct knote *kn) +{ + zvol_state_t *zv; + struct zvol_state_dev *zsd; + + zv = kn->kn_hook; + zsd = &zv->zv_zso->zso_dev; + + knlist_remove(&zsd->zsd_selinfo.si_note, kn, 0); +} + +static int +zvol_filter_vnode(struct knote *kn, long hint) +{ + kn->kn_fflags |= kn->kn_sfflags & hint; + + return (kn->kn_fflags != 0); +} + +static int +zvol_cdev_kqfilter(struct cdev *dev, struct knote *kn) +{ + zvol_state_t *zv; + struct zvol_state_dev *zsd; + + zv = dev->si_drv2; + zsd = &zv->zv_zso->zso_dev; + + if (kn->kn_filter != EVFILT_VNODE) + return (EINVAL); + + /* XXX: extend support for other NOTE_* events */ + if (kn->kn_sfflags != NOTE_ATTRIB) + return (EINVAL); + + kn->kn_fop = &zvol_filterops_vnode; + kn->kn_hook = zv; + knlist_add(&zsd->zsd_selinfo.si_note, kn, 0); + + return (0); +} + +static void zvol_geom_bio_strategy(struct bio *bp) { zvol_state_t *zv; @@ -613,7 +670,7 @@ zvol_geom_bio_strategy(struct bio *bp) int error = 0; boolean_t doread = B_FALSE; boolean_t is_dumpified; - boolean_t sync; + boolean_t commit; if (bp->bio_to) zv = bp->bio_to->private; @@ -627,6 +684,11 @@ zvol_geom_bio_strategy(struct bio *bp) rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); + if (zv->zv_flags & ZVOL_REMOVING) { + error = SET_ERROR(ENXIO); + goto resume; + } + switch (bp->bio_cmd) { case BIO_READ: doread = B_TRUE; @@ -640,7 +702,7 @@ zvol_geom_bio_strategy(struct bio *bp) } zvol_ensure_zilog(zv); if (bp->bio_cmd == BIO_FLUSH) - goto sync; + goto commit; break; default: error = SET_ERROR(EOPNOTSUPP); @@ -662,7 +724,7 @@ zvol_geom_bio_strategy(struct bio *bp) } is_dumpified = B_FALSE; - sync = !doread && !is_dumpified && + commit = !doread && !is_dumpified && zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; /* @@ -678,7 +740,7 @@ zvol_geom_bio_strategy(struct bio *bp) if (error != 0) { dmu_tx_abort(tx); } else { - zvol_log_truncate(zv, tx, off, resid, sync); + zvol_log_truncate(zv, tx, off, resid); dmu_tx_commit(tx); error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, off, resid); @@ -699,7 +761,7 @@ zvol_geom_bio_strategy(struct bio *bp) dmu_tx_abort(tx); } else { dmu_write(os, ZVOL_OBJ, off, size, addr, tx); - zvol_log_write(zv, tx, off, size, sync); + zvol_log_write(zv, tx, off, size, commit); dmu_tx_commit(tx); } } @@ -737,8 +799,8 @@ unlock: break; } - if (sync) { -sync: + if (commit) { +commit: zil_commit(zv->zv_zilog, ZVOL_OBJ); } resume: @@ -776,6 +838,7 @@ zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag) (zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize)) return (SET_ERROR(EIO)); + rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); ssize_t start_resid = zfs_uio_resid(&uio); lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio), zfs_uio_resid(&uio), RL_READER); @@ -797,6 +860,7 @@ zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag) zfs_rangelock_exit(lr); int64_t nread = start_resid - zfs_uio_resid(&uio); dataset_kstats_update_read_kstats(&zv->zv_kstat, nread); + rw_exit(&zv->zv_suspend_lock); return (error); } @@ -808,7 +872,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) uint64_t volsize; zfs_locked_range_t *lr; int error = 0; - boolean_t sync; + boolean_t commit; zfs_uio_t uio; zv = dev->si_drv2; @@ -822,7 +886,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) return (SET_ERROR(EIO)); ssize_t start_resid = zfs_uio_resid(&uio); - sync = (ioflag & IO_SYNC) || + commit = (ioflag & IO_SYNC) || (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); @@ -846,7 +910,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) } error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx); if (error == 0) - zvol_log_write(zv, tx, off, bytes, sync); + zvol_log_write(zv, tx, off, bytes, commit); dmu_tx_commit(tx); if (error) @@ -855,7 +919,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) zfs_rangelock_exit(lr); int64_t nwritten = start_resid - zfs_uio_resid(&uio); dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten); - if (sync) + if (commit) zil_commit(zv->zv_zilog, ZVOL_OBJ); rw_exit(&zv->zv_suspend_lock); return (error); @@ -865,7 +929,6 @@ static int zvol_cdev_open(struct cdev *dev, int flags, int fmt, struct thread *td) { zvol_state_t *zv; - struct zvol_state_dev *zsd; int err = 0; boolean_t drop_suspend = B_FALSE; @@ -927,6 +990,7 @@ retry: if (!mutex_tryenter(&spa_namespace_lock)) { mutex_exit(&zv->zv_state_lock); rw_exit(&zv->zv_suspend_lock); + drop_suspend = B_FALSE; kern_yield(PRI_USER); goto retry; } else { @@ -959,13 +1023,6 @@ retry: } zv->zv_open_count++; - if (flags & O_SYNC) { - zsd = &zv->zv_zso->zso_dev; - zsd->zsd_sync_cnt++; - if (zsd->zsd_sync_cnt == 1 && - (zv->zv_flags & ZVOL_WRITTEN_TO) != 0) - zil_async_to_sync(zv->zv_zilog, ZVOL_OBJ); - } out_opened: if (zv->zv_open_count == 0) { zvol_last_close(zv); @@ -983,7 +1040,6 @@ static int zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td) { zvol_state_t *zv; - struct zvol_state_dev *zsd; boolean_t drop_suspend = B_TRUE; rw_enter(&zvol_state_lock, ZVOL_RW_READER); @@ -1033,10 +1089,6 @@ zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td) * You may get multiple opens, but only one close. */ zv->zv_open_count--; - if (flags & O_SYNC) { - zsd = &zv->zv_zso->zso_dev; - zsd->zsd_sync_cnt--; - } if (zv->zv_open_count == 0) { ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock)); @@ -1105,7 +1157,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, dmu_tx_abort(tx); } else { sync = (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); - zvol_log_truncate(zv, tx, offset, length, sync); + zvol_log_truncate(zv, tx, offset, length); dmu_tx_commit(tx); error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length); @@ -1156,7 +1208,10 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, hole = (cmd == FIOSEEKHOLE); noff = *off; + lr = zfs_rangelock_enter(&zv->zv_rangelock, 0, UINT64_MAX, + RL_READER); error = dmu_offset_next(zv->zv_objset, ZVOL_OBJ, hole, &noff); + zfs_rangelock_exit(lr); *off = noff; break; } @@ -1189,7 +1244,7 @@ zvol_ensure_zilog(zvol_state_t *zv) } if (zv->zv_zilog == NULL) { zv->zv_zilog = zil_open(zv->zv_objset, - zvol_get_data); + zvol_get_data, &zv->zv_kstat.dk_zil_sums); zv->zv_flags |= ZVOL_WRITTEN_TO; /* replay / destroy done in zvol_os_create_minor() */ VERIFY0(zv->zv_zilog->zl_header->zh_flags & @@ -1212,7 +1267,7 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname) ASSERT(MUTEX_HELD(&zv->zv_state_lock)); /* Move to a new hashtable entry. */ - zv->zv_hash = zvol_name_hash(zv->zv_name); + zv->zv_hash = zvol_name_hash(newname); hlist_del(&zv->zv_hlink); hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash)); @@ -1263,15 +1318,12 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname) args.mda_si_drv2 = zv; if (make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, newname) == 0) { -#if __FreeBSD_version > 1300130 dev->si_iosize_max = maxphys; -#else - dev->si_iosize_max = MAXPHYS; -#endif zsd->zsd_cdev = dev; } } strlcpy(zv->zv_name, newname, sizeof (zv->zv_name)); + dataset_kstats_rename(&zv->zv_kstat, newname); } /* @@ -1306,10 +1358,13 @@ zvol_os_free(zvol_state_t *zv) if (dev != NULL) { ASSERT3P(dev->si_drv2, ==, NULL); destroy_dev(dev); + knlist_clear(&zsd->zsd_selinfo.si_note, 0); + knlist_destroy(&zsd->zsd_selinfo.si_note); } } mutex_destroy(&zv->zv_state_lock); + cv_destroy(&zv->zv_removing_cv); dataset_kstats_destroy(&zv->zv_kstat); kmem_free(zv->zv_zso, sizeof (struct zvol_state_os)); kmem_free(zv, sizeof (zvol_state_t)); @@ -1328,6 +1383,7 @@ zvol_os_create_minor(const char *name) uint64_t volsize; uint64_t volmode, hash; int error; + bool replayed_zil = B_FALSE; ZFS_LOG(1, "Creating ZVOL %s...", name); hash = zvol_name_hash(name); @@ -1366,6 +1422,7 @@ zvol_os_create_minor(const char *name) zv = kmem_zalloc(sizeof (*zv), KM_SLEEP); zv->zv_hash = hash; mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&zv->zv_removing_cv, NULL, CV_DEFAULT, NULL); zv->zv_zso = kmem_zalloc(sizeof (struct zvol_state_os), KM_SLEEP); zv->zv_volmode = volmode; if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { @@ -1403,12 +1460,10 @@ zvol_os_create_minor(const char *name) args.mda_si_drv2 = zv; if (make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, name) == 0) { -#if __FreeBSD_version > 1300130 dev->si_iosize_max = maxphys; -#else - dev->si_iosize_max = MAXPHYS; -#endif zsd->zsd_cdev = dev; + knlist_init_sx(&zsd->zsd_selinfo.si_note, + &zv->zv_state_lock); } } (void) strlcpy(zv->zv_name, name, MAXPATHLEN); @@ -1422,18 +1477,21 @@ zvol_os_create_minor(const char *name) zv->zv_volsize = volsize; zv->zv_objset = os; + ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL); + error = dataset_kstats_create(&zv->zv_kstat, zv->zv_objset); + if (error) + goto out_dmu_objset_disown; ASSERT3P(zv->zv_zilog, ==, NULL); - zv->zv_zilog = zil_open(os, zvol_get_data); + zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums); if (spa_writeable(dmu_objset_spa(os))) { if (zil_replay_disable) - zil_destroy(zv->zv_zilog, B_FALSE); + replayed_zil = zil_destroy(zv->zv_zilog, B_FALSE); else - zil_replay(os, zv, zvol_replay_vector); + replayed_zil = zil_replay(os, zv, zvol_replay_vector); } - zil_close(zv->zv_zilog); + if (replayed_zil) + zil_close(zv->zv_zilog); zv->zv_zilog = NULL; - ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL); - dataset_kstats_create(&zv->zv_kstat, zv->zv_objset); /* TODO: prefetch for geom tasting */ @@ -1513,6 +1571,10 @@ zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize) g_resize_provider(pp, zv->zv_volsize); g_topology_unlock(); + } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { + struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev; + + KNOTE_UNLOCKED(&zsd->zsd_selinfo.si_note, NOTE_ATTRIB); } return (0); } |