Diffstat (limited to 'sys/contrib/openzfs/module/os/linux/zfs')
14 files changed, 247 insertions, 152 deletions
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
index 248c9b7a6d3b..8a8316f63c48 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
@@ -863,9 +863,9 @@ abd_iter_advance(struct abd_iter *aiter, size_t amount)
	 * Ensure that last chunk is not in use. abd_iterate_*() must clear
	 * this state (directly or abd_iter_unmap()) before advancing.
	 */
-	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
+	ASSERT0P(aiter->iter_mapaddr);
 	ASSERT0(aiter->iter_mapsize);
-	ASSERT3P(aiter->iter_page, ==, NULL);
+	ASSERT0P(aiter->iter_page);
 	ASSERT0(aiter->iter_page_doff);
 	ASSERT0(aiter->iter_page_dsize);
@@ -897,7 +897,7 @@ abd_iter_map(struct abd_iter *aiter)
 	void *paddr;
 	size_t offset = 0;
-	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
+	ASSERT0P(aiter->iter_mapaddr);
 	ASSERT0(aiter->iter_mapsize);
 	/* There's nothing left to iterate over, so do nothing */
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
index 154ca22d9513..830fad7fe793 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
@@ -552,7 +552,7 @@ vdev_bio_associate_blkg(struct bio *bio)
 #endif
 	ASSERT3P(q, !=, NULL);
-	ASSERT3P(bio->bi_blkg, ==, NULL);
+	ASSERT0P(bio->bi_blkg);
 	if (q->root_blkg && vdev_blkg_tryget(q->root_blkg))
 		bio->bi_blkg = q->root_blkg;
@@ -574,7 +574,7 @@ vdev_bio_set_dev(struct bio *bio, struct block_device *bdev)
 	bio->bi_bdev = bdev;
 	ASSERT3P(q, !=, NULL);
-	ASSERT3P(bio->bi_blkg, ==, NULL);
+	ASSERT0P(bio->bi_blkg);
 	if (q->root_blkg && vdev_blkg_tryget(q->root_blkg))
 		bio->bi_blkg = q->root_blkg;
@@ -806,7 +806,7 @@ vbio_completion(struct bio *bio)
	 * here; instead we stash vbio on the zio and take care of it in the
	 * done callback.
	 */
-	ASSERT3P(zio->io_bio, ==, NULL);
+	ASSERT0P(zio->io_bio);
 	zio->io_bio = vbio;
 	zio_delay_interrupt(zio);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
index 1b169122f25b..daa4b5776837 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
@@ -1900,7 +1900,7 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
 	if (!(flag & IS_ROOT_NODE) && (dzp->z_pflags & ZFS_INHERIT_ACE) &&
 	    !(dzp->z_pflags & ZFS_XATTR)) {
-		VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE,
+		VERIFY0(zfs_acl_node_read(dzp, B_TRUE,
 		    &paclp, B_FALSE));
 		acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
 		    vap->va_mode, paclp, acl_ids->z_mode, &need_chmod);
@@ -2204,8 +2204,8 @@ top:
 	}
 	error = zfs_aclset_common(zp, aclp, cr, tx);
-	ASSERT(error == 0);
-	ASSERT(zp->z_acl_cached == NULL);
+	ASSERT0(error);
+	ASSERT0P(zp->z_acl_cached);
 	zp->z_acl_cached = aclp;
 	if (fuid_dirtied)
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
index 6552a933ce0a..fb4de50480a3 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
@@ -494,9 +494,9 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
 	if (!creation)
 		now = current_time(ip);
 	zp = ITOZ(ip);
-	ASSERT3P(zp->z_dirlocks, ==, NULL);
-	ASSERT3P(zp->z_acl_cached, ==, NULL);
-	ASSERT3P(zp->z_xattr_cached, ==, NULL);
+	ASSERT0P(zp->z_dirlocks);
+	ASSERT0P(zp->z_acl_cached);
+	ASSERT0P(zp->z_xattr_cached);
 	zp->z_id = id;
 	zp->z_unlinked = B_FALSE;
 	zp->z_atime_dirty = B_FALSE;
@@ -590,7 +590,7 @@ zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id,
 int
 zfsctl_create(zfsvfs_t *zfsvfs)
 {
-	ASSERT(zfsvfs->z_ctldir == NULL);
+	ASSERT0P(zfsvfs->z_ctldir);
 	zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT,
 	    &zpl_fops_root, &zpl_ops_root, 0);
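The assertion churn above (and in the files that follow) swaps open-coded NULL/zero comparisons for the dedicated zero-assertion macros. A minimal standalone sketch of the intended semantics; the real macros live in the ZFS/SPL debug headers, report both operands on failure, and compile away in non-debug builds, none of which this rough approximation reproduces:

    #include <assert.h>
    #include <stddef.h>

    /* Rough stand-ins for the real macros; illustration only. */
    #define ASSERT0(x)   assert((x) == 0)     /* integer expression is zero */
    #define ASSERT0P(x)  assert((x) == NULL)  /* pointer is NULL */

    struct iter { void *mapaddr; size_t mapsize; };

    int
    main(void)
    {
        struct iter it = { .mapaddr = NULL, .mapsize = 0 };

        /* Equivalent in intent to the old ASSERT3P(it.mapaddr, ==, NULL). */
        ASSERT0P(it.mapaddr);
        ASSERT0(it.mapsize);
        return (0);
    }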
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c
index 2f935bb3fc8c..e8de536606e2 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c
@@ -463,7 +463,7 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	ASSERT(zp->z_unlinked);
-	ASSERT(ZTOI(zp)->i_nlink == 0);
+	ASSERT0(ZTOI(zp)->i_nlink);
 	VERIFY3U(0, ==,
 	    zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
@@ -662,8 +662,8 @@ zfs_rmnode(znode_t *zp)
 	uint64_t links;
 	int error;
-	ASSERT(ZTOI(zp)->i_nlink == 0);
-	ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0);
+	ASSERT0(ZTOI(zp)->i_nlink);
+	ASSERT0(atomic_read(&ZTOI(zp)->i_count));
 	/*
	 * If this is an attribute directory, purge its contents.
@@ -710,7 +710,7 @@ zfs_rmnode(znode_t *zp)
 	    &xattr_obj, sizeof (xattr_obj));
 	if (error == 0 && xattr_obj) {
 		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
-		ASSERT(error == 0);
+		ASSERT0(error);
 	}
 	acl_obj = zfs_external_acl(zp);
@@ -744,12 +744,12 @@ zfs_rmnode(znode_t *zp)
 	}
 	if (xzp) {
-		ASSERT(error == 0);
+		ASSERT0(error);
 		mutex_enter(&xzp->z_lock);
 		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
 		clear_nlink(ZTOI(xzp));		/* no more links to it */
 		links = 0;
-		VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
+		VERIFY0(sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
 		    &links, sizeof (links), tx));
 		mutex_exit(&xzp->z_lock);
 		zfs_unlinked_add(xzp, tx);
@@ -872,7 +872,7 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
 		    ctime);
 	}
 	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
-	ASSERT(error == 0);
+	ASSERT0(error);
 	mutex_exit(&zp->z_lock);
@@ -894,7 +894,7 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
 	    &dzp->z_pflags, sizeof (dzp->z_pflags));
 	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
 	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
-	ASSERT(error == 0);
+	ASSERT0(error);
 	mutex_exit(&dzp->z_lock);
 	return (0);
@@ -986,7 +986,7 @@ zfs_drop_nlink_locked(znode_t *zp, dmu_tx_t *tx, boolean_t *unlinkedp)
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, &links, sizeof (links));
 	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
-	ASSERT3U(error, ==, 0);
+	ASSERT0(error);
 	if (unlinkedp != NULL)
 		*unlinkedp = unlinked;
@@ -1058,7 +1058,7 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
 		/* The only error is !zfs_dirempty() and we checked earlier. */
 		error = zfs_drop_nlink_locked(zp, tx, &unlinked);
-		ASSERT3U(error, ==, 0);
+		ASSERT0(error);
 		mutex_exit(&zp->z_lock);
 	} else {
 		error = zfs_dropname(dl, zp, dzp, tx, flag);
@@ -1083,7 +1083,7 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
 		    NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
 		zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
 		error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
-		ASSERT(error == 0);
+		ASSERT0(error);
 		mutex_exit(&dzp->z_lock);
 	if (unlinkedp != NULL)
@@ -1167,7 +1167,7 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xzpp, cred_t *cr)
 	ASSERT(error == 0 && parent == zp->z_id);
 #endif
-	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
+	VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
 	    sizeof (xzp->z_id), tx));
 	if (!zp->z_unlinked)
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c
index 1c187d7b9cab..895d80b2d79e 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_sysfs.c
@@ -223,7 +223,7 @@ zfs_kobj_add(zfs_mod_kobj_t *zkobj, struct kobject *parent, const char *name)
 {
 	/* zko_default_group.attrs must be NULL terminated */
 	ASSERT(zkobj->zko_default_group.attrs != NULL);
-	ASSERT(zkobj->zko_default_group.attrs[zkobj->zko_attr_count] == NULL);
+	ASSERT0P(zkobj->zko_default_group.attrs[zkobj->zko_attr_count]);
 	kobject_init(&zkobj->zko_kobj, &zkobj->zko_kobj_type);
 	return (kobject_add(&zkobj->zko_kobj, parent, name));
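A note on the ASSERT0/VERIFY0 split seen in zfs_dir.c above: VERIFY-family checks stay in production builds, so they wrap calls whose side effects are required (the sa_update() here must actually run), while ASSERT-family checks disappear in non-debug builds. A hedged, standalone sketch of why a required call must never live inside a compiled-out assertion:

    #include <assert.h>

    /* Illustrative only: models a call whose side effect is required. */
    static int
    update_attr(int *attr, int val)
    {
        *attr = val;    /* side effect that must happen in every build */
        return (0);     /* 0 on success, errno-style value on failure */
    }

    int
    main(void)
    {
        int attr = -1;

        /*
         * If the whole call sat inside an assertion that a non-debug build
         * compiles out, the update would silently never run. VERIFY0()-style
         * checks avoid that by always evaluating their argument.
         */
        int err = update_attr(&attr, 42);
        assert(err == 0);   /* stand-in for VERIFY0(update_attr(...)) */
        return (0);
    }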
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
index 396faef8f646..cd606e667bff 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -279,19 +279,14 @@ zfs_sync(struct super_block *sb, int wait, cred_t *cr)
 		return (err);
 	/*
-	 * If the pool is suspended, just return an error. This is to help
-	 * with shutting down with pools suspended, as we don't want to block
-	 * in that case.
+	 * Sync any pending writes, but do not block if the pool is suspended.
+	 * This is to help with shutting down with pools suspended, as we don't
+	 * want to block in that case.
 	 */
-	if (spa_suspended(zfsvfs->z_os->os_spa)) {
-		zfs_exit(zfsvfs, FTAG);
-		return (SET_ERROR(EIO));
-	}
-
-	zil_commit(zfsvfs->z_log, 0);
+	err = zil_commit_flags(zfsvfs->z_log, 0, ZIL_COMMIT_NOW);
 	zfs_exit(zfsvfs, FTAG);
-	return (0);
+	return (err);
 }
 static void
@@ -883,7 +878,7 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
	 * operations out since we closed the ZIL.
	 */
 	if (mounting) {
-		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
+		ASSERT0P(zfsvfs->z_kstat.dk_kstats);
 		error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
 		if (error)
 			return (error);
@@ -1676,7 +1671,7 @@ zfs_umount(struct super_block *sb)
 	if (zfsvfs->z_arc_prune != NULL)
 		arc_remove_prune_callback(zfsvfs->z_arc_prune);
-	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
+	VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE));
 	os = zfsvfs->z_os;
 	/*
@@ -1802,8 +1797,8 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 		ASSERT(*ipp != NULL);
 		if (object == ZFSCTL_INO_SNAPDIR) {
-			VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp,
-			    0, kcred, NULL, NULL) == 0);
+			VERIFY0(zfsctl_root_lookup(*ipp, "snapshot", ipp,
+			    0, kcred, NULL, NULL));
 		} else {
 			/*
			 * Must have an existing ref, so igrab()
@@ -1905,7 +1900,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
 		goto bail;
 	ds->ds_dir->dd_activity_cancelled = B_FALSE;
-	VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
+	VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE));
 	zfs_set_fuid_feature(zfsvfs);
 	zfsvfs->z_rollback_time = jiffies;
@@ -2078,7 +2073,7 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
 		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
 		ASSERT0(error);
-		VERIFY(0 == sa_set_sa_object(os, sa_obj));
+		VERIFY0(sa_set_sa_object(os, sa_obj));
 		sa_register_update_callback(os, zfs_sa_upgrade);
 	}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
index 6a2fc5ad7935..6106726651a3 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
@@ -841,8 +841,8 @@ out:
 		*zpp = zp;
 	}
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
+	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		error = zil_commit(zilog, 0);
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
@@ -1203,8 +1203,8 @@ out:
 		zfs_zrele_async(xzp);
 	}
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
+	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		error = zil_commit(zilog, 0);
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
@@ -1392,14 +1392,15 @@ out:
 	zfs_dirent_unlock(dl);
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
-
 	if (error != 0) {
 		zrele(zp);
 	} else {
 		zfs_znode_update_vfs(dzp);
 		zfs_znode_update_vfs(zp);
+
+		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+			error = zil_commit(zilog, 0);
+
 	}
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
@@ -1528,8 +1529,8 @@ out:
 	zfs_znode_update_vfs(zp);
 	zrele(zp);
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
+	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		error = zil_commit(zilog, 0);
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
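The zfs_vfsops.c and zfs_vnops_os.c hunks above all follow the same pattern: zil_commit() can now fail (for example when the log cannot be written because the pool has suspended), so sync-always paths only commit after an otherwise successful operation and pass any commit failure back to the caller. A condensed, standalone sketch of that calling convention; the stubbed commit function and its error value are assumptions for illustration, not the real ZIL interface:

    #include <errno.h>
    #include <stdio.h>

    #define ZFS_SYNC_ALWAYS 1

    /* Stub: pretend the pool suspended and the commit could not complete. */
    static int
    zil_commit_stub(int fail)
    {
        return (fail ? EIO : 0);
    }

    /* The pattern now used by the changed VFS operations. */
    static int
    vfs_op(int os_sync, int zil_fail)
    {
        int error = 0;

        /* ... the operation itself would run here ... */

        if (error == 0 && os_sync == ZFS_SYNC_ALWAYS)
            error = zil_commit_stub(zil_fail);

        return (error);
    }

    int
    main(void)
    {
        printf("healthy pool: %d\n", vfs_op(ZFS_SYNC_ALWAYS, 0));
        printf("suspended pool: %d\n", vfs_op(ZFS_SYNC_ALWAYS, 1));
        return (0);
    }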
@@ -2483,10 +2484,10 @@ top:
 			new_mode = zp->z_mode;
 		}
 		err = zfs_acl_chown_setattr(zp);
-		ASSERT(err == 0);
+		ASSERT0(err);
 		if (attrzp) {
 			err = zfs_acl_chown_setattr(attrzp);
-			ASSERT(err == 0);
+			ASSERT0(err);
 		}
 	}
@@ -2600,7 +2601,7 @@ out:
 	if (err == 0 && xattr_count > 0) {
 		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
 		    xattr_count, tx);
-		ASSERT(err2 == 0);
+		ASSERT0(err2);
 	}
 	if (aclp)
@@ -2630,8 +2631,8 @@ out:
 	}
 out2:
-	if (os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
+	if (err == 0 && os->os_sync == ZFS_SYNC_ALWAYS)
+		err = zil_commit(zilog, 0);
 out3:
 	kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * bulks);
@@ -3157,7 +3158,7 @@ top:
		 * zfs_link_create() to add back the same entry, but with a new
		 * dnode (szp), should not fail.
		 */
-		ASSERT3P(tzp, ==, NULL);
+		ASSERT0P(tzp);
 		goto commit_link_tzp;
 	}
@@ -3235,8 +3236,8 @@ out:
 	zfs_dirent_unlock(sdl);
 	zfs_dirent_unlock(tdl);
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
+	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		error = zil_commit(zilog, 0);
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
@@ -3436,7 +3437,7 @@ top:
 		*zpp = zp;
 		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-			zil_commit(zilog, 0);
+			error = zil_commit(zilog, 0);
 	} else {
 		zrele(zp);
 	}
@@ -3654,8 +3655,8 @@ top:
	 * operation are sync safe.
	 */
 	if (is_tmpfile) {
-		VERIFY(zap_remove_int(zfsvfs->z_os,
-		    zfsvfs->z_unlinkedobj, szp->z_id, tx) == 0);
+		VERIFY0(zap_remove_int(zfsvfs->z_os,
+		    zfsvfs->z_unlinkedobj, szp->z_id, tx));
 	} else {
 		if (flags & FIGNORECASE)
 			txtype |= TX_CI;
@@ -3670,18 +3671,20 @@ top:
 	zfs_dirent_unlock(dl);
-	if (!is_tmpfile && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
-
-	if (is_tmpfile && zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
-		txg_wait_flag_t wait_flags =
-		    spa_get_failmode(dmu_objset_spa(zfsvfs->z_os)) ==
-		    ZIO_FAILURE_MODE_CONTINUE ? TXG_WAIT_SUSPEND : 0;
-		error = txg_wait_synced_flags(dmu_objset_pool(zfsvfs->z_os),
-		    txg, wait_flags);
-		if (error != 0) {
-			ASSERT3U(error, ==, ESHUTDOWN);
-			error = SET_ERROR(EIO);
+	if (error == 0) {
+		if (!is_tmpfile && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+			error = zil_commit(zilog, 0);
+
+		if (is_tmpfile && zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
+			txg_wait_flag_t wait_flags =
+			    spa_get_failmode(dmu_objset_spa(zfsvfs->z_os)) ==
+			    ZIO_FAILURE_MODE_CONTINUE ? TXG_WAIT_SUSPEND : 0;
+			error = txg_wait_synced_flags(
+			    dmu_objset_pool(zfsvfs->z_os), txg, wait_flags);
+			if (error != 0) {
+				ASSERT3U(error, ==, ESHUTDOWN);
+				error = SET_ERROR(EIO);
+			}
 		}
 	}
@@ -3691,16 +3694,42 @@ top:
 	return (error);
 }
-static void
-zfs_putpage_commit_cb(void *arg)
+/* Finish page writeback. */
+static inline void
+zfs_page_writeback_done(struct page *pp, int err)
 {
-	struct page *pp = arg;
+	if (err != 0) {
+		/*
+		 * Writeback failed. Re-dirty the page. It was undirtied before
+		 * the IO was issued (in zfs_putpage() or write_cache_pages()).
+		 * The kernel only considers writeback for dirty pages; if we
+		 * don't do this, it is eligible for eviction without being
+		 * written out, which we definitely don't want.
+		 */
+#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
+		filemap_dirty_folio(page_mapping(pp), page_folio(pp));
+#else
+		__set_page_dirty_nobuffers(pp);
+#endif
+	}
 	ClearPageError(pp);
 	end_page_writeback(pp);
 }
 /*
+ * ZIL callback for page writeback. Passes to zfs_log_write() in zfs_putpage()
+ * for syncing writes. Called when the ZIL itx has been written to the log or
+ * the whole txg syncs, or if the ZIL crashes or the pool suspends. Any failure
+ * is passed as `err`.
+ */
+static void
+zfs_putpage_commit_cb(void *arg, int err)
+{
+	zfs_page_writeback_done(arg, err);
+}
+
+/*
  * Push a page out to disk, once the page is on stable storage the
  * registered commit callback will be run as notification of completion.
  *
@@ -3853,16 +3882,15 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	err = dmu_tx_assign(tx, DMU_TX_WAIT);
 	if (err != 0) {
 		dmu_tx_abort(tx);
-#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
-		filemap_dirty_folio(page_mapping(pp), page_folio(pp));
-#else
-		__set_page_dirty_nobuffers(pp);
-#endif
-		ClearPageError(pp);
-		end_page_writeback(pp);
+		zfs_page_writeback_done(pp, err);
 		zfs_rangelock_exit(lr);
 		zfs_exit(zfsvfs, FTAG);
-		return (err);
+
+		/*
+		 * Don't return error for an async writeback; we've re-dirtied
+		 * the page so it will be tried again some other time.
+		 */
+		return (for_sync ? err : 0);
 	}
 	va = kmap(pp);
@@ -3916,7 +3944,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
	 * ALL, zfs_putpage should do it.
	 *
	 * Summary:
-	 *   for_sync: 0=unlock immediately; 1 unlock once on disk
+	 *   for_sync: 0=unlock immediately; 1=unlock once on disk
	 *   sync_mode: NONE=caller will commit; ALL=we will commit
	 */
 	boolean_t need_commit = (wbc->sync_mode != WB_SYNC_NONE);
@@ -3931,16 +3959,24 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	    B_FALSE, for_sync ? zfs_putpage_commit_cb : NULL, pp);
 	if (!for_sync) {
-		ClearPageError(pp);
-		end_page_writeback(pp);
+		/*
+		 * Async writeback is logged and written to the DMU, so page
+		 * can now be unlocked.
+		 */
+		zfs_page_writeback_done(pp, 0);
 	}
 	dmu_tx_commit(tx);
 	zfs_rangelock_exit(lr);
-	if (need_commit)
-		zil_commit(zfsvfs->z_log, zp->z_id);
+	if (need_commit) {
+		err = zil_commit_flags(zfsvfs->z_log, zp->z_id, ZIL_COMMIT_NOW);
+		if (err != 0) {
+			zfs_exit(zfsvfs, FTAG);
+			return (err);
+		}
+	}
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
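The zfs_putpage() rework above hinges on one rule: a page that was undirtied for writeback must be re-dirtied if the write never reached disk, otherwise the kernel may evict it and the data is lost. A small state-machine sketch of that rule (not kernel code; page state is modelled as plain flags):

    #include <stdbool.h>
    #include <stdio.h>

    struct fake_page {
        bool dirty;
        bool writeback;
    };

    /* Mirrors the idea of zfs_page_writeback_done(): on error, re-dirty. */
    static void
    writeback_done(struct fake_page *pg, int err)
    {
        if (err != 0)
            pg->dirty = true;   /* keep the page eligible for a retry */
        pg->writeback = false;  /* end_page_writeback() equivalent */
    }

    int
    main(void)
    {
        struct fake_page pg = { .dirty = false, .writeback = true };

        writeback_done(&pg, 5 /* pretend the DMU write failed */);
        printf("dirty=%d writeback=%d\n", pg.dirty, pg.writeback);
        return (0);
    }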
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode_os.c
index 7683eeb3cf9f..bcaabeb32b8a 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode_os.c
@@ -144,9 +144,9 @@ zfs_znode_cache_destructor(void *buf, void *arg)
 	rw_destroy(&zp->z_xattr_lock);
 	zfs_rangelock_fini(&zp->z_rangelock);
-	ASSERT3P(zp->z_dirlocks, ==, NULL);
-	ASSERT3P(zp->z_acl_cached, ==, NULL);
-	ASSERT3P(zp->z_xattr_cached, ==, NULL);
+	ASSERT0P(zp->z_dirlocks);
+	ASSERT0P(zp->z_acl_cached);
+	ASSERT0P(zp->z_xattr_cached);
 }
 static int
@@ -178,13 +178,13 @@ zfs_znode_init(void)
	 * backed by kmalloc() when on the Linux slab in order that any
	 * wait_on_bit() operations on the related inode operate properly.
	 */
-	ASSERT(znode_cache == NULL);
+	ASSERT0P(znode_cache);
 	znode_cache = kmem_cache_create("zfs_znode_cache",
 	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
 	    zfs_znode_cache_destructor, NULL, NULL, NULL,
 	    KMC_SLAB | KMC_RECLAIMABLE);
-	ASSERT(znode_hold_cache == NULL);
+	ASSERT0P(znode_hold_cache);
 	znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache",
 	    sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor,
 	    zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0);
@@ -327,10 +327,10 @@ zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
 	mutex_enter(&zp->z_lock);
-	ASSERT(zp->z_sa_hdl == NULL);
-	ASSERT(zp->z_acl_cached == NULL);
+	ASSERT0P(zp->z_sa_hdl);
+	ASSERT0P(zp->z_acl_cached);
 	if (sa_hdl == NULL) {
-		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
+		VERIFY0(sa_handle_get_from_db(zfsvfs->z_os, db, zp,
 		    SA_HDL_SHARED, &zp->z_sa_hdl));
 	} else {
 		zp->z_sa_hdl = sa_hdl;
@@ -530,9 +530,9 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 		return (NULL);
 	zp = ITOZ(ip);
-	ASSERT(zp->z_dirlocks == NULL);
-	ASSERT3P(zp->z_acl_cached, ==, NULL);
-	ASSERT3P(zp->z_xattr_cached, ==, NULL);
+	ASSERT0P(zp->z_dirlocks);
+	ASSERT0P(zp->z_acl_cached);
+	ASSERT0P(zp->z_xattr_cached);
 	zp->z_unlinked = B_FALSE;
 	zp->z_atime_dirty = B_FALSE;
 	zp->z_is_ctldir = B_FALSE;
@@ -611,7 +611,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
	 * processing so do not hash unlinked znodes.
	 */
 	if (links > 0)
-		VERIFY3S(insert_inode_locked(ip), ==, 0);
+		VERIFY0(insert_inode_locked(ip));
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
@@ -811,7 +811,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
 	}
 	/* Now add in all of the "SA" attributes */
-	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
+	VERIFY0(sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
 	    &sa_hdl));
 	/*
@@ -901,7 +901,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
 		    acl_ids->z_fuid, acl_ids->z_fgid);
 	}
-	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
+	VERIFY0(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx));
 	if (!(flag & IS_ROOT_NODE)) {
 		/*
@@ -1200,7 +1200,7 @@ zfs_rezget(znode_t *zp)
 	}
 	rw_exit(&zp->z_xattr_lock);
-	ASSERT(zp->z_sa_hdl == NULL);
+	ASSERT0P(zp->z_sa_hdl);
 	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
 	if (err) {
 		zfs_znode_hold_exit(zfsvfs, zh);
@@ -1314,9 +1314,9 @@ zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
 	zh = zfs_znode_hold_enter(zfsvfs, obj);
 	if (acl_obj) {
 		VERIFY(!zp->z_is_sa);
-		VERIFY(0 == dmu_object_free(os, acl_obj, tx));
+		VERIFY0(dmu_object_free(os, acl_obj, tx));
 	}
-	VERIFY(0 == dmu_object_free(os, obj, tx));
+	VERIFY0(dmu_object_free(os, obj, tx));
 	zfs_znode_dmu_fini(zp);
 	zfs_znode_hold_exit(zfsvfs, zh);
 }
@@ -1536,7 +1536,7 @@ zfs_extend(znode_t *zp, uint64_t end)
 	zp->z_size = end;
-	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)),
+	VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)),
 	    &zp->z_size, sizeof (zp->z_size), tx));
 	zfs_rangelock_exit(lr);
@@ -1726,7 +1726,7 @@ zfs_trunc(znode_t *zp, uint64_t end)
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
 		    NULL, &zp->z_pflags, 8);
 	}
-	VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
+	VERIFY0(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
 	dmu_tx_commit(tx);
 	zfs_rangelock_exit(lr);
@@ -1793,7 +1793,7 @@ log:
 	    NULL, &zp->z_pflags, 8);
 	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
 	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
-	ASSERT(error == 0);
+	ASSERT0(error);
 	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
@@ -1840,7 +1840,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 	moid = MASTER_NODE_OBJ;
 	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, DMU_OT_NONE, 0, tx);
-	ASSERT(error == 0);
+	ASSERT0(error);
 	/*
	 * Set starting attributes.
@@ -1853,7 +1853,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 		const char *name;
 		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
-		VERIFY(nvpair_value_uint64(elem, &val) == 0);
+		VERIFY0(nvpair_value_uint64(elem, &val));
 		name = nvpair_name(elem);
 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
 			if (val < version)
@@ -1861,7 +1861,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 		} else {
 			error = zap_update(os, moid, name, 8, 1, &val, tx);
 		}
-		ASSERT(error == 0);
+		ASSERT0(error);
 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
 			norm = val;
 		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
@@ -1869,7 +1869,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 	}
 	ASSERT(version != 0);
 	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
-	ASSERT(error == 0);
+	ASSERT0(error);
 	/*
	 * Create zap object used for SA attribute registration
@@ -1879,7 +1879,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, DMU_OT_NONE, 0, tx);
 		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
-		ASSERT(error == 0);
+		ASSERT0(error);
 	} else {
 		sa_obj = 0;
 	}
@@ -1889,7 +1889,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
 	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
-	ASSERT(error == 0);
+	ASSERT0(error);
 	/*
	 * Create root znode.  Create minimal znode/inode/zfsvfs/sb
@@ -1922,7 +1922,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table);
-	ASSERT(error == 0);
+	ASSERT0(error);
 	/*
	 * Fold case on file systems that are always or sometimes case
@@ -1946,12 +1946,12 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 		mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
 	}
-	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
+	VERIFY0(zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
 	    cr, NULL, &acl_ids, zfs_init_idmap));
 	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
 	ASSERT3P(zp, ==, rootzp);
 	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
-	ASSERT(error == 0);
+	ASSERT0(error);
 	zfs_acl_ids_free(&acl_ids);
 	atomic_set(&ZTOI(rootzp)->i_count, 0);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
index ef7bd7352084..d07317b0d910 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
@@ -22,6 +22,7 @@
 /*
  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
  * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
+ * Copyright (c) 2025, Klara, Inc.
  */
@@ -106,6 +107,10 @@ zpl_iterate(struct file *filp, struct dir_context *ctx)
 	return (error);
 }
+static inline int
+zpl_write_cache_pages(struct address_space *mapping,
+    struct writeback_control *wbc, void *data);
+
 static int
 zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 {
@@ -115,9 +120,38 @@ zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 	int error;
 	fstrans_cookie_t cookie;
-	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
-	if (error)
-		return (error);
+	/*
+	 * Force dirty pages in the range out to the DMU and the log, ready
+	 * for zil_commit() to write down.
+	 *
+	 * We call write_cache_pages() directly to ensure that zpl_putpage() is
+	 * called with the flags we need. We need WB_SYNC_NONE to avoid a call
+	 * to zil_commit() (since we're doing this as a kind of pre-sync); but
+	 * we do need for_sync so that the pages remain in writeback until
+	 * they're on disk, and so that we get an error if the DMU write fails.
+	 */
+	if (filemap_range_has_page(inode->i_mapping, start, end)) {
+		int for_sync = 1;
+		struct writeback_control wbc = {
+			.sync_mode = WB_SYNC_NONE,
+			.nr_to_write = LONG_MAX,
+			.range_start = start,
+			.range_end = end,
+		};
+		error =
+		    zpl_write_cache_pages(inode->i_mapping, &wbc, &for_sync);
+		if (error != 0) {
+			/*
+			 * Unclear what state things are in. zfs_putpage() will
+			 * ensure the pages remain dirty if they haven't been
+			 * written down to the DMU, but because there may be
+			 * nothing logged, we can't assume that zfs_sync() ->
+			 * zil_commit() will give us a useful error. It's
+			 * safest if we just error out here.
+			 */
+			return (error);
+		}
+	}
 	crhold(cr);
 	cookie = spl_fstrans_mark();
@@ -494,11 +528,30 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	if (sync_mode != wbc->sync_mode) {
 		if ((result = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
 			return (result);
-		if (zfsvfs->z_log != NULL)
-			zil_commit(zfsvfs->z_log, zp->z_id);
+
+		if (zfsvfs->z_log != NULL) {
+			/*
+			 * We don't want to block here if the pool suspends,
+			 * because this is not a syncing op by itself, but
+			 * might be part of one that the caller will
+			 * coordinate.
+			 */
+			result = -zil_commit_flags(zfsvfs->z_log, zp->z_id,
+			    ZIL_COMMIT_NOW);
+		}
+
 		zpl_exit(zfsvfs, FTAG);
 		/*
+		 * If zil_commit_flags() failed, it's unclear what state things
+		 * are currently in. putpage() has written back out what it can
+		 * to the DMU, but it may not be on disk. We have little choice
+		 * but to escape.
+		 */
+		if (result != 0)
+			return (result);
+
+		/*
		 * We need to call write_cache_pages() again (we can't just
		 * return after the commit) because the previous call in
		 * non-SYNC mode does not guarantee that we got all the dirty
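The zpl_fsync() change above splits the sync into two steps: first push dirty pages in the range down to the DMU without committing the log, then let the normal fsync path issue the commit and surface any failure. A rough standalone sketch of that split with both steps stubbed out; the function names here are placeholders, not the kernel or ZFS API:

    #include <stdio.h>

    /* Step 1 placeholder: write dirty pages in [start, end] to the DMU only. */
    static int
    flush_range_to_dmu(long start, long end)
    {
        (void) start; (void) end;
        return (0);     /* nonzero would mean the DMU write failed */
    }

    /* Step 2 placeholder: commit the log records covering those writes. */
    static int
    commit_log(void)
    {
        return (0);
    }

    static int
    fsync_sketch(long start, long end)
    {
        int error = flush_range_to_dmu(start, end);
        if (error != 0)
            return (error); /* bail before the commit, as zpl_fsync() now does */
        return (commit_log());
    }

    int
    main(void)
    {
        printf("fsync: %d\n", fsync_sketch(0, 4096));
        return (0);
    }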
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
index f9f6406f8b47..f97662d052c7 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
@@ -247,7 +247,7 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
	 * and fifos, but we want to know if this behavior ever changes.
	 */
 	if (S_ISSOCK(mode) || S_ISFIFO(mode))
-		ASSERT(rdev == 0);
+		ASSERT0(rdev);
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
index 94dcdd0b887d..53819628627d 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
@@ -49,7 +49,7 @@ zpl_inode_alloc(struct super_block *sb)
 static void
 zpl_inode_free(struct inode *ip)
 {
-	ASSERT(atomic_read(&ip->i_count) == 0);
+	ASSERT0(atomic_read(&ip->i_count));
 	zfs_inode_free(ip);
 }
 #endif
@@ -57,7 +57,7 @@ zpl_inode_free(struct inode *ip)
 static void
 zpl_inode_destroy(struct inode *ip)
 {
-	ASSERT(atomic_read(&ip->i_count) == 0);
+	ASSERT0(atomic_read(&ip->i_count));
 	zfs_inode_destroy(ip);
 }
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
index a098197e7448..d93282db815a 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
@@ -1494,7 +1494,7 @@ zpl_posix_acl_free(void *arg)
 			acl_rel_head = NULL;
 			if (cmpxchg(&acl_rel_tail, &a->next, &acl_rel_head) == &a->next) {
-				ASSERT3P(a->next, ==, NULL);
+				ASSERT0P(a->next);
 				a->next = freelist;
 				freelist = a;
 				break;
@@ -1544,7 +1544,7 @@ zpl_posix_acl_release_impl(struct posix_acl *acl)
 	a->time = ddi_get_lbolt();
 	/* atomically points tail to us and get the previous tail */
 	prev = xchg(&acl_rel_tail, &a->next);
-	ASSERT3P(*prev, ==, NULL);
+	ASSERT0P(*prev);
 	*prev = a;
 	/* if it was empty before, schedule the free task */
 	if (prev == &acl_rel_head)
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
index a7431cc4da9d..a73acdad34ae 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -84,8 +84,9 @@ static unsigned int zvol_blk_mq_blocks_per_thread = 8;
 static inline void
 zvol_end_io(struct bio *bio, struct request *rq, int error)
 {
+	ASSERT3U(error, >=, 0);
 	if (bio) {
-		bio->bi_status = errno_to_bi_status(-error);
+		bio->bi_status = errno_to_bi_status(error);
 		bio_endio(bio);
 	} else {
 		blk_mq_end_request(rq, errno_to_bi_status(error));
 	}
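zvol_end_io() above now expects a positive errno (note the new ASSERT3U(error, >=, 0)), so callers stop negating before the call and the Linux-specific translation stays at the boundary. A standalone sketch of keeping errors positive internally and converting only at the edge; errno_to_status() is a made-up stand-in for errno_to_bi_status():

    #include <errno.h>
    #include <stdio.h>

    /* Stand-in: maps a positive errno to a block-layer-style status code. */
    static int
    errno_to_status(int err)
    {
        switch (err) {
        case 0:     return (0);
        case ENXIO: return (1);
        case EROFS: return (2);
        default:    return (3);
        }
    }

    /* Internal code keeps errors positive ... */
    static void
    end_io(int error)
    {
        /* ... and only the boundary helper translates them. */
        printf("status=%d\n", errno_to_status(error));
    }

    int
    main(void)
    {
        end_io(0);
        end_io(ENXIO);  /* previously this was passed as -SET_ERROR(ENXIO) */
        return (0);
    }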
@@ -208,8 +209,14 @@ zvol_write(zv_request_t *zvr)
 	disk = zv->zv_zso->zvo_disk;
 	/* bio marked as FLUSH need to flush before write */
-	if (io_is_flush(bio, rq))
-		zil_commit(zv->zv_zilog, ZVOL_OBJ);
+	if (io_is_flush(bio, rq)) {
+		error = zil_commit(zv->zv_zilog, ZVOL_OBJ);
+		if (error != 0) {
+			rw_exit(&zv->zv_suspend_lock);
+			zvol_end_io(bio, rq, -error);
+			return;
+		}
+	}
 	/* Some requests are just for flush and nothing else. */
 	if (io_size(bio, rq) == 0) {
@@ -273,8 +280,8 @@ zvol_write(zv_request_t *zvr)
 	dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten);
 	task_io_account_write(nwritten);
-	if (sync)
-		zil_commit(zv->zv_zilog, ZVOL_OBJ);
+	if (error == 0 && sync)
+		error = zil_commit(zv->zv_zilog, ZVOL_OBJ);
 	rw_exit(&zv->zv_suspend_lock);
@@ -282,7 +289,7 @@ zvol_write(zv_request_t *zvr)
 		blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
 	}
-	zvol_end_io(bio, rq, -error);
+	zvol_end_io(bio, rq, error);
 }
 static void
@@ -361,7 +368,7 @@ zvol_discard(zv_request_t *zvr)
 	zfs_rangelock_exit(lr);
 	if (error == 0 && sync)
-		zil_commit(zv->zv_zilog, ZVOL_OBJ);
+		error = zil_commit(zv->zv_zilog, ZVOL_OBJ);
 unlock:
 	rw_exit(&zv->zv_suspend_lock);
@@ -371,7 +378,7 @@ unlock:
 		    start_time);
 	}
-	zvol_end_io(bio, rq, -error);
+	zvol_end_io(bio, rq, error);
 }
 static void
@@ -449,7 +456,7 @@ zvol_read(zv_request_t *zvr)
 		blk_generic_end_io_acct(q, disk, READ, bio, start_time);
 	}
-	zvol_end_io(bio, rq, -error);
+	zvol_end_io(bio, rq, error);
 }
 static void
@@ -480,7 +487,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
 	int rw = io_data_dir(bio, rq);
 	if (unlikely(zv->zv_flags & ZVOL_REMOVING)) {
-		zvol_end_io(bio, rq, -SET_ERROR(ENXIO));
+		zvol_end_io(bio, rq, SET_ERROR(ENXIO));
 		goto out;
 	}
@@ -499,7 +506,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
 		    (long long unsigned)offset,
 		    (long unsigned)size);
-		zvol_end_io(bio, rq, -SET_ERROR(EIO));
+		zvol_end_io(bio, rq, SET_ERROR(EIO));
 		goto out;
 	}
@@ -512,8 +519,8 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
 #ifdef HAVE_BLK_MQ_RQ_HCTX
 		blk_mq_hw_queue = rq->mq_hctx->queue_num;
 #else
-		blk_mq_hw_queue =
-		    rq->q->queue_hw_ctx[rq->q->mq_map[rq->cpu]]->queue_num;
+		blk_mq_hw_queue = rq->q->queue_hw_ctx[
+		    rq->q->mq_map[raw_smp_processor_id()]]->queue_num;
 #endif
 		taskq_hash = cityhash3((uintptr_t)zv, offset >> ZVOL_TASKQ_OFFSET_SHIFT, blk_mq_hw_queue);
@@ -521,7 +528,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
 	if (rw == WRITE) {
 		if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
-			zvol_end_io(bio, rq, -SET_ERROR(EROFS));
+			zvol_end_io(bio, rq, SET_ERROR(EROFS));
 			goto out;
 		}
@@ -886,16 +893,18 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode,
 	case BLKZNAME:
 		mutex_enter(&zv->zv_state_lock);
-		error = copy_to_user((void *)arg, zv->zv_name, MAXNAMELEN);
+		error = -copy_to_user((void *)arg, zv->zv_name, MAXNAMELEN);
 		mutex_exit(&zv->zv_state_lock);
+		if (error)
+			error = SET_ERROR(error);
 		break;
 	default:
-		error = -ENOTTY;
+		error = SET_ERROR(ENOTTY);
 		break;
 	}
-	return (SET_ERROR(error));
+	return (-error);
 }
 #ifdef CONFIG_COMPAT
@@ -1426,7 +1435,7 @@ zvol_os_free(zvol_state_t *zv)
 	ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
 	ASSERT(!MUTEX_HELD(&zv->zv_state_lock));
 	ASSERT0(zv->zv_open_count);
-	ASSERT3P(zv->zv_zso->zvo_disk->private_data, ==, NULL);
+	ASSERT0P(zv->zv_zso->zvo_disk->private_data);
 	rw_destroy(&zv->zv_suspend_lock);
 	zfs_rangelock_fini(&zv->zv_rangelock);
@@ -1474,7 +1483,9 @@ __zvol_os_add_disk(struct gendisk *disk)
 {
 	int error = 0;
 #ifdef HAVE_ADD_DISK_RET
-	error = add_disk(disk);
+	error = -add_disk(disk);
+	if (error)
+		error = SET_ERROR(error);
 #else
 	add_disk(disk);
 #endif
@@ -1649,11 +1660,11 @@ zvol_os_create_minor(const char *name)
 		blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH,
 		    zv->zv_zso->zvo_queue);
 #endif
-	ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
+	ASSERT0P(zv->zv_kstat.dk_kstats);
 	error = dataset_kstats_create(&zv->zv_kstat,
 	    zv->zv_objset);
 	if (error)
 		goto out_dmu_objset_disown;
-	ASSERT3P(zv->zv_zilog, ==, NULL);
+	ASSERT0P(zv->zv_zilog);
 	zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums);
 	if (spa_writeable(dmu_objset_spa(os))) {
 		if (zil_replay_disable)
@@ -1759,10 +1770,10 @@ zvol_init(void)
 		return (error);
 	}
-	error = register_blkdev(zvol_major, ZVOL_DRIVER);
+	error = -register_blkdev(zvol_major, ZVOL_DRIVER);
 	if (error) {
 		printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
-		return (error);
+		return (SET_ERROR(error));
 	}
 	if (zvol_blk_mq_queue_depth == 0) {
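The zvol_init() and __zvol_os_add_disk() hunks show the same boundary rule from the other direction: Linux interfaces such as register_blkdev() and add_disk() return negative errno values, which are negated into the positive convention ZFS uses internally as soon as they cross into ZFS code. A minimal sketch of that translation; kernel_register() is a placeholder for such a kernel call:

    #include <errno.h>
    #include <stdio.h>

    /* Placeholder for a Linux interface returning 0 or a negative errno. */
    static int
    kernel_register(int fail)
    {
        return (fail ? -EBUSY : 0);
    }

    static int
    init_sketch(int fail)
    {
        /* Negate at the boundary so the rest of the code sees positive errnos. */
        int error = -kernel_register(fail);
        if (error) {
            printf("register failed: %d\n", error);
            return (error);     /* positive errno, ZFS-style */
        }
        return (0);
    }

    int
    main(void)
    {
        printf("ok: %d\n", init_sketch(0));
        printf("fail: %d\n", init_sketch(1));
        return (0);
    }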