aboutsummaryrefslogtreecommitdiff
path: root/sys/contrib/openzfs/module/os/linux
diff options
context:
space:
mode:
Diffstat (limited to 'sys/contrib/openzfs/module/os/linux')
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c6
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c3
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c6
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c55
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c85
5 files changed, 142 insertions, 13 deletions
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
index 830fad7fe793..1bd3500e9f66 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
@@ -471,13 +471,17 @@ vdev_disk_close(vdev_t *v)
if (v->vdev_reopening || vd == NULL)
return;
+ rw_enter(&vd->vd_lock, RW_WRITER);
+
if (vd->vd_bdh != NULL)
vdev_blkdev_put(vd->vd_bdh, spa_mode(v->vdev_spa),
zfs_vdev_holder);
+ v->vdev_tsd = NULL;
+
+ rw_exit(&vd->vd_lock);
rw_destroy(&vd->vd_lock);
kmem_free(vd, sizeof (vdev_disk_t));
- v->vdev_tsd = NULL;
}
/*
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
index c729947369c2..3fdcdbac6f68 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
@@ -115,8 +115,9 @@ zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
*/
int
zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off,
- ssize_t *resid)
+ uint8_t ashift, ssize_t *resid)
{
+ (void) ashift;
ssize_t rc;
rc = kernel_write(fp, buf, count, &off);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
index cd606e667bff..8a7d14ab6119 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -1556,6 +1556,12 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
sb->s_xattr = zpl_xattr_handlers;
sb->s_export_op = &zpl_export_operations;
+#ifdef HAVE_SET_DEFAULT_D_OP
+ set_default_d_op(sb, &zpl_dentry_operations);
+#else
+ sb->s_d_op = &zpl_dentry_operations;
+#endif
+
/* Set features for file system. */
zfs_set_fuid_feature(zfsvfs);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
index 48dae79a2373..81ac26cb0c93 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
@@ -202,7 +202,7 @@ zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags)
return (!!dentry->d_inode);
}
-static dentry_operations_t zpl_dops_snapdirs = {
+static const struct dentry_operations zpl_dops_snapdirs = {
/*
* Auto mounting of snapshots is only supported for 2.6.37 and
* newer kernels. Prior to this kernel the ops->follow_link()
@@ -215,6 +215,51 @@ static dentry_operations_t zpl_dops_snapdirs = {
.d_revalidate = zpl_snapdir_revalidate,
};
+/*
+ * For the .zfs control directory to work properly we must be able to override
+ * the default operations table and register custom .d_automount and
+ * .d_revalidate callbacks.
+ */
+static void
+set_snapdir_dentry_ops(struct dentry *dentry, unsigned int extraflags) {
+ static const unsigned int op_flags =
+ DCACHE_OP_HASH | DCACHE_OP_COMPARE |
+ DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE |
+ DCACHE_OP_PRUNE | DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_REAL;
+
+#ifdef HAVE_D_SET_D_OP
+ /*
+ * d_set_d_op() will set the DCACHE_OP_ flags according to what it
+ * finds in the passed dentry_operations, so we don't have to.
+ *
+ * We clear the flags and the old op table before calling d_set_d_op()
+ * because issues a warning when the dentry operations table is already
+ * set.
+ */
+ dentry->d_op = NULL;
+ dentry->d_flags &= ~op_flags;
+ d_set_d_op(dentry, &zpl_dops_snapdirs);
+ dentry->d_flags |= extraflags;
+#else
+ /*
+ * Since 6.17 there's no exported way to modify dentry ops, so we have
+ * to reach in and do it ourselves. This should be safe for our very
+ * narrow use case, which is to create or splice in an entry to give
+ * access to a snapshot.
+ *
+ * We need to set the op flags directly. We hardcode
+ * DCACHE_OP_REVALIDATE because that's the only operation we have; if
+ * we ever extend zpl_dops_snapdirs we will need to update the op flags
+ * to match.
+ */
+ spin_lock(&dentry->d_lock);
+ dentry->d_op = &zpl_dops_snapdirs;
+ dentry->d_flags &= ~op_flags;
+ dentry->d_flags |= DCACHE_OP_REVALIDATE | extraflags;
+ spin_unlock(&dentry->d_lock);
+#endif
+}
+
static struct dentry *
zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
unsigned int flags)
@@ -236,10 +281,7 @@ zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
return (ERR_PTR(error));
ASSERT(error == 0 || ip == NULL);
- d_clear_d_op(dentry);
- d_set_d_op(dentry, &zpl_dops_snapdirs);
- dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
-
+ set_snapdir_dentry_ops(dentry, DCACHE_NEED_AUTOMOUNT);
return (d_splice_alias(ip, dentry));
}
@@ -373,8 +415,7 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0);
if (error == 0) {
- d_clear_d_op(dentry);
- d_set_d_op(dentry, &zpl_dops_snapdirs);
+ set_snapdir_dentry_ops(dentry, 0);
d_instantiate(dentry, ip);
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
index 53819628627d..444948d03cb3 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
* Copyright (c) 2023, Datto Inc. All rights reserved.
+ * Copyright (c) 2025, Klara, Inc.
*/
@@ -33,6 +34,20 @@
#include <linux/iversion.h>
#include <linux/version.h>
+/*
+ * What to do when the last reference to an inode is released. If 0, the kernel
+ * will cache it on the superblock. If 1, the inode will be freed immediately.
+ * See zpl_drop_inode().
+ */
+int zfs_delete_inode = 0;
+
+/*
+ * What to do when the last reference to a dentry is released. If 0, the kernel
+ * will cache it until the entry (file) is destroyed. If 1, the dentry will be
+ * marked for cleanup, at which time its inode reference will be released. See
+ * zpl_dentry_delete().
+ */
+int zfs_delete_dentry = 0;
static struct inode *
zpl_inode_alloc(struct super_block *sb)
@@ -77,11 +92,36 @@ zpl_dirty_inode(struct inode *ip, int flags)
}
/*
- * When ->drop_inode() is called its return value indicates if the
- * inode should be evicted from the inode cache. If the inode is
- * unhashed and has no links the default policy is to evict it
- * immediately.
+ * ->drop_inode() is called when the last reference to an inode is released.
+ * Its return value indicates if the inode should be destroyed immediately, or
+ * cached on the superblock structure.
+ *
+ * By default (zfs_delete_inode=0), we call generic_drop_inode(), which returns
+ * "destroy immediately" if the inode is unhashed and has no links (roughly: no
+ * longer exists on disk). On datasets with millions of rarely-accessed files,
+ * this can cause a large amount of memory to be "pinned" by cached inodes,
+ * which in turn pin their associated dnodes and dbufs, until the kernel starts
+ * reporting memory pressure and requests OpenZFS release some memory (see
+ * zfs_prune()).
+ *
+ * When set to 1, we call generic_delete_node(), which always returns "destroy
+ * immediately", resulting in inodes being destroyed immediately, releasing
+ * their associated dnodes and dbufs to the dbuf cached and the ARC to be
+ * evicted as normal.
*
+ * Note that the "last reference" doesn't always mean the last _userspace_
+ * reference; the dentry cache also holds a reference, so "busy" inodes will
+ * still be kept alive that way (subject to dcache tuning).
+ */
+static int
+zpl_drop_inode(struct inode *ip)
+{
+ if (zfs_delete_inode)
+ return (generic_delete_inode(ip));
+ return (generic_drop_inode(ip));
+}
+
+/*
* The ->evict_inode() callback must minimally truncate the inode pages,
* and call clear_inode(). For 2.6.35 and later kernels this will
* simply update the inode state, with the sync occurring before the
@@ -470,6 +510,7 @@ const struct super_operations zpl_super_operations = {
.destroy_inode = zpl_inode_destroy,
.dirty_inode = zpl_dirty_inode,
.write_inode = NULL,
+ .drop_inode = zpl_drop_inode,
.evict_inode = zpl_evict_inode,
.put_super = zpl_put_super,
.sync_fs = zpl_sync_fs,
@@ -480,6 +521,35 @@ const struct super_operations zpl_super_operations = {
.show_stats = NULL,
};
+/*
+ * ->d_delete() is called when the last reference to a dentry is released. Its
+ * return value indicates if the dentry should be destroyed immediately, or
+ * retained in the dentry cache.
+ *
+ * By default (zfs_delete_dentry=0) the kernel will always cache unused
+ * entries. Each dentry holds an inode reference, so cached dentries can hold
+ * the final inode reference indefinitely, leading to the inode and its related
+ * data being pinned (see zpl_drop_inode()).
+ *
+ * When set to 1, we signal that the dentry should be destroyed immediately and
+ * never cached. This reduces memory usage, at the cost of higher overheads to
+ * lookup a file, as the inode and its underlying data (dnode/dbuf) need to be
+ * reloaded and reinflated.
+ *
+ * Note that userspace does not have direct control over dentry references and
+ * reclaim; rather, this is part of the kernel's caching and reclaim subsystems
+ * (eg vm.vfs_cache_pressure).
+ */
+static int
+zpl_dentry_delete(const struct dentry *dentry)
+{
+ return (zfs_delete_dentry ? 1 : 0);
+}
+
+const struct dentry_operations zpl_dentry_operations = {
+ .d_delete = zpl_dentry_delete,
+};
+
struct file_system_type zpl_fs_type = {
.owner = THIS_MODULE,
.name = ZFS_DRIVER,
@@ -491,3 +561,10 @@ struct file_system_type zpl_fs_type = {
.mount = zpl_mount,
.kill_sb = zpl_kill_sb,
};
+
+ZFS_MODULE_PARAM(zfs, zfs_, delete_inode, INT, ZMOD_RW,
+ "Delete inodes as soon as the last reference is released.");
+
+ZFS_MODULE_PARAM(zfs, zfs_, delete_dentry, INT, ZMOD_RW,
+ "Delete dentries from dentry cache as soon as the last reference is "
+ "released.");