about summary refs log tree commit diff
path: root/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c')
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c65
1 files changed, 65 insertions, 0 deletions
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
index a3837f784668..396faef8f646 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -1217,6 +1217,63 @@ zfs_root(zfsvfs_t *zfsvfs, struct inode **ipp)
}
/*
+ * Dentry and inode caches referenced by a task in non-root memcg are
+ * not going to be scanned by the kernel-provided shrinker. So, if
+ * kernel prunes nothing, fall back to this manual walk to free dnodes.
+ * To avoid scanning the same znodes multiple times they are always rotated
+ * to the end of the z_all_znodes list. New znodes are inserted at the
+ * end of the list so we're always scanning the oldest znodes first.
+ */
+/*
+ * Manually walk z_all_znodes and prune dentry aliases for up to nr_to_scan
+ * inactive znodes, returning the number of znodes whose last reference was
+ * (apparently) dropped.  Used as a fallback when the kernel shrinker frees
+ * nothing (see caller); scanned entries are rotated to the list tail so a
+ * later call resumes with the oldest unscanned znodes.
+ */
+static int
+zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
+{
+ znode_t **zp_array, *zp;
+ /*
+ * Bound the scratch array to 8 pages worth of znode pointers so a
+ * huge nr_to_scan cannot trigger an oversized allocation.
+ * NOTE(review): i/max_array are signed int while nr_to_scan is
+ * unsigned long; the comparison below relies on nr_to_scan being
+ * small in practice — confirm against callers.
+ */
+ int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *));
+ int objects = 0;
+ int i = 0, j = 0;
+
+ zp_array = vmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
+
+ /* Pass 1: under z_znodes_lock, collect candidate znodes. */
+ mutex_enter(&zfsvfs->z_znodes_lock);
+ while ((zp = list_head(&zfsvfs->z_all_znodes)) != NULL) {
+
+ if ((i++ > nr_to_scan) || (j >= max_array))
+ break;
+
+ /*
+ * Rotate the znode to the tail before any skip checks so
+ * every examined entry (kept or skipped) moves behind the
+ * unscanned ones and is not revisited by this loop.
+ */
+ ASSERT(list_link_active(&zp->z_link_node));
+ list_remove(&zfsvfs->z_all_znodes, zp);
+ list_insert_tail(&zfsvfs->z_all_znodes, zp);
+
+ /* Skip active znodes and .zfs entries */
+ /* NOTE(review): MUTEX_HELD is a racy best-effort hint only. */
+ if (MUTEX_HELD(&zp->z_lock) || zp->z_is_ctldir)
+ continue;
+
+ /*
+ * Pin the inode so it cannot be freed once the lock is
+ * dropped; igrab() returns NULL if it is being evicted.
+ */
+ if (igrab(ZTOI(zp)) == NULL)
+ continue;
+
+ zp_array[j] = zp;
+ j++;
+ }
+ mutex_exit(&zfsvfs->z_znodes_lock);
+
+ /* Pass 2: prune without the lock held, as pruning may sleep. */
+ for (i = 0; i < j; i++) {
+ zp = zp_array[i];
+
+ ASSERT3P(zp, !=, NULL);
+ d_prune_aliases(ZTOI(zp));
+
+ /*
+ * If ours is now the sole inode reference, the dentries
+ * pinning it were dropped and zrele() below should free
+ * it — count it as a reclaimed object.
+ */
+ if (atomic_read(&ZTOI(zp)->i_count) == 1)
+ objects++;
+
+ zrele(zp);
+ }
+
+ vmem_free(zp_array, max_array * sizeof (znode_t *));
+
+ return (objects);
+}
+
+/*
* The ARC has requested that the filesystem drop entries from the dentry
* and inode caches. This can occur when the ARC needs to free meta data
* blocks but can't because they are all pinned by entries in these caches.
@@ -1267,6 +1324,14 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
*objects = (*shrinker->scan_objects)(shrinker, &sc);
#endif
+ /*
+ * Fall back to zfs_prune_aliases if kernel's shrinker did nothing
+ * due to dentry and inode caches being referenced by a task running
+ * in non-root memcg.
+ */
+ if (*objects == 0)
+ *objects = zfs_prune_aliases(zfsvfs, nr_to_scan);
+
zfs_exit(zfsvfs, FTAG);
dprintf_ds(zfsvfs->z_os->os_dsl_dataset,