aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mateusz Guzik <mjg@FreeBSD.org> 2023-03-21 07:27:25 +0000
committer Mateusz Guzik <mjg@FreeBSD.org> 2023-08-25 22:52:23 +0000
commit b61645ef3c4e6b7f66e6e8accad61aab227a0dd9 (patch)
tree 3939c284b43b2a2dcf172e4a8be1038f0b046aed
parent 42d73e9ec1464a4ebc26d32201def571550e9ba9 (diff)
download src-b61645ef3c4e6b7f66e6e8accad61aab227a0dd9.tar.gz
src-b61645ef3c4e6b7f66e6e8accad61aab227a0dd9.zip
vfs: decouple freevnodes from vnode batching
In principle one CPU can keep vholding vnodes while another vdrops them. In that case the local count may keep growing in an unbounded manner. Roll it up after a threshold instead.

While here move it out of dpcpu into struct pcpu.

Reviewed by: kib (previous version)
Differential Revision: https://reviews.freebsd.org/D39195

(cherry picked from commit b5d43972e3941d6c03d935dc423385f3c2178d68)
-rw-r--r-- sys/kern/vfs_subr.c 55
-rw-r--r-- sys/sys/pcpu.h 3
2 files changed, 35 insertions, 23 deletions
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index c85f0512b286..6befc5eead0e 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -284,7 +284,6 @@ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0,
#define VDBATCH_SIZE 8
struct vdbatch {
u_int index;
- long freevnodes;
struct mtx lock;
struct vnode *tab[VDBATCH_SIZE];
};
@@ -1431,48 +1430,62 @@ static int vnlruproc_sig;
* at any given moment can still exceed slop, but it should not be by significant
* margin in practice.
*/
-#define VNLRU_FREEVNODES_SLOP 128
+#define VNLRU_FREEVNODES_SLOP 126
+
+static void __noinline
+vfs_freevnodes_rollup(int8_t *lfreevnodes)
+{
+
+ atomic_add_long(&freevnodes, *lfreevnodes);
+ *lfreevnodes = 0;
+ critical_exit();
+}
static __inline void
vfs_freevnodes_inc(void)
{
- struct vdbatch *vd;
+ int8_t *lfreevnodes;
critical_enter();
- vd = DPCPU_PTR(vd);
- vd->freevnodes++;
- critical_exit();
+ lfreevnodes = PCPU_PTR(vfs_freevnodes);
+ (*lfreevnodes)++;
+ if (__predict_false(*lfreevnodes == VNLRU_FREEVNODES_SLOP))
+ vfs_freevnodes_rollup(lfreevnodes);
+ else
+ critical_exit();
}
static __inline void
vfs_freevnodes_dec(void)
{
- struct vdbatch *vd;
+ int8_t *lfreevnodes;
critical_enter();
- vd = DPCPU_PTR(vd);
- vd->freevnodes--;
- critical_exit();
+ lfreevnodes = PCPU_PTR(vfs_freevnodes);
+ (*lfreevnodes)--;
+ if (__predict_false(*lfreevnodes == -VNLRU_FREEVNODES_SLOP))
+ vfs_freevnodes_rollup(lfreevnodes);
+ else
+ critical_exit();
}
static u_long
vnlru_read_freevnodes(void)
{
- struct vdbatch *vd;
- long slop;
+ long slop, rfreevnodes;
int cpu;
- mtx_assert(&vnode_list_mtx, MA_OWNED);
- if (freevnodes > freevnodes_old)
- slop = freevnodes - freevnodes_old;
+ rfreevnodes = atomic_load_long(&freevnodes);
+
+ if (rfreevnodes > freevnodes_old)
+ slop = rfreevnodes - freevnodes_old;
else
- slop = freevnodes_old - freevnodes;
+ slop = freevnodes_old - rfreevnodes;
if (slop < VNLRU_FREEVNODES_SLOP)
- return (freevnodes >= 0 ? freevnodes : 0);
- freevnodes_old = freevnodes;
+ return (rfreevnodes >= 0 ? rfreevnodes : 0);
+ freevnodes_old = rfreevnodes;
CPU_FOREACH(cpu) {
- vd = DPCPU_ID_PTR((cpu), vd);
- freevnodes_old += vd->freevnodes;
+ freevnodes_old += cpuid_to_pcpu[cpu]->pc_vfs_freevnodes;
}
return (freevnodes_old >= 0 ? freevnodes_old : 0);
}
@@ -3518,7 +3531,6 @@ vdbatch_process(struct vdbatch *vd)
mtx_lock(&vnode_list_mtx);
critical_enter();
- freevnodes += vd->freevnodes;
for (i = 0; i < VDBATCH_SIZE; i++) {
vp = vd->tab[i];
TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
@@ -3527,7 +3539,6 @@ vdbatch_process(struct vdbatch *vd)
vp->v_dbatchcpu = NOCPU;
}
mtx_unlock(&vnode_list_mtx);
- vd->freevnodes = 0;
bzero(vd->tab, sizeof(vd->tab));
vd->index = 0;
critical_exit();
diff --git a/sys/sys/pcpu.h b/sys/sys/pcpu.h
index 15d74d90778a..58e47f11f917 100644
--- a/sys/sys/pcpu.h
+++ b/sys/sys/pcpu.h
@@ -187,7 +187,8 @@ struct pcpu {
long pc_cp_time[CPUSTATES]; /* statclock ticks */
struct _device *pc_device; /* CPU device handle */
void *pc_netisr; /* netisr SWI cookie */
- int pc_unused1; /* unused field */
+ int8_t pc_vfs_freevnodes; /* freevnodes counter */
+ char pc_unused1[3]; /* unused pad */
int pc_domain; /* Memory domain. */
struct rm_queue pc_rm_queue; /* rmlock list of trackers */
uintptr_t pc_dynamic; /* Dynamic per-cpu data area */