aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Johnston <markj@FreeBSD.org>2021-04-14 16:57:24 +0000
committerMark Johnston <markj@FreeBSD.org>2021-04-14 17:03:34 +0000
commitaabe13f1450bb4caba66ec2a7a41c0dfefff511d (patch)
tree912bc95617dbf7b13d381364f4e624216fda76bc
parent54f421f9e84234c4313f2d636e4ebd74009a74d6 (diff)
downloadsrc-aabe13f1450bb4caba66ec2a7a41c0dfefff511d.tar.gz
src-aabe13f1450bb4caba66ec2a7a41c0dfefff511d.zip
uma: Introduce per-domain reclamation functions
Make it possible to reclaim items from a specific NUMA domain.

- Add uma_zone_reclaim_domain() and uma_reclaim_domain().
- Permit parallel reclamations.  Use a counter instead of a flag to
  synchronize with zone_dtor().
- Use the zone lock to protect cache_shrink() now that parallel reclaims
  can happen.
- Add a sysctl that can be used to trigger reclamation from a specific
  domain.

Currently the new KPIs are unused, so there should be no functional
change.

Reviewed by:	mav
MFC after:	2 weeks
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D29685
-rw-r--r--share/man/man9/zone.914
-rw-r--r--sys/vm/uma.h8
-rw-r--r--sys/vm/uma_core.c152
-rw-r--r--sys/vm/uma_int.h5
-rw-r--r--sys/vm/vm_kern.c29
5 files changed, 137 insertions, 71 deletions
diff --git a/share/man/man9/zone.9 b/share/man/man9/zone.9
index 7da40b13469b..89d5f3e2640f 100644
--- a/share/man/man9/zone.9
+++ b/share/man/man9/zone.9
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd March 11, 2021
+.Dd April 14, 2021
.Dt UMA 9
.Os
.Sh NAME
@@ -98,8 +98,12 @@ typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
.Ft void
.Fn uma_reclaim "int req"
.Ft void
+.Fn uma_reclaim_domain "int req" "int domain"
+.Ft void
.Fn uma_zone_reclaim "uma_zone_t zone" "int req"
.Ft void
+.Fn uma_zone_reclaim_domain "uma_zone_t zone" "int req" "int domain"
+.Ft void
.Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf"
.Ft void
.Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef"
@@ -471,6 +475,14 @@ Free items in the per-CPU caches are left alone.
.It Dv UMA_RECLAIM_DRAIN_CPU
Reclaim all cached items.
.El
+The
+.Fn uma_reclaim_domain
+and
+.Fn uma_zone_reclaim_domain
+functions apply only to items allocated from the specified domain.
+In the case of domains using a round-robin NUMA policy, cached items from all
+domains are freed to the keg, but only slabs from the specific domain will
+be freed.
.Pp
The
.Fn uma_zone_set_allocf
diff --git a/sys/vm/uma.h b/sys/vm/uma.h
index 361c64900845..5d473ba909b6 100644
--- a/sys/vm/uma.h
+++ b/sys/vm/uma.h
@@ -446,10 +446,12 @@ typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain,
typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
/*
- * Reclaims unused memory
+ * Reclaims unused memory. If no NUMA domain is specified, memory from all
+ * domains is reclaimed.
*
* Arguments:
- * req Reclamation request type.
+ * req Reclamation request type.
+ * domain The target NUMA domain.
* Returns:
* None
*/
@@ -457,7 +459,9 @@ typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
#define UMA_RECLAIM_DRAIN_CPU 2 /* release bucket and per-CPU caches */
#define UMA_RECLAIM_TRIM 3 /* trim bucket cache to WSS */
void uma_reclaim(int req);
+void uma_reclaim_domain(int req, int domain);
void uma_zone_reclaim(uma_zone_t, int req);
+void uma_zone_reclaim_domain(uma_zone_t, int req, int domain);
/*
* Sets the alignment mask to be used for all zones requesting cache
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index 0348d6468d74..6b0add6b6b07 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -168,17 +168,20 @@ static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
static LIST_HEAD(,uma_zone) uma_cachezones =
LIST_HEAD_INITIALIZER(uma_cachezones);
-/* This RW lock protects the keg list */
+/*
+ * Mutex for global lists: uma_kegs, uma_cachezones, and the per-keg list of
+ * zones.
+ */
static struct rwlock_padalign __exclusive_cache_line uma_rwlock;
+static struct sx uma_reclaim_lock;
+
/*
* First available virual address for boot time allocations.
*/
static vm_offset_t bootstart;
static vm_offset_t bootmem;
-static struct sx uma_reclaim_lock;
-
/*
* kmem soft limit, initialized by uma_set_limit(). Ensure that early
* allocations don't trigger a wakeup of the reclaim thread.
@@ -289,7 +292,7 @@ static void pcpu_page_free(void *, vm_size_t, uint8_t);
static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
-static void bucket_cache_reclaim(uma_zone_t zone, bool);
+static void bucket_cache_reclaim(uma_zone_t zone, bool, int);
static int keg_ctor(void *, int, void *, int);
static void keg_dtor(void *, int, void *);
static int zone_ctor(void *, int, void *, int);
@@ -315,7 +318,7 @@ static void bucket_enable(void);
static void bucket_init(void);
static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
-static void bucket_zone_drain(void);
+static void bucket_zone_drain(int domain);
static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item);
@@ -525,12 +528,13 @@ bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
}
static void
-bucket_zone_drain(void)
+bucket_zone_drain(int domain)
{
struct uma_bucket_zone *ubz;
for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
- uma_zone_reclaim(ubz->ubz_zone, UMA_RECLAIM_DRAIN);
+ uma_zone_reclaim_domain(ubz->ubz_zone, UMA_RECLAIM_DRAIN,
+ domain);
}
#ifdef KASAN
@@ -1308,7 +1312,7 @@ cache_drain(uma_zone_t zone)
bucket_free(zone, bucket, NULL);
}
}
- bucket_cache_reclaim(zone, true);
+ bucket_cache_reclaim(zone, true, UMA_ANYDOMAIN);
}
static void
@@ -1318,8 +1322,10 @@ cache_shrink(uma_zone_t zone, void *unused)
if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
return;
+ ZONE_LOCK(zone);
zone->uz_bucket_size =
(zone->uz_bucket_size_min + zone->uz_bucket_size) / 2;
+ ZONE_UNLOCK(zone);
}
static void
@@ -1442,7 +1448,7 @@ bucket_cache_reclaim_domain(uma_zone_t zone, bool drain, int domain)
}
static void
-bucket_cache_reclaim(uma_zone_t zone, bool drain)
+bucket_cache_reclaim(uma_zone_t zone, bool drain, int domain)
{
int i;
@@ -1453,8 +1459,13 @@ bucket_cache_reclaim(uma_zone_t zone, bool drain)
if (zone->uz_bucket_size > zone->uz_bucket_size_min)
zone->uz_bucket_size--;
- for (i = 0; i < vm_ndomains; i++)
- bucket_cache_reclaim_domain(zone, drain, i);
+ if (domain != UMA_ANYDOMAIN &&
+ (zone->uz_flags & UMA_ZONE_ROUNDROBIN) == 0) {
+ bucket_cache_reclaim_domain(zone, drain, domain);
+ } else {
+ for (i = 0; i < vm_ndomains; i++)
+ bucket_cache_reclaim_domain(zone, drain, i);
+ }
}
static void
@@ -1561,63 +1572,65 @@ keg_drain_domain(uma_keg_t keg, int domain)
* Returns nothing.
*/
static void
-keg_drain(uma_keg_t keg)
+keg_drain(uma_keg_t keg, int domain)
{
int i;
if ((keg->uk_flags & UMA_ZONE_NOFREE) != 0)
return;
- for (i = 0; i < vm_ndomains; i++)
- keg_drain_domain(keg, i);
+ if (domain != UMA_ANYDOMAIN) {
+ keg_drain_domain(keg, domain);
+ } else {
+ for (i = 0; i < vm_ndomains; i++)
+ keg_drain_domain(keg, i);
+ }
}
static void
-zone_reclaim(uma_zone_t zone, int waitok, bool drain)
+zone_reclaim(uma_zone_t zone, int domain, int waitok, bool drain)
{
-
/*
- * Set draining to interlock with zone_dtor() so we can release our
- * locks as we go. Only dtor() should do a WAITOK call since it
- * is the only call that knows the structure will still be available
- * when it wakes up.
+ * Count active reclaim operations in order to interlock with
+ * zone_dtor(), which removes the zone from global lists before
+ * attempting to reclaim items itself.
+ *
+ * The zone may be destroyed while sleeping, so only zone_dtor() should
+ * specify M_WAITOK.
*/
ZONE_LOCK(zone);
- while (zone->uz_flags & UMA_ZFLAG_RECLAIMING) {
- if (waitok == M_NOWAIT)
- goto out;
- msleep(zone, &ZDOM_GET(zone, 0)->uzd_lock, PVM, "zonedrain",
- 1);
+ if (waitok == M_WAITOK) {
+ while (zone->uz_reclaimers > 0)
+ msleep(zone, ZONE_LOCKPTR(zone), PVM, "zonedrain", 1);
}
- zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
+ zone->uz_reclaimers++;
ZONE_UNLOCK(zone);
- bucket_cache_reclaim(zone, drain);
+ bucket_cache_reclaim(zone, drain, domain);
- /*
- * The DRAINING flag protects us from being freed while
- * we're running. Normally the uma_rwlock would protect us but we
- * must be able to release and acquire the right lock for each keg.
- */
if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0)
- keg_drain(zone->uz_keg);
+ keg_drain(zone->uz_keg, domain);
ZONE_LOCK(zone);
- zone->uz_flags &= ~UMA_ZFLAG_RECLAIMING;
- wakeup(zone);
-out:
+ zone->uz_reclaimers--;
+ if (zone->uz_reclaimers == 0)
+ wakeup(zone);
ZONE_UNLOCK(zone);
}
static void
-zone_drain(uma_zone_t zone, void *unused)
+zone_drain(uma_zone_t zone, void *arg)
{
+ int domain;
- zone_reclaim(zone, M_NOWAIT, true);
+ domain = (int)(uintptr_t)arg;
+ zone_reclaim(zone, domain, M_NOWAIT, true);
}
static void
-zone_trim(uma_zone_t zone, void *unused)
+zone_trim(uma_zone_t zone, void *arg)
{
+ int domain;
- zone_reclaim(zone, M_NOWAIT, false);
+ domain = (int)(uintptr_t)arg;
+ zone_reclaim(zone, domain, M_NOWAIT, false);
}
/*
@@ -2883,7 +2896,7 @@ zone_dtor(void *arg, int size, void *udata)
keg = zone->uz_keg;
keg->uk_reserve = 0;
}
- zone_reclaim(zone, M_WAITOK, true);
+ zone_reclaim(zone, UMA_ANYDOMAIN, M_WAITOK, true);
/*
* We only destroy kegs from non secondary/non cache zones.
@@ -3153,9 +3166,9 @@ uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
args.flags = flags;
args.keg = NULL;
- sx_slock(&uma_reclaim_lock);
+ sx_xlock(&uma_reclaim_lock);
res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
- sx_sunlock(&uma_reclaim_lock);
+ sx_xunlock(&uma_reclaim_lock);
return (res);
}
@@ -3181,9 +3194,9 @@ uma_zsecond_create(const char *name, uma_ctor ctor, uma_dtor dtor,
args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
args.keg = keg;
- sx_slock(&uma_reclaim_lock);
+ sx_xlock(&uma_reclaim_lock);
res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
- sx_sunlock(&uma_reclaim_lock);
+ sx_xunlock(&uma_reclaim_lock);
return (res);
}
@@ -3224,9 +3237,9 @@ uma_zdestroy(uma_zone_t zone)
if (booted == BOOT_SHUTDOWN &&
zone->uz_fini == NULL && zone->uz_release == zone_release)
return;
- sx_slock(&uma_reclaim_lock);
+ sx_xlock(&uma_reclaim_lock);
zone_free_item(zones, zone, NULL, SKIP_NONE);
- sx_sunlock(&uma_reclaim_lock);
+ sx_xunlock(&uma_reclaim_lock);
}
void
@@ -5035,22 +5048,29 @@ uma_zone_memory(uma_zone_t zone)
void
uma_reclaim(int req)
{
+ uma_reclaim_domain(req, UMA_ANYDOMAIN);
+}
+
+void
+uma_reclaim_domain(int req, int domain)
+{
+ void *arg;
- CTR0(KTR_UMA, "UMA: vm asked us to release pages!");
- sx_xlock(&uma_reclaim_lock);
bucket_enable();
+ arg = (void *)(uintptr_t)domain;
+ sx_slock(&uma_reclaim_lock);
switch (req) {
case UMA_RECLAIM_TRIM:
- zone_foreach(zone_trim, NULL);
+ zone_foreach(zone_trim, arg);
break;
case UMA_RECLAIM_DRAIN:
+ zone_foreach(zone_drain, arg);
+ break;
case UMA_RECLAIM_DRAIN_CPU:
- zone_foreach(zone_drain, NULL);
- if (req == UMA_RECLAIM_DRAIN_CPU) {
- pcpu_cache_drain_safe(NULL);
- zone_foreach(zone_drain, NULL);
- }
+ zone_foreach(zone_drain, arg);
+ pcpu_cache_drain_safe(NULL);
+ zone_foreach(zone_drain, arg);
break;
default:
panic("unhandled reclamation request %d", req);
@@ -5061,10 +5081,10 @@ uma_reclaim(int req)
* we visit again so that we can free pages that are empty once other
* zones are drained. We have to do the same for buckets.
*/
- zone_drain(slabzones[0], NULL);
- zone_drain(slabzones[1], NULL);
- bucket_zone_drain();
- sx_xunlock(&uma_reclaim_lock);
+ zone_drain(slabzones[0], arg);
+ zone_drain(slabzones[1], arg);
+ bucket_zone_drain(domain);
+ sx_sunlock(&uma_reclaim_lock);
}
static volatile int uma_reclaim_needed;
@@ -5099,17 +5119,25 @@ uma_reclaim_worker(void *arg __unused)
void
uma_zone_reclaim(uma_zone_t zone, int req)
{
+ uma_zone_reclaim_domain(zone, req, UMA_ANYDOMAIN);
+}
+
+void
+uma_zone_reclaim_domain(uma_zone_t zone, int req, int domain)
+{
+ void *arg;
+ arg = (void *)(uintptr_t)domain;
switch (req) {
case UMA_RECLAIM_TRIM:
- zone_trim(zone, NULL);
+ zone_trim(zone, arg);
break;
case UMA_RECLAIM_DRAIN:
- zone_drain(zone, NULL);
+ zone_drain(zone, arg);
break;
case UMA_RECLAIM_DRAIN_CPU:
pcpu_cache_drain_safe(zone);
- zone_drain(zone, NULL);
+ zone_drain(zone, arg);
break;
default:
panic("unhandled reclamation request %d", req);
diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h
index 9965e486ca53..93910e78165b 100644
--- a/sys/vm/uma_int.h
+++ b/sys/vm/uma_int.h
@@ -162,7 +162,6 @@
#define UMA_ZFLAG_CTORDTOR 0x01000000 /* Zone has ctor/dtor set. */
#define UMA_ZFLAG_LIMIT 0x02000000 /* Zone has limit set. */
#define UMA_ZFLAG_CACHE 0x04000000 /* uma_zcache_create()d it */
-#define UMA_ZFLAG_RECLAIMING 0x08000000 /* Running zone_reclaim(). */
#define UMA_ZFLAG_BUCKET 0x10000000 /* Bucket zone. */
#define UMA_ZFLAG_INTERNAL 0x20000000 /* No offpage no PCPU. */
#define UMA_ZFLAG_TRASH 0x40000000 /* Add trash ctor/dtor. */
@@ -175,7 +174,6 @@
"\37TRASH" \
"\36INTERNAL" \
"\35BUCKET" \
- "\34RECLAIMING" \
"\33CACHE" \
"\32LIMIT" \
"\31CTORDTOR" \
@@ -490,7 +488,7 @@ struct uma_zone {
char *uz_ctlname; /* sysctl safe name string. */
int uz_namecnt; /* duplicate name count. */
uint16_t uz_bucket_size_min; /* Min number of items in bucket */
- uint16_t uz_pad0;
+ uint16_t uz_reclaimers; /* pending reclaim operations. */
/* Offset 192, rare read-only. */
struct sysctl_oid *uz_oid; /* sysctl oid pointer. */
@@ -582,6 +580,7 @@ static __inline uma_slab_t hash_sfind(struct uma_hash *hash, uint8_t *data);
#define ZONE_LOCK(z) ZDOM_LOCK(ZDOM_GET((z), 0))
#define ZONE_UNLOCK(z) ZDOM_UNLOCK(ZDOM_GET((z), 0))
+#define ZONE_LOCKPTR(z) (&ZDOM_GET((z), 0)->uzd_lock)
#define ZONE_CROSS_LOCK_INIT(z) \
mtx_init(&(z)->uz_cross_lock, "UMA Cross", NULL, MTX_DEF)
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index a69493d1323f..7ab1fdb8950e 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -907,7 +907,6 @@ debug_vm_lowmem(SYSCTL_HANDLER_ARGS)
EVENTHANDLER_INVOKE(vm_lowmem, i);
return (0);
}
-
SYSCTL_PROC(_debug, OID_AUTO, vm_lowmem,
CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, 0, debug_vm_lowmem, "I",
"set to trigger vm_lowmem event with given flags");
@@ -919,7 +918,7 @@ debug_uma_reclaim(SYSCTL_HANDLER_ARGS)
i = 0;
error = sysctl_handle_int(oidp, &i, 0, req);
- if (error != 0)
+ if (error != 0 || req->newptr == NULL)
return (error);
if (i != UMA_RECLAIM_TRIM && i != UMA_RECLAIM_DRAIN &&
i != UMA_RECLAIM_DRAIN_CPU)
@@ -927,7 +926,31 @@ debug_uma_reclaim(SYSCTL_HANDLER_ARGS)
uma_reclaim(i);
return (0);
}
-
SYSCTL_PROC(_debug, OID_AUTO, uma_reclaim,
CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, 0, debug_uma_reclaim, "I",
"set to generate request to reclaim uma caches");
+
+static int
+debug_uma_reclaim_domain(SYSCTL_HANDLER_ARGS)
+{
+ int domain, error, request;
+
+ request = 0;
+ error = sysctl_handle_int(oidp, &request, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ domain = request >> 4;
+ request &= 0xf;
+ if (request != UMA_RECLAIM_TRIM && request != UMA_RECLAIM_DRAIN &&
+ request != UMA_RECLAIM_DRAIN_CPU)
+ return (EINVAL);
+ if (domain < 0 || domain >= vm_ndomains)
+ return (EINVAL);
+ uma_reclaim_domain(request, domain);
+ return (0);
+}
+SYSCTL_PROC(_debug, OID_AUTO, uma_reclaim_domain,
+ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, 0,
+ debug_uma_reclaim_domain, "I",
+ "");