aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlan Somers <asomers@FreeBSD.org>2021-02-27 15:59:40 +0000
committerAlan Somers <asomers@FreeBSD.org>2021-03-03 01:33:45 +0000
commitab63da3564e8ab0907f9d8eb565774848ffdadeb (patch)
tree6871f9096ed113eec6aec62afb57227951a254d6
parentafb1aa4e6df245d38fd2ba683fa521d5dabe8392 (diff)
downloadsrc-ab63da3564e8ab0907f9d8eb565774848ffdadeb.tar.gz
src-ab63da3564e8ab0907f9d8eb565774848ffdadeb.zip
Speed up geom_stats_resync in the presence of many devices
The old code had a O(n) loop, where n is the size of /dev/devstat. Multiply that by another O(n) loop in devstat_mmap for a total of O(n^2). This change adds DIOCGMEDIASIZE support to /dev/devstat so userland can quickly determine the right amount of memory to map, eliminating the O(n) loop in userland. This change decreases the time to run "gstat -bI0.001" with 16,384 md devices from 29.7s to 4.2s. Also, fix a memory leak first reported as PR 203097. Sponsored by: Axcient Reviewed by: mav, imp MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D28968
-rw-r--r--lib/libgeom/geom_stats.c26
-rw-r--r--sys/kern/subr_devstat.c21
2 files changed, 38 insertions, 9 deletions
diff --git a/lib/libgeom/geom_stats.c b/lib/libgeom/geom_stats.c
index ebf7868c3c65..450ee491ea1c 100644
--- a/lib/libgeom/geom_stats.c
+++ b/lib/libgeom/geom_stats.c
@@ -32,9 +32,12 @@
*/
#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/disk.h>
#include <sys/devicestat.h>
#include <sys/mman.h>
#include <sys/time.h>
+#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <paths.h>
@@ -53,7 +56,7 @@ geom_stats_close(void)
{
if (statsfd == -1)
return;
- munmap(statp, npages *pagesize);
+ munmap(statp, npages * pagesize);
statp = NULL;
close (statsfd);
statsfd = -1;
@@ -63,17 +66,22 @@ void
geom_stats_resync(void)
{
void *p;
+ off_t mediasize;
+ int error;
if (statsfd == -1)
return;
- for (;;) {
- p = mmap(statp, (npages + 1) * pagesize,
- PROT_READ, MAP_SHARED, statsfd, 0);
- if (p == MAP_FAILED)
- break;
- else
- statp = p;
- npages++;
+ error = ioctl(statsfd, DIOCGMEDIASIZE, &mediasize);
+ if (error)
+ err(1, "DIOCGMEDIASIZE(" _PATH_DEV DEVSTAT_DEVICE_NAME ")");
+
+ munmap(statp, npages * pagesize);
+ p = mmap(statp, mediasize, PROT_READ, MAP_SHARED, statsfd, 0);
+ if (p == MAP_FAILED)
+ err(1, "mmap(/dev/devstat):");
+ else {
+ statp = p;
+ npages = mediasize / pagesize;
}
}
diff --git a/sys/kern/subr_devstat.c b/sys/kern/subr_devstat.c
index 091164a11fcf..98a41fd179c2 100644
--- a/sys/kern/subr_devstat.c
+++ b/sys/kern/subr_devstat.c
@@ -32,6 +32,7 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/disk.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/bio.h>
@@ -473,10 +474,12 @@ SYSCTL_INT(_kern_devstat, OID_AUTO, version, CTLFLAG_RD,
#define statsperpage (PAGE_SIZE / sizeof(struct devstat))
+static d_ioctl_t devstat_ioctl;
static d_mmap_t devstat_mmap;
static struct cdevsw devstat_cdevsw = {
.d_version = D_VERSION,
+ .d_ioctl = devstat_ioctl,
.d_mmap = devstat_mmap,
.d_name = "devstat",
};
@@ -487,10 +490,27 @@ struct statspage {
u_int nfree;
};
+static size_t pagelist_pages = 0;
static TAILQ_HEAD(, statspage) pagelist = TAILQ_HEAD_INITIALIZER(pagelist);
static MALLOC_DEFINE(M_DEVSTAT, "devstat", "Device statistics");
static int
+devstat_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
+ struct thread *td)
+{
+ int error = ENOTTY;
+
+ switch (cmd) {
+ case DIOCGMEDIASIZE:
+ error = 0;
+ *(off_t *)data = pagelist_pages * PAGE_SIZE;
+ break;
+ }
+
+ return (error);
+}
+
+static int
devstat_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
int nprot, vm_memattr_t *memattr)
{
@@ -556,6 +576,7 @@ devstat_alloc(void)
* head but the order on the list determine the
* sequence of the mapping so we can't do that.
*/
+ pagelist_pages++;
TAILQ_INSERT_TAIL(&pagelist, spp, list);
} else
break;