aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorWill Andrews <will@FreeBSD.org>2016-07-18 01:55:25 +0000
committerWill Andrews <will@FreeBSD.org>2016-07-18 01:55:25 +0000
commitffdeef3234496b79624f4ba34e3d0a6893f41404 (patch)
treef3433c72f62be02adf456241724fa38f416ac461 /lib
parent8fb15a24ceb2fa5cfd4849466dd9ca2a27ce2bff (diff)
downloadsrc-ffdeef3234496b79624f4ba34e3d0a6893f41404.tar.gz
src-ffdeef3234496b79624f4ba34e3d0a6893f41404.zip
libkvm: Improve physical address lookup scaling.
Instead of using a hash table to convert physical page addresses to offsets in the sparse page array, cache the number of bits set for each 4MB chunk of physical pages. Upon lookup, find the nearest cached population count, then add/subtract the number of bits from that point to the page's PTE bit. Then multiply by page size and add to the sparse page map's base offset. This replaces O(n) worst-case lookup with O(1) (plus a small number of bits to scan in the bitmap). Also, for a 128GB system, a typical kernel core of about 8GB will now only require ~4.5MB of RAM for this approach instead of ~48MB as with the hash table. More concretely, /usr/sbin/crashinfo against the same core improves from a max RSS of 188MB and wall time of 43.72s (33.25 user 2.94 sys) to 135MB and 9.43s (2.58 user 1.47 sys). Running "thread apply all bt" in kgdb has a similar RSS improvement, and wall time drops from 4.44s to 1.93s. Reviewed by: jhb Sponsored by: Backtrace I/O
Notes
Notes: svn path=/head/; revision=302976
Diffstat (limited to 'lib')
-rw-r--r--lib/libkvm/kvm.c2
-rw-r--r--lib/libkvm/kvm_minidump_aarch64.c37
-rw-r--r--lib/libkvm/kvm_minidump_amd64.c57
-rw-r--r--lib/libkvm/kvm_minidump_arm.c37
-rw-r--r--lib/libkvm/kvm_minidump_i386.c35
-rw-r--r--lib/libkvm/kvm_minidump_mips.c37
-rw-r--r--lib/libkvm/kvm_private.c212
-rw-r--r--lib/libkvm/kvm_private.h31
8 files changed, 235 insertions, 213 deletions
diff --git a/lib/libkvm/kvm.c b/lib/libkvm/kvm.c
index 504c7ba9b5d6..26fdbe750c84 100644
--- a/lib/libkvm/kvm.c
+++ b/lib/libkvm/kvm.c
@@ -283,6 +283,8 @@ kvm_close(kvm_t *kd)
free((void *) kd->argspc);
if (kd->argv != 0)
free((void *)kd->argv);
+ if (kd->pt_map != NULL)
+ free(kd->pt_map);
free((void *)kd);
return (0);
diff --git a/lib/libkvm/kvm_minidump_aarch64.c b/lib/libkvm/kvm_minidump_aarch64.c
index d5c5197307fe..ffe27bdd527d 100644
--- a/lib/libkvm/kvm_minidump_aarch64.c
+++ b/lib/libkvm/kvm_minidump_aarch64.c
@@ -50,7 +50,6 @@ __FBSDID("$FreeBSD$");
struct vmstate {
struct minidumphdr hdr;
- struct hpt hpt;
uint64_t *page_map;
};
@@ -67,7 +66,6 @@ _aarch64_minidump_freevtop(kvm_t *kd)
{
struct vmstate *vm = kd->vmst;
- _kvm_hpt_free(&vm->hpt);
free(vm->page_map);
free(vm);
kd->vmst = NULL;
@@ -77,8 +75,7 @@ static int
_aarch64_minidump_initvtop(kvm_t *kd)
{
struct vmstate *vmst;
- uint64_t *bitmap;
- off_t off;
+ off_t off, sparse_off;
vmst = _kvm_malloc(kd, sizeof(*vmst));
if (vmst == NULL) {
@@ -114,19 +111,12 @@ _aarch64_minidump_initvtop(kvm_t *kd)
/* Skip header and msgbuf */
off = AARCH64_PAGE_SIZE + aarch64_round_page(vmst->hdr.msgbufsize);
- bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize);
- if (bitmap == NULL) {
- _kvm_err(kd, kd->program,
- "cannot allocate %d bytes for bitmap",
- vmst->hdr.bitmapsize);
- return (-1);
- }
- if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) !=
- (ssize_t)vmst->hdr.bitmapsize) {
- _kvm_err(kd, kd->program,
- "cannot read %d bytes for page bitmap",
- vmst->hdr.bitmapsize);
- free(bitmap);
+ /* build physical address lookup table for sparse pages */
+ sparse_off = off + aarch64_round_page(vmst->hdr.bitmapsize) +
+ aarch64_round_page(vmst->hdr.pmapsize);
+ if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off,
+ AARCH64_PAGE_SIZE, sizeof(uint64_t)) == -1) {
+ _kvm_err(kd, kd->program, "cannot load core bitmap");
return (-1);
}
off += aarch64_round_page(vmst->hdr.bitmapsize);
@@ -136,7 +126,6 @@ _aarch64_minidump_initvtop(kvm_t *kd)
_kvm_err(kd, kd->program,
"cannot allocate %d bytes for page_map",
vmst->hdr.pmapsize);
- free(bitmap);
return (-1);
}
/* This is the end of the dump, savecore may have truncated it. */
@@ -149,15 +138,9 @@ _aarch64_minidump_initvtop(kvm_t *kd)
AARCH64_PAGE_SIZE) {
_kvm_err(kd, kd->program, "cannot read %d bytes for page_map",
vmst->hdr.pmapsize);
- free(bitmap);
return (-1);
}
- off += vmst->hdr.pmapsize;
-
- /* build physical address hash table for sparse pages */
- _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off,
- AARCH64_PAGE_SIZE, sizeof(*bitmap));
- free(bitmap);
+ off += aarch64_round_page(vmst->hdr.pmapsize);
return (0);
}
@@ -178,7 +161,7 @@ _aarch64_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) {
a = (va - vm->hdr.dmapbase + vm->hdr.dmapphys) &
~AARCH64_PAGE_MASK;
- ofs = _kvm_hpt_find(&vm->hpt, a);
+ ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program, "_aarch64_minidump_vatop: "
"direct map address 0x%jx not in minidump",
@@ -198,7 +181,7 @@ _aarch64_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
goto invalid;
}
a = l3 & ~AARCH64_ATTR_MASK;
- ofs = _kvm_hpt_find(&vm->hpt, a);
+ ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program, "_aarch64_minidump_vatop: "
"physical address 0x%jx not in minidump",
diff --git a/lib/libkvm/kvm_minidump_amd64.c b/lib/libkvm/kvm_minidump_amd64.c
index 3df7a62461f0..21bb5d4774b2 100644
--- a/lib/libkvm/kvm_minidump_amd64.c
+++ b/lib/libkvm/kvm_minidump_amd64.c
@@ -49,7 +49,6 @@ __FBSDID("$FreeBSD$");
struct vmstate {
struct minidumphdr hdr;
- struct hpt hpt;
amd64_pte_t *page_map;
};
@@ -66,9 +65,7 @@ _amd64_minidump_freevtop(kvm_t *kd)
{
struct vmstate *vm = kd->vmst;
- _kvm_hpt_free(&vm->hpt);
- if (vm->page_map)
- free(vm->page_map);
+ free(vm->page_map);
free(vm);
kd->vmst = NULL;
}
@@ -77,8 +74,7 @@ static int
_amd64_minidump_initvtop(kvm_t *kd)
{
struct vmstate *vmst;
- uint64_t *bitmap;
- off_t off;
+ off_t off, sparse_off;
vmst = _kvm_malloc(kd, sizeof(*vmst));
if (vmst == NULL) {
@@ -116,37 +112,28 @@ _amd64_minidump_initvtop(kvm_t *kd)
/* Skip header and msgbuf */
off = AMD64_PAGE_SIZE + amd64_round_page(vmst->hdr.msgbufsize);
- bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize);
- if (bitmap == NULL) {
- _kvm_err(kd, kd->program, "cannot allocate %d bytes for bitmap", vmst->hdr.bitmapsize);
- return (-1);
- }
- if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) !=
- (ssize_t)vmst->hdr.bitmapsize) {
- _kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap", vmst->hdr.bitmapsize);
- free(bitmap);
+ sparse_off = off + amd64_round_page(vmst->hdr.bitmapsize) +
+ amd64_round_page(vmst->hdr.pmapsize);
+ if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off,
+ AMD64_PAGE_SIZE, sizeof(uint64_t)) == -1) {
+ _kvm_err(kd, kd->program, "cannot load core bitmap");
return (-1);
}
off += amd64_round_page(vmst->hdr.bitmapsize);
vmst->page_map = _kvm_malloc(kd, vmst->hdr.pmapsize);
if (vmst->page_map == NULL) {
- _kvm_err(kd, kd->program, "cannot allocate %d bytes for page_map", vmst->hdr.pmapsize);
- free(bitmap);
+ _kvm_err(kd, kd->program, "cannot allocate %d bytes for page_map",
+ vmst->hdr.pmapsize);
return (-1);
}
if (pread(kd->pmfd, vmst->page_map, vmst->hdr.pmapsize, off) !=
(ssize_t)vmst->hdr.pmapsize) {
- _kvm_err(kd, kd->program, "cannot read %d bytes for page_map", vmst->hdr.pmapsize);
- free(bitmap);
+ _kvm_err(kd, kd->program, "cannot read %d bytes for page_map",
+ vmst->hdr.pmapsize);
return (-1);
}
- off += vmst->hdr.pmapsize;
-
- /* build physical address hash table for sparse pages */
- _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off,
- AMD64_PAGE_SIZE, sizeof(*bitmap));
- free(bitmap);
+ off += amd64_round_page(vmst->hdr.pmapsize);
return (0);
}
@@ -175,7 +162,7 @@ _amd64_minidump_vatop_v1(kvm_t *kd, kvaddr_t va, off_t *pa)
goto invalid;
}
a = pte & AMD64_PG_FRAME;
- ofs = _kvm_hpt_find(&vm->hpt, a);
+ ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program,
"_amd64_minidump_vatop_v1: physical address 0x%jx not in minidump",
@@ -186,7 +173,7 @@ _amd64_minidump_vatop_v1(kvm_t *kd, kvaddr_t va, off_t *pa)
return (AMD64_PAGE_SIZE - offset);
} else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) {
a = (va - vm->hdr.dmapbase) & ~AMD64_PAGE_MASK;
- ofs = _kvm_hpt_find(&vm->hpt, a);
+ ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program,
"_amd64_minidump_vatop_v1: direct map address 0x%jx not in minidump",
@@ -235,20 +222,20 @@ _amd64_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
}
if ((pde & AMD64_PG_PS) == 0) {
a = pde & AMD64_PG_FRAME;
- ofs = _kvm_hpt_find(&vm->hpt, a);
+ /* TODO: Just read the single PTE */
+ ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program,
- "_amd64_minidump_vatop: pt physical address 0x%jx not in minidump",
+ "cannot find page table entry for %ju",
(uintmax_t)a);
goto invalid;
}
- /* TODO: Just read the single PTE */
if (pread(kd->pmfd, &pt, AMD64_PAGE_SIZE, ofs) !=
AMD64_PAGE_SIZE) {
_kvm_err(kd, kd->program,
- "cannot read %d bytes for page table",
- AMD64_PAGE_SIZE);
- return (-1);
+ "cannot read page table entry for %ju",
+ (uintmax_t)a);
+ goto invalid;
}
pteindex = (va >> AMD64_PAGE_SHIFT) &
(AMD64_NPTEPG - 1);
@@ -263,7 +250,7 @@ _amd64_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
a = pde & AMD64_PG_PS_FRAME;
a += (va & AMD64_PDRMASK) ^ offset;
}
- ofs = _kvm_hpt_find(&vm->hpt, a);
+ ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program,
"_amd64_minidump_vatop: physical address 0x%jx not in minidump",
@@ -274,7 +261,7 @@ _amd64_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
return (AMD64_PAGE_SIZE - offset);
} else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) {
a = (va - vm->hdr.dmapbase) & ~AMD64_PAGE_MASK;
- ofs = _kvm_hpt_find(&vm->hpt, a);
+ ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program,
"_amd64_minidump_vatop: direct map address 0x%jx not in minidump",
diff --git a/lib/libkvm/kvm_minidump_arm.c b/lib/libkvm/kvm_minidump_arm.c
index f24a3eea1f3e..53ca3f02fa73 100644
--- a/lib/libkvm/kvm_minidump_arm.c
+++ b/lib/libkvm/kvm_minidump_arm.c
@@ -51,7 +51,6 @@ __FBSDID("$FreeBSD$");
struct vmstate {
struct minidumphdr hdr;
- struct hpt hpt;
void *ptemap;
unsigned char ei_data;
};
@@ -69,9 +68,7 @@ _arm_minidump_freevtop(kvm_t *kd)
{
struct vmstate *vm = kd->vmst;
- _kvm_hpt_free(&vm->hpt);
- if (vm->ptemap)
- free(vm->ptemap);
+ free(vm->ptemap);
free(vm);
kd->vmst = NULL;
}
@@ -80,8 +77,7 @@ static int
_arm_minidump_initvtop(kvm_t *kd)
{
struct vmstate *vmst;
- uint32_t *bitmap;
- off_t off;
+ off_t off, sparse_off;
vmst = _kvm_malloc(kd, sizeof(*vmst));
if (vmst == NULL) {
@@ -122,18 +118,11 @@ _arm_minidump_initvtop(kvm_t *kd)
/* Skip header and msgbuf */
off = ARM_PAGE_SIZE + arm_round_page(vmst->hdr.msgbufsize);
- bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize);
- if (bitmap == NULL) {
- _kvm_err(kd, kd->program, "cannot allocate %d bytes for "
- "bitmap", vmst->hdr.bitmapsize);
- return (-1);
- }
-
- if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) !=
- (ssize_t)vmst->hdr.bitmapsize) {
- _kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap",
- vmst->hdr.bitmapsize);
- free(bitmap);
+ sparse_off = off + arm_round_page(vmst->hdr.bitmapsize) +
+ arm_round_page(vmst->hdr.ptesize);
+ if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off,
+ ARM_PAGE_SIZE, sizeof(uint32_t)) == -1) {
+ _kvm_err(kd, kd->program, "cannot load core bitmap");
return (-1);
}
off += arm_round_page(vmst->hdr.bitmapsize);
@@ -142,7 +131,6 @@ _arm_minidump_initvtop(kvm_t *kd)
if (vmst->ptemap == NULL) {
_kvm_err(kd, kd->program, "cannot allocate %d bytes for "
"ptemap", vmst->hdr.ptesize);
- free(bitmap);
return (-1);
}
@@ -150,16 +138,9 @@ _arm_minidump_initvtop(kvm_t *kd)
(ssize_t)vmst->hdr.ptesize) {
_kvm_err(kd, kd->program, "cannot read %d bytes for ptemap",
vmst->hdr.ptesize);
- free(bitmap);
return (-1);
}
-
- off += vmst->hdr.ptesize;
-
- /* Build physical address hash table for sparse pages */
- _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off,
- ARM_PAGE_SIZE, sizeof(*bitmap));
- free(bitmap);
+ off += arm_round_page(vmst->hdr.ptesize);
return (0);
}
@@ -209,7 +190,7 @@ _arm_minidump_kvatop(kvm_t *kd, kvaddr_t va, off_t *pa)
a = pte & ARM_L2_S_FRAME;
}
- ofs = _kvm_hpt_find(&vm->hpt, a);
+ ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program, "_arm_minidump_kvatop: "
"physical address 0x%jx not in minidump",
diff --git a/lib/libkvm/kvm_minidump_i386.c b/lib/libkvm/kvm_minidump_i386.c
index a154adf91a5f..c8fc38eb45bc 100644
--- a/lib/libkvm/kvm_minidump_i386.c
+++ b/lib/libkvm/kvm_minidump_i386.c
@@ -49,7 +49,6 @@ __FBSDID("$FreeBSD$");
struct vmstate {
struct minidumphdr hdr;
- struct hpt hpt;
void *ptemap;
};
@@ -66,9 +65,7 @@ _i386_minidump_freevtop(kvm_t *kd)
{
struct vmstate *vm = kd->vmst;
- _kvm_hpt_free(&vm->hpt);
- if (vm->ptemap)
- free(vm->ptemap);
+ free(vm->ptemap);
free(vm);
kd->vmst = NULL;
}
@@ -77,8 +74,7 @@ static int
_i386_minidump_initvtop(kvm_t *kd)
{
struct vmstate *vmst;
- uint32_t *bitmap;
- off_t off;
+ off_t off, sparse_off;
vmst = _kvm_malloc(kd, sizeof(*vmst));
if (vmst == NULL) {
@@ -110,15 +106,11 @@ _i386_minidump_initvtop(kvm_t *kd)
/* Skip header and msgbuf */
off = I386_PAGE_SIZE + i386_round_page(vmst->hdr.msgbufsize);
- bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize);
- if (bitmap == NULL) {
- _kvm_err(kd, kd->program, "cannot allocate %d bytes for bitmap", vmst->hdr.bitmapsize);
- return (-1);
- }
- if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) !=
- (ssize_t)vmst->hdr.bitmapsize) {
- _kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap", vmst->hdr.bitmapsize);
- free(bitmap);
+ sparse_off = off + i386_round_page(vmst->hdr.bitmapsize) +
+ i386_round_page(vmst->hdr.ptesize);
+ if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off,
+ I386_PAGE_SIZE, sizeof(uint32_t)) == -1) {
+ _kvm_err(kd, kd->program, "cannot load core bitmap");
return (-1);
}
off += i386_round_page(vmst->hdr.bitmapsize);
@@ -126,21 +118,14 @@ _i386_minidump_initvtop(kvm_t *kd)
vmst->ptemap = _kvm_malloc(kd, vmst->hdr.ptesize);
if (vmst->ptemap == NULL) {
_kvm_err(kd, kd->program, "cannot allocate %d bytes for ptemap", vmst->hdr.ptesize);
- free(bitmap);
return (-1);
}
if (pread(kd->pmfd, vmst->ptemap, vmst->hdr.ptesize, off) !=
(ssize_t)vmst->hdr.ptesize) {
_kvm_err(kd, kd->program, "cannot read %d bytes for ptemap", vmst->hdr.ptesize);
- free(bitmap);
return (-1);
}
- off += vmst->hdr.ptesize;
-
- /* build physical address hash table for sparse pages */
- _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off,
- I386_PAGE_SIZE, sizeof(*bitmap));
- free(bitmap);
+ off += i386_round_page(vmst->hdr.ptesize);
return (0);
}
@@ -171,7 +156,7 @@ _i386_minidump_vatop_pae(kvm_t *kd, kvaddr_t va, off_t *pa)
goto invalid;
}
a = pte & I386_PG_FRAME_PAE;
- ofs = _kvm_hpt_find(&vm->hpt, a);
+ ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program,
"_i386_minidump_vatop_pae: physical address 0x%jx not in minidump",
@@ -218,7 +203,7 @@ _i386_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
goto invalid;
}
a = pte & I386_PG_FRAME;
- ofs = _kvm_hpt_find(&vm->hpt, a);
+ ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program,
"_i386_minidump_vatop: physical address 0x%jx not in minidump",
diff --git a/lib/libkvm/kvm_minidump_mips.c b/lib/libkvm/kvm_minidump_mips.c
index bec6426e1682..77d1892df3a2 100644
--- a/lib/libkvm/kvm_minidump_mips.c
+++ b/lib/libkvm/kvm_minidump_mips.c
@@ -52,7 +52,6 @@ __FBSDID("$FreeBSD$");
struct vmstate {
struct minidumphdr hdr;
- struct hpt hpt;
void *ptemap;
int pte_size;
};
@@ -74,9 +73,7 @@ _mips_minidump_freevtop(kvm_t *kd)
{
struct vmstate *vm = kd->vmst;
- _kvm_hpt_free(&vm->hpt);
- if (vm->ptemap)
- free(vm->ptemap);
+ free(vm->ptemap);
free(vm);
kd->vmst = NULL;
}
@@ -85,8 +82,7 @@ static int
_mips_minidump_initvtop(kvm_t *kd)
{
struct vmstate *vmst;
- uint32_t *bitmap;
- off_t off;
+ off_t off, sparse_off;
vmst = _kvm_malloc(kd, sizeof(*vmst));
if (vmst == NULL) {
@@ -129,18 +125,11 @@ _mips_minidump_initvtop(kvm_t *kd)
/* Skip header and msgbuf */
off = MIPS_PAGE_SIZE + mips_round_page(vmst->hdr.msgbufsize);
- bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize);
- if (bitmap == NULL) {
- _kvm_err(kd, kd->program, "cannot allocate %d bytes for "
- "bitmap", vmst->hdr.bitmapsize);
- return (-1);
- }
-
- if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) !=
- (ssize_t)vmst->hdr.bitmapsize) {
- _kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap",
- vmst->hdr.bitmapsize);
- free(bitmap);
+ sparse_off = off + mips_round_page(vmst->hdr.bitmapsize) +
+ mips_round_page(vmst->hdr.ptesize);
+ if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off,
+ MIPS_PAGE_SIZE, sizeof(uint32_t)) == -1) {
+ _kvm_err(kd, kd->program, "cannot load core bitmap");
return (-1);
}
off += mips_round_page(vmst->hdr.bitmapsize);
@@ -149,7 +138,6 @@ _mips_minidump_initvtop(kvm_t *kd)
if (vmst->ptemap == NULL) {
_kvm_err(kd, kd->program, "cannot allocate %d bytes for "
"ptemap", vmst->hdr.ptesize);
- free(bitmap);
return (-1);
}
@@ -157,16 +145,9 @@ _mips_minidump_initvtop(kvm_t *kd)
(ssize_t)vmst->hdr.ptesize) {
_kvm_err(kd, kd->program, "cannot read %d bytes for ptemap",
vmst->hdr.ptesize);
- free(bitmap);
return (-1);
}
-
- off += vmst->hdr.ptesize;
-
- /* Build physical address hash table for sparse pages */
- _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off,
- MIPS_PAGE_SIZE, sizeof(*bitmap));
- free(bitmap);
+ off += mips_round_page(vmst->hdr.ptesize);
return (0);
}
@@ -243,7 +224,7 @@ _mips_minidump_kvatop(kvm_t *kd, kvaddr_t va, off_t *pa)
}
found:
- ofs = _kvm_hpt_find(&vm->hpt, a);
+ ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program, "_mips_minidump_kvatop: physical "
"address 0x%jx not in minidump", (uintmax_t)a);
diff --git a/lib/libkvm/kvm_private.c b/lib/libkvm/kvm_private.c
index 940a5a92eaa7..f790757adbf0 100644
--- a/lib/libkvm/kvm_private.c
+++ b/lib/libkvm/kvm_private.c
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
+#include <assert.h>
#include <fcntl.h>
#include <kvm.h>
#include <limits.h>
@@ -213,73 +214,178 @@ bad:
return (-1);
}
-static void
-_kvm_hpt_insert(struct hpt *hpt, uint64_t pa, off_t off)
+/*
+ * Transform v such that only bits [bit0, bitN) may be set. Generates a
+ * bitmask covering the number of bits, then shifts so +bit0+ is the first.
+ */
+static uint64_t
+bitmask_range(uint64_t v, uint64_t bit0, uint64_t bitN)
{
- struct hpte *hpte;
- uint32_t fnv = FNV1_32_INIT;
-
- fnv = fnv_32_buf(&pa, sizeof(pa), fnv);
- fnv &= (HPT_SIZE - 1);
- hpte = malloc(sizeof(*hpte));
- hpte->pa = pa;
- hpte->off = off;
- hpte->next = hpt->hpt_head[fnv];
- hpt->hpt_head[fnv] = hpte;
+ if (bit0 == 0 && bitN == BITS_IN(v))
+ return (v);
+
+ return (v & (((1ULL << (bitN - bit0)) - 1ULL) << bit0));
}
-void
-_kvm_hpt_init(kvm_t *kd, struct hpt *hpt, void *base, size_t len, off_t off,
- int page_size, int word_size)
+/*
+ * Returns the number of bits in a given byte array range starting at a
+ * given base, from bit0 to bitN. bit0 may be non-zero in the case of
+ * counting backwards from bitN.
+ */
+static uint64_t
+popcount_bytes(uint64_t *addr, uint32_t bit0, uint32_t bitN)
{
- uint64_t bits, idx, pa;
- uint64_t *base64;
- uint32_t *base32;
-
- base64 = base;
- base32 = base;
- for (idx = 0; idx < len / word_size; idx++) {
- if (word_size == sizeof(uint64_t))
- bits = _kvm64toh(kd, base64[idx]);
- else
- bits = _kvm32toh(kd, base32[idx]);
- pa = idx * word_size * NBBY * page_size;
- for (; bits != 0; bits >>= 1, pa += page_size) {
- if ((bits & 1) == 0)
- continue;
- _kvm_hpt_insert(hpt, pa, off);
- off += page_size;
- }
+ uint32_t res = bitN - bit0;
+ uint64_t count = 0;
+ uint32_t bound;
+
+ /* Align to 64-bit boundary on the left side if needed. */
+ if ((bit0 % BITS_IN(*addr)) != 0) {
+ bound = MIN(bitN, roundup2(bit0, BITS_IN(*addr)));
+ count += __bitcount64(bitmask_range(*addr, bit0, bound));
+ res -= (bound - bit0);
+ addr++;
+ }
+
+ while (res > 0) {
+ bound = MIN(res, BITS_IN(*addr));
+ count += __bitcount64(bitmask_range(*addr, 0, bound));
+ res -= bound;
+ addr++;
}
+
+ return (count);
}
-off_t
-_kvm_hpt_find(struct hpt *hpt, uint64_t pa)
+int
+_kvm_pt_init(kvm_t *kd, size_t map_len, off_t map_off, off_t sparse_off,
+ int page_size, int word_size)
{
- struct hpte *hpte;
- uint32_t fnv = FNV1_32_INIT;
-
- fnv = fnv_32_buf(&pa, sizeof(pa), fnv);
- fnv &= (HPT_SIZE - 1);
- for (hpte = hpt->hpt_head[fnv]; hpte != NULL; hpte = hpte->next) {
- if (pa == hpte->pa)
- return (hpte->off);
+ uint64_t *addr;
+ uint32_t *popcount_bin;
+ int bin_popcounts = 0;
+ uint64_t pc_bins, res;
+ ssize_t rd;
+
+ /*
+ * Map the bitmap specified by the arguments.
+ */
+ kd->pt_map = _kvm_malloc(kd, map_len);
+ if (kd->pt_map == NULL) {
+ _kvm_err(kd, kd->program, "cannot allocate %zu bytes for bitmap",
+ map_len);
+ return (-1);
}
- return (-1);
+ rd = pread(kd->pmfd, kd->pt_map, map_len, map_off);
+ if (rd < 0 || rd != (ssize_t)map_len) {
+ _kvm_err(kd, kd->program, "cannot read %zu bytes for bitmap",
+ map_len);
+ return (-1);
+ }
+ kd->pt_map_size = map_len;
+
+ /*
+ * Generate a popcount cache for every POPCOUNT_BITS in the bitmap,
+ * so lookups only have to calculate the number of bits set between
+ * a cache point and their bit. This reduces lookups to O(1),
+ * without significantly increasing memory requirements.
+ *
+ * Round up the number of bins so that 'upper half' lookups work for
+ * the final bin, if needed. The first popcount is 0, since no bits
+ * precede bit 0, so add 1 for that also. Without this, extra work
+ * would be needed to handle the first PTEs in _kvm_pt_find().
+ */
+ addr = kd->pt_map;
+ res = map_len;
+ pc_bins = 1 + (res * NBBY + POPCOUNT_BITS / 2) / POPCOUNT_BITS;
+ kd->pt_popcounts = calloc(pc_bins, sizeof(uint32_t));
+ if (kd->pt_popcounts == NULL)
+ return (-1);
+
+ for (popcount_bin = &kd->pt_popcounts[1]; res > 0;
+ addr++, res -= sizeof(*addr)) {
+ *popcount_bin += popcount_bytes(addr, 0,
+ MIN(res * NBBY, BITS_IN(*addr)));
+ if (++bin_popcounts == POPCOUNTS_IN(*addr)) {
+ popcount_bin++;
+ *popcount_bin = *(popcount_bin - 1);
+ bin_popcounts = 0;
+ }
+ }
+
+ assert(pc_bins * sizeof(*popcount_bin) ==
+ ((uintptr_t)popcount_bin - (uintptr_t)kd->pt_popcounts));
+
+ kd->pt_sparse_off = sparse_off;
+ kd->pt_sparse_size = (uint64_t)*popcount_bin * PAGE_SIZE;
+ kd->pt_page_size = page_size;
+ kd->pt_word_size = word_size;
+ return (0);
}
-void
-_kvm_hpt_free(struct hpt *hpt)
+/*
+ * Find the offset for the given physical page address; returns -1 otherwise.
+ *
+ * A page's offset is represented by the sparse page base offset plus the
+ * number of bits set before its bit multiplied by PAGE_SIZE. This means
+ * that if a page exists in the dump, it's necessary to know how many pages
+ * in the dump precede it. Reduce this O(n) counting to O(1) by caching the
+ * number of bits set at POPCOUNT_BITS intervals.
+ *
+ * Then to find the number of pages before the requested address, simply
+ * index into the cache and count the number of bits set between that cache
+ * bin and the page's bit. Halve the number of bytes that have to be
+ * checked by also counting down from the next higher bin if it's closer.
+ */
+off_t
+_kvm_pt_find(kvm_t *kd, uint64_t pa)
{
- struct hpte *hpte, *next;
- int i;
+ uint64_t *bitmap = kd->pt_map;
+ uint64_t pte_bit_id = pa / PAGE_SIZE;
+ uint64_t pte_u64 = pte_bit_id / BITS_IN(*bitmap);
+ uint64_t popcount_id = pte_bit_id / POPCOUNT_BITS;
+ uint64_t pte_mask = 1ULL << (pte_bit_id % BITS_IN(*bitmap));
+ uint64_t bitN;
+ uint32_t count;
+
+ /* Check whether the page address requested is in the dump. */
+ if (pte_bit_id >= (kd->pt_map_size * NBBY) ||
+ (bitmap[pte_u64] & pte_mask) == 0)
+ return (-1);
- for (i = 0; i < HPT_SIZE; i++) {
- for (hpte = hpt->hpt_head[i]; hpte != NULL; hpte = next) {
- next = hpte->next;
- free(hpte);
- }
+ /*
+ * Add/sub popcounts from the bitmap until the PTE's bit is reached.
+ * For bits that are in the upper half between the calculated
+ * popcount id and the next one, use the next one and subtract to
+ * minimize the number of popcounts required.
+ */
+ if ((pte_bit_id % POPCOUNT_BITS) < (POPCOUNT_BITS / 2)) {
+ count = kd->pt_popcounts[popcount_id] + popcount_bytes(
+ bitmap + popcount_id * POPCOUNTS_IN(*bitmap),
+ 0, pte_bit_id - popcount_id * POPCOUNT_BITS);
+ } else {
+ /*
+ * Counting in reverse is trickier, since we must avoid
+ * reading from bytes that are not in range, and invert.
+ */
+ uint64_t pte_u64_bit_off = pte_u64 * BITS_IN(*bitmap);
+
+ popcount_id++;
+ bitN = MIN(popcount_id * POPCOUNT_BITS,
+ kd->pt_map_size * BITS_IN(uint8_t));
+ count = kd->pt_popcounts[popcount_id] - popcount_bytes(
+ bitmap + pte_u64,
+ pte_bit_id - pte_u64_bit_off, bitN - pte_u64_bit_off);
}
+
+ /*
+ * This can only happen if the core is truncated. Treat these
+ * entries as if they don't exist, since their backing doesn't.
+ */
+ if (count >= (kd->pt_sparse_size / PAGE_SIZE))
+ return (-1);
+
+ return (kd->pt_sparse_off + (uint64_t)count * PAGE_SIZE);
}
static int
diff --git a/lib/libkvm/kvm_private.h b/lib/libkvm/kvm_private.h
index 78b48458cd6f..814a06f63b99 100644
--- a/lib/libkvm/kvm_private.h
+++ b/lib/libkvm/kvm_private.h
@@ -97,23 +97,21 @@ struct __kvm {
uintptr_t *dpcpu_off; /* base array, indexed by CPU ID */
u_int dpcpu_curcpu; /* CPU we're currently working with */
kvaddr_t dpcpu_curoff; /* dpcpu base of current CPU */
-};
-/*
- * Page table hash used by minidump backends to map physical addresses
- * to file offsets.
- */
-struct hpte {
- struct hpte *next;
- uint64_t pa;
- off_t off;
+ /* Page table lookup structures. */
+ uint64_t *pt_map;
+ size_t pt_map_size;
+ off_t pt_sparse_off;
+ uint64_t pt_sparse_size;
+ uint32_t *pt_popcounts;
+ unsigned int pt_page_size;
+ unsigned int pt_word_size;
};
-#define HPT_SIZE 1024
-
-struct hpt {
- struct hpte *hpt_head[HPT_SIZE];
-};
+/* Page table lookup constants. */
+#define POPCOUNT_BITS 1024
+#define BITS_IN(v) (sizeof(v) * NBBY)
+#define POPCOUNTS_IN(v) (POPCOUNT_BITS / BITS_IN(v))
/*
* Functions used internally by kvm, but across kvm modules.
@@ -154,6 +152,5 @@ kvaddr_t _kvm_dpcpu_validaddr(kvm_t *, kvaddr_t);
int _kvm_probe_elf_kernel(kvm_t *, int, int);
int _kvm_is_minidump(kvm_t *);
int _kvm_read_core_phdrs(kvm_t *, size_t *, GElf_Phdr **);
-void _kvm_hpt_init(kvm_t *, struct hpt *, void *, size_t, off_t, int, int);
-off_t _kvm_hpt_find(struct hpt *, uint64_t);
-void _kvm_hpt_free(struct hpt *);
+int _kvm_pt_init(kvm_t *, size_t, off_t, off_t, int, int);
+off_t _kvm_pt_find(kvm_t *, uint64_t);