diff options
author | Konstantin Belousov <kib@FreeBSD.org> | 2020-09-09 22:12:51 +0000 |
---|---|---|
committer | Konstantin Belousov <kib@FreeBSD.org> | 2020-09-09 22:12:51 +0000 |
commit | d301b3580f60be417581db4b592f88ce9c916e4b (patch) | |
tree | dcc9c17a38f1081490426b2cf4ef0c0cd3d774d3 /sys/vm | |
parent | aa8f9f90ff9ae6299edbf4073fd81900eed7a7fd (diff) | |
download | src-d301b3580f60be417581db4b592f88ce9c916e4b.tar.gz src-d301b3580f60be417581db4b592f88ce9c916e4b.zip |
Support for userspace non-transparent superpages (largepages).
Created with shm_open2(SHM_LARGEPAGE) and then configured with
FIOSSHMLPGCNF ioctl, largepage POSIX shared memory objects guarantee
that all their userspace mappings are served by non-managed superpage
mappings.
Only amd64 for now, both 2M and 1G superpages can be requested, the
latter requires a CPU feature.
Reviewed by: markj
Tested by: pho
Sponsored by: The FreeBSD Foundation
MFC after: 1 week
Differential revision: https://reviews.freebsd.org/D24652
Notes
Notes:
svn path=/head/; revision=365522
Diffstat (limited to 'sys/vm')
-rw-r--r-- | sys/vm/vm_fault.c | 74 | ||||
-rw-r--r-- | sys/vm/vm_mmap.c | 15 |
2 files changed, 82 insertions, 7 deletions
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 9330a393aa2b..545d5d01509d 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -420,7 +420,7 @@ vm_fault_populate(struct faultstate *fs) vm_offset_t vaddr; vm_page_t m; vm_pindex_t map_first, map_last, pager_first, pager_last, pidx; - int i, npages, psind, rv; + int bdry_idx, i, npages, psind, rv; MPASS(fs->object == fs->first_object); VM_OBJECT_ASSERT_WLOCKED(fs->first_object); @@ -442,7 +442,8 @@ vm_fault_populate(struct faultstate *fs) * to the driver. */ rv = vm_pager_populate(fs->first_object, fs->first_pindex, - fs->fault_type, fs->entry->max_protection, &pager_first, &pager_last); + fs->fault_type, fs->entry->max_protection, &pager_first, + &pager_last); VM_OBJECT_ASSERT_WLOCKED(fs->first_object); if (rv == VM_PAGER_BAD) { @@ -465,15 +466,57 @@ vm_fault_populate(struct faultstate *fs) MPASS(pager_last < fs->first_object->size); vm_fault_restore_map_lock(fs); + bdry_idx = (fs->entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) >> + MAP_ENTRY_SPLIT_BOUNDARY_SHIFT; if (fs->map->timestamp != fs->map_generation) { - vm_fault_populate_cleanup(fs->first_object, pager_first, - pager_last); + if (bdry_idx == 0) { + vm_fault_populate_cleanup(fs->first_object, pager_first, + pager_last); + } else { + m = vm_page_lookup(fs->first_object, pager_first); + if (m != fs->m) + vm_page_xunbusy(m); + } return (KERN_RESTART); } /* * The map is unchanged after our last unlock. Process the fault. * + * First, the special case of largepage mappings, where + * populate only busies the first page in superpage run. 
+ */ + if (bdry_idx != 0) { + m = vm_page_lookup(fs->first_object, pager_first); + vm_fault_populate_check_page(m); + VM_OBJECT_WUNLOCK(fs->first_object); + vaddr = fs->entry->start + IDX_TO_OFF(pager_first) - + fs->entry->offset; + /* assert alignment for entry */ + KASSERT((vaddr & (pagesizes[bdry_idx] - 1)) == 0, + ("unaligned superpage start %#jx pager_first %#jx offset %#jx vaddr %#jx", + (uintmax_t)fs->entry->start, (uintmax_t)pager_first, + (uintmax_t)fs->entry->offset, (uintmax_t)vaddr)); + KASSERT((VM_PAGE_TO_PHYS(m) & (pagesizes[bdry_idx] - 1)) == 0, + ("unaligned superpage m %p %#jx", m, + (uintmax_t)VM_PAGE_TO_PHYS(m))); + rv = pmap_enter(fs->map->pmap, vaddr, m, fs->prot, + fs->fault_type | (fs->wired ? PMAP_ENTER_WIRED : 0) | + PMAP_ENTER_LARGEPAGE, bdry_idx); + VM_OBJECT_WLOCK(fs->first_object); + vm_page_xunbusy(m); + if ((fs->fault_flags & VM_FAULT_WIRE) != 0) { + for (i = 0; i < atop(pagesizes[bdry_idx]); i++) + vm_page_wire(m + i); + } + if (fs->m_hold != NULL) { + *fs->m_hold = m + (fs->first_pindex - pager_first); + vm_page_wire(*fs->m_hold); + } + goto out; + } + + /* * The range [pager_first, pager_last] that is given to the * pager is only a hint. The pager may populate any range * within the object that includes the requested page index. @@ -539,6 +582,7 @@ vm_fault_populate(struct faultstate *fs) vm_page_xunbusy(&m[i]); } } +out: curthread->td_ru.ru_majflt++; return (KERN_SUCCESS); } @@ -1253,6 +1297,7 @@ RetryFault: * multiple page faults of a similar type to run in parallel. 
*/ if (fs.vp == NULL /* avoid locked vnode leak */ && + (fs.entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) == 0 && (fs.fault_flags & (VM_FAULT_WIRE | VM_FAULT_DIRTY)) == 0) { VM_OBJECT_RLOCK(fs.first_object); rv = vm_fault_soft_fast(&fs); @@ -1285,6 +1330,27 @@ RetryFault: */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; + + if ((fs.entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) != 0) { + rv = vm_fault_allocate(&fs); + switch (rv) { + case KERN_RESTART: + unlock_and_deallocate(&fs); + /* FALLTHROUGH */ + case KERN_RESOURCE_SHORTAGE: + goto RetryFault; + case KERN_SUCCESS: + case KERN_FAILURE: + case KERN_OUT_OF_BOUNDS: + unlock_and_deallocate(&fs); + return (rv); + case KERN_NOT_RECEIVER: + break; + default: + panic("vm_fault: Unhandled rv %d", rv); + } + } + while (TRUE) { KASSERT(fs.m == NULL, ("page still set %p at loop start", fs.m)); diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index d0c38c2e8082..7888ff15e36c 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -219,14 +219,14 @@ kern_mmap_req(struct thread *td, const struct mmap_req *mrp) struct file *fp; struct proc *p; off_t pos; - vm_offset_t addr; + vm_offset_t addr, orig_addr; vm_size_t len, pageoff, size; vm_prot_t cap_maxprot; int align, error, fd, flags, max_prot, prot; cap_rights_t rights; mmap_check_fp_fn check_fp_fn; - addr = mrp->mr_hint; + orig_addr = addr = mrp->mr_hint; len = mrp->mr_len; prot = mrp->mr_prot; flags = mrp->mr_flags; @@ -422,6 +422,8 @@ kern_mmap_req(struct thread *td, const struct mmap_req *mrp) if (error != 0) goto done; } + if (fp->f_ops == &shm_ops && shm_largepage(fp->f_data)) + addr = orig_addr; /* This relies on VM_PROT_* matching PROT_*. */ error = fo_mmap(fp, &vms->vm_map, &addr, size, prot, max_prot & cap_maxprot, flags, pos, td); @@ -1104,7 +1106,14 @@ kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len) PROC_UNLOCK(proc); } #endif - return (error == KERN_SUCCESS ? 
0 : ENOMEM); + switch (error) { + case KERN_SUCCESS: + return (0); + case KERN_INVALID_ARGUMENT: + return (EINVAL); + default: + return (ENOMEM); + } } #ifndef _SYS_SYSPROTO_H_ |