aboutsummaryrefslogtreecommitdiff
path: root/sys/vm
diff options
context:
space:
mode:
author Alan Cox <alc@FreeBSD.org> 2011-11-16 16:46:09 +0000
committer Alan Cox <alc@FreeBSD.org> 2011-11-16 16:46:09 +0000
commit fbd80bd0474b3fc6e3ec658fc75e378559571eae (patch)
tree 272387d8a035403f23822efe5dfd1df09f424088 /sys/vm
parent d1c5fc763a8b7e9cf29960238eb5f81ac06a5b26 (diff)
download src-fbd80bd0474b3fc6e3ec658fc75e378559571eae.tar.gz
src-fbd80bd0474b3fc6e3ec658fc75e378559571eae.zip
Refactor the code that performs physically contiguous memory allocation,
yielding a new public interface, vm_page_alloc_contig(). This new function
addresses some of the limitations of the current interfaces, contigmalloc()
and kmem_alloc_contig(). For example, the physically contiguous memory that
is allocated with those interfaces can only be allocated to the kernel vm
object and must be mapped into the kernel virtual address space. It also
provides functionality that vm_phys_alloc_contig() doesn't, such as wiring
the returned pages. Moreover, unlike that function, it respects the low
water marks on the paging queues and wakes up the page daemon when
necessary. That said, at present, this new function can't be applied to all
types of vm objects. However, that restriction will be eliminated in the
coming weeks.

From a design standpoint, this change also addresses an inconsistency
between vm_phys_alloc_contig() and the other vm_phys_alloc*() functions.
Specifically, vm_phys_alloc_contig() manipulated vm_page fields that other
functions in vm/vm_phys.c didn't. Moreover, vm_phys_alloc_contig() knew
about vnodes and reservations. Now, vm_page_alloc_contig() is responsible
for these things.

Reviewed by: kib
Discussed with: jhb
Notes
Notes: svn path=/head/; revision=227568
Diffstat (limited to 'sys/vm')
-rw-r--r-- sys/vm/vm_contig.c 119
-rw-r--r-- sys/vm/vm_page.c 158
-rw-r--r-- sys/vm/vm_page.h 4
-rw-r--r-- sys/vm/vm_phys.c 47
-rw-r--r-- sys/vm/vm_phys.h 3
5 files changed, 222 insertions, 109 deletions
diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c
index 50f95a506090..ea2c9046f455 100644
--- a/sys/vm/vm_contig.c
+++ b/sys/vm/vm_contig.c
@@ -82,7 +82,6 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
-#include <vm/vm_phys.h>
#include <vm/vm_extern.h>
static int
@@ -185,22 +184,6 @@ vm_contig_launder(int queue, vm_paddr_t low, vm_paddr_t high)
}
/*
- * Frees the given physically contiguous pages.
- *
- * N.B.: Any pages with PG_ZERO set must, in fact, be zero filled.
- */
-static void
-vm_page_release_contig(vm_page_t m, vm_pindex_t count)
-{
-
- while (count--) {
- /* Leave PG_ZERO unchanged. */
- vm_page_free_toq(m);
- m++;
- }
-}
-
-/*
* Increase the number of cached pages.
*/
void
@@ -238,9 +221,10 @@ kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
vm_paddr_t high, vm_memattr_t memattr)
{
vm_object_t object = kernel_object;
- vm_offset_t addr, i, offset;
+ vm_offset_t addr;
+ vm_ooffset_t end_offset, offset;
vm_page_t m;
- int tries;
+ int pflags, tries;
size = round_page(size);
vm_map_lock(map);
@@ -252,11 +236,19 @@ kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
vm_object_reference(object);
vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
VM_PROT_ALL, 0);
+ if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
+ pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY;
+ else
+ pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY;
+ if (flags & M_ZERO)
+ pflags |= VM_ALLOC_ZERO;
VM_OBJECT_LOCK(object);
- for (i = 0; i < size; i += PAGE_SIZE) {
+ end_offset = offset + size;
+ for (; offset < end_offset; offset += PAGE_SIZE) {
tries = 0;
retry:
- m = vm_phys_alloc_contig(1, low, high, PAGE_SIZE, 0);
+ m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, 1,
+ low, high, PAGE_SIZE, 0, memattr);
if (m == NULL) {
VM_OBJECT_UNLOCK(object);
if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
@@ -277,9 +269,6 @@ retry:
vm_map_unlock(map);
return (0);
}
- if (memattr != VM_MEMATTR_DEFAULT)
- pmap_page_set_memattr(m, memattr);
- vm_page_insert(m, object, OFF_TO_IDX(offset + i));
if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
m->valid = VM_PAGE_BITS_ALL;
@@ -299,65 +288,61 @@ retry:
* specified through the given flags, then the pages are zeroed
* before they are mapped.
*/
-static vm_offset_t
-contigmapping(vm_map_t map, vm_size_t size, vm_page_t m, vm_memattr_t memattr,
- int flags)
+vm_offset_t
+kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
+ vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+ vm_memattr_t memattr)
{
vm_object_t object = kernel_object;
- vm_offset_t addr, tmp_addr;
+ vm_offset_t addr;
+ vm_ooffset_t offset;
+ vm_page_t end_m, m;
+ int pflags, tries;
+ size = round_page(size);
vm_map_lock(map);
if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
vm_map_unlock(map);
return (0);
}
+ offset = addr - VM_MIN_KERNEL_ADDRESS;
vm_object_reference(object);
- vm_map_insert(map, object, addr - VM_MIN_KERNEL_ADDRESS,
- addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
- vm_map_unlock(map);
+ vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
+ VM_PROT_ALL, 0);
+ if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
+ pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY;
+ else
+ pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY;
+ if (flags & M_ZERO)
+ pflags |= VM_ALLOC_ZERO;
VM_OBJECT_LOCK(object);
- for (tmp_addr = addr; tmp_addr < addr + size; tmp_addr += PAGE_SIZE) {
- if (memattr != VM_MEMATTR_DEFAULT)
- pmap_page_set_memattr(m, memattr);
- vm_page_insert(m, object,
- OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
- if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
- pmap_zero_page(m);
- m->valid = VM_PAGE_BITS_ALL;
- m++;
- }
- VM_OBJECT_UNLOCK(object);
- vm_map_wire(map, addr, addr + size,
- VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
- return (addr);
-}
-
-vm_offset_t
-kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
- vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
- vm_memattr_t memattr)
-{
- vm_offset_t ret;
- vm_page_t pages;
- u_long npgs;
- int tries;
-
- size = round_page(size);
- npgs = size >> PAGE_SHIFT;
tries = 0;
retry:
- pages = vm_phys_alloc_contig(npgs, low, high, alignment, boundary);
- if (pages == NULL) {
+ m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags,
+ atop(size), low, high, alignment, boundary, memattr);
+ if (m == NULL) {
+ VM_OBJECT_UNLOCK(object);
if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
+ vm_map_unlock(map);
vm_contig_grow_cache(tries, low, high);
+ vm_map_lock(map);
+ VM_OBJECT_LOCK(object);
tries++;
goto retry;
}
- ret = 0;
- } else {
- ret = contigmapping(map, size, pages, memattr, flags);
- if (ret == 0)
- vm_page_release_contig(pages, npgs);
+ vm_map_delete(map, addr, addr + size);
+ vm_map_unlock(map);
+ return (0);
+ }
+ end_m = m + atop(size);
+ for (; m < end_m; m++) {
+ if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+ m->valid = VM_PAGE_BITS_ALL;
}
- return (ret);
+ VM_OBJECT_UNLOCK(object);
+ vm_map_unlock(map);
+ vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM |
+ VM_MAP_WIRE_NOHOLES);
+ return (addr);
}
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 28f68ea01c88..3a887afa2bfb 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -137,6 +137,7 @@ SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD,
static uma_zone_t fakepg_zone;
+static struct vnode *vm_page_alloc_init(vm_page_t m);
static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
static void vm_page_queue_remove(int queue, vm_page_t m);
static void vm_page_enqueue(int queue, vm_page_t m);
@@ -1481,6 +1482,155 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
}
/*
+ * vm_page_alloc_contig:
+ *
+ * Allocate a contiguous set of physical pages of the given size "npages"
+ * from the free lists. All of the physical pages must be at or above
+ * the given physical address "low" and below the given physical address
+ * "high". The given value "alignment" determines the alignment of the
+ * first physical page in the set. If the given value "boundary" is
+ * non-zero, then the set of physical pages cannot cross any physical
+ * address boundary that is a multiple of that value. Both "alignment"
+ * and "boundary" must be a power of two.
+ *
+ * If the specified memory attribute, "memattr", is VM_MEMATTR_DEFAULT,
+ * then the memory attribute setting for the physical pages is configured
+ * to the object's memory attribute setting. Otherwise, the memory
+ * attribute setting for the physical pages is configured to "memattr",
+ * overriding the object's memory attribute setting. However, if the
+ * object's memory attribute setting is not VM_MEMATTR_DEFAULT, then the
+ * memory attribute setting for the physical pages cannot be configured
+ * to VM_MEMATTR_DEFAULT.
+ *
+ * The caller must always specify an allocation class.
+ *
+ * allocation classes:
+ * VM_ALLOC_NORMAL normal process request
+ * VM_ALLOC_SYSTEM system *really* needs a page
+ * VM_ALLOC_INTERRUPT interrupt time request
+ *
+ * optional allocation flags:
+ * VM_ALLOC_NOBUSY do not set the flag VPO_BUSY on the page
+ * VM_ALLOC_NOOBJ page is not associated with an object and
+ * should not have the flag VPO_BUSY set
+ * VM_ALLOC_WIRED wire the allocated page
+ * VM_ALLOC_ZERO prefer a zeroed page
+ *
+ * This routine may not sleep.
+ */
+vm_page_t
+vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
+ u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
+ vm_paddr_t boundary, vm_memattr_t memattr)
+{
+ struct vnode *drop;
+ vm_page_t deferred_vdrop_list, m, m_ret;
+ u_int flags, oflags;
+ int req_class;
+
+ KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0),
+ ("vm_page_alloc_contig: inconsistent object/req"));
+ if (object != NULL) {
+ VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
+ KASSERT(object->type == OBJT_PHYS,
+ ("vm_page_alloc_contig: object %p isn't OBJT_PHYS",
+ object));
+ }
+ KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero"));
+ req_class = req & VM_ALLOC_CLASS_MASK;
+
+ /*
+ * The page daemon is allowed to dig deeper into the free page list.
+ */
+ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
+ req_class = VM_ALLOC_SYSTEM;
+
+ deferred_vdrop_list = NULL;
+ mtx_lock(&vm_page_queue_free_mtx);
+ if (cnt.v_free_count + cnt.v_cache_count >= npages +
+ cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM &&
+ cnt.v_free_count + cnt.v_cache_count >= npages +
+ cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT &&
+ cnt.v_free_count + cnt.v_cache_count >= npages)) {
+#if VM_NRESERVLEVEL > 0
+retry:
+#endif
+ m_ret = vm_phys_alloc_contig(npages, low, high, alignment,
+ boundary);
+ } else {
+ mtx_unlock(&vm_page_queue_free_mtx);
+ atomic_add_int(&vm_pageout_deficit, npages);
+ pagedaemon_wakeup();
+ return (NULL);
+ }
+ if (m_ret != NULL)
+ for (m = m_ret; m < &m_ret[npages]; m++) {
+ drop = vm_page_alloc_init(m);
+ if (drop != NULL) {
+ /*
+ * Enqueue the vnode for deferred vdrop().
+ *
+ * Once the pages are removed from the free
+ * page list, "pageq" can be safely abused to
+ * construct a short-lived list of vnodes.
+ */
+ m->pageq.tqe_prev = (void *)drop;
+ m->pageq.tqe_next = deferred_vdrop_list;
+ deferred_vdrop_list = m;
+ }
+ }
+ else {
+#if VM_NRESERVLEVEL > 0
+ if (vm_reserv_reclaim_contig(npages << PAGE_SHIFT, low, high,
+ alignment, boundary))
+ goto retry;
+#endif
+ }
+ mtx_unlock(&vm_page_queue_free_mtx);
+ if (m_ret == NULL)
+ return (NULL);
+
+ /*
+ * Initialize the pages. Only the PG_ZERO flag is inherited.
+ */
+ flags = 0;
+ if ((req & VM_ALLOC_ZERO) != 0)
+ flags = PG_ZERO;
+ if ((req & VM_ALLOC_WIRED) != 0)
+ atomic_add_int(&cnt.v_wire_count, npages);
+ oflags = VPO_UNMANAGED;
+ if (object != NULL) {
+ if ((req & VM_ALLOC_NOBUSY) == 0)
+ oflags |= VPO_BUSY;
+ if (object->memattr != VM_MEMATTR_DEFAULT &&
+ memattr == VM_MEMATTR_DEFAULT)
+ memattr = object->memattr;
+ }
+ for (m = m_ret; m < &m_ret[npages]; m++) {
+ m->aflags = 0;
+ m->flags &= flags;
+ if ((req & VM_ALLOC_WIRED) != 0)
+ m->wire_count = 1;
+ /* Unmanaged pages don't use "act_count". */
+ m->oflags = oflags;
+ if (memattr != VM_MEMATTR_DEFAULT)
+ pmap_page_set_memattr(m, memattr);
+ if (object != NULL)
+ vm_page_insert(m, object, pindex);
+ else
+ m->pindex = pindex;
+ pindex++;
+ }
+ while (deferred_vdrop_list != NULL) {
+ vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
+ deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
+ }
+ if (vm_paging_needed())
+ pagedaemon_wakeup();
+ return (m_ret);
+}
+
+/*
* Initialize a page that has been freshly dequeued from a freelist.
* The caller has to drop the vnode returned, if it is not NULL.
*
@@ -1488,7 +1638,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
*
* To be called with vm_page_queue_free_mtx held.
*/
-struct vnode *
+static struct vnode *
vm_page_alloc_init(vm_page_t m)
{
struct vnode *drop;
@@ -1529,9 +1679,6 @@ vm_page_alloc_init(vm_page_t m)
}
/* Don't clear the PG_ZERO flag; we'll need it later. */
m->flags &= PG_ZERO;
- m->aflags = 0;
- m->oflags = VPO_UNMANAGED;
- /* Unmanaged pages don't use "act_count". */
return (drop);
}
@@ -1598,6 +1745,7 @@ vm_page_alloc_freelist(int flind, int req)
/*
* Initialize the page. Only the PG_ZERO flag is inherited.
*/
+ m->aflags = 0;
flags = 0;
if ((req & VM_ALLOC_ZERO) != 0)
flags = PG_ZERO;
@@ -1610,6 +1758,8 @@ vm_page_alloc_freelist(int flind, int req)
atomic_add_int(&cnt.v_wire_count, 1);
m->wire_count = 1;
}
+ /* Unmanaged pages don't use "act_count". */
+ m->oflags = VPO_UNMANAGED;
if (drop != NULL)
vdrop(drop);
if (vm_paging_needed())
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 7099b70dd408..151710d48957 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -359,8 +359,10 @@ void vm_pageq_remove(vm_page_t m);
void vm_page_activate (vm_page_t);
vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int);
+vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
+ u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
+ vm_paddr_t boundary, vm_memattr_t memattr);
vm_page_t vm_page_alloc_freelist(int, int);
-struct vnode *vm_page_alloc_init(vm_page_t);
vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
void vm_page_cache(vm_page_t);
void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t);
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
index 1793ed83259c..8c026e6d8ddc 100644
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -29,11 +29,17 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+/*
+ * Physical memory system implementation
+ *
+ * Any external functions defined by this module are only to be used by the
+ * virtual memory system.
+ */
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_ddb.h"
-#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -45,7 +51,6 @@ __FBSDID("$FreeBSD$");
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
-#include <sys/vnode.h>
#include <ddb/ddb.h>
@@ -55,7 +60,6 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
-#include <vm/vm_reserv.h>
/*
* VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
@@ -755,12 +759,12 @@ vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
{
struct vm_freelist *fl;
struct vm_phys_seg *seg;
- struct vnode *vp;
vm_paddr_t pa, pa_last, size;
- vm_page_t deferred_vdrop_list, m, m_ret;
+ vm_page_t m, m_ret;
u_long npages_end;
- int domain, flind, i, oind, order, pind;
+ int domain, flind, oind, order, pind;
+ mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
#if VM_NDOMAIN > 1
domain = PCPU_GET(domain);
#else
@@ -773,13 +777,8 @@ vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
("vm_phys_alloc_contig: alignment must be a power of 2"));
KASSERT((boundary & (boundary - 1)) == 0,
("vm_phys_alloc_contig: boundary must be a power of 2"));
- deferred_vdrop_list = NULL;
/* Compute the queue that is the best fit for npages. */
for (order = 0; (1 << order) < npages; order++);
- mtx_lock(&vm_page_queue_free_mtx);
-#if VM_NRESERVLEVEL > 0
-retry:
-#endif
for (flind = 0; flind < vm_nfreelists; flind++) {
for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
for (pind = 0; pind < VM_NFREEPOOL; pind++) {
@@ -838,11 +837,6 @@ retry:
}
}
}
-#if VM_NRESERVLEVEL > 0
- if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary))
- goto retry;
-#endif
- mtx_unlock(&vm_page_queue_free_mtx);
return (NULL);
done:
for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
@@ -855,31 +849,10 @@ done:
vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
fl = (*seg->free_queues)[m_ret->pool];
vm_phys_split_pages(m_ret, oind, fl, order);
- for (i = 0; i < npages; i++) {
- m = &m_ret[i];
- vp = vm_page_alloc_init(m);
- if (vp != NULL) {
- /*
- * Enqueue the vnode for deferred vdrop().
- *
- * Unmanaged pages don't use "pageq", so it
- * can be safely abused to construct a short-
- * lived queue of vnodes.
- */
- m->pageq.tqe_prev = (void *)vp;
- m->pageq.tqe_next = deferred_vdrop_list;
- deferred_vdrop_list = m;
- }
- }
/* Return excess pages to the free lists. */
npages_end = roundup2(npages, 1 << imin(oind, order));
if (npages < npages_end)
vm_phys_free_contig(&m_ret[npages], npages_end - npages);
- mtx_unlock(&vm_page_queue_free_mtx);
- while (deferred_vdrop_list != NULL) {
- vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
- deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
- }
return (m_ret);
}
diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h
index 847a63328184..047e4a9b15d0 100644
--- a/sys/vm/vm_phys.h
+++ b/sys/vm/vm_phys.h
@@ -49,6 +49,9 @@ struct mem_affinity {
extern struct mem_affinity *mem_affinity;
+/*
+ * The following functions are only to be used by the virtual memory system.
+ */
void vm_phys_add_page(vm_paddr_t pa);
vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
u_long alignment, vm_paddr_t boundary);