diff options
Diffstat (limited to 'sys/vm')
-rw-r--r-- | sys/vm/pmap.h | 1 | ||||
-rw-r--r-- | sys/vm/swap_pager.c | 143 | ||||
-rw-r--r-- | sys/vm/vm_fault.c | 11 | ||||
-rw-r--r-- | sys/vm/vm_kern.c | 13 | ||||
-rw-r--r-- | sys/vm/vm_map.c | 11 | ||||
-rw-r--r-- | sys/vm/vm_mmap.c | 6 | ||||
-rw-r--r-- | sys/vm/vm_object.c | 5 | ||||
-rw-r--r-- | sys/vm/vm_pageout.c | 27 | ||||
-rw-r--r-- | sys/vm/vm_phys.c | 24 | ||||
-rw-r--r-- | sys/vm/vm_swapout.c | 127 |
10 files changed, 211 insertions, 157 deletions
diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index 669fe784b323..5f16f85b76f4 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -142,6 +142,7 @@ void pmap_init(void); boolean_t pmap_is_modified(vm_page_t m); boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t va); boolean_t pmap_is_referenced(vm_page_t m); +boolean_t pmap_is_valid_memattr(pmap_t, vm_memattr_t); vm_offset_t pmap_map(vm_offset_t *, vm_paddr_t, vm_paddr_t, int); int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa); diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index d7e0cea762ed..858a9e884626 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -403,11 +403,32 @@ static daddr_t swp_pager_getswapspace(int npages); /* * Metadata functions */ -static void swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t); +static daddr_t swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t); static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t); static void swp_pager_meta_free_all(vm_object_t); static daddr_t swp_pager_meta_ctl(vm_object_t, vm_pindex_t, int); +static void +swp_pager_init_freerange(daddr_t *start, daddr_t *num) +{ + + *start = SWAPBLK_NONE; + *num = 0; +} + +static void +swp_pager_update_freerange(daddr_t *start, daddr_t *num, daddr_t addr) +{ + + if (*start + *num == addr) { + (*num)++; + } else { + swp_pager_freeswapspace(*start, *num); + *start = addr; + *num = 1; + } +} + static void * swblk_trie_alloc(struct pctrie *ptree) { @@ -861,7 +882,9 @@ swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_size_t size) int n = 0; daddr_t blk = SWAPBLK_NONE; vm_pindex_t beg = start; /* save start index */ + daddr_t addr, n_free, s_free; + swp_pager_init_freerange(&s_free, &n_free); VM_OBJECT_WLOCK(object); while (size) { if (n == 0) { @@ -875,12 +898,15 @@ swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_size_t size) } } } - swp_pager_meta_build(object, start, blk); + addr = swp_pager_meta_build(object, start, blk); + if (addr != SWAPBLK_NONE) + swp_pager_update_freerange(&s_free, &n_free, addr); --size; ++start; ++blk; --n; } + swp_pager_freeswapspace(s_free, n_free); swp_pager_meta_free(object, start, n); VM_OBJECT_WUNLOCK(object); return (0); @@ -910,7 +936,7 @@ swap_pager_copy(vm_object_t srcobject, vm_object_t dstobject, vm_pindex_t offset, int destroysource) { vm_pindex_t i; - daddr_t dstaddr, first_free, num_free, srcaddr; + daddr_t dstaddr, n_free, s_free, srcaddr; VM_OBJECT_ASSERT_WLOCKED(srcobject); VM_OBJECT_ASSERT_WLOCKED(dstobject); @@ -937,42 +963,38 @@ swap_pager_copy(vm_object_t srcobject, vm_object_t dstobject, /* * Transfer source to destination. */ - first_free = SWAPBLK_NONE; - num_free = 0; + swp_pager_init_freerange(&s_free, &n_free); for (i = 0; i < dstobject->size; ++i) { srcaddr = swp_pager_meta_ctl(srcobject, i + offset, SWM_POP); if (srcaddr == SWAPBLK_NONE) continue; dstaddr = swp_pager_meta_ctl(dstobject, i, 0); - if (dstaddr == SWAPBLK_NONE) { - /* - * Destination has no swapblk and is not resident, - * copy source. - * - * swp_pager_meta_build() can sleep. - */ - vm_object_pip_add(srcobject, 1); - VM_OBJECT_WUNLOCK(srcobject); - vm_object_pip_add(dstobject, 1); - swp_pager_meta_build(dstobject, i, srcaddr); - vm_object_pip_wakeup(dstobject); - VM_OBJECT_WLOCK(srcobject); - vm_object_pip_wakeup(srcobject); - } else { + if (dstaddr != SWAPBLK_NONE) { /* * Destination has valid swapblk or it is represented - * by a resident page. We destroy the sourceblock. + * by a resident page. We destroy the source block. */ - if (first_free + num_free == srcaddr) - num_free++; - else { - swp_pager_freeswapspace(first_free, num_free); - first_free = srcaddr; - num_free = 1; - } + swp_pager_update_freerange(&s_free, &n_free, srcaddr); + continue; } + + /* + * Destination has no swapblk and is not resident, + * copy source. + * + * swp_pager_meta_build() can sleep. + */ + vm_object_pip_add(srcobject, 1); + VM_OBJECT_WUNLOCK(srcobject); + vm_object_pip_add(dstobject, 1); + dstaddr = swp_pager_meta_build(dstobject, i, srcaddr); + KASSERT(dstaddr == SWAPBLK_NONE, + ("Unexpected destination swapblk")); + vm_object_pip_wakeup(dstobject); + VM_OBJECT_WLOCK(srcobject); + vm_object_pip_wakeup(srcobject); } - swp_pager_freeswapspace(first_free, num_free); + swp_pager_freeswapspace(s_free, n_free); /* * Free left over swap blocks in source. @@ -1307,7 +1329,9 @@ swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count, { int i, n; boolean_t sync; + daddr_t addr, n_free, s_free; + swp_pager_init_freerange(&s_free, &n_free); if (count && ma[0]->object != object) { panic("swap_pager_putpages: object mismatch %p/%p", object, @@ -1322,8 +1346,11 @@ swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count, * check for bogus sysops * force sync if not pageout process */ - if (object->type != OBJT_SWAP) - swp_pager_meta_build(object, 0, SWAPBLK_NONE); + if (object->type != OBJT_SWAP) { + addr = swp_pager_meta_build(object, 0, SWAPBLK_NONE); + KASSERT(addr == SWAPBLK_NONE, + ("unexpected object swap block")); + } VM_OBJECT_WUNLOCK(object); n = 0; @@ -1391,11 +1418,11 @@ swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count, for (j = 0; j < n; ++j) { vm_page_t mreq = ma[i+j]; - swp_pager_meta_build( - mreq->object, - mreq->pindex, - blk + j - ); + addr = swp_pager_meta_build(mreq->object, mreq->pindex, + blk + j); + if (addr != SWAPBLK_NONE) + swp_pager_update_freerange(&s_free, &n_free, + addr); MPASS(mreq->dirty == VM_PAGE_BITS_ALL); mreq->oflags |= VPO_SWAPINPROG; bp->b_pages[j] = mreq; @@ -1453,6 +1480,7 @@ swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count, swp_pager_async_iodone(bp); } VM_OBJECT_WLOCK(object); + swp_pager_freeswapspace(s_free, n_free); } /* @@ -1783,14 +1811,15 @@ swp_pager_swblk_empty(struct swblk *sb, int start, int limit) * * The specified swapblk is added to the object's swap metadata. If * the swapblk is not valid, it is freed instead. Any previously - * assigned swapblk is freed. + * assigned swapblk is returned. */ -static void +static daddr_t swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk) { static volatile int swblk_zone_exhausted, swpctrie_zone_exhausted; struct swblk *sb, *sb1; vm_pindex_t modpi, rdpi; + daddr_t prev_swapblk; int error, i; VM_OBJECT_ASSERT_WLOCKED(object); @@ -1815,7 +1844,7 @@ swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk) sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, rdpi); if (sb == NULL) { if (swapblk == SWAPBLK_NONE) - return; + return (SWAPBLK_NONE); for (;;) { sb = uma_zalloc(swblk_zone, M_NOWAIT | (curproc == pageproc ? M_USE_RESERVE : 0)); @@ -1882,9 +1911,8 @@ allocated: MPASS(sb->p == rdpi); modpi = pindex % SWAP_META_PAGES; - /* Delete prior contents of metadata. */ - if (sb->d[modpi] != SWAPBLK_NONE) - swp_pager_freeswapspace(sb->d[modpi], 1); + /* Return prior contents of metadata. */ + prev_swapblk = sb->d[modpi]; /* Enter block into metadata. */ sb->d[modpi] = swapblk; @@ -1896,6 +1924,7 @@ allocated: SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, rdpi); uma_zfree(swblk_zone, sb); } + return (prev_swapblk); } /* @@ -1912,7 +1941,7 @@ static void swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count) { struct swblk *sb; - daddr_t first_free, num_free; + daddr_t n_free, s_free; vm_pindex_t last; int i, limit, start; @@ -1920,8 +1949,7 @@ swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count) if (object->type != OBJT_SWAP || count == 0) return; - first_free = SWAPBLK_NONE; - num_free = 0; + swp_pager_init_freerange(&s_free, &n_free); last = pindex + count; for (;;) { sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, @@ -1934,13 +1962,7 @@ swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count) for (i = start; i < limit; i++) { if (sb->d[i] == SWAPBLK_NONE) continue; - if (first_free + num_free == sb->d[i]) - num_free++; - else { - swp_pager_freeswapspace(first_free, num_free); - first_free = sb->d[i]; - num_free = 1; - } + swp_pager_update_freerange(&s_free, &n_free, sb->d[i]); sb->d[i] = SWAPBLK_NONE; } if (swp_pager_swblk_empty(sb, 0, start) && @@ -1951,7 +1973,7 @@ swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count) } pindex = sb->p + SWAP_META_PAGES; } - swp_pager_freeswapspace(first_free, num_free); + swp_pager_freeswapspace(s_free, n_free); } /* @@ -1964,7 +1986,7 @@ static void swp_pager_meta_free_all(vm_object_t object) { struct swblk *sb; - daddr_t first_free, num_free; + daddr_t n_free, s_free; vm_pindex_t pindex; int i; @@ -1972,26 +1994,19 @@ swp_pager_meta_free_all(vm_object_t object) if (object->type != OBJT_SWAP) return; - first_free = SWAPBLK_NONE; - num_free = 0; + swp_pager_init_freerange(&s_free, &n_free); for (pindex = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( &object->un_pager.swp.swp_blks, pindex)) != NULL;) { pindex = sb->p + SWAP_META_PAGES; for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->d[i] == SWAPBLK_NONE) continue; - if (first_free + num_free == sb->d[i]) - num_free++; - else { - swp_pager_freeswapspace(first_free, num_free); - first_free = sb->d[i]; - num_free = 1; - } + swp_pager_update_freerange(&s_free, &n_free, sb->d[i]); } SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p); uma_zfree(swblk_zone, sb); } - swp_pager_freeswapspace(first_free, num_free); + swp_pager_freeswapspace(s_free, n_free); } /* diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 48a6bf51473e..9b8d90a423af 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -270,8 +270,8 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot, int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold) { vm_page_t m, m_map; -#if (defined(__amd64__) || defined(__i386__) || defined(__aarch64__)) && \ - VM_NRESERVLEVEL > 0 +#if (defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ + __ARM_ARCH >= 6) || defined(__i386__)) && VM_NRESERVLEVEL > 0 vm_page_t m_super; int flags; #endif @@ -285,8 +285,8 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot, return (KERN_FAILURE); m_map = m; psind = 0; -#if (defined(__amd64__) || defined(__i386__) || defined(__aarch64__)) && \ - VM_NRESERVLEVEL > 0 +#if (defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ + __ARM_ARCH >= 6) || defined(__i386__)) && VM_NRESERVLEVEL > 0 if ((m->flags & PG_FICTITIOUS) == 0 && (m_super = vm_reserv_to_superpage(m)) != NULL && rounddown2(vaddr, pagesizes[m_super->psind]) >= fs->entry->start && @@ -462,7 +462,8 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type, pidx <= pager_last; pidx += npages, m = vm_page_next(&m[npages - 1])) { vaddr = fs->entry->start + IDX_TO_OFF(pidx) - fs->entry->offset; -#if defined(__amd64__) || defined(__i386__) || defined(__aarch64__) +#if defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ + __ARM_ARCH >= 6) || defined(__i386__) psind = m->psind; if (psind > 0 && ((vaddr & (pagesizes[psind] - 1)) != 0 || pidx + OFF_TO_IDX(pagesizes[psind]) - 1 > pager_last || diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 724205c95309..b10579d60357 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -700,23 +700,28 @@ kmem_bootstrap_free(vm_offset_t start, vm_size_t size) { #if defined(__i386__) || defined(__amd64__) struct vm_domain *vmd; - vm_offset_t end; + vm_offset_t end, va; vm_paddr_t pa; vm_page_t m; end = trunc_page(start + size); start = round_page(start); - (void)vm_map_remove(kernel_map, start, end); - for (; start < end; start += PAGE_SIZE) { - pa = pmap_kextract(start); + for (va = start; va < end; va += PAGE_SIZE) { + pa = pmap_kextract(va); m = PHYS_TO_VM_PAGE(pa); vmd = vm_pagequeue_domain(m); vm_domain_free_lock(vmd); vm_phys_free_pages(m, 0); + vmd->vmd_page_count++; vm_domain_free_unlock(vmd); + + vm_domain_freecnt_inc(vmd, 1); + vm_cnt.v_page_count++; } + pmap_remove(kernel_pmap, start, end); + (void)vmem_add(kernel_arena, start, end - start, M_WAITOK); #endif } diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 97162082dd0b..c2a7128137a2 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1277,10 +1277,9 @@ charged: vm_object_clear_flag(object, OBJ_ONEMAPPING); VM_OBJECT_WUNLOCK(object); } else if (prev_entry != &map->header && - prev_entry->eflags == protoeflags && + (prev_entry->eflags & ~MAP_ENTRY_USER_WIRED) == protoeflags && (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 && - prev_entry->end == start && prev_entry->wired_count == 0 && - (prev_entry->cred == cred || + prev_entry->end == start && (prev_entry->cred == cred || (prev_entry->object.vm_object != NULL && prev_entry->object.vm_object->cred == cred)) && vm_object_coalesce(prev_entry->object.vm_object, @@ -1295,7 +1294,11 @@ charged: */ if (prev_entry->inheritance == inheritance && prev_entry->protection == prot && - prev_entry->max_protection == max) { + prev_entry->max_protection == max && + prev_entry->wired_count == 0) { + KASSERT((prev_entry->eflags & MAP_ENTRY_USER_WIRED) == + 0, ("prev_entry %p has incoherent wiring", + prev_entry)); if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0) map->size += end - prev_entry->end; prev_entry->end = end; diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 72de144ec8ef..e184b8ff5c1f 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -600,6 +600,12 @@ kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot) addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); +#ifdef COMPAT_FREEBSD32 + if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { + if (((addr + size) & 0xffffffff) < addr) + return (EINVAL); + } else +#endif if (addr + size < addr) return (EINVAL); diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 71ac3e3a8539..38d66e1a90ea 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -2142,8 +2142,9 @@ vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, next_size >>= PAGE_SHIFT; next_pindex = OFF_TO_IDX(prev_offset) + prev_size; - if ((prev_object->ref_count > 1) && - (prev_object->size != next_pindex)) { + if (prev_object->ref_count > 1 && + prev_object->size != next_pindex && + (prev_object->flags & OBJ_ONEMAPPING) == 0) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 416c36d9c4bf..6881319e4d9d 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -152,7 +152,6 @@ static int vm_pageout_oom_seq = 12; static int vm_pageout_update_period; static int disable_swap_pageouts; static int lowmem_period = 10; -static time_t lowmem_uptime; static int swapdev_enabled; static int vm_panic_on_oom = 0; @@ -1856,12 +1855,17 @@ vm_pageout_oom(int shortage) } } -static void -vm_pageout_lowmem(struct vm_domain *vmd) +static bool +vm_pageout_lowmem(void) { + static int lowmem_ticks = 0; + int last; + + last = atomic_load_int(&lowmem_ticks); + while ((u_int)(ticks - last) / hz >= lowmem_period) { + if (atomic_fcmpset_int(&lowmem_ticks, &last, ticks) == 0) + continue; - if (vmd == VM_DOMAIN(0) && - time_uptime - lowmem_uptime >= lowmem_period) { /* * Decrease registered cache sizes. */ @@ -1873,14 +1877,16 @@ vm_pageout_lowmem(struct vm_domain *vmd) * drained above. */ uma_reclaim(); - lowmem_uptime = time_uptime; + return (true); } + return (false); } static void vm_pageout_worker(void *arg) { struct vm_domain *vmd; + u_int ofree; int addl_shortage, domain, shortage; bool target_met; @@ -1939,11 +1945,16 @@ vm_pageout_worker(void *arg) /* * Use the controller to calculate how many pages to free in - * this interval, and scan the inactive queue. + * this interval, and scan the inactive queue. If the lowmem + * handlers appear to have freed up some pages, subtract the + * difference from the inactive queue scan target. */ shortage = pidctrl_daemon(&vmd->vmd_pid, vmd->vmd_free_count); if (shortage > 0) { - vm_pageout_lowmem(vmd); + ofree = vmd->vmd_free_count; + if (vm_pageout_lowmem() && vmd->vmd_free_count > ofree) + shortage -= min(vmd->vmd_free_count - ofree, + (u_int)shortage); target_met = vm_pageout_scan_inactive(vmd, shortage, &addl_shortage); } else diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c index 6d008d4d9114..6f0440cfdc2d 100644 --- a/sys/vm/vm_phys.c +++ b/sys/vm/vm_phys.c @@ -115,9 +115,6 @@ static int __read_mostly vm_freelist_to_flind[VM_NFREELIST]; CTASSERT(VM_FREELIST_DEFAULT == 0); -#ifdef VM_FREELIST_ISADMA -#define VM_ISADMA_BOUNDARY 16777216 -#endif #ifdef VM_FREELIST_DMA32 #define VM_DMA32_BOUNDARY ((vm_paddr_t)1 << 32) #endif @@ -126,9 +123,6 @@ CTASSERT(VM_FREELIST_DEFAULT == 0); * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about * the ordering of the free list boundaries. */ -#if defined(VM_ISADMA_BOUNDARY) && defined(VM_LOWMEM_BOUNDARY) -CTASSERT(VM_ISADMA_BOUNDARY < VM_LOWMEM_BOUNDARY); -#endif #if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY) CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY); #endif @@ -442,12 +436,6 @@ vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end) * list boundaries. */ paddr = start; -#ifdef VM_FREELIST_ISADMA - if (paddr < VM_ISADMA_BOUNDARY && end > VM_ISADMA_BOUNDARY) { - vm_phys_create_seg(paddr, VM_ISADMA_BOUNDARY); - paddr = VM_ISADMA_BOUNDARY; - } -#endif #ifdef VM_FREELIST_LOWMEM if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) { vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY); @@ -486,11 +474,6 @@ vm_phys_init(void) npages = 0; for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) { seg = &vm_phys_segs[segind]; -#ifdef VM_FREELIST_ISADMA - if (seg->end <= VM_ISADMA_BOUNDARY) - vm_freelist_to_flind[VM_FREELIST_ISADMA] = 1; - else -#endif #ifdef VM_FREELIST_LOWMEM if (seg->end <= VM_LOWMEM_BOUNDARY) vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1; @@ -541,13 +524,6 @@ vm_phys_init(void) #else seg->first_page = PHYS_TO_VM_PAGE(seg->start); #endif -#ifdef VM_FREELIST_ISADMA - if (seg->end <= VM_ISADMA_BOUNDARY) { - flind = vm_freelist_to_flind[VM_FREELIST_ISADMA]; - KASSERT(flind >= 0, - ("vm_phys_init: ISADMA flind < 0")); - } else -#endif #ifdef VM_FREELIST_LOWMEM if (seg->end <= VM_LOWMEM_BOUNDARY) { flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM]; diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c index d4e6e3a86770..343ff830f0c6 100644 --- a/sys/vm/vm_swapout.c +++ b/sys/vm/vm_swapout.c @@ -158,13 +158,14 @@ static struct mtx vm_daemon_mtx; /* Allow for use by vm_pageout before vm_daemon is initialized. */ MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); +static int swapped_cnt; + static void swapclear(struct proc *); static int swapout(struct proc *); static void vm_swapout_map_deactivate_pages(vm_map_t, long); static void vm_swapout_object_deactivate_pages(pmap_t, vm_object_t, long); static void swapout_procs(int action); static void vm_req_vmdaemon(int req); -static void vm_thread_swapin(struct thread *td); static void vm_thread_swapout(struct thread *td); /* @@ -563,7 +564,7 @@ vm_thread_swapout(struct thread *td) * Bring the kernel stack for a specified thread back in. */ static void -vm_thread_swapin(struct thread *td) +vm_thread_swapin(struct thread *td, int oom_alloc) { vm_object_t ksobj; vm_page_t ma[KSTACK_MAX_PAGES]; @@ -572,7 +573,7 @@ vm_thread_swapin(struct thread *td) pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; VM_OBJECT_WLOCK(ksobj); - (void)vm_page_grab_pages(ksobj, 0, VM_ALLOC_NORMAL | VM_ALLOC_WIRED, ma, + (void)vm_page_grab_pages(ksobj, 0, oom_alloc | VM_ALLOC_WIRED, ma, pages); for (i = 0; i < pages;) { vm_page_assert_xbusied(ma[i]); @@ -605,8 +606,10 @@ void faultin(struct proc *p) { struct thread *td; + int oom_alloc; PROC_LOCK_ASSERT(p, MA_OWNED); + /* * If another process is swapping in this process, * just wait until it finishes. @@ -616,7 +619,11 @@ faultin(struct proc *p) msleep(&p->p_flag, &p->p_mtx, PVM, "faultin", 0); return; } + if ((p->p_flag & P_INMEM) == 0) { + oom_alloc = (p->p_flag & P_WKILLED) != 0 ? VM_ALLOC_SYSTEM : + VM_ALLOC_NORMAL; + /* * Don't let another thread swap process p out while we are * busy swapping it in. @@ -624,6 +631,10 @@ faultin(struct proc *p) ++p->p_lock; p->p_flag |= P_SWAPPINGIN; PROC_UNLOCK(p); + sx_xlock(&allproc_lock); + MPASS(swapped_cnt > 0); + swapped_cnt--; + sx_xunlock(&allproc_lock); /* * We hold no lock here because the list of threads @@ -631,14 +642,14 @@ faultin(struct proc *p) * swapped out. */ FOREACH_THREAD_IN_PROC(p, td) - vm_thread_swapin(td); + vm_thread_swapin(td, oom_alloc); + PROC_LOCK(p); swapclear(p); p->p_swtick = ticks; - wakeup(&p->p_flag); - /* Allow other threads to swap p out now. */ + wakeup(&p->p_flag); --p->p_lock; } } @@ -648,26 +659,38 @@ faultin(struct proc *p) * is enough space for them. Of course, if a process waits for a long * time, it will be swapped in anyway. */ -void -swapper(void) + +static struct proc * +swapper_selector(void) { - struct proc *p, *pp; + struct proc *p, *res; struct thread *td; - int ppri, pri, slptime, swtime; - -loop: - if (vm_page_count_min()) { - vm_wait_min(); - goto loop; - } + int min_flag, ppri, pri, slptime, swtime; - pp = NULL; + sx_assert(&allproc_lock, SA_SLOCKED); + if (swapped_cnt == 0) + return (NULL); + res = NULL; ppri = INT_MIN; - sx_slock(&allproc_lock); + min_flag = vm_page_count_min(); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); - if (p->p_state == PRS_NEW || - p->p_flag & (P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM)) { + if (p->p_state == PRS_NEW || (p->p_flag & (P_SWAPPINGOUT | + P_SWAPPINGIN | P_INMEM)) != 0) { + PROC_UNLOCK(p); + continue; + } + if (p->p_state == PRS_NORMAL && (p->p_flag & P_WKILLED) != 0) { + /* + * A swapped-out process might have mapped a + * large portion of the system's pages as + * anonymous memory. There is no other way to + * release the memory other than to kill the + * process, for which we need to swap it in. + */ + return (p); + } + if (min_flag) { PROC_UNLOCK(p); continue; } @@ -690,7 +713,7 @@ loop: * selection. */ if (pri > ppri) { - pp = p; + res = p; ppri = pri; } } @@ -698,33 +721,40 @@ loop: } PROC_UNLOCK(p); } - sx_sunlock(&allproc_lock); + if (res != NULL) + PROC_LOCK(res); + return (res); +} - /* - * Nothing to do, back to sleep. - */ - if ((p = pp) == NULL) { - tsleep(&proc0, PVM, "swapin", MAXSLP * hz / 2); - goto loop; - } - PROC_LOCK(p); +void +swapper(void) +{ + struct proc *p; - /* - * Another process may be bringing or may have already - * brought this process in while we traverse all threads. - * Or, this process may even be being swapped out again. - */ - if (p->p_flag & (P_INMEM | P_SWAPPINGOUT | P_SWAPPINGIN)) { - PROC_UNLOCK(p); - goto loop; - } + for (;;) { + sx_slock(&allproc_lock); + p = swapper_selector(); + sx_sunlock(&allproc_lock); - /* - * We would like to bring someone in. - */ - faultin(p); - PROC_UNLOCK(p); - goto loop; + if (p == NULL) { + tsleep(&proc0, PVM, "swapin", MAXSLP * hz / 2); + } else { + PROC_LOCK_ASSERT(p, MA_OWNED); + + /* + * Another process may be bringing or may have + * already brought this process in while we + * traverse all threads. Or, this process may + * have exited or even being swapped out + * again. + */ + if (p->p_state == PRS_NORMAL && (p->p_flag & (P_INMEM | + P_SWAPPINGOUT | P_SWAPPINGIN)) == 0) { + faultin(p); + } + PROC_UNLOCK(p); + } + } } /* @@ -803,7 +833,12 @@ swapout_procs(int action) didswap = true; PROC_UNLOCK(p); - sx_slock(&allproc_lock); + if (didswap) { + sx_xlock(&allproc_lock); + swapped_cnt++; + sx_downgrade(&allproc_lock); + } else + sx_slock(&allproc_lock); PRELE(p); } sx_sunlock(&allproc_lock); |