| | | |
|---|---|---|
| author | svn2git <svn2git@FreeBSD.org> | 1994-05-01 08:00:00 +0000 |
| committer | svn2git <svn2git@FreeBSD.org> | 1994-05-01 08:00:00 +0000 |
| commit | a16f65c7d117419bd266c28a1901ef129a337569 (patch) | |
| tree | 2626602f66dc3551e7a7c7bc9ad763c3bc7ab40a /sys/vm | |
| parent | 8503f4f13f77abf7adc8f7e329c6f9c1d52b6a20 (diff) | |
Release FreeBSD 1.1 (tags: upstream/1.1.0_cvs, release/1.1.0_cvs)
This commit was manufactured to restore the state of the 1.1-RELEASE image.
Releases prior to 5.3-RELEASE omit the secure/ and crypto/ subdirs.
Diffstat (limited to 'sys/vm')
35 files changed, 4449 insertions(+), 2035 deletions(-)
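The first file touched below, device_pager.c, is reworked to hand out "fictitious" page structures from a private free list (dev_pager_fakelist, managed by dev_pager_getfake()/dev_pager_putfake()) instead of preallocating a vm_page array per device. A minimal user-space C sketch of that free-list pattern follows; the names fake_page, FAKE_PAGE_CHUNK, getfake and putfake are hypothetical stand-ins for illustration, not kernel identifiers.

```c
/*
 * User-space sketch (not kernel code) of the pattern used by
 * dev_pager_getfake()/dev_pager_putfake(): when the list of spare
 * "fictitious" page structures is empty, one page-sized chunk is
 * malloc'd and carved into entries, so later allocations are just
 * list removals.  Names and the chunk size are assumptions.
 */
#include <stdio.h>
#include <stdlib.h>

#define FAKE_PAGE_CHUNK 4096            /* stands in for PAGE_SIZE */

struct fake_page {
	struct fake_page *next;         /* free-list link (pageq in the kernel) */
	unsigned long phys_addr;        /* physical address handed back by d_mmap */
	int busy;
};

static struct fake_page *fakelist;      /* stands in for dev_pager_fakelist */

static struct fake_page *
getfake(unsigned long paddr)
{
	struct fake_page *m;
	size_t i, n;

	if (fakelist == NULL) {
		/* carve one malloc'd chunk into many entries, as the kernel does */
		m = malloc(FAKE_PAGE_CHUNK);
		if (m == NULL)
			abort();
		n = FAKE_PAGE_CHUNK / sizeof(*m);
		for (i = 0; i < n; i++, m++) {
			m->next = fakelist;
			fakelist = m;
		}
	}
	m = fakelist;
	fakelist = m->next;
	m->phys_addr = paddr;
	m->busy = 1;
	return m;
}

static void
putfake(struct fake_page *m)
{
	/* return the entry to the free list for reuse */
	m->next = fakelist;
	fakelist = m;
}

int
main(void)
{
	struct fake_page *p = getfake(0x1000);
	printf("fake page at %p maps phys 0x%lx\n", (void *)p, p->phys_addr);
	putfake(p);
	return 0;
}
```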
diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index a057e4e8726b..01ce7305d5a1 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -1,7 +1,7 @@ /* * Copyright (c) 1990 University of Utah. - * Copyright (c) 1991 The Regents of the University of California. - * All rights reserved. + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer @@ -35,30 +35,29 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * from: @(#)device_pager.c 7.2 (Berkeley) 4/20/91 - * $Id: device_pager.c,v 1.4 1993/10/16 16:20:10 rgrimes Exp $ + * @(#)device_pager.c 8.1 (Berkeley) 6/11/93 */ /* * Page to/from special files. */ -#include "devpager.h" -#if NDEVPAGER > 0 +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/mman.h> +#include <sys/malloc.h> -#include "param.h" -#include "conf.h" -#include "mman.h" -#include "malloc.h" +#include <vm/vm.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> +#include <vm/device_pager.h> -#include "vm.h" -#include "vm_page.h" -#include "vm_kern.h" -#include "device_pager.h" #include "vnode.h" #include "specdev.h" -queue_head_t dev_pager_list; /* list of managed devices */ +queue_head_t dev_pager_list; /* list of managed devices */ +queue_head_t dev_pager_fakelist; /* list of available vm_page_t's */ #ifdef DEBUG int dpagerdebug = 0; @@ -68,7 +67,29 @@ int dpagerdebug = 0; #define DDB_FAIL 0x08 #endif -void +static vm_pager_t dev_pager_alloc + __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t)); +static void dev_pager_dealloc __P((vm_pager_t)); +static int dev_pager_getpage + __P((vm_pager_t, vm_page_t, boolean_t)); +static boolean_t dev_pager_haspage __P((vm_pager_t, vm_offset_t)); +static void dev_pager_init __P((void)); +static int dev_pager_putpage + __P((vm_pager_t, vm_page_t, boolean_t)); +static vm_page_t dev_pager_getfake __P((vm_offset_t)); +static void dev_pager_putfake __P((vm_page_t)); + +struct pagerops devicepagerops = { + dev_pager_init, + dev_pager_alloc, + dev_pager_dealloc, + dev_pager_getpage, + 0, + dev_pager_putpage, + dev_pager_haspage +}; + +static void dev_pager_init() { #ifdef DEBUG @@ -76,59 +97,68 @@ dev_pager_init() printf("dev_pager_init()\n"); #endif queue_init(&dev_pager_list); + queue_init(&dev_pager_fakelist); } -vm_pager_t -dev_pager_alloc(handle, size, prot) +static vm_pager_t +dev_pager_alloc(handle, size, prot, foff) caddr_t handle; vm_size_t size; vm_prot_t prot; + vm_offset_t foff; { dev_t dev; vm_pager_t pager; - int (*mapfunc)(), nprot; - register vm_object_t object; - register vm_page_t page; - register dev_pager_t devp; - register int npages, off; - extern int nullop(), enodev(); - + int (*mapfunc)(); + vm_object_t object; + dev_pager_t devp; + unsigned int npages, off; #ifdef DEBUG if (dpagerdebug & DDB_FOLLOW) - printf("dev_pager_alloc(%x, %x, %x)\n", handle, size, prot); + printf("dev_pager_alloc(%x, %x, %x, %x)\n", + handle, size, prot, foff); #endif +#ifdef DIAGNOSTIC /* * Pageout to device, should never happen. */ if (handle == NULL) panic("dev_pager_alloc called"); +#endif + + /* + * Make sure this device can be mapped. + */ + dev = (dev_t)(u_long)handle; + mapfunc = cdevsw[major(dev)].d_mmap; + if (mapfunc == NULL || mapfunc == enodev || mapfunc == nullop) + return(NULL); + + /* + * Offset should be page aligned. 
+ */ + if (foff & (PAGE_SIZE-1)) + return(NULL); + + /* + * Check that the specified range of the device allows the + * desired protection. + * + * XXX assumes VM_PROT_* == PROT_* + */ + npages = atop(round_page(size)); + for (off = foff; npages--; off += PAGE_SIZE) + if ((*mapfunc)(dev, off, (int)prot) == -1) + return(NULL); /* - * Look it up, creating as necessary + * Look up pager, creating as necessary. */ +top: pager = vm_pager_lookup(&dev_pager_list, handle); if (pager == NULL) { /* - * Validation. Make sure this device can be mapped - * and that range to map is acceptible to device. - */ - dev = ((struct vnode *) handle)->v_rdev; - mapfunc = cdevsw[major(dev)].d_mmap; - if (!mapfunc || mapfunc == enodev || mapfunc == nullop) - return(NULL); - nprot = 0; - if (prot & VM_PROT_READ) - nprot |= PROT_READ; - if (prot & VM_PROT_WRITE) - nprot |= PROT_WRITE; - if (prot & VM_PROT_EXECUTE) - nprot |= PROT_EXEC; - npages = atop(round_page(size)); - for (off = 0; npages--; off += PAGE_SIZE) - if ((*mapfunc)(dev, off, nprot) == -1) - return(NULL); - /* * Allocate and initialize pager structs */ pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK); @@ -139,102 +169,141 @@ dev_pager_alloc(handle, size, prot) free((caddr_t)pager, M_VMPAGER); return(NULL); } - devp->devp_dev = dev; - devp->devp_npages = atop(round_page(size)); pager->pg_handle = handle; pager->pg_ops = &devicepagerops; pager->pg_type = PG_DEVICE; pager->pg_data = (caddr_t)devp; + queue_init(&devp->devp_pglist); /* - * Allocate object and vm_page structures to describe memory + * Allocate object and associate it with the pager. */ - npages = devp->devp_npages; - object = devp->devp_object = vm_object_allocate(ptoa(npages)); + object = devp->devp_object = vm_object_allocate(0); vm_object_enter(object, pager); - vm_object_setpager(object, pager, (vm_offset_t)0, FALSE); - devp->devp_pages = (vm_page_t) - kmem_alloc(kernel_map, npages*sizeof(struct vm_page)); - off = 0; - for (page = devp->devp_pages; - page < &devp->devp_pages[npages]; page++) { - vm_object_lock(object); - vm_page_init(page, object, off); - page->phys_addr = - pmap_phys_address((*mapfunc)(dev, off, nprot)); - page->wire_count = 1; - page->fictitious = TRUE; - PAGE_WAKEUP(page); - vm_object_unlock(object); - off += PAGE_SIZE; - } + vm_object_setpager(object, pager, (vm_offset_t)foff, FALSE); /* * Finally, put it on the managed list so other can find it. + * First we re-lookup in case someone else beat us to this + * point (due to blocking in the various mallocs). If so, + * we free everything and start over. */ + if (vm_pager_lookup(&dev_pager_list, handle)) { + free((caddr_t)devp, M_VMPGDATA); + free((caddr_t)pager, M_VMPAGER); + goto top; + } queue_enter(&dev_pager_list, pager, vm_pager_t, pg_list); #ifdef DEBUG - if (dpagerdebug & DDB_ALLOC) - printf("dev_pager_alloc: pages %d@%x\n", - devp->devp_npages, devp->devp_pages); + if (dpagerdebug & DDB_ALLOC) { + printf("dev_pager_alloc: pager %x devp %x object %x\n", + pager, devp, object); + vm_object_print(object, FALSE); + } #endif } else { /* * vm_object_lookup() gains a reference and also * removes the object from the cache. 
*/ + object = vm_object_lookup(pager); +#ifdef DIAGNOSTIC devp = (dev_pager_t)pager->pg_data; - if (vm_object_lookup(pager) != devp->devp_object) + if (object != devp->devp_object) panic("dev_pager_setup: bad object"); - } -#ifdef DEBUG - if (dpagerdebug & DDB_ALLOC) { - printf("dev_pager_alloc: pager %x devp %x object %x\n", - pager, devp, object); - vm_object_print(object, FALSE); - } #endif + } return(pager); - } -void +static void dev_pager_dealloc(pager) vm_pager_t pager; { - dev_pager_t devp = (dev_pager_t)pager->pg_data; - register vm_object_t object; + dev_pager_t devp; + vm_object_t object; + vm_page_t m; #ifdef DEBUG if (dpagerdebug & DDB_FOLLOW) printf("dev_pager_dealloc(%x)\n", pager); #endif queue_remove(&dev_pager_list, pager, vm_pager_t, pg_list); + /* + * Get the object. + * Note: cannot use vm_object_lookup since object has already + * been removed from the hash chain. + */ + devp = (dev_pager_t)pager->pg_data; object = devp->devp_object; #ifdef DEBUG if (dpagerdebug & DDB_ALLOC) - printf("dev_pager_dealloc: devp %x object %x pages %d@%x\n", - devp, object, devp->devp_npages, devp->devp_pages); + printf("dev_pager_dealloc: devp %x object %x\n", devp, object); #endif - while (!queue_empty(&object->memq)) - vm_page_remove((vm_page_t)queue_first(&object->memq)); - kmem_free(kernel_map, devp->devp_pages, - devp->devp_npages * sizeof(struct vm_page)); + /* + * Free up our fake pages. + */ + while (!queue_empty(&devp->devp_pglist)) { + queue_remove_first(&devp->devp_pglist, m, vm_page_t, pageq); + dev_pager_putfake(m); + } free((caddr_t)devp, M_VMPGDATA); free((caddr_t)pager, M_VMPAGER); - pager->pg_data = 0; } +static int dev_pager_getpage(pager, m, sync) vm_pager_t pager; vm_page_t m; boolean_t sync; { + register vm_object_t object; + vm_offset_t offset, paddr; + vm_page_t page; + dev_t dev; + int s; + int (*mapfunc)(), prot; + #ifdef DEBUG if (dpagerdebug & DDB_FOLLOW) printf("dev_pager_getpage(%x, %x)\n", pager, m); #endif - return(VM_PAGER_BAD); + + object = m->object; + dev = (dev_t)(u_long)pager->pg_handle; + offset = m->offset + object->paging_offset; + prot = PROT_READ; /* XXX should pass in? */ + mapfunc = cdevsw[major(dev)].d_mmap; + + if (mapfunc == NULL || mapfunc == enodev || mapfunc == nullop) + panic("dev_pager_getpage: no map function"); + + paddr = pmap_phys_address((*mapfunc)((dev_t)dev, (int)offset, prot)); +#ifdef DIAGNOSTIC + if (paddr == -1) + panic("dev_pager_getpage: map function returns error"); +#endif + /* + * Replace the passed in page with our own fake page and free + * up the original. 
+ */ + page = dev_pager_getfake(paddr); + queue_enter(&((dev_pager_t)pager->pg_data)->devp_pglist, + page, vm_page_t, pageq); + vm_object_lock(object); + vm_page_lock_queues(); + vm_page_free(m); + vm_page_unlock_queues(); + s = splhigh(); + vm_page_insert(page, object, offset); + splx(s); + PAGE_WAKEUP(m); + if (offset + PAGE_SIZE > object->size) + object->size = offset + PAGE_SIZE; /* XXX anal */ + vm_object_unlock(object); + + return(VM_PAGER_OK); } +static int dev_pager_putpage(pager, m, sync) vm_pager_t pager; vm_page_t m; @@ -245,11 +314,11 @@ dev_pager_putpage(pager, m, sync) printf("dev_pager_putpage(%x, %x)\n", pager, m); #endif if (pager == NULL) - return; + return 0; panic("dev_pager_putpage called"); } -boolean_t +static boolean_t dev_pager_haspage(pager, offset) vm_pager_t pager; vm_offset_t offset; @@ -260,4 +329,38 @@ dev_pager_haspage(pager, offset) #endif return(TRUE); } + +static vm_page_t +dev_pager_getfake(paddr) + vm_offset_t paddr; +{ + vm_page_t m; + int i; + + if (queue_empty(&dev_pager_fakelist)) { + m = (vm_page_t)malloc(PAGE_SIZE, M_VMPGDATA, M_WAITOK); + for (i = PAGE_SIZE / sizeof(*m); i > 0; i--) { + queue_enter(&dev_pager_fakelist, m, vm_page_t, pageq); + m++; + } + } + queue_remove_first(&dev_pager_fakelist, m, vm_page_t, pageq); + + m->flags = PG_BUSY | PG_CLEAN | PG_FAKE | PG_FICTITIOUS; + + m->wire_count = 1; + m->phys_addr = paddr; + + return(m); +} + +static void +dev_pager_putfake(m) + vm_page_t m; +{ +#ifdef DIAGNOSTIC + if (!(m->flags & PG_FICTITIOUS)) + panic("dev_pager_putfake: bad page"); #endif + queue_enter(&dev_pager_fakelist, m, vm_page_t, pageq); +} diff --git a/sys/vm/device_pager.h b/sys/vm/device_pager.h index eb60b3bd7ce9..7fc6e165bc78 100644 --- a/sys/vm/device_pager.h +++ b/sys/vm/device_pager.h @@ -1,7 +1,7 @@ /* * Copyright (c) 1990 University of Utah. - * Copyright (c) 1991 The Regents of the University of California. - * All rights reserved. + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer @@ -35,8 +35,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * from: @(#)device_pager.h 7.1 (Berkeley) 12/5/90 - * $Id: device_pager.h,v 1.2 1993/10/16 16:20:12 rgrimes Exp $ + * @(#)device_pager.h 8.1 (Berkeley) 6/11/93 */ #ifndef _DEVICE_PAGER_ @@ -46,34 +45,9 @@ * Device pager private data. */ struct devpager { - queue_head_t devp_list; /* list of managed devices */ - dev_t devp_dev; /* devno of device */ - vm_page_t devp_pages; /* page structs for device */ - int devp_npages; /* size of device in pages */ - int devp_count; /* reference count */ + queue_head_t devp_pglist; /* list of pages allocated */ vm_object_t devp_object; /* object representing this device */ }; typedef struct devpager *dev_pager_t; -#define DEV_PAGER_NULL ((dev_pager_t)0) - -#ifdef KERNEL - -void dev_pager_init(); -vm_pager_t dev_pager_alloc(); -void dev_pager_dealloc(); -boolean_t dev_pager_getpage(), dev_pager_putpage(); -boolean_t dev_pager_haspage(); - -struct pagerops devicepagerops = { - dev_pager_init, - dev_pager_alloc, - dev_pager_dealloc, - dev_pager_getpage, - dev_pager_putpage, - dev_pager_haspage -}; - -#endif - #endif /* _DEVICE_PAGER_ */ diff --git a/sys/vm/kern_lock.c b/sys/vm/kern_lock.c index 35f1327ae3c2..8bb028b5c834 100644 --- a/sys/vm/kern_lock.c +++ b/sys/vm/kern_lock.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * from: @(#)kern_lock.c 7.4 (Berkeley) 4/21/91 - * $Id: kern_lock.c,v 1.2 1993/10/16 16:20:13 rgrimes Exp $ + * $Id: kern_lock.c,v 1.3 1993/12/19 00:55:55 wollman Exp $ */ /* @@ -69,9 +69,12 @@ */ #include "param.h" +#include "systm.h" #include "vm_param.h" #include "lock.h" +#include "vm/vm.h" + /* XXX */ #include "proc.h" typedef int *thread_t; diff --git a/sys/vm/lock.h b/sys/vm/lock.h index aaf1738c360f..e393e1444ddd 100644 --- a/sys/vm/lock.h +++ b/sys/vm/lock.h @@ -91,7 +91,7 @@ typedef struct slock *simple_lock_t; */ struct lock { -#ifdef vax +#if defined(vax) /* * Efficient VAX implementation -- see field description below. */ @@ -103,8 +103,7 @@ struct lock { :0; simple_lock_data_t interlock; -#else vax -#ifdef ns32000 +#elif defined(ns32000) /* * Efficient ns32000 implementation -- * see field description below. @@ -117,7 +116,7 @@ struct lock { can_sleep:1, :0; -#else ns32000 +#else /* neither vax nor ns32000 */ /* Only the "interlock" field is used for hardware exclusion; * other fields are modified with normal instructions after * acquiring the interlock bit. @@ -129,7 +128,6 @@ struct lock { boolean_t waiting; /* Someone is sleeping on lock */ boolean_t can_sleep; /* Can attempts to lock go to sleep */ int read_count; /* Number of accepted readers */ -#endif /* ns32000 */ #endif /* vax */ char *thread; /* Thread that has lock, if recursive locking allowed */ /* (should be thread_t, but but we then have mutually @@ -145,7 +143,7 @@ void simple_lock_init(); void simple_lock(); void simple_unlock(); boolean_t simple_lock_try(); -#else NCPUS > 1 +#else /* NCPUS == 1 */ /* * No multiprocessor locking is necessary. */ @@ -174,4 +172,17 @@ boolean_t lock_try_read_to_write(); void lock_set_recursive(); void lock_clear_recursive(); +/* + * Try to get semi-meaningful wait messages into thread_sleep... + */ +extern void thread_sleep_(int, simple_lock_t, const char *); +#if __GNUC__ >= 2 +#define thread_sleep(a,b,c) thread_sleep_((a), (b), __FUNCTION__) +#else +#define thread_sleep(a,b,c) thread_sleep_((a), (b), "vmslp") +#endif +#define thread_sleep_new thread_sleep_ +extern void thread_wakeup(int); + + #endif /* !_LOCK_H_ */ diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index b5745ebab9bf..3c9537cec333 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)pmap.h 7.4 (Berkeley) 5/7/91 - * $Id: pmap.h,v 1.2 1993/10/16 16:20:16 rgrimes Exp $ + * $Id: pmap.h,v 1.4 1994/01/31 04:19:41 davidg Exp $ */ /* @@ -84,12 +84,11 @@ pmap_t pmap_create(); void pmap_destroy(); void pmap_reference(); void pmap_remove(); -void pmap_page_protect(); +void pmap_page_protect(vm_offset_t, vm_prot_t); void pmap_protect(); void pmap_enter(); vm_offset_t pmap_extract(); void pmap_update(); -void pmap_collect(); void pmap_activate(); void pmap_deactivate(); void pmap_copy(); diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 0f4f088feb74..bf3f38f9ce8d 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -35,24 +35,20 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ - * from: @(#)swap_pager.c 7.4 (Berkeley) 5/7/91 - * $Id: swap_pager.c,v 1.2 1993/10/16 16:20:19 rgrimes Exp $ + * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ + * from: @(#)swap_pager.c 7.4 (Berkeley) 5/7/91 + * + * $Id: swap_pager.c,v 1.17.2.1 1994/03/07 02:07:06 rgrimes Exp $ */ /* - * Quick hack to page to dedicated partition(s). 
- * TODO: - * Add multiprocessor locks - * Deal with async writes in a better fashion + * Mostly rewritten by John Dyson with help from David Greenman, 12-Jan-1994 */ -#include "swappager.h" -#if NSWAPPAGER > 0 - #include "param.h" #include "proc.h" #include "buf.h" +#include "kernel.h" #include "systm.h" #include "specdev.h" #include "vnode.h" @@ -63,30 +59,27 @@ #include "vm_param.h" #include "queue.h" #include "lock.h" +#include "vm.h" #include "vm_prot.h" #include "vm_object.h" #include "vm_page.h" #include "vm_pageout.h" #include "swap_pager.h" +#include "vm_map.h" -#define NSWSIZES 16 /* size of swtab */ -#define NPENDINGIO 64 /* max # of pending cleans */ -#define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */ - -#ifdef DEBUG -int swpagerdebug = 0 /*0x100*/; -#define SDB_FOLLOW 0x001 -#define SDB_INIT 0x002 -#define SDB_ALLOC 0x004 -#define SDB_IO 0x008 -#define SDB_WRITE 0x010 -#define SDB_FAIL 0x020 -#define SDB_ALLOCBLK 0x040 -#define SDB_FULL 0x080 -#define SDB_ANOM 0x100 -#define SDB_ANOMPANIC 0x200 +#ifndef NPENDINGIO +#define NPENDINGIO 96 #endif +extern int nswbuf; +int nswiodone; +extern int vm_pageout_rate_limit; +static int cleandone; +extern int hz; +int swap_pager_full; +extern vm_map_t pager_map; +extern int vm_pageout_pages_needed; + struct swpagerclean { queue_head_t spc_list; int spc_flags; @@ -94,94 +87,77 @@ struct swpagerclean { sw_pager_t spc_swp; vm_offset_t spc_kva; vm_page_t spc_m; -} swcleanlist[NPENDINGIO]; +} swcleanlist [NPENDINGIO] ; + typedef struct swpagerclean *swp_clean_t; +extern vm_map_t kernel_map; + /* spc_flags values */ -#define SPC_FREE 0x00 -#define SPC_BUSY 0x01 -#define SPC_DONE 0x02 -#define SPC_ERROR 0x04 -#define SPC_DIRTY 0x08 - -struct swtab { - vm_size_t st_osize; /* size of object (bytes) */ - int st_bsize; /* vs. 
size of swap block (DEV_BSIZE units) */ -#ifdef DEBUG - u_long st_inuse; /* number in this range in use */ - u_long st_usecnt; /* total used of this size */ -#endif -} swtab[NSWSIZES+1]; +#define SPC_ERROR 0x01 -#ifdef DEBUG -int swap_pager_pendingio; /* max pending async "clean" ops */ -int swap_pager_poip; /* pageouts in progress */ -int swap_pager_piip; /* pageins in progress */ -#endif +#define SWB_EMPTY (-1) +queue_head_t swap_pager_done; /* list of compileted page cleans */ queue_head_t swap_pager_inuse; /* list of pending page cleans */ queue_head_t swap_pager_free; /* list of free pager clean structs */ queue_head_t swap_pager_list; /* list of "named" anon regions */ +queue_head_t swap_pager_un_list; /* list of "unnamed" anon pagers */ +#define SWAP_FREE_NEEDED 0x1 /* need a swap block */ +int swap_pager_needflags; + +static queue_head_t *swp_qs[]={ + &swap_pager_list, &swap_pager_un_list, (queue_head_t *) 0 +}; + +struct pagerops swappagerops = { + swap_pager_init, + swap_pager_alloc, + swap_pager_dealloc, + swap_pager_getpage, + swap_pager_getmulti, + swap_pager_putpage, + swap_pager_haspage +}; + +extern int nswbuf; + +int npendingio = NPENDINGIO; +int pendingiowait; +int require_swap_init; +void swap_pager_finish(); +int dmmin, dmmax; +extern int vm_page_count; + +struct buf * getpbuf() ; +void relpbuf(struct buf *bp) ; void swap_pager_init() { - register swp_clean_t spc; - register int i, bsize; + register int i; extern int dmmin, dmmax; - int maxbsize; -#ifdef DEBUG - if (swpagerdebug & (SDB_FOLLOW|SDB_INIT)) - printf("swpg_init()\n"); -#endif dfltpagerops = &swappagerops; queue_init(&swap_pager_list); + queue_init(&swap_pager_un_list); /* * Initialize clean lists */ queue_init(&swap_pager_inuse); + queue_init(&swap_pager_done); queue_init(&swap_pager_free); - for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) { - queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); - spc->spc_flags = SPC_FREE; - } + + require_swap_init = 1; /* * Calculate the swap allocation constants. */ - if (dmmin == 0) { - dmmin = DMMIN; - if (dmmin < CLBYTES/DEV_BSIZE) - dmmin = CLBYTES/DEV_BSIZE; - } - if (dmmax == 0) - dmmax = DMMAX; - /* - * Fill in our table of object size vs. allocation size - */ - bsize = btodb(PAGE_SIZE); - if (bsize < dmmin) - bsize = dmmin; - maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE); - if (maxbsize > dmmax) - maxbsize = dmmax; - for (i = 0; i < NSWSIZES; i++) { - swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize)); - swtab[i].st_bsize = bsize; -#ifdef DEBUG - if (swpagerdebug & SDB_INIT) - printf("swpg_init: ix %d, size %x, bsize %x\n", - i, swtab[i].st_osize, swtab[i].st_bsize); -#endif - if (bsize >= maxbsize) - break; - bsize *= 2; - } - swtab[i].st_osize = 0; - swtab[i].st_bsize = bsize; + dmmin = CLBYTES/DEV_BSIZE; + dmmax = btodb(SWB_NPAGES*NBPG)*2; + } /* @@ -190,20 +166,41 @@ swap_pager_init() * we should not wait for memory as it could resulting in deadlock. 
*/ vm_pager_t -swap_pager_alloc(handle, size, prot) +swap_pager_alloc(handle, size, prot, offset) caddr_t handle; register vm_size_t size; vm_prot_t prot; + vm_offset_t offset; { register vm_pager_t pager; register sw_pager_t swp; - struct swtab *swt; int waitok; - -#ifdef DEBUG - if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) - printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot); -#endif + int i,j; + + if (require_swap_init) { + register swp_clean_t spc; + struct buf *bp; + /* + * kva's are allocated here so that we dont need to keep + * doing kmem_alloc pageables at runtime + */ + for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) { + spc->spc_kva = kmem_alloc_pageable(pager_map, NBPG); + if (!spc->spc_kva) { + break; + } + spc->spc_bp = malloc(sizeof( *bp), M_TEMP, + M_NOWAIT); + if (!spc->spc_bp) { + kmem_free_wakeup(pager_map, spc->spc_kva, NBPG); + break; + } + spc->spc_flags = 0; + queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); + } + require_swap_init = 0; + } + /* * If this is a "named" anonymous region, look it up and * return the appropriate pager if it exists. @@ -221,50 +218,42 @@ swap_pager_alloc(handle, size, prot) return(pager); } } + + if (swap_pager_full) + return(NULL); + /* * Pager doesn't exist, allocate swap management resources * and initialize. */ - waitok = handle ? M_WAITOK : M_NOWAIT; + waitok = handle ? M_WAITOK : M_NOWAIT; pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); if (pager == NULL) return(NULL); swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); if (swp == NULL) { -#ifdef DEBUG - if (swpagerdebug & SDB_FAIL) - printf("swpg_alloc: swpager malloc failed\n"); -#endif free((caddr_t)pager, M_VMPAGER); return(NULL); } size = round_page(size); - for (swt = swtab; swt->st_osize; swt++) - if (size <= swt->st_osize) - break; -#ifdef DEBUG - swt->st_inuse++; - swt->st_usecnt++; -#endif swp->sw_osize = size; - swp->sw_bsize = swt->st_bsize; - swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize; + swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * NBPG) - 1) / btodb(SWB_NPAGES*NBPG); swp->sw_blocks = (sw_blk_t) malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), - M_VMPGDATA, M_NOWAIT); + M_VMPGDATA, waitok); if (swp->sw_blocks == NULL) { free((caddr_t)swp, M_VMPGDATA); free((caddr_t)pager, M_VMPAGER); -#ifdef DEBUG - if (swpagerdebug & SDB_FAIL) - printf("swpg_alloc: sw_blocks malloc failed\n"); - swt->st_inuse--; - swt->st_usecnt--; -#endif - return(FALSE); + return(NULL); } bzero((caddr_t)swp->sw_blocks, swp->sw_nblocks * sizeof(*swp->sw_blocks)); + + for (i = 0; i < swp->sw_nblocks; i++) { + for (j = 0; j < SWB_NPAGES; j++) + swp->sw_blocks[i].swb_block[j] = SWB_EMPTY; + } + swp->sw_poip = 0; if (handle) { vm_object_t object; @@ -282,211 +271,644 @@ swap_pager_alloc(handle, size, prot) } else { swp->sw_flags = 0; queue_init(&pager->pg_list); + queue_enter(&swap_pager_un_list, pager, vm_pager_t, pg_list); } pager->pg_handle = handle; pager->pg_ops = &swappagerops; pager->pg_type = PG_SWAP; pager->pg_data = (caddr_t)swp; -#ifdef DEBUG - if (swpagerdebug & SDB_ALLOC) - printf("swpg_alloc: pg_data %x, %x of %x at %x\n", - swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks); -#endif return(pager); } +/* + * returns disk block associated with pager and offset + * additionally, as a side effect returns a flag indicating + * if the block has been written + */ + +static int * +swap_pager_diskaddr(swp, offset, valid) + sw_pager_t swp; + vm_offset_t offset; + int *valid; +{ + register sw_blk_t swb; + int ix; + + if 
(valid) + *valid = 0; + ix = offset / (SWB_NPAGES*NBPG); + if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { + return(FALSE); + } + swb = &swp->sw_blocks[ix]; + ix = (offset % (SWB_NPAGES*NBPG)) / NBPG; + if (valid) + *valid = swb->swb_valid & (1<<ix); + return &swb->swb_block[ix]; +} + +/* + * Utility routine to set the valid (written) bit for + * a block associated with a pager and offset + */ +static void +swap_pager_setvalid(swp, offset, valid) + sw_pager_t swp; + vm_offset_t offset; + int valid; +{ + register sw_blk_t swb; + int ix; + + ix = offset / (SWB_NPAGES*NBPG); + if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) + return; + + swb = &swp->sw_blocks[ix]; + ix = (offset % (SWB_NPAGES*NBPG)) / NBPG; + if (valid) + swb->swb_valid |= (1 << ix); + else + swb->swb_valid &= ~(1 << ix); + return; +} + +/* + * this routine frees swap blocks from a specified pager + */ +void +swap_pager_freespace(pager, start, size) + vm_pager_t pager; + vm_offset_t start; + vm_offset_t size; +{ + sw_pager_t swp = (sw_pager_t) pager->pg_data; + vm_offset_t i; + int s; + + s = splbio(); + for (i = start; i < round_page(start + size - 1); i += NBPG) { + int *addr = swap_pager_diskaddr(swp, i, 0); + if (addr && *addr != SWB_EMPTY) { + rlist_free(&swapmap, *addr, *addr + btodb(NBPG) - 1); + *addr = SWB_EMPTY; + swap_pager_full = 0; + } + } + splx(s); +} + +/* + * swap_pager_reclaim frees up over-allocated space from all pagers + * this eliminates internal fragmentation due to allocation of space + * for segments that are never swapped to. It has been written so that + * it does not block until the rlist_free operation occurs; it keeps + * the queues consistant. + */ + +/* + * Maximum number of blocks (pages) to reclaim per pass + */ +#define MAXRECLAIM 256 + +void +swap_pager_reclaim() +{ + vm_pager_t p; + sw_pager_t swp; + int i, j, k; + int s; + int reclaimcount; + static int reclaims[MAXRECLAIM]; + static int in_reclaim; + +/* + * allow only one process to be in the swap_pager_reclaim subroutine + */ + s = splbio(); + if (in_reclaim) { + tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0); + splx(s); + return; + } + in_reclaim = 1; + reclaimcount = 0; + + /* for each pager queue */ + for (k = 0; swp_qs[k]; k++) { + + p = (vm_pager_t) queue_first(swp_qs[k]); + while (reclaimcount < MAXRECLAIM && + !queue_end(swp_qs[k], (queue_entry_t) p)) { + + /* + * see if any blocks associated with a pager has been + * allocated but not used (written) + */ + swp = (sw_pager_t) p->pg_data; + for (i = 0; i < swp->sw_nblocks; i++) { + sw_blk_t swb = &swp->sw_blocks[i]; + for (j = 0; j < SWB_NPAGES; j++) { + if (swb->swb_block[j] != SWB_EMPTY && + (swb->swb_valid & (1 << j)) == 0) { + reclaims[reclaimcount++] = swb->swb_block[j]; + swb->swb_block[j] = SWB_EMPTY; + if (reclaimcount >= MAXRECLAIM) + goto rfinished; + } + } + } + p = (vm_pager_t) queue_next(&p->pg_list); + } + } + +rfinished: + +/* + * free the blocks that have been added to the reclaim list + */ + for (i = 0; i < reclaimcount; i++) { + rlist_free(&swapmap, reclaims[i], reclaims[i] + btodb(NBPG) - 1); + wakeup((caddr_t) &in_reclaim); + swap_pager_full = 0; + } + + splx(s); + in_reclaim = 0; + wakeup((caddr_t) &in_reclaim); +} + + +/* + * swap_pager_copy copies blocks from one pager to another and + * destroys the source pager + */ + +void +swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset) + vm_pager_t srcpager; + vm_offset_t srcoffset; + vm_pager_t dstpager; + vm_offset_t dstoffset; + vm_offset_t offset; +{ + sw_pager_t srcswp, dstswp; + 
vm_offset_t i; + int s; + + srcswp = (sw_pager_t) srcpager->pg_data; + dstswp = (sw_pager_t) dstpager->pg_data; + +/* + * remove the source pager from the swap_pager internal queue + */ + s = splbio(); + if (srcswp->sw_flags & SW_NAMED) { + queue_remove(&swap_pager_list, srcpager, vm_pager_t, pg_list); + srcswp->sw_flags &= ~SW_NAMED; + } else { + queue_remove(&swap_pager_un_list, srcpager, vm_pager_t, pg_list); + } + + while (srcswp->sw_poip) { + tsleep((caddr_t)srcswp, PVM, "spgout", 0); + } + splx(s); + +/* + * clean all of the pages that are currently active and finished + */ + (void) swap_pager_clean(NULL, B_WRITE); + + s = splbio(); +/* + * clear source block before destination object + * (release allocated space) + */ + for (i = 0; i < offset + srcoffset; i += NBPG) { + int *addr = swap_pager_diskaddr(srcswp, i, 0); + if (addr && *addr != SWB_EMPTY) { + rlist_free(&swapmap, *addr, *addr + btodb(NBPG) - 1); + *addr = SWB_EMPTY; + swap_pager_full = 0; + } + } +/* + * transfer source to destination + */ + for (i = 0; i < dstswp->sw_osize; i += NBPG) { + int srcvalid, dstvalid; + int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset, + &srcvalid); + int *dstaddrp; + /* + * see if the source has space allocated + */ + if (srcaddrp && *srcaddrp != SWB_EMPTY) { + /* + * if the source is valid and the dest has no space, then + * copy the allocation from the srouce to the dest. + */ + if (srcvalid) { + dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid); + /* + * if the dest already has a valid block, deallocate the + * source block without copying. + */ + if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) { + rlist_free(&swapmap, *dstaddrp, *dstaddrp + btodb(NBPG) - 1); + *dstaddrp = SWB_EMPTY; + swap_pager_full = 0; + } + if (dstaddrp && *dstaddrp == SWB_EMPTY) { + *dstaddrp = *srcaddrp; + *srcaddrp = SWB_EMPTY; + swap_pager_setvalid(dstswp, i + dstoffset, 1); + } + } + /* + * if the source is not empty at this point, then deallocate the space. + */ + if (*srcaddrp != SWB_EMPTY) { + rlist_free(&swapmap, *srcaddrp, *srcaddrp + btodb(NBPG) - 1); + *srcaddrp = SWB_EMPTY; + swap_pager_full = 0; + } + } + } + +/* + * deallocate the rest of the source object + */ + for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += NBPG) { + int *srcaddrp = swap_pager_diskaddr(srcswp, i, 0); + if (srcaddrp && *srcaddrp != SWB_EMPTY) { + rlist_free(&swapmap, *srcaddrp, *srcaddrp + btodb(NBPG) - 1); + *srcaddrp = SWB_EMPTY; + swap_pager_full = 0; + } + } + + splx(s); + + free((caddr_t)srcswp->sw_blocks, M_VMPGDATA); + srcswp->sw_blocks = 0; + free((caddr_t)srcswp, M_VMPGDATA); + srcpager->pg_data = 0; + free((caddr_t)srcpager, M_VMPAGER); + + return; +} + + void swap_pager_dealloc(pager) vm_pager_t pager; { - register int i; + register int i,j; register sw_blk_t bp; register sw_pager_t swp; - struct swtab *swt; int s; -#ifdef DEBUG - /* save panic time state */ - if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) - return; - if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) - printf("swpg_dealloc(%x)\n", pager); -#endif /* * Remove from list right away so lookups will fail if we * block for pageout completion. 
*/ + s = splbio(); swp = (sw_pager_t) pager->pg_data; if (swp->sw_flags & SW_NAMED) { queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list); swp->sw_flags &= ~SW_NAMED; + } else { + queue_remove(&swap_pager_un_list, pager, vm_pager_t, pg_list); } -#ifdef DEBUG - for (swt = swtab; swt->st_osize; swt++) - if (swp->sw_osize <= swt->st_osize) - break; - swt->st_inuse--; -#endif - /* * Wait for all pageouts to finish and remove * all entries from cleaning list. */ - s = splbio(); + while (swp->sw_poip) { - swp->sw_flags |= SW_WANTED; - assert_wait((int)swp); - thread_block(); + tsleep((caddr_t)swp, PVM, "swpout", 0); } splx(s); + + (void) swap_pager_clean(NULL, B_WRITE); /* * Free left over swap blocks */ s = splbio(); - for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) - if (bp->swb_block) { -#ifdef DEBUG - if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL)) - printf("swpg_dealloc: blk %x\n", - bp->swb_block); -#endif - rlist_free(&swapmap, (unsigned)bp->swb_block, - (unsigned)bp->swb_block + swp->sw_bsize - 1); + for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) { + for (j = 0; j < SWB_NPAGES; j++) + if (bp->swb_block[j] != SWB_EMPTY) { + rlist_free(&swapmap, (unsigned)bp->swb_block[j], + (unsigned)bp->swb_block[j] + btodb(NBPG) - 1); + bp->swb_block[j] = SWB_EMPTY; + swap_pager_full = 0; } + } splx(s); + /* * Free swap management resources */ free((caddr_t)swp->sw_blocks, M_VMPGDATA); + swp->sw_blocks = 0; free((caddr_t)swp, M_VMPGDATA); + pager->pg_data = 0; free((caddr_t)pager, M_VMPAGER); } +/* + * swap_pager_getmulti can get multiple pages. + */ +int +swap_pager_getmulti(pager, m, count, reqpage, sync) + vm_pager_t pager; + vm_page_t *m; + int count; + int reqpage; + boolean_t sync; +{ + return swap_pager_io((sw_pager_t) pager->pg_data, m, count, reqpage, B_READ); +} + +/* + * swap_pager_getpage gets individual pages + */ +int swap_pager_getpage(pager, m, sync) vm_pager_t pager; vm_page_t m; boolean_t sync; { -#ifdef DEBUG - if (swpagerdebug & SDB_FOLLOW) - printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync); -#endif - return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ)); + vm_page_t marray[1]; + + marray[0] = m; + return swap_pager_io((sw_pager_t)pager->pg_data, marray, 1, 0, B_READ); } +/* + * swap_pager_putpage writes individual pages + */ +int swap_pager_putpage(pager, m, sync) vm_pager_t pager; vm_page_t m; boolean_t sync; { int flags; + vm_page_t marray[1]; + -#ifdef DEBUG - if (swpagerdebug & SDB_FOLLOW) - printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync); -#endif if (pager == NULL) { (void) swap_pager_clean(NULL, B_WRITE); - return; + return VM_PAGER_OK; } + + marray[0] = m; flags = B_WRITE; if (!sync) flags |= B_ASYNC; - return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags)); + return(swap_pager_io((sw_pager_t)pager->pg_data, marray, 1, 0, flags)); } -boolean_t -swap_pager_haspage(pager, offset) - vm_pager_t pager; +static inline int +swap_pager_block_index(swp, offset) + sw_pager_t swp; + vm_offset_t offset; +{ + return (offset / (SWB_NPAGES*NBPG)); +} + +static inline int +swap_pager_block_offset(swp, offset) + sw_pager_t swp; + vm_offset_t offset; +{ + return (offset % (SWB_NPAGES*NBPG)); +} + +/* + * _swap_pager_haspage returns TRUE if the pager has data that has + * been written out. 
+ */ +static boolean_t +_swap_pager_haspage(swp, offset) + sw_pager_t swp; vm_offset_t offset; { - register sw_pager_t swp; register sw_blk_t swb; int ix; -#ifdef DEBUG - if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) - printf("swpg_haspage(%x, %x) ", pager, offset); -#endif - swp = (sw_pager_t) pager->pg_data; - ix = offset / dbtob(swp->sw_bsize); + ix = offset / (SWB_NPAGES*NBPG); if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { -#ifdef DEBUG - if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK)) - printf("swpg_haspage: %x bad offset %x, ix %x\n", - swp->sw_blocks, offset, ix); -#endif return(FALSE); } swb = &swp->sw_blocks[ix]; - if (swb->swb_block) - ix = atop(offset % dbtob(swp->sw_bsize)); -#ifdef DEBUG - if (swpagerdebug & SDB_ALLOCBLK) - printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix); - if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) - printf("-> %c\n", - "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]); -#endif - if (swb->swb_block && (swb->swb_mask & (1 << ix))) - return(TRUE); + ix = (offset % (SWB_NPAGES*NBPG)) / NBPG; + if (swb->swb_block[ix] != SWB_EMPTY) { + if (swb->swb_valid & (1 << ix)) + return TRUE; + } + return(FALSE); } /* + * swap_pager_haspage is the externally accessible version of + * _swap_pager_haspage above. this routine takes a vm_pager_t + * for an argument instead of sw_pager_t. + */ +boolean_t +swap_pager_haspage(pager, offset) + vm_pager_t pager; + vm_offset_t offset; +{ + return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset); +} + +/* + * swap_pager_freepage is a convienience routine that clears the busy + * bit and deallocates a page. + */ +static void +swap_pager_freepage(m) + vm_page_t m; +{ + PAGE_WAKEUP(m); + vm_page_free(m); +} + +/* + * swap_pager_ridpages is a convienience routine that deallocates all + * but the required page. this is usually used in error returns that + * need to invalidate the "extra" readahead pages. + */ +static void +swap_pager_ridpages(m, count, reqpage) + vm_page_t *m; + int count; + int reqpage; +{ + int i; + int s; + + for (i = 0; i < count; i++) + if (i != reqpage) + swap_pager_freepage(m[i]); +} + +int swapwritecount=0; + +/* + * swap_pager_iodone1 is the completion routine for both reads and async writes + */ +void +swap_pager_iodone1(bp) + struct buf *bp; +{ + bp->b_flags |= B_DONE; + bp->b_flags &= ~B_ASYNC; + wakeup((caddr_t)bp); + if ((bp->b_flags & B_READ) == 0) + vwakeup(bp); +} +/* * Scaled down version of swap(). - * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed. * BOGUS: lower level IO routines expect a KVA so we have to map our * provided physical page into the KVA to keep them happy. */ -swap_pager_io(swp, m, flags) +int +swap_pager_io(swp, m, count, reqpage, flags) register sw_pager_t swp; - vm_page_t m; + vm_page_t *m; + int count, reqpage; int flags; { register struct buf *bp; register sw_blk_t swb; register int s; - int ix; + int i, ix; boolean_t rv; vm_offset_t kva, off; swp_clean_t spc; + int cluster; + vm_offset_t paging_offset; + vm_object_t object; + int reqaddr, mydskregion; + extern int dmmin, dmmax; -#ifdef DEBUG - /* save panic time state */ - if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) - return; - if (swpagerdebug & (SDB_FOLLOW|SDB_IO)) - printf("swpg_io(%x, %x, %x)\n", swp, m, flags); -#endif + spc = NULL; + object = m[reqpage]->object; + paging_offset = object->paging_offset; /* - * For reads (pageins) and synchronous writes, we clean up - * all completed async pageouts. 
+ * First determine if the page exists in the pager if this is + * a sync read. This quickly handles cases where we are + * following shadow chains looking for the top level object + * with the page. */ - if ((flags & B_ASYNC) == 0) { - s = splbio(); -#ifdef DEBUG + off = m[reqpage]->offset + paging_offset; + ix = swap_pager_block_index(swp, off); + if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { + /* printf("swap pager: out of range\n"); */ + swap_pager_ridpages(m, count, reqpage); + return(VM_PAGER_FAIL); + } + + + swb = &swp->sw_blocks[ix]; + off = swap_pager_block_offset(swp, off) / NBPG; + reqaddr = swb->swb_block[off]; + + /* make sure that our I/O request is contiguous */ + if (flags & B_READ) { + int first = 0, last = count; + int failed = 0; + int reqdskregion = reqaddr / dmmax; + int valid; + + if (reqaddr == SWB_EMPTY || + (swb->swb_valid & (1 << off)) == 0) { + swap_pager_ridpages(m, count, reqpage); + return(VM_PAGER_FAIL); + } + /* - * Check to see if this page is currently being cleaned. - * If it is, we just wait til the operation is done before - * continuing. + * search backwards for the first contiguous page to transfer */ - while (swap_pager_clean(m, flags&B_READ)) { - if (swpagerdebug & SDB_ANOM) - printf("swap_pager_io: page %x cleaning\n", m); - - swp->sw_flags |= SW_WANTED; - assert_wait((int)swp); - thread_block(); + for (i = reqpage - 1; i >= 0; --i) { + int *tmpaddr = swap_pager_diskaddr(swp, + m[i]->offset + paging_offset,&valid); + if (tmpaddr == 0 || failed || !valid || + *tmpaddr != reqaddr + btodb((i - reqpage) * NBPG)) { + failed = 1; + swap_pager_freepage(m[i]); + m[i] = 0; + if (first == 0) + first = i + 1; + } else { + mydskregion = *tmpaddr / dmmax; + if (mydskregion != reqdskregion) { + failed = 1; + swap_pager_freepage(m[i]); + m[i] = 0; + first = i + 1; + } + } + } + /* + * search forwards for the last contiguous page to transfer + */ + failed = 0; + for (i = reqpage + 1; i < count; i++) { + int *tmpaddr = swap_pager_diskaddr(swp, m[i]->offset + paging_offset,&valid); + if (tmpaddr == 0 || failed || !valid || + *tmpaddr != reqaddr + btodb((i - reqpage) * NBPG) ) { + failed = 1; + swap_pager_freepage(m[i]); + m[i] = 0; + if (last == count) + last = i; + } else { + mydskregion = *tmpaddr / dmmax; + if (mydskregion != reqdskregion) { + failed = 1; + swap_pager_freepage(m[i]); + m[i] = 0; + if (last == count) + last = i; + } + } + } + count = last; + if (first != 0) { + for (i = first; i < count; i++) { + m[i - first] = m[i]; + } + count -= first; + reqpage -= first; } -#else - (void) swap_pager_clean(m, flags&B_READ); -#endif - splx(s); + } + + /* + * at this point: + * "m" is a pointer to the array of vm_page_t for paging I/O + * "count" is the number of vm_page_t entries represented by "m" + * "object" is the vm_object_t for I/O + * "reqpage" is the index into "m" for the page actually faulted + */ + + /* + * For reads (pageins) and synchronous writes, we clean up + * all completed async pageouts. + */ + if ((flags & B_ASYNC) == 0) { + swap_pager_clean(NULL, flags); } /* * For async writes (pageouts), we cleanup completed pageouts so @@ -494,103 +916,162 @@ swap_pager_io(swp, m, flags) * page is already being cleaned. If it is, or no resources * are available, we try again later. 
*/ - else if (swap_pager_clean(m, B_WRITE) || - queue_empty(&swap_pager_free)) { -#ifdef DEBUG - if ((swpagerdebug & SDB_ANOM) && - !queue_empty(&swap_pager_free)) - printf("swap_pager_io: page %x already cleaning\n", m); -#endif - return(VM_PAGER_FAIL); + else if (swap_pager_clean(m[reqpage], B_WRITE)) { + swap_pager_ridpages(m, count, reqpage); + return VM_PAGER_TRYAGAIN; } + spc = NULL; /* we might not use an spc data structure */ + kva = 0; + /* - * Determine swap block and allocate as necessary. + * we allocate a new kva for transfers > 1 page + * but for transfers == 1 page, the swap_pager_free list contains + * entries that have pre-allocated kva's (for efficiency). */ - off = m->offset + m->object->paging_offset; - ix = off / dbtob(swp->sw_bsize); - if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { -#ifdef DEBUG - if (swpagerdebug & SDB_FAIL) - printf("swpg_io: bad offset %x+%x(%d) in %x\n", - m->offset, m->object->paging_offset, - ix, swp->sw_blocks); -#endif - return(VM_PAGER_FAIL); + if ((flags & B_READ) && count > 1) { + kva = kmem_alloc_pageable(pager_map, count*NBPG); } - s = splbio(); - swb = &swp->sw_blocks[ix]; - off = off % dbtob(swp->sw_bsize); - if (flags & B_READ) { - if (swb->swb_block == 0 || - (swb->swb_mask & (1 << atop(off))) == 0) { -#ifdef DEBUG - if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL)) - printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n", - swp->sw_blocks, - swb->swb_block, atop(off), - swb->swb_mask, - m->offset, m->object->paging_offset); -#endif - /* XXX: should we zero page here?? */ - splx(s); - return(VM_PAGER_FAIL); + + + if (!kva) { + /* + * if a kva has not been allocated, we can only do a one page transfer, + * so we free the other pages that might have been allocated by vm_fault. + */ + for (i = 0; i < count; i++) { + if (i != reqpage) { + swap_pager_freepage(m[i]); + m[i] = 0; + } } - } else if (swb->swb_block == 0) { -#ifdef old - swb->swb_block = rmalloc(swapmap, swp->sw_bsize); - if (swb->swb_block == 0) { -#else - if (!rlist_alloc(&swapmap, (unsigned)swp->sw_bsize, - (unsigned *)&swb->swb_block)) { -#endif -#ifdef DEBUG - if (swpagerdebug & SDB_FAIL) - printf("swpg_io: rmalloc of %x failed\n", - swp->sw_bsize); -#endif - splx(s); - return(VM_PAGER_FAIL); + count = 1; + m[0] = m[reqpage]; + reqpage = 0; + /* + * get a swap pager clean data structure, block until we get it + */ + if (queue_empty(&swap_pager_free)) { +/* + if ((flags & (B_ASYNC|B_READ)) == B_ASYNC) + return VM_PAGER_TRYAGAIN; +*/ + s = splbio(); + if( curproc == pageproc) + (void) swap_pager_clean(NULL, B_WRITE); + else + wakeup((caddr_t) &vm_pages_needed); + while (queue_empty(&swap_pager_free)) { + swap_pager_needflags |= SWAP_FREE_NEEDED; + tsleep((caddr_t)&swap_pager_free, + PVM, "swpfre", 0); + if (curproc == pageproc) + (void) swap_pager_clean(NULL, B_WRITE); + else + wakeup((caddr_t) &vm_pages_needed); + } + splx(s); } -#ifdef DEBUG - if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK)) - printf("swpg_io: %x alloc blk %x at ix %x\n", - swp->sw_blocks, swb->swb_block, ix); -#endif + queue_remove_first(&swap_pager_free, spc, swp_clean_t, spc_list); + kva = spc->spc_kva; + } + + + /* + * Determine swap block and allocate as necessary. + * We try to get SWB_NPAGES first, but then we punt and try + * to get one page. If that fails, we look at the allocation + * data structures to find unused but allocated pages in other + * pagers allocations. 
+ */ + if (reqaddr == SWB_EMPTY) { + int blk; + int tries; + int ntoget; + + tries = 0; + s = splbio(); + /* + * if any other pages have been allocated in this block, we + * only try to get one page. + */ + for (i = 0; i < SWB_NPAGES; i++) { + if (swb->swb_block[i] != SWB_EMPTY) + break; + } + + ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1; +retrygetspace: + if (ntoget == SWB_NPAGES && + rlist_alloc(&swapmap, btodb(ntoget * NBPG),&blk)) { + for (i = 0; i < ntoget; i++) + swb->swb_block[i] = blk + btodb(NBPG) * i; + } else if (!rlist_alloc(&swapmap, btodb(NBPG), &swb->swb_block[off])) { + /* + * if the allocation has failed, we try to reclaim space and + * retry. + */ + if (++tries == 1) { + swap_pager_reclaim(); + goto retrygetspace; + } + /* + * here on swap space full. + */ + if (spc) + queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); + if (swap_pager_full == 0) + printf("swap_pager: out of swap space !!!\n"); + swap_pager_full = 1; + swap_pager_ridpages(m, count, reqpage); + splx(s); + return(VM_PAGER_TRYAGAIN); + } + splx(s); + swap_pager_full = 0; } - splx(s); /* - * Allocate a kernel virtual address and initialize so that PTE - * is available for lower level IO drivers. + * map our page(s) into kva for I/O */ - kva = vm_pager_map_page(m); + for (i = 0; i < count; i++) { + pmap_enter(vm_map_pmap(pager_map), kva + NBPG * i, + VM_PAGE_TO_PHYS(m[i]), VM_PROT_ALL, TRUE); + } + /* - * Get a swap buffer header and perform the IO + * get the base I/O offset into the swap file */ - s = splbio(); - while (bswlist.av_forw == NULL) { + off = swap_pager_block_offset(swp, m[0]->offset + paging_offset) / NBPG; + #ifdef DEBUG - if (swpagerdebug & SDB_ANOM) - printf("swap_pager_io: wait on swbuf for %x (%d)\n", - m, flags); + if (flags & B_READ && count > 1) + printf("obj: 0x%x off: 0x%x poff: 0x%x off: 0x%x, sz: %d blk: %d op: %s\n", + object, m[0]->offset, paging_offset, off, count, swb->swb_block[off], flags&B_READ?"r":"w"); #endif - bswlist.b_flags |= B_WANTED; - sleep((caddr_t)&bswlist, PSWP+1); + + s = splbio(); + /* + * Get a swap buffer header and perform the IO + */ + if (spc) { + bp = spc->spc_bp; + bzero(bp, sizeof *bp); + bp->b_spc = spc; + } else { + bp = getpbuf(); } - bp = bswlist.av_forw; - bswlist.av_forw = bp->av_forw; - splx(s); bp->b_flags = B_BUSY | (flags & B_READ); bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ - bp->b_un.b_addr = (caddr_t)kva; - bp->b_blkno = swb->swb_block + btodb(off); + bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; + bp->b_un.b_addr = (caddr_t) kva; + bp->b_blkno = swb->swb_block[off]; VHOLD(swapdev_vp); bp->b_vp = swapdev_vp; if (swapdev_vp->v_type == VBLK) bp->b_dev = swapdev_vp->v_rdev; - bp->b_bcount = PAGE_SIZE; + bp->b_bcount = NBPG*count; if ((bp->b_flags & B_READ) == 0) swapdev_vp->v_numoutput++; @@ -599,92 +1080,139 @@ swap_pager_io(swp, m, flags) * and place a "cleaning" entry on the inuse queue. 
*/ if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { -#ifdef DEBUG - if (queue_empty(&swap_pager_free)) - panic("swpg_io: lost spc"); -#endif - queue_remove_first(&swap_pager_free, - spc, swp_clean_t, spc_list); -#ifdef DEBUG - if (spc->spc_flags != SPC_FREE) - panic("swpg_io: bad free spc"); -#endif - spc->spc_flags = SPC_BUSY; - spc->spc_bp = bp; + spc->spc_flags = 0; spc->spc_swp = swp; - spc->spc_kva = kva; - spc->spc_m = m; + spc->spc_m = m[reqpage]; + /* + * the completion routine for async writes + */ bp->b_flags |= B_CALL; bp->b_iodone = swap_pager_iodone; - s = splbio(); + bp->b_dirtyoff = 0; + bp->b_dirtyend = bp->b_bcount; swp->sw_poip++; queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list); - -#ifdef DEBUG - swap_pager_poip++; - if (swpagerdebug & SDB_WRITE) - printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n", - bp, swp, spc, swp->sw_poip); - if ((swpagerdebug & SDB_ALLOCBLK) && - (swb->swb_mask & (1 << atop(off))) == 0) - printf("swpg_io: %x write blk %x+%x\n", - swp->sw_blocks, swb->swb_block, atop(off)); -#endif - swb->swb_mask |= (1 << atop(off)); - splx(s); + /* + * we remember that we have used a block for paging. + */ + swb->swb_valid |= (1 << off); + } else { + /* + * here for sync write or any read + */ + if ((flags & B_READ) == 0) { + /* + * if we are writing, we remember that we have + * actually used a block for paging. + */ + swb->swb_valid |= (1 << off); + swp->sw_poip++; + } else { + swp->sw_piip++; + } + /* + * the completion routine for reads and sync writes + */ + bp->b_flags |= B_CALL; + bp->b_iodone = swap_pager_iodone1; } -#ifdef DEBUG - if (swpagerdebug & SDB_IO) - printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n", - bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m)); -#endif + /* + * perform the I/O + */ VOP_STRATEGY(bp); - if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { -#ifdef DEBUG - if (swpagerdebug & SDB_IO) - printf("swpg_io: IO started: bp %x\n", bp); -#endif + if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) { + if ((bp->b_flags & B_DONE) == B_DONE) { + swap_pager_clean(NULL, flags); + } + splx(s); return(VM_PAGER_PEND); } - s = splbio(); -#ifdef DEBUG - if (flags & B_READ) - swap_pager_piip++; - else - swap_pager_poip++; -#endif + + /* + * wait for the sync I/O to complete + */ while ((bp->b_flags & B_DONE) == 0) { - assert_wait((int)bp); - thread_block(); + tsleep((caddr_t)bp, PVM, (flags & B_READ)?"swread":"swwrt", 0); } -#ifdef DEBUG - if (flags & B_READ) - --swap_pager_piip; - else - --swap_pager_poip; -#endif rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK; - bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY); - bp->av_forw = bswlist.av_forw; - bswlist.av_forw = bp; + bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); + + if (bp->b_flags & B_READ) { + --swp->sw_piip; + if (swp->sw_piip == 0) + wakeup((caddr_t) swp); + } else { + --swp->sw_poip; + if (swp->sw_poip == 0) + wakeup((caddr_t) swp); + } + if (bp->b_vp) brelvp(bp); - if (bswlist.b_flags & B_WANTED) { - bswlist.b_flags &= ~B_WANTED; - thread_wakeup((int)&bswlist); - } + + /* + * release the physical I/O buffer + */ + if (!spc) + relpbuf(bp); + + splx(s); + + /* + * remove the mapping for kernel virtual + */ + pmap_remove(vm_map_pmap(pager_map), kva, kva + count * NBPG); + + /* + * if we have written the page, then indicate that the page + * is clean. 
+ */ if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) { - m->clean = TRUE; - pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + m[reqpage]->flags |= PG_CLEAN; + pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage])); + /* + * optimization, if a page has been read during the + * pageout process, we activate it. + */ + if ( (m[reqpage]->flags & PG_ACTIVE) == 0 && + pmap_is_referenced(VM_PAGE_TO_PHYS(m[reqpage]))) + vm_page_activate(m[reqpage]); + } + + if (spc) { + /* + * if we have used an spc, we need to free it. + */ + queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); + } else { + for (i = 0; i < count; i++) { + pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); + m[i]->flags |= PG_CLEAN; + m[i]->flags &= ~PG_LAUNDRY; + if (i != reqpage) { + /* + * whether or not to leave the page activated + * is up in the air, but we should put the page + * on a page queue somewhere. (it already is in + * the object). + * After some emperical results, it is best + * to deactivate the readahead pages. + */ + vm_page_deactivate(m[i]); + + /* + * just in case someone was asking for this + * page we now tell them that it is ok to use + */ + m[i]->flags &= ~PG_FAKE; + PAGE_WAKEUP(m[i]); + } + } +/* + * and free the kernel virtual addresses + */ + kmem_free_wakeup(pager_map, kva, count * NBPG); } - splx(s); -#ifdef DEBUG - if (swpagerdebug & SDB_IO) - printf("swpg_io: IO done: bp %x, rv %d\n", bp, rv); - if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_FAIL) - printf("swpg_io: IO error\n"); -#endif - vm_pager_unmap_page(kva); return(rv); } @@ -696,127 +1224,58 @@ swap_pager_clean(m, rw) register swp_clean_t spc, tspc; register int s; -#ifdef DEBUG - /* save panic time state */ - if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) - return; - if (swpagerdebug & SDB_FOLLOW) - printf("swpg_clean(%x, %d)\n", m, rw); -#endif tspc = NULL; + if (queue_empty(&swap_pager_done)) + return FALSE; for (;;) { + s = splbio(); /* - * Look up and removal from inuse list must be done + * Look up and removal from done list must be done * at splbio() to avoid conflicts with swap_pager_iodone. */ - s = splbio(); - spc = (swp_clean_t) queue_first(&swap_pager_inuse); - while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) { - if ((spc->spc_flags & SPC_DONE) && - swap_pager_finish(spc)) { - queue_remove(&swap_pager_inuse, spc, - swp_clean_t, spc_list); - break; - } - if (m && m == spc->spc_m) { -#ifdef DEBUG - if (swpagerdebug & SDB_ANOM) - printf("swap_pager_clean: page %x on list, flags %x\n", - m, spc->spc_flags); -#endif - tspc = spc; - } - spc = (swp_clean_t) queue_next(&spc->spc_list); + spc = (swp_clean_t) queue_first(&swap_pager_done); + while (!queue_end(&swap_pager_done, (queue_entry_t)spc)) { + pmap_remove(vm_map_pmap(pager_map), spc->spc_kva, ((vm_offset_t) spc->spc_kva) + NBPG); + swap_pager_finish(spc); + queue_remove(&swap_pager_done, spc, swp_clean_t, spc_list); + goto doclean; } /* * No operations done, thats all we can do for now. */ - if (queue_end(&swap_pager_inuse, (queue_entry_t)spc)) - break; + splx(s); + break; /* * The desired page was found to be busy earlier in * the scan but has since completed. 
*/ +doclean: if (tspc && tspc == spc) { -#ifdef DEBUG - if (swpagerdebug & SDB_ANOM) - printf("swap_pager_clean: page %x done while looking\n", - m); -#endif tspc = NULL; } - spc->spc_flags = SPC_FREE; - vm_pager_unmap_page(spc->spc_kva); + spc->spc_flags = 0; queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); -#ifdef DEBUG - if (swpagerdebug & SDB_WRITE) - printf("swpg_clean: free spc %x\n", spc); -#endif - } -#ifdef DEBUG - /* - * If we found that the desired page is already being cleaned - * mark it so that swap_pager_iodone() will not set the clean - * flag before the pageout daemon has another chance to clean it. - */ - if (tspc && rw == B_WRITE) { - if (swpagerdebug & SDB_ANOM) - printf("swap_pager_clean: page %x on clean list\n", - tspc); - tspc->spc_flags |= SPC_DIRTY; + ++cleandone; + splx(s); } -#endif - splx(s); -#ifdef DEBUG - if (swpagerdebug & SDB_WRITE) - printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE); - if ((swpagerdebug & SDB_ANOM) && tspc) - printf("swpg_clean: %s of cleaning page %x\n", - rw == B_READ ? "get" : "put", m); -#endif return(tspc ? TRUE : FALSE); } +void swap_pager_finish(spc) register swp_clean_t spc; { - vm_object_t object = spc->spc_m->object; - - /* - * Mark the paging operation as done. - * (XXX) If we cannot get the lock, leave it til later. - * (XXX) Also we are assuming that an async write is a - * pageout operation that has incremented the counter. - */ - if (!vm_object_lock_try(object)) - return(0); + vm_page_t m = spc->spc_m; + vm_object_t object = m->object; + extern int vm_pageout_free_min; - if (--object->paging_in_progress == 0) + if (--object->paging_in_progress == 0) thread_wakeup((int) object); -#ifdef DEBUG - /* - * XXX: this isn't even close to the right thing to do, - * introduces a variety of race conditions. - * - * If dirty, vm_pageout() has attempted to clean the page - * again. In this case we do not do anything as we will - * see the page again shortly. - */ - if (spc->spc_flags & SPC_DIRTY) { - if (swpagerdebug & SDB_ANOM) - printf("swap_pager_finish: page %x dirty again\n", - spc->spc_m); - spc->spc_m->busy = FALSE; - PAGE_WAKEUP(spc->spc_m); - vm_object_unlock(object); - return(1); - } -#endif /* * If no error mark as clean and inform the pmap system. * If error, mark as dirty so we will try again. @@ -824,19 +1283,43 @@ swap_pager_finish(spc) */ if (spc->spc_flags & SPC_ERROR) { printf("swap_pager_finish: clean of page %x failed\n", - VM_PAGE_TO_PHYS(spc->spc_m)); - spc->spc_m->laundry = TRUE; + VM_PAGE_TO_PHYS(m)); + m->flags |= PG_LAUNDRY; } else { - spc->spc_m->clean = TRUE; - pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m)); + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + m->flags |= PG_CLEAN; } - spc->spc_m->busy = FALSE; - PAGE_WAKEUP(spc->spc_m); - vm_object_unlock(object); - return(1); + /* + * if a page has been read during pageout, then + * we activate the page. + */ + if ((m->flags & PG_ACTIVE) == 0 && + pmap_is_referenced(VM_PAGE_TO_PHYS(m))) + vm_page_activate(m); + + /* + * we wakeup any processes that are waiting on + * this page. 
+ */ + PAGE_WAKEUP(m); + /* + * if we need memory desperately, then free it now + */ + if (vm_page_free_count < vm_page_free_reserved && + (m->flags & PG_CLEAN) && m->wire_count == 0) { + pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); + vm_page_free(m); + } + --nswiodone; + + return; } +/* + * swap_pager_iodone + */ +void swap_pager_iodone(bp) register struct buf *bp; { @@ -844,58 +1327,97 @@ swap_pager_iodone(bp) daddr_t blk; int s; -#ifdef DEBUG - /* save panic time state */ - if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) - return; - if (swpagerdebug & SDB_FOLLOW) - printf("swpg_iodone(%x)\n", bp); -#endif s = splbio(); - spc = (swp_clean_t) queue_first(&swap_pager_inuse); - while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) { - if (spc->spc_bp == bp) - break; - spc = (swp_clean_t) queue_next(&spc->spc_list); - } -#ifdef DEBUG - if (queue_end(&swap_pager_inuse, (queue_entry_t)spc)) - panic("swap_pager_iodone: bp not found"); -#endif - - spc->spc_flags &= ~SPC_BUSY; - spc->spc_flags |= SPC_DONE; + spc = (swp_clean_t) bp->b_spc; + queue_remove(&swap_pager_inuse, spc, swp_clean_t, spc_list); + queue_enter(&swap_pager_done, spc, swp_clean_t, spc_list); if (bp->b_flags & B_ERROR) { spc->spc_flags |= SPC_ERROR; -printf("error %d blkno %d sz %d ", bp->b_error, bp->b_blkno, bp->b_bcount); + printf("error %d blkno %d sz %d ", + bp->b_error, bp->b_blkno, bp->b_bcount); } - spc->spc_bp = NULL; - blk = bp->b_blkno; -#ifdef DEBUG - --swap_pager_poip; - if (swpagerdebug & SDB_WRITE) - printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n", - bp, spc->spc_swp, spc->spc_swp->sw_flags, - spc, spc->spc_swp->sw_poip); -#endif + if ((bp->b_flags & B_READ) == 0) + vwakeup(bp); + + bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC); + if (bp->b_vp) { + brelvp(bp); + } - spc->spc_swp->sw_poip--; - if (spc->spc_swp->sw_flags & SW_WANTED) { - spc->spc_swp->sw_flags &= ~SW_WANTED; - thread_wakeup((int)spc->spc_swp); + nswiodone++; + if (--spc->spc_swp->sw_poip == 0) { + wakeup((caddr_t)spc->spc_swp); } - - bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY); + + if ((swap_pager_needflags & SWAP_FREE_NEEDED) || + queue_empty(&swap_pager_inuse)) { + swap_pager_needflags &= ~SWAP_FREE_NEEDED; + wakeup((caddr_t)&swap_pager_free); + wakeup((caddr_t)&vm_pages_needed); + } + + if (vm_pageout_pages_needed) { + wakeup((caddr_t)&vm_pageout_pages_needed); + } + + if (queue_empty(&swap_pager_inuse) || + (vm_page_free_count < vm_page_free_min && + nswiodone + vm_page_free_count >= vm_page_free_min) ) { + wakeup((caddr_t)&vm_pages_needed); + } + splx(s); +} + +/* + * allocate a physical buffer + */ +struct buf * +getpbuf() { + int s; + struct buf *bp; + + s = splbio(); + /* get a bp from the swap buffer header pool */ + while (bswlist.av_forw == NULL) { + bswlist.b_flags |= B_WANTED; + tsleep((caddr_t)&bswlist, PVM, "wswbuf", 0); + } + bp = bswlist.av_forw; + bswlist.av_forw = bp->av_forw; + + splx(s); + + bzero(bp, sizeof *bp); + return bp; +} + +/* + * release a physical buffer + */ +void +relpbuf(bp) + struct buf *bp; +{ + int s; + + s = splbio(); bp->av_forw = bswlist.av_forw; bswlist.av_forw = bp; - if (bp->b_vp) - brelvp(bp); if (bswlist.b_flags & B_WANTED) { bswlist.b_flags &= ~B_WANTED; - thread_wakeup((int)&bswlist); + wakeup((caddr_t)&bswlist); } - thread_wakeup((int) &vm_pages_needed); splx(s); } -#endif + +/* + * return true if any swap control structures can be allocated + */ +int +swap_pager_ready() { + if( queue_empty( &swap_pager_free)) + return 0; + else + return 1; +} diff --git 
a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h index f49a03d121d6..e505e436d1ce 100644 --- a/sys/vm/swap_pager.h +++ b/sys/vm/swap_pager.h @@ -36,39 +36,29 @@ * SUCH DAMAGE. * * from: @(#)swap_pager.h 7.1 (Berkeley) 12/5/90 - * $Id: swap_pager.h,v 1.2 1993/10/16 16:20:21 rgrimes Exp $ + * $Id: swap_pager.h,v 1.7 1994/01/17 09:33:25 davidg Exp $ + */ + +/* + * Modifications to the block allocation data structure by John S. Dyson + * 18 Dec 93. */ #ifndef _SWAP_PAGER_ #define _SWAP_PAGER_ 1 /* - * In the swap pager, the backing store for an object is organized as an - * array of some number of "swap blocks". A swap block consists of a bitmask - * and some number of contiguous DEV_BSIZE disk blocks. The minimum size - * of a swap block is: - * - * max(PAGE_SIZE, dmmin*DEV_BSIZE) [ 32k currently ] - * - * bytes (since the pager interface is page oriented), the maximum size is: - * - * min(#bits(swb_mask)*PAGE_SIZE, dmmax*DEV_BSIZE) [ 128k currently ] - * - * where dmmin and dmmax are left over from the old VM interface. The bitmask - * (swb_mask) is used by swap_pager_haspage() to determine if a particular - * page has actually been written; i.e. the pager copy of the page is valid. - * All swap blocks in the backing store of an object will be the same size. - * - * The reason for variable sized swap blocks is to reduce fragmentation of - * swap resources. Whenever possible we allocate smaller swap blocks to - * smaller objects. The swap block size is determined from a table of - * object-size vs. swap-block-size computed at boot time. + * SWB_NPAGES can be set to any value from 1 to 32 pages per allocation, + * however, due to the allocation spilling into non-swap pager backed memory, + * suggest keeping SWB_NPAGES small (1-4). If high performance is manditory + * perhaps up to 8 pages might be in order???? + * Above problem has been fixed, now we support 16 pages per block. Unused + * space is recovered by the swap pager now... 
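/*
 * Sketch of the per-block bookkeeping described above: each swblock covers
 * SWB_NPAGES pages, and a bit in swb_valid records whether a given page has
 * actually been written to swap.  The page size and the haspage-style check
 * below are simplified assumptions for illustration only.
 */
#include <stdio.h>

#define SWB_NPAGES 8
#define PAGE_SIZE  4096

struct swblock_sketch {
	unsigned int swb_valid;              /* bitmask of valid pages */
	long         swb_block[SWB_NPAGES];  /* disk block per page, or 0 */
};

/* does the backing store hold a copy of the page at this byte offset? */
static int
haspage(struct swblock_sketch *blocks, int nblocks, unsigned long offset)
{
	unsigned long pindex = offset / PAGE_SIZE;
	unsigned long bindex = pindex / SWB_NPAGES;
	unsigned int  bit    = 1u << (pindex % SWB_NPAGES);

	if (bindex >= (unsigned long)nblocks)
		return 0;
	return (blocks[bindex].swb_valid & bit) != 0;
}

int
main(void)
{
	struct swblock_sketch blocks[2] = { { 0 }, { 0 } };

	/* pretend page 9 (second block, bit 1) was paged out to block 1234 */
	blocks[1].swb_block[1] = 1234;
	blocks[1].swb_valid |= 1u << 1;

	printf("offset 0x9000 on swap? %d\n", haspage(blocks, 2, 0x9000));
	printf("offset 0x2000 on swap? %d\n", haspage(blocks, 2, 0x2000));
	return 0;
}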
*/ -typedef int sw_bm_t; /* pager bitmask */ - +#define SWB_NPAGES 8 struct swblock { - sw_bm_t swb_mask; /* bitmask of valid pages in this block */ - daddr_t swb_block; /* starting disk block for this block */ + unsigned int swb_valid; /* bitmask for valid pages */ + int swb_block[SWB_NPAGES]; /* unfortunately int instead of daddr_t */ }; typedef struct swblock *sw_blk_t; @@ -77,11 +67,11 @@ typedef struct swblock *sw_blk_t; */ struct swpager { vm_size_t sw_osize; /* size of object we are backing (bytes) */ - int sw_bsize; /* size of swap blocks (DEV_BSIZE units) */ int sw_nblocks;/* number of blocks in list (sw_blk_t units) */ sw_blk_t sw_blocks; /* pointer to list of swap blocks */ short sw_flags; /* flags */ short sw_poip; /* pageouts in progress */ + short sw_piip; /* pageins in progress */ }; typedef struct swpager *sw_pager_t; @@ -90,23 +80,18 @@ typedef struct swpager *sw_pager_t; #ifdef KERNEL -void swap_pager_init(); -vm_pager_t swap_pager_alloc(); -void swap_pager_dealloc(); -boolean_t swap_pager_getpage(), swap_pager_putpage(); -boolean_t swap_pager_haspage(); - -struct pagerops swappagerops = { - swap_pager_init, - swap_pager_alloc, - swap_pager_dealloc, - swap_pager_getpage, - swap_pager_putpage, - swap_pager_haspage -}; +void swap_pager_init(void); +vm_pager_t swap_pager_alloc(caddr_t, vm_size_t, vm_prot_t, vm_offset_t); +void swap_pager_dealloc(vm_pager_t); +boolean_t swap_pager_getpage(vm_pager_t, vm_page_t, boolean_t); +boolean_t swap_pager_putpage(vm_pager_t, vm_page_t, boolean_t); +boolean_t swap_pager_getmulti(vm_pager_t, vm_page_t *, int, int, boolean_t); +boolean_t swap_pager_haspage(vm_pager_t, vm_offset_t); +int swap_pager_io(sw_pager_t, vm_page_t *, int, int, int); +void swap_pager_iodone(struct buf *); +boolean_t swap_pager_clean(vm_page_t, int); -int swap_pager_iodone(); -boolean_t swap_pager_clean(); +extern struct pagerops swappagerops; #endif diff --git a/sys/vm/vm.h b/sys/vm/vm.h index ac04bd7092b0..8bcc5220adb1 100644 --- a/sys/vm/vm.h +++ b/sys/vm/vm.h @@ -31,11 +31,12 @@ * SUCH DAMAGE. * * from: @(#)vm.h 7.1 (Berkeley) 5/5/91 - * $Id: vm.h,v 1.2 1993/10/16 16:20:22 rgrimes Exp $ + * $Id: vm.h,v 1.6 1993/12/19 00:55:58 wollman Exp $ */ -#ifndef VM_H -#define VM_H +#ifndef _VM_H_ +#define _VM_H_ 1 + #include <vm/vm_param.h> #include <vm/lock.h> #include <vm/queue.h> @@ -66,10 +67,19 @@ struct vmspace { caddr_t vm_taddr; /* user virtual address of text XXX */ caddr_t vm_daddr; /* user virtual address of data XXX */ caddr_t vm_maxsaddr; /* user VA at max stack growth */ + caddr_t vm_minsaddr; /* user VA of initial stack base */ }; struct vmspace *vmspace_alloc __P((vm_offset_t min, vm_offset_t max, int pageable)); struct vmspace *vmspace_fork __P((struct vmspace *)); void vmspace_free __P((struct vmspace *)); -#endif /* VM_H */ + +extern void thread_block(const char *); + +/* This really doesn't belong here, but the VM code doesn't distinguish + * very well between internal and interface code. */ +#define assert_wait(e, r) (curproc->p_thread = (e)) +#define thread_wakeup(e) (wakeup((caddr_t)(e))) + +#endif /* _VM_H_ */ diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 2151a5fa50e8..c7254bce4c51 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -1,6 +1,10 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. + * Copyright (c) John S. Dyson + * All rights reserved. + * Copyright (c) David Greenman + * All rights reserved. 
* * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. @@ -33,11 +37,9 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * from: @(#)vm_fault.c 7.6 (Berkeley) 5/7/91 - * $Id: vm_fault.c,v 1.5 1993/10/16 16:20:24 rgrimes Exp $ - */ - -/* + * @(#)vm_fault.c 7.6 (Berkeley) 5/7/91 + * + * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * @@ -63,6 +65,9 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ +/* + * $Id: vm_fault.c,v 1.14.2.1 1994/03/24 07:20:29 rgrimes Exp $ + */ /* * Page fault handling module. @@ -73,7 +78,18 @@ #include "vm.h" #include "vm_page.h" #include "vm_pageout.h" - +#include "proc.h" +#include "resource.h" +#include "resourcevar.h" + +#define VM_FAULT_READ_AHEAD 3 +#define VM_FAULT_READ_AHEAD_MIN 1 +#define VM_FAULT_READ_BEHIND 2 +#define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1) +extern int swap_pager_full; +extern int vm_pageout_proc_limit; + +vm_statistics_data_t vm_stat; /* * vm_fault: * @@ -92,6 +108,7 @@ * The map in question must be referenced, and remains so. * Caller may hold no locks. */ +int vm_fault(map, vaddr, fault_type, change_wiring) vm_map_t map; vm_offset_t vaddr; @@ -113,6 +130,9 @@ vm_fault(map, vaddr, fault_type, change_wiring) boolean_t page_exists; vm_page_t old_m; vm_object_t next_object; + vm_page_t marray[VM_FAULT_READ]; + int reqpage; + int spl; vm_stat.faults++; /* needs lock XXX */ /* @@ -141,11 +161,15 @@ vm_fault(map, vaddr, fault_type, change_wiring) #define UNLOCK_THINGS { \ object->paging_in_progress--; \ + if (object->paging_in_progress == 0) \ + wakeup((caddr_t)object); \ vm_object_unlock(object); \ if (object != first_object) { \ vm_object_lock(first_object); \ FREE_PAGE(first_m); \ first_object->paging_in_progress--; \ + if (first_object->paging_in_progress == 0) \ + wakeup((caddr_t)first_object); \ vm_object_unlock(first_object); \ } \ UNLOCK_MAP; \ @@ -156,6 +180,7 @@ vm_fault(map, vaddr, fault_type, change_wiring) vm_object_deallocate(first_object); \ } + RetryFault: ; /* @@ -164,8 +189,8 @@ vm_fault(map, vaddr, fault_type, change_wiring) */ if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry, - &first_object, &first_offset, - &prot, &wired, &su)) != KERN_SUCCESS) { + &first_object, &first_offset, + &prot, &wired, &su)) != KERN_SUCCESS) { return(result); } lookup_still_valid = TRUE; @@ -240,98 +265,76 @@ vm_fault(map, vaddr, fault_type, change_wiring) * If the page is being brought in, * wait for it and then retry. */ - if (m->busy) { -#ifdef DOTHREADS - int wait_result; - - PAGE_ASSERT_WAIT(m, !change_wiring); + if (m->flags & PG_BUSY) { UNLOCK_THINGS; - thread_block(); - wait_result = current_thread()->wait_result; - vm_object_deallocate(first_object); - if (wait_result != THREAD_AWAKENED) - return(KERN_SUCCESS); - goto RetryFault; -#else - PAGE_ASSERT_WAIT(m, !change_wiring); - UNLOCK_THINGS; -thread_wakeup(&vm_pages_needed); /* XXX! */ - thread_block(); + if (m->flags & PG_BUSY) { + m->flags |= PG_WANTED; + tsleep((caddr_t)m,PSWP,"vmpfw",0); + } vm_object_deallocate(first_object); goto RetryFault; -#endif } - if (m->absent) + if (m->flags & PG_ABSENT) panic("vm_fault: absent"); /* - * If the desired access to this page has - * been locked out, request that it be unlocked. 
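/*
 * Sketch of the PG_BUSY/PG_WANTED handshake used in the fault path above.
 * A pthread mutex and condition variable stand in for the kernel's
 * tsleep()/wakeup() on the page address, so this is an analogy rather than
 * the kernel mechanism; the flag names follow the diff, everything else is
 * an illustrative assumption.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define PG_BUSY   0x01
#define PG_WANTED 0x02

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cv   = PTHREAD_COND_INITIALIZER;
static int page_flags = PG_BUSY;       /* page starts out busy (I/O in flight) */

/* faulting thread: wait until the page is no longer busy, then use it */
static void *
faulter(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (page_flags & PG_BUSY) {
		page_flags |= PG_WANTED;       /* like m->flags |= PG_WANTED */
		pthread_cond_wait(&cv, &lock); /* like tsleep(m, PSWP, ...) */
	}
	printf("faulter: page available, flags=0x%x\n", page_flags);
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* I/O completion: clear PG_BUSY and wake any waiters, like PAGE_WAKEUP */
static void
page_wakeup(void)
{
	pthread_mutex_lock(&lock);
	page_flags &= ~PG_BUSY;
	if (page_flags & PG_WANTED) {
		page_flags &= ~PG_WANTED;
		pthread_cond_broadcast(&cv);
	}
	pthread_mutex_unlock(&lock);
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, faulter, NULL);
	sleep(1);                      /* give the faulter time to block (illustrative) */
	page_wakeup();
	pthread_join(t, NULL);
	return 0;
}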
- */ - - if (fault_type & m->page_lock) { -#ifdef DOTHREADS - int wait_result; - - if ((fault_type & m->unlock_request) != fault_type) - panic("vm_fault: pager_data_unlock"); - - PAGE_ASSERT_WAIT(m, !change_wiring); - UNLOCK_THINGS; - thread_block(); - wait_result = current_thread()->wait_result; - vm_object_deallocate(first_object); - if (wait_result != THREAD_AWAKENED) - return(KERN_SUCCESS); - goto RetryFault; -#else - if ((fault_type & m->unlock_request) != fault_type) - panic("vm_fault: pager_data_unlock"); - - PAGE_ASSERT_WAIT(m, !change_wiring); - UNLOCK_THINGS; -thread_wakeup(&vm_pages_needed); /* XXX */ - thread_block(); - vm_object_deallocate(first_object); - goto RetryFault; -#endif - } - - /* * Remove the page from the pageout daemon's * reach while we play with it. */ vm_page_lock_queues(); - if (m->inactive) { + spl = vm_disable_intr(); + if (m->flags & PG_INACTIVE) { queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); - m->inactive = FALSE; + m->flags &= ~PG_INACTIVE; vm_page_inactive_count--; vm_stat.reactivations++; } - if (m->active) { + if (m->flags & PG_ACTIVE) { queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); - m->active = FALSE; + m->flags &= ~PG_ACTIVE; vm_page_active_count--; } + vm_set_intr(spl); vm_page_unlock_queues(); /* * Mark page busy for other threads. */ - m->busy = TRUE; - m->absent = FALSE; + m->flags |= PG_BUSY; + m->flags &= ~PG_ABSENT; break; } if (((object->pager != NULL) && - (!change_wiring || wired)) + (!change_wiring || wired)) || (object == first_object)) { +#if 0 + if (curproc && (vaddr < VM_MAXUSER_ADDRESS) && + (curproc->p_rlimit[RLIMIT_RSS].rlim_max < + curproc->p_vmspace->vm_pmap.pm_stats.resident_count * NBPG)) { + UNLOCK_AND_DEALLOCATE; + vm_fault_free_pages(curproc); + goto RetryFault; + } +#endif + + if (swap_pager_full && !object->shadow && (!object->pager || + (object->pager && object->pager->pg_type == PG_SWAP && + !vm_pager_has_page(object->pager, offset+object->paging_offset)))) { + if (vaddr < VM_MAXUSER_ADDRESS && curproc && curproc->p_pid >= 48) /* XXX */ { + UNLOCK_AND_DEALLOCATE; + printf("Process %d killed by vm_fault -- out of swap\n", curproc->p_pid); + psignal(curproc, SIGKILL); + return KERN_RESOURCE_SHORTAGE; + } + } + /* * Allocate a new page for this object/offset * pair. @@ -346,15 +349,29 @@ thread_wakeup(&vm_pages_needed); /* XXX */ } } - if ((object->pager != NULL) && - (!change_wiring || wired)) { + if ((object->pager != NULL) && (!change_wiring || wired)) { int rv; + int faultcount; + int reqpage; /* * Now that we have a busy page, we can * release the object lock. */ vm_object_unlock(object); + /* + * now we find out if any other pages should + * be paged in at this time + * this routine checks to see if the pages surrounding this fault + * reside in the same object as the page for this fault. If + * they do, then they are faulted in also into the + * object. The array "marray" returned contains an array of vm_page_t structs + * where one of them is the vm_page_t passed to the routine. The reqpage + * return value is the index into the marray for the vm_page_t passed to the + * routine. + */ + faultcount = vm_fault_additional_pages(first_object, first_offset, m, + VM_FAULT_READ_BEHIND, VM_FAULT_READ_AHEAD, marray, &reqpage); /* * Call the pager to retrieve the data, if any, @@ -362,7 +379,13 @@ thread_wakeup(&vm_pages_needed); /* XXX */ */ UNLOCK_MAP; - rv = vm_pager_get(object->pager, m, TRUE); + if (faultcount != 1) { + rv = faultcount ? 
+ vm_pager_getmulti(object->pager, marray, faultcount, reqpage, TRUE): + VM_PAGER_FAIL; + } else { + rv = vm_pager_get(object->pager, m, TRUE); + } if (rv == VM_PAGER_OK) { /* * Found the page. @@ -378,7 +401,7 @@ thread_wakeup(&vm_pages_needed); /* XXX */ m = vm_page_lookup(object, offset); vm_stat.pageins++; - m->fake = FALSE; + m->flags &= ~PG_FAKE; pmap_clear_modify(VM_PAGE_TO_PHYS(m)); break; } @@ -438,6 +461,8 @@ thread_wakeup(&vm_pages_needed); /* XXX */ */ if (object != first_object) { object->paging_in_progress--; + if (object->paging_in_progress == 0) + wakeup((caddr_t) object); vm_object_unlock(object); object = first_object; @@ -449,21 +474,24 @@ thread_wakeup(&vm_pages_needed); /* XXX */ vm_page_zero_fill(m); vm_stat.zero_fill_count++; - m->fake = FALSE; - m->absent = FALSE; + m->flags &= ~(PG_FAKE|PG_ABSENT); break; } else { vm_object_lock(next_object); - if (object != first_object) + if (object != first_object) { object->paging_in_progress--; + if (object->paging_in_progress == 0) + wakeup((caddr_t) object); + } vm_object_unlock(object); object = next_object; object->paging_in_progress++; } } - if (m->absent || m->active || m->inactive || !m->busy) + if ((m->flags & (PG_ABSENT|PG_ACTIVE|PG_INACTIVE) != 0) || + (m->flags & PG_BUSY) == 0) panic("vm_fault: absent or active or inactive or not busy after main loop"); /* @@ -511,8 +539,7 @@ thread_wakeup(&vm_pages_needed); /* XXX */ */ vm_page_copy(m, first_m); - first_m->fake = FALSE; - first_m->absent = FALSE; + first_m->flags &= ~(PG_FAKE|PG_ABSENT); /* * If another map is truly sharing this @@ -528,9 +555,11 @@ thread_wakeup(&vm_pages_needed); /* XXX */ */ vm_page_lock_queues(); + vm_page_activate(m); - vm_page_deactivate(m); pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); + if ((m->flags & PG_CLEAN) == 0) + m->flags |= PG_LAUNDRY; vm_page_unlock_queues(); /* @@ -538,6 +567,8 @@ thread_wakeup(&vm_pages_needed); /* XXX */ */ PAGE_WAKEUP(m); object->paging_in_progress--; + if (object->paging_in_progress == 0) + wakeup((caddr_t) object); vm_object_unlock(object); /* @@ -559,16 +590,18 @@ thread_wakeup(&vm_pages_needed); /* XXX */ * paging_in_progress to do that... */ object->paging_in_progress--; + if (object->paging_in_progress == 0) + wakeup((caddr_t) object); vm_object_collapse(object); object->paging_in_progress++; } else { prot &= (~VM_PROT_WRITE); - m->copy_on_write = TRUE; + m->flags |= PG_COPY_ON_WRITE; } } - if (m->active || m->inactive) + if (m->flags & (PG_ACTIVE|PG_INACTIVE)) panic("vm_fault: active or inactive before copy object handling"); /* @@ -586,7 +619,7 @@ thread_wakeup(&vm_pages_needed); /* XXX */ */ if ((fault_type & VM_PROT_WRITE) == 0) { prot &= ~VM_PROT_WRITE; - m->copy_on_write = TRUE; + m->flags |= PG_COPY_ON_WRITE; } else { /* @@ -613,40 +646,19 @@ thread_wakeup(&vm_pages_needed); /* XXX */ - copy_object->shadow_offset; copy_m = vm_page_lookup(copy_object, copy_offset); if (page_exists = (copy_m != NULL)) { - if (copy_m->busy) { -#ifdef DOTHREADS - int wait_result; - - /* - * If the page is being brought - * in, wait for it and then retry. - */ - PAGE_ASSERT_WAIT(copy_m, !change_wiring); - RELEASE_PAGE(m); - copy_object->ref_count--; - vm_object_unlock(copy_object); - UNLOCK_THINGS; - thread_block(); - wait_result = current_thread()->wait_result; - vm_object_deallocate(first_object); - if (wait_result != THREAD_AWAKENED) - return(KERN_SUCCESS); - goto RetryFault; -#else + if (copy_m->flags & PG_BUSY) { /* * If the page is being brought * in, wait for it and then retry. 
*/ - PAGE_ASSERT_WAIT(copy_m, !change_wiring); + PAGE_ASSERT_WAIT(copy_m, !change_wiring); RELEASE_PAGE(m); copy_object->ref_count--; vm_object_unlock(copy_object); UNLOCK_THINGS; -thread_wakeup(&vm_pages_needed); /* XXX */ - thread_block(); + thread_block("fltcpy"); vm_object_deallocate(first_object); goto RetryFault; -#endif } } @@ -668,8 +680,7 @@ thread_wakeup(&vm_pages_needed); /* XXX */ * found that the copy_object's pager * doesn't have the page... */ - copy_m = vm_page_alloc(copy_object, - copy_offset); + copy_m = vm_page_alloc(copy_object, copy_offset); if (copy_m == NULL) { /* * Wait for a page, then retry. @@ -730,8 +741,7 @@ thread_wakeup(&vm_pages_needed); /* XXX */ * Must copy page into copy-object. */ vm_page_copy(m, copy_m); - copy_m->fake = FALSE; - copy_m->absent = FALSE; + copy_m->flags &= ~(PG_FAKE|PG_ABSENT); /* * Things to remember: @@ -744,10 +754,16 @@ thread_wakeup(&vm_pages_needed); /* XXX */ * pmaps use it.) */ vm_page_lock_queues(); + + vm_page_activate(old_m); + + pmap_page_protect(VM_PAGE_TO_PHYS(old_m), VM_PROT_NONE); - copy_m->clean = FALSE; - vm_page_activate(copy_m); /* XXX */ + if ((old_m->flags & PG_CLEAN) == 0) + old_m->flags |= PG_LAUNDRY; + copy_m->flags &= ~PG_CLEAN; + vm_page_activate(copy_m); vm_page_unlock_queues(); PAGE_WAKEUP(copy_m); @@ -761,11 +777,11 @@ thread_wakeup(&vm_pages_needed); /* XXX */ */ copy_object->ref_count--; vm_object_unlock(copy_object); - m->copy_on_write = FALSE; + m->flags &= ~PG_COPY_ON_WRITE; } } - if (m->active || m->inactive) + if (m->flags & (PG_ACTIVE|PG_INACTIVE)) panic("vm_fault: active or inactive before retrying lookup"); /* @@ -830,7 +846,7 @@ thread_wakeup(&vm_pages_needed); /* XXX */ * can't mark the page write-enabled after all. */ prot &= retry_prot; - if (m->copy_on_write) + if (m->flags & PG_COPY_ON_WRITE) prot &= ~VM_PROT_WRITE; } @@ -841,15 +857,16 @@ thread_wakeup(&vm_pages_needed); /* XXX */ /* XXX This distorts the meaning of the copy_on_write bit */ - if (prot & VM_PROT_WRITE) - m->copy_on_write = FALSE; + if (prot & VM_PROT_WRITE) { + m->flags &= ~PG_COPY_ON_WRITE; + } /* * It's critically important that a wired-down page be faulted * only once in each map for which it is wired. */ - if (m->active || m->inactive) + if (m->flags & (PG_ACTIVE|PG_INACTIVE)) panic("vm_fault: active or inactive before pmap_enter"); vm_object_unlock(object); @@ -862,8 +879,7 @@ thread_wakeup(&vm_pages_needed); /* XXX */ * that the page-out daemon won't find us (yet). */ - pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), - prot & ~(m->page_lock), wired); + pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired); /* * If the page is not wired down, then put it where the @@ -877,8 +893,10 @@ thread_wakeup(&vm_pages_needed); /* XXX */ else vm_page_unwire(m); } - else + else { vm_page_activate(m); + vm_pageout_deact_bump(m); + } vm_page_unlock_queues(); /* @@ -897,7 +915,8 @@ thread_wakeup(&vm_pages_needed); /* XXX */ * * Wire down a range of virtual addresses in a map. */ -void vm_fault_wire(map, start, end) +void +vm_fault_wire(map, start, end) vm_map_t map; vm_offset_t start, end; { @@ -931,7 +950,8 @@ void vm_fault_wire(map, start, end) * * Unwire a range of virtual addresses in a map. */ -void vm_fault_unwire(map, start, end) +void +vm_fault_unwire(map, start, end) vm_map_t map; vm_offset_t start, end; { @@ -980,13 +1000,13 @@ void vm_fault_unwire(map, start, end) * entry corresponding to a main map entry that is wired down). 
*/ -void vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) +void +vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) vm_map_t dst_map; vm_map_t src_map; vm_map_entry_t dst_entry; vm_map_entry_t src_entry; { - vm_object_t dst_object; vm_object_t src_object; vm_offset_t dst_offset; @@ -1069,5 +1089,209 @@ void vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) PAGE_WAKEUP(dst_m); vm_object_unlock(dst_object); } +} + + +/* + * looks page up in shadow chain + */ + +int +vm_fault_page_lookup(object, offset, rtobject, rtoffset, rtm) + vm_object_t object; + vm_offset_t offset; + vm_object_t *rtobject; + vm_offset_t *rtoffset; + vm_page_t *rtm; +{ + vm_page_t m; + vm_object_t first_object = object; + + *rtm = 0; + *rtobject = 0; + *rtoffset = 0; + + + while (!(m=vm_page_lookup(object, offset))) { + if (object->pager) { + if (vm_pager_has_page(object->pager, object->paging_offset+offset)) { + *rtobject = object; + *rtoffset = offset; + return 1; + } + } + + if (!object->shadow) + return 0; + else { + offset += object->shadow_offset; + object = object->shadow; + } + } + *rtobject = object; + *rtoffset = offset; + *rtm = m; + return 1; +} + +/* + * This routine checks around the requested page for other pages that + * might be able to be faulted in. + * + * Inputs: + * first_object, first_offset, m, rbehind, rahead + * + * Outputs: + * marray (array of vm_page_t), reqpage (index of requested page) + * + * Return value: + * number of pages in marray + */ +int +vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marray, reqpage) + vm_object_t first_object; + vm_offset_t first_offset; + vm_page_t m; + int rbehind; + int raheada; + vm_page_t *marray; + int *reqpage; +{ + int i; + vm_page_t tmpm; + vm_object_t object; + vm_offset_t offset, startoffset, endoffset, toffset, size; + vm_object_t rtobject; + vm_page_t rtm; + vm_offset_t rtoffset; + vm_offset_t offsetdiff; + int rahead; + int treqpage; + + object = m->object; + offset = m->offset; + + offsetdiff = offset - first_offset; + + /* + * if the requested page is not available, then give up now + */ + + if (!vm_pager_has_page(object->pager, object->paging_offset+offset)) + return 0; + + /* + * if there is no getmulti routine for this pager, then just allow + * one page to be read. + */ + if (!object->pager->pg_ops->pgo_getmulti) { + *reqpage = 0; + marray[0] = m; + return 1; + } + + /* + * try to do any readahead that we might have free pages for. + */ + rahead = raheada; + if (rahead > (vm_page_free_count - vm_page_free_reserved)) { + rahead = vm_page_free_count - vm_page_free_reserved; + rbehind = 0; + } + + if (vm_page_free_count < vm_page_free_min) { + if (rahead > VM_FAULT_READ_AHEAD_MIN) + rahead = VM_FAULT_READ_AHEAD_MIN; + rbehind = 0; + } + + /* + * if we don't have any free pages, then just read one page. 
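/*
 * Sketch of the shadow-chain walk performed by vm_fault_page_lookup above:
 * look for a resident page in the front object, otherwise bias the offset
 * by shadow_offset and descend to the shadow object.  The object and page
 * representations below are illustrative stand-ins, not kernel structures.
 */
#include <stdio.h>

#define NOPAGE (-1L)

struct sk_object {
	struct sk_object *shadow;
	unsigned long     shadow_offset;
	long              resident[4];   /* page "id" per 4K slot, or NOPAGE */
};

/* returns the page id found, or NOPAGE; *where reports the owning object */
static long
lookup(struct sk_object *obj, unsigned long offset, struct sk_object **where)
{
	while (obj != NULL) {
		unsigned long slot = offset / 4096;

		if (slot < 4 && obj->resident[slot] != NOPAGE) {
			*where = obj;
			return obj->resident[slot];
		}
		offset += obj->shadow_offset;
		obj = obj->shadow;
	}
	*where = NULL;
	return NOPAGE;
}

int
main(void)
{
	struct sk_object backing = { NULL, 0, { 100, 101, NOPAGE, NOPAGE } };
	struct sk_object front   = { &backing, 0, { NOPAGE, 200, NOPAGE, NOPAGE } };
	struct sk_object *owner;

	printf("offset 0x1000 -> page %ld\n", lookup(&front, 0x1000, &owner));
	printf("offset 0x0000 -> page %ld\n", lookup(&front, 0x0000, &owner));
	printf("offset 0x2000 -> page %ld\n", lookup(&front, 0x2000, &owner));
	return 0;
}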
+ */ + if (rahead <= 0) { + *reqpage = 0; + marray[0] = m; + return 1; + } + + /* + * scan backward for the read behind pages -- + * in memory or on disk not in same object + */ + toffset = offset - NBPG; + if( rbehind*NBPG > offset) + rbehind = offset / NBPG; + startoffset = offset - rbehind*NBPG; + while (toffset >= startoffset) { + if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) || + rtm != 0 || rtobject != object) { + startoffset = toffset + NBPG; + break; + } + if( toffset == 0) + break; + toffset -= NBPG; + } + + /* + * scan forward for the read ahead pages -- + * in memory or on disk not in same object + */ + toffset = offset + NBPG; + endoffset = offset + (rahead+1)*NBPG; + while (toffset < object->size && toffset < endoffset) { + if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) || + rtm != 0 || rtobject != object) { + break; + } + toffset += NBPG; + } + endoffset = toffset; + + /* calculate number of bytes of pages */ + size = (endoffset - startoffset) / NBPG; + + /* calculate the page offset of the required page */ + treqpage = (offset - startoffset) / NBPG; + + /* see if we have space (again) */ + if (vm_page_free_count >= vm_page_free_reserved + size) { + bzero(marray, (rahead + rbehind + 1) * sizeof(vm_page_t)); + /* + * get our pages and don't block for them + */ + for (i = 0; i < size; i++) { + if (i != treqpage) + rtm = vm_page_alloc(object, startoffset + i * NBPG); + else + rtm = m; + marray[i] = rtm; + } + + for (i = 0; i < size; i++) { + if (marray[i] == 0) + break; + } + + /* + * if we could not get our block of pages, then + * free the readahead/readbehind pages. + */ + if (i < size) { + for (i = 0; i < size; i++) { + if (i != treqpage && marray[i]) + FREE_PAGE(marray[i]); + } + *reqpage = 0; + marray[0] = m; + return 1; + } + *reqpage = treqpage; + return size; + } + *reqpage = 0; + marray[0] = m; + return 1; } + diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 59e6b3f50a18..bd2fd07a5445 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -1,6 +1,8 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. + * Copyright (c) John S. Dyson + * All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. @@ -33,11 +35,9 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * from: @(#)vm_glue.c 7.8 (Berkeley) 5/15/91 - * $Id: vm_glue.c,v 1.9 1993/10/19 00:54:49 nate Exp $ - */ - -/* + * @(#)vm_glue.c 7.8 (Berkeley) 5/15/91 + * + * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * @@ -69,14 +69,20 @@ #include "resourcevar.h" #include "buf.h" #include "user.h" +#include "kernel.h" #include "vm.h" #include "vm_page.h" #include "vm_kern.h" +#include "machine/stdarg.h" +extern char kstack[]; int avefree = 0; /* XXX */ int readbuffers = 0; /* XXX allow kgdb to read kernel buffer pool */ +/* vm_map_t upages_map; */ +void swapout(struct proc *p); +int kernacc(addr, len, rw) caddr_t addr; int len, rw; @@ -97,12 +103,10 @@ kernacc(addr, len, rw) * or worse, inconsistencies at the pmap level. We only worry * about the buffer cache for now. 
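/*
 * Sketch of the read-behind/read-ahead window sizing in
 * vm_fault_additional_pages above: clamp the window by the faulting offset,
 * the object size and the number of free pages, then report how many pages
 * would be requested and which index is the originally faulted page.  NBPG
 * and the limits are illustrative values; the per-page residency scan that
 * the real routine performs is omitted.
 */
#include <stdio.h>

#define NBPG 4096

struct cluster {
	unsigned long start;    /* first byte offset to read */
	int           npages;   /* number of pages in the request */
	int           reqpage;  /* index of the faulting page in the request */
};

static struct cluster
fault_window(unsigned long offset, unsigned long objsize,
    int rbehind, int rahead, int free_pages, int reserved)
{
	struct cluster c;
	unsigned long end;

	/* never read ahead into pages we cannot afford to allocate */
	if (rahead > free_pages - reserved) {
		rahead = free_pages - reserved;
		rbehind = 0;
	}
	if (rahead < 0)
		rahead = 0;

	/* cannot read behind the start of the object */
	if ((unsigned long)rbehind * NBPG > offset)
		rbehind = offset / NBPG;
	c.start = offset - (unsigned long)rbehind * NBPG;

	/* cannot read ahead past the end of the object */
	end = offset + ((unsigned long)rahead + 1) * NBPG;
	if (end > objsize)
		end = objsize;

	c.npages = (end - c.start) / NBPG;
	c.reqpage = (offset - c.start) / NBPG;
	return c;
}

int
main(void)
{
	/* fault at page 1 of a 16-page object, with plenty of free pages */
	struct cluster c = fault_window(1 * NBPG, 16 * NBPG, 2, 3, 1000, 64);

	printf("start=%#lx npages=%d reqpage=%d\n", c.start, c.npages, c.reqpage);
	return 0;
}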
*/ - if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers && - saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf)) - rv = FALSE; return(rv == TRUE); } +int useracc(addr, len, rw) caddr_t addr; int len, rw; @@ -121,10 +125,12 @@ useracc(addr, len, rw) * only used (as an end address) in trap.c. Use it as an end * address here too. */ - if ((vm_offset_t) addr >= VM_MAXUSER_ADDRESS + if ((vm_offset_t) addr >= VM_MAXUSER_ADDRESS || (vm_offset_t) addr + len > VM_MAXUSER_ADDRESS - || (vm_offset_t) addr + len <= (vm_offset_t) addr) + || (vm_offset_t) addr + len <= (vm_offset_t) addr) { + printf("address wrap\n"); return (FALSE); + } rv = vm_map_check_protection(&curproc->p_vmspace->vm_map, trunc_page(addr), round_page(addr+len), prot); @@ -147,7 +153,7 @@ chgkprot(addr, len, rw) round_page(addr+len), prot, FALSE); } #endif - +void vslock(addr, len) caddr_t addr; u_int len; @@ -156,6 +162,7 @@ vslock(addr, len) round_page(addr+len), FALSE); } +void vsunlock(addr, len, dirtied) caddr_t addr; u_int len; @@ -164,7 +171,7 @@ vsunlock(addr, len, dirtied) #ifdef lint dirtied++; #endif lint - vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr), + vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr), round_page(addr+len), TRUE); } @@ -179,21 +186,22 @@ vsunlock(addr, len, dirtied) * after cpu_fork returns in the child process. We do nothing here * after cpu_fork returns. */ +int vm_fork(p1, p2, isvfork) register struct proc *p1, *p2; int isvfork; { register struct user *up; - vm_offset_t addr; + vm_offset_t addr, ptaddr; + int i; + struct vm_map *vp; -#ifdef i386 /* * avoid copying any of the parent's pagetables or other per-process * objects that reside in the map by marking all of them non-inheritable */ (void)vm_map_inherit(&p1->p_vmspace->vm_map, - UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE); -#endif + UPT_MIN_ADDRESS - UPAGES * NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE); p2->p_vmspace = vmspace_fork(p1->p_vmspace); #ifdef SYSVSHM @@ -204,13 +212,38 @@ vm_fork(p1, p2, isvfork) /* * Allocate a wired-down (for now) pcb and kernel stack for the process */ -#ifdef notyet - addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES)); - vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE); -#else - addr = kmem_alloc(kernel_map, ctob(UPAGES)); -#endif - up = (struct user *)addr; + + /* addr = UPT_MIN_ADDRESS - UPAGES*NBPG; */ + addr = (vm_offset_t) kstack; + + vp = &p2->p_vmspace->vm_map; + + /* ream out old pagetables and kernel stack */ + (void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr); + + /* get new pagetables and kernel stack */ + (void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE); + + /* force in the page table encompassing the UPAGES */ + ptaddr = trunc_page((u_int)vtopte(addr)); + vm_map_pageable(vp, ptaddr, ptaddr + NBPG, FALSE); + + /* and force in (demand-zero) the UPAGES */ + vm_map_pageable(vp, addr, addr + UPAGES * NBPG, FALSE); + + /* get a kernel virtual address for the UPAGES for this proc */ + up = (struct user *)kmem_alloc_pageable(kernel_map, UPAGES * NBPG); + + /* and force-map the upages into the kernel pmap */ + for (i = 0; i < UPAGES; i++) + pmap_enter(vm_map_pmap(kernel_map), + ((vm_offset_t) up) + NBPG * i, + pmap_extract(vp->pmap, addr + NBPG * i), + VM_PROT_READ|VM_PROT_WRITE, 1); + + /* and allow the UPAGES page table entry to be paged (at the vm system level) */ + vm_map_pageable(vp, ptaddr, ptaddr + NBPG, TRUE); + p2->p_addr = up; /* @@ -229,16 +262,7 @@ vm_fork(p1, p2, isvfork) ((caddr_t)&up->u_stats.pstat_endcopy - 
(caddr_t)&up->u_stats.pstat_startcopy)); -#ifdef i386 - { u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG; struct vm_map *vp; - - vp = &p2->p_vmspace->vm_map; - - /* ream out old pagetables and kernel stack */ - (void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr); - (void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE); - } -#endif + /* * cpu_fork will copy and update the kernel stack and pcb, * and make the child ready to run. It marks the child @@ -253,9 +277,11 @@ vm_fork(p1, p2, isvfork) * Set default limits for VM system. * Called for proc 0, and then inherited by all others. */ +void vm_init_limits(p) register struct proc *p; { + int tmp; /* * Set up the initial limits on process VM. @@ -268,8 +294,11 @@ vm_init_limits(p) p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ; p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ; p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ; - p->p_rlimit[RLIMIT_RSS].rlim_cur = p->p_rlimit[RLIMIT_RSS].rlim_max = - ptoa(vm_page_free_count); + tmp = ((2 * vm_page_free_count) / 3) - 32; + if (vm_page_free_count < 512) + tmp = vm_page_free_count; + p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(tmp); + p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY; } #include "../vm/vm_pageout.h" @@ -282,6 +311,64 @@ int swapdebug = 0; #define SDB_SWAPOUT 4 #endif +void +faultin(p) +struct proc *p; +{ + vm_offset_t i; + vm_offset_t vaddr, ptaddr; + vm_offset_t v, v1; + struct user *up; + int s; + int opflag; + + if ((p->p_flag & SLOAD) == 0) { + int rv0, rv1; + vm_map_t map; + + opflag = p->p_flag; + p->p_flag |= SLOCK; + + map = &p->p_vmspace->vm_map; + /* force the page table encompassing the kernel stack (upages) */ + ptaddr = trunc_page((u_int)vtopte(kstack)); + vm_map_pageable(map, ptaddr, ptaddr + NBPG, FALSE); + + /* wire in the UPAGES */ + vm_map_pageable(map, (vm_offset_t) kstack, + (vm_offset_t) kstack + UPAGES * NBPG, FALSE); + + /* and map them nicely into the kernel pmap */ + for (i = 0; i < UPAGES; i++) { + vm_offset_t off = i * NBPG; + vm_offset_t pa = (vm_offset_t) + pmap_extract(&p->p_vmspace->vm_pmap, + (vm_offset_t) kstack + off); + pmap_enter(vm_map_pmap(kernel_map), + ((vm_offset_t)p->p_addr) + off, + pa, VM_PROT_READ|VM_PROT_WRITE, 1); + } + + /* and let the page table pages go (at least above pmap level) */ + vm_map_pageable(map, ptaddr, ptaddr + NBPG, TRUE); + + s = splhigh(); + + if (p->p_stat == SRUN) + setrq(p); + + p->p_flag |= SLOAD; + + /* undo the effect of setting SLOCK above */ + p->p_flag &= ~SLOCK; + p->p_flag |= opflag & SLOCK; + splx(s); + + } + +} + +int swapinreq; /* * Brutally simple: * 1. Attempt to swapin every swaped-out, runnable process in @@ -289,6 +376,7 @@ int swapdebug = 0; * 2. If not enough memory, wake the pageout daemon and let it * clear some space. */ +void sched() { register struct proc *p; @@ -296,9 +384,10 @@ sched() struct proc *pp; int ppri; vm_offset_t addr; - vm_size_t size; + /* printf("vm_page_free_count: %d\n", vm_page_free_count); */ loop: + vmmeter(); #ifdef DEBUG if (!enableswap) { pp = NULL; @@ -324,7 +413,7 @@ noswap: * Nothing to do, back to sleep */ if ((p = pp) == NULL) { - sleep((caddr_t)&proc0, PVM); + tsleep((caddr_t)&proc0, PVM, "sched", 0); goto loop; } @@ -333,24 +422,16 @@ noswap: * This part is really bogus cuz we could deadlock on memory * despite our feeble check. 
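/*
 * Sketch of the default RSS limit computed in vm_init_limits above: roughly
 * two thirds of free memory minus a small cushion, or everything when memory
 * is very scarce.  The page size and the ptoa() conversion are assumptions
 * for illustration.
 */
#include <stdio.h>

#define NBPG    4096
#define ptoa(x) ((unsigned long)(x) * NBPG)

static unsigned long
default_rss_limit(int free_pages)
{
	int tmp = (2 * free_pages) / 3 - 32;

	if (free_pages < 512)
		tmp = free_pages;
	return ptoa(tmp);
}

int
main(void)
{
	printf("free=2048 -> rss limit %lu bytes\n", default_rss_limit(2048));
	printf("free=300  -> rss limit %lu bytes\n", default_rss_limit(300));
	return 0;
}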
*/ - size = round_page(ctob(UPAGES)); - addr = (vm_offset_t) p->p_addr; - if (vm_page_free_count > atop(size)) { -#ifdef DEBUG - if (swapdebug & SDB_SWAPIN) - printf("swapin: pid %d(%s)@%x, pri %d free %d\n", - p->p_pid, p->p_comm, p->p_addr, - ppri, vm_page_free_count); -#endif - vm_map_pageable(kernel_map, addr, addr+size, FALSE); - (void) splclock(); - if (p->p_stat == SRUN) - setrq(p); - p->p_flag |= SLOAD; - (void) spl0(); + (void) splhigh(); + if (((vm_page_free_count + vm_page_inactive_count) >= + (vm_page_inactive_target + vm_page_free_reserved)) || + (vm_page_free_count >= vm_page_free_min)) { + spl0(); + faultin(p); p->p_time = 0; goto loop; - } + } + ++swapinreq; /* * Not enough memory, jab the pageout daemon and wait til the * coast is clear. @@ -360,7 +441,6 @@ noswap: printf("sched: no room for pid %d(%s), free %d\n", p->p_pid, p->p_comm, vm_page_free_count); #endif - (void) splhigh(); VM_WAIT; (void) spl0(); #ifdef DEBUG @@ -371,8 +451,9 @@ noswap: } #define swappable(p) \ - (((p)->p_flag & (SSYS|SLOAD|SKEEP|SWEXIT|SPHYSIO)) == SLOAD) + (((p)->p_flag & (STRC|SSYS|SLOAD|SLOCK|SKEEP|SWEXIT|SPHYSIO)) == SLOAD) +extern int vm_pageout_free_min; /* * Swapout is driven by the pageout daemon. Very simple, we find eligible * procs and unwire their u-areas. We try to always "swap" at least one @@ -381,39 +462,47 @@ noswap: * they are swapped. Else, we swap the longest-sleeping or stopped process, * if any, otherwise the longest-resident process. */ +void swapout_threads() { register struct proc *p; struct proc *outp, *outp2; int outpri, outpri2; + int tpri; int didswap = 0; extern int maxslp; + int s; #ifdef DEBUG if (!enableswap) return; #endif + outp = outp2 = NULL; - outpri = outpri2 = 0; + outpri = outpri2 = INT_MIN; for (p = allproc; p != NULL; p = p->p_nxt) { if (!swappable(p)) continue; switch (p->p_stat) { case SRUN: - if (p->p_time > outpri2) { + if (p->p_pri < PUSER) + continue; + if ((tpri = p->p_time + p->p_nice * 8) > outpri2) { outp2 = p; - outpri2 = p->p_time; + outpri2 = tpri; } continue; case SSLEEP: case SSTOP: + if (p->p_pri <= PRIBIO) + continue; if (p->p_slptime > maxslp) { swapout(p); didswap++; - } else if (p->p_slptime > outpri) { + } else if ((tpri = p->p_slptime + p->p_nice * 8) > outpri) { outp = p; - outpri = p->p_slptime; + outpri = tpri ; } continue; } @@ -424,24 +513,37 @@ swapout_threads() * if we are real low on memory since we don't gain much by doing * it (UPAGES pages). */ - if (didswap == 0 && - vm_page_free_count <= atop(round_page(ctob(UPAGES)))) { - if ((p = outp) == 0) + if (didswap == 0 && (swapinreq && + vm_page_free_count <= vm_pageout_free_min)) { + if ((p = outp) == 0 && + (vm_page_free_count <= vm_pageout_free_min)) p = outp2; #ifdef DEBUG if (swapdebug & SDB_SWAPOUT) printf("swapout_threads: no duds, try procp %x\n", p); #endif - if (p) + if (p) { swapout(p); + didswap = 1; + } + } + + if (didswap) { + if (swapinreq) + wakeup((caddr_t)&proc0); + swapinreq = 0; } } +void swapout(p) register struct proc *p; { vm_offset_t addr; - vm_size_t size; + struct pmap *pmap = &p->p_vmspace->vm_pmap; + vm_map_t map = &p->p_vmspace->vm_map; + vm_offset_t ptaddr; + int i; #ifdef DEBUG if (swapdebug & SDB_SWAPOUT) @@ -449,38 +551,23 @@ swapout(p) p->p_pid, p->p_comm, p->p_addr, p->p_stat, p->p_slptime, vm_page_free_count); #endif - size = round_page(ctob(UPAGES)); - addr = (vm_offset_t) p->p_addr; - p->p_stats->p_ru.ru_nswap++ ; /* record in resource stats */ -#ifdef notyet -#ifdef hp300 - /* - * Ugh! 
u-area is double mapped to a fixed address behind the - * back of the VM system and accesses are usually through that - * address rather than the per-process address. Hence reference - * and modify information are recorded at the fixed address and - * lost at context switch time. We assume the u-struct and - * kernel stack are always accessed/modified and force it to be so. - */ - { - register int i; - volatile long tmp; - for (i = 0; i < UPAGES; i++) { - tmp = *(long *)addr; *(long *)addr = tmp; - addr += NBPG; - } - addr = (vm_offset_t) p->p_addr; - } -#endif - vm_map_pageable(kernel_map, addr, addr+size, TRUE); - pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map)); -#endif + (void) splhigh(); p->p_flag &= ~SLOAD; if (p->p_stat == SRUN) remrq(p); (void) spl0(); + + p->p_flag |= SLOCK; +/* let the upages be paged */ + pmap_remove(vm_map_pmap(kernel_map), + (vm_offset_t) p->p_addr, ((vm_offset_t) p->p_addr) + UPAGES * NBPG); + + vm_map_pageable(map, (vm_offset_t) kstack, + (vm_offset_t) kstack + UPAGES * NBPG, TRUE); + + p->p_flag &= ~SLOCK; p->p_time = 0; } @@ -488,6 +575,7 @@ swapout(p) * The rest of these routines fake thread handling */ +#ifndef assert_wait void assert_wait(event, ruptible) int event; @@ -498,42 +586,43 @@ assert_wait(event, ruptible) #endif curproc->p_thread = event; } +#endif void -thread_block() +thread_block(const char *msg) { - int s = splhigh(); - if (curproc->p_thread) - sleep((caddr_t)curproc->p_thread, PVM); - splx(s); + tsleep((caddr_t)curproc->p_thread, PVM, msg, 0); } -thread_sleep(event, lock, ruptible) + +void +thread_sleep_(event, lock, wmesg) int event; simple_lock_t lock; - boolean_t ruptible; + const char *wmesg; { -#ifdef lint - ruptible++; -#endif - int s = splhigh(); curproc->p_thread = event; simple_unlock(lock); - if (curproc->p_thread) - sleep((caddr_t)event, PVM); - splx(s); + if (curproc->p_thread) { + tsleep((caddr_t)event, PVM, wmesg, 0); + } } +#ifndef thread_wakeup +void thread_wakeup(event) int event; { - int s = splhigh(); - wakeup((caddr_t)event); - splx(s); } +#endif + +/* + * DEBUG stuff + */ + /* * DEBUG stuff @@ -543,18 +632,26 @@ thread_wakeup(event) int indent = 0; /*ARGSUSED2*/ -iprintf(a, b, c, d, e, f, g, h) - char *a; +void +iprintf(const char *fmt, ...) { - register int i; + va_list args; + int i, j = 0; + char indentbuf[indent + 1]; + va_start(args, fmt); i = indent; while (i >= 8) { - printf("\t"); - i -= 8; + indentbuf[j++] = '\t'; + i -= 8; } for (; i > 0; --i) - printf(" "); - printf(a, b, c, d, e, f, g, h); + indentbuf[j++] = ' '; + + indentbuf[j++] = '\0'; + + printf("%s%r", indentbuf, fmt, args); + va_end(args); } #endif /* defined(DEBUG) || (NDDB > 0) */ + diff --git a/sys/vm/vm_inherit.h b/sys/vm/vm_inherit.h index e748162bb172..9283ff2036e4 100644 --- a/sys/vm/vm_inherit.h +++ b/sys/vm/vm_inherit.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_inherit.h 7.2 (Berkeley) 4/21/91 - * $Id: vm_inherit.h,v 1.2 1993/10/16 16:20:27 rgrimes Exp $ + * $Id: vm_inherit.h,v 1.3 1994/01/17 09:33:39 davidg Exp $ */ /* @@ -77,7 +77,7 @@ * vm_inherit_t inheritance codes. */ -typedef int vm_inherit_t; /* might want to change this */ +typedef char vm_inherit_t; /* might want to change this */ /* * Enumeration of valid values for vm_inherit_t. diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c index 1899300bac56..b7f831515827 100644 --- a/sys/vm/vm_init.c +++ b/sys/vm/vm_init.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * from: @(#)vm_init.c 7.3 (Berkeley) 4/21/91 - * $Id: vm_init.c,v 1.2 1993/10/16 16:20:28 rgrimes Exp $ + * $Id: vm_init.c,v 1.3 1994/01/14 16:27:17 davidg Exp $ */ /* @@ -81,7 +81,8 @@ * The start and end address of physical memory is passed in. */ -void vm_mem_init() +void +vm_mem_init() { extern vm_offset_t avail_start, avail_end; extern vm_offset_t virtual_avail, virtual_end; diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index e042d0ad9b1f..89031b862a8d 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -33,11 +33,9 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * from: @(#)vm_kern.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_kern.c,v 1.3 1993/10/16 16:20:30 rgrimes Exp $ - */ - -/* + * @(#)vm_kern.c 7.4 (Berkeley) 5/7/91 + * + * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * @@ -62,7 +60,6 @@ * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. - * */ /* @@ -70,12 +67,19 @@ */ #include "param.h" -#include "syslog.h" #include "vm.h" #include "vm_page.h" #include "vm_pageout.h" #include "vm_kern.h" +#include "machine/pmap.h" + +vm_map_t kernel_map; +vm_map_t mb_map; +vm_map_t kmem_map; +vm_map_t phys_map; +vm_map_t buffer_map; + /* * kmem_alloc_pageable: @@ -84,7 +88,8 @@ * map must be "kernel_map" below. */ -vm_offset_t kmem_alloc_pageable(map, size) +vm_offset_t +kmem_alloc_pageable(map, size) vm_map_t map; register vm_size_t size; { @@ -112,7 +117,8 @@ vm_offset_t kmem_alloc_pageable(map, size) * Allocate wired-down memory in the kernel's address map * or a submap. */ -vm_offset_t kmem_alloc(map, size) +vm_offset_t +kmem_alloc(map, size) register vm_map_t map; register vm_size_t size; { @@ -121,6 +127,7 @@ vm_offset_t kmem_alloc(map, size) register vm_offset_t offset; extern vm_object_t kernel_object; vm_offset_t i; + unsigned v; size = round_page(size); @@ -177,6 +184,7 @@ vm_offset_t kmem_alloc(map, size) */ vm_object_lock(kernel_object); + for (i = 0 ; i < size; i+= PAGE_SIZE) { vm_page_t mem; @@ -186,7 +194,7 @@ vm_offset_t kmem_alloc(map, size) vm_object_lock(kernel_object); } vm_page_zero_fill(mem); - mem->busy = FALSE; + mem->flags &= ~PG_BUSY; } vm_object_unlock(kernel_object); @@ -196,6 +204,16 @@ vm_offset_t kmem_alloc(map, size) (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE); + vm_map_lock(map); + for (i = 0; i < size; i += PAGE_SIZE) { + vm_page_t m; + vm_object_lock(kernel_object); + m = vm_page_lookup(kernel_object, offset + i); + vm_object_unlock(kernel_object); + pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m), + VM_PROT_DEFAULT, TRUE); + } + vm_map_unlock(map); /* * Try to coalesce the map */ @@ -212,13 +230,15 @@ vm_offset_t kmem_alloc(map, size) * with kmem_alloc, and return the physical pages * associated with that region. 
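/*
 * The kmem routines above round every request to whole pages.  A minimal
 * sketch of the usual trunc_page()/round_page() arithmetic, assuming a
 * 4096-byte page; the real macros live in the machine-dependent headers.
 */
#include <stdio.h>

#define NBPG          4096UL
#define trunc_page(x) ((unsigned long)(x) & ~(NBPG - 1))
#define round_page(x) (((unsigned long)(x) + NBPG - 1) & ~(NBPG - 1))

int
main(void)
{
	unsigned long addr = 0x12345;

	printf("trunc_page(0x%lx) = 0x%lx\n", addr, trunc_page(addr));
	printf("round_page(0x%lx) = 0x%lx\n", addr, round_page(addr));
	printf("round_page(%lu bytes) = %lu bytes\n", 100UL, round_page(100UL));
	return 0;
}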
*/ -void kmem_free(map, addr, size) +void +kmem_free(map, addr, size) vm_map_t map; register vm_offset_t addr; vm_size_t size; { (void) vm_map_remove(map, trunc_page(addr), round_page(addr + size)); vm_map_simplify(map, addr); + wakeup((caddr_t)map); } /* @@ -234,7 +254,8 @@ void kmem_free(map, addr, size) * min, max Returned endpoints of map * pageable Can the region be paged */ -vm_map_t kmem_suballoc(parent, min, max, size, pageable) +vm_map_t +kmem_suballoc(parent, min, max, size, pageable) register vm_map_t parent; vm_offset_t *min, *max; register vm_size_t size; @@ -259,9 +280,11 @@ vm_map_t kmem_suballoc(parent, min, max, size, pageable) panic("kmem_suballoc: cannot create submap"); if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS) panic("kmem_suballoc: unable to change range to submap"); + return(result); } +#if 0 /* * vm_move: * @@ -280,7 +303,8 @@ vm_map_t kmem_suballoc(parent, min, max, size, pageable) * * Returns new destination address or 0 (if a failure occurs). */ -vm_offset_t vm_move(src_map,src_addr,dst_map,num_bytes,src_dealloc) +vm_offset_t +vm_move(src_map, src_addr, dst_map, num_bytes, src_dealloc) vm_map_t src_map; register vm_offset_t src_addr; register vm_map_t dst_map; @@ -337,7 +361,7 @@ vm_offset_t vm_move(src_map,src_addr,dst_map,num_bytes,src_dealloc) return(dst_start + (src_addr - src_start)); return(0); } - +#endif /* * Allocate wired-down memory in the kernel's address map for the higher * level kernel memory allocator (kern/kern_malloc.c). We cannot use @@ -365,23 +389,26 @@ kmem_malloc(map, size, canwait) vm_offset_t addr; vm_page_t m; extern vm_object_t kmem_object; + int result; - if (map != kmem_map && map != mb_map) - panic("kern_malloc_alloc: map != {kmem,mb}_map"); + if (map != kmem_map && map != mb_map && map != buffer_map) + panic("kern_malloc_alloc: map != {kmem,mb,buffer}_map"); size = round_page(size); addr = vm_map_min(map); - if (vm_map_find(map, NULL, (vm_offset_t)0, - &addr, size, TRUE) != KERN_SUCCESS) { - if (!canwait) { - if (map == kmem_map) - panic("kmem_malloc: kmem_map too small"); - else if (map == mb_map) - log(LOG_WARNING, - "kmem_malloc: mb_map too small (can't wait)\n"); + result = vm_map_find(map, NULL, (vm_offset_t)0, &addr, size, TRUE); + if (result != KERN_SUCCESS) { + printf("vm_map_find failure: %d\n", result); + if (canwait) { + return(0); + } else { + if (map != buffer_map) { + /* panic("kmem_malloc: kmem_map too small"); */ + } + return 0; } - return 0; + panic("kmem_malloc: not enough map entries or map too small\n"); } /* @@ -430,12 +457,9 @@ kmem_malloc(map, size, canwait) vm_object_unlock(kmem_object); vm_map_delete(map, addr, addr + size); vm_map_unlock(map); + thread_wakeup((int)&vm_pages_needed); return(0); } -#if 0 - vm_page_zero_fill(m); -#endif - m->busy = FALSE; } vm_object_unlock(kmem_object); @@ -462,6 +486,8 @@ kmem_malloc(map, size, canwait) vm_object_unlock(kmem_object); pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m), VM_PROT_DEFAULT, TRUE); + PAGE_WAKEUP(m); + vm_page_wire(m); } vm_map_unlock(map); @@ -476,7 +502,8 @@ kmem_malloc(map, size, canwait) * has no room, the caller sleeps waiting for more memory in the submap. 
* */ -vm_offset_t kmem_alloc_wait(map, size) +vm_offset_t +kmem_alloc_wait(map, size) vm_map_t map; vm_size_t size; { @@ -501,15 +528,14 @@ vm_offset_t kmem_alloc_wait(map, size) lock_clear_recursive(&map->lock); if (result != KERN_SUCCESS) { - if ( (vm_map_max(map) - vm_map_min(map)) < size ) { + if ((vm_map_max(map) - vm_map_min(map)) < size) { vm_map_unlock(map); return(0); } - assert_wait((int)map, TRUE); vm_map_unlock(map); -thread_wakeup(&vm_pages_needed); /* XXX */ - thread_block(); + thread_wakeup((int)&vm_pages_needed); /* XXX */ + tsleep((caddr_t)map, PVM, "kmemaw", 0); } else { vm_map_unlock(map); @@ -520,6 +546,7 @@ thread_wakeup(&vm_pages_needed); /* XXX */ return(addr); } +#if 0 /* * kmem_alloc_wired_wait * @@ -527,7 +554,8 @@ thread_wakeup(&vm_pages_needed); /* XXX */ * has no room, the caller sleeps waiting for more memory in the submap. * */ -vm_offset_t kmem_alloc_wired_wait(map, size) +vm_offset_t +kmem_alloc_wired_wait(map, size) vm_map_t map; vm_size_t size; { @@ -552,15 +580,14 @@ vm_offset_t kmem_alloc_wired_wait(map, size) lock_clear_recursive(&map->lock); if (result != KERN_SUCCESS) { - if ( (vm_map_max(map) - vm_map_min(map)) < size ) { + if ((vm_map_max(map) - vm_map_min(map)) < size) { vm_map_unlock(map); return(0); } - assert_wait((int)map, TRUE); vm_map_unlock(map); -thread_wakeup(&vm_pages_needed); /* XXX */ - thread_block(); + thread_wakeup((int)&vm_pages_needed); /* XXX */ + tsleep((caddr_t)map, PVM, "kmemaww", 0); } else { vm_map_unlock(map); @@ -570,6 +597,7 @@ thread_wakeup(&vm_pages_needed); /* XXX */ return(addr); } +#endif /* * kmem_free_wakeup @@ -577,16 +605,17 @@ thread_wakeup(&vm_pages_needed); /* XXX */ * Returns memory to a submap of the kernel, and wakes up any threads * waiting for memory in that map. */ -void kmem_free_wakeup(map, addr, size) +void +kmem_free_wakeup(map, addr, size) vm_map_t map; vm_offset_t addr; vm_size_t size; { vm_map_lock(map); (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size)); - thread_wakeup((int)map); vm_map_unlock(map); vm_map_simplify(map, addr); + thread_wakeup((int)map); } /* @@ -595,7 +624,8 @@ void kmem_free_wakeup(map, addr, size) * Initialize the kernel's virtual memory map, taking * into account all memory allocated up to this time. */ -void kmem_init(start, end) +void +kmem_init(start, end) vm_offset_t start; vm_offset_t end; { diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h index ed0abc0f97ac..788c175619cf 100644 --- a/sys/vm/vm_kern.h +++ b/sys/vm/vm_kern.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_kern.h 7.2 (Berkeley) 4/21/91 - * $Id: vm_kern.h,v 1.2 1993/10/16 16:20:31 rgrimes Exp $ + * $Id: vm_kern.h,v 1.4 1993/12/19 00:56:02 wollman Exp $ */ /* @@ -64,6 +64,9 @@ * rights to redistribute these changes. */ +#ifndef _VM_VM_KERN_H_ +#define _VM_VM_KERN_H_ 1 + /* * Kernel memory management definitions. */ @@ -79,8 +82,10 @@ vm_offset_t vm_move(); vm_offset_t kmem_alloc_wait(); void kmem_free_wakeup(); -vm_map_t kernel_map; -vm_map_t mb_map; -vm_map_t kmem_map; -vm_map_t phys_map; -vm_map_t buffer_map; +extern vm_map_t kernel_map; +extern vm_map_t mb_map; +extern vm_map_t kmem_map; +extern vm_map_t phys_map; +extern vm_map_t buffer_map; + +#endif /* _VM_VM_KERN_H_ */ diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 3422d4073dbc..07cc00fd1552 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -34,10 +34,9 @@ * SUCH DAMAGE. 
* * from: @(#)vm_map.c 7.3 (Berkeley) 4/21/91 - * $Id: vm_map.c,v 1.7 1993/10/16 16:20:33 rgrimes Exp $ - */ - -/* + * $Id: vm_map.c,v 1.11 1994/01/31 04:20:10 davidg Exp $ + * + * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * @@ -67,14 +66,13 @@ /* * Virtual memory mapping module. */ - #include "ddb.h" #include "param.h" -#include "systm.h" #include "malloc.h" #include "vm.h" #include "vm_page.h" #include "vm_object.h" +#include "systm.h" /* * Virtual memory maps provide for the mapping, protection, @@ -137,9 +135,15 @@ vm_offset_t kentry_data; vm_size_t kentry_data_size; vm_map_entry_t kentry_free; +int kentry_count; vm_map_t kmap_free; +static vm_offset_t mapvm=0; +static int mapvmpgcnt=0; +extern vm_map_t kernel_map, kmem_map, pager_map; +extern int vm_page_count; -void vm_map_startup() +void +vm_map_startup() { register int i; register vm_map_entry_t mep; @@ -161,11 +165,13 @@ void vm_map_startup() * Form a free list of statically allocated kernel map entries * with the rest. */ + kentry_count = 0; kentry_free = mep = (vm_map_entry_t) mp; i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep; while (--i > 0) { mep->next = mep + 1; mep++; + kentry_count++; } mep->next = NULL; } @@ -181,6 +187,16 @@ vmspace_alloc(min, max, pageable) int pageable; { register struct vmspace *vm; + int s; + + if (mapvmpgcnt == 0 && mapvm == 0) { + mapvmpgcnt = (vm_page_count * sizeof(struct vm_map_entry) + NBPG - 1) / NBPG; + s = splimp(); + mapvm = kmem_alloc_pageable(kmem_map, mapvmpgcnt * NBPG); + splx(s); + if (!mapvm) + mapvmpgcnt = 0; + } MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK); bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm); @@ -192,22 +208,20 @@ vmspace_alloc(min, max, pageable) } void -vmspace_free(vm) +_vmspace_free(vm) register struct vmspace *vm; { - if (--vm->vm_refcnt == 0) { - /* - * Lock the map, to wait out all other references to it. - * Delete all of the mappings and pages they hold, - * then call the pmap module to reclaim anything left. - */ - vm_map_lock(&vm->vm_map); - (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, - vm->vm_map.max_offset); - pmap_release(&vm->vm_pmap); - FREE(vm, M_VMMAP); - } + /* + * Lock the map, to wait out all other references to it. + * Delete all of the mappings and pages they hold, + * then call the pmap module to reclaim anything left. + */ + vm_map_lock(&vm->vm_map); + (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, + vm->vm_map.max_offset); + pmap_release(&vm->vm_pmap); + FREE(vm, M_VMMAP); } /* @@ -217,13 +231,13 @@ vmspace_free(vm) * the given physical map structure, and having * the given lower and upper address bounds. */ -vm_map_t vm_map_create(pmap, min, max, pageable) +vm_map_t +vm_map_create(pmap, min, max, pageable) pmap_t pmap; vm_offset_t min, max; boolean_t pageable; { register vm_map_t result; - extern vm_map_t kernel_map, kmem_map; if (kmem_map == NULL) { result = kmap_free; @@ -272,19 +286,71 @@ vm_map_init(map, min, max, pageable) * Allocates a VM map entry for insertion. * No entry fields are filled in. 
This routine is */ -vm_map_entry_t vm_map_entry_create(map) +static struct vm_map_entry *mappool; +static int mappoolcnt; +void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry); + +vm_map_entry_t +vm_map_entry_create(map) vm_map_t map; { vm_map_entry_t entry; - extern vm_map_t kernel_map, kmem_map, mb_map, buffer_map, pager_map; + int s; + int i; +#define KENTRY_LOW_WATER 64 +#define MAPENTRY_LOW_WATER 64 - if (map == kernel_map || map == kmem_map || map == mb_map - || map == buffer_map || map == pager_map) { - if (entry = kentry_free) - kentry_free = kentry_free->next; - } else + /* + * This is a *very* nasty (and sort of incomplete) hack!!!! + */ + if (kentry_count < KENTRY_LOW_WATER) { + if (mapvmpgcnt && mapvm) { + vm_page_t m; + if (m = vm_page_alloc(kmem_object, mapvm-vm_map_min(kmem_map))) { + int newentries; + newentries = (NBPG/sizeof (struct vm_map_entry)); + vm_page_wire(m); + m->flags &= ~PG_BUSY; + pmap_enter(vm_map_pmap(kmem_map), mapvm, + VM_PAGE_TO_PHYS(m), VM_PROT_DEFAULT, 1); + + entry = (vm_map_entry_t) mapvm; + mapvm += NBPG; + --mapvmpgcnt; + + for (i = 0; i < newentries; i++) { + vm_map_entry_dispose(kernel_map, entry); + entry++; + } + } + } + } + + if (map == kernel_map || map == kmem_map || map == pager_map) { + + if (entry = kentry_free) { + kentry_free = entry->next; + --kentry_count; + return entry; + } + + if (entry = mappool) { + mappool = entry->next; + --mappoolcnt; + return entry; + } + + } else { + if (entry = mappool) { + mappool = entry->next; + --mappoolcnt; + return entry; + } + MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry), M_VMMAPENT, M_WAITOK); + } +dopanic: if (entry == NULL) panic("vm_map_entry_create: out of map entries"); @@ -296,18 +362,29 @@ vm_map_entry_t vm_map_entry_create(map) * * Inverse of vm_map_entry_create. */ -void vm_map_entry_dispose(map, entry) +void +vm_map_entry_dispose(map, entry) vm_map_t map; vm_map_entry_t entry; { - extern vm_map_t kernel_map, kmem_map, mb_map, buffer_map, pager_map; + extern vm_map_t kernel_map, kmem_map, pager_map; + int s; - if (map == kernel_map || map == kmem_map || map == mb_map - || map == buffer_map || map == pager_map) { + if (map == kernel_map || map == kmem_map || map == pager_map || + kentry_count < KENTRY_LOW_WATER) { entry->next = kentry_free; kentry_free = entry; - } else + ++kentry_count; + } else { + if (mappoolcnt < MAPENTRY_LOW_WATER) { + entry->next = mappool; + mappool = entry; + ++mappoolcnt; + return; + } + FREE(entry, M_VMMAPENT); + } } /* @@ -336,7 +413,8 @@ void vm_map_entry_dispose(map, entry) * Creates another valid reference to the given map. * */ -void vm_map_reference(map) +inline void +vm_map_reference(map) register vm_map_t map; { if (map == NULL) @@ -354,7 +432,8 @@ void vm_map_reference(map) * destroying it if no references remain. * The map should not be locked. */ -void vm_map_deallocate(map) +void +vm_map_deallocate(map) register vm_map_t map; { register int c; @@ -393,6 +472,7 @@ void vm_map_deallocate(map) * * Requires that the map be locked, and leaves it so. */ +int vm_map_insert(map, object, offset, start, end) vm_map_t map; vm_object_t object; @@ -418,8 +498,9 @@ vm_map_insert(map, object, offset, start, end) * existing entry, this range is bogus. 
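/*
 * Sketch of the map-entry pooling added above: entries come from a private
 * free list when possible and go back to it on dispose, with a bounded cache
 * and a general allocator as fallback.  malloc() stands in for the kernel
 * allocators, and the low-water refill of the static kentry pool is omitted.
 */
#include <stdio.h>
#include <stdlib.h>

struct sk_entry {
	struct sk_entry *next;
	unsigned long start, end;
};

static struct sk_entry *pool;
static int poolcnt;

static struct sk_entry *
entry_create(void)
{
	struct sk_entry *e;

	if ((e = pool) != NULL) {      /* fast path: reuse a pooled entry */
		pool = e->next;
		--poolcnt;
		return e;
	}
	return malloc(sizeof *e);      /* slow path: general allocator */
}

static void
entry_dispose(struct sk_entry *e)
{
	if (poolcnt < 64) {            /* keep a bounded number cached */
		e->next = pool;
		pool = e;
		++poolcnt;
		return;
	}
	free(e);
}

int
main(void)
{
	struct sk_entry *a = entry_create();
	struct sk_entry *b = entry_create();

	entry_dispose(a);
	entry_dispose(b);
	printf("pooled entries: %d\n", poolcnt);

	a = entry_create();            /* satisfied from the pool, no malloc */
	printf("pooled entries after reuse: %d\n", poolcnt);
	entry_dispose(a);
	return 0;
}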
*/ - if (vm_map_lookup_entry(map, start, &temp_entry)) + if (vm_map_lookup_entry(map, start, &temp_entry)) { return(KERN_NO_SPACE); + } prev_entry = temp_entry; @@ -429,8 +510,9 @@ vm_map_insert(map, object, offset, start, end) */ if ((prev_entry->next != &map->header) && - (prev_entry->next->start < end)) + (prev_entry->next->start < end)) { return(KERN_NO_SPACE); + } /* * See if we can avoid creating a new entry by @@ -528,7 +610,8 @@ vm_map_insert(map, object, offset, start, end) * result indicates whether the address is * actually contained in the map. */ -boolean_t vm_map_lookup_entry(map, address, entry) +boolean_t +vm_map_lookup_entry(map, address, entry) register vm_map_t map; register vm_offset_t address; vm_map_entry_t *entry; /* OUT */ @@ -606,6 +689,7 @@ boolean_t vm_map_lookup_entry(map, address, entry) * returned in the same parameter. * */ +int vm_map_find(map, object, offset, addr, length, find_space) vm_map_t map; vm_object_t object; @@ -619,10 +703,10 @@ vm_map_find(map, object, offset, addr, length, find_space) register vm_offset_t end; int result; - start = *addr; - vm_map_lock(map); + start = *addr; + if (find_space) { /* * Calculate the first possible address. @@ -714,7 +798,8 @@ vm_map_find(map, object, offset, addr, length, find_space) * removing extra sharing maps * [XXX maybe later] merging with a neighbor */ -void vm_map_simplify_entry(map, entry) +void +vm_map_simplify_entry(map, entry) vm_map_t map; vm_map_entry_t entry; { @@ -788,7 +873,8 @@ void vm_map_simplify_entry(map, entry) * This routine is called only when it is known that * the entry must be split. */ -void _vm_map_clip_start(map, entry, start) +void +_vm_map_clip_start(map, entry, start) register vm_map_t map; register vm_map_entry_t entry; register vm_offset_t start; @@ -799,7 +885,7 @@ void _vm_map_clip_start(map, entry, start) * See if we can simplify this entry first */ - vm_map_simplify_entry(map, entry); + /* vm_map_simplify_entry(map, entry); */ /* * Split off the front portion -- @@ -843,7 +929,8 @@ void _vm_map_clip_end(); * This routine is called only when it is known that * the entry must be split. */ -void _vm_map_clip_end(map, entry, end) +inline void +_vm_map_clip_end(map, entry, end) register vm_map_t map; register vm_map_entry_t entry; register vm_offset_t end; @@ -903,6 +990,7 @@ void _vm_map_clip_end(map, entry, end) * range from the superior map, and then destroy the * submap (if desired). [Better yet, don't try it.] */ +int vm_map_submap(map, start, end, submap) register vm_map_t map; register vm_offset_t start; @@ -946,6 +1034,7 @@ vm_map_submap(map, start, end, submap) * specified, the maximum protection is to be set; * otherwise, only the current protection is affected. */ +int vm_map_protect(map, start, end, new_prot, set_max) register vm_map_t map; register vm_offset_t start; @@ -1067,6 +1156,7 @@ vm_map_protect(map, start, end, new_prot, set_max) * affects how the map will be shared with * child maps at the time of vm_map_fork. */ +int vm_map_inherit(map, start, end, new_inheritance) register vm_map_t map; register vm_offset_t start; @@ -1119,6 +1209,7 @@ vm_map_inherit(map, start, end, new_inheritance) * The map must not be locked, but a reference * must remain to the map throughout the call. 
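vm_map_lookup_entry(), whose signature is reformatted above, searches the map's sorted, circularly linked entry list, and the full routine consults the saved map hint before falling back to a scan from the head (the hint handling is also visible later in the vm_map_lookup() and SAVE_HINT hunks). A self-contained sketch of that hint-assisted lookup over [start, end) ranges, with simplified types in place of the kernel's vm_map structures:

#include <stdio.h>

struct map_entry {
        struct map_entry *prev, *next;
        unsigned long start, end;               /* [start, end) */
};

struct map {
        struct map_entry header;                /* sentinel of a sorted list */
        struct map_entry *hint;                 /* last entry looked up */
};

/*
 * Return 1 and set *out to the entry containing addr, or return 0 and
 * set *out to the entry preceding addr (possibly the header sentinel).
 */
static int
lookup_entry(struct map *map, unsigned long addr, struct map_entry **out)
{
        struct map_entry *cur = map->hint;

        if (cur == &map->header || addr < cur->start)
                cur = map->header.next;         /* hint is no help: restart */

        for (; cur != &map->header; cur = cur->next) {
                if (addr < cur->start) {        /* fell in the gap before cur */
                        *out = cur->prev;
                        return (0);
                }
                if (addr < cur->end) {          /* inside this entry */
                        *out = map->hint = cur;
                        return (1);
                }
        }
        *out = map->header.prev;                /* beyond the last entry */
        return (0);
}

int
main(void)
{
        struct map m;
        struct map_entry e = { &m.header, &m.header, 0x1000, 0x3000 };
        struct map_entry *found;

        m.header.prev = m.header.next = &e;
        m.header.start = m.header.end = 0;
        m.hint = &m.header;
        printf("0x2000 %s\n",
            lookup_entry(&m, 0x2000, &found) ? "hit" : "miss");
        return (0);
}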
*/ +int vm_map_pageable(map, start, end, new_pageable) register vm_map_t map; register vm_offset_t start; @@ -1127,6 +1218,7 @@ vm_map_pageable(map, start, end, new_pageable) { register vm_map_entry_t entry; vm_map_entry_t temp_entry; + extern vm_map_t kernel_map; vm_map_lock(map); @@ -1308,7 +1400,8 @@ vm_map_pageable(map, start, end, new_pageable) * The map in question should be locked. * [This is the reason for this routine's existence.] */ -void vm_map_entry_unwire(map, entry) +void +vm_map_entry_unwire(map, entry) vm_map_t map; register vm_map_entry_t entry; { @@ -1321,20 +1414,23 @@ void vm_map_entry_unwire(map, entry) * * Deallocate the given entry from the target map. */ -void vm_map_entry_delete(map, entry) +void +vm_map_entry_delete(map, entry) register vm_map_t map; register vm_map_entry_t entry; { + int prev_ref_count; if (entry->wired_count != 0) vm_map_entry_unwire(map, entry); vm_map_entry_unlink(map, entry); map->size -= entry->end - entry->start; - if (entry->is_a_map || entry->is_sub_map) + if (entry->is_a_map || entry->is_sub_map) { vm_map_deallocate(entry->object.share_map); - else + } else { vm_object_deallocate(entry->object.vm_object); + } vm_map_entry_dispose(map, entry); } @@ -1348,6 +1444,7 @@ void vm_map_entry_delete(map, entry) * When called with a sharing map, removes pages from * that region from all physical maps. */ +int vm_map_delete(map, start, end) register vm_map_t map; vm_offset_t start; @@ -1356,6 +1453,7 @@ vm_map_delete(map, start, end) register vm_map_entry_t entry; vm_map_entry_t first_entry; + /* * Find the start of the region, and clip it */ @@ -1413,15 +1511,19 @@ vm_map_delete(map, start, end) * it. */ - if (object == kernel_object || object == kmem_object) + if (object == kernel_object || object == kmem_object) { vm_object_page_remove(object, entry->offset, entry->offset + (e - s)); - else if (!map->is_main_map) + } else if (!map->is_main_map) { vm_object_pmap_remove(object, entry->offset, entry->offset + (e - s)); - else + } else { + /* + * save the pmap info + */ pmap_remove(map->pmap, s, e); + } /* * Delete the entry (which may delete the object) @@ -1443,6 +1545,7 @@ vm_map_delete(map, start, end) * Remove the given address range from the target map. * This is the exported form of vm_map_delete. */ +int vm_map_remove(map, start, end) register vm_map_t map; register vm_offset_t start; @@ -1465,7 +1568,8 @@ vm_map_remove(map, start, end) * privilege on the entire address region given. * The entire region must be allocated. */ -boolean_t vm_map_check_protection(map, start, end, protection) +boolean_t +vm_map_check_protection(map, start, end, protection) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; @@ -1515,7 +1619,8 @@ boolean_t vm_map_check_protection(map, start, end, protection) * Copies the contents of the source entry to the destination * entry. The entries *must* be aligned properly. */ -void vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) +void +vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) vm_map_t src_map, dst_map; register vm_map_entry_t src_entry, dst_entry; { @@ -1543,13 +1648,14 @@ void vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) * this sharing map belongs in). 
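vm_map_check_protection() above succeeds only when the whole region is allocated, covered by consecutive entries, and every entry grants all of the requested protection bits. A compact sketch of that walk over an array of sorted, non-overlapping ranges (the kernel walks its entry list instead; the protection bit names are illustrative):

#include <stdio.h>

#define PROT_R 0x1
#define PROT_W 0x2

struct range {
        unsigned long start, end;       /* [start, end), sorted, no overlap */
        int prot;
};

/*
 * Succeed only if [start, end) is completely covered by consecutive
 * ranges and every one of them grants all bits in 'prot'.
 */
static int
check_protection(const struct range *r, int n, unsigned long start,
    unsigned long end, int prot)
{
        int i;

        for (i = 0; i < n && start < end; i++) {
                if (r[i].end <= start)
                        continue;               /* before the region */
                if (r[i].start > start)
                        return (0);             /* hole in the region */
                if ((r[i].prot & prot) != prot)
                        return (0);             /* insufficient rights */
                start = r[i].end;               /* advance past this range */
        }
        return (start >= end);
}

int
main(void)
{
        struct range map[] = {
                { 0x0000, 0x2000, PROT_R | PROT_W },
                { 0x2000, 0x4000, PROT_R },
        };

        printf("read ok:  %d\n", check_protection(map, 2, 0x1000, 0x3000, PROT_R));
        printf("write ok: %d\n", check_protection(map, 2, 0x1000, 0x3000, PROT_W));
        return (0);
}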
*/ - if (dst_map->is_main_map) + if (dst_map->is_main_map) { pmap_remove(dst_map->pmap, dst_entry->start, dst_entry->end); - else + } else { vm_object_pmap_remove(dst_entry->object.vm_object, dst_entry->offset, dst_entry->offset + (dst_entry->end - dst_entry->start)); + } if (src_entry->wired_count == 0) { @@ -1652,9 +1758,8 @@ void vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) * map to make copies. This also reduces map * fragmentation.] */ -vm_map_copy(dst_map, src_map, - dst_addr, len, src_addr, - dst_alloc, src_destroy) +int +vm_map_copy(dst_map, src_map, dst_addr, len, src_addr, dst_alloc, src_destroy) vm_map_t dst_map; vm_map_t src_map; vm_offset_t dst_addr; @@ -1681,14 +1786,12 @@ vm_map_copy(dst_map, src_map, * XXX While we figure out why src_destroy screws up, * we'll do it by explicitly vm_map_delete'ing at the end. */ - old_src_destroy = src_destroy; src_destroy = FALSE; /* * Compute start and end of region in both maps */ - src_start = src_addr; src_end = src_start + len; dst_start = dst_addr; @@ -1698,7 +1801,6 @@ vm_map_copy(dst_map, src_map, * Check that the region can exist in both source * and destination. */ - if ((dst_end < dst_start) || (src_end < src_start)) return(KERN_NO_SPACE); @@ -1706,7 +1808,6 @@ vm_map_copy(dst_map, src_map, * Lock the maps in question -- we avoid deadlock * by ordering lock acquisition by map value */ - if (src_map == dst_map) { vm_map_lock(src_map); } @@ -1727,7 +1828,6 @@ vm_map_copy(dst_map, src_map, * about protection, but instead about whether the region * exists.] */ - if (src_map->is_main_map && dst_map->is_main_map) { if (!vm_map_check_protection(src_map, src_start, src_end, VM_PROT_READ)) { @@ -1758,7 +1858,6 @@ vm_map_copy(dst_map, src_map, * until we have done the first clip, as the clip * may affect which entry we get! */ - (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry); src_entry = tmp_entry; vm_map_clip_start(src_map, src_entry, src_start); @@ -1771,7 +1870,6 @@ vm_map_copy(dst_map, src_map, * If both source and destination entries are the same, * retry the first lookup, as it may have changed. */ - if (src_entry == dst_entry) { (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry); src_entry = tmp_entry; @@ -1781,7 +1879,6 @@ vm_map_copy(dst_map, src_map, * If source and destination entries are still the same, * a null copy is being performed. */ - if (src_entry == dst_entry) goto Return; @@ -1789,19 +1886,16 @@ vm_map_copy(dst_map, src_map, * Go through entries until we get to the end of the * region. */ - while (src_start < src_end) { /* * Clip the entries to the endpoint of the entire region. */ - vm_map_clip_end(src_map, src_entry, src_end); vm_map_clip_end(dst_map, dst_entry, dst_end); /* * Clip each entry to the endpoint of the other entry. */ - src_clip = src_entry->start + (dst_entry->end - dst_entry->start); vm_map_clip_end(src_map, src_entry, src_clip); @@ -1814,7 +1908,6 @@ vm_map_copy(dst_map, src_map, * If both entries refer to a VM object, we can * deal with them now. */ - if (!src_entry->is_a_map && !dst_entry->is_a_map) { vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry); @@ -1829,7 +1922,6 @@ vm_map_copy(dst_map, src_map, /* * We have to follow at least one sharing map. */ - new_size = (dst_entry->end - dst_entry->start); if (src_entry->is_a_map) { @@ -1858,7 +1950,6 @@ vm_map_copy(dst_map, src_map, * Note that we can only do so if the * source and destination do not overlap. 
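The vm_map_copy() comments above note that deadlock is avoided by ordering lock acquisition by map value when the source and destination maps differ. A small pthreads sketch of that discipline, acquiring two locks in a consistent order chosen by address; the address comparison is an illustration of the idea, not the kernel's lock primitives:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t src_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t dst_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Acquire two locks in a globally consistent order so that two threads
 * locking the same pair can never deadlock.
 */
static void
lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
        if (a == b) {
                pthread_mutex_lock(a);          /* same map: one lock only */
                return;
        }
        if ((uintptr_t)a < (uintptr_t)b) {
                pthread_mutex_lock(a);
                pthread_mutex_lock(b);
        } else {
                pthread_mutex_lock(b);
                pthread_mutex_lock(a);
        }
}

static void
unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
        pthread_mutex_unlock(a);
        if (a != b)
                pthread_mutex_unlock(b);
}

int
main(void)
{
        lock_pair(&src_lock, &dst_lock);
        printf("both maps locked\n");
        unlock_pair(&src_lock, &dst_lock);
        return (0);
}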
*/ - new_dst_end = new_dst_start + new_size; if (new_dst_map != new_src_map) { @@ -1883,7 +1974,6 @@ vm_map_copy(dst_map, src_map, /* * Recursively copy the sharing map. */ - (void) vm_map_copy(new_dst_map, new_src_map, new_dst_start, new_size, new_src_start, FALSE, FALSE); @@ -1897,7 +1987,6 @@ vm_map_copy(dst_map, src_map, /* * Update variables for next pass through the loop. */ - src_start = src_entry->end; src_entry = src_entry->next; dst_start = dst_entry->end; @@ -1907,7 +1996,6 @@ vm_map_copy(dst_map, src_map, * If the source is to be destroyed, here is the * place to do it. */ - if (src_destroy && src_map->is_main_map && dst_map->is_main_map) vm_map_entry_delete(src_map, src_entry->prev); @@ -1916,10 +2004,10 @@ vm_map_copy(dst_map, src_map, /* * Update the physical maps as appropriate */ - if (src_map->is_main_map && dst_map->is_main_map) { - if (src_destroy) + if (src_destroy) { pmap_remove(src_map->pmap, src_addr, src_addr + len); + } } /* @@ -1927,9 +2015,10 @@ vm_map_copy(dst_map, src_map, */ Return: ; - - if (old_src_destroy) + + if (old_src_destroy) { vm_map_delete(src_map, src_addr, src_addr + len); + } vm_map_unlock(src_map); if (src_map != dst_map) @@ -2092,6 +2181,23 @@ vmspace_fork(vm1) } /* + * vmspace_deallocate + * + * clean up old parent vmspace references + * + */ + +void +vmspace_free(struct vmspace *vm) { + + if (vm == 0 || --vm->vm_refcnt != 0) { + return; + } + _vmspace_free(vm); +} + + +/* * vm_map_lookup: * * Finds the VM object, offset, and @@ -2113,8 +2219,8 @@ vmspace_fork(vm1) * copying operations, although the data referenced will * remain the same. */ -vm_map_lookup(var_map, vaddr, fault_type, out_entry, - object, offset, out_prot, wired, single_use) +int +vm_map_lookup(var_map, vaddr, fault_type, out_entry, object, offset, out_prot, wired, single_use) vm_map_t *var_map; /* IN/OUT */ register vm_offset_t vaddr; register vm_prot_t fault_type; @@ -2166,8 +2272,9 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, * Entry was either not a valid hint, or the vaddr * was not contained in the entry, so do a full lookup. */ - if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) + if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) { RETURN(KERN_INVALID_ADDRESS); + } entry = tmp_entry; *out_entry = entry; @@ -2332,7 +2439,8 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, * (according to the handle returned by that lookup). */ -void vm_map_lookup_done(map, entry) +void +vm_map_lookup_done(map, entry) register vm_map_t map; vm_map_entry_t entry; { @@ -2362,7 +2470,8 @@ void vm_map_lookup_done(map, entry) * at allocation time because the adjacent entry * is often wired down. 
*/ -void vm_map_simplify(map, start) +void +vm_map_simplify(map, start) vm_map_t map; vm_offset_t start; { @@ -2398,11 +2507,13 @@ void vm_map_simplify(map, start) if (map->first_free == this_entry) map->first_free = prev_entry; - SAVE_HINT(map, prev_entry); - vm_map_entry_unlink(map, this_entry); - prev_entry->end = this_entry->end; - vm_object_deallocate(this_entry->object.vm_object); - vm_map_entry_dispose(map, this_entry); + if (!this_entry->object.vm_object->paging_in_progress) { + SAVE_HINT(map, prev_entry); + vm_map_entry_unlink(map, this_entry); + prev_entry->end = this_entry->end; + vm_object_deallocate(this_entry->object.vm_object); + vm_map_entry_dispose(map, this_entry); + } } vm_map_unlock(map); } @@ -2411,24 +2522,32 @@ void vm_map_simplify(map, start) /* * vm_map_print: [ debug ] */ -void vm_map_print(map, full) +void +vm_map_print(map, full) register vm_map_t map; boolean_t full; { register vm_map_entry_t entry; extern int indent; + static int nmaps; + if (indent == 0) + nmaps = 0; iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n", (map->is_main_map ? "Task" : "Share"), (int) map, (int) (map->pmap), map->ref_count, map->nentries, map->timestamp); +/* if (!full && indent) return; +*/ indent += 2; for (entry = map->header.next; entry != &map->header; entry = entry->next) { + nmaps++; + if (full || indent == 2) { iprintf("map entry 0x%x: start=0x%x, end=0x%x, ", (int) entry, (int) entry->start, (int) entry->end); if (map->is_main_map) { @@ -2441,8 +2560,10 @@ void vm_map_print(map, full) if (entry->wired_count != 0) printf("wired, "); } + } if (entry->is_a_map || entry->is_sub_map) { + if (full || indent == 2) printf("share=0x%x, offset=0x%x\n", (int) entry->object.share_map, (int) entry->offset); @@ -2456,7 +2577,8 @@ void vm_map_print(map, full) } } - else { + else if (full || indent == 2) { + printf("object=0x%x, offset=0x%x", (int) entry->object.vm_object, (int) entry->offset); @@ -2476,5 +2598,8 @@ void vm_map_print(map, full) } } indent -= 2; + + if (indent == 0) + printf("nmaps=%d\n", nmaps); } -#endif /* defined(DEBUG) || (NDDB > 0) */ +#endif diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 7066bd054765..7dc7ee020a75 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_map.h 7.3 (Berkeley) 4/21/91 - * $Id: vm_map.h,v 1.2 1993/10/16 16:20:36 rgrimes Exp $ + * $Id: vm_map.h,v 1.5 1994/01/17 09:33:43 davidg Exp $ */ /* @@ -100,6 +100,7 @@ typedef union vm_map_object vm_map_object_t; * and user-exported inheritance and protection information. * Also included is control information for virtual copy operations. */ + struct vm_map_entry { struct vm_map_entry *prev; /* previous entry */ struct vm_map_entry *next; /* next entry */ @@ -107,11 +108,11 @@ struct vm_map_entry { vm_offset_t end; /* end address */ union vm_map_object object; /* object I point to */ vm_offset_t offset; /* offset into object */ - boolean_t is_a_map; /* Is "object" a map? */ - boolean_t is_sub_map; /* Is "object" a submap? */ + unsigned char is_a_map:1, /* Is "object" a map? */ + is_sub_map:1, /* Is "object" a submap? 
*/ /* Only in sharing maps: */ - boolean_t copy_on_write; /* is data copy-on-write */ - boolean_t needs_copy; /* does object need to be copied */ + copy_on_write:1,/* is data copy-on-write */ + needs_copy:1;/* does object need to be copied */ /* Only in task maps: */ vm_prot_t protection; /* protection code */ vm_prot_t max_protection; /* maximum protection */ @@ -176,22 +177,50 @@ typedef struct { /* * Exported procedures that operate on vm_map_t. */ +extern int vm_map_pageable(vm_map_t, vm_offset_t, vm_offset_t, boolean_t); +extern void vm_map_startup(void); +struct pmap; struct vm_object; +extern vm_map_t vm_map_create(struct pmap *, vm_offset_t, vm_offset_t, + boolean_t); +extern void vm_map_init(struct vm_map *, vm_offset_t, vm_offset_t, boolean_t); +extern vm_map_entry_t vm_map_entry_create(vm_map_t); +extern void vm_map_entry_dispose(vm_map_t, vm_map_entry_t); +extern void vm_map_reference(vm_map_t); +extern void vm_map_deallocate(vm_map_t); +extern int vm_map_insert(vm_map_t, struct vm_object *, vm_offset_t, vm_offset_t, + vm_offset_t); +extern boolean_t vm_map_lookup_entry(vm_map_t, vm_offset_t, vm_map_entry_t *); +extern int vm_map_find(vm_map_t, struct vm_object *, vm_offset_t, vm_offset_t*, + vm_size_t, boolean_t); +extern void vm_map_simplify_entry(vm_map_t, vm_map_entry_t); +extern int vm_map_submap(vm_map_t, vm_offset_t, vm_offset_t, vm_map_t); +extern int vm_map_protect(vm_map_t, vm_offset_t, vm_offset_t, vm_prot_t, + boolean_t); +extern int vm_map_inherit(vm_map_t, vm_offset_t, vm_offset_t, vm_inherit_t); +extern int vm_map_pageable(vm_map_t, vm_offset_t, vm_offset_t, boolean_t); +extern void vm_map_entry_unwire(vm_map_t, vm_map_entry_t); +extern void vm_map_entry_delete(vm_map_t, vm_map_entry_t); +extern int vm_map_delete(vm_map_t, vm_offset_t, vm_offset_t); +extern int vm_map_remove(vm_map_t, vm_offset_t, vm_offset_t); +extern boolean_t vm_map_check_protection(vm_map_t, vm_offset_t, vm_offset_t, + vm_prot_t); +extern void vm_map_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, + vm_map_entry_t); +extern int vm_map_copy(vm_map_t, vm_map_t, vm_offset_t, vm_size_t, vm_offset_t, + boolean_t, boolean_t); +extern int vm_map_lookup(vm_map_t *, vm_offset_t, vm_prot_t, + vm_map_entry_t *, struct vm_object **, vm_offset_t *, + vm_prot_t *, boolean_t *, boolean_t *); +extern void vm_map_lookup_done(vm_map_t, vm_map_entry_t); +extern void vm_map_simplify(vm_map_t, vm_offset_t); +extern void vm_map_print(vm_map_t, boolean_t); /* f defined(DEBUG) || NDDB>0 */ + +extern int vm_fault(struct vm_map *, vm_offset_t, vm_prot_t, boolean_t); +extern void vm_fault_wire(struct vm_map *, vm_offset_t, vm_offset_t); +extern void vm_fault_unwire(struct vm_map *, vm_offset_t, vm_offset_t); +extern void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, + vm_map_entry_t); -void vm_map_init(); -vm_map_t vm_map_create(); -void vm_map_deallocate(); -void vm_map_reference(); -int vm_map_find(); -int vm_map_remove(); -int vm_map_lookup(); -void vm_map_lookup_done(); -int vm_map_protect(); -int vm_map_inherit(); -int vm_map_copy(); -void vm_map_print(); -void vm_map_copy_entry(); -boolean_t vm_map_verify(); -void vm_map_verify_done(); /* * Functions implemented as macros @@ -203,10 +232,6 @@ void vm_map_verify_done(); /* XXX: number of kernel maps and entries to statically allocate */ #define MAX_KMAP 10 -#ifdef OMIT -#define MAX_KMAPENT 500 -#else /* !OMIT*/ -#define MAX_KMAPENT 1000 /* 15 Aug 92*/ -#endif /* !OMIT*/ +#define MAX_KMAPENT 128 #endif _VM_MAP_ diff --git a/sys/vm/vm_meter.c 
b/sys/vm/vm_meter.c index 219fd50a1e72..910402dd9ed0 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * from: @(#)vm_meter.c 7.11 (Berkeley) 4/20/91 - * $Id: vm_meter.c,v 1.2 1993/10/16 16:20:37 rgrimes Exp $ + * $Id: vm_meter.c,v 1.4 1993/11/25 01:39:07 wollman Exp $ */ #include "param.h" @@ -42,12 +42,32 @@ #include "vm_param.h" #include "vmmeter.h" +struct vmtotal total; +struct vmmeter cnt, rate, sum; + +#include "dkstat.h" /* a convenient place to put these */ +long cp_time[CPUSTATES]; /* cpu time per state */ +int dk_busy; /* number of busy disks */ +long dk_time[DK_NDRIVE]; /* access time per disk */ +long dk_seek[DK_NDRIVE]; /* seeks per disk */ +long dk_wds[DK_NDRIVE]; /* */ +long dk_wpms[DK_NDRIVE]; /* */ +long dk_xfer[DK_NDRIVE]; /* */ +long tk_nin; /* total characters in */ +long tk_nout; /* total characters out */ +long tk_cancc; /* total canonical characters */ +long tk_rawcc; /* total raw characters */ + fixpt_t averunnable[3]; /* load average, of runnable procs */ +static void vmtotal(void); +static void loadav(fixpt_t *, int); + int maxslp = MAXSLP; int saferss = SAFERSS; +void vmmeter() { register unsigned *cp, *rp, *sp; @@ -58,6 +78,7 @@ vmmeter() wakeup((caddr_t)&proc0); } +static void vmtotal() { register struct proc *p; @@ -128,6 +149,7 @@ fixpt_t cexp[3] = { * Compute a tenex style load average of a quantity on * 1, 5 and 15 minute intervals. */ +void loadav(avg, n) register fixpt_t *avg; int n; diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index f963e25c8ffd..e6759a756844 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -37,7 +37,7 @@ * * from: Utah $Hdr: vm_mmap.c 1.3 90/01/21$ * from: @(#)vm_mmap.c 7.5 (Berkeley) 6/28/91 - * $Id: vm_mmap.c,v 1.8 1993/10/16 16:20:39 rgrimes Exp $ + * $Id: vm_mmap.c,v 1.21 1994/01/31 04:20:26 davidg Exp $ */ /* @@ -58,6 +58,10 @@ #include "vm_pager.h" #include "vm_prot.h" #include "vm_statistics.h" +#include "vm_user.h" + +static boolean_t vm_map_is_allocated(vm_map_t, vm_offset_t, vm_offset_t, + boolean_t); #ifdef DEBUG int mmapdebug = 0; @@ -67,6 +71,7 @@ int mmapdebug = 0; #endif /* ARGSUSED */ +int getpagesize(p, uap, retval) struct proc *p; void *uap; @@ -82,6 +87,7 @@ struct sbrk_args { }; /* ARGSUSED */ +int sbrk(p, uap, retval) struct proc *p; struct sbrk_args *uap; @@ -97,6 +103,7 @@ struct sstk_args { }; /* ARGSUSED */ +int sstk(p, uap, retval) struct proc *p; struct sstk_args *uap; @@ -116,13 +123,14 @@ struct smmap_args { off_t pos; }; +int smmap(p, uap, retval) struct proc *p; register struct smmap_args *uap; int *retval; { register struct filedesc *fdp = p->p_fd; - register struct file *fp; + register struct file *fp = 0; struct vnode *vp; vm_offset_t addr; vm_size_t size; @@ -138,6 +146,7 @@ smmap(p, uap, retval) p->p_pid, uap->addr, uap->len, uap->prot, uap->flags, uap->fd, uap->pos); #endif + /* * Make sure one of the sharing types is specified */ @@ -149,16 +158,16 @@ smmap(p, uap, retval) default: return(EINVAL); } + /* * Address (if FIXED) must be page aligned. * Size is implicitly rounded to a page boundary. */ addr = (vm_offset_t) uap->addr; - if ((flags & MAP_FIXED) && (addr & page_mask) || uap->len < 0) + if ((flags & MAP_FIXED) && (addr & PAGE_MASK) || uap->len < 0) return(EINVAL); size = (vm_size_t) round_page(uap->len); - if ((uap->flags & MAP_FIXED) && (addr + size > VM_MAXUSER_ADDRESS)) - return(EINVAL); + /* * XXX if no hint provided for a non-fixed mapping place it after * the end of the largest possible heap. 
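The mmap path above now tests page alignment against the PAGE_MASK constant and rounds the requested length up with round_page(). A tiny sketch of those two operations, assuming a fixed 4096-byte page; the kernel derives the real values from its configured page size, and round_page_len() below is just an illustrative name:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (PAGE_SIZE - 1)

/* Reject addresses that are not on a page boundary. */
static int
page_aligned(unsigned long addr)
{
        return ((addr & PAGE_MASK) == 0);
}

/* Round a byte length up to a whole number of pages. */
static unsigned long
round_page_len(unsigned long len)
{
        return ((len + PAGE_MASK) & ~PAGE_MASK);
}

int
main(void)
{
        printf("aligned(0x2000)=%d aligned(0x2010)=%d\n",
            page_aligned(0x2000), page_aligned(0x2010));
        printf("round(1)=%lu round(4096)=%lu\n",
            round_page_len(1), round_page_len(4096));
        return (0);
}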
@@ -168,6 +177,15 @@ smmap(p, uap, retval) */ if (addr == 0 && (flags & MAP_FIXED) == 0) addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ); + + /* + * Check address range for validity + */ + if (addr + size >= VM_MAXUSER_ADDRESS) + return(EINVAL); + if (addr > addr + size) + return(EINVAL); + /* * Mapping file or named anonymous, get fp for validation */ @@ -176,6 +194,23 @@ smmap(p, uap, retval) (fp = fdp->fd_ofiles[uap->fd]) == NULL) return(EBADF); } + + /* + * Set initial maximum protection + */ + maxprot = VM_PROT_ALL; + + /* + * Map protections to MACH style + */ + prot = VM_PROT_NONE; + if (uap->prot & PROT_READ) + prot |= VM_PROT_READ; + if (uap->prot & PROT_WRITE) + prot |= VM_PROT_WRITE; + if (uap->prot & PROT_EXEC) + prot |= VM_PROT_EXECUTE; + /* * If we are mapping a file we need to check various * file/vnode related things. @@ -189,45 +224,36 @@ smmap(p, uap, retval) vp = (struct vnode *)fp->f_data; if (vp->v_type != VREG && vp->v_type != VCHR) return(EINVAL); + /* - * Ensure that file protection and desired protection - * are compatible. Note that we only worry about writability - * if mapping is shared. - */ - if ((uap->prot & PROT_READ) && (fp->f_flag & FREAD) == 0 || - ((flags & MAP_SHARED) && - (uap->prot & PROT_WRITE) && (fp->f_flag & FWRITE) == 0)) - return(EACCES); - handle = (caddr_t)vp; - /* - * PATCH GVR 25-03-93 - * Map protections to MACH style + * Set maxprot according to file protection. + * If the file is the backing store, enable maxprot write + * if the file protection allows. If the file isn't + * the backing store, enable writes. */ - if(uap->flags & MAP_SHARED) { - maxprot = VM_PROT_EXECUTE; - if (fp->f_flag & FREAD) - maxprot |= VM_PROT_READ; + maxprot = VM_PROT_NONE; + if (fp->f_flag & FREAD) + maxprot |= VM_PROT_READ|VM_PROT_EXECUTE; + if (uap->flags & MAP_SHARED) { if (fp->f_flag & FWRITE) maxprot |= VM_PROT_WRITE; - } else - maxprot = VM_PROT_ALL; + } else { + maxprot |= VM_PROT_WRITE; + } + + /* + * Ensure that calculated maximum protection and desired + * protection are compatible. 
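The reshuffled protection handling above converts the caller's PROT_* request into VM-style protection bits, derives the maximum protection from how the file was opened and whether the mapping is shared, and then rejects the request unless the desired protection is a subset of the maximum. A condensed sketch with simplified flag values standing in for PROT_*, VM_PROT_* and FREAD/FWRITE:

#include <stdio.h>

/* caller-visible request bits (stand-ins for PROT_*) */
#define REQ_READ   0x1
#define REQ_WRITE  0x2
#define REQ_EXEC   0x4

/* internal protection bits (stand-ins for VM_PROT_*) */
#define P_READ     0x1
#define P_WRITE    0x2
#define P_EXEC     0x4

/* how the file was opened (stand-ins for FREAD/FWRITE) */
#define F_READ     0x1
#define F_WRITE    0x2

static int
to_vm_prot(int req)
{
        int prot = 0;

        if (req & REQ_READ)
                prot |= P_READ;
        if (req & REQ_WRITE)
                prot |= P_WRITE;
        if (req & REQ_EXEC)
                prot |= P_EXEC;
        return (prot);
}

/*
 * Returns 0 on success, -1 if the requested protection exceeds what the
 * open file (and sharing mode) allows.
 */
static int
check_mmap_prot(int req, int fflags, int shared, int *protp, int *maxprotp)
{
        int prot = to_vm_prot(req);
        int maxprot = 0;

        if (fflags & F_READ)
                maxprot |= P_READ | P_EXEC;
        if (shared) {
                if (fflags & F_WRITE)
                        maxprot |= P_WRITE;     /* shared writes need FWRITE */
        } else {
                maxprot |= P_WRITE;             /* private mapping: COW */
        }
        if ((maxprot & prot) != prot)
                return (-1);                    /* like returning EACCES */
        *protp = prot;
        *maxprotp = maxprot;
        return (0);
}

int
main(void)
{
        int prot, maxprot;

        printf("shared RW on read-only fd:  %d\n",
            check_mmap_prot(REQ_READ | REQ_WRITE, F_READ, 1, &prot, &maxprot));
        printf("private RW on read-only fd: %d\n",
            check_mmap_prot(REQ_READ | REQ_WRITE, F_READ, 0, &prot, &maxprot));
        return (0);
}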
+ */ + if ((maxprot & prot) != prot) + return(EACCES); + + handle = (caddr_t)vp; } else if (uap->fd != -1) { - maxprot = VM_PROT_ALL; handle = (caddr_t)fp; } else { - maxprot = VM_PROT_ALL; handle = NULL; } - /* - * Map protections to MACH style - */ - prot = VM_PROT_NONE; - if (uap->prot & PROT_READ) - prot |= VM_PROT_READ; - if (uap->prot & PROT_WRITE) - prot |= VM_PROT_WRITE; - if (uap->prot & PROT_EXEC) - prot |= VM_PROT_EXECUTE; error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot, flags, handle, (vm_offset_t)uap->pos); @@ -241,6 +267,7 @@ struct msync_args { int len; }; +int msync(p, uap, retval) struct proc *p; struct msync_args *uap; @@ -259,7 +286,7 @@ msync(p, uap, retval) printf("msync(%d): addr %x len %x\n", p->p_pid, uap->addr, uap->len); #endif - if (((int)uap->addr & page_mask) || uap->len < 0) + if (((int)uap->addr & PAGE_MASK) || uap->len < 0) return(EINVAL); addr = oaddr = (vm_offset_t)uap->addr; osize = (vm_size_t)uap->len; @@ -315,6 +342,7 @@ struct munmap_args { int len; }; +int munmap(p, uap, retval) register struct proc *p; register struct munmap_args *uap; @@ -330,13 +358,15 @@ munmap(p, uap, retval) #endif addr = (vm_offset_t) uap->addr; - if ((addr & page_mask) || uap->len < 0) + if ((addr & PAGE_MASK) || uap->len < 0) return(EINVAL); size = (vm_size_t) round_page(uap->len); if (size == 0) return(0); if (addr + size >= VM_MAXUSER_ADDRESS) return(EINVAL); + if (addr >= addr + size) + return(EINVAL); if (!vm_map_is_allocated(&p->p_vmspace->vm_map, addr, addr+size, FALSE)) return(EINVAL); @@ -345,8 +375,10 @@ munmap(p, uap, retval) return(0); } +int munmapfd(p, fd) register struct proc *p; + int fd; { #ifdef DEBUG if (mmapdebug & MDB_FOLLOW) @@ -357,6 +389,7 @@ munmapfd(p, fd) * XXX -- should vm_deallocate any regions mapped to this file */ p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED; + return 0; } struct mprotect_args { @@ -365,6 +398,7 @@ struct mprotect_args { int prot; }; +int mprotect(p, uap, retval) struct proc *p; struct mprotect_args *uap; @@ -381,7 +415,7 @@ mprotect(p, uap, retval) #endif addr = (vm_offset_t) uap->addr; - if ((addr & page_mask) || uap->len < 0) + if ((addr & PAGE_MASK) || uap->len < 0) return(EINVAL); size = (vm_size_t) uap->len; /* @@ -412,6 +446,7 @@ struct madvise_args { }; /* ARGSUSED */ +int madvise(p, uap, retval) struct proc *p; struct madvise_args *uap; @@ -429,6 +464,7 @@ struct mincore_args { }; /* ARGSUSED */ +int mincore(p, uap, retval) struct proc *p; struct mincore_args *uap; @@ -446,6 +482,7 @@ mincore(p, uap, retval) * MAP_FILE: a vnode pointer * MAP_ANON: NULL or a file pointer */ +int vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) register vm_map_t map; register vm_offset_t *addr; @@ -459,7 +496,7 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) register vm_pager_t pager; boolean_t fitit; vm_object_t object; - struct vnode *vp; + struct vnode *vp = 0; int type; int rv = KERN_SUCCESS; @@ -485,11 +522,11 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) vp = (struct vnode *)handle; if (vp->v_type == VCHR) { type = PG_DEVICE; - handle = (caddr_t)vp; + handle = (caddr_t)(u_long)vp->v_rdev; } else type = PG_VNODE; } - pager = vm_pager_allocate(type, handle, size, prot); + pager = vm_pager_allocate(type, handle, size, prot, foff); if (pager == NULL) return (type == PG_DEVICE ? 
EINVAL : ENOMEM); /* @@ -512,13 +549,6 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) goto out; } /* - * The object of unnamed anonymous regions was just created - * find it for pager_cache. - */ - if (handle == NULL) - object = vm_object_lookup(pager); - - /* * Don't cache anonymous objects. * Loses the reference gained by vm_pager_allocate. */ @@ -675,17 +705,25 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) * Changed again: indeed set maximum protection based on * object permissions. */ - rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE); + /* + * We only need to set max_protection in case it's + * unequal to its default, which is VM_PROT_DEFAULT. + */ + if (maxprot != VM_PROT_DEFAULT) { + rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE); if (rv != KERN_SUCCESS) { (void) vm_deallocate(map, *addr, size); goto out; } + } /* - * We only need to set max_protection in case it's - * unequal to its default, which is VM_PROT_DEFAULT. + * We only need to set the protection if it is unequal + * to the maximum (if it is equal to maxprot, and isn't + * the default, then it would have been set above when + * maximum prot was set. */ - if(maxprot != VM_PROT_DEFAULT) { - rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE); + if (prot != maxprot) { + rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE); if (rv != KERN_SUCCESS) { (void) vm_deallocate(map, *addr, size); goto out; @@ -724,6 +762,7 @@ out: * Given address and size it returns map attributes as well * as the (locked) object mapped at that location. */ +int vm_region(map, addr, size, prot, max_prot, inheritance, shared, object, objoff) vm_map_t map; vm_offset_t *addr; /* IN/OUT */ @@ -799,6 +838,7 @@ vm_region(map, addr, size, prot, max_prot, inheritance, shared, object, objoff) /* * Yet another bastard routine. */ +int vm_allocate_with_pager(map, addr, size, fitit, pager, poffset, internal) register vm_map_t map; register vm_offset_t *addr; @@ -826,7 +866,8 @@ vm_allocate_with_pager(map, addr, size, fitit, pager, poffset, internal) vm_stat.lookups++; if (object == NULL) { object = vm_object_allocate(size); - vm_object_enter(object, pager); + if (!internal) + vm_object_enter(object, pager); } else vm_stat.hits++; object->internal = internal; @@ -848,7 +889,7 @@ vm_allocate_with_pager(map, addr, size, fitit, pager, poffset, internal) * * start and end should be page aligned. */ -boolean_t +static boolean_t vm_map_is_allocated(map, start, end, single_entry) vm_map_t map; vm_offset_t start, end; diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 56c9bd954d9e..5c463f64d6c5 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -34,10 +34,9 @@ * SUCH DAMAGE. * * from: @(#)vm_object.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_object.c,v 1.6.2.1 1993/11/14 21:20:24 rgrimes Exp $ - */ - -/* + * $Id: vm_object.c,v 1.21.2.1 1994/03/07 02:22:13 rgrimes Exp $ + * + * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. 
* @@ -69,12 +68,19 @@ */ #include "ddb.h" - #include "param.h" #include "malloc.h" +#include "systm.h" #include "vm.h" #include "vm_page.h" +#include "proc.h" + + +static void _vm_object_allocate(vm_size_t, vm_object_t); +void vm_object_deactivate_pages(vm_object_t); +static void vm_object_cache_trim(void); +static void vm_object_remove(vm_pager_t); /* * Virtual memory objects maintain the actual data @@ -102,23 +108,69 @@ * */ + +queue_head_t vm_object_cached_list; /* list of objects persisting */ +int vm_object_cached; /* size of cached list */ +simple_lock_data_t vm_cache_lock; /* lock for object cache */ + +queue_head_t vm_object_list; /* list of allocated objects */ +long vm_object_count; /* count of all objects */ +simple_lock_data_t vm_object_list_lock; + /* lock for object list and count */ + +vm_object_t kernel_object; /* the single kernel object */ +vm_object_t kmem_object; /* the kernel malloc object */ struct vm_object kernel_object_store; struct vm_object kmem_object_store; -#define VM_OBJECT_HASH_COUNT 157 +extern int vm_cache_max; +#define VM_OBJECT_HASH_COUNT 127 -int vm_cache_max = 100; /* can patch if necessary */ queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT]; long object_collapses = 0; long object_bypasses = 0; /* + * internal version of vm_object_allocate + */ +static inline void +_vm_object_allocate(size, object) + vm_size_t size; + register vm_object_t object; +{ + queue_init(&object->memq); + vm_object_lock_init(object); + object->ref_count = 1; + object->resident_page_count = 0; + object->size = size; + object->can_persist = FALSE; + object->paging_in_progress = 0; + object->copy = NULL; + + /* + * Object starts out read-write, with no pager. + */ + + object->pager = NULL; + object->internal = TRUE; /* vm_allocate_with_pager will reset */ + object->paging_offset = 0; + object->shadow = NULL; + object->shadow_offset = (vm_offset_t) 0; + + simple_lock(&vm_object_list_lock); + queue_enter(&vm_object_list, object, vm_object_t, object_list); + vm_object_count++; + simple_unlock(&vm_object_list_lock); +} + +/* * vm_object_init: * * Initialize the VM objects module. */ -void vm_object_init() +void +vm_object_init() { register int i; @@ -136,7 +188,8 @@ void vm_object_init() kernel_object); kmem_object = &kmem_object_store; - _vm_object_allocate(VM_KMEM_SIZE + VM_MBUF_SIZE, kmem_object); + _vm_object_allocate(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, + kmem_object); } /* @@ -145,55 +198,30 @@ void vm_object_init() * Returns a new object with the given size. */ -vm_object_t vm_object_allocate(size) +vm_object_t +vm_object_allocate(size) vm_size_t size; { register vm_object_t result; + int s; result = (vm_object_t) malloc((u_long)sizeof *result, M_VMOBJ, M_WAITOK); + _vm_object_allocate(size, result); return(result); } -_vm_object_allocate(size, object) - vm_size_t size; - register vm_object_t object; -{ - queue_init(&object->memq); - vm_object_lock_init(object); - object->ref_count = 1; - object->resident_page_count = 0; - object->size = size; - object->can_persist = FALSE; - object->paging_in_progress = 0; - object->copy = NULL; - - /* - * Object starts out read-write, with no pager. 
- */ - - object->pager = NULL; - object->pager_ready = FALSE; - object->internal = TRUE; /* vm_allocate_with_pager will reset */ - object->paging_offset = 0; - object->shadow = NULL; - object->shadow_offset = (vm_offset_t) 0; - - simple_lock(&vm_object_list_lock); - queue_enter(&vm_object_list, object, vm_object_t, object_list); - vm_object_count++; - simple_unlock(&vm_object_list_lock); -} /* * vm_object_reference: * * Gets another reference to the given object. */ -void vm_object_reference(object) +inline void +vm_object_reference(object) register vm_object_t object; { if (object == NULL) @@ -215,8 +243,9 @@ void vm_object_reference(object) * * No object may be locked. */ -void vm_object_deallocate(object) - register vm_object_t object; +void +vm_object_deallocate(object) + vm_object_t object; { vm_object_t temp; @@ -236,11 +265,11 @@ void vm_object_deallocate(object) vm_object_lock(object); if (--(object->ref_count) != 0) { + vm_object_unlock(object); /* * If there are still references, then * we are done. */ - vm_object_unlock(object); vm_object_cache_unlock(); return; } @@ -252,48 +281,25 @@ void vm_object_deallocate(object) */ if (object->can_persist) { -#ifdef DIAGNOSTIC - register vm_page_t p; - - /* - * Check for dirty pages in object - * Print warning as this may signify kernel bugs - * pk@cs.few.eur.nl - 4/15/93 - */ - p = (vm_page_t) queue_first(&object->memq); - while (!queue_end(&object->memq, (queue_entry_t) p)) { - VM_PAGE_CHECK(p); - - if (pmap_is_modified(VM_PAGE_TO_PHYS(p)) || - !p->clean) { - - printf("vm_object_dealloc: persistent object %x isn't clean\n", object); - goto cant_persist; - } - - p = (vm_page_t) queue_next(&p->listq); - } -#endif /* DIAGNOSTIC */ queue_enter(&vm_object_cached_list, object, vm_object_t, cached_list); vm_object_cached++; vm_object_cache_unlock(); - vm_object_deactivate_pages(object); + /* vm_object_deactivate_pages(object); */ vm_object_unlock(object); vm_object_cache_trim(); return; } - cant_persist:; /* * Make sure no one can look us up now. */ vm_object_remove(object->pager); vm_object_cache_unlock(); - + temp = object->shadow; vm_object_terminate(object); /* unlocks and deallocates object */ @@ -301,18 +307,19 @@ void vm_object_deallocate(object) } } - /* * vm_object_terminate actually destroys the specified object, freeing * up all previously used resources. * * The object must be locked. */ -void vm_object_terminate(object) +void +vm_object_terminate(object) register vm_object_t object; { register vm_page_t p; vm_object_t shadow_object; + int s; /* * Detach the object from its shadow if we are the shadow's @@ -322,14 +329,52 @@ void vm_object_terminate(object) vm_object_lock(shadow_object); if (shadow_object->copy == object) shadow_object->copy = NULL; -#if 0 +/* else if (shadow_object->copy != NULL) panic("vm_object_terminate: copy/shadow inconsistency"); -#endif +*/ vm_object_unlock(shadow_object); } /* + * optim: get rid of any pages that we can right now + * so the pageout daemon can't get any more to page + * out at rundown. 
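vm_object_deallocate() above drops a reference and, when the object can persist, parks it on the cached-object list and trims that cache rather than terminating it right away. A stripped-down sketch of that release path; the singly linked cache, the CACHE_MAX limit and the terminate helper are simplifications of the kernel's queues, vm_cache_max and vm_object_terminate():

#include <stdio.h>
#include <stdlib.h>

#define CACHE_MAX 4                     /* plays the role of vm_cache_max */

struct object {
        int ref_count;
        int can_persist;
        struct object *cached_next;     /* link on the cached list */
};

static struct object *cached_head;
static int cached_count;                /* like vm_object_cached */

static void cache_trim(void);

static void
object_terminate(struct object *obj)
{
        free(obj);                      /* kernel: vm_object_terminate() */
}

static void
object_release(struct object *obj)
{
        if (--obj->ref_count != 0)
                return;                 /* still referenced elsewhere */
        if (obj->can_persist) {
                obj->cached_next = cached_head;
                cached_head = obj;      /* keep it around for reuse */
                cached_count++;
                cache_trim();
                return;
        }
        object_terminate(obj);
}

static void
cache_trim(void)
{
        struct object *obj;

        while (cached_count > CACHE_MAX) {
                obj = cached_head;      /* evict until under the limit */
                cached_head = obj->cached_next;
                cached_count--;
                object_terminate(obj);
        }
}

int
main(void)
{
        struct object *obj = calloc(1, sizeof(*obj));

        obj->ref_count = 1;
        obj->can_persist = 1;
        object_release(obj);
        printf("cached objects: %d\n", cached_count);
        return (0);
}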
+ */ +#if 0 + p = (vm_page_t) queue_first(&object->memq); + while (!queue_end(&object->memq, (queue_entry_t) p)) { + vm_page_t next = (vm_page_t) queue_next(&p->listq); + VM_PAGE_CHECK(p); + vm_page_lock_queues(); + + if (p->flags & PG_BUSY) { + p = next; + vm_page_unlock_queues(); + continue; + } + if (!object->internal) { + if ((p->flags & PG_CLEAN) == 0) { + p = next; + vm_page_unlock_queues(); + continue; + } + + if (pmap_is_modified(VM_PAGE_TO_PHYS(p))) { + p->flags &= ~PG_CLEAN; + p = next; + vm_page_unlock_queues(); + continue; + } + } + + vm_page_free(p); + vm_page_unlock_queues(); + p = next; + } +#endif + + /* * Wait until the pageout daemon is through * with the object. */ @@ -339,7 +384,6 @@ void vm_object_terminate(object) vm_object_lock(object); } - /* * While the paging system is locked, * pull the object's pages off the active @@ -357,19 +401,21 @@ void vm_object_terminate(object) VM_PAGE_CHECK(p); vm_page_lock_queues(); - if (p->active) { + s = vm_disable_intr(); + if (p->flags & PG_ACTIVE) { queue_remove(&vm_page_queue_active, p, vm_page_t, pageq); - p->active = FALSE; + p->flags &= ~PG_ACTIVE; vm_page_active_count--; } - if (p->inactive) { + if (p->flags & PG_INACTIVE) { queue_remove(&vm_page_queue_inactive, p, vm_page_t, pageq); - p->inactive = FALSE; + p->flags &= ~PG_INACTIVE; vm_page_inactive_count--; } + vm_set_intr(s); vm_page_unlock_queues(); p = (vm_page_t) queue_next(&p->listq); } @@ -390,6 +436,7 @@ void vm_object_terminate(object) vm_object_page_clean(object, 0, 0); vm_object_unlock(object); } + while (!queue_empty(&object->memq)) { p = (vm_page_t) queue_first(&object->memq); @@ -404,8 +451,9 @@ void vm_object_terminate(object) * Let the pager know object is dead. */ - if (object->pager != NULL) + if (object->pager != NULL) { vm_pager_deallocate(object->pager); + } simple_lock(&vm_object_list_lock); @@ -430,38 +478,58 @@ void vm_object_terminate(object) * * The object must be locked. */ +void vm_object_page_clean(object, start, end) register vm_object_t object; register vm_offset_t start; register vm_offset_t end; { register vm_page_t p; + int s; + int size; if (object->pager == NULL) return; + if (start != end) { + start = trunc_page(start); + end = round_page(end); + } + size = end - start; + again: p = (vm_page_t) queue_first(&object->memq); - while (!queue_end(&object->memq, (queue_entry_t) p)) { - if (start == end || - p->offset >= start && p->offset < end) { - if (p->clean && pmap_is_modified(VM_PAGE_TO_PHYS(p))) - p->clean = FALSE; - pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); - if (!p->clean) { - p->busy = TRUE; + while (!queue_end(&object->memq, (queue_entry_t) p) && ((start == end) || (size != 0) ) ) { + if (start == end || (p->offset >= start && p->offset < end)) { + if (p->flags & PG_BUSY) + goto next; + + size -= PAGE_SIZE; + + if ((p->flags & PG_CLEAN) + && pmap_is_modified(VM_PAGE_TO_PHYS(p))) + p->flags &= ~PG_CLEAN; + + if (p->flags & PG_ACTIVE) + vm_page_deactivate(p); + + if ((p->flags & PG_CLEAN) == 0) { + p->flags |= PG_BUSY; object->paging_in_progress++; vm_object_unlock(object); (void) vm_pager_put(object->pager, p, TRUE); vm_object_lock(object); object->paging_in_progress--; - p->busy = FALSE; + if (object->paging_in_progress == 0) + wakeup((caddr_t) object); PAGE_WAKEUP(p); goto again; } } +next: p = (vm_page_t) queue_next(&p->listq); } + wakeup((caddr_t)object); } /* @@ -472,6 +540,7 @@ again: * * The object must be locked. 
*/ +void vm_object_deactivate_pages(object) register vm_object_t object; { @@ -481,7 +550,8 @@ vm_object_deactivate_pages(object) while (!queue_end(&object->memq, (queue_entry_t) p)) { next = (vm_page_t) queue_next(&p->listq); vm_page_lock_queues(); - if (!p->busy) + if ((p->flags & (PG_INACTIVE|PG_BUSY)) == 0 && + p->wire_count == 0) vm_page_deactivate(p); /* optimisation from mach 3.0 - * andrew@werple.apana.org.au, * Feb '93 @@ -494,6 +564,7 @@ vm_object_deactivate_pages(object) /* * Trim the object cache to size. */ +void vm_object_cache_trim() { register vm_object_t object; @@ -513,7 +584,6 @@ vm_object_cache_trim() vm_object_cache_unlock(); } - /* * vm_object_shutdown() * @@ -526,7 +596,9 @@ vm_object_cache_trim() * race conditions! */ -void vm_object_shutdown() +#if 0 +void +vm_object_shutdown() { register vm_object_t object; @@ -537,7 +609,6 @@ void vm_object_shutdown() vm_object_cache_clear(); - printf("free paging spaces: "); /* * First we gain a reference to each object so that @@ -568,7 +639,7 @@ void vm_object_shutdown() } printf("done.\n"); } - +#endif /* * vm_object_pmap_copy: * @@ -578,12 +649,19 @@ void vm_object_shutdown() * * The object must *not* be locked. */ -void vm_object_pmap_copy(object, start, end) +void +vm_object_pmap_copy(object, start, end) register vm_object_t object; register vm_offset_t start; register vm_offset_t end; { register vm_page_t p; + vm_offset_t amount; + + start = trunc_page(start); + end = round_page(end); + + amount = ((end - start) + PAGE_SIZE - 1) / PAGE_SIZE; if (object == NULL) return; @@ -593,7 +671,10 @@ void vm_object_pmap_copy(object, start, end) while (!queue_end(&object->memq, (queue_entry_t) p)) { if ((start <= p->offset) && (p->offset < end)) { pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ); - p->copy_on_write = TRUE; + p->flags |= PG_COPY_ON_WRITE; + amount -= 1; + if (amount <= 0) + break; } p = (vm_page_t) queue_next(&p->listq); } @@ -608,21 +689,34 @@ void vm_object_pmap_copy(object, start, end) * * The object must *not* be locked. */ -void vm_object_pmap_remove(object, start, end) +void +vm_object_pmap_remove(object, start, end) register vm_object_t object; register vm_offset_t start; register vm_offset_t end; { register vm_page_t p; + vm_offset_t size; if (object == NULL) return; vm_object_lock(object); +again: + size = ((end - start) + PAGE_SIZE - 1) / PAGE_SIZE; p = (vm_page_t) queue_first(&object->memq); while (!queue_end(&object->memq, (queue_entry_t) p)) { - if ((start <= p->offset) && (p->offset < end)) + if ((start <= p->offset) && (p->offset < end)) { + if (p->flags & PG_BUSY) { + p->flags |= PG_WANTED; + tsleep((caddr_t) p, PVM, "vmopmr", 0); + goto again; + } pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); + if ((p->flags & PG_CLEAN) == 0) + p->flags |= PG_LAUNDRY; + if (--size <= 0) break; + } p = (vm_page_t) queue_next(&p->listq); } vm_object_unlock(object); @@ -639,8 +733,8 @@ void vm_object_pmap_remove(object, start, end) * May defer the copy until later if the object is not backed * up by a non-default pager. 
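vm_object_pmap_copy() above now computes how many pages the range can possibly contain and stops scanning once that many have been marked, instead of always walking the whole resident list. A sketch of that bounded copy-on-write marking over a simplified page array; the pmap_page_protect() call is omitted and the types are stand-ins:

#include <stdio.h>

#define PAGE_SIZE 4096UL

struct page {
        unsigned long offset;           /* offset within the object */
        int copy_on_write;              /* stand-in for PG_COPY_ON_WRITE */
};

/*
 * Mark every page in [start, end) copy-on-write, but stop as soon as the
 * maximum possible number of pages in that range has been handled, so a
 * large resident set is not scanned to the end needlessly.
 */
static int
mark_copy_on_write(struct page *pages, int npages, unsigned long start,
    unsigned long end)
{
        long budget = (long)((end - start + PAGE_SIZE - 1) / PAGE_SIZE);
        int i, marked = 0;

        for (i = 0; i < npages && budget > 0; i++) {
                if (pages[i].offset >= start && pages[i].offset < end) {
                        pages[i].copy_on_write = 1;
                        marked++;
                        budget--;
                }
        }
        return (marked);
}

int
main(void)
{
        struct page pages[] = {
                { 0 * PAGE_SIZE, 0 },
                { 1 * PAGE_SIZE, 0 },
                { 5 * PAGE_SIZE, 0 },
        };

        printf("marked %d pages\n",
            mark_copy_on_write(pages, 3, 0, 2 * PAGE_SIZE));
        return (0);
}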
*/ -void vm_object_copy(src_object, src_offset, size, - dst_object, dst_offset, src_needs_copy) +void +vm_object_copy(src_object, src_offset, size, dst_object, dst_offset, src_needs_copy) register vm_object_t src_object; vm_offset_t src_offset; vm_size_t size; @@ -651,6 +745,8 @@ void vm_object_copy(src_object, src_offset, size, register vm_object_t new_copy; register vm_object_t old_copy; vm_offset_t new_start, new_end; + vm_offset_t src_offset_end; + vm_offset_t tmpsize; register vm_page_t p; @@ -669,10 +765,19 @@ void vm_object_copy(src_object, src_offset, size, * default pager, we don't have to make a copy * of it. Instead, we set the needs copy flag and * make a shadow later. + * DYSON: check for swap(default) pager too.... */ vm_object_lock(src_object); + + /* + * Try to collapse the object before copying it. + */ + + vm_object_collapse(src_object); + if (src_object->pager == NULL || + src_object->pager->pg_type == PG_SWAP || src_object->internal) { /* @@ -683,12 +788,16 @@ void vm_object_copy(src_object, src_offset, size, /* * Mark all of the pages copy-on-write. */ + tmpsize = size; + src_offset_end = src_offset + size; for (p = (vm_page_t) queue_first(&src_object->memq); - !queue_end(&src_object->memq, (queue_entry_t)p); + !queue_end(&src_object->memq, (queue_entry_t)p) && tmpsize > 0; p = (vm_page_t) queue_next(&p->listq)) { if (src_offset <= p->offset && - p->offset < src_offset + size) - p->copy_on_write = TRUE; + p->offset < src_offset_end) { + p->flags |= PG_COPY_ON_WRITE; + tmpsize -= PAGE_SIZE; + } } vm_object_unlock(src_object); @@ -703,11 +812,6 @@ void vm_object_copy(src_object, src_offset, size, } /* - * Try to collapse the object before copying it. - */ - vm_object_collapse(src_object); - - /* * If the object has a pager, the pager wants to * see all of the changes. We need a copy-object * for the changed pages. @@ -814,10 +918,13 @@ void vm_object_copy(src_object, src_offset, size, * Mark all the affected pages of the existing object * copy-on-write. */ + tmpsize = size; p = (vm_page_t) queue_first(&src_object->memq); - while (!queue_end(&src_object->memq, (queue_entry_t) p)) { - if ((new_start <= p->offset) && (p->offset < new_end)) - p->copy_on_write = TRUE; + while (!queue_end(&src_object->memq, (queue_entry_t) p) && tmpsize > 0) { + if ((new_start <= p->offset) && (p->offset < new_end)) { + p->flags |= PG_COPY_ON_WRITE; + tmpsize -= PAGE_SIZE; + } p = (vm_page_t) queue_next(&p->listq); } @@ -839,7 +946,8 @@ void vm_object_copy(src_object, src_offset, size, * are returned in the source parameters. */ -void vm_object_shadow(object, offset, length) +void +vm_object_shadow(object, offset, length) vm_object_t *object; /* IN/OUT */ vm_offset_t *offset; /* IN/OUT */ vm_size_t length; @@ -884,7 +992,8 @@ void vm_object_shadow(object, offset, length) * Set the specified object's pager to the specified pager. */ -void vm_object_setpager(object, pager, paging_offset, +void +vm_object_setpager(object, pager, paging_offset, read_only) vm_object_t object; vm_pager_t pager; @@ -896,6 +1005,9 @@ void vm_object_setpager(object, pager, paging_offset, #endif lint vm_object_lock(object); /* XXX ? */ + if (object->pager && object->pager != pager) { + panic("!!!pager already allocated!!!\n"); + } object->pager = pager; object->paging_offset = paging_offset; vm_object_unlock(object); /* XXX ? 
*/ @@ -906,14 +1018,15 @@ void vm_object_setpager(object, pager, paging_offset, */ #define vm_object_hash(pager) \ - (((unsigned)pager)%VM_OBJECT_HASH_COUNT) + ((((unsigned)pager) >> 5)%VM_OBJECT_HASH_COUNT) /* * vm_object_lookup looks in the object cache for an object with the * specified pager and paging id. */ -vm_object_t vm_object_lookup(pager) +vm_object_t +vm_object_lookup(pager) vm_pager_t pager; { register queue_t bucket; @@ -951,7 +1064,8 @@ vm_object_t vm_object_lookup(pager) * the hash table. */ -void vm_object_enter(object, pager) +void +vm_object_enter(object, pager) vm_object_t object; vm_pager_t pager; { @@ -987,6 +1101,7 @@ void vm_object_enter(object, pager) * is locked. XXX this should be fixed * by reorganizing vm_object_deallocate. */ +void vm_object_remove(pager) register vm_pager_t pager; { @@ -1013,8 +1128,8 @@ vm_object_remove(pager) * vm_object_cache_clear removes all objects from the cache. * */ - -void vm_object_cache_clear() +void +vm_object_cache_clear() { register vm_object_t object; @@ -1053,8 +1168,12 @@ boolean_t vm_object_collapse_allowed = TRUE; * Requires that the object be locked and the page * queues be unlocked. * + * This routine has significant changes by John S. Dyson + * to fix some swap memory leaks. 18 Dec 93 + * */ -void vm_object_collapse(object) +void +vm_object_collapse(object) register vm_object_t object; { @@ -1072,11 +1191,10 @@ void vm_object_collapse(object) * Verify that the conditions are right for collapse: * * The object exists and no pages in it are currently - * being paged out (or have ever been paged out). + * being paged out. */ if (object == NULL || - object->paging_in_progress != 0 || - object->pager != NULL) + object->paging_in_progress != 0) return; /* @@ -1096,7 +1214,7 @@ void vm_object_collapse(object) */ if (!backing_object->internal || - backing_object->paging_in_progress != 0) { + backing_object->paging_in_progress != 0 ) { vm_object_unlock(backing_object); return; } @@ -1112,10 +1230,22 @@ void vm_object_collapse(object) * parent object. */ if (backing_object->shadow != NULL && - backing_object->shadow->copy != NULL) { + backing_object->shadow->copy == backing_object) { + vm_object_unlock(backing_object); + return; + } + + /* + * we can deal only with the swap pager + */ + if ((object->pager && + object->pager->pg_type != PG_SWAP) || + (backing_object->pager && + backing_object->pager->pg_type != PG_SWAP)) { vm_object_unlock(backing_object); return; } + /* * We know that we can either collapse the backing @@ -1165,36 +1295,12 @@ void vm_object_collapse(object) vm_page_unlock_queues(); } else { pp = vm_page_lookup(object, new_offset); - if (pp != NULL && !pp->fake) { + if (pp != NULL || (object->pager && vm_pager_has_page(object->pager, + object->paging_offset + new_offset))) { vm_page_lock_queues(); vm_page_free(p); vm_page_unlock_queues(); - } - else { - if (pp) { -#if 1 - /* - * This should never happen -- the - * parent cannot have ever had an - * external memory object, and thus - * cannot have absent pages. - */ - panic("vm_object_collapse: bad case"); - /* andrew@werple.apana.org.au - from - mach 3.0 VM */ -#else - /* may be someone waiting for it */ - PAGE_WAKEUP(pp); - vm_page_lock_queues(); - vm_page_free(pp); - vm_page_unlock_queues(); -#endif - } - /* - * Parent now has no page. - * Move the backing object's page - * up. - */ + } else { vm_page_rename(p, object, new_offset); } } @@ -1202,46 +1308,49 @@ void vm_object_collapse(object) /* * Move the pager from backing_object to object. 
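The vm_object_hash() change above shifts the pager pointer right before taking the modulus, since the low bits of an allocated pointer are mostly alignment and would otherwise crowd objects into a few buckets. A tiny illustration of the idea, reusing the same shift and bucket count as the hunk above (the function name and the use of local variables are just for the demo):

#include <stdio.h>
#include <stdint.h>

#define HASH_BUCKETS 127        /* like VM_OBJECT_HASH_COUNT */

/* Hash a pointer, discarding low-order bits that carry little entropy. */
static unsigned int
ptr_hash(const void *p)
{
        return ((unsigned int)((uintptr_t)p >> 5) % HASH_BUCKETS);
}

int
main(void)
{
        int a, b;

        printf("bucket(&a)=%u bucket(&b)=%u\n", ptr_hash(&a), ptr_hash(&b));
        return (0);
}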
- * - * XXX We're only using part of the paging space - * for keeps now... we ought to discard the - * unused portion. - */ - - /* - * Remove backing_object from the object hashtable now. - * This is necessary since its pager is going away - * and therefore it is not going to be removed from - * hashtable in vm_object_deallocate(). - * - * NOTE - backing_object can only get at this stage if - * it has an internal pager. It is not normally on the - * hashtable unless it was put there by eg. vm_mmap() - * - * XXX - Need I worry here about *named* ANON pagers ? */ if (backing_object->pager) { - vm_object_remove(backing_object->pager); + backing_object->paging_in_progress++; + if (object->pager) { + vm_pager_t bopager; + object->paging_in_progress++; + /* + * copy shadow object pages into ours + * and destroy unneeded pages in shadow object. + */ + bopager = backing_object->pager; + backing_object->pager = NULL; + vm_object_remove(backing_object->pager); + swap_pager_copy( + bopager, backing_object->paging_offset, + object->pager, object->paging_offset, + object->shadow_offset); + object->paging_in_progress--; + if (object->paging_in_progress == 0) + wakeup((caddr_t)object); + } else { + object->paging_in_progress++; + /* + * grab the shadow objects pager + */ + object->pager = backing_object->pager; + object->paging_offset = backing_object->paging_offset + backing_offset; + vm_object_remove(backing_object->pager); + backing_object->pager = NULL; + /* + * free unnecessary blocks + */ + swap_pager_freespace(object->pager, 0, object->paging_offset); + object->paging_in_progress--; + if (object->paging_in_progress == 0) + wakeup((caddr_t)object); + } + backing_object->paging_in_progress--; + if (backing_object->paging_in_progress == 0) + wakeup((caddr_t)backing_object); } - object->pager = backing_object->pager; -#if 1 - /* Mach 3.0 code */ - /* andrew@werple.apana.org.au, 12 Feb 1993 */ - /* - * If there is no pager, leave paging-offset alone. - */ - if (object->pager) - object->paging_offset = - backing_object->paging_offset + - backing_offset; -#else - /* old VM 2.5 version */ - object->paging_offset += backing_offset; -#endif - - backing_object->pager = NULL; /* * Object now shadows whatever backing_object did. @@ -1315,9 +1424,8 @@ void vm_object_collapse(object) if (p->offset >= backing_offset && new_offset <= size && - ((pp = vm_page_lookup(object, new_offset)) - == NULL || - pp->fake)) { + ((pp = vm_page_lookup(object, new_offset)) == NULL || (pp->flags & PG_FAKE)) && + (!object->pager || !vm_pager_has_page(object->pager, object->paging_offset+new_offset))) { /* * Page still needed. * Can't go any further. @@ -1355,6 +1463,8 @@ void vm_object_collapse(object) * will not vanish; so we don't need to call * vm_object_deallocate. */ + if (backing_object->ref_count == 1) + printf("should have called obj deallocate\n"); backing_object->ref_count--; vm_object_unlock(backing_object); @@ -1376,26 +1486,58 @@ void vm_object_collapse(object) * * The object must be locked. 
*/ -void vm_object_page_remove(object, start, end) +void +vm_object_page_remove(object, start, end) register vm_object_t object; register vm_offset_t start; register vm_offset_t end; { register vm_page_t p, next; + vm_offset_t size; + int cnt; + int s; if (object == NULL) return; - p = (vm_page_t) queue_first(&object->memq); - while (!queue_end(&object->memq, (queue_entry_t) p)) { - next = (vm_page_t) queue_next(&p->listq); - if ((start <= p->offset) && (p->offset < end)) { - pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); - vm_page_lock_queues(); - vm_page_free(p); - vm_page_unlock_queues(); + start = trunc_page(start); + end = round_page(end); +again: + size = end-start; + if (size > 4*PAGE_SIZE || size >= object->size/4) { + p = (vm_page_t) queue_first(&object->memq); + while (!queue_end(&object->memq, (queue_entry_t) p) && size > 0) { + next = (vm_page_t) queue_next(&p->listq); + if ((start <= p->offset) && (p->offset < end)) { + if (p->flags & PG_BUSY) { + p->flags |= PG_WANTED; + tsleep((caddr_t) p, PVM, "vmopar", 0); + goto again; + } + pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); + vm_page_lock_queues(); + vm_page_free(p); + vm_page_unlock_queues(); + size -= PAGE_SIZE; + } + p = next; + } + } else { + while (size > 0) { + while (p = vm_page_lookup(object, start)) { + if (p->flags & PG_BUSY) { + p->flags |= PG_WANTED; + tsleep((caddr_t) p, PVM, "vmopar", 0); + goto again; + } + pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); + vm_page_lock_queues(); + vm_page_free(p); + vm_page_unlock_queues(); + } + start += PAGE_SIZE; + size -= PAGE_SIZE; } - p = next; } } @@ -1421,10 +1563,8 @@ void vm_object_page_remove(object, start, end) * Conditions: * The object must *not* be locked. */ -boolean_t vm_object_coalesce(prev_object, next_object, - prev_offset, next_offset, - prev_size, next_size) - +boolean_t +vm_object_coalesce(prev_object, next_object, prev_offset, next_offset, prev_size, next_size) register vm_object_t prev_object; vm_object_t next_object; vm_offset_t prev_offset, next_offset; @@ -1460,7 +1600,6 @@ boolean_t vm_object_coalesce(prev_object, next_object, * (any of which mean that the pages not mapped to * prev_entry may be in use anyway) */ - if (prev_object->ref_count > 1 || prev_object->pager != NULL || prev_object->shadow != NULL || @@ -1473,7 +1612,6 @@ boolean_t vm_object_coalesce(prev_object, next_object, * Remove any pages that may still be in the object from * a previous deallocation. */ - vm_object_page_remove(prev_object, prev_offset + prev_size, prev_offset + prev_size + next_size); @@ -1489,11 +1627,31 @@ boolean_t vm_object_coalesce(prev_object, next_object, return(TRUE); } +/* + * returns page after looking up in shadow chain + */ + +vm_page_t +vm_object_page_lookup(object, offset) + vm_object_t object; + vm_offset_t offset; +{ + vm_page_t m; + if (!(m=vm_page_lookup(object, offset))) { + if (!object->shadow) + return 0; + else + return vm_object_page_lookup(object->shadow, offset + object->shadow_offset); + } + return m; +} + #if defined(DEBUG) || (NDDB > 0) /* * vm_object_print: [ debug ] */ -void vm_object_print(object, full) +void +vm_object_print(object, full) vm_object_t object; boolean_t full; { diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index df243bf985b3..60e7677b27ed 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
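vm_object_page_lookup(), added above, resolves a page by walking the shadow chain: if the page is not resident in the object itself, the lookup continues in the shadow object at the offset displaced by shadow_offset. A self-contained sketch of that recursion over a simplified object/page model:

#include <stdio.h>

struct page {
        unsigned long offset;
        struct page *next;              /* pages resident in one object */
};

struct object {
        struct page *pages;
        struct object *shadow;          /* backing object, or NULL */
        unsigned long shadow_offset;    /* our offset 0 maps here in shadow */
};

static struct page *
page_lookup(struct object *obj, unsigned long offset)
{
        struct page *p;

        for (p = obj->pages; p != NULL; p = p->next)
                if (p->offset == offset)
                        return (p);
        if (obj->shadow == NULL)
                return (NULL);
        /* not resident here: look in the backing object */
        return (page_lookup(obj->shadow, offset + obj->shadow_offset));
}

int
main(void)
{
        struct page backing_page = { 0x3000, NULL };
        struct object backing = { &backing_page, NULL, 0 };
        struct object front = { NULL, &backing, 0x1000 };

        printf("found=%d\n", page_lookup(&front, 0x2000) != NULL);
        return (0);
}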
* * from: @(#)vm_object.h 7.3 (Berkeley) 4/21/91 - * $Id: vm_object.h,v 1.2 1993/10/16 16:20:43 rgrimes Exp $ + * $Id: vm_object.h,v 1.6 1994/01/14 16:27:25 davidg Exp $ */ /* @@ -114,21 +114,21 @@ struct vm_object_hash_entry { typedef struct vm_object_hash_entry *vm_object_hash_entry_t; #ifdef KERNEL -queue_head_t vm_object_cached_list; /* list of objects persisting */ -int vm_object_cached; /* size of cached list */ -simple_lock_data_t vm_cache_lock; /* lock for object cache */ +extern queue_head_t vm_object_cached_list; /* list of objects persisting */ +extern int vm_object_cached; /* size of cached list */ +extern simple_lock_data_t vm_cache_lock; /* lock for object cache */ -queue_head_t vm_object_list; /* list of allocated objects */ -long vm_object_count; /* count of all objects */ -simple_lock_data_t vm_object_list_lock; +extern queue_head_t vm_object_list; /* list of allocated objects */ +extern long vm_object_count; /* count of all objects */ +extern simple_lock_data_t vm_object_list_lock; /* lock for object list and count */ -vm_object_t kernel_object; /* the single kernel object */ -vm_object_t kmem_object; +extern vm_object_t kernel_object; /* the single kernel object */ +extern vm_object_t kmem_object; #define vm_object_cache_lock() simple_lock(&vm_cache_lock) #define vm_object_cache_unlock() simple_unlock(&vm_cache_lock) -#endif KERNEL +#endif /* KERNEL */ /* * Declare procedures that operate on VM objects. @@ -158,16 +158,19 @@ void vm_object_print(); #define vm_object_lock_init(object) { simple_lock_init(&(object)->Lock); (object)->LockHolder = 0; } #define vm_object_lock(object) { simple_lock(&(object)->Lock); (object)->LockHolder = (int) current_thread(); } #define vm_object_unlock(object) { (object)->LockHolder = 0; simple_unlock(&(object)->Lock); } -#define vm_object_lock_try(object) (simple_lock_try(&(object)->Lock) ? ( ((object)->LockHolder = (int) current_thread()) , TRUE) : FALSE) +#define vm_object_lock_try(object) (simple_lock_try(&(object)->Lock) ? (((object)->LockHolder = (int) current_thread()) , TRUE) : FALSE) #define vm_object_sleep(event, object, interruptible) \ - { (object)->LockHolder = 0; thread_sleep((event), &(object)->Lock, (interruptible)); } -#else VM_OBJECT_DEBUG + { (object)->LockHolder = 0; thread_sleep((int)(event), &(object)->Lock, (interruptible)); } +#else /* VM_OBJECT_DEBUG */ #define vm_object_lock_init(object) simple_lock_init(&(object)->Lock) #define vm_object_lock(object) simple_lock(&(object)->Lock) #define vm_object_unlock(object) simple_unlock(&(object)->Lock) #define vm_object_lock_try(object) simple_lock_try(&(object)->Lock) #define vm_object_sleep(event, object, interruptible) \ - thread_sleep((event), &(object)->Lock, (interruptible)) -#endif VM_OBJECT_DEBUG + thread_sleep((int)(event), &(object)->Lock, (interruptible)) +#endif /* VM_OBJECT_DEBUG */ -#endif _VM_OBJECT_ +extern void vm_object_page_clean(vm_object_t, vm_offset_t, vm_offset_t); +extern int pager_cache(vm_object_t, boolean_t); + +#endif /* _VM_OBJECT_ */ diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 2935a67bbe9a..31a99382a7d6 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_page.c,v 1.2 1993/10/16 16:20:44 rgrimes Exp $ + * $Id: vm_page.c,v 1.12 1994/02/09 07:03:10 davidg Exp $ */ /* @@ -69,11 +69,13 @@ */ #include "param.h" +#include "systm.h" #include "vm.h" #include "vm_map.h" #include "vm_page.h" #include "vm_pageout.h" +#include "proc.h" /* * Associated with page of user-allocatable memory is a @@ -85,10 +87,6 @@ int vm_page_bucket_count = 0; /* How big is array? */ int vm_page_hash_mask; /* Mask for hash function */ simple_lock_data_t bucket_lock; /* lock for all buckets XXX */ -vm_size_t page_size = 4096; -vm_size_t page_mask = 4095; -int page_shift = 12; - queue_head_t vm_page_queue_free; queue_head_t vm_page_queue_active; queue_head_t vm_page_queue_inactive; @@ -106,33 +104,15 @@ int vm_page_active_count; int vm_page_inactive_count; int vm_page_wire_count; int vm_page_laundry_count; +int vm_page_count; +extern int vm_pageout_pages_needed; int vm_page_free_target = 0; int vm_page_free_min = 0; int vm_page_inactive_target = 0; int vm_page_free_reserved = 0; -/* - * vm_set_page_size: - * - * Sets the page size, perhaps based upon the memory - * size. Must be called before any use of page-size - * dependent functions. - * - * Sets page_shift and page_mask from page_size. - */ -void vm_set_page_size() -{ - page_mask = page_size - 1; - - if ((page_mask & page_size) != 0) - panic("vm_set_page_size: page size not a power of two"); - - for (page_shift = 0; ; page_shift++) - if ((1 << page_shift) == page_size) - break; -} - +vm_size_t page_size = PAGE_SIZE; /* * vm_page_startup: @@ -143,21 +123,55 @@ void vm_set_page_size() * for the object/offset-to-page hash table headers. * Each page cell is initialized and placed on the free list. */ -vm_offset_t vm_page_startup(start, end, vaddr) - register vm_offset_t start; - vm_offset_t end; + +vm_offset_t +vm_page_startup(starta, enda, vaddr) + register vm_offset_t starta; + vm_offset_t enda; register vm_offset_t vaddr; { register vm_offset_t mapped; register vm_page_t m; register queue_t bucket; - vm_size_t npages; + vm_size_t npages, page_range; register vm_offset_t new_start; int i; vm_offset_t pa; + int nblocks; + vm_offset_t first_managed_page; + int size; extern vm_offset_t kentry_data; extern vm_size_t kentry_data_size; + extern vm_offset_t phys_avail[]; +/* the biggest memory array is the second group of pages */ + vm_offset_t start; + vm_offset_t biggestone, biggestsize; + + vm_offset_t total; + + total = 0; + biggestsize = 0; + biggestone = 0; + nblocks = 0; + vaddr = round_page(vaddr); + + for (i = 0; phys_avail[i + 1]; i += 2) { + phys_avail[i] = round_page(phys_avail[i]); + phys_avail[i+1] = trunc_page(phys_avail[i+1]); + } + + for (i = 0; phys_avail[i + 1]; i += 2) { + int size = phys_avail[i+1] - phys_avail[i]; + if (size > biggestsize) { + biggestone = i; + biggestsize = size; + } + ++nblocks; + total += size; + } + + start = phys_avail[biggestone]; /* @@ -186,22 +200,23 @@ vm_offset_t vm_page_startup(start, end, vaddr) * Note: * This computation can be tweaked if desired. */ - vm_page_buckets = (queue_t) vaddr; bucket = vm_page_buckets; if (vm_page_bucket_count == 0) { vm_page_bucket_count = 1; - while (vm_page_bucket_count < atop(end - start)) + while (vm_page_bucket_count < atop(total)) vm_page_bucket_count <<= 1; } + vm_page_hash_mask = vm_page_bucket_count - 1; /* * Validate these addresses. 
*/ - new_start = round_page(((queue_t)start) + vm_page_bucket_count); + new_start = start + vm_page_bucket_count * sizeof(struct queue_entry); + new_start = round_page(new_start); mapped = vaddr; vaddr = pmap_map(mapped, start, new_start, VM_PROT_READ|VM_PROT_WRITE); @@ -209,7 +224,7 @@ vm_offset_t vm_page_startup(start, end, vaddr) bzero((caddr_t) mapped, vaddr - mapped); mapped = vaddr; - for (i = vm_page_bucket_count; i--;) { + for (i = 0; i< vm_page_bucket_count; i++) { queue_init(bucket); bucket++; } @@ -220,8 +235,6 @@ vm_offset_t vm_page_startup(start, end, vaddr) * round (or truncate) the addresses to our page size. */ - end = trunc_page(end); - /* * Pre-allocate maps and map entries that cannot be dynamically * allocated via malloc(). The maps include the kernel_map and @@ -247,8 +260,7 @@ vm_offset_t vm_page_startup(start, end, vaddr) new_start = start + (vaddr - mapped); pmap_map(mapped, start, new_start, VM_PROT_READ|VM_PROT_WRITE); bzero((caddr_t) mapped, (vaddr - mapped)); - mapped = vaddr; - start = new_start; + start = round_page(new_start); /* * Compute the number of pages of memory that will be @@ -256,49 +268,52 @@ vm_offset_t vm_page_startup(start, end, vaddr) * of a page structure per page). */ - vm_page_free_count = npages = - (end - start + sizeof(struct vm_page))/(PAGE_SIZE + sizeof(struct vm_page)); + npages = (total - (start - phys_avail[biggestone])) / (PAGE_SIZE + sizeof(struct vm_page)); + first_page = phys_avail[0] / PAGE_SIZE; + page_range = (phys_avail[(nblocks-1)*2 + 1] - phys_avail[0]) / PAGE_SIZE; /* * Initialize the mem entry structures now, and * put them in the free queue. */ - m = vm_page_array = (vm_page_t) vaddr; - first_page = start; - first_page += npages*sizeof(struct vm_page); - first_page = atop(round_page(first_page)); - last_page = first_page + npages - 1; + vm_page_array = (vm_page_t) vaddr; + mapped = vaddr; - first_phys_addr = ptoa(first_page); - last_phys_addr = ptoa(last_page) + page_mask; /* * Validate these addresses. */ - new_start = start + (round_page(m + npages) - mapped); + new_start = round_page(start + page_range * sizeof (struct vm_page)); mapped = pmap_map(mapped, start, new_start, VM_PROT_READ|VM_PROT_WRITE); start = new_start; + first_managed_page = start / PAGE_SIZE; + /* * Clear all of the page structures */ - bzero((caddr_t)m, npages * sizeof(*m)); - - pa = first_phys_addr; - while (npages--) { - m->copy_on_write = FALSE; - m->wanted = FALSE; - m->inactive = FALSE; - m->active = FALSE; - m->busy = FALSE; - m->object = NULL; - m->phys_addr = pa; - queue_enter(&vm_page_queue_free, m, vm_page_t, pageq); - m++; - pa += PAGE_SIZE; + bzero((caddr_t)vm_page_array, page_range * sizeof(struct vm_page)); + + vm_page_count = 0; + vm_page_free_count = 0; + for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) { + if (i == biggestone) + pa = ptoa(first_managed_page); + else + pa = phys_avail[i]; + while (pa < phys_avail[i + 1] && npages-- > 0) { + ++vm_page_count; + ++vm_page_free_count; + m = PHYS_TO_VM_PAGE(pa); + m->flags = 0; + m->object = 0; + m->phys_addr = pa; + queue_enter(&vm_page_queue_free, m, vm_page_t, pageq); + pa += PAGE_SIZE; + } } /* @@ -317,8 +332,13 @@ vm_offset_t vm_page_startup(start, end, vaddr) * * NOTE: This macro depends on vm_page_bucket_count being a power of 2. 
*/ -#define vm_page_hash(object, offset) \ - (((unsigned)object+(unsigned)atop(offset))&vm_page_hash_mask) +inline const int +vm_page_hash(object, offset) + vm_object_t object; + vm_offset_t offset; +{ + return ((unsigned)object + offset/NBPG) & vm_page_hash_mask; +} /* * vm_page_insert: [ internal use only ] @@ -327,9 +347,11 @@ vm_offset_t vm_page_startup(start, end, vaddr) * table and object list. * * The object and page must be locked. + * interrupts must be disable in this routine!!! */ -void vm_page_insert(mem, object, offset) +void +vm_page_insert(mem, object, offset) register vm_page_t mem; register vm_object_t object; register vm_offset_t offset; @@ -339,7 +361,7 @@ void vm_page_insert(mem, object, offset) VM_PAGE_CHECK(mem); - if (mem->tabled) + if (mem->flags & PG_TABLED) panic("vm_page_insert: already inserted"); /* @@ -354,18 +376,16 @@ void vm_page_insert(mem, object, offset) */ bucket = &vm_page_buckets[vm_page_hash(object, offset)]; - spl = splimp(); simple_lock(&bucket_lock); queue_enter(bucket, mem, vm_page_t, hashq); simple_unlock(&bucket_lock); - (void) splx(spl); /* * Now link into the object's list of backed pages. */ queue_enter(&object->memq, mem, vm_page_t, listq); - mem->tabled = TRUE; + mem->flags |= PG_TABLED; /* * And show that the object has one more resident @@ -382,9 +402,12 @@ void vm_page_insert(mem, object, offset) * table and the object page list. * * The object and page must be locked. + * + * interrupts must be disable in this routine!!! */ -void vm_page_remove(mem) +void +vm_page_remove(mem) register vm_page_t mem; { register queue_t bucket; @@ -392,19 +415,19 @@ void vm_page_remove(mem) VM_PAGE_CHECK(mem); - if (!mem->tabled) + if (!(mem->flags & PG_TABLED)) { + printf("page not tabled?????\n"); return; + } /* * Remove from the object_object/offset hash table */ bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)]; - spl = splimp(); simple_lock(&bucket_lock); queue_remove(bucket, mem, vm_page_t, hashq); simple_unlock(&bucket_lock); - (void) splx(spl); /* * Now remove from the object's list of backed pages. @@ -418,8 +441,9 @@ void vm_page_remove(mem) */ mem->object->resident_page_count--; + mem->object = 0; - mem->tabled = FALSE; + mem->flags &= ~PG_TABLED; } /* @@ -431,7 +455,8 @@ void vm_page_remove(mem) * The object must be locked. No side effects. */ -vm_page_t vm_page_lookup(object, offset) +vm_page_t +vm_page_lookup(object, offset) register vm_object_t object; register vm_offset_t offset; { @@ -444,22 +469,22 @@ vm_page_t vm_page_lookup(object, offset) */ bucket = &vm_page_buckets[vm_page_hash(object, offset)]; + spl = vm_disable_intr(); - spl = splimp(); simple_lock(&bucket_lock); mem = (vm_page_t) queue_first(bucket); while (!queue_end(bucket, (queue_entry_t) mem)) { VM_PAGE_CHECK(mem); if ((mem->object == object) && (mem->offset == offset)) { simple_unlock(&bucket_lock); - splx(spl); + vm_set_intr(spl); return(mem); } mem = (vm_page_t) queue_next(&mem->hashq); } simple_unlock(&bucket_lock); - splx(spl); + vm_set_intr(spl); return(NULL); } @@ -471,67 +496,25 @@ vm_page_t vm_page_lookup(object, offset) * * The object must be locked. 
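The macro-turned-inline above relies on the bucket count being a power of two, so the reduction is a cheap mask rather than a division. A stand-alone model of the same shape, with assumed sizes:

#include <stdio.h>

#define SK_NBPG     4096UL              /* assumed page size */
#define SK_BUCKETS  1024U               /* must stay a power of two */
#define SK_HASHMASK (SK_BUCKETS - 1)

/* same shape as vm_page_hash(): object pointer plus page index, masked */
static unsigned
sk_page_hash(const void *object, unsigned long offset)
{
        return ((unsigned)(unsigned long)object +
            (unsigned)(offset / SK_NBPG)) & SK_HASHMASK;
}

int
main(void)
{
        printf("bucket %u\n", sk_page_hash((void *)0x1000, 3 * SK_NBPG));
        return 0;
}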
*/ -void vm_page_rename(mem, new_object, new_offset) +void +vm_page_rename(mem, new_object, new_offset) register vm_page_t mem; register vm_object_t new_object; vm_offset_t new_offset; { + int spl; if (mem->object == new_object) return; vm_page_lock_queues(); /* keep page from moving out from under pageout daemon */ + spl = vm_disable_intr(); vm_page_remove(mem); vm_page_insert(mem, new_object, new_offset); + vm_set_intr(spl); vm_page_unlock_queues(); } -void vm_page_init(mem, object, offset) - vm_page_t mem; - vm_object_t object; - vm_offset_t offset; -{ -#ifdef DEBUG -#define vm_page_init(mem, object, offset) {\ - (mem)->busy = TRUE; \ - (mem)->tabled = FALSE; \ - vm_page_insert((mem), (object), (offset)); \ - (mem)->absent = FALSE; \ - (mem)->fictitious = FALSE; \ - (mem)->page_lock = VM_PROT_NONE; \ - (mem)->unlock_request = VM_PROT_NONE; \ - (mem)->laundry = FALSE; \ - (mem)->active = FALSE; \ - (mem)->inactive = FALSE; \ - (mem)->wire_count = 0; \ - (mem)->clean = TRUE; \ - (mem)->copy_on_write = FALSE; \ - (mem)->fake = TRUE; \ - (mem)->pagerowned = FALSE; \ - (mem)->ptpage = FALSE; \ - } -#else -#define vm_page_init(mem, object, offset) {\ - (mem)->busy = TRUE; \ - (mem)->tabled = FALSE; \ - vm_page_insert((mem), (object), (offset)); \ - (mem)->absent = FALSE; \ - (mem)->fictitious = FALSE; \ - (mem)->page_lock = VM_PROT_NONE; \ - (mem)->unlock_request = VM_PROT_NONE; \ - (mem)->laundry = FALSE; \ - (mem)->active = FALSE; \ - (mem)->inactive = FALSE; \ - (mem)->wire_count = 0; \ - (mem)->clean = TRUE; \ - (mem)->copy_on_write = FALSE; \ - (mem)->fake = TRUE; \ - } -#endif - - vm_page_init(mem, object, offset); -} - /* * vm_page_alloc: * @@ -540,26 +523,41 @@ void vm_page_init(mem, object, offset) * * Object must be locked. */ -vm_page_t vm_page_alloc(object, offset) +vm_page_t +vm_page_alloc(object, offset) vm_object_t object; vm_offset_t offset; { register vm_page_t mem; int spl; - spl = splimp(); /* XXX */ + spl = vm_disable_intr(); simple_lock(&vm_page_queue_free_lock); if ( object != kernel_object && object != kmem_object && - vm_page_free_count <= vm_page_free_reserved) { + curproc != pageproc && curproc != &proc0 && + vm_page_free_count < vm_page_free_reserved) { simple_unlock(&vm_page_queue_free_lock); - splx(spl); + vm_set_intr(spl); + /* + * this wakeup seems unnecessary, but there is code that + * might just check to see if there are free pages, and + * punt if there aren't. VM_WAIT does this too, but + * redundant wakeups aren't that bad... + */ + if (curproc != pageproc) + wakeup((caddr_t) &vm_pages_needed); return(NULL); } if (queue_empty(&vm_page_queue_free)) { simple_unlock(&vm_page_queue_free_lock); - splx(spl); + vm_set_intr(spl); + /* + * comment above re: wakeups applies here too... + */ + if (curproc != pageproc) + wakeup((caddr_t) &vm_pages_needed); return(NULL); } @@ -567,25 +565,21 @@ vm_page_t vm_page_alloc(object, offset) vm_page_free_count--; simple_unlock(&vm_page_queue_free_lock); - splx(spl); - vm_page_init(mem, object, offset); + mem->flags = PG_BUSY|PG_CLEAN|PG_FAKE; + vm_page_insert(mem, object, offset); + mem->wire_count = 0; + mem->deact = 0; + vm_set_intr(spl); - /* - * Decide if we should poke the pageout daemon. - * We do this if the free count is less than the low - * water mark, or if the free count is less than the high - * water mark (but above the low water mark) and the inactive - * count is less than its target. - * - * We don't have the counts locked ... if they change a little, - * it doesn't really matter. 
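The allocation path above keeps a small reserve of free pages that ordinary processes may not consume, so the kernel objects, proc0 and the pageout daemon can always make progress; everyone else is refused and the daemon is woken instead. A condensed model of that check, with stand-in names:

/*
 * Returns nonzero when the caller may take a page now; the reserve
 * below vm_page_free_reserved is left for privileged callers.
 */
static int
sk_may_allocate(int free_count, int free_reserved,
    int is_kernel_object, int is_pageout_daemon)
{
        if (is_kernel_object || is_pageout_daemon)
                return 1;
        return free_count >= free_reserved;
}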
- */ +/* + * don't wakeup too often, so we wakeup the pageout daemon when + * we would be nearly out of memory. + */ + if (curproc != pageproc && + (vm_page_free_count < vm_page_free_reserved)) + wakeup((caddr_t) &vm_pages_needed); - if ((vm_page_free_count < vm_page_free_min) || - ((vm_page_free_count < vm_page_free_target) && - (vm_page_inactive_count < vm_page_inactive_target))) - thread_wakeup(&vm_pages_needed); return(mem); } @@ -597,33 +591,68 @@ vm_page_t vm_page_alloc(object, offset) * * Object and page must be locked prior to entry. */ -void vm_page_free(mem) +void +vm_page_free(mem) register vm_page_t mem; { + int spl; + + spl = vm_disable_intr(); + vm_page_remove(mem); - if (mem->active) { + mem->deact = 0; + if (mem->flags & PG_ACTIVE) { queue_remove(&vm_page_queue_active, mem, vm_page_t, pageq); - mem->active = FALSE; + mem->flags &= ~PG_ACTIVE; vm_page_active_count--; } - if (mem->inactive) { + if (mem->flags & PG_INACTIVE) { queue_remove(&vm_page_queue_inactive, mem, vm_page_t, pageq); - mem->inactive = FALSE; + mem->flags &= ~PG_INACTIVE; vm_page_inactive_count--; } - if (!mem->fictitious) { - int spl; - spl = splimp(); + if (!(mem->flags & PG_FICTITIOUS)) { simple_lock(&vm_page_queue_free_lock); + if (mem->wire_count) { + vm_page_wire_count--; + mem->wire_count = 0; + } queue_enter(&vm_page_queue_free, mem, vm_page_t, pageq); vm_page_free_count++; simple_unlock(&vm_page_queue_free_lock); - splx(spl); + vm_set_intr(spl); + + /* + * if pageout daemon needs pages, then tell it that there + * are some free. + */ + if (vm_pageout_pages_needed) + wakeup((caddr_t)&vm_pageout_pages_needed); + + /* + * wakeup processes that are waiting on memory if we + * hit a high water mark. + */ + if (vm_page_free_count == vm_page_free_min) { + wakeup((caddr_t)&vm_page_free_count); + } + + /* + * wakeup scheduler process if we have lots of memory. + * this process will swapin processes. + */ + if (vm_page_free_count == vm_page_free_target) { + wakeup((caddr_t)&proc0); + } + + } else { + vm_set_intr(spl); } + wakeup((caddr_t) mem); } /* @@ -635,27 +664,31 @@ void vm_page_free(mem) * * The page queues must be locked. */ -void vm_page_wire(mem) +void +vm_page_wire(mem) register vm_page_t mem; { + int spl; VM_PAGE_CHECK(mem); + spl = vm_disable_intr(); if (mem->wire_count == 0) { - if (mem->active) { + if (mem->flags & PG_ACTIVE) { queue_remove(&vm_page_queue_active, mem, vm_page_t, pageq); vm_page_active_count--; - mem->active = FALSE; + mem->flags &= ~PG_ACTIVE; } - if (mem->inactive) { + if (mem->flags & PG_INACTIVE) { queue_remove(&vm_page_queue_inactive, mem, vm_page_t, pageq); vm_page_inactive_count--; - mem->inactive = FALSE; + mem->flags &= ~PG_INACTIVE; } vm_page_wire_count++; } mem->wire_count++; + vm_set_intr(spl); } /* @@ -666,18 +699,24 @@ void vm_page_wire(mem) * * The page queues must be locked. */ -void vm_page_unwire(mem) +void +vm_page_unwire(mem) register vm_page_t mem; { + int spl; VM_PAGE_CHECK(mem); - mem->wire_count--; + spl = vm_disable_intr(); + if (mem->wire_count != 0) + mem->wire_count--; if (mem->wire_count == 0) { queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq); vm_page_active_count++; - mem->active = TRUE; + mem->flags |= PG_ACTIVE; vm_page_wire_count--; + vm_pageout_deact_bump(mem); } + vm_set_intr(spl); } /* @@ -689,9 +728,11 @@ void vm_page_unwire(mem) * * The page queues must be locked. 
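vm_page_free() above now does its own wakeups as the free count crosses the interesting thresholds. The following stand-in only returns which channels would be poked, as an illustration of the policy rather than the real code:

enum { SK_WAKE_PAGEOUT = 1, SK_WAKE_WAITERS = 2, SK_WAKE_SCHED = 4 };

static int
sk_free_page_wakeups(int free_count, int free_min, int free_target,
    int pageout_needs_pages)
{
        int wake = 0;

        if (pageout_needs_pages)
                wake |= SK_WAKE_PAGEOUT;        /* &vm_pageout_pages_needed */
        if (free_count == free_min)
                wake |= SK_WAKE_WAITERS;        /* &vm_page_free_count: VM_WAIT sleepers */
        if (free_count == free_target)
                wake |= SK_WAKE_SCHED;          /* &proc0: swap processes back in */
        return wake;
}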
*/ -void vm_page_deactivate(m) +void +vm_page_deactivate(m) register vm_page_t m; { + int spl; VM_PAGE_CHECK(m); /* @@ -704,20 +745,37 @@ void vm_page_deactivate(m) * Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93. */ - if (!m->inactive && m->wire_count == 0) { + spl = splhigh(); + m->deact = 0; + if (!(m->flags & PG_INACTIVE) && m->wire_count == 0) { pmap_clear_reference(VM_PAGE_TO_PHYS(m)); - if (m->active) { + if (m->flags & PG_ACTIVE) { queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); - m->active = FALSE; + m->flags &= ~PG_ACTIVE; vm_page_active_count--; } queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq); - m->inactive = TRUE; + m->flags |= PG_INACTIVE; vm_page_inactive_count++; - if (pmap_is_modified(VM_PAGE_TO_PHYS(m))) - m->clean = FALSE; - m->laundry = !m->clean; - } + pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); + if ((m->flags & PG_CLEAN) == 0) + m->flags |= PG_LAUNDRY; + } + splx(spl); +} + +/* + * vm_page_makefault + * + * Cause next access of this page to fault + */ +void +vm_page_makefault(m) + vm_page_t m; +{ + pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); + if ((m->flags & PG_CLEAN) == 0) + m->flags |= PG_LAUNDRY; } /* @@ -727,26 +785,36 @@ void vm_page_deactivate(m) * * The page queues must be locked. */ - -void vm_page_activate(m) +void +vm_page_activate(m) register vm_page_t m; { + int spl; VM_PAGE_CHECK(m); - if (m->inactive) { + vm_pageout_deact_bump(m); + + spl = vm_disable_intr(); + + if (m->flags & PG_INACTIVE) { queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); vm_page_inactive_count--; - m->inactive = FALSE; + m->flags &= ~PG_INACTIVE; } if (m->wire_count == 0) { - if (m->active) + if (m->flags & PG_ACTIVE) panic("vm_page_activate: already active"); + m->flags |= PG_ACTIVE; queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); - m->active = TRUE; + queue_remove(&m->object->memq, m, vm_page_t, listq); + queue_enter(&m->object->memq, m, vm_page_t, listq); vm_page_active_count++; + } + + vm_set_intr(spl); } /* @@ -757,7 +825,8 @@ void vm_page_activate(m) * be used by the zero-fill object. */ -boolean_t vm_page_zero_fill(m) +boolean_t +vm_page_zero_fill(m) vm_page_t m; { VM_PAGE_CHECK(m); @@ -771,8 +840,8 @@ boolean_t vm_page_zero_fill(m) * * Copy one page to another */ - -void vm_page_copy(src_m, dest_m) +void +vm_page_copy(src_m, dest_m) vm_page_t src_m; vm_page_t dest_m; { diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 072c6a29ba70..33ada305990b 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_page.h 7.3 (Berkeley) 4/21/91 - * $Id: vm_page.h,v 1.2 1993/10/16 16:20:46 rgrimes Exp $ + * $Id: vm_page.h,v 1.8 1994/01/31 04:21:19 davidg Exp $ */ /* @@ -96,58 +96,53 @@ * queues (P). 
*/ +#define PG_INACTIVE 0x0001 +#define PG_ACTIVE 0x0002 +#define PG_LAUNDRY 0x0004 +#define PG_CLEAN 0x0008 +#define PG_BUSY 0x0010 +#define PG_WANTED 0x0020 +#define PG_TABLED 0x0040 +#define PG_COPY_ON_WRITE 0x0080 +#define PG_FICTITIOUS 0x0100 +#define PG_ABSENT 0x0200 +#define PG_FAKE 0x0400 +#define PG_PAGEROWNED 0x0800 +#define PG_PTPAGE 0x1000 + struct vm_page { - queue_chain_t pageq; /* queue info for FIFO - * queue or free list (P) */ + queue_chain_t pageq; /* queue info for FIFO */ + /* queue or free list (P) */ queue_chain_t hashq; /* hash table links (O)*/ queue_chain_t listq; /* all pages in same object (O)*/ vm_object_t object; /* which object am I in (O,P)*/ vm_offset_t offset; /* offset into that object (O,P) */ - unsigned int wire_count:16, /* how many wired down maps use me? - (P) */ - /* boolean_t */ inactive:1, /* page is in inactive list (P) */ - active:1, /* page is in active list (P) */ - laundry:1, /* page is being cleaned now (P)*/ -#ifdef DEBUG - pagerowned:1, /* async paging op in progress */ - ptpage:1, /* is a user page table page */ -#endif - :0; /* (force to 'long' boundary) */ -#ifdef ns32000 - int pad; /* extra space for ns32000 bit ops */ -#endif ns32000 - boolean_t clean; /* page has not been modified */ - unsigned int - /* boolean_t */ busy:1, /* page is in transit (O) */ - wanted:1, /* someone is waiting for page (O) */ - tabled:1, /* page is in VP table (O) */ - copy_on_write:1,/* page must be copied before being - changed (O) */ - fictitious:1, /* physical page doesn't exist (O) */ - absent:1, /* virtual page doesn't exist (O) */ - fake:1, /* page is a placeholder for page-in - (O) */ - :0; + unsigned int wire_count; /* how many wired down maps use me? */ + unsigned short flags; /* bit encoded flags */ + unsigned short deact; /* deactivation count */ vm_offset_t phys_addr; /* physical address of page */ - vm_prot_t page_lock; /* Uses prohibited by data manager */ - vm_prot_t unlock_request; /* Outstanding unlock request */ }; typedef struct vm_page *vm_page_t; +#define DEACT_START 5 +#define DEACT_DELAY 2 +#define DEACT_CLEAN 1 +#define DEACT_FREE 0 + #if VM_PAGE_DEBUG #define VM_PAGE_CHECK(mem) { \ - if ( (((unsigned int) mem) < ((unsigned int) &vm_page_array[0])) || \ + if ((((unsigned int) mem) < ((unsigned int) &vm_page_array[0])) || \ (((unsigned int) mem) > ((unsigned int) &vm_page_array[last_page-first_page])) || \ - (mem->active && mem->inactive) \ + ((mem->flags & PG_ACTIVE) && (mem->flags & PG_INACTIVE)) \ ) panic("vm_page_check: not valid!"); \ } -#else VM_PAGE_DEBUG +#else /* VM_PAGE_DEBUG */ #define VM_PAGE_CHECK(mem) -#endif VM_PAGE_DEBUG +#endif /* VM_PAGE_DEBUG */ #ifdef KERNEL /* @@ -223,7 +218,6 @@ simple_lock_data_t vm_page_queue_free_lock; vm_offset_t vm_page_startup(); vm_page_t vm_page_lookup(); vm_page_t vm_page_alloc(); -void vm_page_init(); void vm_page_free(); void vm_page_activate(); void vm_page_deactivate(); @@ -236,21 +230,20 @@ void vm_page_copy(); void vm_page_wire(); void vm_page_unwire(); -void vm_set_page_size(); /* * Functions implemented as macros */ #define PAGE_ASSERT_WAIT(m, interruptible) { \ - (m)->wanted = TRUE; \ + (m)->flags |= PG_WANTED; \ assert_wait((int) (m), (interruptible)); \ } #define PAGE_WAKEUP(m) { \ - (m)->busy = FALSE; \ - if ((m)->wanted) { \ - (m)->wanted = FALSE; \ + (m)->flags &= ~PG_BUSY; \ + if ((m)->flags & PG_WANTED) { \ + (m)->flags &= ~PG_WANTED; \ thread_wakeup((int) (m)); \ } \ } @@ -258,6 +251,33 @@ void vm_set_page_size(); #define vm_page_lock_queues() 
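With the old one-bit structure members folded into a single flags word above, the usual manipulations become mask operations. A few of the resulting idioms on a stand-in page structure; only the flag values are copied from the header:

#define SK_PG_ACTIVE 0x0002
#define SK_PG_BUSY   0x0010

struct sk_page {
        unsigned short flags;
};

/* test two bits with one mask: active and not busy */
static int
sk_is_idle_active(const struct sk_page *m)
{
        return (m->flags & (SK_PG_ACTIVE | SK_PG_BUSY)) == SK_PG_ACTIVE;
}

static void
sk_mark_busy(struct sk_page *m)
{
        m->flags |= SK_PG_BUSY;
}

static void
sk_clear_busy(struct sk_page *m)
{
        m->flags &= ~SK_PG_BUSY;
}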
simple_lock(&vm_page_queue_lock) #define vm_page_unlock_queues() simple_unlock(&vm_page_queue_lock) -#define vm_page_set_modified(m) { (m)->clean = FALSE; } -#endif KERNEL -#endif _VM_PAGE_ +#define vm_page_set_modified(m) { (m)->flags &= ~PG_CLEAN; } + +/* Some pmap things are declared here for the convenience of other bits of + code. */ +extern void pmap_bootstrap(vm_offset_t, vm_offset_t); +extern void pmap_init(vm_offset_t, vm_offset_t); +extern vm_offset_t pmap_map(vm_offset_t, vm_offset_t, vm_offset_t, int); +extern void pmap_remove_all(vm_offset_t); +extern void pmap_copy_on_write(vm_offset_t); +extern void pmap_page_protect(vm_offset_t, vm_prot_t); +extern void pmap_update(void); +extern void pmap_zero_page(vm_offset_t); +extern void pmap_copy_page(vm_offset_t, vm_offset_t); +extern void pmap_clear_modify(vm_offset_t); +extern void pmap_clear_reference(vm_offset_t); +extern boolean_t pmap_is_referenced(vm_offset_t); +extern boolean_t pmap_is_modified(vm_offset_t); +extern vm_offset_t pmap_phys_ddress(int); + + +/* + * these macros are *MUCH* faster on a 386/486 type machine + * eventually they need to be implemented correctly and put + * somewhere in the machine dependant stuff. + */ +#define vm_disable_intr() (disable_intr(), 0) +#define vm_set_intr(spl) enable_intr() + +#endif /* KERNEL */ +#endif /* _VM_PAGE_ */ diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 1800729df785..229a4090922c 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -1,6 +1,10 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. + * Copyright (c) 1994 John S. Dyson + * All rights reserved. + * Copyright (c) 1994 David Greenman + * All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. @@ -33,11 +37,9 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_pageout.c,v 1.2 1993/10/16 16:20:47 rgrimes Exp $ - */ - -/* + * @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91 + * + * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * @@ -62,6 +64,8 @@ * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. + * + * $Id: vm_pageout.c,v 1.13 1994/02/10 08:08:37 davidg Exp $ */ /* @@ -73,54 +77,517 @@ #include "vm.h" #include "vm_page.h" #include "vm_pageout.h" +#include "malloc.h" +#include "proc.h" +#include "resource.h" +#include "resourcevar.h" #include "vmmeter.h" +extern vm_map_t kmem_map; int vm_pages_needed; /* Event on which pageout daemon sleeps */ int vm_pageout_free_min = 0; /* Stop pageout to wait for pagers at this free level */ -int vm_page_free_min_sanity = 40; +int vm_pageout_pages_needed = 0; /* flag saying that the pageout daemon needs pages */ +int vm_page_pagesfreed; + +extern int npendingio; +extern int hz; +int vm_pageout_proc_limit; +extern int nswiodone; + +#define MAXREF 32767 +#define DEACT_MAX (DEACT_START * 4) +#define MINSCAN 512 /* minimum number of pages to scan in active queue */ + /* set the "clock" hands to be (MINSCAN * 4096) Bytes */ +static int minscan; +void vm_pageout_deact_bump(vm_page_t m) ; -int vm_page_pagesfreed; /* Pages freed by page daemon */ /* - * vm_pageout_scan does the dirty work for the pageout daemon. 
+ * vm_pageout_clean: + * cleans a vm_page */ -vm_pageout_scan() +int +vm_pageout_clean(m, wait) + register vm_page_t m; + int wait; { - register vm_page_t m; - register int page_shortage; - register int s; - register int pages_freed; - int free; + /* + * Clean the page and remove it from the + * laundry. + * + * We set the busy bit to cause + * potential page faults on this page to + * block. + * + * And we set pageout-in-progress to keep + * the object from disappearing during + * pageout. This guarantees that the + * page won't move from the inactive + * queue. (However, any other page on + * the inactive queue may move!) + */ + + register vm_object_t object; + register vm_pager_t pager; + int pageout_status; + + object = m->object; + if (!object) { + printf("pager: object missing\n"); + return 0; + } /* - * Only continue when we want more pages to be "free" + * Try to collapse the object before + * making a pager for it. We must + * unlock the page queues first. + * We try to defer the creation of a pager + * until all shadows are not paging. This + * allows vm_object_collapse to work better and + * helps control swap space size. + * (J. Dyson 11 Nov 93) */ - s = splimp(); - simple_lock(&vm_page_queue_free_lock); - free = vm_page_free_count; - simple_unlock(&vm_page_queue_free_lock); + if (!object->pager && + vm_page_free_count < vm_pageout_free_min) + return 0; + +collapseagain: + if (!object->pager && + object->shadow && + object->shadow->paging_in_progress) + return 0; + + if (object->shadow) { + vm_offset_t offset = m->offset; + vm_object_collapse(object); + if (!vm_page_lookup(object, offset)) + return 0; + } + +waitagain: + if (!wait && (m->flags & PG_BUSY)) { + return 0; + } else if (m->flags & PG_BUSY) { + int s = splhigh(); + m->flags |= PG_WANTED; + tsleep((caddr_t)m, PVM, "clnslp", 0); + splx(s); + goto waitagain; + } + + m->flags |= PG_BUSY; + + pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ); + + vm_stat.pageouts++; + + object->paging_in_progress++; + + /* + * If there is no pager for the page, + * use the default pager. If there's + * no place to put the page at the + * moment, leave it in the laundry and + * hope that there will be paging space + * later. + */ + + if ((pager = object->pager) == NULL) { + pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, + object->size, VM_PROT_ALL, 0); + if (pager != NULL) { + vm_object_setpager(object, pager, 0, FALSE); + } + } + if ((pager && pager->pg_type == PG_SWAP) || + vm_page_free_count >= vm_pageout_free_min) { + pageout_status = pager ? + vm_pager_put(pager, m, (((object == kernel_object) || wait) ? TRUE: FALSE)) : + VM_PAGER_FAIL; + } else + pageout_status = VM_PAGER_FAIL; + + switch (pageout_status) { + case VM_PAGER_OK: + m->flags &= ~PG_LAUNDRY; + break; + case VM_PAGER_PEND: + m->flags &= ~PG_LAUNDRY; + break; + case VM_PAGER_BAD: + /* + * Page outside of range of object. + * Right now we essentially lose the + * changes by pretending it worked. + */ + m->flags &= ~PG_LAUNDRY; + m->flags |= PG_CLEAN; + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + break; + case VM_PAGER_FAIL: + /* + * If page couldn't be paged out, then + * reactivate the page so it doesn't + * clog the inactive list. (We will + * try paging out it again later). + */ + if ((m->flags & PG_ACTIVE) == 0) + vm_page_activate(m); + break; + case VM_PAGER_TRYAGAIN: + break; + } + + + /* + * If the operation is still going, leave + * the page busy to block all other accesses. 
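The waitagain loop in vm_pageout_clean() above is the standard sleep-on-busy-page idiom: mark the page wanted, sleep on its address, and re-test after waking because another thread may have claimed it first. A stand-alone model, with the tsleep() call indicated only by a comment:

#define SKB_BUSY   0x0010
#define SKB_WANTED 0x0020

struct skb_page {
        unsigned short flags;
};

static void
skb_wait_not_busy(struct skb_page *m)
{
        while (m->flags & SKB_BUSY) {
                m->flags |= SKB_WANTED;
                /* kernel: tsleep((caddr_t)m, PVM, "clnslp", 0);
                 * PAGE_WAKEUP() on the owning side clears PG_BUSY and
                 * wakes this channel, after which the loop re-tests. */
        }
}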
+ * Also, leave the paging in progress + * indicator set so that we don't attempt an + * object collapse. + */ + if (pageout_status != VM_PAGER_PEND) { + if ((m->flags & PG_ACTIVE) == 0 && + pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + vm_page_activate(m); + } + PAGE_WAKEUP(m); + if (--object->paging_in_progress == 0) + wakeup((caddr_t) object); + } + return (pageout_status == VM_PAGER_PEND || + pageout_status == VM_PAGER_OK) ? 1 : 0; +} + +int +vm_fault_object_deactivate_pages(map, object, dummy) + vm_map_t map; + vm_object_t object; + int dummy; +{ + register vm_page_t p, next; + int rcount; + int s; + int dcount; + int count; + + dcount = 0; + /* + * deactivate the pages in the objects shadow + */ + + if (object->shadow) + dcount += vm_fault_object_deactivate_pages(map, object->shadow, 0); + + /* + * scan the objects memory queue and remove 20% of the active pages + */ + rcount = object->resident_page_count; + count = rcount; + if (count == 0) + return dcount; +#define MINOBJWRITE 10 +#define OBJDIVISOR 5 + if (count > MINOBJWRITE) { + count = MINOBJWRITE + ((count - MINOBJWRITE) / OBJDIVISOR); + } + p = (vm_page_t) queue_first(&object->memq); + while ((rcount-- > 0) && !queue_end(&object->memq, (queue_entry_t) p) ) { + next = (vm_page_t) queue_next(&p->listq); + vm_page_lock_queues(); + /* + * if a page is active, not wired and is in the processes pmap, + * then deactivate the page. + */ + if ((p->flags & (PG_ACTIVE|PG_BUSY)) == PG_ACTIVE && + p->wire_count == 0 && + pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) { + if (!pmap_is_referenced(VM_PAGE_TO_PHYS(p))) { + vm_page_deactivate(p); + if ((p->flags & PG_CLEAN) == 0) { + vm_pageout_clean(p, 0); + } + ++dcount; + if (--count <= 0) { + vm_page_unlock_queues(); + s = splbio(); + while (object->paging_in_progress) { + tsleep((caddr_t) object,PVM,"vmfobw",0); + } + splx(s); + return dcount; + } + } else { + vm_pageout_deact_bump(p); + pmap_clear_reference(VM_PAGE_TO_PHYS(p)); + queue_remove(&object->memq, p, vm_page_t, listq); + queue_enter(&object->memq, p, vm_page_t, listq); + queue_remove(&vm_page_queue_active, p, vm_page_t, pageq); + queue_enter(&vm_page_queue_active, p, vm_page_t, pageq); + } + /* + * if a page is inactive and has been modified, clean it now + */ + } else if ((p->flags & (PG_INACTIVE|PG_BUSY)) == PG_INACTIVE) { + if ((p->flags & PG_CLEAN) && + pmap_is_modified(VM_PAGE_TO_PHYS(p))) + p->flags &= ~PG_CLEAN; + + if ((p->flags & PG_CLEAN) == 0) + vm_pageout_clean(p, 0); + } + + vm_page_unlock_queues(); + p = next; + } + s = splbio(); + while (object->paging_in_progress) { + tsleep((caddr_t)object,PVM,"vmfobw",0); + } splx(s); + return dcount; +} - if (free < vm_page_free_target) { -#ifdef OMIT - swapout_threads(); -#endif /* OMIT*/ +/* + * vm_pageout_object_deactivate_pages + * + * deactivate enough pages to satisfy the inactive target + * requirements or if vm_page_proc_limit is set, then + * deactivate all of the pages in the object and its + * shadows. + * + * The object and map must be locked. 
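The per-object budget above considers at most a floor of MINOBJWRITE pages plus a fifth of whatever is resident beyond that, so one huge object cannot monopolize a pass. The constants are copied from the code; the helper name is hypothetical:

#define SK_MINOBJWRITE 10
#define SK_OBJDIVISOR  5

static int
sk_deactivation_budget(int resident_pages)
{
        if (resident_pages <= SK_MINOBJWRITE)
                return resident_pages;
        /* e.g. 110 resident pages -> 10 + 100/5 = 30 candidates */
        return SK_MINOBJWRITE +
            (resident_pages - SK_MINOBJWRITE) / SK_OBJDIVISOR;
}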
+ */ +int +vm_pageout_object_deactivate_pages(map, object, count) + vm_map_t map; + vm_object_t object; + int count; +{ + register vm_page_t p, next; + int rcount; + int s; + int dcount; + + dcount = 0; + if (count == 0) + count = 1; + if (object->shadow) { + dcount += vm_pageout_object_deactivate_pages(map, object->shadow, count); + } + + if (object->paging_in_progress) + return dcount; + + /* + * scan the objects entire memory queue + */ + rcount = object->resident_page_count; + p = (vm_page_t) queue_first(&object->memq); + while ((rcount-- > 0) && !queue_end(&object->memq, (queue_entry_t) p) ) { + next = (vm_page_t) queue_next(&p->listq); + vm_page_lock_queues(); /* - * Be sure the pmap system is updated so - * we can scan the inactive queue. + * if a page is active, not wired and is in the processes pmap, + * then deactivate the page. */ + if ((p->flags & (PG_ACTIVE|PG_BUSY)) == PG_ACTIVE && + p->wire_count == 0 && + pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) { + if (!pmap_is_referenced(VM_PAGE_TO_PHYS(p))) { + if (object->ref_count <= 1) + vm_page_deactivate(p); + else + vm_page_pageout_deactivate(p); + if (((p->flags & PG_INACTIVE)) && + (p->flags & PG_CLEAN) == 0) + vm_pageout_clean(p, 0); + /* + * see if we are done yet + */ + if (p->flags & PG_INACTIVE) { + --count; + ++dcount; + if (count <= 0 && + vm_page_inactive_count > vm_page_inactive_target) { + vm_page_unlock_queues(); + return dcount; + } + } + + } else { + vm_pageout_deact_bump(p); + pmap_clear_reference(VM_PAGE_TO_PHYS(p)); + queue_remove(&object->memq, p, vm_page_t, listq); + queue_enter(&object->memq, p, vm_page_t, listq); + queue_remove(&vm_page_queue_active, p, vm_page_t, pageq); + queue_enter(&vm_page_queue_active, p, vm_page_t, pageq); + } + /* + * if a page is inactive and has been modified, clean it now + */ + } else if ((p->flags & (PG_INACTIVE|PG_BUSY)) == PG_INACTIVE) { + if ((p->flags & PG_CLEAN) && + pmap_is_modified(VM_PAGE_TO_PHYS(p))) + p->flags &= ~PG_CLEAN; - pmap_update(); + if ((p->flags & PG_CLEAN) == 0) + vm_pageout_clean(p, 0); + } + + vm_page_unlock_queues(); + p = next; } + return dcount; +} + + +/* + * deactivate some number of pages in a map, try to do it fairly, but + * that is really hard to do. + */ + +void +vm_pageout_map_deactivate_pages(map, entry, count, freeer) + vm_map_t map; + vm_map_entry_t entry; + int *count; + int (*freeer)(vm_map_t, vm_object_t, int); +{ + vm_map_t tmpm; + vm_map_entry_t tmpe; + vm_object_t obj; + if (*count <= 0) + return; + vm_map_reference(map); + if (!lock_try_read(&map->lock)) { + vm_map_deallocate(map); + return; + } + if (entry == 0) { + tmpe = map->header.next; + while (tmpe != &map->header && *count > 0) { + vm_pageout_map_deactivate_pages(map, tmpe, count, freeer); + tmpe = tmpe->next; + }; + } else if (entry->is_sub_map || entry->is_a_map) { + tmpm = entry->object.share_map; + tmpe = tmpm->header.next; + while (tmpe != &tmpm->header && *count > 0) { + vm_pageout_map_deactivate_pages(tmpm, tmpe, count, freeer); + tmpe = tmpe->next; + }; + } else if (obj = entry->object.vm_object) { + *count -= (*freeer)(map, obj, *count); + } + lock_read_done(&map->lock); + vm_map_deallocate(map); + return; +} + +void +vm_fault_free_pages(p) + struct proc *p; +{ + int overage = 1; + vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map, + (vm_map_entry_t) 0, &overage, vm_fault_object_deactivate_pages); +} + +/* + * vm_pageout_scan does the dirty work for the pageout daemon. 
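vm_pageout_map_deactivate_pages() above recurses through a map's entries (and share maps) and hands each backing object to a caller-supplied freeer routine, which is how the same walker serves both the RSS-limit path and the fault-time path. A flattened stand-alone model of that shape, all names hypothetical:

struct sk_vmobj {
        int resident_page_count;
};

/*
 * Walk a list of objects, charging each freeer() call against a
 * shared budget; returns how many pages were handled in total.
 */
static int
sk_walk_objects(struct sk_vmobj **objs, int nobjs, int *budget,
    int (*freeer)(struct sk_vmobj *, int))
{
        int done = 0, i;

        for (i = 0; i < nobjs && *budget > 0; i++) {
                int n = (*freeer)(objs[i], *budget);
                *budget -= n;
                done += n;
        }
        return done;
}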
+ */ +void +vm_pageout_scan() +{ + vm_page_t m; + int page_shortage, maxscan, maxlaunder; + int pages_freed, free, nproc, nbusy; + vm_page_t next; + struct proc *p; + vm_object_t object; + int s; /* - * Acquire the resident page system lock, - * as we may be changing what's resident quite a bit. + * deactivate objects with ref_counts == 0 */ - vm_page_lock_queues(); + object = (vm_object_t) queue_first(&vm_object_list); + while (!queue_end(&vm_object_list, (queue_entry_t) object)) { + if (object->ref_count == 0) + vm_object_deactivate_pages(object); + object = (vm_object_t) queue_next(&object->object_list); + } + +rerun: +#if 1 + /* + * next scan the processes for exceeding their rlimits or if process + * is swapped out -- deactivate pages + */ + +rescanproc1a: + for (p = allproc; p != NULL; p = p->p_nxt) + p->p_flag &= ~SPAGEDAEMON; + +rescanproc1: + for (p = allproc; p != NULL; p = p->p_nxt) { + vm_offset_t size; + int overage; + vm_offset_t limit; + + /* + * if this is a system process or if we have already + * looked at this process, skip it. + */ + if (p->p_flag & (SSYS|SPAGEDAEMON|SWEXIT)) { + continue; + } + + /* + * if the process is in a non-running type state, + * don't touch it. + */ + if (p->p_stat != SRUN && p->p_stat != SSLEEP) { + continue; + } + + /* + * get a limit + */ + limit = min(p->p_rlimit[RLIMIT_RSS].rlim_cur, + p->p_rlimit[RLIMIT_RSS].rlim_max); + + /* + * let processes that are swapped out really be swapped out + * set the limit to nothing (will force a swap-out.) + */ + if ((p->p_flag & SLOAD) == 0) + limit = 0; + + size = p->p_vmspace->vm_pmap.pm_stats.resident_count * NBPG; + if (size >= limit) { + overage = (size - limit) / NBPG; + vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map, + (vm_map_entry_t) 0, &overage, vm_pageout_object_deactivate_pages); + p->p_flag |= SPAGEDAEMON; + goto rescanproc1; + } + p->p_flag |= SPAGEDAEMON; + } + +#if 0 + if (((vm_page_free_count + vm_page_inactive_count) >= + (vm_page_inactive_target + vm_page_free_target)) && + (vm_page_free_count >= vm_page_free_target)) + return; +#endif + +#endif + + pages_freed = 0; /* * Start scanning the inactive queue for pages we can free. @@ -129,46 +596,66 @@ vm_pageout_scan() * encounter dirty pages, we start cleaning them. */ - pages_freed = 0; + maxlaunder = (vm_page_free_target - vm_page_free_count); +rescan: m = (vm_page_t) queue_first(&vm_page_queue_inactive); - while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m)) { + maxscan = vm_page_inactive_count; + while (maxscan-- > 0) { vm_page_t next; - s = splimp(); - simple_lock(&vm_page_queue_free_lock); - free = vm_page_free_count; - simple_unlock(&vm_page_queue_free_lock); - splx(s); - if (free >= vm_page_free_target) + if (queue_end(&vm_page_queue_inactive, (queue_entry_t) m) + || (vm_page_free_count >= vm_page_free_target)) { break; + } - if (m->clean) { - next = (vm_page_t) queue_next(&m->pageq); - if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + next = (vm_page_t) queue_next(&m->pageq); + + /* + * dont mess with busy pages + */ + if (m->flags & PG_BUSY) { + queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); + queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq); + m = next; + continue; + } + + /* + * if page is clean and but the page has been referenced, + * then reactivate the page, but if we are very low on memory + * or the page has not been referenced, then we free it to the + * vm system. 
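The process scan above deactivates pages from any process whose resident set exceeds its RLIMIT_RSS, and treats a swapped-out process as having a limit of zero. A stand-alone sketch of the overage arithmetic, assuming the 4096-byte i386 page:

#define SK_NBPG 4096UL

static unsigned long
sk_rss_overage_pages(unsigned long resident_bytes, unsigned long rlim_cur,
    unsigned long rlim_max, int swapped_out)
{
        unsigned long limit = rlim_cur < rlim_max ? rlim_cur : rlim_max;

        if (swapped_out)
                limit = 0;              /* force the whole image out of core */
        if (resident_bytes < limit)
                return 0;
        return (resident_bytes - limit) / SK_NBPG;
}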
+ */ + if (m->flags & PG_CLEAN) { + if ((vm_page_free_count > vm_pageout_free_min) + && pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { vm_page_activate(m); - vm_stat.reactivations++; + ++vm_stat.reactivations; + m = next; + continue; } else { - register vm_object_t object; - object = m->object; - if (!vm_object_lock_try(object)) { - /* - * Can't lock object - - * skip page. - */ - m = next; - continue; - } pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); - vm_page_free(m); /* will dequeue */ - pages_freed++; - vm_object_unlock(object); + vm_page_free(m); + ++pages_freed; + m = next; + continue; } - m = next; - } - else { + } else if ((m->flags & PG_LAUNDRY) && maxlaunder > 0) { + /* + * if a page has been used even if it is in the laundry, + * activate it. + */ + + if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + vm_page_activate(m); + m->flags &= ~PG_LAUNDRY; + m = next; + continue; + } + /* * If a page is dirty, then it is either * being washed (but not yet cleaned) @@ -177,220 +664,257 @@ vm_pageout_scan() * cleaning operation. */ - if (m->laundry) { - /* - * Clean the page and remove it from the - * laundry. - * - * We set the busy bit to cause - * potential page faults on this page to - * block. - * - * And we set pageout-in-progress to keep - * the object from disappearing during - * pageout. This guarantees that the - * page won't move from the inactive - * queue. (However, any other page on - * the inactive queue may move!) - */ - - register vm_object_t object; - register vm_pager_t pager; - int pageout_status; - - object = m->object; - if (!vm_object_lock_try(object)) { - /* - * Skip page if we can't lock - * its object - */ - m = (vm_page_t) queue_next(&m->pageq); - continue; - } - - pmap_page_protect(VM_PAGE_TO_PHYS(m), - VM_PROT_NONE); - m->busy = TRUE; - vm_stat.pageouts++; - + if (vm_pageout_clean(m,0)) { + --maxlaunder; /* - * Try to collapse the object before - * making a pager for it. We must - * unlock the page queues first. + * if the next page has been re-activated, start scanning again */ - vm_page_unlock_queues(); - - vm_object_collapse(object); - - object->paging_in_progress++; - vm_object_unlock(object); - - /* - * Do a wakeup here in case the following - * operations block. - */ - thread_wakeup((int) &vm_page_free_count); + if ((next->flags & PG_INACTIVE) == 0) + goto rescan; + } + } else if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + vm_page_activate(m); + } + m = next; + } - /* - * If there is no pager for the page, - * use the default pager. If there's - * no place to put the page at the - * moment, leave it in the laundry and - * hope that there will be paging space - * later. - */ + /* + * now check malloc area or swap processes out if we are in low + * memory conditions + */ + free = vm_page_free_count; + if (free <= vm_page_free_min) { + /* + * Be sure the pmap system is updated so + * we can scan the inactive queue. + */ + pmap_update(); - if ((pager = object->pager) == NULL) { - pager = vm_pager_allocate(PG_DFLT, - (caddr_t)0, - object->size, - VM_PROT_ALL); - if (pager != NULL) { - vm_object_setpager(object, - pager, 0, FALSE); - } - } - pageout_status = pager ? - vm_pager_put(pager, m, FALSE) : - VM_PAGER_FAIL; - vm_object_lock(object); - vm_page_lock_queues(); - next = (vm_page_t) queue_next(&m->pageq); - - switch (pageout_status) { - case VM_PAGER_OK: - case VM_PAGER_PEND: - m->laundry = FALSE; - break; - case VM_PAGER_BAD: - /* - * Page outside of range of object. - * Right now we essentially lose the - * changes by pretending it worked. 
- * XXX dubious, what should we do? - */ - m->laundry = FALSE; - m->clean = TRUE; - pmap_clear_modify(VM_PAGE_TO_PHYS(m)); - break; - case VM_PAGER_FAIL: - /* - * If page couldn't be paged out, then - * reactivate the page so it doesn't - * clog the inactive list. (We will - * try paging out it again later). - */ - vm_page_activate(m); - break; - } + /* + * swap out inactive processes + */ + swapout_threads(); - pmap_clear_reference(VM_PAGE_TO_PHYS(m)); +#if 0 + /* + * see if malloc has anything for us + */ + if (free <= vm_page_free_reserved) + malloc_gc(); +#endif + } - /* - * If the operation is still going, leave - * the page busy to block all other accesses. - * Also, leave the paging in progress - * indicator set so that we don't attempt an - * object collapse. - */ - if (pageout_status != VM_PAGER_PEND) { - m->busy = FALSE; - PAGE_WAKEUP(m); - object->paging_in_progress--; - } - thread_wakeup((int) object); - vm_object_unlock(object); - m = next; - } - else - m = (vm_page_t) queue_next(&m->pageq); - } +skipfree: + /* + * If we did not free any pages, but we need to do so, we grow the + * inactive target. But as we successfully free pages, then we + * shrink the inactive target. + */ + if (pages_freed == 0 && vm_page_free_count < vm_page_free_min) { + vm_page_inactive_target += (vm_page_free_min - vm_page_free_count); + if (vm_page_inactive_target > vm_page_free_target*5) + vm_page_inactive_target = vm_page_free_target*5; + } else if (pages_freed > 0) { + vm_page_inactive_target -= vm_page_free_min/2; + if (vm_page_inactive_target < vm_page_free_target*2) + vm_page_inactive_target = vm_page_free_target*2; } - + /* * Compute the page shortage. If we are still very low on memory * be sure that we will move a minimal amount of pages from active * to inactive. */ +restart_inactivate_all: + page_shortage = vm_page_inactive_target - vm_page_inactive_count; - page_shortage -= vm_page_free_count; + page_shortage -= vm_page_free_count; + + if (page_shortage <= 0) { + if (pages_freed == 0 && + ((vm_page_free_count + vm_page_inactive_count) < + (vm_page_free_min + vm_page_inactive_target))) { + page_shortage = 1; + } else { + page_shortage = 0; + } + } + + maxscan = vm_page_active_count; - if ((page_shortage <= 0) && (pages_freed == 0)) - page_shortage = 1; + /* + * deactivate pages that are active, but have not been used + * for a while. + */ +restart_inactivate: + m = (vm_page_t) queue_first(&vm_page_queue_active); + while (maxscan-- > 0) { + + if (page_shortage <= 0 && + maxscan < (vm_page_active_count - minscan) ) + break; + + if (queue_end(&vm_page_queue_active, (queue_entry_t) m)) { + break; + } + + next = (vm_page_t) queue_next(&m->pageq); - while (page_shortage > 0) { /* - * Move some more pages from active to inactive. + * dont mess with pages that are busy */ + if (m->flags & PG_BUSY) { + m = next; + continue; + } - if (queue_empty(&vm_page_queue_active)) { - break; + /* + * Move some more pages from active to inactive. + */ + + /* + * see if there are any pages that are able to be deactivated + */ + /* + * the referenced bit is the one that say that the page + * has been used. + */ + if (!pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + /* + * if the page has not been referenced, call the + * vm_page_pageout_deactivate routine. It might + * not deactivate the page every time. There is + * a policy associated with it. 
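The inactive-queue scan above boils down to a small per-page decision table. The following classifier is only a condensed restatement of it, with boolean stand-ins for the flag and pmap tests:

enum sk_scan_action { SK_SKIP, SK_REACTIVATE, SK_FREE, SK_LAUNDER };

static enum sk_scan_action
sk_classify_inactive(int busy, int clean, int laundry, int referenced,
    int plenty_free, int maxlaunder)
{
        if (busy)
                return SK_SKIP;                 /* requeue and move on */
        if (clean) {
                if (plenty_free && referenced)
                        return SK_REACTIVATE;   /* still in use, keep it */
                return SK_FREE;                 /* reclaim immediately */
        }
        if (laundry && maxlaunder > 0) {
                if (referenced)
                        return SK_REACTIVATE;   /* touched while dirty */
                return SK_LAUNDER;              /* hand to vm_pageout_clean() */
        }
        if (referenced)
                return SK_REACTIVATE;
        return SK_SKIP;
}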
+ */ + if (page_shortage > 0) { + if (vm_page_pageout_deactivate(m)) { + /* + * if the page was really deactivated, then + * decrement the page_shortage + */ + if ((m->flags & PG_ACTIVE) == 0) { + --page_shortage; + } + } + } + } else { + /* + * if the page was recently referenced, set our + * deactivate count and clear reference for a future + * check for deactivation. + */ + vm_pageout_deact_bump(m); + if (page_shortage > 0 || m->deact >= (DEACT_MAX/2)) + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + queue_remove(&m->object->memq, m, vm_page_t, listq); + queue_enter(&m->object->memq, m, vm_page_t, listq); + queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); + queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); } - m = (vm_page_t) queue_first(&vm_page_queue_active); - vm_page_deactivate(m); - page_shortage--; + m = next; } vm_page_pagesfreed += pages_freed; - vm_page_unlock_queues(); +} + +/* + * this code maintains a dynamic reference count per page + */ +void +vm_pageout_deact_bump(vm_page_t m) { + if( m->deact >= DEACT_START) { + m->deact += 1; + if( m->deact > DEACT_MAX) + m->deact = DEACT_MAX; + } else { + m->deact += DEACT_START; + } +} + +/* + * optionally do a deactivate if the deactivate has been done + * enough to justify it. + */ +int +vm_page_pageout_deactivate(m) + vm_page_t m; +{ + + switch (m->deact) { +case DEACT_FREE: + vm_page_deactivate(m); + return 1; +case DEACT_CLEAN: + break; +case DEACT_DELAY: + vm_page_makefault(m); +case DEACT_START: + break; + } + --m->deact; + return 0; } /* * vm_pageout is the high level pageout daemon. */ -void vm_pageout() +void +vm_pageout() { + extern npendingio, swiopend; + extern int vm_page_count; (void) spl0(); /* * Initialize some paging parameters. */ - if (vm_page_free_min == 0) { - vm_page_free_min = vm_page_free_count / 20; - if (vm_page_free_min < 3) - vm_page_free_min = 3; - - if (vm_page_free_min > vm_page_free_min_sanity) - vm_page_free_min = vm_page_free_min_sanity; - } - - if (vm_page_free_reserved == 0) { - if ((vm_page_free_reserved = vm_page_free_min / 2) < 10) - vm_page_free_reserved = 10; - } - if (vm_pageout_free_min == 0) { - if ((vm_pageout_free_min = vm_page_free_reserved / 2) > 10) - vm_pageout_free_min = 10; - } - - if (vm_page_free_target == 0) - vm_page_free_target = (vm_page_free_min * 4) / 3; - - if (vm_page_inactive_target == 0) - vm_page_inactive_target = vm_page_free_min * 2; - - if (vm_page_free_target <= vm_page_free_min) - vm_page_free_target = vm_page_free_min + 1; - - if (vm_page_inactive_target <= vm_page_free_target) - vm_page_inactive_target = vm_page_free_target + 1; +vmretry: + vm_page_free_min = npendingio/3; +#ifdef VSMALL + vm_page_free_min = 8; +#endif + vm_page_free_reserved = 8; + if (vm_page_free_min < 8) + vm_page_free_min = 8; + if (vm_page_free_min > 32) + vm_page_free_min = 32; + vm_pageout_free_min = 3; + vm_page_free_target = 2*vm_page_free_min + vm_page_free_reserved; + vm_page_inactive_target = 3*vm_page_free_min + vm_page_free_reserved; + vm_page_free_min += vm_page_free_reserved; + minscan = MINSCAN; + if (minscan > vm_page_count/3) + minscan = vm_page_count/3; /* * The pageout daemon is never done, so loop * forever. 
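The deact field gives each page a little history: a reference bumps the counter (by DEACT_START the first time, then by one, capped at DEACT_MAX), and every pass that finds the page unreferenced decays it, so only a page idle for several scans is actually deactivated. A toy model of that aging, omitting the intermediate DEACT_DELAY step that revokes the mapping:

#define SK_DEACT_START 5
#define SK_DEACT_MAX   (SK_DEACT_START * 4)

static void
sk_deact_bump(unsigned short *deact)
{
        if (*deact >= SK_DEACT_START) {
                if (++*deact > SK_DEACT_MAX)
                        *deact = SK_DEACT_MAX;
        } else
                *deact += SK_DEACT_START;       /* first recent reference */
}

/* one pageout pass that found the page unreferenced; returns 1 when
 * the page would finally be moved to the inactive queue */
static int
sk_deact_decay(unsigned short *deact)
{
        if (*deact == 0)
                return 1;
        --*deact;
        return 0;
}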
*/ - simple_lock(&vm_pages_needed_lock); + while (TRUE) { - thread_sleep((int) &vm_pages_needed, &vm_pages_needed_lock, - FALSE); - cnt.v_scan++; + + splhigh(); + if (vm_page_free_count > vm_page_free_min) { + wakeup((caddr_t) &vm_page_free_count); + tsleep((caddr_t) &vm_pages_needed, PVM, "psleep", 0); + } else { + if (nswiodone) { + spl0(); + goto dosync; + } + tsleep((caddr_t) &vm_pages_needed, PVM, "pslp1", 5); + } + spl0(); + + vm_pager_sync(); vm_pageout_scan(); + dosync: vm_pager_sync(); - simple_lock(&vm_pages_needed_lock); - thread_wakeup((int) &vm_page_free_count); + cnt.v_scan++; + wakeup((caddr_t) kmem_map); } } diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h index 638684ac394d..d975d8006fd3 100644 --- a/sys/vm/vm_pageout.h +++ b/sys/vm/vm_pageout.h @@ -33,11 +33,9 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * from: @(#)vm_pageout.h 7.3 (Berkeley) 4/21/91 - * $Id: vm_pageout.h,v 1.2 1993/10/16 16:20:49 rgrimes Exp $ - */ - -/* + * @(#)vm_pageout.h 7.3 (Berkeley) 4/21/91 + * + * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * @@ -63,6 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ +#include <sys/systm.h> /* * Header file for pageout daemon. @@ -83,10 +82,18 @@ simple_lock_data_t vm_pages_needed_lock; /* * Signal pageout-daemon and wait for it. */ +#define VM_WAIT vm_wait() +inline static void vm_wait() { + extern struct proc *curproc, *pageproc; + extern int vm_pageout_pages_needed; + if (curproc == pageproc) { + vm_pageout_pages_needed = 1; + tsleep((caddr_t) &vm_pageout_pages_needed, PSWP, "vmwait", 0); + vm_pageout_pages_needed = 0; + } else { + wakeup((caddr_t) &vm_pages_needed); + tsleep((caddr_t) &vm_page_free_count, PVM, "vmwait", 0); + } +} + -#define VM_WAIT { \ - simple_lock(&vm_pages_needed_lock); \ - thread_wakeup((int)&vm_pages_needed); \ - thread_sleep((int)&vm_page_free_count, \ - &vm_pages_needed_lock, FALSE); \ - } diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c index ac49c43c76ea..d31be45a430f 100644 --- a/sys/vm/vm_pager.c +++ b/sys/vm/vm_pager.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_pager.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_pager.c,v 1.3 1993/10/16 16:20:50 rgrimes Exp $ + * $Id: vm_pager.c,v 1.10 1994/01/31 04:21:43 davidg Exp $ */ /* @@ -70,31 +70,16 @@ */ #include "param.h" +#include "systm.h" #include "malloc.h" #include "vm.h" #include "vm_page.h" #include "vm_kern.h" -#include "swappager.h" - -#if NSWAPPAGER > 0 extern struct pagerops swappagerops; -#else -#define swappagerops NULL -#endif -#include "vnodepager.h" -#if NVNODEPAGER > 0 extern struct pagerops vnodepagerops; -#else -#define vnodepagerops NULL -#endif -#include "devpager.h" -#if NDEVPAGER > 0 extern struct pagerops devicepagerops; -#else -#define devicepagerops NULL -#endif struct pagerops *pagertab[] = { &swappagerops, /* PG_SWAP */ @@ -109,7 +94,7 @@ struct pagerops *dfltpagerops = NULL; /* default pager */ * Kernel address space for mapping pages. * Used by pagers where KVAs are needed for IO. */ -#define PAGER_MAP_SIZE (256 * PAGE_SIZE) +#define PAGER_MAP_SIZE (1024 * PAGE_SIZE) vm_map_t pager_map; vm_offset_t pager_sva, pager_eva; @@ -137,17 +122,18 @@ vm_pager_init() * Allocate an instance of a pager of the given type. 
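The VM_WAIT macro above (now an inline vm_wait()) either parks the pageout daemon on its private channel or puts an ordinary process to sleep on vm_page_free_count after poking the daemon. A hypothetical caller, assuming the surrounding kernel headers, shown only to illustrate the intended retry pattern:

vm_page_t
sk_alloc_page_wait(vm_object_t object, vm_offset_t offset)
{
        vm_page_t m;

        while ((m = vm_page_alloc(object, offset)) == NULL)
                VM_WAIT;        /* sleeps until the free list is replenished */
        return m;
}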
*/ vm_pager_t -vm_pager_allocate(type, handle, size, prot) +vm_pager_allocate(type, handle, size, prot, off) int type; caddr_t handle; vm_size_t size; vm_prot_t prot; + vm_offset_t off; { vm_pager_t pager; struct pagerops *ops; ops = (type == PG_DFLT) ? dfltpagerops : pagertab[type]; - return((*ops->pgo_alloc)(handle, size, prot)); + return((*ops->pgo_alloc)(handle, size, prot, off)); } void @@ -160,6 +146,27 @@ vm_pager_deallocate(pager) VM_PAGER_DEALLOC(pager); } +int +vm_pager_getmulti(pager, m, count, reqpage, sync) + vm_pager_t pager; + vm_page_t m; + int count; + int reqpage; + boolean_t sync; +{ + extern boolean_t vm_page_zero_fill(); + extern int vm_pageout_count; + int i; + + if (pager == NULL) { + for (i=0;i<count;i++) + vm_page_zero_fill(m+i); + return VM_PAGER_OK; + } + return(VM_PAGER_GET_MULTI(pager, m, count, reqpage, sync)); +} + +int vm_pager_get(pager, m, sync) vm_pager_t pager; vm_page_t m; @@ -172,6 +179,7 @@ vm_pager_get(pager, m, sync) return(VM_PAGER_GET(pager, m, sync)); } +int vm_pager_put(pager, m, sync) vm_pager_t pager; vm_page_t m; @@ -212,14 +220,14 @@ vm_pager_map_page(m) vm_offset_t kva; #ifdef DEBUG - if (!m->busy || m->active) + if (!(m->flags & PG_BUSY) || (m->flags & PG_ACTIVE)) panic("vm_pager_map_page: page active or not busy"); - if (m->pagerowned) + if (m->flags & PG_PAGEROWNED) printf("vm_pager_map_page: page %x already in pager\n", m); #endif kva = kmem_alloc_wait(pager_map, PAGE_SIZE); #ifdef DEBUG - m->pagerowned = 1; + m->flags |= PG_PAGEROWNED; #endif pmap_enter(vm_map_pmap(pager_map), kva, VM_PAGE_TO_PHYS(m), VM_PROT_DEFAULT, TRUE); @@ -237,8 +245,8 @@ vm_pager_unmap_page(kva) #endif kmem_free_wakeup(pager_map, kva, PAGE_SIZE); #ifdef DEBUG - if (m->pagerowned) - m->pagerowned = 0; + if (m->flags & PG_PAGEROWNED) + m->flags &= ~PG_PAGEROWNED; else printf("vm_pager_unmap_page: page %x(%x/%x) not owned\n", m, kva, VM_PAGE_TO_PHYS(m)); @@ -265,6 +273,7 @@ vm_pager_lookup(list, handle) * This routine gains a reference to the object. * Explicit deallocation is necessary. */ +int pager_cache(object, should_cache) vm_object_t object; boolean_t should_cache; diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h index 292c03da5ac5..699881ab51a3 100644 --- a/sys/vm/vm_pager.h +++ b/sys/vm/vm_pager.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * from: @(#)vm_pager.h 7.2 (Berkeley) 4/20/91 - * $Id: vm_pager.h,v 1.2 1993/10/16 16:20:51 rgrimes Exp $ + * $Id: vm_pager.h,v 1.6 1994/01/31 04:21:50 davidg Exp $ */ /* @@ -64,9 +64,10 @@ typedef struct pager_struct *vm_pager_t; struct pagerops { void (*pgo_init)(); /* initialize pager */ - vm_pager_t (*pgo_alloc)(); /* allocate pager */ + vm_pager_t (*pgo_alloc)(caddr_t, vm_size_t, vm_prot_t, vm_offset_t); /* allocate pager */ void (*pgo_dealloc)(); /* disassociate */ int (*pgo_getpage)(); /* get (read) page */ + int (*pgo_getmulti)(); /* get (read) multiple pages */ int (*pgo_putpage)(); /* put (write) page */ boolean_t (*pgo_haspage)(); /* does pager have page? 
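The pager layer above is an ops-vector dispatch: each pager type exports a struct pagerops and pagertab[] maps the PG_* type codes onto those vectors, so vm_pager_get()/vm_pager_put() never need to know which pager they are talking to. A minimal stand-alone model of the pattern, with hypothetical names:

struct sk_pagerops {
        int (*pgo_getpage)(void *pager, void *page, int sync);
};

static int
sk_swap_getpage(void *pager, void *page, int sync)
{
        (void)pager; (void)page; (void)sync;
        return 0;                       /* VM_PAGER_OK in the real code */
}

static struct sk_pagerops sk_swappagerops = { sk_swap_getpage };

static struct sk_pagerops *sk_pagertab[] = {
        &sk_swappagerops,               /* PG_SWAP */
        /* vnode and device pagers would follow, as in pagertab[] above */
};

static int
sk_pager_get(int type, void *pager, void *page, int sync)
{
        return (*sk_pagertab[type]->pgo_getpage)(pager, page, sync);
}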
*/ }; @@ -77,29 +78,33 @@ struct pagerops { * BAD specified data was out of the accepted range * FAIL specified data was in range, but doesn't exist * PEND operations was initiated but not completed + * TRYAGAIN operation will be successful in the future */ #define VM_PAGER_OK 0 #define VM_PAGER_BAD 1 #define VM_PAGER_FAIL 2 #define VM_PAGER_PEND 3 +#define VM_PAGER_TRYAGAIN 4 -#define VM_PAGER_ALLOC(h, s, p) (*(pg)->pg_ops->pgo_alloc)(h, s, p) +#define VM_PAGER_ALLOC(h, s, p, o) (*(pg)->pg_ops->pgo_alloc)(h, s, p, o) #define VM_PAGER_DEALLOC(pg) (*(pg)->pg_ops->pgo_dealloc)(pg) #define VM_PAGER_GET(pg, m, s) (*(pg)->pg_ops->pgo_getpage)(pg, m, s) +#define VM_PAGER_GET_MULTI(pg, m, c, r, s) (*(pg)->pg_ops->pgo_getmulti)(pg, m, c, r, s) #define VM_PAGER_PUT(pg, m, s) (*(pg)->pg_ops->pgo_putpage)(pg, m, s) #define VM_PAGER_HASPAGE(pg, o) (*(pg)->pg_ops->pgo_haspage)(pg, o) #ifdef KERNEL -vm_pager_t vm_pager_allocate(); -void vm_pager_deallocate(); -int vm_pager_get(); -int vm_pager_put(); -boolean_t vm_pager_has_page(); - -vm_offset_t vm_pager_map_page(); -void vm_pager_unmap_page(); -vm_pager_t vm_pager_lookup(); -void vm_pager_sync(); +extern void vm_pager_init(void); +extern vm_pager_t vm_pager_allocate(int, caddr_t, vm_size_t, vm_prot_t, vm_offset_t); +extern void vm_pager_deallocate(vm_pager_t); +struct vm_page; +extern int vm_pager_get(vm_pager_t, struct vm_page *, boolean_t); +extern int vm_pager_put(vm_pager_t, struct vm_page *, boolean_t); +extern boolean_t vm_pager_has_page(vm_pager_t, vm_offset_t); +extern void vm_pager_sync(void); +extern vm_offset_t vm_pager_map_page(struct vm_page *); +extern void vm_pager_unmap_page(vm_offset_t); +extern vm_pager_t vm_pager_lookup(queue_head_t *, caddr_t); extern struct pagerops *dfltpagerops; #endif diff --git a/sys/vm/vm_param.h b/sys/vm/vm_param.h index 32f35ce01360..b3a0ed1ce80a 100644 --- a/sys/vm/vm_param.h +++ b/sys/vm/vm_param.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_param.h 7.2 (Berkeley) 4/21/91 - * $Id: vm_param.h,v 1.2 1993/10/16 16:20:53 rgrimes Exp $ + * $Id: vm_param.h,v 1.6 1994/01/31 23:48:48 davidg Exp $ */ /* @@ -84,20 +84,6 @@ typedef int boolean_t; #define TRUE 1 #define FALSE 0 -/* - * The machine independent pages are refered to as PAGES. A page - * is some number of hardware pages, depending on the target machine. - */ - -/* - * All references to the size of a page should be done with PAGE_SIZE - * or PAGE_SHIFT. The fact they are variables is hidden here so that - * we can easily make them constant if we so desire. - */ - -#define PAGE_SIZE page_size /* size of page in addressible units */ -#define PAGE_SHIFT page_shift /* number of bits to shift for pages */ - /* * Return values from the VM routines. */ @@ -112,17 +98,12 @@ typedef int boolean_t; #define KERN_NO_ACCESS 8 #ifdef ASSEMBLER -#else ASSEMBLER +#else /* ASSEMBLER */ /* * Convert addresses to pages and vice versa. * No rounding is used. */ -#ifdef KERNEL -#define atop(x) (((unsigned)(x)) >> page_shift) -#define ptoa(x) ((vm_offset_t)((x) << page_shift)) -#endif KERNEL - /* * Round off or truncate to the nearest page. These will work * for either addresses or counts. (i.e. 
1 byte rounds to 1 page @@ -130,24 +111,10 @@ typedef int boolean_t; */ #ifdef KERNEL -#define round_page(x) ((vm_offset_t)((((vm_offset_t)(x)) + page_mask) & ~page_mask)) -#define trunc_page(x) ((vm_offset_t)(((vm_offset_t)(x)) & ~page_mask)) -#else KERNEL -#define round_page(x) ((((vm_offset_t)(x) + (vm_page_size - 1)) / vm_page_size) * vm_page_size) -#define trunc_page(x) ((((vm_offset_t)(x)) / vm_page_size) * vm_page_size) -#endif KERNEL - -#ifdef KERNEL -extern vm_size_t page_size; /* machine independent page size */ -extern vm_size_t page_mask; /* page_size - 1; mask for - offset within page */ -extern int page_shift; /* shift to use for page size */ - -extern vm_size_t mem_size; /* size of physical memory (bytes) */ extern vm_offset_t first_addr; /* first physical page */ extern vm_offset_t last_addr; /* last physical page */ -#endif KERNEL +#endif /* KERNEL */ -#endif ASSEMBLER +#endif /* ASSEMBLER */ -#endif _VM_PARAM_ +#endif /* _VM_PARAM_ */ diff --git a/sys/vm/vm_prot.h b/sys/vm/vm_prot.h index f830d811f947..29c0235a1e56 100644 --- a/sys/vm/vm_prot.h +++ b/sys/vm/vm_prot.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_prot.h 7.2 (Berkeley) 4/21/91 - * $Id: vm_prot.h,v 1.2 1993/10/16 16:20:54 rgrimes Exp $ + * $Id: vm_prot.h,v 1.4 1994/01/17 09:34:07 davidg Exp $ */ /* @@ -77,7 +77,7 @@ * vm_prot_t VM protection values. */ -typedef int vm_prot_t; +typedef char vm_prot_t; /* * Protection values, defined as bits within the vm_prot_t type @@ -101,4 +101,4 @@ typedef int vm_prot_t; #define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) -#endif _VM_PROT_ +#endif /* _VM_PROT_ */ diff --git a/sys/vm/vm_statistics.h b/sys/vm/vm_statistics.h index c258b1a01af2..f951b5581bf7 100644 --- a/sys/vm/vm_statistics.h +++ b/sys/vm/vm_statistics.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_statistics.h 7.2 (Berkeley) 4/21/91 - * $Id: vm_statistics.h,v 1.2 1993/10/16 16:20:55 rgrimes Exp $ + * $Id: vm_statistics.h,v 1.3 1993/11/07 17:54:28 wollman Exp $ */ /* @@ -91,8 +91,8 @@ typedef struct vm_statistics *vm_statistics_t; typedef struct vm_statistics vm_statistics_data_t; #ifdef KERNEL -vm_statistics_data_t vm_stat; -#endif KERNEL +extern vm_statistics_data_t vm_stat; +#endif /* KERNEL */ /* * Each machine dependent implementation is expected to diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c index 935d11cba89a..0a8a57457101 100644 --- a/sys/vm/vm_swap.c +++ b/sys/vm/vm_swap.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * from: @(#)vm_swap.c 7.18 (Berkeley) 5/6/91 - * $Id: vm_swap.c,v 1.4 1993/10/16 16:20:56 rgrimes Exp $ + * $Id: vm_swap.c,v 1.7 1993/12/19 00:56:15 wollman Exp $ */ #include "param.h" @@ -45,6 +45,9 @@ #include "specdev.h" #include "file.h" #include "rlist.h" +#include "kernel.h" + +static int swfree(struct proc *, int); /* * Indirect driver for multi-controller paging. @@ -59,6 +62,7 @@ int nswap, nswdev; * to buffers, but rather to pages that * are being swapped in and out. 
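
The vm_param.h hunk above drops the machine-independent PAGE_SIZE/PAGE_SHIFT definitions along with atop/ptoa and the kernel round_page/trunc_page macros, which are plain shift-and-mask arithmetic on a power-of-two page size. Below is a self-contained illustration of that arithmetic, assuming a hypothetical 4096-byte page; the constants and the example address are not taken from the patch.

#include <assert.h>
#include <stdio.h>

typedef unsigned long vm_offset_t;

#define PAGE_SHIFT 12                         /* illustrative: 4096-byte pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (PAGE_SIZE - 1)

#define atop(x)        ((vm_offset_t)(x) >> PAGE_SHIFT)   /* address -> page number */
#define ptoa(x)        ((vm_offset_t)(x) << PAGE_SHIFT)   /* page number -> address */
#define round_page(x)  (((vm_offset_t)(x) + PAGE_MASK) & ~PAGE_MASK)
#define trunc_page(x)  ((vm_offset_t)(x) & ~PAGE_MASK)

int main(void)
{
    vm_offset_t addr = 0x12345;               /* arbitrary example address */

    assert(trunc_page(addr) == 0x12000);      /* down to the page boundary  */
    assert(round_page(addr) == 0x13000);      /* up to the next boundary    */
    assert(round_page(0x12000) == 0x12000);   /* already aligned: unchanged */
    assert(atop(addr) == 0x12);               /* page index of the address  */
    assert(ptoa(atop(addr)) == 0x12000);

    printf("page arithmetic checks passed\n");
    return 0;
}

The masking form only works when the page size is a power of two; the non-kernel fallback in the removed lines divides and multiplies by vm_page_size instead, which makes no such assumption.
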
*/ +void swapinit() { register int i; @@ -87,7 +91,7 @@ swapinit() panic("swapvp"); if (error = swfree(&proc0, 0)) { printf("\nwarning: no swap space present (yet)\n"); - /* printf("(swfree (..., 0) -> %d)\n", error); /* XXX */ + /* printf("(swfree (..., 0) -> %d)\n", error);*/ /* XXX */ /*panic("swapinit swfree 0");*/ } @@ -100,6 +104,7 @@ swapinit() sp->av_forw = NULL; } +void swstrategy(bp) register struct buf *bp; { @@ -173,6 +178,7 @@ struct swapon_args { }; /* ARGSUSED */ +int swapon(p, uap, retval) struct proc *p; struct swapon_args *uap; @@ -227,6 +233,7 @@ swapon(p, uap, retval) * space, which is laid out with blocks of dmmax pages circularly * among the devices. */ +static int swfree(p, index) struct proc *p; int index; diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c index f658f633164f..169bf376357b 100644 --- a/sys/vm/vm_unix.c +++ b/sys/vm/vm_unix.c @@ -37,7 +37,7 @@ * * from: Utah $Hdr: vm_unix.c 1.1 89/11/07$ * from: @(#)vm_unix.c 7.2 (Berkeley) 4/20/91 - * $Id: vm_unix.c,v 1.3 1993/10/16 16:20:58 rgrimes Exp $ + * $Id: vm_unix.c,v 1.5 1993/12/12 12:27:26 davidg Exp $ */ /* @@ -55,6 +55,7 @@ struct obreak_args { }; /* ARGSUSED */ +int obreak(p, uap, retval) struct proc *p; struct obreak_args *uap; @@ -90,42 +91,12 @@ obreak(p, uap, retval) return(0); } -/* - * Enlarge the "stack segment" to include the specified - * stack pointer for the process. - */ -grow(p, sp) - struct proc *p; - unsigned sp; -{ - register struct vmspace *vm = p->p_vmspace; - register int si; - - /* - * For user defined stacks (from sendsig). - */ - if (sp < (unsigned)vm->vm_maxsaddr) - return (0); - /* - * For common case of already allocated (from trap). - */ - if (sp >= (unsigned)vm->vm_maxsaddr + MAXSSIZ - ctob(vm->vm_ssize)) - return (1); - /* - * Really need to check vs limit and increment stack size if ok. - */ - si = clrnd(btoc(vm->vm_maxsaddr + MAXSSIZ - sp) - vm->vm_ssize); - if (vm->vm_ssize + si > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) - return (0); - vm->vm_ssize += si; - return (1); -} - struct ovadvise_args { int anom; }; /* ARGSUSED */ +int ovadvise(p, uap, retval) struct proc *p; struct ovadvise_args *uap; diff --git a/sys/vm/vm_user.c b/sys/vm/vm_user.c index 88ae9b526148..5cc5dfbb5d21 100644 --- a/sys/vm/vm_user.c +++ b/sys/vm/vm_user.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * from: @(#)vm_user.c 7.3 (Berkeley) 4/21/91 - * $Id: vm_user.c,v 1.3 1993/10/16 16:20:59 rgrimes Exp $ + * $Id: vm_user.c,v 1.5 1993/11/25 01:39:18 wollman Exp $ */ /* @@ -91,6 +91,7 @@ struct svm_allocate_args { }; /* ARGSUSED */ +int svm_allocate(p, uap, retval) struct proc *p; struct svm_allocate_args *uap; @@ -99,7 +100,7 @@ svm_allocate(p, uap, retval) vm_offset_t addr; int rv; - uap->map = p->p_map; /* XXX */ + uap->map = &p->p_vmspace->vm_map; /* XXX */ if (copyin((caddr_t)uap->addr, (caddr_t)&addr, sizeof (addr))) rv = KERN_INVALID_ARGUMENT; @@ -119,6 +120,7 @@ struct svm_deallocate_args { }; /* ARGSUSED */ +int svm_deallocate(p, uap, retval) struct proc *p; struct svm_deallocate_args *uap; @@ -126,7 +128,7 @@ svm_deallocate(p, uap, retval) { int rv; - uap->map = p->p_map; /* XXX */ + uap->map = &p->p_vmspace->vm_map; /* XXX */ rv = vm_deallocate(uap->map, uap->addr, uap->size); return((int)rv); } @@ -139,6 +141,7 @@ struct svm_inherit_args { }; /* ARGSUSED */ +int svm_inherit(p, uap, retval) struct proc *p; struct svm_inherit_args *uap; @@ -146,7 +149,7 @@ svm_inherit(p, uap, retval) { int rv; - uap->map = p->p_map; /* XXX */ + uap->map = &p->p_vmspace->vm_map; /* XXX */ rv = vm_inherit(uap->map, uap->addr, uap->size, uap->inherit); return((int)rv); } @@ -160,6 +163,7 @@ struct svm_protect_args { }; /* ARGSUSED */ +int svm_protect(p, uap, retval) struct proc *p; struct svm_protect_args *uap; @@ -167,7 +171,7 @@ svm_protect(p, uap, retval) { int rv; - uap->map = p->p_map; /* XXX */ + uap->map = &p->p_vmspace->vm_map; /* XXX */ rv = vm_protect(uap->map, uap->addr, uap->size, uap->setmax, uap->prot); return((int)rv); } @@ -177,6 +181,7 @@ svm_protect(p, uap, retval) * vm_allocate allocates "zero fill" memory in the specfied * map. */ +int vm_allocate(map, addr, size, anywhere) register vm_map_t map; register vm_offset_t *addr; @@ -208,6 +213,7 @@ vm_allocate(map, addr, size, anywhere) * vm_deallocate deallocates the specified range of addresses in the * specified address map. */ +int vm_deallocate(map, start, size) register vm_map_t map; vm_offset_t start; @@ -226,6 +232,7 @@ vm_deallocate(map, start, size) * vm_inherit sets the inheritence of the specified range in the * specified map. */ +int vm_inherit(map, start, size, new_inheritance) register vm_map_t map; vm_offset_t start; @@ -243,6 +250,7 @@ vm_inherit(map, start, size, new_inheritance) * specified map. */ +int vm_protect(map, start, size, set_maximum, new_protection) register vm_map_t map; vm_offset_t start; diff --git a/sys/vm/vm_user.h b/sys/vm/vm_user.h index a8f13fd34c70..871f806e88d3 100644 --- a/sys/vm/vm_user.h +++ b/sys/vm/vm_user.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * from: @(#)vm_user.h 7.2 (Berkeley) 4/21/91 - * $Id: vm_user.h,v 1.2 1993/10/16 16:21:00 rgrimes Exp $ + * $Id: vm_user.h,v 1.4 1993/12/19 00:56:16 wollman Exp $ */ /* @@ -71,10 +71,47 @@ #ifndef _VM_USER_ #define _VM_USER_ -int vm_allocate(); -int vm_deallocate(); -int vm_inherit(); -int vm_protect(); -int vm_statistics(); +#ifdef KERNEL -#endif _VM_USER_ +#include "sys/cdefs.h" +#include "vm/vm_param.h" +#include "vm/vm_inherit.h" +#include "vm/vm_prot.h" + +struct vm_map; struct vm_object; struct pager_struct; + +extern int munmapfd(struct proc *, int); +extern int vm_mmap(struct vm_map *, vm_offset_t *, vm_size_t, vm_prot_t, + vm_prot_t, int, caddr_t, vm_offset_t); +extern int vm_region(struct vm_map *, vm_offset_t *, vm_size_t *, vm_prot_t *, + vm_prot_t *, vm_inherit_t *, boolean_t *, + struct vm_object **, + vm_offset_t *); +extern int vm_allocate_with_pager(struct vm_map *, vm_offset_t *, vm_size_t, + boolean_t, struct pager_struct *, + vm_offset_t, boolean_t); + + +extern int vm_allocate(struct vm_map *, vm_offset_t *, vm_size_t, boolean_t); +extern int vm_deallocate(struct vm_map *, vm_offset_t, vm_size_t); +extern int vm_inherit(struct vm_map *, vm_offset_t, vm_size_t, vm_inherit_t); +extern int vm_protect(struct vm_map *, vm_offset_t, vm_size_t, boolean_t, + vm_prot_t); + +#else /* not KERNEL */ +#include <sys/cdefs.h> +#include <vm/vm_param.h> +#include <vm/vm_inherit.h> +#include <vm/vm_prot.h> + +__BEGIN_DECLS + +int vm_allocate __P((void *, vm_offset_t *, vm_size_t, boolean_t)); +int vm_deallocate __P((void *, vm_offset_t, vm_size_t)); +int vm_inherit __P((void *, vm_offset_t, vm_size_t, vm_inherit_t)); +int vm_protect __P((void *, vm_offset_t, vm_size_t, boolean_t, vm_prot_t)); + +__END_DECLS + +#endif /* not KERNEL */ +#endif /* _VM_USER_ */ diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index 41c2872e4d39..c35971ba67e0 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -2,6 +2,7 @@ * Copyright (c) 1990 University of Utah. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. + * Copyright (c) 1993 John S. Dyson * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer @@ -36,7 +37,7 @@ * SUCH DAMAGE. * * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 - * $Id: vnode_pager.c,v 1.2 1993/10/16 16:21:02 rgrimes Exp $ + * $Id: vnode_pager.c,v 1.11.2.3 1994/04/18 04:57:49 rgrimes Exp $ */ /* @@ -46,8 +47,24 @@ * pageouts * fix credential use (uses current process credentials now) */ -#include "vnodepager.h" -#if NVNODEPAGER > 0 + +/* + * MODIFICATIONS: + * John S. Dyson 08 Dec 93 + * + * This file in conjunction with some vm_fault mods, eliminate the performance + * advantage for using the buffer cache and minimize memory copies. 
+ * + * 1) Supports multiple - block reads + * 2) Bypasses buffer cache for reads + * + * TODO: + * + * 1) Totally bypass buffer cache for reads + * (Currently will still sometimes use buffer cache for reads) + * 2) Bypass buffer cache for writes + * (Code does not support it, but mods are simple) + */ #include "param.h" #include "proc.h" @@ -57,12 +74,34 @@ #include "mount.h" #include "vm_param.h" +#include "vm.h" #include "lock.h" #include "queue.h" #include "vm_prot.h" #include "vm_object.h" #include "vm_page.h" #include "vnode_pager.h" +#include "vm_map.h" +#include "vm_pageout.h" +#include "buf.h" +#include "specdev.h" + +struct pagerops vnodepagerops = { + vnode_pager_init, + vnode_pager_alloc, + vnode_pager_dealloc, + vnode_pager_getpage, + vnode_pager_getmulti, + vnode_pager_putpage, + vnode_pager_haspage +}; + +static int vnode_pager_io(vn_pager_t vnp, vm_page_t *m, int count, int reqpage, + enum uio_rw rw); +struct buf * getpbuf() ; +void relpbuf(struct buf *bp) ; + +extern vm_map_t pager_map; queue_head_t vnode_pager_list; /* list of managed vnodes */ @@ -91,10 +130,11 @@ vnode_pager_init() * Handle is a vnode pointer. */ vm_pager_t -vnode_pager_alloc(handle, size, prot) +vnode_pager_alloc(handle, size, prot, offset) caddr_t handle; vm_size_t size; vm_prot_t prot; + vm_offset_t offset; { register vm_pager_t pager; register vn_pager_t vnp; @@ -200,17 +240,37 @@ vnode_pager_dealloc(pager) free((caddr_t)pager, M_VMPAGER); } +int +vnode_pager_getmulti(pager, m, count, reqpage, sync) + vm_pager_t pager; + vm_page_t *m; + int count; + int reqpage; + boolean_t sync; +{ + + return vnode_pager_io((vn_pager_t) pager->pg_data, m, count, reqpage, UIO_READ); +} + + +int vnode_pager_getpage(pager, m, sync) vm_pager_t pager; vm_page_t m; boolean_t sync; { + int err; + vm_page_t marray[1]; #ifdef DEBUG if (vpagerdebug & VDB_FOLLOW) printf("vnode_pager_getpage(%x, %x)\n", pager, m); #endif - return(vnode_pager_io((vn_pager_t)pager->pg_data, m, UIO_READ)); + if (pager == NULL) + return FALSE; + marray[0] = m; + + return vnode_pager_io((vn_pager_t)pager->pg_data, marray, 1, 0, UIO_READ); } boolean_t @@ -220,19 +280,17 @@ vnode_pager_putpage(pager, m, sync) boolean_t sync; { int err; + vm_page_t marray[1]; #ifdef DEBUG if (vpagerdebug & VDB_FOLLOW) printf("vnode_pager_putpage(%x, %x)\n", pager, m); #endif if (pager == NULL) - return; - err = vnode_pager_io((vn_pager_t)pager->pg_data, m, UIO_WRITE); - if (err == VM_PAGER_OK) { - m->clean = TRUE; /* XXX - wrong place */ - pmap_clear_modify(VM_PAGE_TO_PHYS(m)); /* XXX - wrong place */ - } - return(err); + return FALSE; + marray[0] = m; + err = vnode_pager_io((vn_pager_t)pager->pg_data, marray, 1, 0, UIO_WRITE); + return err; } boolean_t @@ -292,6 +350,7 @@ vnode_pager_haspage(pager, offset) * Note: this routine may be invoked as a result of a pager put * operation (possibly at object termination time), so we must be careful. */ +void vnode_pager_setsize(vp, nsize) struct vnode *vp; u_long nsize; @@ -329,20 +388,22 @@ vnode_pager_setsize(vp, nsize) printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n", vp, object, vnp->vnp_size, nsize); #endif + /* * File has shrunk. * Toss any cached pages beyond the new EOF. 
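
The change that follows makes vnode_pager_setsize() compare and remove on page-rounded sizes: the object caches whole pages, so the partially valid last page must stay, and there is nothing to toss unless the old and new sizes round to different page boundaries. Below is a small model of that decision, assuming a 4096-byte page; pages_to_toss() is a hypothetical helper written for this example, not a kernel routine.

#include <assert.h>
#include <stdio.h>

typedef unsigned long vm_offset_t;

#define PAGE_SIZE 4096UL                                  /* illustrative */
#define round_page(x) ((((vm_offset_t)(x)) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

/*
 * Hypothetical helper: given old and new file sizes, report the byte range
 * of cached pages that may be discarded after a truncation.  Returns 0 when
 * the shrink stays inside the final page and nothing can be tossed.
 */
static int pages_to_toss(vm_offset_t oldsize, vm_offset_t newsize,
                         vm_offset_t *start, vm_offset_t *end)
{
    if (round_page(newsize) >= round_page(oldsize))
        return 0;                     /* same last page (or a grow): keep all */
    *start = round_page(newsize);     /* first page wholly beyond the new EOF */
    *end = round_page(oldsize);       /* one past the last cached page        */
    return 1;
}

int main(void)
{
    vm_offset_t s, e;

    /* 20000 -> 10000 bytes: the pages at [12288, 20480) are now past EOF. */
    assert(pages_to_toss(20000, 10000, &s, &e) == 1 && s == 12288 && e == 20480);

    /* 20000 -> 17000 bytes: both sizes sit in the page at 16384; toss nothing. */
    assert(pages_to_toss(20000, 17000, &s, &e) == 0);

    printf("truncation checks passed\n");
    return 0;
}
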
*/ - if (nsize < vnp->vnp_size) { + if (round_page(nsize) < round_page(vnp->vnp_size)) { vm_object_lock(object); vm_object_page_remove(object, - (vm_offset_t)nsize, vnp->vnp_size); + (vm_offset_t)round_page(nsize), round_page(vnp->vnp_size)); vm_object_unlock(object); } vnp->vnp_size = (vm_offset_t)nsize; vm_object_deallocate(object); } +void vnode_pager_umount(mp) register struct mount *mp; { @@ -407,78 +468,474 @@ vnode_pager_uncache(vp) return(uncached); } -vnode_pager_io(vnp, m, rw) - register vn_pager_t vnp; + +void +vnode_pager_freepage(m) vm_page_t m; +{ + PAGE_WAKEUP(m); + vm_page_free(m); +} + +/* + * calculate the linear (byte) disk address of specified virtual + * file address + */ +vm_offset_t +vnode_pager_addr(vp, address) + struct vnode *vp; + vm_offset_t address; +{ + int rtaddress; + int bsize; + vm_offset_t block; + struct vnode *rtvp; + int err; + int vblock, voffset; + + bsize = vp->v_mount->mnt_stat.f_bsize; + vblock = address / bsize; + voffset = address % bsize; + + err = VOP_BMAP(vp,vblock,&rtvp,&block); + + rtaddress = block * DEV_BSIZE + voffset; + + return rtaddress; +} + +/* + * interrupt routine for I/O completion + */ +void +vnode_pager_iodone(bp) + struct buf *bp; +{ + bp->b_flags |= B_DONE; + wakeup((caddr_t)bp); +} + +/* + * vnode_pager_io: + * Perform read or write operation for vnode_paging + * + * args: + * vnp -- pointer to vnode pager data structure + * containing size and vnode pointer, etc + * + * m -- pointer to array of vm_page_t entries to + * do I/O to. It is not necessary to fill any + * pages except for the reqpage entry. If a + * page is not filled, it needs to be removed + * from its object... + * + * count -- number of pages for I/O + * + * reqpage -- fault requested page for I/O + * (index into vm_page_t entries above) + * + * rw -- UIO_READ or UIO_WRITE + * + * NOTICE!!!! direct writes look like that they are close to being + * implemented. They are not really, several things need + * to be done to make it work (subtile things.) Hack at + * your own risk (direct writes are scarey). + * + * ANOTHER NOTICE!!!! + * we currently only support direct I/O to filesystems whose + * contiguously allocated blocksize is at least a vm page. + * changes will be made in the future to support more flexibility. + */ + +int +vnode_pager_io(vnp, m, count, reqpage, rw) + register vn_pager_t vnp; + vm_page_t *m; + int count, reqpage; enum uio_rw rw; { + int i,j; struct uio auio; struct iovec aiov; vm_offset_t kva, foff; - int error, size; + int size; struct proc *p = curproc; /* XXX */ + vm_object_t object; + vm_offset_t paging_offset; + struct vnode *dp, *vp; + vm_offset_t mapsize; + int bsize; + int errtype=0; /* 0 is file type otherwise vm type */ + int error = 0; + int trimmed; -#ifdef DEBUG - if (vpagerdebug & VDB_FOLLOW) - printf("vnode_pager_io(%x, %x, %c): vnode %x\n", - vnp, m, rw == UIO_READ ? 'R' : 'W', vnp->vnp_vp); -#endif - foff = m->offset + m->object->paging_offset; + object = m[reqpage]->object; /* all vm_page_t items are in same object */ + paging_offset = object->paging_offset; + + vp = vnp->vnp_vp; + bsize = vp->v_mount->mnt_stat.f_bsize; + + /* get the UNDERLYING device for the file with VOP_BMAP() */ /* - * Return failure if beyond current EOF + * originally, we did not check for an error return + * value -- assuming an fs always has a bmap entry point + * -- that assumption is wrong!!! 
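
vnode_pager_addr() above converts a byte offset in the file to a byte address on the underlying device: split the offset into a logical filesystem block and an offset within that block, let VOP_BMAP() translate the logical block to a device block, then scale by DEV_BSIZE and add the intra-block offset back. The sketch below reproduces only that arithmetic, with a made-up bmap table standing in for VOP_BMAP(); the block size and layout are illustrative.

#include <assert.h>
#include <stdio.h>

#define DEV_BSIZE 512                    /* device sector size */

/* Stand-in for VOP_BMAP(): logical fs block -> device block (DEV_BSIZE units).
 * Only valid for the first four blocks of this made-up file. */
static long fake_bmap(long vblock)
{
    static const long map[] = { 1000, 1008, 5000, 5008 };
    return map[vblock];
}

/* Same arithmetic as vnode_pager_addr(), with the bmap call stubbed out. */
static long vnode_pager_addr_model(long address, long bsize)
{
    long vblock  = address / bsize;      /* which filesystem block        */
    long voffset = address % bsize;      /* where inside that block       */
    long block   = fake_bmap(vblock);    /* device block for the fs block */

    return block * DEV_BSIZE + voffset;  /* byte address on the device    */
}

int main(void)
{
    long bsize = 4096;                   /* illustrative filesystem block size */

    /* Offset 5000 = block 1, byte 904 into it; block 1 starts at sector 1008. */
    assert(vnode_pager_addr_model(5000, bsize) == 1008L * DEV_BSIZE + 904);
    printf("mapped file offset 5000 to device byte %ld\n",
           vnode_pager_addr_model(5000, bsize));
    return 0;
}
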
*/ - if (foff >= vnp->vnp_size) { -#ifdef DEBUG - if (vpagerdebug & VDB_SIZE) - printf("vnode_pager_io: vp %x, off %d size %d\n", - vnp->vnp_vp, foff, vnp->vnp_size); -#endif - return(VM_PAGER_BAD); + /* + * we only do direct I/O if the file is on a local + * BLOCK device and currently if it is a read operation only. + */ + kva = 0; + mapsize = 0; + if (!VOP_BMAP(vp, m[reqpage]->offset+paging_offset, &dp, 0) && + rw == UIO_READ && ((dp->v_type == VBLK && + (vp->v_mount->mnt_stat.f_type == MOUNT_UFS)) || + (vp->v_mount->mnt_stat.f_type == MOUNT_NFS))) { + /* + * we do not block for a kva, notice we default to a kva + * conservative behavior + */ + kva = kmem_alloc_pageable(pager_map, + (mapsize = count*NBPG)); + if( !kva) { + for (i = 0; i < count; i++) { + if (i != reqpage) { + vnode_pager_freepage(m[i]); + m[i] = 0; + } + } + m[0] = m[reqpage]; + kva = vm_pager_map_page(m[0]); + reqpage = 0; + count = 1; + mapsize = count*NBPG; + } } - if (foff + PAGE_SIZE > vnp->vnp_size) - size = vnp->vnp_size - foff; - else - size = PAGE_SIZE; + + if (!kva) { + /* + * here on I/O through VFS + */ + for (i = 0; i < count; i++) { + if (i != reqpage) { + vnode_pager_freepage(m[i]); + m[i] = 0; + } + } + m[0] = m[reqpage]; + foff = m[0]->offset + paging_offset; + reqpage = 0; + count = 1; /* - * Allocate a kernel virtual address and initialize so that - * we can use VOP_READ/WRITE routines. + * Return failure if beyond current EOF */ - kva = vm_pager_map_page(m); - aiov.iov_base = (caddr_t)kva; - aiov.iov_len = size; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = foff; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = rw; - auio.uio_resid = size; - auio.uio_procp = (struct proc *)0; -#ifdef DEBUG - if (vpagerdebug & VDB_IO) - printf("vnode_pager_io: vp %x kva %x foff %x size %x", - vnp->vnp_vp, kva, foff, size); -#endif - if (rw == UIO_READ) - error = VOP_READ(vnp->vnp_vp, &auio, 0, p->p_ucred); - else - error = VOP_WRITE(vnp->vnp_vp, &auio, 0, p->p_ucred); -#ifdef DEBUG - if (vpagerdebug & VDB_IO) { - if (error || auio.uio_resid) - printf(" returns error %x, resid %x", - error, auio.uio_resid); - printf("\n"); + if (foff >= vnp->vnp_size) { + errtype = 1; + error = VM_PAGER_BAD; + } else { + if (foff + NBPG > vnp->vnp_size) + size = vnp->vnp_size - foff; + else + size = NBPG; +/* + * Allocate a kernel virtual address and initialize so that + * we can use VOP_READ/WRITE routines. + */ + kva = vm_pager_map_page(m[0]); + aiov.iov_base = (caddr_t)kva; + aiov.iov_len = size; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = foff; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = rw; + auio.uio_resid = size; + auio.uio_procp = (struct proc *)0; + if (rw == UIO_READ) { + error = VOP_READ(vp, &auio, IO_PAGER, p->p_ucred); + } else { + error = VOP_WRITE(vp, &auio, IO_PAGER, p->p_ucred); + } + if (!error) { + register int count = size - auio.uio_resid; + + if (count == 0) + error = EINVAL; + else if (count != NBPG && rw == UIO_READ) + bzero((caddr_t)kva + count, NBPG - count); + } + vm_pager_unmap_page(kva); + } + } else { + + /* + * here on direct device I/O + */ + int first=0, last=count; + int reqaddr, firstaddr; + int block, offset; + + struct buf *bp; + int s; + int failflag; + + foff = m[reqpage]->offset + paging_offset; + + /* + * This pathetic hack gets data from the buffer cache, if it's there. 
+ * I believe that this is not really necessary, and the ends can + * be gotten by defaulting to the normal vfs read behavior, but this + * might be more efficient, because the will NOT invoke read-aheads + * and one of the purposes of this code is to bypass the buffer + * cache and keep from flushing it by reading in a program. + */ + /* + * calculate logical block and offset + */ + block = foff / bsize; + offset = foff % bsize; + s = splbio(); + + /* + * if we have a buffer in core, then try to use it + */ + while (bp = incore(vp, block)) { + int amount; + + /* + * wait until the buffer is avail or gone + */ + if (bp->b_flags & B_BUSY) { + bp->b_flags |= B_WANTED; + tsleep ((caddr_t)bp, PVM, "vnwblk", 0); + continue; + } + + amount = NBPG; + if ((foff + amount) > vnp->vnp_size) + amount = vnp->vnp_size - foff; + + /* + * make sure that this page is in the buffer + */ + if ((amount > 0) && (offset + amount) <= bp->b_bcount) { + bp->b_flags |= B_BUSY; + splx(s); + + /* + * map the requested page + */ + pmap_enter(vm_map_pmap(pager_map), + kva, VM_PAGE_TO_PHYS(m[reqpage]), + VM_PROT_DEFAULT, TRUE); + /* + * copy the data from the buffer + */ + bcopy(bp->b_un.b_addr + offset, (caddr_t)kva, amount); + if (amount < NBPG) { + bzero((caddr_t)kva + amount, NBPG - amount); + } + /* + * unmap the page and free the kva + */ + pmap_remove(vm_map_pmap(pager_map), kva, kva + NBPG); + kmem_free_wakeup(pager_map, kva, mapsize); + /* + * release the buffer back to the block subsystem + */ + bp->b_flags &= ~B_BUSY; + wakeup((caddr_t)bp); + /* + * we did not have to do any work to get the requested + * page, the read behind/ahead does not justify a read + */ + for (i = 0; i < count; i++) { + if (i != reqpage) { + vnode_pager_freepage(m[i]); + m[i] = 0; + } + } + /* + * sorry for the goto + */ + goto finishup; + } + /* + * buffer is nowhere to be found, read from the disk + */ + break; + } + + foff = m[reqpage]->offset + paging_offset; + reqaddr = vnode_pager_addr(vp, foff); + /* + * Make sure that our I/O request is contiguous. + * Scan backward and stop for the first discontiguous + * entry or stop for a page being in buffer cache. + */ + failflag = 0; + for (i = reqpage - 1; i >= 0; --i) { + int myaddr; + if (failflag || + incore(vp, (foff + (i - reqpage) * NBPG) / bsize) || + (myaddr = vnode_pager_addr(vp, m[i]->offset + paging_offset)) + != reqaddr + (i - reqpage) * NBPG) { + vnode_pager_freepage(m[i]); + m[i] = 0; + if (first == 0) + first = i + 1; + failflag = 1; + } + } + + /* + * Scan forward and stop for the first non-contiguous + * entry or stop for a page being in buffer cache. + */ + failflag = 0; + for (i = reqpage + 1; i < count; i++) { + int myaddr; + if (failflag || + incore(vp, (foff + (i - reqpage) * NBPG) / bsize) || + (myaddr = vnode_pager_addr(vp, m[i]->offset + paging_offset)) + != reqaddr + (i - reqpage) * NBPG) { + vnode_pager_freepage(m[i]); + m[i] = 0; + if (last == count) + last = i; + failflag = 1; + } + } + + /* + * the first and last page have been calculated now, move input + * pages to be zero based... 
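
The backward and forward scans above trim the proposed cluster to the largest run of pages around the requested page that is contiguous on disk and not already in the buffer cache; the code that follows then shifts the survivors down so the array starts at index 0. Below is a minimal model of that bookkeeping on plain arrays, assuming a hypothetical page size and a simple cached[] flag in place of incore(); it reproduces the index handling only, not the page freeing or the I/O.

#include <assert.h>
#include <stdio.h>

#define NBPG 4096                           /* illustrative page size */

/*
 * Trim a candidate cluster to the pages that are disk-contiguous with the
 * requested page and not already cached, then re-base the array so it starts
 * at index 0.  Mirrors the first/last bookkeeping in vnode_pager_io().
 */
static int trim_cluster(long addr[], int cached[], int count, int *reqpage)
{
    int req = *reqpage;
    int first = 0, last = count;
    int i, failed;

    /* Scan backward from the requested page. */
    failed = 0;
    for (i = req - 1; i >= 0; i--)
        if (failed || cached[i] || addr[i] != addr[req] + (long)(i - req) * NBPG) {
            if (first == 0)
                first = i + 1;
            failed = 1;
        }

    /* Scan forward from the requested page. */
    failed = 0;
    for (i = req + 1; i < count; i++)
        if (failed || cached[i] || addr[i] != addr[req] + (long)(i - req) * NBPG) {
            if (last == count)
                last = i;
            failed = 1;
        }

    /* Keep [first, last) and shift it down to index 0. */
    count = last;
    for (i = first; i < count; i++) {
        addr[i - first] = addr[i];
        cached[i - first] = cached[i];
    }
    *reqpage = req - first;
    return count - first;
}

int main(void)
{
    /* Five candidate pages; index 2 is the fault page.  Page 0 is not
     * disk-contiguous and page 4 is already cached, so the run is pages 1-3. */
    long addr[]  = { 9999, 8192, 12288, 16384, 20480 };
    int cached[] = { 0, 0, 0, 0, 1 };
    int req = 2;

    int n = trim_cluster(addr, cached, 5, &req);
    assert(n == 3 && req == 1 && addr[0] == 8192 && addr[2] == 16384);
    printf("cluster of %d pages, request at index %d\n", n, req);
    return 0;
}
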
+ */ + count = last; + if (first != 0) { + for (i = first; i < count; i++) { + m[i - first] = m[i]; + } + count -= first; + reqpage -= first; + } + + + /* + * calculate the file virtual address for the transfer + */ + foff = m[0]->offset + paging_offset; + /* + * and get the disk physical address (in bytes) + */ + firstaddr = vnode_pager_addr(vp, foff); + + /* + * calculate the size of the transfer + */ + if ((m[count - 1]->offset + paging_offset) + NBPG > vnp->vnp_size) + size = vnp->vnp_size - foff; + else + size = count * NBPG; + + + /* + * and map the pages to be read into the kva + */ + for (i = 0; i < count; i++) + pmap_enter(vm_map_pmap(pager_map), + kva + NBPG * i, VM_PAGE_TO_PHYS(m[i]), + VM_PROT_DEFAULT, TRUE); + VHOLD(vp); + bp = getpbuf(); + + /* build a minimal buffer header */ + bzero((caddr_t)bp, sizeof(struct buf)); + bp->b_flags = B_BUSY | B_READ | B_CALL; + bp->b_iodone = vnode_pager_iodone; + /* B_PHYS is not set, but it is nice to fill this in */ + /* bp->b_proc = &proc0; */ + bp->b_proc = curproc; + bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; + bp->b_un.b_addr = (caddr_t) kva; + bp->b_blkno = firstaddr / DEV_BSIZE; + bp->b_vp = dp; + + /* Should be a BLOCK or character DEVICE if we get here */ + bp->b_dev = dp->v_rdev; + bp->b_bcount = NBPG * count; + + /* do the input */ + VOP_STRATEGY(bp); + + /* we definitely need to be at splbio here */ + + while ((bp->b_flags & B_DONE) == 0) { + tsleep((caddr_t)bp, PVM, "vnread", 0); + } + splx(s); + if ((bp->b_flags & B_ERROR) != 0) + error = EIO; + + if (!error) { + if (size != count * NBPG) + bzero((caddr_t)kva + size, NBPG * count - size); + } + HOLDRELE(vp); + + pmap_remove(vm_map_pmap(pager_map), kva, kva + NBPG * count); + kmem_free_wakeup(pager_map, kva, mapsize); + + /* + * free the buffer header back to the swap buffer pool + */ + relpbuf(bp); + } -#endif - if (!error) { - register int count = size - auio.uio_resid; - if (count == 0) - error = EINVAL; - else if (count != PAGE_SIZE && rw == UIO_READ) - bzero(kva + count, PAGE_SIZE - count); +finishup: + if (rw == UIO_READ) + for (i = 0; i < count; i++) { + /* + * we dont mess with pages that have been already + * deallocated.... + */ + if (!m[i]) + continue; + pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); + m[i]->flags |= PG_CLEAN; + m[i]->flags &= ~PG_LAUNDRY; + if (i != reqpage) { + /* + * whether or not to leave the page activated + * is up in the air, but we should put the page + * on a page queue somewhere. (it already is in + * the object). + * Result: It appears that emperical results show + * that deactivating pages is best. + */ + /* + * just in case someone was asking for this + * page we now tell them that it is ok to use + */ + if (!error) { + vm_page_deactivate(m[i]); + PAGE_WAKEUP(m[i]); + m[i]->flags &= ~PG_FAKE; + } else { + vnode_pager_freepage(m[i]); + } + } + } + if (!error && rw == UIO_WRITE) { + pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage])); + m[reqpage]->flags |= PG_CLEAN; + m[reqpage]->flags &= ~PG_LAUNDRY; } - vm_pager_unmap_page(kva); + if (error) { + printf("vnode pager error: %d\n", error); + } + if (errtype) + return error; return (error ? VM_PAGER_FAIL : VM_PAGER_OK); } -#endif diff --git a/sys/vm/vnode_pager.h b/sys/vm/vnode_pager.h index 020543576a55..3cabd7221cc4 100644 --- a/sys/vm/vnode_pager.h +++ b/sys/vm/vnode_pager.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * from: @(#)vnode_pager.h 7.1 (Berkeley) 12/5/90 - * $Id: vnode_pager.h,v 1.2 1993/10/16 16:21:03 rgrimes Exp $ + * $Id: vnode_pager.h,v 1.5 1994/01/17 09:34:14 davidg Exp $ */ #ifndef _VNODE_PAGER_ @@ -60,20 +60,15 @@ typedef struct vnpager *vn_pager_t; #ifdef KERNEL void vnode_pager_init(); -vm_pager_t vnode_pager_alloc(); +vm_pager_t vnode_pager_alloc(caddr_t, vm_offset_t, vm_prot_t, vm_offset_t); void vnode_pager_dealloc(); -int vnode_pager_getpage(), vnode_pager_putpage(); +int vnode_pager_getpage(); +int vnode_pager_getmulti(); +int vnode_pager_putpage(); boolean_t vnode_pager_haspage(); -struct pagerops vnodepagerops = { - vnode_pager_init, - vnode_pager_alloc, - vnode_pager_dealloc, - vnode_pager_getpage, - vnode_pager_putpage, - vnode_pager_haspage -}; +extern struct pagerops vnodepagerops; -#endif +#endif /* KERNEL */ #endif /* _VNODE_PAGER_ */ |
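
Across the whole patch, pager operations are reached through struct pagerops function-pointer tables, and vm_pager_getmulti() simply zero-fills the pages when no pager backs the object. The fragment below models that dispatch pattern in ordinary C with a toy ops table and a NULL-pager fallback; the types and functions are simplified stand-ins, not the kernel interfaces.

#include <stdio.h>
#include <string.h>

#define NBPG 4096
#define VM_PAGER_OK 0

/* A "page" is just a buffer here; the kernel passes vm_page_t's instead. */
struct page { unsigned char data[NBPG]; };

/* Simplified ops table, in the spirit of struct pagerops. */
struct pagerops {
    int (*pgo_getpage)(struct page *pg);
    int (*pgo_getmulti)(struct page *pgs, int count, int reqpage);
};

static int demo_getpage(struct page *pg)
{
    memset(pg->data, 'x', NBPG);         /* pretend this came from backing store */
    return VM_PAGER_OK;
}

static int demo_getmulti(struct page *pgs, int count, int reqpage)
{
    (void)reqpage;
    for (int i = 0; i < count; i++)
        demo_getpage(&pgs[i]);
    return VM_PAGER_OK;
}

static struct pagerops demo_pagerops = { demo_getpage, demo_getmulti };

/* Like vm_pager_getmulti(): no pager means the pages are simply zero-filled. */
static int pager_getmulti(struct pagerops *ops, struct page *pgs, int count, int reqpage)
{
    if (ops == NULL) {
        for (int i = 0; i < count; i++)
            memset(pgs[i].data, 0, NBPG);
        return VM_PAGER_OK;
    }
    return ops->pgo_getmulti(pgs, count, reqpage);
}

int main(void)
{
    struct page pages[3];

    pager_getmulti(&demo_pagerops, pages, 3, 1);
    printf("backed read:    first byte = '%c'\n", pages[0].data[0]);

    pager_getmulti(NULL, pages, 3, 1);
    printf("anonymous fill: first byte = %d\n", pages[0].data[0]);
    return 0;
}

Only pgo_getpage and pgo_getmulti are modeled here; the table in the patch also carries init, alloc, dealloc, putpage and haspage entries.
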
