path: root/sys/kern
author    Alfred Perlstein <alfred@FreeBSD.org>    2001-05-19 01:28:09 +0000
committer Alfred Perlstein <alfred@FreeBSD.org>    2001-05-19 01:28:09 +0000
commit    2395531439bb140427dff4dfd6d67856f907c15e (patch)
tree      7d51c8cab74aeec829658414e052238902ea14a0 /sys/kern
parent    3620eb66f3ef16ff28810c74476f01e29c1562bf (diff)
Introduce a global lock for the vm subsystem (vm_mtx).
vm_mtx does not recurse and is required for most low level vm operations.

Faults can not be taken without holding Giant.

Memory subsystems can now call the base page allocators safely.

Almost all atomic ops were removed as they are covered under the vm mutex.

Alpha and ia64 now need to catch up to i386's trap handlers.

FFS and NFS have been tested; other filesystems will need minor changes
(grabbing the vm lock when twiddling page properties).

Reviewed (partially) by: jake, jhb
Notes: svn path=/head/; revision=76827
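Several of the routines touched in this diff (bfreekva(), vfs_setdirty(),
vm_hold_free_pages()) may be entered with or without vm_mtx already held, so
they use a conditional-acquisition idiom around the vm work. The following is
a minimal sketch of that idiom, not part of the commit; the function name
example_vm_op() and the standalone extern declaration are illustrative only
(in the tree vm_mtx comes from the vm headers).

    #include <sys/param.h>
    #include <sys/lock.h>
    #include <sys/mutex.h>

    /* Declared here for the sketch; normally provided by the vm headers. */
    extern struct mtx vm_mtx;

    static void
    example_vm_op(void)
    {
    	int hadvmlock;

    	/* Take vm_mtx only if the caller does not already hold it. */
    	hadvmlock = mtx_owned(&vm_mtx);
    	if (!hadvmlock)
    		mtx_lock(&vm_mtx);

    	/* ... low-level vm work protected by vm_mtx goes here ... */

    	/* Drop the lock only if this function acquired it. */
    	if (!hadvmlock)
    		mtx_unlock(&vm_mtx);
    }

Routines documented as requiring the lock instead assert it directly with
mtx_assert(&vm_mtx, MA_OWNED), as vfs_vmio_release() and vfs_clean_pages()
do in the vfs_bio.c changes below.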
Diffstat (limited to 'sys/kern')
-rw-r--r--  sys/kern/imgact_aout.c      8
-rw-r--r--  sys/kern/imgact_elf.c      21
-rw-r--r--  sys/kern/init_main.c        4
-rw-r--r--  sys/kern/kern_exec.c        7
-rw-r--r--  sys/kern/kern_exit.c        2
-rw-r--r--  sys/kern/kern_fork.c        2
-rw-r--r--  sys/kern/kern_resource.c    2
-rw-r--r--  sys/kern/kern_synch.c       7
-rw-r--r--  sys/kern/link_elf.c         8
-rw-r--r--  sys/kern/link_elf_obj.c     8
-rw-r--r--  sys/kern/subr_blist.c       1
-rw-r--r--  sys/kern/subr_trap.c       13
-rw-r--r--  sys/kern/sys_pipe.c        11
-rw-r--r--  sys/kern/syscalls.master   34
-rw-r--r--  sys/kern/sysv_shm.c         6
-rw-r--r--  sys/kern/vfs_bio.c        105
-rw-r--r--  sys/kern/vfs_cluster.c     10
-rw-r--r--  sys/kern/vfs_default.c     16
-rw-r--r--  sys/kern/vfs_extattr.c      7
-rw-r--r--  sys/kern/vfs_subr.c        15
-rw-r--r--  sys/kern/vfs_syscalls.c     7
21 files changed, 259 insertions(+), 35 deletions(-)
diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c
index 9478eb39b4e7..8becda31eb5c 100644
--- a/sys/kern/imgact_aout.c
+++ b/sys/kern/imgact_aout.c
@@ -171,6 +171,7 @@ exec_aout_imgact(imgp)
if (error)
return (error);
+ mtx_lock(&vm_mtx);
/*
* Destroy old process VM and create a new one (with a new stack)
*/
@@ -184,7 +185,9 @@ exec_aout_imgact(imgp)
vp = imgp->vp;
map = &vmspace->vm_map;
vm_map_lock(map);
+ mtx_unlock(&vm_mtx);
VOP_GETVOBJECT(vp, &object);
+ mtx_lock(&vm_mtx);
vm_object_reference(object);
text_end = virtual_offset + a_out->a_text;
@@ -195,6 +198,7 @@ exec_aout_imgact(imgp)
MAP_COPY_ON_WRITE | MAP_PREFAULT);
if (error) {
vm_map_unlock(map);
+ mtx_unlock(&vm_mtx);
return (error);
}
data_end = text_end + a_out->a_data;
@@ -207,6 +211,7 @@ exec_aout_imgact(imgp)
MAP_COPY_ON_WRITE | MAP_PREFAULT);
if (error) {
vm_map_unlock(map);
+ mtx_unlock(&vm_mtx);
return (error);
}
}
@@ -217,6 +222,7 @@ exec_aout_imgact(imgp)
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error) {
vm_map_unlock(map);
+ mtx_unlock(&vm_mtx);
return (error);
}
}
@@ -229,6 +235,8 @@ exec_aout_imgact(imgp)
vmspace->vm_daddr = (caddr_t) (uintptr_t)
(virtual_offset + a_out->a_text);
+ mtx_unlock(&vm_mtx);
+
/* Fill in image_params */
imgp->interpreted = 0;
imgp->entry_addr = a_out->a_entry;
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index da7b9cb522ef..2a15e9c98e9a 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -230,6 +230,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
else
map_len = round_page(offset+filsz) - file_addr;
+ mtx_lock(&vm_mtx);
if (map_len != 0) {
vm_object_reference(object);
vm_map_lock(&vmspace->vm_map);
@@ -244,12 +245,15 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
vm_map_unlock(&vmspace->vm_map);
if (rv != KERN_SUCCESS) {
vm_object_deallocate(object);
+ mtx_unlock(&vm_mtx);
return EINVAL;
}
/* we can stop now if we've covered it all */
- if (memsz == filsz)
+ if (memsz == filsz) {
+ mtx_unlock(&vm_mtx);
return 0;
+ }
}
@@ -270,8 +274,10 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
map_addr, map_addr + map_len,
VM_PROT_ALL, VM_PROT_ALL, 0);
vm_map_unlock(&vmspace->vm_map);
- if (rv != KERN_SUCCESS)
+ if (rv != KERN_SUCCESS) {
+ mtx_unlock(&vm_mtx);
return EINVAL;
+ }
}
if (copy_len != 0) {
@@ -287,14 +293,19 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
if (rv != KERN_SUCCESS) {
vm_object_deallocate(object);
+ mtx_unlock(&vm_mtx);
return EINVAL;
}
/* send the page fragment to user space */
+ mtx_unlock(&vm_mtx);
error = copyout((caddr_t)data_buf, (caddr_t)map_addr, copy_len);
+ mtx_lock(&vm_mtx);
vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
- if (error)
+ if (error) {
+ mtx_unlock(&vm_mtx);
return (error);
+ }
}
/*
@@ -303,6 +314,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len, prot,
FALSE);
+ mtx_unlock(&vm_mtx);
return error;
}
@@ -498,9 +510,11 @@ exec_elf_imgact(struct image_params *imgp)
if ((error = exec_extract_strings(imgp)) != 0)
goto fail;
+ mtx_lock(&vm_mtx);
exec_new_vmspace(imgp);
vmspace = imgp->proc->p_vmspace;
+ mtx_unlock(&vm_mtx);
for (i = 0; i < hdr->e_phnum; i++) {
switch(phdr[i].p_type) {
@@ -557,6 +571,7 @@ exec_elf_imgact(struct image_params *imgp)
}
}
+ /* XXX: lock the vm_mtx when twiddling vmspace? */
vmspace->vm_tsize = text_size >> PAGE_SHIFT;
vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
vmspace->vm_dsize = data_size >> PAGE_SHIFT;
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index f1a6a0bfd745..6f5c653c2ab3 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -485,11 +485,15 @@ start_init(void *dummy)
* Need just enough stack to hold the faked-up "execve()" arguments.
*/
addr = trunc_page(USRSTACK - PAGE_SIZE);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
panic("init: couldn't allocate argument space");
p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
p->p_vmspace->vm_ssize = 1;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
if ((var = getenv("init_path")) != NULL) {
strncpy(init_path, var, sizeof init_path);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 0b1b29e2517c..8f49538f038b 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -412,6 +412,7 @@ exec_map_first_page(imgp)
VOP_GETVOBJECT(imgp->vp, &object);
s = splvm();
+ mtx_lock(&vm_mtx);
ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
@@ -443,6 +444,7 @@ exec_map_first_page(imgp)
vm_page_free(ma[0]);
}
splx(s);
+ mtx_unlock(&vm_mtx);
return EIO;
}
}
@@ -454,6 +456,7 @@ exec_map_first_page(imgp)
pmap_kenter((vm_offset_t) imgp->image_header, VM_PAGE_TO_PHYS(ma[0]));
imgp->firstpage = ma[0];
+ mtx_unlock(&vm_mtx);
return 0;
}
@@ -461,9 +464,12 @@ void
exec_unmap_first_page(imgp)
struct image_params *imgp;
{
+
if (imgp->firstpage) {
+ mtx_lock(&vm_mtx);
pmap_kremove((vm_offset_t) imgp->image_header);
vm_page_unwire(imgp->firstpage, 1);
+ mtx_unlock(&vm_mtx);
imgp->firstpage = NULL;
}
}
@@ -482,6 +488,7 @@ exec_new_vmspace(imgp)
caddr_t stack_addr = (caddr_t) (USRSTACK - MAXSSIZ);
vm_map_t map = &vmspace->vm_map;
+ mtx_assert(&vm_mtx, MA_OWNED);
imgp->vmspace_destroyed = 1;
/*
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index d5dccab4e914..1af27d253d48 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -222,6 +222,7 @@ exit1(p, rv)
* Can't free the entire vmspace as the kernel stack
* may be mapped within that space also.
*/
+ mtx_lock(&vm_mtx);
if (vm->vm_refcnt == 1) {
if (vm->vm_shm)
shmexit(p);
@@ -230,6 +231,7 @@ exit1(p, rv)
(void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS,
VM_MAXUSER_ADDRESS);
}
+ mtx_unlock(&vm_mtx);
PROC_LOCK(p);
if (SESS_LEADER(p)) {
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index d3b991df85ad..62dcc061a5cd 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -220,6 +220,7 @@ fork1(p1, flags, procp)
if ((flags & RFPROC) == 0) {
vm_fork(p1, 0, flags);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
/*
* Close all file descriptors.
@@ -567,6 +568,7 @@ again:
* execution path later. (ie: directly into user mode)
*/
vm_fork(p1, p2, flags);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
if (flags == (RFFDG | RFPROC)) {
cnt.v_forks++;
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index 27431ab4de09..f46313c0f4ad 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -498,8 +498,10 @@ dosetrlimit(p, which, limp)
}
addr = trunc_page(addr);
size = round_page(size);
+ mtx_lock(&vm_mtx);
(void) vm_map_protect(&p->p_vmspace->vm_map,
addr, addr+size, prot, FALSE);
+ mtx_unlock(&vm_mtx);
}
break;
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 7d793de4ee2b..e09a3772c8b5 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -378,6 +378,13 @@ msleep(ident, mtx, priority, wmesg, timo)
int rval = 0;
WITNESS_SAVE_DECL(mtx);
+ KASSERT(ident == &proc0 || /* XXX: swapper */
+ timo != 0 || /* XXX: we might still miss a wakeup */
+ mtx_owned(&Giant) || mtx != NULL,
+ ("indefinite sleep without mutex, wmesg: \"%s\" ident: %p",
+ wmesg, ident));
+ if (mtx_owned(&vm_mtx) && mtx != &vm_mtx)
+ panic("sleeping with vm_mtx held.");
#ifdef KTRACE
if (p && KTRPOINT(p, KTR_CSW))
ktrcsw(p->p_tracep, 1, 0);
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index 344f163ec343..613d1e4e79ab 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -653,8 +653,10 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
ef = (elf_file_t) lf;
#ifdef SPARSE_MAPPING
+ mtx_lock(&vm_mtx);
ef->object = vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT);
if (ef->object == NULL) {
+ mtx_unlock(&vm_mtx);
free(ef, M_LINKER);
error = ENOMEM;
goto out;
@@ -667,9 +669,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error) {
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
ef->object = 0;
goto out;
}
+ mtx_unlock(&vm_mtx);
#else
ef->address = malloc(mapsize, M_LINKER, M_WAITOK);
if (!ef->address) {
@@ -697,10 +701,12 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
/*
* Wire down the pages
*/
+ mtx_lock(&vm_mtx);
vm_map_pageable(kernel_map,
(vm_offset_t) segbase,
(vm_offset_t) segbase + segs[i]->p_memsz,
FALSE);
+ mtx_unlock(&vm_mtx);
#endif
}
@@ -824,10 +830,12 @@ link_elf_unload_file(linker_file_t file)
}
#ifdef SPARSE_MAPPING
if (ef->object) {
+ mtx_lock(&vm_mtx);
vm_map_remove(kernel_map, (vm_offset_t) ef->address,
(vm_offset_t) ef->address
+ (ef->object->size << PAGE_SHIFT));
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
}
#else
if (ef->address)
diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c
index 344f163ec343..613d1e4e79ab 100644
--- a/sys/kern/link_elf_obj.c
+++ b/sys/kern/link_elf_obj.c
@@ -653,8 +653,10 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
ef = (elf_file_t) lf;
#ifdef SPARSE_MAPPING
+ mtx_lock(&vm_mtx);
ef->object = vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT);
if (ef->object == NULL) {
+ mtx_unlock(&vm_mtx);
free(ef, M_LINKER);
error = ENOMEM;
goto out;
@@ -667,9 +669,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error) {
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
ef->object = 0;
goto out;
}
+ mtx_unlock(&vm_mtx);
#else
ef->address = malloc(mapsize, M_LINKER, M_WAITOK);
if (!ef->address) {
@@ -697,10 +701,12 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
/*
* Wire down the pages
*/
+ mtx_lock(&vm_mtx);
vm_map_pageable(kernel_map,
(vm_offset_t) segbase,
(vm_offset_t) segbase + segs[i]->p_memsz,
FALSE);
+ mtx_unlock(&vm_mtx);
#endif
}
@@ -824,10 +830,12 @@ link_elf_unload_file(linker_file_t file)
}
#ifdef SPARSE_MAPPING
if (ef->object) {
+ mtx_lock(&vm_mtx);
vm_map_remove(kernel_map, (vm_offset_t) ef->address,
(vm_offset_t) ef->address
+ (ef->object->size << PAGE_SHIFT));
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
}
#else
if (ef->address)
diff --git a/sys/kern/subr_blist.c b/sys/kern/subr_blist.c
index 9ac4338c4a1c..061d151d6eaf 100644
--- a/sys/kern/subr_blist.c
+++ b/sys/kern/subr_blist.c
@@ -71,6 +71,7 @@
#include <sys/kernel.h>
#include <sys/blist.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index af7bfc1c577b..8924fa29d79a 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -330,9 +330,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
i = trap_pfault(&frame, TRUE, eva);
- mtx_unlock(&Giant);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
if (i == -2) {
/*
@@ -443,9 +441,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
(void) trap_pfault(&frame, FALSE, eva);
- mtx_unlock(&Giant);
goto out;
case T_DNA:
@@ -887,7 +883,9 @@ nogo:
frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
return (0);
}
+ mtx_lock(&Giant);
trap_fatal(frame, eva);
+ mtx_unlock(&Giant);
return (-1);
}
@@ -1147,14 +1145,17 @@ syscall(frame)
/*
* Try to run the syscall without the MP lock if the syscall
- * is MP safe. We have to obtain the MP lock no matter what if
- * we are ktracing
+ * is MP safe.
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0) {
mtx_lock(&Giant);
}
#ifdef KTRACE
+ /*
+ * We have to obtain the MP lock no matter what if
+ * we are ktracing
+ */
if (KTRPOINT(p, KTR_SYSCALL)) {
if (!mtx_owned(&Giant))
mtx_lock(&Giant);
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 0c3240226e26..a78844898909 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -56,6 +56,7 @@
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/poll.h>
@@ -253,6 +254,7 @@ pipespace(cpipe, size)
* kernel_object.
* XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
*/
+ mtx_lock(&vm_mtx);
object = vm_object_allocate(OBJT_DEFAULT, npages);
buffer = (caddr_t) vm_map_min(kernel_map);
@@ -264,6 +266,7 @@ pipespace(cpipe, size)
error = vm_map_find(kernel_map, object, 0,
(vm_offset_t *) &buffer, size, 1,
VM_PROT_ALL, VM_PROT_ALL, 0);
+ mtx_unlock(&vm_mtx);
if (error != KERN_SUCCESS) {
vm_object_deallocate(object);
@@ -551,6 +554,7 @@ pipe_build_write_buffer(wpipe, uio)
size = wpipe->pipe_buffer.size;
endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
+ mtx_lock(&vm_mtx);
addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
vm_page_t m;
@@ -561,6 +565,7 @@ pipe_build_write_buffer(wpipe, uio)
for (j = 0; j < i; j++)
vm_page_unwire(wpipe->pipe_map.ms[j], 1);
+ mtx_unlock(&vm_mtx);
return (EFAULT);
}
@@ -592,6 +597,7 @@ pipe_build_write_buffer(wpipe, uio)
pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
wpipe->pipe_map.npages);
+ mtx_unlock(&vm_mtx);
/*
* and update the uio data
*/
@@ -625,8 +631,10 @@ pipe_destroy_write_buffer(wpipe)
amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
}
}
+ mtx_lock(&vm_mtx);
for (i = 0; i < wpipe->pipe_map.npages; i++)
vm_page_unwire(wpipe->pipe_map.ms[i], 1);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -1199,12 +1207,13 @@ pipeclose(cpipe)
wakeup(ppipe);
ppipe->pipe_peer = NULL;
}
-
/*
* free resources
*/
+ mtx_lock(&vm_mtx);
pipe_free_kmem(cpipe);
zfree(pipe_zone, cpipe);
+ mtx_unlock(&vm_mtx);
}
}
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index 32255bcb1ede..269814c9089d 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -61,7 +61,7 @@
14 STD POSIX { int mknod(char *path, int mode, int dev); }
15 STD POSIX { int chmod(char *path, int mode); }
16 STD POSIX { int chown(char *path, int uid, int gid); }
-17 STD BSD { int obreak(char *nsize); } break obreak_args int
+17 MPSAFE STD BSD { int obreak(char *nsize); } break obreak_args int
18 STD BSD { int getfsstat(struct statfs *buf, long bufsize, \
int flags); }
19 COMPAT POSIX { long lseek(int fd, long offset, int whence); }
@@ -121,23 +121,23 @@
62 COMPAT POSIX { int fstat(int fd, struct ostat *sb); }
63 COMPAT BSD { int getkerninfo(int op, char *where, size_t *size, \
int arg); } getkerninfo getkerninfo_args int
-64 COMPAT BSD { int getpagesize(void); } \
+64 MPSAFE COMPAT BSD { int getpagesize(void); } \
getpagesize getpagesize_args int
65 STD BSD { int msync(void *addr, size_t len, int flags); }
66 STD BSD { int vfork(void); }
67 OBSOL NOHIDE vread
68 OBSOL NOHIDE vwrite
-69 STD BSD { int sbrk(int incr); }
-70 STD BSD { int sstk(int incr); }
-71 COMPAT BSD { int mmap(void *addr, int len, int prot, \
+69 MPSAFE STD BSD { int sbrk(int incr); }
+70 MPSAFE STD BSD { int sstk(int incr); }
+71 MPSAFE COMPAT BSD { int mmap(void *addr, int len, int prot, \
int flags, int fd, long pos); }
-72 STD BSD { int ovadvise(int anom); } vadvise ovadvise_args int
-73 STD BSD { int munmap(void *addr, size_t len); }
-74 STD BSD { int mprotect(const void *addr, size_t len, int prot); }
-75 STD BSD { int madvise(void *addr, size_t len, int behav); }
+72 MPSAFE STD BSD { int ovadvise(int anom); } vadvise ovadvise_args int
+73 MPSAFE STD BSD { int munmap(void *addr, size_t len); }
+74 MPSAFE STD BSD { int mprotect(const void *addr, size_t len, int prot); }
+75 MPSAFE STD BSD { int madvise(void *addr, size_t len, int behav); }
76 OBSOL NOHIDE vhangup
77 OBSOL NOHIDE vlimit
-78 STD BSD { int mincore(const void *addr, size_t len, \
+78 MPSAFE STD BSD { int mincore(const void *addr, size_t len, \
char *vec); }
79 STD POSIX { int getgroups(u_int gidsetsize, gid_t *gidset); }
80 STD POSIX { int setgroups(u_int gidsetsize, gid_t *gidset); }
@@ -306,7 +306,7 @@
setrlimit __setrlimit_args int
196 STD BSD { int getdirentries(int fd, char *buf, u_int count, \
long *basep); }
-197 STD BSD { caddr_t mmap(caddr_t addr, size_t len, int prot, \
+197 MPSAFE STD BSD { caddr_t mmap(caddr_t addr, size_t len, int prot, \
int flags, int fd, int pad, off_t pos); }
198 STD NOHIDE { int nosys(void); } __syscall __syscall_args int
199 STD POSIX { off_t lseek(int fd, int pad, off_t offset, \
@@ -318,8 +318,8 @@
__sysctl sysctl_args int
; properly, __sysctl should be a NOHIDE, but making an exception
; here allows to avoid one in libc/sys/Makefile.inc.
-203 STD BSD { int mlock(const void *addr, size_t len); }
-204 STD BSD { int munlock(const void *addr, size_t len); }
+203 MPSAFE STD BSD { int mlock(const void *addr, size_t len); }
+204 MPSAFE STD BSD { int munlock(const void *addr, size_t len); }
205 STD BSD { int undelete(char *path); }
206 STD BSD { int futimes(int fd, struct timeval *tptr); }
207 STD BSD { int getpgid(pid_t pid); }
@@ -386,7 +386,7 @@
248 UNIMPL NOHIDE nosys
249 UNIMPL NOHIDE nosys
; syscall numbers initially used in OpenBSD
-250 STD BSD { int minherit(void *addr, size_t len, int inherit); }
+250 MPSAFE STD BSD { int minherit(void *addr, size_t len, int inherit); }
251 STD BSD { int rfork(int flags); }
252 STD BSD { int openbsd_poll(struct pollfd *fds, u_int nfds, \
int timeout); }
@@ -414,7 +414,7 @@
274 STD BSD { int lchmod(char *path, mode_t mode); }
275 NOPROTO BSD { int lchown(char *path, uid_t uid, gid_t gid); } netbsd_lchown lchown_args int
276 STD BSD { int lutimes(char *path, struct timeval *tptr); }
-277 NOPROTO BSD { int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int
+277 MPSAFE NOPROTO BSD { int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int
278 STD BSD { int nstat(char *path, struct nstat *ub); }
279 STD BSD { int nfstat(int fd, struct nstat *sb); }
280 STD BSD { int nlstat(char *path, struct nstat *ub); }
@@ -463,8 +463,8 @@
321 STD BSD { int yield(void); }
322 OBSOL NOHIDE thr_sleep
323 OBSOL NOHIDE thr_wakeup
-324 STD BSD { int mlockall(int how); }
-325 STD BSD { int munlockall(void); }
+324 MPSAFE STD BSD { int mlockall(int how); }
+325 MPSAFE STD BSD { int munlockall(void); }
326 STD BSD { int __getcwd(u_char *buf, u_int buflen); }
327 STD POSIX { int sched_setparam (pid_t pid, const struct sched_param *param); }
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index fab53a83325b..0a9abda70bf9 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -43,6 +43,7 @@
#include <sys/shm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
@@ -314,14 +315,17 @@ shmat(p, uap)
}
shm_handle = shmseg->shm_internal;
+ mtx_lock(&vm_mtx);
vm_object_reference(shm_handle->shm_object);
rv = vm_map_find(&p->p_vmspace->vm_map, shm_handle->shm_object,
0, &attach_va, size, (flags & MAP_FIXED)?0:1, prot, prot, 0);
if (rv != KERN_SUCCESS) {
+ mtx_unlock(&vm_mtx);
return ENOMEM;
}
vm_map_inherit(&p->p_vmspace->vm_map,
attach_va, attach_va + size, VM_INHERIT_SHARE);
+ mtx_unlock(&vm_mtx);
shmmap_s->va = attach_va;
shmmap_s->shmid = uap->shmid;
@@ -549,6 +553,7 @@ shmget_allocate_segment(p, uap, mode)
* We make sure that we have allocated a pager before we need
* to.
*/
+ mtx_lock(&vm_mtx);
if (shm_use_phys) {
shm_handle->shm_object =
vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
@@ -558,6 +563,7 @@ shmget_allocate_segment(p, uap, mode)
}
vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);
+ mtx_unlock(&vm_mtx);
shmseg->shm_internal = shm_handle;
shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c1b53d8c76e2..a980330a9b5c 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -281,6 +281,8 @@ waitrunningbufspace(void)
* Called when a buffer is extended. This function clears the B_CACHE
* bit if the newly extended portion of the buffer does not contain
* valid data.
+ *
+ * must be called with vm_mtx held
*/
static __inline__
void
@@ -426,11 +428,13 @@ bufinit(void)
* from buf_daemon.
*/
+ mtx_lock(&vm_mtx);
bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
bogus_page = vm_page_alloc(kernel_object,
((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
VM_ALLOC_NORMAL);
cnt.v_wire_count++;
+ mtx_unlock(&vm_mtx);
}
@@ -441,17 +445,27 @@ bufinit(void)
* buffer_map.
*
* Since this call frees up buffer space, we call bufspacewakeup().
+ *
+ * Can be called with or without the vm_mtx.
*/
static void
bfreekva(struct buf * bp)
{
+
if (bp->b_kvasize) {
+ int hadvmlock;
+
++buffreekvacnt;
bufspace -= bp->b_kvasize;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
vm_map_delete(buffer_map,
(vm_offset_t) bp->b_kvabase,
(vm_offset_t) bp->b_kvabase + bp->b_kvasize
);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
bp->b_kvasize = 0;
bufspacewakeup();
}
@@ -807,6 +821,7 @@ bdwrite(struct buf * bp)
VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
}
+ mtx_lock(&vm_mtx);
/*
* Set the *dirty* buffer range based upon the VM system dirty pages.
*/
@@ -820,6 +835,7 @@ bdwrite(struct buf * bp)
* out on the next sync, or perhaps the cluster will be completed.
*/
vfs_clean_pages(bp);
+ mtx_unlock(&vm_mtx);
bqrelse(bp);
/*
@@ -973,12 +989,15 @@ buf_dirty_count_severe(void)
* Release a busy buffer and, if requested, free its resources. The
* buffer will be stashed in the appropriate bufqueue[] allowing it
* to be accessed later as a cache entity or reused for other purposes.
+ *
* vm_mtx must not be held.
*/
void
brelse(struct buf * bp)
{
int s;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
s = splbio();
@@ -1088,6 +1107,7 @@ brelse(struct buf * bp)
resid = bp->b_bufsize;
foff = bp->b_offset;
+ mtx_lock(&vm_mtx);
for (i = 0; i < bp->b_npages; i++) {
int had_bogus = 0;
@@ -1099,10 +1119,12 @@ brelse(struct buf * bp)
* now.
*/
if (m == bogus_page) {
+ mtx_unlock(&vm_mtx);
VOP_GETVOBJECT(vp, &obj);
poff = OFF_TO_IDX(bp->b_offset);
had_bogus = 1;
+ mtx_lock(&vm_mtx);
for (j = i; j < bp->b_npages; j++) {
vm_page_t mtmp;
mtmp = bp->b_pages[j];
@@ -1136,11 +1158,15 @@ brelse(struct buf * bp)
if (bp->b_flags & (B_INVAL | B_RELBUF))
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
} else if (bp->b_flags & B_VMIO) {
- if (bp->b_flags & (B_INVAL | B_RELBUF))
+ if (bp->b_flags & (B_INVAL | B_RELBUF)) {
+ mtx_lock(&vm_mtx);
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
+ }
}
@@ -1302,6 +1328,9 @@ bqrelse(struct buf * bp)
splx(s);
}
+/*
+ * Must be called with vm_mtx held.
+ */
static void
vfs_vmio_release(bp)
struct buf *bp;
@@ -1310,6 +1339,7 @@ vfs_vmio_release(bp)
vm_page_t m;
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
for (i = 0; i < bp->b_npages; i++) {
m = bp->b_pages[i];
bp->b_pages[i] = NULL;
@@ -1343,6 +1373,9 @@ vfs_vmio_release(bp)
}
splx(s);
pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+
+ /* could drop vm_mtx here */
+
if (bp->b_bufsize) {
bufspacewakeup();
bp->b_bufsize = 0;
@@ -1614,7 +1647,9 @@ restart:
if (qindex == QUEUE_CLEAN) {
if (bp->b_flags & B_VMIO) {
bp->b_flags &= ~B_ASYNC;
+ mtx_lock(&vm_mtx);
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
}
if (bp->b_vp)
brelvp(bp);
@@ -1735,6 +1770,8 @@ restart:
if (maxsize != bp->b_kvasize) {
vm_offset_t addr = 0;
+ /* we'll hold the lock over some vm ops */
+ mtx_lock(&vm_mtx);
bfreekva(bp);
if (vm_map_findspace(buffer_map,
@@ -1743,6 +1780,7 @@ restart:
* Uh oh. Buffer map is to fragmented. We
* must defragment the map.
*/
+ mtx_unlock(&vm_mtx);
++bufdefragcnt;
defrag = 1;
bp->b_flags |= B_INVAL;
@@ -1759,6 +1797,7 @@ restart:
bufspace += bp->b_kvasize;
++bufreusecnt;
}
+ mtx_unlock(&vm_mtx);
}
bp->b_data = bp->b_kvabase;
}
@@ -1936,18 +1975,24 @@ inmem(struct vnode * vp, daddr_t blkno)
size = vp->v_mount->mnt_stat.f_iosize;
off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize;
+ mtx_lock(&vm_mtx);
for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
m = vm_page_lookup(obj, OFF_TO_IDX(off + toff));
if (!m)
- return 0;
+ goto notinmem;
tinc = size;
if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK))
tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
if (vm_page_is_valid(m,
(vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0)
- return 0;
+ goto notinmem;
}
+ mtx_unlock(&vm_mtx);
return 1;
+
+notinmem:
+ mtx_unlock(&vm_mtx);
+ return (0);
}
/*
@@ -1960,11 +2005,14 @@ inmem(struct vnode * vp, daddr_t blkno)
*
* This routine is primarily used by NFS, but is generalized for the
* B_VMIO case.
+ *
+ * Can be called with or without vm_mtx
*/
static void
vfs_setdirty(struct buf *bp)
{
int i;
+ int hadvmlock;
vm_object_t object;
/*
@@ -1983,6 +2031,10 @@ vfs_setdirty(struct buf *bp)
if ((bp->b_flags & B_VMIO) == 0)
return;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+
object = bp->b_pages[0]->object;
if ((object->flags & OBJ_WRITEABLE) && !(object->flags & OBJ_MIGHTBEDIRTY))
@@ -2040,6 +2092,8 @@ vfs_setdirty(struct buf *bp)
bp->b_dirtyend = eoffset;
}
}
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
/*
@@ -2441,6 +2495,7 @@ allocbuf(struct buf *bp, int size)
* DEV_BSIZE aligned existing buffer size. Figure out
* if we have to remove any pages.
*/
+ mtx_lock(&vm_mtx);
if (desiredpages < bp->b_npages) {
for (i = desiredpages; i < bp->b_npages; i++) {
/*
@@ -2461,6 +2516,7 @@ allocbuf(struct buf *bp, int size)
(desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages));
bp->b_npages = desiredpages;
}
+ mtx_unlock(&vm_mtx);
} else if (size > bp->b_bcount) {
/*
* We are growing the buffer, possibly in a
@@ -2481,6 +2537,7 @@ allocbuf(struct buf *bp, int size)
vp = bp->b_vp;
VOP_GETVOBJECT(vp, &obj);
+ mtx_lock(&vm_mtx);
while (bp->b_npages < desiredpages) {
vm_page_t m;
vm_pindex_t pi;
@@ -2589,6 +2646,9 @@ allocbuf(struct buf *bp, int size)
bp->b_pages,
bp->b_npages
);
+
+ mtx_unlock(&vm_mtx);
+
bp->b_data = (caddr_t)((vm_offset_t)bp->b_data |
(vm_offset_t)(bp->b_offset & PAGE_MASK));
}
@@ -2726,6 +2786,7 @@ bufdone(struct buf *bp)
if (error) {
panic("biodone: no object");
}
+ mtx_lock(&vm_mtx);
#if defined(VFS_BIO_DEBUG)
if (obj->paging_in_progress < bp->b_npages) {
printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
@@ -2814,6 +2875,7 @@ bufdone(struct buf *bp)
}
if (obj)
vm_object_pip_wakeupn(obj, 0);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -2837,12 +2899,15 @@ bufdone(struct buf *bp)
* This routine is called in lieu of iodone in the case of
* incomplete I/O. This keeps the busy status for pages
* consistant.
+ *
+ * vm_mtx should not be held
*/
void
vfs_unbusy_pages(struct buf * bp)
{
int i;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
runningbufwakeup(bp);
if (bp->b_flags & B_VMIO) {
struct vnode *vp = bp->b_vp;
@@ -2850,6 +2915,7 @@ vfs_unbusy_pages(struct buf * bp)
VOP_GETVOBJECT(vp, &obj);
+ mtx_lock(&vm_mtx);
for (i = 0; i < bp->b_npages; i++) {
vm_page_t m = bp->b_pages[i];
@@ -2866,6 +2932,7 @@ vfs_unbusy_pages(struct buf * bp)
vm_page_io_finish(m);
}
vm_object_pip_wakeupn(obj, 0);
+ mtx_unlock(&vm_mtx);
}
}
@@ -2876,12 +2943,15 @@ vfs_unbusy_pages(struct buf * bp)
* range is restricted to the buffer's size.
*
* This routine is typically called after a read completes.
+ *
+ * vm_mtx should be held
*/
static void
vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
{
vm_ooffset_t soff, eoff;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Start and end offsets in buffer. eoff - soff may not cross a
* page boundry or cross the end of the buffer. The end of the
@@ -2917,12 +2987,15 @@ vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
* Since I/O has not been initiated yet, certain buffer flags
* such as BIO_ERROR or B_INVAL may be in an inconsistant state
* and should be ignored.
+ *
+ * vm_mtx should not be held
*/
void
vfs_busy_pages(struct buf * bp, int clear_modify)
{
int i, bogus;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
if (bp->b_flags & B_VMIO) {
struct vnode *vp = bp->b_vp;
vm_object_t obj;
@@ -2932,6 +3005,7 @@ vfs_busy_pages(struct buf * bp, int clear_modify)
foff = bp->b_offset;
KASSERT(bp->b_offset != NOOFFSET,
("vfs_busy_pages: no buffer offset"));
+ mtx_lock(&vm_mtx);
vfs_setdirty(bp);
retry:
@@ -2979,6 +3053,7 @@ retry:
}
if (bogus)
pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
+ mtx_unlock(&vm_mtx);
}
}
@@ -2989,12 +3064,15 @@ retry:
*
* Note that while we only really need to clean through to b_bcount, we
* just go ahead and clean through to b_bufsize.
+ *
+ * should be called with vm_mtx held
*/
static void
vfs_clean_pages(struct buf * bp)
{
int i;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (bp->b_flags & B_VMIO) {
vm_ooffset_t foff;
@@ -3021,6 +3099,7 @@ vfs_clean_pages(struct buf * bp)
* Set the range within the buffer to valid and clean. The range is
* relative to the beginning of the buffer, b_offset. Note that b_offset
* itself may be offset from the beginning of the first page.
+ *
*/
void
@@ -3061,13 +3140,18 @@ vfs_bio_set_validclean(struct buf *bp, int base, int size)
*
* Note that while we only theoretically need to clear through b_bcount,
* we go ahead and clear through b_bufsize.
+ *
+ * We'll get vm_mtx here for safety if processing a VMIO buffer.
+ * I don't think vm_mtx is needed, but we're twiddling vm_page flags.
*/
void
vfs_bio_clrbuf(struct buf *bp) {
int i, mask = 0;
caddr_t sa, ea;
+
if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
+ mtx_lock(&vm_mtx);
bp->b_flags &= ~B_INVAL;
bp->b_ioflags &= ~BIO_ERROR;
if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
@@ -3079,6 +3163,7 @@ vfs_bio_clrbuf(struct buf *bp) {
}
bp->b_pages[0]->valid |= mask;
bp->b_resid = 0;
+ mtx_unlock(&vm_mtx);
return;
}
ea = sa = bp->b_data;
@@ -3106,6 +3191,7 @@ vfs_bio_clrbuf(struct buf *bp) {
vm_page_flag_clear(bp->b_pages[i], PG_ZERO);
}
bp->b_resid = 0;
+ mtx_unlock(&vm_mtx);
} else {
clrbuf(bp);
}
@@ -3115,18 +3201,22 @@ vfs_bio_clrbuf(struct buf *bp) {
* vm_hold_load_pages and vm_hold_unload pages get pages into
* a buffers address space. The pages are anonymous and are
* not associated with a file object.
+ *
+ * vm_mtx should not be held
*/
-void
+static void
vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
{
vm_offset_t pg;
vm_page_t p;
int index;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
to = round_page(to);
from = round_page(from);
index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
+ mtx_lock(&vm_mtx);
for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
tryagain:
@@ -3152,6 +3242,7 @@ tryagain:
vm_page_wakeup(p);
}
bp->b_npages = index;
+ mtx_unlock(&vm_mtx);
}
void
@@ -3160,11 +3251,15 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
vm_offset_t pg;
vm_page_t p;
int index, newnpages;
+ int hadvmlock;
from = round_page(from);
to = round_page(to);
newnpages = index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
p = bp->b_pages[index];
if (p && (index < bp->b_npages)) {
@@ -3180,6 +3275,8 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
}
}
bp->b_npages = newnpages;
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index 8a6e0452813d..0eb47bde0a46 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -433,6 +433,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
BUF_KERNPROC(tbp);
TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
tbp, b_cluster.cluster_entry);
+ mtx_lock(&vm_mtx);
for (j = 0; j < tbp->b_npages; j += 1) {
vm_page_t m;
m = tbp->b_pages[j];
@@ -446,10 +447,12 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL)
tbp->b_pages[j] = bogus_page;
}
+ mtx_unlock(&vm_mtx);
bp->b_bcount += tbp->b_bcount;
bp->b_bufsize += tbp->b_bufsize;
}
+ mtx_lock(&vm_mtx);
for(j=0;j<bp->b_npages;j++) {
if ((bp->b_pages[j]->valid & VM_PAGE_BITS_ALL) ==
VM_PAGE_BITS_ALL)
@@ -462,6 +465,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
(vm_page_t *)bp->b_pages, bp->b_npages);
+ mtx_unlock(&vm_mtx);
return (bp);
}
@@ -484,7 +488,9 @@ cluster_callback(bp)
if (bp->b_ioflags & BIO_ERROR)
error = bp->b_error;
+ mtx_lock(&vm_mtx);
pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+ mtx_unlock(&vm_mtx);
/*
* Move memory from the large cluster buffer into the component
* buffers and mark IO as done on these.
@@ -851,6 +857,7 @@ cluster_wbuild(vp, size, start_lbn, len)
}
}
+ mtx_lock(&vm_mtx);
for (j = 0; j < tbp->b_npages; j += 1) {
m = tbp->b_pages[j];
vm_page_io_start(m);
@@ -861,6 +868,7 @@ cluster_wbuild(vp, size, start_lbn, len)
bp->b_npages++;
}
}
+ mtx_unlock(&vm_mtx);
}
bp->b_bcount += size;
bp->b_bufsize += size;
@@ -879,8 +887,10 @@ cluster_wbuild(vp, size, start_lbn, len)
tbp, b_cluster.cluster_entry);
}
finishcluster:
+ mtx_lock(&vm_mtx);
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
(vm_page_t *) bp->b_pages, bp->b_npages);
+ mtx_unlock(&vm_mtx);
if (bp->b_bufsize > bp->b_kvasize)
panic(
"cluster_wbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 328a9b1526d3..d17e9348bda8 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -535,14 +535,18 @@ retry:
if (vp->v_type == VREG || vp->v_type == VDIR) {
if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
goto retn;
+ mtx_lock(&vm_mtx);
object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
+ mtx_unlock(&vm_mtx);
} else if (devsw(vp->v_rdev) != NULL) {
/*
* This simply allocates the biggest object possible
* for a disk vnode. This should be fixed, but doesn't
* cause any problems (yet).
*/
+ mtx_lock(&vm_mtx);
object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
+ mtx_unlock(&vm_mtx);
} else {
goto retn;
}
@@ -550,15 +554,23 @@ retry:
* Dereference the reference we just created. This assumes
* that the object is associated with the vp.
*/
+ mtx_lock(&vm_mtx);
object->ref_count--;
+ mtx_unlock(&vm_mtx);
vp->v_usecount--;
} else {
+ /*
+ * XXX: safe to hold vm mutex through VOP_UNLOCK?
+ */
+ mtx_lock(&vm_mtx);
if (object->flags & OBJ_DEAD) {
VOP_UNLOCK(vp, 0, p);
- tsleep(object, PVM, "vodead", 0);
+ msleep(object, VM_OBJECT_MTX(object), PVM, "vodead", 0);
+ mtx_unlock(&vm_mtx);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
goto retry;
}
+ mtx_unlock(&vm_mtx);
}
KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object"));
@@ -580,6 +592,7 @@ vop_stddestroyvobject(ap)
if (vp->v_object == NULL)
return (0);
+ mtx_lock(&vm_mtx);
if (obj->ref_count == 0) {
/*
* vclean() may be called twice. The first time
@@ -594,6 +607,7 @@ vop_stddestroyvobject(ap)
*/
vm_pager_deallocate(obj);
}
+ mtx_unlock(&vm_mtx);
return (0);
}
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index 6b73258f0684..3f975516162d 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -2770,8 +2770,13 @@ fsync(p, uap)
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- if (VOP_GETVOBJECT(vp, &obj) == 0)
+ if (VOP_GETVOBJECT(vp, &obj) == 0) {
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
vm_object_page_clean(obj, 0, 0, 0);
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
+ }
error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
#ifdef SOFTUPDATES
if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 2f4dc8d95326..6c050ba16d84 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -711,6 +711,8 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
int s, error;
vm_object_t object;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
+
if (flags & V_SAVE) {
s = splbio();
while (vp->v_numoutput) {
@@ -797,8 +799,10 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
*/
mtx_lock(&vp->v_interlock);
if (VOP_GETVOBJECT(vp, &object) == 0) {
+ mtx_lock(&vm_mtx);
vm_object_page_remove(object, 0, 0,
(flags & V_SAVE) ? TRUE : FALSE);
+ mtx_unlock(&vm_mtx);
}
mtx_unlock(&vp->v_interlock);
@@ -1132,6 +1136,8 @@ speedup_syncer()
* Also sets B_PAGING flag to indicate that vnode is not fully associated
* with the buffer. i.e. the bp has not been linked into the vnode or
* ref-counted.
+ *
+ * Doesn't block, only vnode seems to need a lock.
*/
void
pbgetvp(vp, bp)
@@ -1554,6 +1560,7 @@ vput(vp)
{
struct proc *p = curproc; /* XXX */
+ mtx_assert(&Giant, MA_OWNED);
KASSERT(vp != NULL, ("vput: null vp"));
mtx_lock(&vp->v_interlock);
/* Skip this v_writecount check if we're going to panic below. */
@@ -2382,7 +2389,11 @@ loop:
if (!vget(vp,
LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
if (VOP_GETVOBJECT(vp, &obj) == 0) {
- vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
+ mtx_lock(&vm_mtx);
+ vm_object_page_clean(obj, 0, 0,
+ flags == MNT_WAIT ?
+ OBJPC_SYNC : OBJPC_NOSYNC);
+ mtx_unlock(&vm_mtx);
anyio = 1;
}
vput(vp);
@@ -2409,6 +2420,8 @@ vfs_object_create(vp, p, cred)
struct proc *p;
struct ucred *cred;
{
+
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
return (VOP_CREATEVOBJECT(vp, cred, p));
}
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 6b73258f0684..3f975516162d 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -2770,8 +2770,13 @@ fsync(p, uap)
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- if (VOP_GETVOBJECT(vp, &obj) == 0)
+ if (VOP_GETVOBJECT(vp, &obj) == 0) {
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
vm_object_page_clean(obj, 0, 0, 0);
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
+ }
error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
#ifdef SOFTUPDATES
if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))