aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/linux/linux.h7
-rw-r--r--sys/amd64/linux/linux_machdep.c169
-rw-r--r--sys/amd64/linux32/linux.h7
-rw-r--r--sys/amd64/linux32/linux32_machdep.c184
-rw-r--r--sys/compat/linux/linux_mmap.c257
-rw-r--r--sys/compat/linux/linux_mmap.h49
-rw-r--r--sys/i386/linux/linux.h7
-rw-r--r--sys/i386/linux/linux_machdep.c188
-rw-r--r--sys/modules/linux/Makefile2
-rw-r--r--sys/modules/linux_common/Makefile2
10 files changed, 318 insertions, 554 deletions
diff --git a/sys/amd64/linux/linux.h b/sys/amd64/linux/linux.h
index 639499ab58d0..b30568ffaa3c 100644
--- a/sys/amd64/linux/linux.h
+++ b/sys/amd64/linux/linux.h
@@ -139,13 +139,6 @@ struct l_rlimit {
l_ulong rlim_max;
};
-/* mmap options */
-#define LINUX_MAP_SHARED 0x0001
-#define LINUX_MAP_PRIVATE 0x0002
-#define LINUX_MAP_FIXED 0x0010
-#define LINUX_MAP_ANON 0x0020
-#define LINUX_MAP_GROWSDOWN 0x0100
-
/*
* stat family of syscalls
*/
diff --git a/sys/amd64/linux/linux_machdep.c b/sys/amd64/linux/linux_machdep.c
index 0459ba7afd15..80c48aa68944 100644
--- a/sys/amd64/linux/linux_machdep.c
+++ b/sys/amd64/linux/linux_machdep.c
@@ -83,6 +83,7 @@ __FBSDID("$FreeBSD$");
#include <compat/linux/linux_ipc.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_mmap.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_emul.h>
@@ -122,181 +123,19 @@ linux_set_upcall_kse(struct thread *td, register_t stack)
return (0);
}
-#define STACK_SIZE (2 * 1024 * 1024)
-#define GUARD_SIZE (4 * PAGE_SIZE)
-
int
linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
{
- struct proc *p = td->td_proc;
- struct mmap_args /* {
- caddr_t addr;
- size_t len;
- int prot;
- int flags;
- int fd;
- long pad;
- off_t pos;
- } */ bsd_args;
- int error;
- struct file *fp;
- cap_rights_t rights;
-
- LINUX_CTR6(mmap2, "0x%lx, %ld, %ld, 0x%08lx, %ld, 0x%lx",
- args->addr, args->len, args->prot,
- args->flags, args->fd, args->pgoff);
-
- error = 0;
- bsd_args.flags = 0;
- fp = NULL;
-
- /*
- * Linux mmap(2):
- * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
- */
- if (! ((args->flags & LINUX_MAP_SHARED) ^
- (args->flags & LINUX_MAP_PRIVATE)))
- return (EINVAL);
-
- if (args->flags & LINUX_MAP_SHARED)
- bsd_args.flags |= MAP_SHARED;
- if (args->flags & LINUX_MAP_PRIVATE)
- bsd_args.flags |= MAP_PRIVATE;
- if (args->flags & LINUX_MAP_FIXED)
- bsd_args.flags |= MAP_FIXED;
- if (args->flags & LINUX_MAP_ANON)
- bsd_args.flags |= MAP_ANON;
- else
- bsd_args.flags |= MAP_NOSYNC;
- if (args->flags & LINUX_MAP_GROWSDOWN)
- bsd_args.flags |= MAP_STACK;
- /*
- * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
- * on Linux/i386. We do this to ensure maximum compatibility.
- * Linux/ia64 does the same in i386 emulation mode.
- */
- bsd_args.prot = args->prot;
- if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
- bsd_args.prot |= PROT_READ | PROT_EXEC;
-
- /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
- bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : args->fd;
- if (bsd_args.fd != -1) {
- /*
- * Linux follows Solaris mmap(2) description:
- * The file descriptor fildes is opened with
- * read permission, regardless of the
- * protection options specified.
- */
-
- error = fget(td, bsd_args.fd,
- cap_rights_init(&rights, CAP_MMAP), &fp);
- if (error != 0 )
- return (error);
- if (fp->f_type != DTYPE_VNODE) {
- fdrop(fp, td);
- return (EINVAL);
- }
-
- /* Linux mmap() just fails for O_WRONLY files */
- if (!(fp->f_flag & FREAD)) {
- fdrop(fp, td);
- return (EACCES);
- }
-
- fdrop(fp, td);
- }
-
- if (args->flags & LINUX_MAP_GROWSDOWN) {
- /*
- * The Linux MAP_GROWSDOWN option does not limit auto
- * growth of the region. Linux mmap with this option
- * takes as addr the initial BOS, and as len, the initial
- * region size. It can then grow down from addr without
- * limit. However, Linux threads has an implicit internal
- * limit to stack size of STACK_SIZE. Its just not
- * enforced explicitly in Linux. But, here we impose
- * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
- * region, since we can do this with our mmap.
- *
- * Our mmap with MAP_STACK takes addr as the maximum
- * downsize limit on BOS, and as len the max size of
- * the region. It then maps the top SGROWSIZ bytes,
- * and auto grows the region down, up to the limit
- * in addr.
- *
- * If we don't use the MAP_STACK option, the effect
- * of this code is to allocate a stack region of a
- * fixed size of (STACK_SIZE - GUARD_SIZE).
- */
-
- if ((caddr_t)PTRIN(args->addr) + args->len >
- p->p_vmspace->vm_maxsaddr) {
- /*
- * Some Linux apps will attempt to mmap
- * thread stacks near the top of their
- * address space. If their TOS is greater
- * than vm_maxsaddr, vm_map_growstack()
- * will confuse the thread stack with the
- * process stack and deliver a SEGV if they
- * attempt to grow the thread stack past their
- * current stacksize rlimit. To avoid this,
- * adjust vm_maxsaddr upwards to reflect
- * the current stacksize rlimit rather
- * than the maximum possible stacksize.
- * It would be better to adjust the
- * mmap'ed region, but some apps do not check
- * mmap's return value.
- */
- PROC_LOCK(p);
- p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
- lim_cur_proc(p, RLIMIT_STACK);
- PROC_UNLOCK(p);
- }
-
- /*
- * This gives us our maximum stack size and a new BOS.
- * If we're using VM_STACK, then mmap will just map
- * the top SGROWSIZ bytes, and let the stack grow down
- * to the limit at BOS. If we're not using VM_STACK
- * we map the full stack, since we don't have a way
- * to autogrow it.
- */
- if (args->len > STACK_SIZE - GUARD_SIZE) {
- bsd_args.addr = (caddr_t)PTRIN(args->addr);
- bsd_args.len = args->len;
- } else {
- bsd_args.addr = (caddr_t)PTRIN(args->addr) -
- (STACK_SIZE - GUARD_SIZE - args->len);
- bsd_args.len = STACK_SIZE - GUARD_SIZE;
- }
- } else {
- bsd_args.addr = (caddr_t)PTRIN(args->addr);
- bsd_args.len = args->len;
- }
- bsd_args.pos = (off_t)args->pgoff;
-
- error = sys_mmap(td, &bsd_args);
-
- LINUX_CTR2(mmap2, "return: %d (%p)",
- error, td->td_retval[0]);
- return (error);
+ return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot,
+ args->flags, args->fd, args->pgoff));
}
int
linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
{
- struct mprotect_args bsd_args;
-
- LINUX_CTR(mprotect);
- bsd_args.addr = uap->addr;
- bsd_args.len = uap->len;
- bsd_args.prot = uap->prot;
- if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
- bsd_args.prot |= PROT_READ | PROT_EXEC;
- return (sys_mprotect(td, &bsd_args));
+ return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot));
}
int
diff --git a/sys/amd64/linux32/linux.h b/sys/amd64/linux32/linux.h
index 02d12f554e8e..97da878339c0 100644
--- a/sys/amd64/linux32/linux.h
+++ b/sys/amd64/linux32/linux.h
@@ -165,13 +165,6 @@ struct l_rusage {
l_long ru_nivcsw;
} __packed;
-/* mmap options */
-#define LINUX_MAP_SHARED 0x0001
-#define LINUX_MAP_PRIVATE 0x0002
-#define LINUX_MAP_FIXED 0x0010
-#define LINUX_MAP_ANON 0x0020
-#define LINUX_MAP_GROWSDOWN 0x0100
-
struct l_mmap_argv {
l_uintptr_t addr;
l_size_t len;
diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c
index 25f023c741aa..187ec15e0218 100644
--- a/sys/amd64/linux32/linux32_machdep.c
+++ b/sys/amd64/linux32/linux32_machdep.c
@@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
#include <amd64/linux32/linux32_proto.h>
#include <compat/linux/linux_ipc.h>
#include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_mmap.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_emul.h>
@@ -84,9 +85,6 @@ struct l_old_select_argv {
l_uintptr_t timeout;
} __packed;
-static int linux_mmap_common(struct thread *td, l_uintptr_t addr,
- l_size_t len, l_int prot, l_int flags, l_int fd,
- l_loff_t pos);
static void
bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru)
@@ -448,9 +446,6 @@ linux_set_upcall_kse(struct thread *td, register_t stack)
return (0);
}
-#define STACK_SIZE (2 * 1024 * 1024)
-#define GUARD_SIZE (4 * PAGE_SIZE)
-
int
linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
{
@@ -489,184 +484,11 @@ linux_mmap(struct thread *td, struct linux_mmap_args *args)
(uint32_t)linux_args.pgoff));
}
-static int
-linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot,
- l_int flags, l_int fd, l_loff_t pos)
-{
- struct proc *p = td->td_proc;
- struct mmap_args /* {
- caddr_t addr;
- size_t len;
- int prot;
- int flags;
- int fd;
- long pad;
- off_t pos;
- } */ bsd_args;
- int error;
- struct file *fp;
- cap_rights_t rights;
-
- error = 0;
- bsd_args.flags = 0;
- fp = NULL;
-
- /*
- * Linux mmap(2):
- * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
- */
- if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
- return (EINVAL);
-
- if (flags & LINUX_MAP_SHARED)
- bsd_args.flags |= MAP_SHARED;
- if (flags & LINUX_MAP_PRIVATE)
- bsd_args.flags |= MAP_PRIVATE;
- if (flags & LINUX_MAP_FIXED)
- bsd_args.flags |= MAP_FIXED;
- if (flags & LINUX_MAP_ANON) {
- /* Enforce pos to be on page boundary, then ignore. */
- if ((pos & PAGE_MASK) != 0)
- return (EINVAL);
- pos = 0;
- bsd_args.flags |= MAP_ANON;
- } else
- bsd_args.flags |= MAP_NOSYNC;
- if (flags & LINUX_MAP_GROWSDOWN)
- bsd_args.flags |= MAP_STACK;
-
- /*
- * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
- * on Linux/i386. We do this to ensure maximum compatibility.
- * Linux/ia64 does the same in i386 emulation mode.
- */
- bsd_args.prot = prot;
- if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
- bsd_args.prot |= PROT_READ | PROT_EXEC;
-
- /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
- bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
- if (bsd_args.fd != -1) {
- /*
- * Linux follows Solaris mmap(2) description:
- * The file descriptor fildes is opened with
- * read permission, regardless of the
- * protection options specified.
- */
-
- error = fget(td, bsd_args.fd,
- cap_rights_init(&rights, CAP_MMAP), &fp);
- if (error != 0)
- return (error);
- if (fp->f_type != DTYPE_VNODE) {
- fdrop(fp, td);
- return (EINVAL);
- }
-
- /* Linux mmap() just fails for O_WRONLY files */
- if (!(fp->f_flag & FREAD)) {
- fdrop(fp, td);
- return (EACCES);
- }
-
- fdrop(fp, td);
- }
-
- if (flags & LINUX_MAP_GROWSDOWN) {
- /*
- * The Linux MAP_GROWSDOWN option does not limit auto
- * growth of the region. Linux mmap with this option
- * takes as addr the initial BOS, and as len, the initial
- * region size. It can then grow down from addr without
- * limit. However, Linux threads has an implicit internal
- * limit to stack size of STACK_SIZE. Its just not
- * enforced explicitly in Linux. But, here we impose
- * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
- * region, since we can do this with our mmap.
- *
- * Our mmap with MAP_STACK takes addr as the maximum
- * downsize limit on BOS, and as len the max size of
- * the region. It then maps the top SGROWSIZ bytes,
- * and auto grows the region down, up to the limit
- * in addr.
- *
- * If we don't use the MAP_STACK option, the effect
- * of this code is to allocate a stack region of a
- * fixed size of (STACK_SIZE - GUARD_SIZE).
- */
-
- if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) {
- /*
- * Some Linux apps will attempt to mmap
- * thread stacks near the top of their
- * address space. If their TOS is greater
- * than vm_maxsaddr, vm_map_growstack()
- * will confuse the thread stack with the
- * process stack and deliver a SEGV if they
- * attempt to grow the thread stack past their
- * current stacksize rlimit. To avoid this,
- * adjust vm_maxsaddr upwards to reflect
- * the current stacksize rlimit rather
- * than the maximum possible stacksize.
- * It would be better to adjust the
- * mmap'ed region, but some apps do not check
- * mmap's return value.
- */
- PROC_LOCK(p);
- p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK -
- lim_cur_proc(p, RLIMIT_STACK);
- PROC_UNLOCK(p);
- }
-
- /*
- * This gives us our maximum stack size and a new BOS.
- * If we're using VM_STACK, then mmap will just map
- * the top SGROWSIZ bytes, and let the stack grow down
- * to the limit at BOS. If we're not using VM_STACK
- * we map the full stack, since we don't have a way
- * to autogrow it.
- */
- if (len > STACK_SIZE - GUARD_SIZE) {
- bsd_args.addr = (caddr_t)PTRIN(addr);
- bsd_args.len = len;
- } else {
- bsd_args.addr = (caddr_t)PTRIN(addr) -
- (STACK_SIZE - GUARD_SIZE - len);
- bsd_args.len = STACK_SIZE - GUARD_SIZE;
- }
- } else {
- bsd_args.addr = (caddr_t)PTRIN(addr);
- bsd_args.len = len;
- }
- bsd_args.pos = pos;
-
-#ifdef DEBUG
- if (ldebug(mmap))
- printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
- __func__,
- (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot,
- bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
-#endif
- error = sys_mmap(td, &bsd_args);
-#ifdef DEBUG
- if (ldebug(mmap))
- printf("-> %s() return: 0x%x (0x%08x)\n",
- __func__, error, (u_int)td->td_retval[0]);
-#endif
- return (error);
-}
-
int
linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
{
- struct mprotect_args bsd_args;
-
- bsd_args.addr = uap->addr;
- bsd_args.len = uap->len;
- bsd_args.prot = uap->prot;
- if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
- bsd_args.prot |= PROT_READ | PROT_EXEC;
- return (sys_mprotect(td, &bsd_args));
+
+ return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot));
}
int
diff --git a/sys/compat/linux/linux_mmap.c b/sys/compat/linux/linux_mmap.c
new file mode 100644
index 000000000000..870f0be418f6
--- /dev/null
+++ b/sys/compat/linux/linux_mmap.c
@@ -0,0 +1,257 @@
+/*-
+ * Copyright (c) 2004 Tim J. Robbins
+ * Copyright (c) 2002 Doug Rabson
+ * Copyright (c) 2000 Marcel Moolenaar
+ * Copyright (c) 1994-1995 Søren Schmidt
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/capsicum.h>
+#include <sys/file.h>
+#include <sys/imgact.h>
+#include <sys/ktr.h>
+#include <sys/mman.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+
+#include <compat/linux/linux_emul.h>
+#include <compat/linux/linux_mmap.h>
+#include <compat/linux/linux_persona.h>
+#include <compat/linux/linux_util.h>
+
+/* MAP_GROWSDOWN regions are sized to at least STACK_SIZE - GUARD_SIZE. */
+#define STACK_SIZE (2 * 1024 * 1024)
+#define GUARD_SIZE (4 * PAGE_SIZE)
+
+#if defined(__amd64__)
+static void linux_fixup_prot(struct thread *td, int *prot);
+#endif
+
+
+/*
+ * Translate a Linux mmap request into native mmap_args and call sys_mmap().
+ * Returns 0 or an errno value from the checks below or from sys_mmap().
+ */
+int
+linux_mmap_common(struct thread *td, uintptr_t addr, size_t len, int prot,
+    int flags, int fd, off_t pos)
+{
+	struct proc *p = td->td_proc;
+	struct vmspace *vms = td->td_proc->p_vmspace;
+	struct mmap_args /* {
+		caddr_t addr;
+		size_t len;
+		int prot;
+		int flags;
+		int fd;
+		off_t pos;
+	} */ bsd_args;
+	int error;
+	struct file *fp;
+	cap_rights_t rights;
+
+	LINUX_CTR6(mmap2, "0x%lx, %ld, %ld, 0x%08lx, %ld, 0x%lx",
+	    addr, len, prot, flags, fd, pos);
+
+	bsd_args.flags = 0;
+
+	/*
+	 * Linux mmap(2):
+	 * You must specify exactly one of MAP_SHARED and MAP_PRIVATE.
+	 * Compare truth values; XOR of the masked bits would accept both.
+	 */
+	if (((flags & LINUX_MAP_SHARED) != 0) ==
+	    ((flags & LINUX_MAP_PRIVATE) != 0))
+		return (EINVAL);
+
+	if (flags & LINUX_MAP_SHARED)
+		bsd_args.flags |= MAP_SHARED;
+	if (flags & LINUX_MAP_PRIVATE)
+		bsd_args.flags |= MAP_PRIVATE;
+	if (flags & LINUX_MAP_FIXED)
+		bsd_args.flags |= MAP_FIXED;
+	if (flags & LINUX_MAP_ANON) {
+		/* Enforce pos to be on page boundary, then ignore. */
+		if ((pos & PAGE_MASK) != 0)
+			return (EINVAL);
+		pos = 0;
+		bsd_args.flags |= MAP_ANON;
+	} else
+		bsd_args.flags |= MAP_NOSYNC;
+	if (flags & LINUX_MAP_GROWSDOWN)
+		bsd_args.flags |= MAP_STACK;
+
+	/*
+	 * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
+	 * on Linux/i386 if the binary requires executable stack.
+	 * We do this only for IA32 emulation as on native i386 this does not
+	 * make sense without PAE.
+	 *
+	 * XXX. Linux checks that the file system is not mounted with noexec.
+	 */
+	bsd_args.prot = prot;
+#if defined(__amd64__)
+	linux_fixup_prot(td, &bsd_args.prot);
+#endif
+
+	/* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
+	bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
+	if (bsd_args.fd != -1) {
+		/*
+		 * Linux follows Solaris mmap(2) description:
+		 * The file descriptor fildes is opened with
+		 * read permission, regardless of the
+		 * protection options specified.
+		 */
+		error = fget(td, bsd_args.fd,
+		    cap_rights_init(&rights, CAP_MMAP), &fp);
+		if (error != 0)
+			return (error);
+		if (fp->f_type != DTYPE_VNODE) {
+			fdrop(fp, td);
+			return (EINVAL);
+		}
+
+		/* Linux mmap() just fails for O_WRONLY files */
+		if (!(fp->f_flag & FREAD)) {
+			fdrop(fp, td);
+			return (EACCES);
+		}
+
+		fdrop(fp, td);
+	}
+
+	if (flags & LINUX_MAP_GROWSDOWN) {
+		/*
+		 * The Linux MAP_GROWSDOWN option does not limit auto
+		 * growth of the region. Linux mmap with this option
+		 * takes as addr the initial BOS, and as len, the initial
+		 * region size. It can then grow down from addr without
+		 * limit. However, Linux threads has an implicit internal
+		 * limit to stack size of STACK_SIZE. It's just not
+		 * enforced explicitly in Linux. But, here we impose
+		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
+		 * region, since we can do this with our mmap.
+		 *
+		 * Our mmap with MAP_STACK takes addr as the maximum
+		 * downsize limit on BOS, and as len the max size of
+		 * the region. It then maps the top SGROWSIZ bytes,
+		 * and auto grows the region down, up to the limit
+		 * in addr.
+		 *
+		 * If we don't use the MAP_STACK option, the effect
+		 * of this code is to allocate a stack region of a
+		 * fixed size of (STACK_SIZE - GUARD_SIZE).
+		 */
+		if ((caddr_t)addr + len > vms->vm_maxsaddr) {
+			/*
+			 * Some Linux apps will attempt to mmap
+			 * thread stacks near the top of their
+			 * address space. If their TOS is greater
+			 * than vm_maxsaddr, vm_map_growstack()
+			 * will confuse the thread stack with the
+			 * process stack and deliver a SEGV if they
+			 * attempt to grow the thread stack past their
+			 * current stacksize rlimit. To avoid this,
+			 * adjust vm_maxsaddr upwards to reflect
+			 * the current stacksize rlimit rather
+			 * than the maximum possible stacksize.
+			 * It would be better to adjust the
+			 * mmap'ed region, but some apps do not check
+			 * mmap's return value.
+			 */
+			PROC_LOCK(p);
+			vms->vm_maxsaddr = (char *)p->p_sysent->sv_usrstack -
+			    lim_cur_proc(p, RLIMIT_STACK);
+			PROC_UNLOCK(p);
+		}
+
+		/*
+		 * This gives us our maximum stack size and a new BOS.
+		 * If we're using VM_STACK, then mmap will just map
+		 * the top SGROWSIZ bytes, and let the stack grow down
+		 * to the limit at BOS. If we're not using VM_STACK
+		 * we map the full stack, since we don't have a way
+		 * to autogrow it.
+		 */
+		if (len > STACK_SIZE - GUARD_SIZE) {
+			bsd_args.addr = (caddr_t)addr;
+			bsd_args.len = len;
+		} else {
+			bsd_args.addr = (caddr_t)addr -
+			    (STACK_SIZE - GUARD_SIZE - len);
+			bsd_args.len = STACK_SIZE - GUARD_SIZE;
+		}
+	} else {
+		bsd_args.addr = (caddr_t)addr;
+		bsd_args.len = len;
+	}
+	bsd_args.pos = pos;
+
+	error = sys_mmap(td, &bsd_args);
+	LINUX_CTR2(mmap2, "return: %d (%p)", error, td->td_retval[0]);
+	return (error);
+}
+
+/* Hand a Linux mprotect request to sys_mprotect(), fixing up prot on amd64. */
+int
+linux_mprotect_common(struct thread *td, uintptr_t addr, size_t len, int prot)
+{
+	struct mprotect_args margs;
+
+	margs.prot = prot;
+#if defined(__amd64__)
+	linux_fixup_prot(td, &margs.prot);
+#endif
+	margs.len = len;
+	margs.addr = (void *)addr;
+	return (sys_mprotect(td, &margs));
+}
+
+#if defined(__amd64__)
+/* Make PROT_READ imply PROT_EXEC for ILP32 READ_IMPLIES_EXEC personas. */
+static void
+linux_fixup_prot(struct thread *td, int *prot)
+{
+	struct linux_pemuldata *pem;
+
+	if (!SV_PROC_FLAG(td->td_proc, SV_ILP32) || (*prot & PROT_READ) == 0)
+		return;
+	pem = pem_find(td->td_proc);
+	if ((pem->persona & LINUX_READ_IMPLIES_EXEC) != 0)
+		*prot |= PROT_EXEC;
+}
+#endif
diff --git a/sys/compat/linux/linux_mmap.h b/sys/compat/linux/linux_mmap.h
new file mode 100644
index 000000000000..a27d99d989d5
--- /dev/null
+++ b/sys/compat/linux/linux_mmap.h
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 2004 Tim J. Robbins
+ * Copyright (c) 2002 Doug Rabson
+ * Copyright (c) 2000 Marcel Moolenaar
+ * Copyright (c) 1994-1995 Søren Schmidt
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _LINUX_MMAP_H_
+#define _LINUX_MMAP_H_
+
+/* Linux mmap flag bits tested by linux_mmap_common() */
+#define LINUX_MAP_SHARED 0x0001
+#define LINUX_MAP_PRIVATE 0x0002
+#define LINUX_MAP_FIXED 0x0010
+#define LINUX_MAP_ANON 0x0020
+#define LINUX_MAP_GROWSDOWN 0x0100
+
+/* mmap/mprotect back ends shared by the per-architecture Linux syscalls. */
+int linux_mmap_common(struct thread *, uintptr_t, size_t, int, int,
+ int, off_t);
+int linux_mprotect_common(struct thread *, uintptr_t, size_t, int);
+
+#endif /* _LINUX_MMAP_H_ */
diff --git a/sys/i386/linux/linux.h b/sys/i386/linux/linux.h
index 36b2084cd9f9..42e836ca182e 100644
--- a/sys/i386/linux/linux.h
+++ b/sys/i386/linux/linux.h
@@ -140,13 +140,6 @@ struct l_rlimit {
l_ulong rlim_max;
};
-/* mmap options */
-#define LINUX_MAP_SHARED 0x0001
-#define LINUX_MAP_PRIVATE 0x0002
-#define LINUX_MAP_FIXED 0x0010
-#define LINUX_MAP_ANON 0x0020
-#define LINUX_MAP_GROWSDOWN 0x0100
-
struct l_mmap_argv {
l_uintptr_t addr;
l_size_t len;
diff --git a/sys/i386/linux/linux_machdep.c b/sys/i386/linux/linux_machdep.c
index 4fb5785da4db..4b4b88609428 100644
--- a/sys/i386/linux/linux_machdep.c
+++ b/sys/i386/linux/linux_machdep.c
@@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
#include <i386/linux/linux_proto.h>
#include <compat/linux/linux_ipc.h>
#include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_mmap.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_emul.h>
@@ -95,10 +96,6 @@ struct l_old_select_argv {
struct l_timeval *timeout;
};
-static int linux_mmap_common(struct thread *td, l_uintptr_t addr,
- l_size_t len, l_int prot, l_int flags, l_int fd,
- l_loff_t pos);
-
int
linux_execve(struct thread *td, struct linux_execve_args *args)
@@ -340,9 +337,6 @@ linux_set_upcall_kse(struct thread *td, register_t stack)
return (0);
}
-#define STACK_SIZE (2 * 1024 * 1024)
-#define GUARD_SIZE (4 * PAGE_SIZE)
-
int
linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
{
@@ -381,187 +375,11 @@ linux_mmap(struct thread *td, struct linux_mmap_args *args)
(uint32_t)linux_args.pgoff));
}
-static int
-linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot,
- l_int flags, l_int fd, l_loff_t pos)
-{
- struct proc *p = td->td_proc;
- struct mmap_args /* {
- caddr_t addr;
- size_t len;
- int prot;
- int flags;
- int fd;
- long pad;
- off_t pos;
- } */ bsd_args;
- int error;
- struct file *fp;
- cap_rights_t rights;
-
- error = 0;
- bsd_args.flags = 0;
- fp = NULL;
-
- /*
- * Linux mmap(2):
- * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
- */
- if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
- return (EINVAL);
-
- if (flags & LINUX_MAP_SHARED)
- bsd_args.flags |= MAP_SHARED;
- if (flags & LINUX_MAP_PRIVATE)
- bsd_args.flags |= MAP_PRIVATE;
- if (flags & LINUX_MAP_FIXED)
- bsd_args.flags |= MAP_FIXED;
- if (flags & LINUX_MAP_ANON) {
- /* Enforce pos to be on page boundary, then ignore. */
- if ((pos & PAGE_MASK) != 0)
- return (EINVAL);
- pos = 0;
- bsd_args.flags |= MAP_ANON;
- } else
- bsd_args.flags |= MAP_NOSYNC;
- if (flags & LINUX_MAP_GROWSDOWN)
- bsd_args.flags |= MAP_STACK;
-
- /*
- * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
- * on Linux/i386. We do this to ensure maximum compatibility.
- * Linux/ia64 does the same in i386 emulation mode.
- */
- bsd_args.prot = prot;
- if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
- bsd_args.prot |= PROT_READ | PROT_EXEC;
-
- /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
- bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
- if (bsd_args.fd != -1) {
- /*
- * Linux follows Solaris mmap(2) description:
- * The file descriptor fildes is opened with
- * read permission, regardless of the
- * protection options specified.
- *
- * Checking just CAP_MMAP is fine here, since the real work
- * is done in the FreeBSD mmap().
- */
-
- error = fget(td, bsd_args.fd,
- cap_rights_init(&rights, CAP_MMAP), &fp);
- if (error != 0)
- return (error);
- if (fp->f_type != DTYPE_VNODE) {
- fdrop(fp, td);
- return (EINVAL);
- }
-
- /* Linux mmap() just fails for O_WRONLY files */
- if (!(fp->f_flag & FREAD)) {
- fdrop(fp, td);
- return (EACCES);
- }
-
- fdrop(fp, td);
- }
-
- if (flags & LINUX_MAP_GROWSDOWN) {
- /*
- * The Linux MAP_GROWSDOWN option does not limit auto
- * growth of the region. Linux mmap with this option
- * takes as addr the inital BOS, and as len, the initial
- * region size. It can then grow down from addr without
- * limit. However, linux threads has an implicit internal
- * limit to stack size of STACK_SIZE. Its just not
- * enforced explicitly in linux. But, here we impose
- * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
- * region, since we can do this with our mmap.
- *
- * Our mmap with MAP_STACK takes addr as the maximum
- * downsize limit on BOS, and as len the max size of
- * the region. It them maps the top SGROWSIZ bytes,
- * and auto grows the region down, up to the limit
- * in addr.
- *
- * If we don't use the MAP_STACK option, the effect
- * of this code is to allocate a stack region of a
- * fixed size of (STACK_SIZE - GUARD_SIZE).
- */
-
- if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) {
- /*
- * Some linux apps will attempt to mmap
- * thread stacks near the top of their
- * address space. If their TOS is greater
- * than vm_maxsaddr, vm_map_growstack()
- * will confuse the thread stack with the
- * process stack and deliver a SEGV if they
- * attempt to grow the thread stack past their
- * current stacksize rlimit. To avoid this,
- * adjust vm_maxsaddr upwards to reflect
- * the current stacksize rlimit rather
- * than the maximum possible stacksize.
- * It would be better to adjust the
- * mmap'ed region, but some apps do not check
- * mmap's return value.
- */
- PROC_LOCK(p);
- p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
- lim_cur_proc(p, RLIMIT_STACK);
- PROC_UNLOCK(p);
- }
-
- /*
- * This gives us our maximum stack size and a new BOS.
- * If we're using VM_STACK, then mmap will just map
- * the top SGROWSIZ bytes, and let the stack grow down
- * to the limit at BOS. If we're not using VM_STACK
- * we map the full stack, since we don't have a way
- * to autogrow it.
- */
- if (len > STACK_SIZE - GUARD_SIZE) {
- bsd_args.addr = (caddr_t)PTRIN(addr);
- bsd_args.len = len;
- } else {
- bsd_args.addr = (caddr_t)PTRIN(addr) -
- (STACK_SIZE - GUARD_SIZE - len);
- bsd_args.len = STACK_SIZE - GUARD_SIZE;
- }
- } else {
- bsd_args.addr = (caddr_t)PTRIN(addr);
- bsd_args.len = len;
- }
- bsd_args.pos = pos;
-
-#ifdef DEBUG
- if (ldebug(mmap))
- printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
- __func__,
- (void *)bsd_args.addr, bsd_args.len, bsd_args.prot,
- bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
-#endif
- error = sys_mmap(td, &bsd_args);
-#ifdef DEBUG
- if (ldebug(mmap))
- printf("-> %s() return: 0x%x (0x%08x)\n",
- __func__, error, (u_int)td->td_retval[0]);
-#endif
- return (error);
-}
-
int
linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
{
- struct mprotect_args bsd_args;
-
- bsd_args.addr = uap->addr;
- bsd_args.len = uap->len;
- bsd_args.prot = uap->prot;
- if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
- bsd_args.prot |= PROT_READ | PROT_EXEC;
- return (sys_mprotect(td, &bsd_args));
+
+ return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot));
}
int
diff --git a/sys/modules/linux/Makefile b/sys/modules/linux/Makefile
index f2629723d404..d1707c92e91e 100644
--- a/sys/modules/linux/Makefile
+++ b/sys/modules/linux/Makefile
@@ -30,7 +30,7 @@ SRCS+= opt_apic.h
OBJS= ${VDSO}.so
.if ${MACHINE_CPUARCH} == "i386"
-SRCS+= linux_ptrace.c imgact_linux.c linux_util.c linux_mib.c \
+SRCS+= linux_ptrace.c imgact_linux.c linux_util.c linux_mib.c linux_mmap.c \
linux_emul.c opt_cpu.h linux.c
.endif
diff --git a/sys/modules/linux_common/Makefile b/sys/modules/linux_common/Makefile
index 91449f774ef5..2301796230b0 100644
--- a/sys/modules/linux_common/Makefile
+++ b/sys/modules/linux_common/Makefile
@@ -3,7 +3,7 @@
.PATH: ${.CURDIR}/../../compat/linux
KMOD= linux_common
-SRCS= linux_common.c linux_mib.c linux_util.c linux_emul.c \
+SRCS= linux_common.c linux_mib.c linux_mmap.c linux_util.c linux_emul.c \
linux.c opt_compat.h device_if.h vnode_if.h bus_if.h
EXPORT_SYMS=