about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mark Johnston <markj@FreeBSD.org> 2022-01-17 16:42:56 +0000
committer Mark Johnston <markj@FreeBSD.org> 2022-01-17 21:12:36 +0000
commit 1811c1e957ee1250b08b3246fc0db37ddf64b736 (patch)
tree c207d515f5beee99ffad9202e9e9b1ae3c6e765c
parent 758d98debec43ff83b8a1ed9a3d3a8441b83b3cc (diff)
download src-1811c1e957ee1250b08b3246fc0db37ddf64b736.tar.gz
src-1811c1e957ee1250b08b3246fc0db37ddf64b736.zip
exec: Reimplement stack address randomization
The approach taken by the stack gap implementation was to insert a random gap between the top of the fixed stack mapping and the true top of the main process stack. This approach was chosen so as to avoid randomizing the previously fixed address of certain process metadata stored at the top of the stack, but had some shortcomings. In particular, mlockall(2) calls would wire the gap, bloating the process' memory usage, and RLIMIT_STACK included the size of the gap so small (< several MB) limits could not be used.

There is little value in storing each process' ps_strings at a fixed location, as only very old programs hard-code this address; consumers were converted decades ago to use a sysctl-based interface for this purpose. Thus, this change re-implements stack address randomization by simply breaking the convention of storing ps_strings at a fixed location, and randomizing the location of the entire stack mapping. This implementation is simpler and avoids the problems mentioned above, while being unlikely to break compatibility anywhere the default ASLR settings are used.

The kern.elfN.aslr.stack_gap sysctl is renamed to kern.elfN.aslr.stack, and is re-enabled by default.

PR: 260303
Reviewed by: kib
Discussed with: emaste, mw
MFC after: 1 month
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D33704
-rw-r--r-- share/man/man7/security.7 16
-rw-r--r-- sys/i386/linux/imgact_linux.c 4
-rw-r--r-- sys/kern/imgact_aout.c 4
-rw-r--r-- sys/kern/imgact_elf.c 27
-rw-r--r-- sys/kern/kern_exec.c 86
-rw-r--r-- sys/sys/exec.h 3
-rw-r--r-- sys/sys/imgact.h 1
-rw-r--r-- sys/vm/vm_map.c 4
-rw-r--r-- sys/vm/vm_map.h 9
9 files changed, 103 insertions, 51 deletions
diff --git a/share/man/man7/security.7 b/share/man/man7/security.7
index bb7e120a1d46..1bb5338e54e6 100644
--- a/share/man/man7/security.7
+++ b/share/man/man7/security.7
@@ -28,7 +28,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd February 28, 2021
+.Dd January 14, 2022
.Dt SECURITY 7
.Os
.Sh NAME
@@ -1062,19 +1062,19 @@ position-independent (PIE) 32bit binaries.
.It Dv kern.elf32.aslr.honor_sbrk
Makes ASLR less aggressive and more compatible with old binaries
relying on the sbrk area.
-.It Dv kern.elf32.aslr.stack_gap
-If ASLR is enabled for a binary, a non-zero value creates a randomized
-stack gap between strings and the end of the aux vector.
-The value is the maximum percentage of main stack to waste on the gap.
-Cannot be greater than 50, i.e., at most half of the stack.
+.It Dv kern.elf32.aslr.stack
+If ASLR is enabled for a binary, a non-zero value enables randomization
+of the stack.
+Otherwise, the stack is mapped at a fixed location determined by the
+process ABI.
.It Dv kern.elf64.aslr.enable
64bit binaries ASLR control.
.It Dv kern.elf64.aslr.pie_enable
64bit PIE binaries ASLR control.
.It Dv kern.elf64.aslr.honor_sbrk
64bit binaries ASLR sbrk compatibility control.
-.It Dv kern.elf64.aslr.stack_gap
-Controls stack gap for 64bit binaries.
+.It Dv kern.elf64.aslr.stack
+Controls stack address randomization for 64bit binaries.
.It Dv kern.elf32.nxstack
Enables non-executable stack for 32bit processes.
Enabled by default if supported by hardware and corresponding binary.
diff --git a/sys/i386/linux/imgact_linux.c b/sys/i386/linux/imgact_linux.c
index 661620b6ceaf..85357f41a705 100644
--- a/sys/i386/linux/imgact_linux.c
+++ b/sys/i386/linux/imgact_linux.c
@@ -213,6 +213,10 @@ exec_linux_imgact(struct image_params *imgp)
vmspace->vm_daddr =
(caddr_t)(void *)(uintptr_t)(virtual_offset + a_out->a_text);
+ error = exec_map_stack(imgp);
+ if (error != 0)
+ goto fail;
+
/* Fill in image_params */
imgp->interpreted = 0;
imgp->entry_addr = a_out->a_entry;
diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c
index 6510488c3edd..1818e5665caf 100644
--- a/sys/kern/imgact_aout.c
+++ b/sys/kern/imgact_aout.c
@@ -350,6 +350,10 @@ exec_aout_imgact(struct image_params *imgp)
vmspace->vm_daddr = (caddr_t) (uintptr_t)
(virtual_offset + a_out->a_text);
+ error = exec_map_stack(imgp);
+ if (error != 0)
+ return (error);
+
/* Fill in image_params */
imgp->interpreted = 0;
imgp->entry_addr = a_out->a_entry;
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index c3d19064f6e5..a0266108ec84 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -201,11 +201,11 @@ SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW,
&__elfN(aslr_honor_sbrk), 0,
__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used");
-static int __elfN(aslr_stack_gap) = 0;
-SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack_gap, CTLFLAG_RW,
- &__elfN(aslr_stack_gap), 0,
+static int __elfN(aslr_stack) = 1;
+SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack, CTLFLAG_RWTUN,
+ &__elfN(aslr_stack), 0,
__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
- ": maximum percentage of main stack to waste on a random gap");
+ ": enable stack address randomization");
static int __elfN(sigfastblock) = 1;
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, sigfastblock,
@@ -1301,6 +1301,8 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
if (!__elfN(aslr_honor_sbrk) ||
(imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0)
imgp->map_flags |= MAP_ASLR_IGNSTART;
+ if (__elfN(aslr_stack))
+ imgp->map_flags |= MAP_ASLR_STACK;
}
if ((!__elfN(allow_wx) && (fctl0 & NT_FREEBSD_FCTL_WXNEEDED) == 0 &&
@@ -1309,14 +1311,16 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
imgp->map_flags |= MAP_WXORX;
error = exec_new_vmspace(imgp, sv);
- vmspace = imgp->proc->p_vmspace;
- map = &vmspace->vm_map;
imgp->proc->p_sysent = sv;
imgp->proc->p_elf_brandinfo = brand_info;
- maxv = vm_map_max(map) - lim_max(td, RLIMIT_STACK);
- if (mapsz >= maxv - vm_map_min(map)) {
+ vmspace = imgp->proc->p_vmspace;
+ map = &vmspace->vm_map;
+ maxv = sv->sv_usrstack;
+ if ((imgp->map_flags & MAP_ASLR_STACK) == 0)
+ maxv -= lim_max(td, RLIMIT_STACK);
+ if (error == 0 && mapsz >= maxv - vm_map_min(map)) {
uprintf("Excessive mapping size\n");
error = ENOEXEC;
}
@@ -1342,8 +1346,6 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
if (error != 0)
goto ret;
- entry = (u_long)hdr->e_entry + et_dyn_addr;
-
/*
* We load the dynamic linker where a userland call
* to mmap(0, ...) would put it. The rationale behind this
@@ -1364,6 +1366,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
map->anon_loc = addr;
}
+ entry = (u_long)hdr->e_entry + et_dyn_addr;
imgp->entry_addr = entry;
if (interp != NULL) {
@@ -1384,6 +1387,10 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
} else
addr = et_dyn_addr;
+ error = exec_map_stack(imgp);
+ if (error != 0)
+ goto ret;
+
/*
* Construct auxargs table (used by the copyout_auxargs routine)
*/
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 09d2461e4053..0494b73fc405 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -178,19 +178,19 @@ static int
sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS)
{
struct proc *p;
- int error;
+ vm_offset_t val;
p = curproc;
#ifdef SCTL_MASK32
if (req->flags & SCTL_MASK32) {
- unsigned int val;
- val = (unsigned int)p->p_sysent->sv_usrstack;
- error = SYSCTL_OUT(req, &val, sizeof(val));
- } else
+ unsigned int val32;
+
+ val32 = round_page((unsigned int)p->p_vmspace->vm_stacktop);
+ return (SYSCTL_OUT(req, &val32, sizeof(val32)));
+ }
#endif
- error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack,
- sizeof(p->p_sysent->sv_usrstack));
- return error;
+ val = round_page(p->p_vmspace->vm_stacktop);
+ return (SYSCTL_OUT(req, &val, sizeof(val)));
}
static int
@@ -1106,9 +1106,8 @@ exec_free_abi_mappings(struct proc *p)
}
/*
- * Destroy old address space, and allocate a new stack.
- * The new stack is only sgrowsiz large because it is grown
- * automatically on a page fault.
+ * Run down the current address space and install a new one. Map the shared
+ * page.
*/
int
exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
@@ -1118,11 +1117,8 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
struct vmspace *vmspace = p->p_vmspace;
struct thread *td = curthread;
vm_object_t obj;
- struct rlimit rlim_stack;
- vm_offset_t sv_minuser, stack_addr;
+ vm_offset_t sv_minuser;
vm_map_t map;
- vm_prot_t stack_prot;
- u_long ssiz;
imgp->vmspace_destroyed = true;
imgp->sysent = sv;
@@ -1157,7 +1153,7 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
*/
vm_map_lock(map);
vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR |
- MAP_ASLR_IGNSTART | MAP_WXORX);
+ MAP_ASLR_IGNSTART | MAP_ASLR_STACK | MAP_WXORX);
vm_map_unlock(map);
} else {
error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
@@ -1183,7 +1179,28 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
}
}
- /* Allocate a new stack */
+ return (sv->sv_onexec != NULL ? sv->sv_onexec(p, imgp) : 0);
+}
+
+/*
+ * Compute the stack size limit and map the main process stack.
+ */
+int
+exec_map_stack(struct image_params *imgp)
+{
+ struct rlimit rlim_stack;
+ struct sysentvec *sv;
+ struct proc *p;
+ vm_map_t map;
+ struct vmspace *vmspace;
+ vm_offset_t stack_addr, stack_top;
+ u_long ssiz;
+ int error, find_space, stack_off;
+ vm_prot_t stack_prot;
+
+ p = imgp->proc;
+ sv = p->p_sysent;
+
if (imgp->stack_sz != 0) {
ssiz = trunc_page(imgp->stack_sz);
PROC_LOCK(p);
@@ -1200,27 +1217,46 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
} else {
ssiz = maxssiz;
}
- stack_addr = sv->sv_usrstack - ssiz;
- stack_prot = obj != NULL && imgp->stack_prot != 0 ?
+
+ vmspace = p->p_vmspace;
+ map = &vmspace->vm_map;
+
+ stack_prot = sv->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ?
imgp->stack_prot : sv->sv_stackprot;
- error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, stack_prot,
- VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
+ if ((map->flags & MAP_ASLR_STACK) != 0) {
+ stack_addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
+ lim_max(curthread, RLIMIT_DATA));
+ find_space = VMFS_ANY_SPACE;
+ } else {
+ stack_addr = sv->sv_usrstack - ssiz;
+ find_space = VMFS_NO_SPACE;
+ }
+ error = vm_map_find(map, NULL, 0, &stack_addr, (vm_size_t)ssiz,
+ sv->sv_usrstack, find_space, stack_prot, VM_PROT_ALL,
+ MAP_STACK_GROWS_DOWN);
if (error != KERN_SUCCESS) {
uprintf("exec_new_vmspace: mapping stack size %#jx prot %#x "
- "failed mach error %d errno %d\n", (uintmax_t)ssiz,
+ "failed, mach error %d errno %d\n", (uintmax_t)ssiz,
stack_prot, error, vm_mmap_to_errno(error));
return (vm_mmap_to_errno(error));
}
- vmspace->vm_stkgap = 0;
+
+ stack_top = stack_addr + ssiz;
+ if ((map->flags & MAP_ASLR_STACK) != 0) {
+ /* Randomize within the first page of the stack. */
+ arc4rand(&stack_off, sizeof(stack_off), 0);
+ stack_top -= rounddown2(stack_off & PAGE_MASK, sizeof(void *));
+ }
/*
* vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they
* are still used to enforce the stack rlimit on the process stack.
*/
- vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
vmspace->vm_maxsaddr = (char *)stack_addr;
+ vmspace->vm_stacktop = stack_top;
+ vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
- return (sv->sv_onexec != NULL ? sv->sv_onexec(p, imgp) : 0);
+ return (0);
}
/*
diff --git a/sys/sys/exec.h b/sys/sys/exec.h
index 0ae2095c297f..82ee16befe28 100644
--- a/sys/sys/exec.h
+++ b/sys/sys/exec.h
@@ -87,7 +87,8 @@ struct execsw {
* Prefer the kern.ps_strings or kern.proc.ps_strings sysctls to this constant.
*/
#define PS_STRINGS (USRSTACK - sizeof(struct ps_strings))
-#define PROC_PS_STRINGS(p) ((p)->p_sysent->sv_psstrings)
+#define PROC_PS_STRINGS(p) \
+ ((p)->p_vmspace->vm_stacktop - (p)->p_sysent->sv_psstringssz)
int exec_map_first_page(struct image_params *);
void exec_unmap_first_page(struct image_params *);
diff --git a/sys/sys/imgact.h b/sys/sys/imgact.h
index 70e5c2e81579..bc1ab77a491e 100644
--- a/sys/sys/imgact.h
+++ b/sys/sys/imgact.h
@@ -113,6 +113,7 @@ int exec_check_permissions(struct image_params *);
void exec_cleanup(struct thread *td, struct vmspace *);
int exec_copyout_strings(struct image_params *, uintptr_t *);
void exec_free_args(struct image_args *);
+int exec_map_stack(struct image_params *);
int exec_new_vmspace(struct image_params *, struct sysentvec *);
void exec_setregs(struct thread *, struct image_params *, uintptr_t);
int exec_shell_imgact(struct image_params *);
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 1b2b5eb8d5e9..98d3d1e5bb1d 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -343,7 +343,6 @@ vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit)
vm->vm_taddr = 0;
vm->vm_daddr = 0;
vm->vm_maxsaddr = 0;
- vm->vm_stkgap = 0;
return (vm);
}
@@ -4264,7 +4263,6 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
vm2->vm_taddr = vm1->vm_taddr;
vm2->vm_daddr = vm1->vm_daddr;
vm2->vm_maxsaddr = vm1->vm_maxsaddr;
- vm2->vm_stkgap = vm1->vm_stkgap;
vm_map_lock(old_map);
if (old_map->busy)
vm_map_wait_busy(old_map);
@@ -4283,7 +4281,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
new_map->anon_loc = old_map->anon_loc;
new_map->flags |= old_map->flags & (MAP_ASLR | MAP_ASLR_IGNSTART |
- MAP_WXORX);
+ MAP_ASLR_STACK | MAP_WXORX);
VM_MAP_ENTRY_FOREACH(old_entry, old_map) {
if ((old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 873ff62eec4a..8f318b34e601 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -223,12 +223,13 @@ struct vm_map {
* vm_flags_t values
*/
#define MAP_WIREFUTURE 0x01 /* wire all future pages */
-#define MAP_BUSY_WAKEUP 0x02
+#define MAP_BUSY_WAKEUP 0x02 /* thread(s) waiting on busy state */
#define MAP_IS_SUB_MAP 0x04 /* has parent */
#define MAP_ASLR 0x08 /* enabled ASLR */
-#define MAP_ASLR_IGNSTART 0x10
-#define MAP_REPLENISH 0x20
+#define MAP_ASLR_IGNSTART 0x10 /* ASLR ignores data segment */
+#define MAP_REPLENISH 0x20 /* kmapent zone needs to be refilled */
#define MAP_WXORX 0x40 /* enforce W^X */
+#define MAP_ASLR_STACK 0x80 /* stack location is randomized */
#ifdef _KERNEL
#if defined(KLD_MODULE) && !defined(KLD_TIED)
@@ -293,7 +294,7 @@ struct vmspace {
caddr_t vm_taddr; /* (c) user virtual address of text */
caddr_t vm_daddr; /* (c) user virtual address of data */
caddr_t vm_maxsaddr; /* user VA at max stack growth */
- vm_size_t vm_stkgap; /* stack gap size in bytes */
+ vm_offset_t vm_stacktop; /* top of the stack, may not be page-aligned */
u_int vm_refcnt; /* number of references */
/*
* Keep the PMAP last, so that CPU-specific variations of that