path: root/sys/i386/i386/vm_machdep.c
author     Konstantin Belousov <kib@FreeBSD.org>  2018-04-13 20:30:49 +0000
committer  Konstantin Belousov <kib@FreeBSD.org>  2018-04-13 20:30:49 +0000
commit     d86c1f0dc1b17d94a533b4d3e9228dff9cb3fb31 (patch)
tree       8ce470882fe1e1e9f05a895ac15e4d6aa42c5da4 /sys/i386/i386/vm_machdep.c
parent     1315f9b59f83b4ed83fccfd5a0e696267ede902f (diff)
i386 4/4G split.
The change makes the user and kernel address spaces on i386 independent, giving each almost the full 4G of usable virtual addresses, except for one PDE at the top used for the trampoline and per-CPU trampoline stacks, and for system structures that must always be mapped, namely the IDT, GDT, common TSS and LDT, and process-private TSS and LDT if allocated.

By using a 1:1 mapping for the kernel text and data, it turned out to be possible to eliminate the assembler part of locore.S which bootstraps the initial page table and KPTmap. That code is rewritten in C and moved into pmap_cold(). The comment in vmparam.h explains the KVA layout.

There is no PCID mechanism available in protected mode, so each kernel/user switch, forth and back, completely flushes the TLB, except for the trampoline PTD region. TLB invalidations for userspace become trivial, because the IPI handlers switch page tables. On the other hand, context switches no longer need to reload %cr3.

copyout(9) was rewritten to use vm_fault_quick_hold(). An issue for the new copyout(9) is compatibility with wiring user buffers around sysctl handlers; this explains the two kinds of locks for the copyout PTEs and the accounting of vslock() calls. The vm_fault_quick_hold(), a.k.a. slow, path is only tried after the 'fast path' has failed; the fast path temporarily changes the mapping to the userspace and copies the data to/from a small per-CPU buffer in the trampoline. If a page fault occurs during the copy, it is short-circuited by exception.s so that it does not even reach C code.

The change was motivated by the need to implement the Meltdown mitigation, but instead of KPTI the full split is done. The i386 architecture already shows sizing problems; in particular, it is impossible to link clang and lld with debugging. I expect that the issues due to the virtual address space limits will only get worse, and the split gives the platform more longevity.

Tested by:              pho
Discussed with:         bde
Sponsored by:           The FreeBSD Foundation
MFC after:              1 month
Differential revision:  https://reviews.freebsd.org/D14633
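
To make the copyout(9) rewrite described above more concrete, here is a rough, hypothetical sketch of a slow path built on vm_fault_quick_hold_pages() (the full name of the vm_fault_quick_hold() helper mentioned in the message). This is not the committed sys/i386/i386/copyout.c code: the function name copyout_slow_sketch(), the single-page length limit, and the map_held_page()/unmap_held_page() helpers that stand in for the real temporary PTE window are all assumptions made for illustration.

/*
 * Hypothetical sketch of a copyout(9) slow path using
 * vm_fault_quick_hold_pages(); not the committed implementation.
 * map_held_page()/unmap_held_page() are invented stand-ins for the
 * temporary kernel mapping of a held user page.
 */
static int
copyout_slow_sketch(struct thread *td, const void *kaddr, void *uaddr,
    size_t len)
{
        vm_page_t ma[2];        /* len <= PAGE_SIZE spans at most 2 pages */
        vm_offset_t kva, off;
        size_t chunk, done;
        int count, i;

        if (len == 0)
                return (0);
        if (len > PAGE_SIZE)    /* keep within the ma[] limit */
                return (EINVAL);

        /* Fault in and hold the destination user pages. */
        count = vm_fault_quick_hold_pages(&td->td_proc->p_vmspace->vm_map,
            (vm_offset_t)uaddr, len, VM_PROT_WRITE, ma, nitems(ma));
        if (count == -1)
                return (EFAULT);

        /* Copy page by page through a temporary kernel mapping. */
        done = 0;
        off = (vm_offset_t)uaddr & PAGE_MASK;
        for (i = 0; i < count && done < len; i++) {
                chunk = MIN(len - done, PAGE_SIZE - off);
                kva = map_held_page(ma[i]);     /* hypothetical helper */
                bcopy((const char *)kaddr + done, (char *)kva + off, chunk);
                unmap_held_page(kva);           /* hypothetical helper */
                done += chunk;
                off = 0;
        }

        vm_page_unhold_pages(ma, count);
        return (0);
}

The real code additionally has to coexist with the trampoline fast path, the sysctl buffer wiring, and the PTE locking noted in the message, none of which is shown here.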
Notes: svn path=/head/; revision=332489
Diffstat (limited to 'sys/i386/i386/vm_machdep.c')
-rw-r--r--  sys/i386/i386/vm_machdep.c  18
1 file changed, 10 insertions, 8 deletions
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index dce8435db996..e997ec6c8658 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -204,9 +204,11 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
* Create a new fresh stack for the new process.
* Copy the trap frame for the return to user mode as if from a
* syscall. This copies most of the user mode register values.
- * The -16 is so we can expand the trapframe if we go to vm86.
+ * The -VM86_STACK_SPACE (-16) is so we can expand the trapframe
+ * if we go to vm86.
*/
- td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - 16) - 1;
+ td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb -
+ VM86_STACK_SPACE) - 1;
bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
td2->td_frame->tf_eax = 0; /* Child returns zero */
@@ -238,7 +240,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
pcb2->pcb_ebp = 0;
pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *);
pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */
- pcb2->pcb_eip = (int)fork_trampoline;
+ pcb2->pcb_eip = (int)fork_trampoline + setidt_disp;
/*-
* pcb2->pcb_dr*: cloned above.
* pcb2->pcb_savefpu: cloned above.
@@ -344,8 +346,7 @@ cpu_thread_clean(struct thread *td)
* XXX do we need to move the TSS off the allocated pages
* before freeing them? (not done here)
*/
- kmem_free(kernel_arena, (vm_offset_t)pcb->pcb_ext,
- ctob(IOPAGES + 1));
+ pmap_trm_free(pcb->pcb_ext, ctob(IOPAGES + 1));
pcb->pcb_ext = NULL;
}
}
@@ -367,7 +368,8 @@ cpu_thread_alloc(struct thread *td)
struct xstate_hdr *xhdr;
td->td_pcb = pcb = get_pcb_td(td);
- td->td_frame = (struct trapframe *)((caddr_t)pcb - 16) - 1;
+ td->td_frame = (struct trapframe *)((caddr_t)pcb -
+ VM86_STACK_SPACE) - 1;
pcb->pcb_ext = NULL;
pcb->pcb_save = get_pcb_user_save_pcb(pcb);
if (use_xsave) {
@@ -462,7 +464,7 @@ cpu_copy_thread(struct thread *td, struct thread *td0)
pcb2->pcb_ebp = 0;
pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */
pcb2->pcb_ebx = (int)td; /* trampoline arg */
- pcb2->pcb_eip = (int)fork_trampoline;
+ pcb2->pcb_eip = (int)fork_trampoline + setidt_disp;
pcb2->pcb_gs = rgs();
/*
* If we didn't copy the pcb, we'd need to do the following registers:
@@ -581,7 +583,7 @@ sf_buf_map(struct sf_buf *sf, int flags)
*/
ptep = vtopte(sf->kva);
opte = *ptep;
- *ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V |
+ *ptep = VM_PAGE_TO_PHYS(sf->m) | PG_RW | PG_V |
pmap_cache_bits(sf->m->md.pat_mode, 0);
/*