diff options
author | Joseph Koshy <jkoshy@FreeBSD.org> | 2009-02-03 09:01:45 +0000 |
---|---|---|
committer | Joseph Koshy <jkoshy@FreeBSD.org> | 2009-02-03 09:01:45 +0000 |
commit | bb471e331576bef62eb71ad5fe629aea05d10559 (patch) | |
tree | 96d3c14bf47c0873cb0d0904e26b1d0974eb9ee6 /sys/amd64/amd64/exception.S | |
parent | 2c204a16314db5f3e7b100139a8dfd7deb4e2bd0 (diff) | |
download | src-bb471e331576bef62eb71ad5fe629aea05d10559.tar.gz src-bb471e331576bef62eb71ad5fe629aea05d10559.zip |
Improve robustness of NMI handling, for NMIs recognized in kernel
mode.
- Make the NMI handler run on its own stack (TSS_IST2).
- Store the GSBASE value for each CPU just before the start of
each NMI stack, permitting efficient retrieval using %rsp-relative
addressing.
- For NMIs taken from kernel mode, program MSR_GSBASE explicitly
since one or both of MSR_GSBASE and MSR_KGSBASE can be potentially
invalid. The current contents of MSR_GSBASE are saved and restored
at exit.
- For NMIs handled from user mode, continue to use 'swapgs' to
load the per-CPU GSBASE.
Reviewed by: jeff
Debugging help: jeff
Tested by: gnn, Artem Belevich <artemb at gmail dot com>
Notes
Notes:
svn path=/head/; revision=188065
Diffstat (limited to 'sys/amd64/amd64/exception.S')
-rw-r--r-- | sys/amd64/amd64/exception.S | 77 |
1 files changed, 52 insertions, 25 deletions
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index 1c098e4cfdf0..897bfec0915c 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -383,22 +383,24 @@ IDTVEC(fast_syscall32) * NMI handling is special. * * First, NMIs do not respect the state of the processor's RFLAGS.IF - * bit and the NMI handler may be invoked at any time, including when - * the processor is in a critical section with RFLAGS.IF == 0. In - * particular, this means that the processor's GS.base values could be - * inconsistent on entry to the handler, and so we need to read - * MSR_GSBASE to determine if a 'swapgs' is needed. We use '%ebx', a - * C-preserved register, to remember whether to swap GS back on the - * exit path. + * bit. The NMI handler may be entered at any time, including when + * the processor is in a critical section with RFLAGS.IF == 0. + * The processor's GS.base value could be invalid on entry to the + * handler. * * Second, the processor treats NMIs specially, blocking further NMIs - * until an 'iretq' instruction is executed. We therefore need to - * execute the NMI handler with interrupts disabled to prevent a - * nested interrupt from executing an 'iretq' instruction and - * inadvertently taking the processor out of NMI mode. + * until an 'iretq' instruction is executed. We thus need to execute + * the NMI handler with interrupts disabled, to prevent a nested interrupt + * from executing an 'iretq' instruction and inadvertently taking the + * processor out of NMI mode. * - * Third, the NMI handler runs on its own stack (tss_ist1), shared - * with the double fault handler. + * Third, the NMI handler runs on its own stack (tss_ist2). The canonical + * GS.base value for the processor is stored just above the bottom of its + * NMI stack. For NMIs taken from kernel mode, the current value in + * the processor's GS.base is saved at entry to C-preserved register %r12, + * the canonical value for GS.base is then loaded into the processor, and + * the saved value is restored at exit time. For NMIs taken from user mode, + * the cheaper 'SWAPGS' instructions are used for swapping GS.base. */ IDTVEC(nmi) @@ -423,12 +425,22 @@ IDTVEC(nmi) movq %r15,TF_R15(%rsp) xorl %ebx,%ebx testb $SEL_RPL_MASK,TF_CS(%rsp) - jnz nmi_needswapgs /* we came from userland */ + jnz nmi_fromuserspace + /* + * We've interrupted the kernel. Preserve GS.base in %r12. + */ movl $MSR_GSBASE,%ecx rdmsr - cmpl $VM_MAXUSER_ADDRESS >> 32,%edx - jae nmi_calltrap /* GS.base holds a kernel VA */ -nmi_needswapgs: + movq %rax,%r12 + shlq $32,%rdx + orq %rdx,%r12 + /* Retrieve and load the canonical value for GS.base. */ + movq TF_SIZE(%rsp),%rdx + movl %edx,%eax + shrq $32,%rdx + wrmsr + jmp nmi_calltrap +nmi_fromuserspace: incl %ebx swapgs /* Note: this label is also used by ddb and gdb: */ @@ -439,14 +451,19 @@ nmi_calltrap: MEXITCOUNT #ifdef HWPMC_HOOKS /* - * Check if the current trap was from user mode and if so - * whether the current thread needs a user call chain to be - * captured. We are still in NMI mode at this point. + * Capture a userspace callchain if needed. + * + * - Check if the current trap was from user mode. + * - Check if the current thread is valid. + * - Check if the thread requires a user call chain to be + * captured. + * + * We are still in NMI mode at this point. */ - testb $SEL_RPL_MASK,TF_CS(%rsp) - jz nocallchain - movq PCPU(CURTHREAD),%rax /* curthread present? */ - orq %rax,%rax + testl %ebx,%ebx + jz nocallchain /* not from userspace */ + movq PCPU(CURTHREAD),%rax + orq %rax,%rax /* curthread present? */ jz nocallchain testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */ jz nocallchain @@ -498,8 +515,18 @@ outofnmi: nocallchain: #endif testl %ebx,%ebx - jz nmi_restoreregs + jz nmi_kernelexit swapgs + jmp nmi_restoreregs +nmi_kernelexit: + /* + * Put back the preserved MSR_GSBASE value. + */ + movl $MSR_GSBASE,%ecx + movq %r12,%rdx + movl %edx,%eax + shrq $32,%rdx + wrmsr nmi_restoreregs: movq TF_RDI(%rsp),%rdi movq TF_RSI(%rsp),%rsi |