aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorJung-uk Kim <jkim@FreeBSD.org>2009-03-17 00:48:11 +0000
committerJung-uk Kim <jkim@FreeBSD.org>2009-03-17 00:48:11 +0000
commitc66d2b38c8343afbd192f635a1b13d2e67ec70e8 (patch)
treec75b77d8208c29993db8a02658e1f33e9e5339b8 /sys
parentc0c0c35c3bc478fdb24961ff94b6e04caac2f36f (diff)
downloadsrc-c66d2b38c8343afbd192f635a1b13d2e67ec70e8.tar.gz
src-c66d2b38c8343afbd192f635a1b13d2e67ec70e8.zip
Initial suspend/resume support for amd64.
This code is heavily inspired by Takanori Watanabe's experimental SMP patch for i386 and large portion was shamelessly cut and pasted from Peter Wemm's AP boot code.
Notes
Notes: svn path=/head/; revision=189903
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/acpica/Makefile33
-rw-r--r--sys/amd64/acpica/acpi_machdep.c27
-rw-r--r--sys/amd64/acpica/acpi_switch.S177
-rw-r--r--sys/amd64/acpica/acpi_wakecode.S278
-rw-r--r--sys/amd64/acpica/acpi_wakeup.c397
-rwxr-xr-xsys/amd64/acpica/genwakecode.sh6
-rwxr-xr-xsys/amd64/acpica/genwakedata.sh9
-rw-r--r--sys/amd64/amd64/amd64_mem.c28
-rw-r--r--sys/amd64/amd64/apic_vector.S16
-rw-r--r--sys/amd64/amd64/cpu_switch.S74
-rw-r--r--sys/amd64/amd64/db_trace.c2
-rw-r--r--sys/amd64/amd64/genassym.c13
-rw-r--r--sys/amd64/amd64/mp_machdep.c40
-rw-r--r--sys/amd64/include/apicvar.h1
-rw-r--r--sys/amd64/include/pcb.h14
-rw-r--r--sys/amd64/include/smp.h2
-rw-r--r--sys/conf/files.amd6413
-rw-r--r--sys/dev/acpica/acpi.c53
-rw-r--r--sys/dev/acpica/acpi_ec.c2
-rw-r--r--sys/i386/i386/i686_mem.c28
-rw-r--r--sys/i386/i386/k6_mem.c1
-rw-r--r--sys/kern/subr_smp.c48
-rw-r--r--sys/sys/memrange.h2
-rw-r--r--sys/sys/smp.h3
24 files changed, 1247 insertions, 20 deletions
diff --git a/sys/amd64/acpica/Makefile b/sys/amd64/acpica/Makefile
new file mode 100644
index 000000000000..743728051e97
--- /dev/null
+++ b/sys/amd64/acpica/Makefile
@@ -0,0 +1,33 @@
+# $FreeBSD$
+
+# Correct path for kernel builds
+# Don't rely on the kernel's .depend file
+.ifdef MAKESRCPATH
+.PATH: ${MAKESRCPATH}
+DEPENDFILE=
+.else
+MAKESRCPATH= ${.CURDIR}
+CLEANFILES= acpi_wakecode.h acpi_wakedata.h acpi_wakecode.bin acpi_wakecode.o
+.endif
+.if ${CC} == "icc"
+CFLAGS+= -restrict
+NOSTDINC= -X
+.else
+NOSTDINC= -nostdinc
+.endif
+CFLAGS+= ${NOSTDINC} -include opt_global.h -I. -I${MAKESRCPATH}/../..
+
+all: acpi_wakecode.h acpi_wakedata.h
+
+acpi_wakecode.o: acpi_wakecode.S assym.s
+
+acpi_wakecode.bin: acpi_wakecode.o
+ objcopy -S -O binary acpi_wakecode.o acpi_wakecode.bin
+
+acpi_wakecode.h: acpi_wakecode.bin
+ sh ${MAKESRCPATH}/genwakecode.sh > acpi_wakecode.h
+
+acpi_wakedata.h: acpi_wakecode.bin
+ sh ${MAKESRCPATH}/genwakedata.sh > acpi_wakedata.h
+
+.include <bsd.prog.mk>
diff --git a/sys/amd64/acpica/acpi_machdep.c b/sys/amd64/acpica/acpi_machdep.c
index e9e9235c3753..8b4840efe033 100644
--- a/sys/amd64/acpica/acpi_machdep.c
+++ b/sys/amd64/acpica/acpi_machdep.c
@@ -31,25 +31,50 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/module.h>
+#include <sys/sysctl.h>
#include <contrib/dev/acpica/acpi.h>
#include <dev/acpica/acpivar.h>
#include <machine/nexusvar.h>
+SYSCTL_DECL(_debug_acpi);
+
+uint32_t acpi_resume_beep;
+TUNABLE_INT("debug.acpi.resume_beep", &acpi_resume_beep);
+SYSCTL_UINT(_debug_acpi, OID_AUTO, resume_beep, CTLFLAG_RW, &acpi_resume_beep,
+ 0, "Beep the PC speaker when resuming");
+uint32_t acpi_reset_video;
+TUNABLE_INT("hw.acpi.reset_video", &acpi_reset_video);
+
static int intr_model = ACPI_INTR_PIC;
+static struct apm_clone_data acpi_clone;
int
acpi_machdep_init(device_t dev)
{
- struct acpi_softc *sc;
+ struct acpi_softc *sc;
sc = devclass_get_softc(devclass_find("acpi"), 0);
+
+ /* Create a fake clone for /dev/acpi. */
+ STAILQ_INIT(&sc->apm_cdevs);
+ acpi_clone.cdev = sc->acpi_dev_t;
+ acpi_clone.acpi_sc = sc;
+ ACPI_LOCK(acpi);
+ STAILQ_INSERT_TAIL(&sc->apm_cdevs, &acpi_clone, entries);
+ ACPI_UNLOCK(acpi);
+ sc->acpi_clone = &acpi_clone;
acpi_install_wakeup_handler(sc);
if (intr_model != ACPI_INTR_PIC)
acpi_SetIntrModel(intr_model);
+ SYSCTL_ADD_UINT(&sc->acpi_sysctl_ctx,
+ SYSCTL_CHILDREN(sc->acpi_sysctl_tree), OID_AUTO,
+ "reset_video", CTLFLAG_RW, &acpi_reset_video, 0,
+ "Call the VESA reset BIOS vector on the resume path");
+
return (0);
}
diff --git a/sys/amd64/acpica/acpi_switch.S b/sys/amd64/acpica/acpi_switch.S
new file mode 100644
index 000000000000..d4f732a6eb2b
--- /dev/null
+++ b/sys/amd64/acpica/acpi_switch.S
@@ -0,0 +1,177 @@
+/*-
+ * Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
+ * Copyright (c) 2001 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
+ * Copyright (c) 2008-2009 Jung-uk Kim <jkim@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asmacros.h>
+#include <machine/specialreg.h>
+
+#include "acpi_wakedata.h"
+#include "assym.s"
+
+#define WAKEUP_DECL(member) \
+ .set WAKEUP_ ## member, wakeup_ ## member - wakeup_ctx
+
+ WAKEUP_DECL(xpcb)
+ WAKEUP_DECL(gdt)
+ WAKEUP_DECL(efer)
+ WAKEUP_DECL(pat)
+ WAKEUP_DECL(star)
+ WAKEUP_DECL(lstar)
+ WAKEUP_DECL(cstar)
+ WAKEUP_DECL(sfmask)
+ WAKEUP_DECL(cpu)
+
+#define WAKEUP_CTX(member) WAKEUP_ ## member (%rdi)
+#define WAKEUP_PCB(member) PCB_ ## member(%r11)
+#define WAKEUP_XPCB(member) XPCB_ ## member(%r11)
+
+ENTRY(acpi_restorecpu)
+ /* Switch to KPML4phys. */
+ movq %rsi, %rax
+ movq %rax, %cr3
+
+ /* Restore GDT. */
+ lgdt WAKEUP_CTX(gdt)
+ jmp 1f
+1:
+
+ /* Fetch PCB. */
+ movq WAKEUP_CTX(xpcb), %r11
+
+ /* Restore segment registers. */
+ mov WAKEUP_PCB(DS), %ds
+ mov WAKEUP_PCB(ES), %es
+ mov WAKEUP_XPCB(SS), %ss
+ mov WAKEUP_PCB(FS), %fs
+ mov WAKEUP_PCB(GS), %gs
+
+ movl $MSR_FSBASE, %ecx
+ movl WAKEUP_PCB(FSBASE), %eax
+ movl 4 + WAKEUP_PCB(FSBASE), %edx
+ wrmsr
+ movl $MSR_GSBASE, %ecx
+ movl WAKEUP_PCB(GSBASE), %eax
+ movl 4 + WAKEUP_PCB(GSBASE), %edx
+ wrmsr
+ movl $MSR_KGSBASE, %ecx
+ movl WAKEUP_XPCB(KGSBASE), %eax
+ movl 4 + WAKEUP_XPCB(KGSBASE), %edx
+ wrmsr
+
+ /* Restore EFER. */
+ movl $MSR_EFER, %ecx
+ movl WAKEUP_CTX(efer), %eax
+ wrmsr
+
+ /* Restore PAT. */
+ movl $MSR_PAT, %ecx
+ movl WAKEUP_CTX(pat), %eax
+ movl 4 + WAKEUP_CTX(pat), %edx
+ wrmsr
+
+ /* Restore fast syscall stuff. */
+ movl $MSR_STAR, %ecx
+ movl WAKEUP_CTX(star), %eax
+ movl 4 + WAKEUP_CTX(star), %edx
+ wrmsr
+ movl $MSR_LSTAR, %ecx
+ movl WAKEUP_CTX(lstar), %eax
+ movl 4 + WAKEUP_CTX(lstar), %edx
+ wrmsr
+ movl $MSR_CSTAR, %ecx
+ movl WAKEUP_CTX(cstar), %eax
+ movl 4 + WAKEUP_CTX(cstar), %edx
+ wrmsr
+ movl $MSR_SF_MASK, %ecx
+ movl WAKEUP_CTX(sfmask), %eax
+ wrmsr
+
+ /* Restore CR0, CR2 and CR4. */
+ movq WAKEUP_XPCB(CR0), %rax
+ movq %rax, %cr0
+ movq WAKEUP_XPCB(CR2), %rax
+ movq %rax, %cr2
+ movq WAKEUP_XPCB(CR4), %rax
+ movq %rax, %cr4
+
+ /* Restore descriptor tables. */
+ lidt WAKEUP_XPCB(IDT)
+ lldt WAKEUP_XPCB(LDT)
+ movw WAKEUP_XPCB(TR), %ax
+ ltr %ax
+
+ /* Restore other callee saved registers. */
+ movq WAKEUP_PCB(R15), %r15
+ movq WAKEUP_PCB(R14), %r14
+ movq WAKEUP_PCB(R13), %r13
+ movq WAKEUP_PCB(R12), %r12
+ movq WAKEUP_PCB(RBP), %rbp
+ movq WAKEUP_PCB(RSP), %rsp
+ movq WAKEUP_PCB(RBX), %rbx
+
+ /* Restore debug registers. */
+ movq WAKEUP_PCB(DR0), %rax
+ movq %rax, %dr0
+ movq WAKEUP_PCB(DR1), %rax
+ movq %rax, %dr1
+ movq WAKEUP_PCB(DR2), %rax
+ movq %rax, %dr2
+ movq WAKEUP_PCB(DR3), %rax
+ movq %rax, %dr3
+ movq WAKEUP_PCB(DR6), %rax
+ movq %rax, %dr6
+ movq WAKEUP_PCB(DR7), %rax
+ movq %rax, %dr7
+
+ /* Restore return address. */
+ movq WAKEUP_PCB(RIP), %rax
+ movq %rax, (%rsp)
+
+ /* Indicate the CPU is resumed. */
+ xorl %eax, %eax
+ movl %eax, WAKEUP_CTX(cpu)
+
+ ret
+END(acpi_restorecpu)
+
+ENTRY(acpi_savecpu)
+ /* Fetch XPCB and save CPU context. */
+ movq %rdi, %r10
+ call savectx2
+ movq %r10, %r11
+
+ /* Patch caller's return address and stack pointer. */
+ movq (%rsp), %rax
+ movq %rax, WAKEUP_PCB(RIP)
+ movq %rsp, %rax
+ movq %rax, WAKEUP_PCB(RSP)
+
+ movl $1, %eax
+ ret
+END(acpi_savecpu)
diff --git a/sys/amd64/acpica/acpi_wakecode.S b/sys/amd64/acpica/acpi_wakecode.S
new file mode 100644
index 000000000000..111486e8d1a4
--- /dev/null
+++ b/sys/amd64/acpica/acpi_wakecode.S
@@ -0,0 +1,278 @@
+/*-
+ * Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
+ * Copyright (c) 2001 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
+ * Copyright (c) 2003 Peter Wemm
+ * Copyright (c) 2008 Jung-uk Kim <jkim@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#define LOCORE
+
+#include <machine/asmacros.h>
+#include <machine/specialreg.h>
+
+#include "assym.s"
+
+/*
+ * Resume entry point for real mode.
+ *
+ * If XFirmwareWakingVector is zero and FirmwareWakingVector is non-zero
+ * in FACS, the BIOS enters here in real mode after POST with CS set to
+ * (FirmwareWakingVector >> 4) and IP set to (FirmwareWakingVector & 0xf).
+ * Depending on the previous sleep state, we may need to initialize more
+ * of the system (i.e., S3 suspend-to-RAM vs. S4 suspend-to-disk).
+ *
+ * Note: If XFirmwareWakingVector is non-zero, it should disable address
+ * translation/paging and interrupts, load all segment registers with
+ * a flat 4 GB address space, and set EFLAGS.IF to zero. Currently
+ * this mode is not supported by this code.
+ */
+
+ .data /* So we can modify it */
+
+ ALIGN_TEXT
+wakeup_start:
+ .code16
+ /*
+ * Set up segment registers for real mode, a small stack for
+ * any calls we make, and clear any flags.
+ */
+ cli /* make sure no interrupts */
+ cld
+ mov %cs, %ax /* copy %cs to %ds. Remember these */
+ mov %ax, %ds /* are offsets rather than selectors */
+ mov %ax, %ss
+ movw $PAGE_SIZE - 8, %sp
+ pushw $0
+ popfw
+
+ /* To debug resume hangs, beep the speaker if the user requested. */
+ cmpw $0, resume_beep - wakeup_start
+ je 1f
+ movb $0xc0, %al
+ outb %al, $0x42
+ movb $0x04, %al
+ outb %al, $0x42
+ inb $0x61, %al
+ orb $0x3, %al
+ outb %al, $0x61
+ movw $0, resume_beep - wakeup_start
+1:
+
+ /* Re-initialize video BIOS if the reset_video tunable is set. */
+ cmpw $0, reset_video - wakeup_start
+ je 1f
+ lcall $0xc000, $3
+ movw $0, reset_video - wakeup_start
+
+ /*
+ * Set up segment registers for real mode again in case the
+ * previous BIOS call clobbers them.
+ */
+ mov %cs, %ax
+ mov %ax, %ds
+ mov %ax, %ss
+1:
+
+ /*
+ * Find relocation base and patch the gdt descript and ljmp targets
+ */
+ xorl %ebx, %ebx
+ mov %cs, %bx
+ sall $4, %ebx /* %ebx is now our relocation base */
+
+ /*
+ * Load the descriptor table pointer. We'll need it when running
+ * in 16-bit protected mode.
+ */
+ lgdtl bootgdtdesc - wakeup_start
+
+ /* Enable protected mode */
+ movl $CR0_PE, %eax
+ mov %eax, %cr0
+
+ /*
+ * Now execute a far jump to turn on protected mode. This
+ * causes the segment registers to turn into selectors and causes
+ * %cs to be loaded from the gdt.
+ *
+ * The following instruction is:
+ * ljmpl $bootcode32 - bootgdt, $wakeup_32 - wakeup_start
+ * but gas cannot assemble that. And besides, we patch the targets
+ * in early startup and its a little clearer what we are patching.
+ */
+wakeup_sw32:
+ .byte 0x66 /* size override to 32 bits */
+ .byte 0xea /* opcode for far jump */
+ .long wakeup_32 - wakeup_start /* offset in segment */
+ .word bootcode32 - bootgdt /* index in gdt for 32 bit code */
+
+ /*
+ * At this point, we are running in 32 bit legacy protected mode.
+ */
+ .code32
+wakeup_32:
+
+ mov $bootdata32 - bootgdt, %eax
+ mov %ax, %ds
+
+ /* Turn on the PAE and PSE bits for when paging is enabled */
+ mov %cr4, %eax
+ orl $(CR4_PAE | CR4_PSE), %eax
+ mov %eax, %cr4
+
+ /*
+ * Enable EFER.LME so that we get long mode when all the prereqs are
+ * in place. In this case, it turns on when CR0_PG is finally enabled.
+ * Pick up a few other EFER bits that we'll use need we're here.
+ */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ orl $EFER_LME | EFER_SCE, %eax
+ wrmsr
+
+ /*
+ * Point to the embedded page tables for startup. Note that this
+ * only gets accessed after we're actually in 64 bit mode, however
+ * we can only set the bottom 32 bits of %cr3 in this state. This
+ * means we are required to use a temporary page table that is below
+ * the 4GB limit. %ebx is still our relocation base. We could just
+ * subtract 3 * PAGE_SIZE, but that would be too easy.
+ */
+ leal wakeup_pagetables - wakeup_start(%ebx), %eax
+ movl (%eax), %eax
+ mov %eax, %cr3
+
+ /*
+ * Finally, switch to long bit mode by enabling paging. We have
+ * to be very careful here because all the segmentation disappears
+ * out from underneath us. The spec says we can depend on the
+ * subsequent pipelined branch to execute, but *only if* everthing
+ * is still identity mapped. If any mappings change, the pipeline
+ * will flush.
+ */
+ mov %cr0, %eax
+ orl $CR0_PG, %eax
+ mov %eax, %cr0
+
+ /*
+ * At this point paging is enabled, and we are in "compatability" mode.
+ * We do another far jump to reload %cs with the 64 bit selector.
+ * %cr3 points to a 4-level page table page.
+ * We cannot yet jump all the way to the kernel because we can only
+ * specify a 32 bit linear address. So, yet another trampoline.
+ *
+ * The following instruction is:
+ * ljmp $bootcode64 - bootgdt, $wakeup_64 - wakeup_start
+ * but gas cannot assemble that. And besides, we patch the targets
+ * in early startup and its a little clearer what we are patching.
+ */
+wakeup_sw64:
+ .byte 0xea /* opcode for far jump */
+ .long wakeup_64 - wakeup_start /* offset in segment */
+ .word bootcode64 - bootgdt /* index in gdt for 64 bit code */
+
+ /*
+ * Yeehar! We're running in 64-bit mode! We can mostly ignore our
+ * segment registers, and get on with it.
+ * Note that we are running at the correct virtual address, but with
+ * a 1:1 1GB mirrored mapping over entire address space. We had better
+ * switch to a real %cr3 promptly so that we can get to the direct map
+ * space. Remember that jmp is relative and that we've been relocated,
+ * so use an indirect jump.
+ */
+ .code64
+wakeup_64:
+ mov $bootdata64 - bootgdt, %eax
+ mov %ax, %ds
+
+ /* Restore arguments and return. */
+ movq wakeup_ctx - wakeup_start(%rbx), %rdi
+ movq wakeup_kpml4 - wakeup_start(%rbx), %rsi
+ movq wakeup_retaddr - wakeup_start(%rbx), %rax
+ jmp *%rax
+
+ ALIGN_DATA
+bootgdt:
+ .long 0x00000000
+ .long 0x00000000
+
+bootcode64:
+ .long 0x0000ffff
+ .long 0x00af9b00
+
+bootdata64:
+ .long 0x0000ffff
+ .long 0x00af9300
+
+bootcode32:
+ .long 0x0000ffff
+ .long 0x00cf9b00
+
+bootdata32:
+ .long 0x0000ffff
+ .long 0x00cf9300
+bootgdtend:
+
+wakeup_pagetables:
+ .long 0
+
+bootgdtdesc:
+ .word bootgdtend - bootgdt /* Length */
+ .long bootgdt - wakeup_start /* Offset plus %ds << 4 */
+
+ ALIGN_DATA
+resume_beep:
+ .long 0
+reset_video:
+ .long 0
+wakeup_retaddr:
+ .quad 0
+wakeup_kpml4:
+ .quad 0
+
+wakeup_ctx:
+ .quad 0
+wakeup_xpcb:
+ .quad 0
+wakeup_gdt:
+ .word 0
+ .quad 0
+wakeup_efer:
+ .quad 0
+wakeup_pat:
+ .quad 0
+wakeup_star:
+ .quad 0
+wakeup_lstar:
+ .quad 0
+wakeup_cstar:
+ .quad 0
+wakeup_sfmask:
+ .quad 0
+wakeup_cpu:
+ .long 0
+dummy:
diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c
index 53868e40a85a..5e3d5b13433d 100644
--- a/sys/amd64/acpica/acpi_wakeup.c
+++ b/sys/amd64/acpica/acpi_wakeup.c
@@ -1,6 +1,8 @@
/*-
* Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
* Copyright (c) 2001 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
+ * Copyright (c) 2003 Peter Wemm
+ * Copyright (c) 2008-2009 Jung-uk Kim <jkim@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,18 +31,411 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/memrange.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/types.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/intr_machdep.h>
+#include <machine/pcb.h>
+#include <machine/pmap.h>
+#include <machine/specialreg.h>
+#include <machine/vmparam.h>
+
+#ifdef SMP
+#include <machine/apicreg.h>
+#include <machine/smp.h>
+#endif
#include <contrib/dev/acpica/acpi.h>
#include <dev/acpica/acpivar.h>
+#include "acpi_wakecode.h"
+#include "acpi_wakedata.h"
+
+/* Make sure the code is less than a page and leave room for the stack. */
+CTASSERT(sizeof(wakecode) < PAGE_SIZE - 1024);
+
+#ifndef _SYS_CDEFS_H_
+#error this file needs sys/cdefs.h as a prerequisite
+#endif
+
+extern uint32_t acpi_resume_beep;
+extern uint32_t acpi_reset_video;
+
+#ifdef SMP
+extern struct xpcb *stopxpcbs;
+#else
+static struct xpcb *stopxpcbs;
+#endif
+
+int acpi_restorecpu(struct xpcb *, vm_offset_t);
+int acpi_savecpu(struct xpcb *);
+
+static void acpi_reset_tss(int cpu);
+static void acpi_alloc_wakeup_handler(void);
+static void acpi_stop_beep(void *);
+
+#ifdef SMP
+static int acpi_wakeup_ap(struct acpi_softc *, int);
+static void acpi_wakeup_cpus(struct acpi_softc *, cpumask_t);
+#endif
+
+#define WAKECODE_VADDR(sc) ((sc)->acpi_wakeaddr + (3 * PAGE_SIZE))
+#define WAKECODE_PADDR(sc) ((sc)->acpi_wakephys + (3 * PAGE_SIZE))
+#define WAKECODE_FIXUP(offset, type, val) do { \
+ type *addr; \
+ addr = (type *)(WAKECODE_VADDR(sc) + offset); \
+ *addr = val; \
+} while (0)
+
+/* Turn off bits 1&2 of the PIT, stopping the beep. */
+static void
+acpi_stop_beep(void *arg)
+{
+ outb(0x61, inb(0x61) & ~0x3);
+}
+
+#ifdef SMP
+static int
+acpi_wakeup_ap(struct acpi_softc *sc, int cpu)
+{
+ int vector = (WAKECODE_PADDR(sc) >> 12) & 0xff;
+ int apic_id = cpu_apic_ids[cpu];
+ int ms;
+
+ WAKECODE_FIXUP(wakeup_xpcb, struct xpcb *, &stopxpcbs[cpu]);
+ WAKECODE_FIXUP(wakeup_gdt, uint16_t, stopxpcbs[cpu].xpcb_gdt.rd_limit);
+ WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t,
+ stopxpcbs[cpu].xpcb_gdt.rd_base);
+ WAKECODE_FIXUP(wakeup_cpu, int, cpu);
+
+ acpi_reset_tss(cpu);
+
+ /* do an INIT IPI: assert RESET */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
+
+ /* wait for pending status end */
+ lapic_ipi_wait(-1);
+
+ /* do an INIT IPI: deassert RESET */
+ lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
+
+ /* wait for pending status end */
+ DELAY(10000); /* wait ~10mS */
+ lapic_ipi_wait(-1);
+
+ /*
+ * next we do a STARTUP IPI: the previous INIT IPI might still be
+ * latched, (P5 bug) this 1st STARTUP would then terminate
+ * immediately, and the previously started INIT IPI would continue. OR
+ * the previous INIT IPI has already run. and this STARTUP IPI will
+ * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
+ * will run.
+ */
+
+ /* do a STARTUP IPI */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ vector, apic_id);
+ lapic_ipi_wait(-1);
+ DELAY(200); /* wait ~200uS */
+
+ /*
+ * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
+ * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
+ * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
+ * recognized after hardware RESET or INIT IPI.
+ */
+
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ vector, apic_id);
+ lapic_ipi_wait(-1);
+ DELAY(200); /* wait ~200uS */
+
+ /* Wait up to 5 seconds for it to start. */
+ for (ms = 0; ms < 5000; ms++) {
+ if (*(int *)(WAKECODE_VADDR(sc) + wakeup_cpu) == 0)
+ return (1); /* return SUCCESS */
+ DELAY(1000);
+ }
+ return (0); /* return FAILURE */
+}
+
+#define WARMBOOT_TARGET 0
+#define WARMBOOT_OFF (KERNBASE + 0x0467)
+#define WARMBOOT_SEG (KERNBASE + 0x0469)
+
+#define CMOS_REG (0x70)
+#define CMOS_DATA (0x71)
+#define BIOS_RESET (0x0f)
+#define BIOS_WARM (0x0a)
+
+static void
+acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus)
+{
+ uint32_t mpbioswarmvec;
+ cpumask_t map;
+ int cpu;
+ u_char mpbiosreason;
+
+ /* save the current value of the warm-start vector */
+ mpbioswarmvec = *((uint32_t *)WARMBOOT_OFF);
+ outb(CMOS_REG, BIOS_RESET);
+ mpbiosreason = inb(CMOS_DATA);
+
+ /* setup a vector to our boot code */
+ *((volatile u_short *)WARMBOOT_OFF) = WARMBOOT_TARGET;
+ *((volatile u_short *)WARMBOOT_SEG) = WAKECODE_PADDR(sc) >> 4;
+ outb(CMOS_REG, BIOS_RESET);
+ outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
+
+ /* Wake up each AP. */
+ for (cpu = 1; cpu < mp_ncpus; cpu++) {
+ map = 1ul << cpu;
+ if ((wakeup_cpus & map) != map)
+ continue;
+ if (acpi_wakeup_ap(sc, cpu) == 0) {
+ /* restore the warmstart vector */
+ *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
+ panic("acpi_wakeup: failed to resume AP #%d (PHY #%d)",
+ cpu, cpu_apic_ids[cpu]);
+ }
+ }
+
+ /* restore the warmstart vector */
+ *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
+
+ outb(CMOS_REG, BIOS_RESET);
+ outb(CMOS_DATA, mpbiosreason);
+}
+#endif
+
+static void
+acpi_reset_tss(int cpu)
+{
+ uint32_t *tss;
+
+ /*
+ * We have to clear "task busy" bit in TSS to restore
+ * task register later. Otherwise, ltr causes GPF.
+ */
+ tss = (uint32_t *)&gdt[NGDT * cpu + GPROC0_SEL] + 1;
+ *tss &= ~((SDT_SYSBSY ^ SDT_SYSTSS) << 8);
+}
+
int
acpi_sleep_machdep(struct acpi_softc *sc, int state)
{
- return (0);
+ struct savefpu *stopfpu;
+#ifdef SMP
+ cpumask_t wakeup_cpus;
+#endif
+ register_t cr3, rf;
+ ACPI_STATUS status;
+ int ret;
+
+ ret = -1;
+
+ if (sc->acpi_wakeaddr == 0ul)
+ return (ret);
+
+#ifdef SMP
+ wakeup_cpus = PCPU_GET(other_cpus);
+#endif
+
+ AcpiSetFirmwareWakingVector(WAKECODE_PADDR(sc));
+
+ rf = intr_disable();
+ intr_suspend();
+
+ /*
+ * Temporarily switch to the kernel pmap because it provides
+ * an identity mapping (setup at boot) for the low physical
+ * memory region containing the wakeup code.
+ */
+ cr3 = rcr3();
+ load_cr3(KPML4phys);
+
+ stopfpu = &stopxpcbs[0].xpcb_pcb.pcb_save;
+ if (acpi_savecpu(&stopxpcbs[0])) {
+ fpugetregs(curthread, stopfpu);
+
+#ifdef SMP
+ if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) {
+ device_printf(sc->acpi_dev,
+ "Failed to suspend APs: CPU mask = 0x%jx\n",
+ (uintmax_t)(wakeup_cpus & ~stopped_cpus));
+ goto out;
+ }
+#endif
+
+ WAKECODE_FIXUP(resume_beep, uint32_t, acpi_resume_beep);
+ WAKECODE_FIXUP(reset_video, uint32_t, acpi_reset_video);
+
+ WAKECODE_FIXUP(wakeup_xpcb, struct xpcb *, &stopxpcbs[0]);
+ WAKECODE_FIXUP(wakeup_gdt, uint16_t,
+ stopxpcbs[0].xpcb_gdt.rd_limit);
+ WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t,
+ stopxpcbs[0].xpcb_gdt.rd_base);
+ WAKECODE_FIXUP(wakeup_cpu, int, 0);
+
+ acpi_reset_tss(0);
+
+ /* Call ACPICA to enter the desired sleep state */
+ if (state == ACPI_STATE_S4 && sc->acpi_s4bios)
+ status = AcpiEnterSleepStateS4bios();
+ else
+ status = AcpiEnterSleepState(state);
+
+ if (status != AE_OK) {
+ device_printf(sc->acpi_dev,
+ "AcpiEnterSleepState failed - %s\n",
+ AcpiFormatException(status));
+ goto out;
+ }
+
+ for (;;)
+ ia32_pause();
+ } else {
+ fpusetregs(curthread, stopfpu);
+
+ WAKECODE_FIXUP(resume_beep, uint32_t, 0);
+ WAKECODE_FIXUP(reset_video, uint32_t, 0);
+#ifdef SMP
+ if (wakeup_cpus != 0)
+ acpi_wakeup_cpus(sc, wakeup_cpus);
+#endif
+ ret = 0;
+ }
+
+out:
+#ifdef SMP
+ if (wakeup_cpus != 0)
+ restart_cpus(wakeup_cpus);
+#endif
+
+ load_cr3(cr3);
+ intr_resume();
+ intr_restore(rf);
+
+ AcpiSetFirmwareWakingVector(0);
+
+ if (ret == 0 && mem_range_softc.mr_op != NULL &&
+ mem_range_softc.mr_op->reinit != NULL)
+ mem_range_softc.mr_op->reinit(&mem_range_softc);
+
+ /* If we beeped, turn it off after a delay. */
+ if (acpi_resume_beep)
+ timeout(acpi_stop_beep, NULL, 3 * hz);
+
+ return (ret);
+}
+
+static vm_offset_t acpi_wakeaddr;
+
+static void
+acpi_alloc_wakeup_handler(void)
+{
+ void *wakeaddr;
+
+ if (!cold)
+ return;
+
+ /*
+ * Specify the region for our wakeup code. We want it in the low 1 MB
+ * region, excluding video memory and above (0xa0000). We ask for
+ * it to be page-aligned, just to be safe.
+ */
+ wakeaddr = contigmalloc(4 * PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0, 0x9ffff,
+ PAGE_SIZE, 0ul);
+ if (wakeaddr == NULL) {
+ printf("%s: can't alloc wake memory\n", __func__);
+ return;
+ }
+ stopxpcbs = malloc(mp_ncpus * sizeof(*stopxpcbs), M_DEVBUF, M_NOWAIT);
+ if (stopxpcbs == NULL) {
+ contigfree(wakeaddr, 4 * PAGE_SIZE, M_DEVBUF);
+ printf("%s: can't alloc CPU state memory\n", __func__);
+ return;
+ }
+ acpi_wakeaddr = (vm_offset_t)wakeaddr;
}
+SYSINIT(acpiwakeup, SI_SUB_KMEM, SI_ORDER_ANY, acpi_alloc_wakeup_handler, 0);
+
void
acpi_install_wakeup_handler(struct acpi_softc *sc)
{
+ uint64_t *pt4, *pt3, *pt2;
+ int i;
+
+ if (acpi_wakeaddr == 0ul)
+ return;
+
+ sc->acpi_wakeaddr = acpi_wakeaddr;
+ sc->acpi_wakephys = vtophys(acpi_wakeaddr);
+
+ bcopy(wakecode, (void *)WAKECODE_VADDR(sc), sizeof(wakecode));
+
+ /* Patch GDT base address, ljmp targets and page table base address. */
+ WAKECODE_FIXUP((bootgdtdesc + 2), uint32_t,
+ WAKECODE_PADDR(sc) + bootgdt);
+ WAKECODE_FIXUP((wakeup_sw32 + 2), uint32_t,
+ WAKECODE_PADDR(sc) + wakeup_32);
+ WAKECODE_FIXUP((wakeup_sw64 + 1), uint32_t,
+ WAKECODE_PADDR(sc) + wakeup_64);
+ WAKECODE_FIXUP(wakeup_pagetables, uint32_t, sc->acpi_wakephys);
+
+ /* Save pointers to some global data. */
+ WAKECODE_FIXUP(wakeup_retaddr, void *, acpi_restorecpu);
+ WAKECODE_FIXUP(wakeup_kpml4, uint64_t, KPML4phys);
+ WAKECODE_FIXUP(wakeup_ctx, vm_offset_t,
+ WAKECODE_VADDR(sc) + wakeup_ctx);
+ WAKECODE_FIXUP(wakeup_efer, uint64_t, rdmsr(MSR_EFER));
+ WAKECODE_FIXUP(wakeup_pat, uint64_t, rdmsr(MSR_PAT));
+ WAKECODE_FIXUP(wakeup_star, uint64_t, rdmsr(MSR_STAR));
+ WAKECODE_FIXUP(wakeup_lstar, uint64_t, rdmsr(MSR_LSTAR));
+ WAKECODE_FIXUP(wakeup_cstar, uint64_t, rdmsr(MSR_CSTAR));
+ WAKECODE_FIXUP(wakeup_sfmask, uint64_t, rdmsr(MSR_SF_MASK));
+
+ /* Build temporary page tables below realmode code. */
+ pt4 = (uint64_t *)acpi_wakeaddr;
+ pt3 = pt4 + (PAGE_SIZE) / sizeof(uint64_t);
+ pt2 = pt3 + (PAGE_SIZE) / sizeof(uint64_t);
+
+ /* Create the initial 1GB replicated page tables */
+ for (i = 0; i < 512; i++) {
+ /*
+ * Each slot of the level 4 pages points
+ * to the same level 3 page
+ */
+ pt4[i] = (uint64_t)(sc->acpi_wakephys + PAGE_SIZE);
+ pt4[i] |= PG_V | PG_RW | PG_U;
+
+ /*
+ * Each slot of the level 3 pages points
+ * to the same level 2 page
+ */
+ pt3[i] = (uint64_t)(sc->acpi_wakephys + (2 * PAGE_SIZE));
+ pt3[i] |= PG_V | PG_RW | PG_U;
+
+ /* The level 2 page slots are mapped with 2MB pages for 1GB. */
+ pt2[i] = i * (2 * 1024 * 1024);
+ pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
+ }
+
+ if (bootverbose)
+ device_printf(sc->acpi_dev, "wakeup code va %p pa %p\n",
+ (void *)sc->acpi_wakeaddr, (void *)sc->acpi_wakephys);
}
diff --git a/sys/amd64/acpica/genwakecode.sh b/sys/amd64/acpica/genwakecode.sh
new file mode 100755
index 000000000000..b4853863ae7e
--- /dev/null
+++ b/sys/amd64/acpica/genwakecode.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+# $FreeBSD$
+#
+file2c 'static char wakecode[] = {' '};' <acpi_wakecode.bin
+
+exit 0
diff --git a/sys/amd64/acpica/genwakedata.sh b/sys/amd64/acpica/genwakedata.sh
new file mode 100755
index 000000000000..6d4181ecacaf
--- /dev/null
+++ b/sys/amd64/acpica/genwakedata.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+# $FreeBSD$
+#
+nm -n --defined-only acpi_wakecode.o | while read offset dummy what
+do
+ echo "#define ${what} 0x${offset}"
+done
+
+exit 0
diff --git a/sys/amd64/amd64/amd64_mem.c b/sys/amd64/amd64/amd64_mem.c
index 2b5a73db2151..d7959fd45fb7 100644
--- a/sys/amd64/amd64/amd64_mem.c
+++ b/sys/amd64/amd64/amd64_mem.c
@@ -73,11 +73,13 @@ static void amd64_mrinit(struct mem_range_softc *sc);
static int amd64_mrset(struct mem_range_softc *sc,
struct mem_range_desc *mrd, int *arg);
static void amd64_mrAPinit(struct mem_range_softc *sc);
+static void amd64_mrreinit(struct mem_range_softc *sc);
static struct mem_range_ops amd64_mrops = {
amd64_mrinit,
amd64_mrset,
- amd64_mrAPinit
+ amd64_mrAPinit,
+ amd64_mrreinit
};
/* XXX for AP startup hook */
@@ -668,6 +670,30 @@ amd64_mrAPinit(struct mem_range_softc *sc)
wrmsr(MSR_MTRRdefType, mtrrdef);
}
+/*
+ * Re-initialise running CPU(s) MTRRs to match the ranges in the descriptor
+ * list.
+ *
+ * XXX Must be called with interrupts enabled.
+ */
+static void
+amd64_mrreinit(struct mem_range_softc *sc)
+{
+#ifdef SMP
+ /*
+ * We should use ipi_all_but_self() to call other CPUs into a
+ * locking gate, then call a target function to do this work.
+ * The "proper" solution involves a generalised locking gate
+ * implementation, not ready yet.
+ */
+ smp_rendezvous(NULL, (void *)amd64_mrAPinit, NULL, sc);
+#else
+ disable_intr(); /* disable interrupts */
+ amd64_mrAPinit(sc);
+ enable_intr();
+#endif
+}
+
static void
amd64_mem_drvinit(void *unused)
{
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index 14a6f87576a1..0306bb37ea42 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -224,6 +224,22 @@ IDTVEC(cpustop)
iretq
/*
+ * Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(cpususpend)
+ PUSH_FRAME
+
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
+
+ call cpususpend_handler
+
+ POP_FRAME
+ iretq
+
+/*
* Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
*
* - Calls the generic rendezvous action function.
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index bc1a4bbb42c2..0c59703f921b 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -325,9 +325,8 @@ load_dr:
movq %r11,%dr6
movq %rax,%dr7
jmp done_load_dr
-
END(cpu_switch)
-
+
/*
* savectx(pcb)
* Update pcb, saving current processor state.
@@ -386,3 +385,74 @@ ENTRY(savectx)
ret
END(savectx)
+
+/*
+ * savectx2(xpcb)
+ * Update xpcb, saving current processor state.
+ */
+ENTRY(savectx2)
+ /* Fetch XPCB. */
+ movq %rdi,%r8
+
+ /* Save caller's return address. */
+ movq (%rsp),%rax
+ movq %rax,PCB_RIP(%r8)
+
+ mov %ds,PCB_DS(%r8)
+ mov %es,PCB_ES(%r8)
+ mov %ss,XPCB_SS(%r8)
+ mov %fs,PCB_FS(%r8)
+ mov %gs,PCB_GS(%r8)
+
+ movq %rbx,PCB_RBX(%r8)
+ movq %rsp,PCB_RSP(%r8)
+ movq %rbp,PCB_RBP(%r8)
+ movq %r12,PCB_R12(%r8)
+ movq %r13,PCB_R13(%r8)
+ movq %r14,PCB_R14(%r8)
+ movq %r15,PCB_R15(%r8)
+
+ movq %cr0,%rax
+ movq %rax,XPCB_CR0(%r8)
+ movq %cr2,%rax
+ movq %rax,XPCB_CR2(%r8)
+ movq %cr4,%rax
+ movq %rax,XPCB_CR4(%r8)
+
+ movq %dr0,%rax
+ movq %rax,PCB_DR0(%r8)
+ movq %dr1,%rax
+ movq %rax,PCB_DR1(%r8)
+ movq %dr2,%rax
+ movq %rax,PCB_DR2(%r8)
+ movq %dr3,%rax
+ movq %rax,PCB_DR3(%r8)
+ movq %dr6,%rax
+ movq %rax,PCB_DR6(%r8)
+ movq %dr7,%rax
+ movq %rax,PCB_DR7(%r8)
+
+ sgdt XPCB_GDT(%r8)
+ sidt XPCB_IDT(%r8)
+ sldt XPCB_LDT(%r8)
+ str XPCB_TR(%r8)
+
+ movl $MSR_FSBASE,%ecx
+ rdmsr
+ shlq $32,%rdx
+ leaq (%rax,%rdx),%rax
+ movq %rax,PCB_FSBASE(%r8)
+ movl $MSR_GSBASE,%ecx
+ rdmsr
+ shlq $32,%rdx
+ leaq (%rax,%rdx),%rax
+ movq %rax,PCB_GSBASE(%r8)
+ movl $MSR_KGSBASE,%ecx
+ rdmsr
+ shlq $32,%rdx
+ leaq (%rax,%rdx),%rax
+ movq %rax,XPCB_KGSBASE(%r8)
+
+ movl $1, %eax
+ ret
+END(savectx2)
diff --git a/sys/amd64/amd64/db_trace.c b/sys/amd64/amd64/db_trace.c
index 9676963f4f2b..50a5f4d2ca7a 100644
--- a/sys/amd64/amd64/db_trace.c
+++ b/sys/amd64/amd64/db_trace.c
@@ -316,6 +316,7 @@ db_nextframe(struct amd64_frame **fp, db_addr_t *ip, struct thread *td)
strcmp(name, "Xtimerint") == 0 ||
strcmp(name, "Xipi_intr_bitmap_handler") == 0 ||
strcmp(name, "Xcpustop") == 0 ||
+ strcmp(name, "Xcpususpend") == 0 ||
strcmp(name, "Xrendezvous") == 0)
frame_type = INTERRUPT;
else if (strcmp(name, "Xfast_syscall") == 0)
@@ -327,6 +328,7 @@ db_nextframe(struct amd64_frame **fp, db_addr_t *ip, struct thread *td)
/* XXX: These are interrupts with trap frames. */
else if (strcmp(name, "Xtimerint") == 0 ||
strcmp(name, "Xcpustop") == 0 ||
+ strcmp(name, "Xcpususpend") == 0 ||
strcmp(name, "Xrendezvous") == 0 ||
strcmp(name, "Xipi_intr_bitmap_handler") == 0)
frame_type = TRAP_INTERRUPT;
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 1924be7f534b..5aa3134855c2 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -155,6 +155,19 @@ ASSYM(PCB_GS32SD, offsetof(struct pcb, pcb_gs32sd));
ASSYM(PCB_SIZE, sizeof(struct pcb));
+ASSYM(XPCB_PCB, offsetof(struct xpcb, xpcb_pcb));
+ASSYM(XPCB_CR0, offsetof(struct xpcb, xpcb_cr0));
+ASSYM(XPCB_CR2, offsetof(struct xpcb, xpcb_cr2));
+ASSYM(XPCB_CR4, offsetof(struct xpcb, xpcb_cr4));
+ASSYM(XPCB_KGSBASE, offsetof(struct xpcb, xpcb_kgsbase));
+ASSYM(XPCB_SS, offsetof(struct xpcb, xpcb_ss));
+ASSYM(XPCB_GDT, offsetof(struct xpcb, xpcb_gdt));
+ASSYM(XPCB_IDT, offsetof(struct xpcb, xpcb_idt));
+ASSYM(XPCB_LDT, offsetof(struct xpcb, xpcb_ldt));
+ASSYM(XPCB_TR, offsetof(struct xpcb, xpcb_tr));
+
+ASSYM(XPCB_SIZE, sizeof(struct xpcb));
+
ASSYM(COMMON_TSS_RSP0, offsetof(struct amd64tss, tss_rsp0));
ASSYM(TF_R15, offsetof(struct trapframe, tf_r15));
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index c65f6f85c549..b7c03d9fb2db 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include <machine/apicreg.h>
#include <machine/cputypes.h>
+#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
@@ -103,6 +104,7 @@ extern pt_entry_t *SMPpt;
extern int _udatasel;
struct pcb stoppcbs[MAXCPU];
+struct xpcb *stopxpcbs = NULL;
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
@@ -344,6 +346,9 @@ cpu_mp_start(void)
/* Install an inter-CPU IPI for CPU stop/restart */
setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0);
+ /* Install an inter-CPU IPI for CPU suspend/resume */
+ setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYSIGT, SEL_KPL, 0);
+
/* Set boot_cpu_id if needed. */
if (boot_cpu_id == -1) {
boot_cpu_id = PCPU_GET(apic_id);
@@ -1145,6 +1150,41 @@ cpustop_handler(void)
}
/*
+ * Handle an IPI_SUSPEND by saving our current context and spinning until we
+ * are resumed.
+ */
+void
+cpususpend_handler(void)
+{
+ struct savefpu *stopfpu;
+ register_t cr3, rf;
+ int cpu = PCPU_GET(cpuid);
+ int cpumask = PCPU_GET(cpumask);
+
+ rf = intr_disable();
+ cr3 = rcr3();
+ stopfpu = &stopxpcbs[cpu].xpcb_pcb.pcb_save;
+ if (savectx2(&stopxpcbs[cpu])) {
+ fpugetregs(curthread, stopfpu);
+ wbinvd();
+ atomic_set_int(&stopped_cpus, cpumask);
+ } else
+ fpusetregs(curthread, stopfpu);
+
+ /* Wait for resume */
+ while (!(started_cpus & cpumask))
+ ia32_pause();
+
+ atomic_clear_int(&started_cpus, cpumask);
+ atomic_clear_int(&stopped_cpus, cpumask);
+
+ /* Restore CR3 and enable interrupts */
+ load_cr3(cr3);
+ lapic_setup(0);
+ intr_restore(rf);
+}
+
+/*
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h
index ea971ec8585c..3cc028d564b5 100644
--- a/sys/amd64/include/apicvar.h
+++ b/sys/amd64/include/apicvar.h
@@ -130,6 +130,7 @@
#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
#define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */
+#define IPI_SUSPEND (APIC_IPI_INTS + 8) /* Suspend CPU until restarted. */
/*
* The spurious interrupt can share the priority class with the IPIs since
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
index e6a5add7f983..2e2ca87eb61f 100644
--- a/sys/amd64/include/pcb.h
+++ b/sys/amd64/include/pcb.h
@@ -82,11 +82,25 @@ struct pcb {
struct user_segment_descriptor pcb_gs32sd;
};
+struct xpcb {
+ struct pcb xpcb_pcb;
+ register_t xpcb_cr0;
+ register_t xpcb_cr2;
+ register_t xpcb_cr4;
+ register_t xpcb_kgsbase;
+ uint32_t xpcb_ss;
+ struct region_descriptor xpcb_gdt;
+ struct region_descriptor xpcb_idt;
+ struct region_descriptor xpcb_ldt;
+ uint16_t xpcb_tr;
+};
+
#ifdef _KERNEL
struct trapframe;
void makectx(struct trapframe *, struct pcb *);
void savectx(struct pcb *);
+int savectx2(struct xpcb *);
#endif
#endif /* _AMD64_PCB_H_ */
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index 06222f8a9e9a..d2ff189f2f85 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -48,11 +48,13 @@ inthand_t
IDTVEC(invlcache), /* Write back and invalidate cache */
IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */
IDTVEC(cpustop), /* CPU stops & waits to be restarted */
+ IDTVEC(cpususpend), /* CPU suspends & waits to be resumed */
IDTVEC(rendezvous); /* handle CPU rendezvous */
/* functions in mp_machdep.c */
void cpu_add(u_int apic_id, char boot_cpu);
void cpustop_handler(void);
+void cpususpend_handler(void);
void init_secondary(void);
void ipi_selected(u_int cpus, u_int ipi);
void ipi_all_but_self(u_int ipi);
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index e826550edaaa..2f17a01f0a92 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -69,6 +69,19 @@ hptrr_lib.o optional hptrr \
#
amd64/acpica/OsdEnvironment.c optional acpi
amd64/acpica/acpi_machdep.c optional acpi
+amd64/acpica/acpi_switch.S optional acpi
+acpi_wakecode.h optional acpi \
+ dependency "$S/amd64/acpica/acpi_wakecode.S assym.s" \
+ compile-with "${MAKE} -f $S/amd64/acpica/Makefile ${.TARGET} MAKESRCPATH=$S/amd64/acpica" \
+ no-obj no-implicit-rule before-depend \
+ clean "acpi_wakecode.h acpi_wakecode.o acpi_wakecode.bin"
+#
+acpi_wakedata.h optional acpi \
+ dependency "$S/amd64/acpica/acpi_wakecode.S assym.s" \
+ compile-with "${MAKE} -f $S/amd64/acpica/Makefile ${.TARGET} MAKESRCPATH=$S/amd64/acpica" \
+ no-obj no-implicit-rule before-depend \
+ clean "acpi_wakedata.h acpi_wakecode.o acpi_wakecode.bin"
+#
amd64/acpica/acpi_wakeup.c optional acpi
amd64/acpica/madt.c optional acpi
amd64/amd64/amd64_mem.c optional mem
diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c
index 82719174eca3..15e8d00fd7ca 100644
--- a/sys/dev/acpica/acpi.c
+++ b/sys/dev/acpica/acpi.c
@@ -46,7 +46,11 @@ __FBSDID("$FreeBSD$");
#include <sys/linker.h>
#include <sys/power.h>
#include <sys/sbuf.h>
+#ifdef SMP
+#include <sys/sched.h>
+#endif
#include <sys/smp.h>
+#include <sys/timetc.h>
#if defined(__i386__) || defined(__amd64__)
#include <machine/pci_cfgreg.h>
@@ -2274,6 +2278,7 @@ acpi_SetSleepState(struct acpi_softc *sc, int state)
return (acpi_EnterSleepState(sc, state));
}
+#if defined(__amd64__) || defined(__i386__)
static void
acpi_sleep_force(void *arg)
{
@@ -2284,6 +2289,7 @@ acpi_sleep_force(void *arg)
if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
printf("acpi: force sleep state S%d failed\n", sc->acpi_next_sstate);
}
+#endif
/*
* Request that the system enter the given suspend state. All /dev/apm
@@ -2294,7 +2300,9 @@ acpi_sleep_force(void *arg)
int
acpi_ReqSleepState(struct acpi_softc *sc, int state)
{
+#if defined(__i386__)
struct apm_clone_data *clone;
+#endif
if (state < ACPI_STATE_S1 || state > ACPI_STATE_S5)
return (EINVAL);
@@ -2307,11 +2315,7 @@ acpi_ReqSleepState(struct acpi_softc *sc, int state)
return (ENXIO);
}
-#if !defined(__i386__)
- /* This platform does not support acpi suspend/resume. */
- return (EOPNOTSUPP);
-#endif
-
+#if defined(__amd64__) || defined(__i386__)
/* If a suspend request is already in progress, just return. */
ACPI_LOCK(acpi);
if (sc->acpi_next_sstate != 0) {
@@ -2321,6 +2325,7 @@ acpi_ReqSleepState(struct acpi_softc *sc, int state)
/* Record the pending state and notify all apm devices. */
sc->acpi_next_sstate = state;
+#if defined(__i386__)
STAILQ_FOREACH(clone, &sc->apm_cdevs, entries) {
clone->notify_status = APM_EV_NONE;
if ((clone->flags & ACPI_EVF_DEVD) == 0) {
@@ -2328,6 +2333,7 @@ acpi_ReqSleepState(struct acpi_softc *sc, int state)
KNOTE_UNLOCKED(&clone->sel_read.si_note, 0);
}
}
+#endif
/* If devd(8) is not running, immediately enter the sleep state. */
if (devctl_process_running() == FALSE) {
@@ -2352,6 +2358,10 @@ acpi_ReqSleepState(struct acpi_softc *sc, int state)
callout_reset(&sc->susp_force_to, 10 * hz, acpi_sleep_force, sc);
ACPI_UNLOCK(acpi);
return (0);
+#else
+ /* This platform does not support acpi suspend/resume. */
+ return (EOPNOTSUPP);
+#endif
}
/*
@@ -2364,14 +2374,10 @@ acpi_ReqSleepState(struct acpi_softc *sc, int state)
int
acpi_AckSleepState(struct apm_clone_data *clone, int error)
{
+#if defined(__amd64__) || defined(__i386__)
struct acpi_softc *sc;
int ret, sleeping;
-#if !defined(__i386__)
- /* This platform does not support acpi suspend/resume. */
- return (EOPNOTSUPP);
-#endif
-
/* If no pending sleep state, return an error. */
ACPI_LOCK(acpi);
sc = clone->acpi_sc;
@@ -2395,8 +2401,9 @@ acpi_AckSleepState(struct apm_clone_data *clone, int error)
* all devices, seeing if they agree yet. We only count devices that
* are writable since read-only devices couldn't ack the request.
*/
- clone->notify_status = APM_EV_ACKED;
sleeping = TRUE;
+#if defined(__i386__)
+ clone->notify_status = APM_EV_ACKED;
STAILQ_FOREACH(clone, &sc->apm_cdevs, entries) {
if ((clone->flags & ACPI_EVF_WRITE) != 0 &&
clone->notify_status != APM_EV_ACKED) {
@@ -2404,6 +2411,7 @@ acpi_AckSleepState(struct apm_clone_data *clone, int error)
break;
}
}
+#endif
/* If all devices have voted "yes", we will suspend now. */
if (sleeping)
@@ -2414,8 +2422,11 @@ acpi_AckSleepState(struct apm_clone_data *clone, int error)
if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
ret = ENODEV;
}
-
return (ret);
+#else
+ /* This platform does not support acpi suspend/resume. */
+ return (EOPNOTSUPP);
+#endif
}
static void
@@ -2459,11 +2470,18 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state)
sc->acpi_sleep_disabled = 1;
ACPI_UNLOCK(acpi);
+#ifdef SMP
+ thread_lock(curthread);
+ sched_bind(curthread, 0);
+ thread_unlock(curthread);
+#endif
+
/*
* Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
* drivers need this.
*/
mtx_lock(&Giant);
+
slp_state = ACPI_SS_NONE;
switch (state) {
case ACPI_STATE_S1:
@@ -2570,6 +2588,17 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state)
acpi_UserNotify("Resume", ACPI_ROOT_OBJECT, state);
mtx_unlock(&Giant);
+
+ /* Warm up timecounter again */
+ (void)timecounter->tc_get_timecount(timecounter);
+ (void)timecounter->tc_get_timecount(timecounter);
+
+#ifdef SMP
+ thread_lock(curthread);
+ sched_unbind(curthread);
+ thread_unlock(curthread);
+#endif
+
return_ACPI_STATUS (status);
}
diff --git a/sys/dev/acpica/acpi_ec.c b/sys/dev/acpica/acpi_ec.c
index bb752236ae93..696542cf21c5 100644
--- a/sys/dev/acpica/acpi_ec.c
+++ b/sys/dev/acpica/acpi_ec.c
@@ -747,7 +747,7 @@ EcSpaceHandler(UINT32 Function, ACPI_PHYSICAL_ADDRESS Address, UINT32 width,
* If booting, check if we need to run the query handler. If so, we
* we call it directly here since our thread taskq is not active yet.
*/
- if (cold || rebooting) {
+ if (cold || rebooting || sc->ec_suspending) {
if ((EC_GET_CSR(sc) & EC_EVENT_SCI)) {
CTR0(KTR_ACPI, "ec running gpe handler directly");
EcGpeQueryHandler(sc);
diff --git a/sys/i386/i386/i686_mem.c b/sys/i386/i386/i686_mem.c
index fc88be157c38..fe229cc3be20 100644
--- a/sys/i386/i386/i686_mem.c
+++ b/sys/i386/i386/i686_mem.c
@@ -73,11 +73,13 @@ static void i686_mrinit(struct mem_range_softc *sc);
static int i686_mrset(struct mem_range_softc *sc,
struct mem_range_desc *mrd, int *arg);
static void i686_mrAPinit(struct mem_range_softc *sc);
+static void i686_mrreinit(struct mem_range_softc *sc);
static struct mem_range_ops i686_mrops = {
i686_mrinit,
i686_mrset,
- i686_mrAPinit
+ i686_mrAPinit,
+ i686_mrreinit
};
/* XXX for AP startup hook */
@@ -668,6 +670,30 @@ i686_mrAPinit(struct mem_range_softc *sc)
wrmsr(MSR_MTRRdefType, mtrrdef);
}
+/*
+ * Re-initialise running CPU(s) MTRRs to match the ranges in the descriptor
+ * list.
+ *
+ * XXX Must be called with interrupts enabled.
+ */
+static void
+i686_mrreinit(struct mem_range_softc *sc)
+{
+#ifdef SMP
+ /*
+ * We should use ipi_all_but_self() to call other CPUs into a
+ * locking gate, then call a target function to do this work.
+ * The "proper" solution involves a generalised locking gate
+ * implementation, not ready yet.
+ */
+ smp_rendezvous(NULL, (void *)i686_mrAPinit, NULL, sc);
+#else
+ disable_intr(); /* disable interrupts */
+ i686_mrAPinit(sc);
+ enable_intr();
+#endif
+}
+
static void
i686_mem_drvinit(void *unused)
{
diff --git a/sys/i386/i386/k6_mem.c b/sys/i386/i386/k6_mem.c
index 9cbacfeedefe..c99cf277489a 100644
--- a/sys/i386/i386/k6_mem.c
+++ b/sys/i386/i386/k6_mem.c
@@ -70,6 +70,7 @@ static struct mem_range_ops k6_mrops =
{
k6_mrinit,
k6_mrset,
+ NULL,
NULL
};
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index e807e485cbcf..1a3a6fa81bc8 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -262,6 +262,54 @@ stop_cpus(cpumask_t map)
return 1;
}
+#if defined(__amd64__)
+/*
+ * When called the executing CPU will send an IPI to all other CPUs
+ * requesting that they halt execution.
+ *
+ * Usually (but not necessarily) called with 'other_cpus' as its arg.
+ *
+ * - Signals all CPUs in map to suspend.
+ * - Waits for each to suspend.
+ *
+ * Returns:
+ * -1: error
+ * 0: NA
+ * 1: ok
+ *
+ * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
+ * from executing at same time.
+ */
+int
+suspend_cpus(cpumask_t map)
+{
+ int i;
+
+ if (!smp_started)
+ return (0);
+
+ CTR1(KTR_SMP, "suspend_cpus(%x)", map);
+
+ /* send the suspend IPI to all CPUs in map */
+ ipi_selected(map, IPI_SUSPEND);
+
+ i = 0;
+ while ((stopped_cpus & map) != map) {
+ /* spin */
+ cpu_spinwait();
+ i++;
+#ifdef DIAGNOSTIC
+ if (i == 100000) {
+ printf("timeout suspending cpus\n");
+ break;
+ }
+#endif
+ }
+
+ return (1);
+}
+#endif
+
/*
* Called by a CPU to restart stopped CPUs.
*
diff --git a/sys/sys/memrange.h b/sys/sys/memrange.h
index ace778d5bfd4..c90104f75942 100644
--- a/sys/sys/memrange.h
+++ b/sys/sys/memrange.h
@@ -52,6 +52,7 @@ struct mem_range_ops
void (*init)(struct mem_range_softc *sc);
int (*set)(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg);
void (*initAP)(struct mem_range_softc *sc);
+ void (*reinit)(struct mem_range_softc *sc);
};
struct mem_range_softc
@@ -68,4 +69,3 @@ extern int mem_range_attr_get(struct mem_range_desc *mrd, int *arg);
extern int mem_range_attr_set(struct mem_range_desc *mrd, int *arg);
#endif
-
diff --git a/sys/sys/smp.h b/sys/sys/smp.h
index 8306d5259772..500516f6a1a8 100644
--- a/sys/sys/smp.h
+++ b/sys/sys/smp.h
@@ -122,6 +122,9 @@ void forward_signal(struct thread *);
void forward_roundrobin(void);
int restart_cpus(cpumask_t);
int stop_cpus(cpumask_t);
+#if defined(__amd64__)
+int suspend_cpus(cpumask_t);
+#endif
void smp_rendezvous_action(void);
extern struct mtx smp_ipi_mtx;