aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sys/amd64/acpica/Makefile33
-rw-r--r--sys/amd64/acpica/acpi_machdep.c27
-rw-r--r--sys/amd64/acpica/acpi_switch.S177
-rw-r--r--sys/amd64/acpica/acpi_wakecode.S278
-rw-r--r--sys/amd64/acpica/acpi_wakeup.c397
-rwxr-xr-xsys/amd64/acpica/genwakecode.sh6
-rwxr-xr-xsys/amd64/acpica/genwakedata.sh9
-rw-r--r--sys/amd64/amd64/amd64_mem.c28
-rw-r--r--sys/amd64/amd64/apic_vector.S16
-rw-r--r--sys/amd64/amd64/cpu_switch.S74
-rw-r--r--sys/amd64/amd64/db_trace.c2
-rw-r--r--sys/amd64/amd64/genassym.c13
-rw-r--r--sys/amd64/amd64/mp_machdep.c40
-rw-r--r--sys/amd64/include/apicvar.h1
-rw-r--r--sys/amd64/include/pcb.h14
-rw-r--r--sys/amd64/include/smp.h2
-rw-r--r--sys/conf/files.amd6413
-rw-r--r--sys/dev/acpica/acpi.c53
-rw-r--r--sys/dev/acpica/acpi_ec.c2
-rw-r--r--sys/i386/i386/i686_mem.c28
-rw-r--r--sys/i386/i386/k6_mem.c1
-rw-r--r--sys/kern/subr_smp.c48
-rw-r--r--sys/sys/memrange.h2
-rw-r--r--sys/sys/smp.h3
24 files changed, 1247 insertions, 20 deletions
diff --git a/sys/amd64/acpica/Makefile b/sys/amd64/acpica/Makefile
new file mode 100644
index 000000000000..743728051e97
--- /dev/null
+++ b/sys/amd64/acpica/Makefile
@@ -0,0 +1,33 @@
+# $FreeBSD$
+#
+# Builds the ACPI wakeup trampoline (acpi_wakecode.S) and converts it into
+# two generated headers: acpi_wakecode.h and acpi_wakedata.h.
+
+# Correct path for kernel builds
+# Don't rely on the kernel's .depend file
+.ifdef MAKESRCPATH
+.PATH: ${MAKESRCPATH}
+DEPENDFILE=
+.else
+# Standalone build: sources live in the current directory and the generated
+# files are removed on clean.
+MAKESRCPATH= ${.CURDIR}
+CLEANFILES= acpi_wakecode.h acpi_wakedata.h acpi_wakecode.bin acpi_wakecode.o
+.endif
+# Pick the "no standard include path" flag that matches the compiler.
+.if ${CC} == "icc"
+CFLAGS+= -restrict
+NOSTDINC= -X
+.else
+NOSTDINC= -nostdinc
+.endif
+CFLAGS+= ${NOSTDINC} -include opt_global.h -I. -I${MAKESRCPATH}/../..
+
+all: acpi_wakecode.h acpi_wakedata.h
+
+# The trampoline includes assym.s, so rebuild it when that file changes.
+acpi_wakecode.o: acpi_wakecode.S assym.s
+
+# Strip the object down to a flat binary image.
+acpi_wakecode.bin: acpi_wakecode.o
+ objcopy -S -O binary acpi_wakecode.o acpi_wakecode.bin
+
+# Header produced by genwakecode.sh from acpi_wakecode.bin.
+acpi_wakecode.h: acpi_wakecode.bin
+ sh ${MAKESRCPATH}/genwakecode.sh > acpi_wakecode.h
+
+# Header produced by genwakedata.sh (which reads acpi_wakecode.o, a
+# prerequisite of acpi_wakecode.bin).
+acpi_wakedata.h: acpi_wakecode.bin
+ sh ${MAKESRCPATH}/genwakedata.sh > acpi_wakedata.h
+
+.include <bsd.prog.mk>
diff --git a/sys/amd64/acpica/acpi_machdep.c b/sys/amd64/acpica/acpi_machdep.c
index e9e9235c3753..8b4840efe033 100644
--- a/sys/amd64/acpica/acpi_machdep.c
+++ b/sys/amd64/acpica/acpi_machdep.c
@@ -31,25 +31,50 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/module.h>
+#include <sys/sysctl.h>
#include <contrib/dev/acpica/acpi.h>
#include <dev/acpica/acpivar.h>
#include <machine/nexusvar.h>
+SYSCTL_DECL(_debug_acpi);
+
+/*
+ * Resume-path debugging knobs.  Both can be preset as loader tunables;
+ * resume_beep is additionally exported here as a read/write sysctl under
+ * debug.acpi.
+ */
+uint32_t acpi_resume_beep;
+TUNABLE_INT("debug.acpi.resume_beep", &acpi_resume_beep);
+SYSCTL_UINT(_debug_acpi, OID_AUTO, resume_beep, CTLFLAG_RW, &acpi_resume_beep,
+ 0, "Beep the PC speaker when resuming");
+uint32_t acpi_reset_video;
+TUNABLE_INT("hw.acpi.reset_video", &acpi_reset_video);
+
static int intr_model = ACPI_INTR_PIC;
+static struct apm_clone_data acpi_clone;
+/*
+ * Machine-dependent ACPI initialization.
+ *
+ * Looks up the softc of acpi unit 0, registers the static acpi_clone entry
+ * on its apm_cdevs list, installs the wakeup handler, applies the interrupt
+ * model if it is not the PIC default, and adds the "reset_video" sysctl to
+ * the softc's sysctl tree.  The dev argument is not used.  Always returns 0.
+ */
int
acpi_machdep_init(device_t dev)
{
- struct acpi_softc *sc;
+ struct acpi_softc *sc;
sc = devclass_get_softc(devclass_find("acpi"), 0);
+
+ /* Create a fake clone for /dev/acpi. */
+ STAILQ_INIT(&sc->apm_cdevs);
+ acpi_clone.cdev = sc->acpi_dev_t;
+ acpi_clone.acpi_sc = sc;
+ ACPI_LOCK(acpi);
+ STAILQ_INSERT_TAIL(&sc->apm_cdevs, &acpi_clone, entries);
+ ACPI_UNLOCK(acpi);
+ sc->acpi_clone = &acpi_clone;
acpi_install_wakeup_handler(sc);
if (intr_model != ACPI_INTR_PIC)
acpi_SetIntrModel(intr_model);
+ /* Runtime control for the acpi_reset_video flag. */
+ SYSCTL_ADD_UINT(&sc->acpi_sysctl_ctx,
+ SYSCTL_CHILDREN(sc->acpi_sysctl_tree), OID_AUTO,
+ "reset_video", CTLFLAG_RW, &acpi_reset_video, 0,
+ "Call the VESA reset BIOS vector on the resume path");
+
return (0);
}
diff --git a/sys/amd64/acpica/acpi_switch.S b/sys/amd64/acpica/acpi_switch.S
new file mode 100644
index 000000000000..d4f732a6eb2b
--- /dev/null
+++ b/sys/amd64/acpica/acpi_switch.S
@@ -0,0 +1,177 @@
+/*-
+ * Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
+ * Copyright (c) 2001 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
+ * Copyright (c) 2008-2009 Jung-uk Kim <jkim@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asmacros.h>
+#include <machine/specialreg.h>
+
+#include "acpi_wakedata.h"
+#include "assym.s"
+
+/*
+ * WAKEUP_DECL(member) defines the assembler symbol WAKEUP_<member> as the
+ * distance of wakeup_<member> from wakeup_ctx.  The wakeup_* values come
+ * from the generated acpi_wakedata.h included above.
+ */
+#define WAKEUP_DECL(member) \
+ .set WAKEUP_ ## member, wakeup_ ## member - wakeup_ctx
+
+ WAKEUP_DECL(xpcb)
+ WAKEUP_DECL(gdt)
+ WAKEUP_DECL(efer)
+ WAKEUP_DECL(pat)
+ WAKEUP_DECL(star)
+ WAKEUP_DECL(lstar)
+ WAKEUP_DECL(cstar)
+ WAKEUP_DECL(sfmask)
+ WAKEUP_DECL(cpu)
+
+/*
+ * Operand helpers: WAKEUP_CTX() addresses a wakeup context field relative
+ * to %rdi; WAKEUP_PCB()/WAKEUP_XPCB() address PCB/XPCB fields relative to
+ * %r11, using the PCB_ and XPCB_ offsets (presumably from assym.s).
+ */
+#define WAKEUP_CTX(member) WAKEUP_ ## member (%rdi)
+#define WAKEUP_PCB(member) PCB_ ## member(%r11)
+#define WAKEUP_XPCB(member) XPCB_ ## member(%r11)
+
+/*
+ * acpi_restorecpu
+ * Reload the CPU state recorded in the wakeup context and the XPCB it
+ * points to, then return to the saved RIP on the saved stack.
+ *
+ * In:  %rdi - address of the wakeup context (fields at WAKEUP_* offsets)
+ *      %rsi - value to load into %cr3
+ * Out: %eax - 0; the context's "cpu" field is also cleared to 0 so that a
+ *             waiter can see this CPU has resumed.
+ *
+ * Every wrmsr writes %edx:%eax, so both halves are loaded from the saved
+ * 64-bit value for each MSR rather than leaving %edx with whatever the
+ * previous restore put there.
+ */
+ENTRY(acpi_restorecpu)
+ /* Switch to KPML4phys. */
+ movq %rsi, %rax
+ movq %rax, %cr3
+
+ /* Restore GDT. */
+ lgdt WAKEUP_CTX(gdt)
+ jmp 1f
+1:
+
+ /* Fetch PCB. */
+ movq WAKEUP_CTX(xpcb), %r11
+
+ /* Restore segment registers. */
+ mov WAKEUP_PCB(DS), %ds
+ mov WAKEUP_PCB(ES), %es
+ mov WAKEUP_XPCB(SS), %ss
+ mov WAKEUP_PCB(FS), %fs
+ mov WAKEUP_PCB(GS), %gs
+
+ /* Restore the segment base MSRs after the selectors are loaded. */
+ movl $MSR_FSBASE, %ecx
+ movl WAKEUP_PCB(FSBASE), %eax
+ movl 4 + WAKEUP_PCB(FSBASE), %edx
+ wrmsr
+ movl $MSR_GSBASE, %ecx
+ movl WAKEUP_PCB(GSBASE), %eax
+ movl 4 + WAKEUP_PCB(GSBASE), %edx
+ wrmsr
+ movl $MSR_KGSBASE, %ecx
+ movl WAKEUP_XPCB(KGSBASE), %eax
+ movl 4 + WAKEUP_XPCB(KGSBASE), %edx
+ wrmsr
+
+ /* Restore EFER. */
+ movl $MSR_EFER, %ecx
+ movl WAKEUP_CTX(efer), %eax
+ movl 4 + WAKEUP_CTX(efer), %edx
+ wrmsr
+
+ /* Restore PAT. */
+ movl $MSR_PAT, %ecx
+ movl WAKEUP_CTX(pat), %eax
+ movl 4 + WAKEUP_CTX(pat), %edx
+ wrmsr
+
+ /* Restore fast syscall stuff. */
+ movl $MSR_STAR, %ecx
+ movl WAKEUP_CTX(star), %eax
+ movl 4 + WAKEUP_CTX(star), %edx
+ wrmsr
+ movl $MSR_LSTAR, %ecx
+ movl WAKEUP_CTX(lstar), %eax
+ movl 4 + WAKEUP_CTX(lstar), %edx
+ wrmsr
+ movl $MSR_CSTAR, %ecx
+ movl WAKEUP_CTX(cstar), %eax
+ movl 4 + WAKEUP_CTX(cstar), %edx
+ wrmsr
+ movl $MSR_SF_MASK, %ecx
+ movl WAKEUP_CTX(sfmask), %eax
+ movl 4 + WAKEUP_CTX(sfmask), %edx
+ wrmsr
+
+ /* Restore CR0, CR2 and CR4. */
+ movq WAKEUP_XPCB(CR0), %rax
+ movq %rax, %cr0
+ movq WAKEUP_XPCB(CR2), %rax
+ movq %rax, %cr2
+ movq WAKEUP_XPCB(CR4), %rax
+ movq %rax, %cr4
+
+ /* Restore descriptor tables. */
+ lidt WAKEUP_XPCB(IDT)
+ lldt WAKEUP_XPCB(LDT)
+ movw WAKEUP_XPCB(TR), %ax
+ ltr %ax
+
+ /* Restore other callee saved registers. */
+ movq WAKEUP_PCB(R15), %r15
+ movq WAKEUP_PCB(R14), %r14
+ movq WAKEUP_PCB(R13), %r13
+ movq WAKEUP_PCB(R12), %r12
+ movq WAKEUP_PCB(RBP), %rbp
+ movq WAKEUP_PCB(RSP), %rsp
+ movq WAKEUP_PCB(RBX), %rbx
+
+ /* Restore debug registers. */
+ movq WAKEUP_PCB(DR0), %rax
+ movq %rax, %dr0
+ movq WAKEUP_PCB(DR1), %rax
+ movq %rax, %dr1
+ movq WAKEUP_PCB(DR2), %rax
+ movq %rax, %dr2
+ movq WAKEUP_PCB(DR3), %rax
+ movq %rax, %dr3
+ movq WAKEUP_PCB(DR6), %rax
+ movq %rax, %dr6
+ movq WAKEUP_PCB(DR7), %rax
+ movq %rax, %dr7
+
+ /* Restore return address: store the saved RIP where ret will pop it. */
+ movq WAKEUP_PCB(RIP), %rax
+ movq %rax, (%rsp)
+
+ /* Indicate the CPU is resumed. */
+ xorl %eax, %eax
+ movl %eax, WAKEUP_CTX(cpu)
+
+ ret
+END(acpi_restorecpu)
+
+/*
+ * acpi_savecpu(xpcb)
+ * Save the current CPU context into the XPCB at %rdi via savectx2, then
+ * overwrite the saved RIP and RSP with this function's own return address
+ * and stack pointer.  Returns 1 in %eax.
+ */
+ENTRY(acpi_savecpu)
+ /* Fetch XPCB and save CPU context. */
+ /* The XPCB pointer is parked in %r10 across the call and moved to %r11
+    afterwards, where the WAKEUP_PCB() operands expect it. */
+ movq %rdi, %r10
+ call savectx2
+ movq %r10, %r11
+
+ /* Patch caller's return address and stack pointer. */
+ movq (%rsp), %rax
+ movq %rax, WAKEUP_PCB(RIP)
+ movq %rsp, %rax
+ movq %rax, WAKEUP_PCB(RSP)
+
+ movl $1, %eax
+ ret
+END(acpi_savecpu)
diff --git a/sys/amd64/acpica/acpi_wakecode.S b/sys/amd64/acpica/acpi_wakecode.S
new file mode 100644
index 000000000000..111486e8d1a4
--- /dev/null
+++ b/sys/amd64/acpica/acpi_wakecode.S
@@ -0,0 +1,278 @@
+/*-
+ * Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
+ * Copyright (c) 2001 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
+ * Copyright (c) 2003 Peter Wemm
+ * Copyright (c) 2008 Jung-uk Kim <jkim@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#define LOCORE
+
+#include <machine/asmacros.h>
+#include <machine/specialreg.h>
+
+#include "assym.s"
+
+/*
+ * Resume entry point for real mode.
+ *
+ * If XFirmwareWakingVector is zero and FirmwareWakingVector is non-zero
+ * in FACS, the BIOS enters here in real mode after POST with CS set to
+ * (FirmwareWakingVector >> 4) and IP set to (FirmwareWakingVector & 0xf).
+ * Depending on the previous sleep state, we may need to initialize more
+ * of the system (i.e., S3 suspend-to-RAM vs. S4 suspend-to-disk).
+ *
+ * Note: If XFirmwareWakingVector is non-zero, it should disable address
+ * translation/paging and interrupts, load all segment registers with
+ * a flat 4 GB address space, and set EFLAGS.IF to zero. Currently
+ * this mode is not supported by this code.
+ */
+
+ .data /* So we can modify it */
+
+ ALIGN_TEXT
+/*
+ * 16-bit part of the trampoline: set up segments and a stack, optionally
+ * beep and re-run the video BIOS, load the boot GDT, set CR0.PE and far
+ * jump to wakeup_32.  All data references are offsets from wakeup_start.
+ */
+wakeup_start:
+ .code16
+ /*
+ * Set up segment registers for real mode, a small stack for
+ * any calls we make, and clear any flags.
+ */
+ cli /* make sure no interrupts */
+ cld
+ mov %cs, %ax /* copy %cs to %ds. Remember these */
+ mov %ax, %ds /* are offsets rather than selectors */
+ mov %ax, %ss
+ movw $PAGE_SIZE - 8, %sp
+ pushw $0
+ popfw
+
+ /* To debug resume hangs, beep the speaker if the user requested. */
+ /* Note: only the low 16 bits of the 32-bit flag are tested. */
+ cmpw $0, resume_beep - wakeup_start
+ je 1f
+ movb $0xc0, %al
+ outb %al, $0x42
+ movb $0x04, %al
+ outb %al, $0x42
+ inb $0x61, %al
+ orb $0x3, %al
+ outb %al, $0x61
+ movw $0, resume_beep - wakeup_start
+1:
+
+ /* Re-initialize video BIOS if the reset_video tunable is set. */
+ cmpw $0, reset_video - wakeup_start
+ je 1f
+ lcall $0xc000, $3
+ movw $0, reset_video - wakeup_start
+
+ /*
+ * Set up segment registers for real mode again in case the
+ * previous BIOS call clobbers them.
+ */
+ mov %cs, %ax
+ mov %ax, %ds
+ mov %ax, %ss
+1:
+
+ /*
+ * Find relocation base and patch the gdt descriptor and ljmp targets
+ */
+ xorl %ebx, %ebx
+ mov %cs, %bx
+ sall $4, %ebx /* %ebx is now our relocation base */
+
+ /*
+ * Load the descriptor table pointer. We'll need it when running
+ * in 16-bit protected mode.
+ */
+ lgdtl bootgdtdesc - wakeup_start
+
+ /* Enable protected mode */
+ movl $CR0_PE, %eax
+ mov %eax, %cr0
+
+ /*
+ * Now execute a far jump to turn on protected mode. This
+ * causes the segment registers to turn into selectors and causes
+ * %cs to be loaded from the gdt.
+ *
+ * The following instruction is:
+ * ljmpl $bootcode32 - bootgdt, $wakeup_32 - wakeup_start
+ * but gas cannot assemble that. And besides, we patch the targets
+ * in early startup and it's a little clearer what we are patching.
+ */
+wakeup_sw32:
+ .byte 0x66 /* size override to 32 bits */
+ .byte 0xea /* opcode for far jump */
+ .long wakeup_32 - wakeup_start /* offset in segment */
+ .word bootcode32 - bootgdt /* index in gdt for 32 bit code */
+
+ /*
+ * At this point, we are running in 32 bit legacy protected mode.
+ * This stage enables PAE/PSE, sets EFER.LME and EFER.SCE, loads %cr3
+ * from the wakeup_pagetables slot, turns on paging and far jumps to
+ * wakeup_64.
+ */
+ .code32
+wakeup_32:
+
+ mov $bootdata32 - bootgdt, %eax
+ mov %ax, %ds
+
+ /* Turn on the PAE and PSE bits for when paging is enabled */
+ mov %cr4, %eax
+ orl $(CR4_PAE | CR4_PSE), %eax
+ mov %eax, %cr4
+
+ /*
+ * Enable EFER.LME so that we get long mode when all the prereqs are
+ * in place. In this case, it turns on when CR0_PG is finally enabled.
+ * Pick up a few other EFER bits that we'll need while we're here.
+ */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ orl $EFER_LME | EFER_SCE, %eax
+ wrmsr
+
+ /*
+ * Point to the embedded page tables for startup. Note that this
+ * only gets accessed after we're actually in 64 bit mode, however
+ * we can only set the bottom 32 bits of %cr3 in this state. This
+ * means we are required to use a temporary page table that is below
+ * the 4GB limit. %ebx is still our relocation base. We could just
+ * subtract 3 * PAGE_SIZE, but that would be too easy.
+ */
+ leal wakeup_pagetables - wakeup_start(%ebx), %eax
+ movl (%eax), %eax
+ mov %eax, %cr3
+
+ /*
+ * Finally, switch to long mode by enabling paging. We have
+ * to be very careful here because all the segmentation disappears
+ * out from underneath us. The spec says we can depend on the
+ * subsequent pipelined branch to execute, but *only if* everything
+ * is still identity mapped. If any mappings change, the pipeline
+ * will flush.
+ */
+ mov %cr0, %eax
+ orl $CR0_PG, %eax
+ mov %eax, %cr0
+
+ /*
+ * At this point paging is enabled, and we are in "compatibility" mode.
+ * We do another far jump to reload %cs with the 64 bit selector.
+ * %cr3 points to a 4-level page table page.
+ * We cannot yet jump all the way to the kernel because we can only
+ * specify a 32 bit linear address. So, yet another trampoline.
+ *
+ * The following instruction is:
+ * ljmp $bootcode64 - bootgdt, $wakeup_64 - wakeup_start
+ * but gas cannot assemble that. And besides, we patch the targets
+ * in early startup and it's a little clearer what we are patching.
+ */
+wakeup_sw64:
+ .byte 0xea /* opcode for far jump */
+ .long wakeup_64 - wakeup_start /* offset in segment */
+ .word bootcode64 - bootgdt /* index in gdt for 64 bit code */
+
+ /*
+ * Yeehar! We're running in 64-bit mode! We can mostly ignore our
+ * segment registers, and get on with it.
+ * Note that we are running at the correct virtual address, but with
+ * a 1:1 1GB mirrored mapping over entire address space. We had better
+ * switch to a real %cr3 promptly so that we can get to the direct map
+ * space. Remember that jmp is relative and that we've been relocated,
+ * so use an indirect jump.
+ */
+ .code64
+wakeup_64:
+ mov $bootdata64 - bootgdt, %eax
+ mov %ax, %ds
+
+ /* Restore arguments and return. */
+ /*
+ * %rdi and %rsi are loaded from the wakeup_ctx and wakeup_kpml4
+ * slots; control then transfers to the address in wakeup_retaddr.
+ * %rbx still holds the relocation base computed in 16-bit mode.
+ */
+ movq wakeup_ctx - wakeup_start(%rbx), %rdi
+ movq wakeup_kpml4 - wakeup_start(%rbx), %rsi
+ movq wakeup_retaddr - wakeup_start(%rbx), %rax
+ jmp *%rax
+
+/*
+ * Trampoline data.  The GDT below is used only while switching modes;
+ * selectors are formed as (label - bootgdt).  The remaining slots are
+ * zero in the image and are filled in by the kernel before use.
+ */
+ ALIGN_DATA
+bootgdt:
+ .long 0x00000000
+ .long 0x00000000
+
+bootcode64:
+ .long 0x0000ffff
+ .long 0x00af9b00
+
+bootdata64:
+ .long 0x0000ffff
+ .long 0x00af9300
+
+bootcode32:
+ .long 0x0000ffff
+ .long 0x00cf9b00
+
+bootdata32:
+ .long 0x0000ffff
+ .long 0x00cf9300
+bootgdtend:
+
+/* 32-bit value loaded into %cr3 by wakeup_32. */
+wakeup_pagetables:
+ .long 0
+
+/* Operand for lgdtl in wakeup_start. */
+bootgdtdesc:
+ .word bootgdtend - bootgdt /* Length */
+ .long bootgdt - wakeup_start /* Offset plus %ds << 4 */
+
+ ALIGN_DATA
+/* Flags tested (and cleared) by the 16-bit code. */
+resume_beep:
+ .long 0
+reset_video:
+ .long 0
+/* Jump target and %rsi argument used by wakeup_64. */
+wakeup_retaddr:
+ .quad 0
+wakeup_kpml4:
+ .quad 0
+
+/*
+ * wakeup_ctx is loaded into %rdi by wakeup_64.  The fields that follow are
+ * addressed relative to wakeup_ctx by acpi_restorecpu, so their order and
+ * sizes must stay in sync with the WAKEUP_DECL() list in acpi_switch.S.
+ */
+wakeup_ctx:
+ .quad 0
+wakeup_xpcb:
+ .quad 0
+/* lgdt operand: 16-bit limit followed by 64-bit base. */
+wakeup_gdt:
+ .word 0
+ .quad 0
+wakeup_efer:
+ .quad 0
+wakeup_pat:
+ .quad 0
+wakeup_star:
+ .quad 0
+wakeup_lstar:
+ .quad 0
+wakeup_cstar:
+ .quad 0
+wakeup_sfmask:
+ .quad 0
+wakeup_cpu:
+ .long 0
+dummy:
diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c
index 53868e40a85a..5e3d5b13433d 100644
--- a/sys/amd64/acpica/acpi_wakeup.c
+++ b/sys/amd64/acpica/acpi_wakeup.c
@@ -1,6 +1,8 @@
/*-
* Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
* Copyright (c) 2001 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
+ * Copyright (c) 2003 Peter Wemm
+ * Copyright (c) 2008-2009 Jung-uk Kim <jkim@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,18 +31,411 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/memrange.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/types.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/intr_machdep.h>
+#include <machine/pcb.h>
+#include <machine/pmap.h>
+#include <machine/specialreg.h>
+#include <machine/vmparam.h>
+
+#ifdef SMP
+#include <machine/apicreg.h>
+#include <machine/smp.h>
+#endif
#include <contrib/dev/acpica/acpi.h>
#include <dev/acpica/acpivar.h>
+#include "acpi_wakecode.h"
+#include "acpi_wakedata.h"
+
+/* Make sure the code is less than a page and leave room for the stack. */
+CTASSERT(sizeof(wakecode) < PAGE_SIZE - 1024);
+
+#ifndef _SYS_CDEFS_H_
+#error this file needs sys/cdefs.h as a prerequisite
+#endif
+
+extern uint32_t acpi_resume_beep;
+extern uint32_t acpi_reset_video;
+
+#ifdef SMP
+extern struct xpcb *stopxpcbs;
+#else
+static struct xpcb *stopxpcbs;
+#endif
+
+int acpi_restorecpu(struct xpcb *, vm_offset_t);
+int acpi_savecpu(struct xpcb *);
+
+static void acpi_reset_tss(int cpu);
+static void acpi_alloc_wakeup_handler(void);
+static void acpi_stop_beep(void *);
+
+#ifdef SMP
+static int acpi_wakeup_ap(struct acpi_softc *, int);
+static void acpi_wakeup_cpus(struct acpi_softc *, cpumask_t);
+#endif
+
+/*
+ * The wakeup code occupies the fourth page of the wakeup region; the
+ * first three pages are skipped.  WAKECODE_VADDR/WAKECODE_PADDR give the
+ * virtual and physical address of that page.
+ *
+ * WAKECODE_FIXUP(offset, type, val) stores val, as an object of the given
+ * type, at byte offset `offset' inside the wakeup code page.  It expects a
+ * variable named `sc' (struct acpi_softc *) to be in scope at the call
+ * site.  The offset and val arguments are parenthesized so that arbitrary
+ * expressions can be passed safely.
+ */
+#define WAKECODE_VADDR(sc) ((sc)->acpi_wakeaddr + (3 * PAGE_SIZE))
+#define WAKECODE_PADDR(sc) ((sc)->acpi_wakephys + (3 * PAGE_SIZE))
+#define WAKECODE_FIXUP(offset, type, val) do { \
+ type *addr; \
+ addr = (type *)(WAKECODE_VADDR(sc) + (offset)); \
+ *addr = (val); \
+} while (0)
+
+/*
+ * Timeout callback: clear the two low bits of I/O port 0x61, stopping the
+ * beep.  The argument is unused.
+ */
+static void
+acpi_stop_beep(void *arg)
+{
+ u_char ctl;
+
+ ctl = inb(0x61) & ~0x3;
+ outb(0x61, ctl);
+}
+
+#ifdef SMP
+/*
+ * Restart one application processor through the wakeup code.
+ *
+ * Points the wakeup context at the CPU's saved XPCB and GDT, stores the
+ * CPU number in wakeup_cpu, then sends the INIT / STARTUP IPI sequence to
+ * the CPU's local APIC.  The wakeup_cpu slot is then polled until it reads
+ * zero.
+ *
+ * Returns 1 if the slot was cleared within about 5 seconds, 0 otherwise.
+ */
+static int
+acpi_wakeup_ap(struct acpi_softc *sc, int cpu)
+{
+ /* STARTUP vector: physical page number of the wakeup code. */
+ int vector = (WAKECODE_PADDR(sc) >> 12) & 0xff;
+ int apic_id = cpu_apic_ids[cpu];
+ int ms;
+
+ WAKECODE_FIXUP(wakeup_xpcb, struct xpcb *, &stopxpcbs[cpu]);
+ WAKECODE_FIXUP(wakeup_gdt, uint16_t, stopxpcbs[cpu].xpcb_gdt.rd_limit);
+ WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t,
+ stopxpcbs[cpu].xpcb_gdt.rd_base);
+ WAKECODE_FIXUP(wakeup_cpu, int, cpu);
+
+ acpi_reset_tss(cpu);
+
+ /* do an INIT IPI: assert RESET */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
+
+ /* wait for pending status end */
+ lapic_ipi_wait(-1);
+
+ /* do an INIT IPI: deassert RESET */
+ lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
+
+ /* wait for pending status end */
+ DELAY(10000); /* wait ~10mS */
+ lapic_ipi_wait(-1);
+
+ /*
+ * next we do a STARTUP IPI: the previous INIT IPI might still be
+ * latched, (P5 bug) this 1st STARTUP would then terminate
+ * immediately, and the previously started INIT IPI would continue. OR
+ * the previous INIT IPI has already run. and this STARTUP IPI will
+ * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
+ * will run.
+ */
+
+ /* do a STARTUP IPI */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ vector, apic_id);
+ lapic_ipi_wait(-1);
+ DELAY(200); /* wait ~200uS */
+
+ /*
+ * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
+ * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
+ * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
+ * recognized after hardware RESET or INIT IPI.
+ */
+
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ vector, apic_id);
+ lapic_ipi_wait(-1);
+ DELAY(200); /* wait ~200uS */
+
+ /* Wait up to 5 seconds for it to start. */
+ /* acpi_restorecpu clears the wakeup_cpu slot as its last step. */
+ for (ms = 0; ms < 5000; ms++) {
+ if (*(int *)(WAKECODE_VADDR(sc) + wakeup_cpu) == 0)
+ return (1); /* return SUCCESS */
+ DELAY(1000);
+ }
+ return (0); /* return FAILURE */
+}
+
+#define WARMBOOT_TARGET 0
+#define WARMBOOT_OFF (KERNBASE + 0x0467)
+#define WARMBOOT_SEG (KERNBASE + 0x0469)
+
+#define CMOS_REG (0x70)
+#define CMOS_DATA (0x71)
+#define BIOS_RESET (0x0f)
+#define BIOS_WARM (0x0a)
+
+/*
+ * Restart every CPU (other than CPU 0) whose bit is set in wakeup_cpus.
+ *
+ * The BIOS warm-start vector and the CMOS shutdown byte are saved,
+ * redirected at the wakeup code for the duration of the loop, and put back
+ * afterwards.  Panics if any selected CPU fails to come back.
+ */
+static void
+acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus)
+{
+ uint32_t mpbioswarmvec;
+ cpumask_t map;
+ int cpu;
+ u_char mpbiosreason;
+
+ /* save the current value of the warm-start vector */
+ mpbioswarmvec = *((uint32_t *)WARMBOOT_OFF);
+ outb(CMOS_REG, BIOS_RESET);
+ mpbiosreason = inb(CMOS_DATA);
+
+ /* setup a vector to our boot code */
+ *((volatile u_short *)WARMBOOT_OFF) = WARMBOOT_TARGET;
+ *((volatile u_short *)WARMBOOT_SEG) = WAKECODE_PADDR(sc) >> 4;
+ outb(CMOS_REG, BIOS_RESET);
+ outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
+
+ /* Wake up each AP. */
+ for (cpu = 1; cpu < mp_ncpus; cpu++) {
+ map = 1ul << cpu;
+ if ((wakeup_cpus & map) != map)
+ continue;
+ if (acpi_wakeup_ap(sc, cpu) == 0) {
+ /* restore the warmstart vector */
+ *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
+ panic("acpi_wakeup: failed to resume AP #%d (PHY #%d)",
+ cpu, cpu_apic_ids[cpu]);
+ }
+ }
+
+ /* restore the warmstart vector */
+ *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
+
+ /* put back the CMOS shutdown byte read at entry */
+ outb(CMOS_REG, BIOS_RESET);
+ outb(CMOS_DATA, mpbiosreason);
+}
+#endif
+
+/*
+ * Mark the given CPU's TSS descriptor as available again.
+ *
+ * ltr faults (GPF) on a descriptor whose "task busy" bit is still set, so
+ * the bit has to be cleared before the task register can be reloaded on
+ * the resume path.
+ */
+static void
+acpi_reset_tss(int cpu)
+{
+ uint32_t *desc;
+
+ /* The type field lives in the second 32-bit word of the descriptor. */
+ desc = (uint32_t *)&gdt[NGDT * cpu + GPROC0_SEL];
+ desc[1] &= ~((SDT_SYSBSY ^ SDT_SYSTSS) << 8);
+}
+
+/*
+ * Enter the requested ACPI sleep state and handle the return from it.
+ *
+ * The function passes through acpi_savecpu() twice: once when the context
+ * is saved (nonzero result, the "going to sleep" branch) and once more when
+ * the wakeup code restores that context via acpi_restorecpu (zero result,
+ * the "resumed" branch).
+ *
+ * Returns 0 after a successful resume, -1 if no wakeup memory is available
+ * or the sleep attempt failed.
+ */
int
acpi_sleep_machdep(struct acpi_softc *sc, int state)
{
- return (0);
+ struct savefpu *stopfpu;
+#ifdef SMP
+ cpumask_t wakeup_cpus;
+#endif
+ register_t cr3, rf;
+ ACPI_STATUS status;
+ int ret;
+
+ ret = -1;
+
+ /* The wakeup handler was never installed; nothing we can do. */
+ if (sc->acpi_wakeaddr == 0ul)
+ return (ret);
+
+#ifdef SMP
+ wakeup_cpus = PCPU_GET(other_cpus);
+#endif
+
+ AcpiSetFirmwareWakingVector(WAKECODE_PADDR(sc));
+
+ rf = intr_disable();
+ intr_suspend();
+
+ /*
+ * Temporarily switch to the kernel pmap because it provides
+ * an identity mapping (setup at boot) for the low physical
+ * memory region containing the wakeup code.
+ */
+ cr3 = rcr3();
+ load_cr3(KPML4phys);
+
+ stopfpu = &stopxpcbs[0].xpcb_pcb.pcb_save;
+ if (acpi_savecpu(&stopxpcbs[0])) {
+ /* Context just saved: prepare the wakeup page and go to sleep. */
+ fpugetregs(curthread, stopfpu);
+
+#ifdef SMP
+ if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) {
+ device_printf(sc->acpi_dev,
+ "Failed to suspend APs: CPU mask = 0x%jx\n",
+ (uintmax_t)(wakeup_cpus & ~stopped_cpus));
+ goto out;
+ }
+#endif
+
+ WAKECODE_FIXUP(resume_beep, uint32_t, acpi_resume_beep);
+ WAKECODE_FIXUP(reset_video, uint32_t, acpi_reset_video);
+
+ WAKECODE_FIXUP(wakeup_xpcb, struct xpcb *, &stopxpcbs[0]);
+ WAKECODE_FIXUP(wakeup_gdt, uint16_t,
+ stopxpcbs[0].xpcb_gdt.rd_limit);
+ WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t,
+ stopxpcbs[0].xpcb_gdt.rd_base);
+ WAKECODE_FIXUP(wakeup_cpu, int, 0);
+
+ acpi_reset_tss(0);
+
+ /* Call ACPICA to enter the desired sleep state */
+ if (state == ACPI_STATE_S4 && sc->acpi_s4bios)
+ status = AcpiEnterSleepStateS4bios();
+ else
+ status = AcpiEnterSleepState(state);
+
+ if (status != AE_OK) {
+ device_printf(sc->acpi_dev,
+ "AcpiEnterSleepState failed - %s\n",
+ AcpiFormatException(status));
+ goto out;
+ }
+
+ /* Presumably spins here until the hardware actually sleeps. */
+ for (;;)
+ ia32_pause();
+ } else {
+ /* Resumed through the wakeup code: undo the sleep setup. */
+ fpusetregs(curthread, stopfpu);
+
+ WAKECODE_FIXUP(resume_beep, uint32_t, 0);
+ WAKECODE_FIXUP(reset_video, uint32_t, 0);
+#ifdef SMP
+ if (wakeup_cpus != 0)
+ acpi_wakeup_cpus(sc, wakeup_cpus);
+#endif
+ ret = 0;
+ }
+
+out:
+ /* Common exit for both the failure and the resumed path. */
+#ifdef SMP
+ if (wakeup_cpus != 0)
+ restart_cpus(wakeup_cpus);
+#endif
+
+ load_cr3(cr3);
+ intr_resume();
+ intr_restore(rf);
+
+ AcpiSetFirmwareWakingVector(0);
+
+ /* After a real resume, let the memory range driver reprogram itself. */
+ if (ret == 0 && mem_range_softc.mr_op != NULL &&
+ mem_range_softc.mr_op->reinit != NULL)
+ mem_range_softc.mr_op->reinit(&mem_range_softc);
+
+ /* If we beeped, turn it off after a delay. */
+ if (acpi_resume_beep)
+ timeout(acpi_stop_beep, NULL, 3 * hz);
+
+ return (ret);
+}
+
+/* Kernel virtual address of the wakeup region; 0 until it is allocated. */
+static vm_offset_t acpi_wakeaddr;
+
+/*
+ * SYSINIT hook: reserve four contiguous pages of low memory for the wakeup
+ * page tables and code, and an array of one struct xpcb per CPU.
+ *
+ * Does nothing unless called while `cold' is set.  On any allocation
+ * failure a message is printed and acpi_wakeaddr stays 0.
+ */
+static void
+acpi_alloc_wakeup_handler(void)
+{
+ void *wakeaddr;
+
+ if (!cold)
+ return;
+
+ /*
+ * Specify the region for our wakeup code. We want it in the low 1 MB
+ * region, excluding video memory and above (0xa0000). We ask for
+ * it to be page-aligned, just to be safe.
+ */
+ wakeaddr = contigmalloc(4 * PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0, 0x9ffff,
+ PAGE_SIZE, 0ul);
+ if (wakeaddr == NULL) {
+ printf("%s: can't alloc wake memory\n", __func__);
+ return;
+ }
+ stopxpcbs = malloc(mp_ncpus * sizeof(*stopxpcbs), M_DEVBUF, M_NOWAIT);
+ if (stopxpcbs == NULL) {
+ /* Give the low-memory pages back rather than leaking them. */
+ contigfree(wakeaddr, 4 * PAGE_SIZE, M_DEVBUF);
+ printf("%s: can't alloc CPU state memory\n", __func__);
+ return;
+ }
+ acpi_wakeaddr = (vm_offset_t)wakeaddr;
}
+SYSINIT(acpiwakeup, SI_SUB_KMEM, SI_ORDER_ANY, acpi_alloc_wakeup_handler, 0);
+
+/*
+ * Populate the wakeup region allocated by acpi_alloc_wakeup_handler().
+ *
+ * Records the region's virtual and physical address in the softc, copies
+ * the wakeup code into the fourth page, patches the addresses and saved MSR
+ * values the code needs, and builds temporary page tables in the first
+ * three pages.  Returns without doing anything if no region was allocated.
+ */
void
acpi_install_wakeup_handler(struct acpi_softc *sc)
{
+ uint64_t *pt4, *pt3, *pt2;
+ int i;
+
+ if (acpi_wakeaddr == 0ul)
+ return;
+
+ sc->acpi_wakeaddr = acpi_wakeaddr;
+ sc->acpi_wakephys = vtophys(acpi_wakeaddr);
+
+ bcopy(wakecode, (void *)WAKECODE_VADDR(sc), sizeof(wakecode));
+
+ /* Patch GDT base address, ljmp targets and page table base address. */
+ /* The +2 / +1 offsets skip the bytes that precede each operand. */
+ WAKECODE_FIXUP((bootgdtdesc + 2), uint32_t,
+ WAKECODE_PADDR(sc) + bootgdt);
+ WAKECODE_FIXUP((wakeup_sw32 + 2), uint32_t,
+ WAKECODE_PADDR(sc) + wakeup_32);
+ WAKECODE_FIXUP((wakeup_sw64 + 1), uint32_t,
+ WAKECODE_PADDR(sc) + wakeup_64);
+ WAKECODE_FIXUP(wakeup_pagetables, uint32_t, sc->acpi_wakephys);
+
+ /* Save pointers to some global data. */
+ WAKECODE_FIXUP(wakeup_retaddr, void *, acpi_restorecpu);
+ WAKECODE_FIXUP(wakeup_kpml4, uint64_t, KPML4phys);
+ WAKECODE_FIXUP(wakeup_ctx, vm_offset_t,
+ WAKECODE_VADDR(sc) + wakeup_ctx);
+ /* Snapshot the current CPU's MSR values for acpi_restorecpu. */
+ WAKECODE_FIXUP(wakeup_efer, uint64_t, rdmsr(MSR_EFER));
+ WAKECODE_FIXUP(wakeup_pat, uint64_t, rdmsr(MSR_PAT));
+ WAKECODE_FIXUP(wakeup_star, uint64_t, rdmsr(MSR_STAR));
+ WAKECODE_FIXUP(wakeup_lstar, uint64_t, rdmsr(MSR_LSTAR));
+ WAKECODE_FIXUP(wakeup_cstar, uint64_t, rdmsr(MSR_CSTAR));
+ WAKECODE_FIXUP(wakeup_sfmask, uint64_t, rdmsr(MSR_SF_MASK));
+
+ /* Build temporary page tables below realmode code. */
+ /* One page each for the level 4, level 3 and level 2 tables. */
+ pt4 = (uint64_t *)acpi_wakeaddr;
+ pt3 = pt4 + (PAGE_SIZE) / sizeof(uint64_t);
+ pt2 = pt3 + (PAGE_SIZE) / sizeof(uint64_t);
+
+ /* Create the initial 1GB replicated page tables */
+ for (i = 0; i < 512; i++) {
+ /*
+ * Each slot of the level 4 pages points
+ * to the same level 3 page
+ */
+ pt4[i] = (uint64_t)(sc->acpi_wakephys + PAGE_SIZE);
+ pt4[i] |= PG_V | PG_RW | PG_U;
+
+ /*
+ * Each slot of the level 3 pages points
+ * to the same level 2 page
+ */
+ pt3[i] = (uint64_t)(sc->acpi_wakephys + (2 * PAGE_SIZE));
+ pt3[i] |= PG_V | PG_RW | PG_U;
+
+ /* The level 2 page slots are mapped with 2MB pages for 1GB. */
+ pt2[i] = i * (2 * 1024 * 1024);
+ pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
+ }
+
+ if (bootverbose)
+ device_printf(sc->acpi_dev, "wakeup code va %p pa %p\n",
+ (void *)sc->acpi_wakeaddr, (void *)sc->acpi_wakephys);
}
diff --git a/sys/amd64/acpica/genwakecode.sh b/sys/amd64/acpica/genwakecode.sh
new file mode 100755
index 000000000000..b4853863ae7e
--- /dev/null
+++ b/sys/amd64/acpica/genwakecode.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+# $FreeBSD$
+#
+file2c 'static char wakecode[] = {' '};' <acpi_wakecode.bin
+
+exit 0
diff --git a/sys/amd64/acpica/genwakedata.sh b/sys/amd64/acpica/genwakedata.sh
new file mode 100755
index 000000000000..6d4181ecacaf
--- /dev/null
+++ b/sys/amd64/acpica/genwakedata.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+# $FreeBSD$
+#
+# Writes one "#define <symbol> 0x<value>" line to stdout for every symbol
+# defined in acpi_wakecode.o, in the numeric order produced by nm -n.
+# The middle column of the nm output is read into `dummy' and discarded.
+#
+nm -n --defined-only acpi_wakecode.o | while read offset dummy what
+do
+ echo "#define ${what} 0x${offset}"
+done
+
+exit 0
diff --git a/sys/amd64/amd64/amd64_mem.c b/sys/amd64/amd64/amd64_mem.c
index 2b5a73db2151..d7959fd45fb7 100644
--- a/sys/amd64/amd64/amd64_mem.c
+++ b/sys/amd64/amd64/amd64_mem.c
@@ -73,11 +73,13 @@ static void amd64_mrinit(struct mem_range_softc *sc);
static int amd64_mrset(struct mem_range_softc *sc,
struct mem_range_desc *mrd, int *arg);
static void amd64_mrAPinit(struct mem_range_softc *sc);
+static void amd64_mrreinit(struct mem_range_softc *sc);
static struct mem_range_ops amd64_mrops = {
amd64_mrinit,
amd64_mrset,
- amd64_mrAPinit
+ amd64_mrAPinit,
+ amd64_mrreinit
};
/* XXX for AP startup hook */
@@ -668,6 +670,30 @@ amd64_mrAPinit(struct mem_range_softc *sc)
wrmsr(MSR_MTRRdefType, mtrrdef);
}
+/*
+ * Re-initialise running CPU(s) MTRRs to match the ranges in the descriptor
+ * list.
+ *
+ * Installed as the fourth member of amd64_mrops.  The work itself is done
+ * by amd64_mrAPinit(): through smp_rendezvous() on SMP kernels, or directly
+ * with interrupts disabled otherwise.
+ *
+ * XXX Must be called with interrupts enabled.
+ */
+static void
+amd64_mrreinit(struct mem_range_softc *sc)
+{
+#ifdef SMP
+ /*
+ * We should use ipi_all_but_self() to call other CPUs into a
+ * locking gate, then call a target function to do this work.
+ * The "proper" solution involves a generalised locking gate
+ * implementation, not ready yet.
+ */
+ smp_rendezvous(NULL, (void *)amd64_mrAPinit, NULL, sc);
+#else
+ /* Interrupts are re-enabled unconditionally afterwards (see XXX). */
+ disable_intr(); /* disable interrupts */
+ amd64_mrAPinit(sc);
+ enable_intr();
+#endif
+}
+
static void
amd64_mem_drvinit(void *unused)
{
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index 14a6f87576a1..0306bb37ea42 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -224,6 +224,22 @@ IDTVEC(cpustop)
iretq
/*
+ * Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
+ */
+ .text
+ SUPERALIGN_TEXT
+/* Acknowledge the interrupt at the local APIC, then call the C handler. */
+IDTVEC(cpususpend)
+ PUSH_FRAME
+
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
+
+ call cpususpend_handler
+
+ POP_FRAME
+ iretq
+
+/*
* Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
*
* - Calls the generic rendezvous action function.
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index bc1a4bbb42c2..0c59703f921b 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -325,9 +325,8 @@ load_dr:
movq %r11,%dr6
movq %rax,%dr7
jmp done_load_dr
-
END(cpu_switch)
-
+
/*
* savectx(pcb)
* Update pcb, saving current processor state.
@@ -386,3 +385,74 @@ ENTRY(savectx)
ret
END(savectx)
+
+/*
+ * savectx2(xpcb)
+ * Save current processor state into xpcb; returns 1 to the caller.
+ */
+ENTRY(savectx2)
+ /* Fetch XPCB; PCB_* offsets apply as xpcb_pcb is its first member. */
+ movq %rdi,%r8
+
+ /* Save caller's return address. */
+ movq (%rsp),%rax
+ movq %rax,PCB_RIP(%r8)
+
+ mov %ds,PCB_DS(%r8) /* segment selectors */
+ mov %es,PCB_ES(%r8)
+ mov %ss,XPCB_SS(%r8)
+ mov %fs,PCB_FS(%r8)
+ mov %gs,PCB_GS(%r8)
+
+ movq %rbx,PCB_RBX(%r8) /* general registers kept in the pcb */
+ movq %rsp,PCB_RSP(%r8) /* still points at the return address */
+ movq %rbp,PCB_RBP(%r8)
+ movq %r12,PCB_R12(%r8)
+ movq %r13,PCB_R13(%r8)
+ movq %r14,PCB_R14(%r8)
+ movq %r15,PCB_R15(%r8)
+
+ movq %cr0,%rax /* control registers; %cr3 is not saved here */
+ movq %rax,XPCB_CR0(%r8)
+ movq %cr2,%rax
+ movq %rax,XPCB_CR2(%r8)
+ movq %cr4,%rax
+ movq %rax,XPCB_CR4(%r8)
+
+ movq %dr0,%rax /* debug registers */
+ movq %rax,PCB_DR0(%r8)
+ movq %dr1,%rax
+ movq %rax,PCB_DR1(%r8)
+ movq %dr2,%rax
+ movq %rax,PCB_DR2(%r8)
+ movq %dr3,%rax
+ movq %rax,PCB_DR3(%r8)
+ movq %dr6,%rax
+ movq %rax,PCB_DR6(%r8)
+ movq %dr7,%rax
+ movq %rax,PCB_DR7(%r8)
+
+ sgdt XPCB_GDT(%r8) /* descriptor table registers and %tr */
+ sidt XPCB_IDT(%r8)
+ sldt XPCB_LDT(%r8)
+ str XPCB_TR(%r8)
+
+ movl $MSR_FSBASE,%ecx
+ rdmsr
+ shlq $32,%rdx /* %rdx holds the high half */
+ leaq (%rax,%rdx),%rax /* combine high and low halves */
+ movq %rax,PCB_FSBASE(%r8)
+ movl $MSR_GSBASE,%ecx
+ rdmsr
+ shlq $32,%rdx
+ leaq (%rax,%rdx),%rax
+ movq %rax,PCB_GSBASE(%r8)
+ movl $MSR_KGSBASE,%ecx
+ rdmsr
+ shlq $32,%rdx
+ leaq (%rax,%rdx),%rax
+ movq %rax,XPCB_KGSBASE(%r8)
+
+ movl $1, %eax /* return value: 1 */
+ ret
+END(savectx2)
diff --git a/sys/amd64/amd64/db_trace.c b/sys/amd64/amd64/db_trace.c
index 9676963f4f2b..50a5f4d2ca7a 100644
--- a/sys/amd64/amd64/db_trace.c
+++ b/sys/amd64/amd64/db_trace.c
@@ -316,6 +316,7 @@ db_nextframe(struct amd64_frame **fp, db_addr_t *ip, struct thread *td)
strcmp(name, "Xtimerint") == 0 ||
strcmp(name, "Xipi_intr_bitmap_handler") == 0 ||
strcmp(name, "Xcpustop") == 0 ||
+ strcmp(name, "Xcpususpend") == 0 ||
strcmp(name, "Xrendezvous") == 0)
frame_type = INTERRUPT;
else if (strcmp(name, "Xfast_syscall") == 0)
@@ -327,6 +328,7 @@ db_nextframe(struct amd64_frame **fp, db_addr_t *ip, struct thread *td)
/* XXX: These are interrupts with trap frames. */
else if (strcmp(name, "Xtimerint") == 0 ||
strcmp(name, "Xcpustop") == 0 ||
+ strcmp(name, "Xcpususpend") == 0 ||
strcmp(name, "Xrendezvous") == 0 ||
strcmp(name, "Xipi_intr_bitmap_handler") == 0)
frame_type = TRAP_INTERRUPT;
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 1924be7f534b..5aa3134855c2 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -155,6 +155,19 @@ ASSYM(PCB_GS32SD, offsetof(struct pcb, pcb_gs32sd));
ASSYM(PCB_SIZE, sizeof(struct pcb));
+ASSYM(XPCB_PCB, offsetof(struct xpcb, xpcb_pcb));
+ASSYM(XPCB_CR0, offsetof(struct xpcb, xpcb_cr0));
+ASSYM(XPCB_CR2, offsetof(struct xpcb, xpcb_cr2));
+ASSYM(XPCB_CR4, offsetof(struct xpcb, xpcb_cr4));
+ASSYM(XPCB_KGSBASE, offsetof(struct xpcb, xpcb_kgsbase));
+ASSYM(XPCB_SS, offsetof(struct xpcb, xpcb_ss));
+ASSYM(XPCB_GDT, offsetof(struct xpcb, xpcb_gdt));
+ASSYM(XPCB_IDT, offsetof(struct xpcb, xpcb_idt));
+ASSYM(XPCB_LDT, offsetof(struct xpcb, xpcb_ldt));
+ASSYM(XPCB_TR, offsetof(struct xpcb, xpcb_tr));
+
+ASSYM(XPCB_SIZE, sizeof(struct xpcb));
+
ASSYM(COMMON_TSS_RSP0, offsetof(struct amd64tss, tss_rsp0));
ASSYM(TF_R15, offsetof(struct trapframe, tf_r15));
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index c65f6f85c549..b7c03d9fb2db 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include <machine/apicreg.h>
#include <machine/cputypes.h>
+#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
@@ -103,6 +104,7 @@ extern pt_entry_t *SMPpt;
extern int _udatasel;
struct pcb stoppcbs[MAXCPU];
+struct xpcb *stopxpcbs = NULL;
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
@@ -344,6 +346,9 @@ cpu_mp_start(void)
/* Install an inter-CPU IPI for CPU stop/restart */
setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0);
+ /* Install an inter-CPU IPI for CPU suspend/resume */
+ setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYSIGT, SEL_KPL, 0);
+
/* Set boot_cpu_id if needed. */
if (boot_cpu_id == -1) {
boot_cpu_id = PCPU_GET(apic_id);
@@ -1145,6 +1150,41 @@ cpustop_handler(void)
}
/*
+ * Handle an IPI_SUSPEND by saving our current context and spinning until we
+ * are resumed.
+ */
+void
+cpususpend_handler(void)
+{
+ struct savefpu *stopfpu;
+ register_t cr3, rf;
+ int cpu = PCPU_GET(cpuid);
+ int cpumask = PCPU_GET(cpumask);
+
+ rf = intr_disable();
+ cr3 = rcr3(); /* savectx2() does not save %cr3 */
+ stopfpu = &stopxpcbs[cpu].xpcb_pcb.pcb_save;
+ if (savectx2(&stopxpcbs[cpu])) { /* savectx2() itself returns 1 */
+ fpugetregs(curthread, stopfpu);
+ wbinvd();
+ atomic_set_int(&stopped_cpus, cpumask);
+ } else /* NOTE(review): presumably taken on resume - confirm */
+ fpusetregs(curthread, stopfpu);
+
+ /* Spin until our bit is set in started_cpus */
+ while (!(started_cpus & cpumask))
+ ia32_pause();
+
+ atomic_clear_int(&started_cpus, cpumask);
+ atomic_clear_int(&stopped_cpus, cpumask);
+
+ /* Restore CR3, rerun lapic_setup(), restore the saved interrupt state */
+ load_cr3(cr3);
+ lapic_setup(0);
+ intr_restore(rf);
+}
+
+/*
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h
index ea971ec8585c..3cc028d564b5 100644
--- a/sys/amd64/include/apicvar.h
+++ b/sys/amd64/include/apicvar.h
@@ -130,6 +130,7 @@
#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
#define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */
+#define IPI_SUSPEND (APIC_IPI_INTS + 8) /* Suspend CPU until restarted. */
/*
* The spurious interrupt can share the priority class with the IPIs since
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
index e6a5add7f983..2e2ca87eb61f 100644
--- a/sys/amd64/include/pcb.h
+++ b/sys/amd64/include/pcb.h
@@ -82,11 +82,25 @@ struct pcb {
struct user_segment_descriptor pcb_gs32sd;
};
+struct xpcb {
+ struct pcb xpcb_pcb; /* keep first: savectx2 uses PCB_* offsets */
+ register_t xpcb_cr0; /* %cr0 */
+ register_t xpcb_cr2; /* %cr2 */
+ register_t xpcb_cr4; /* %cr4 */
+ register_t xpcb_kgsbase; /* MSR_KGSBASE */
+ uint32_t xpcb_ss; /* %ss selector */
+ struct region_descriptor xpcb_gdt; /* written by sgdt */
+ struct region_descriptor xpcb_idt; /* written by sidt */
+ struct region_descriptor xpcb_ldt; /* written by sldt */
+ uint16_t xpcb_tr; /* written by str */
+};
+
#ifdef _KERNEL
struct trapframe;
void makectx(struct trapframe *, struct pcb *);
void savectx(struct pcb *);
+int savectx2(struct xpcb *);
#endif
#endif /* _AMD64_PCB_H_ */
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index 06222f8a9e9a..d2ff189f2f85 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -48,11 +48,13 @@ inthand_t
IDTVEC(invlcache), /* Write back and invalidate cache */
IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */
IDTVEC(cpustop), /* CPU stops & waits to be restarted */
+ IDTVEC(cpususpend), /* CPU suspends & waits to be resumed */
IDTVEC(rendezvous); /* handle CPU rendezvous */
/* functions in mp_machdep.c */
void cpu_add(u_int apic_id, char boot_cpu);
void cpustop_handler(void);
+void cpususpend_handler(void);
void init_secondary(void);
void ipi_selected(u_int cpus, u_int ipi);
void ipi_all_but_self(u_int ipi);
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index e826550edaaa..2f17a01f0a92 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -69,6 +69,19 @@ hptrr_lib.o optional hptrr \
#
amd64/acpica/OsdEnvironment.c optional acpi
amd64/acpica/acpi_machdep.c optional acpi
+amd64/acpica/acpi_switch.S optional acpi
+acpi_wakecode.h optional acpi \
+ dependency "$S/amd64/acpica/acpi_wakecode.S assym.s" \
+ compile-with "${MAKE} -f $S/amd64/acpica/Makefile ${.TARGET} MAKESRCPATH=$S/amd64/acpica" \
+ no-obj no-implicit-rule before-depend \
+ clean "acpi_wakecode.h acpi_wakecode.o acpi_wakecode.bin"
+#
+acpi_wakedata.h optional acpi \
+ dependency "$S/amd64/acpica/acpi_wakecode.S assym.s" \
+ compile-with "${MAKE} -f $S/amd64/acpica/Makefile ${.TARGET} MAKESRCPATH=$S/amd64/acpica" \
+ no-obj no-implicit-rule before-depend \
+ clean "acpi_wakedata.h acpi_wakecode.o acpi_wakecode.bin"
+#
amd64/acpica/acpi_wakeup.c optional acpi
amd64/acpica/madt.c optional acpi
amd64/amd64/amd64_mem.c optional mem
diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c
index 82719174eca3..15e8d00fd7ca 100644
--- a/sys/dev/acpica/acpi.c
+++ b/sys/dev/acpica/acpi.c
@@ -46,7 +46,11 @@ __FBSDID("$FreeBSD$");
#include <sys/linker.h>
#include <sys/power.h>
#include <sys/sbuf.h>
+#ifdef SMP
+#include <sys/sched.h>
+#endif
#include <sys/smp.h>
+#include <sys/timetc.h>
#if defined(__i386__) || defined(__amd64__)
#include <machine/pci_cfgreg.h>
@@ -2274,6 +2278,7 @@ acpi_SetSleepState(struct acpi_softc *sc, int state)
return (acpi_EnterSleepState(sc, state));
}
+#if defined(__amd64__) || defined(__i386__)
static void
acpi_sleep_force(void *arg)
{
@@ -2284,6 +2289,7 @@ acpi_sleep_force(void *arg)
if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
printf("acpi: force sleep state S%d failed\n", sc->acpi_next_sstate);
}
+#endif
/*
* Request that the system enter the given suspend state. All /dev/apm
@@ -2294,7 +2300,9 @@ acpi_sleep_force(void *arg)
int
acpi_ReqSleepState(struct acpi_softc *sc, int state)
{
+#if defined(__i386__)
struct apm_clone_data *clone;
+#endif
if (state < ACPI_STATE_S1 || state > ACPI_STATE_S5)
return (EINVAL);
@@ -2307,11 +2315,7 @@ acpi_ReqSleepState(struct acpi_softc *sc, int state)
return (ENXIO);
}
-#if !defined(__i386__)
- /* This platform does not support acpi suspend/resume. */
- return (EOPNOTSUPP);
-#endif
-
+#if defined(__amd64__) || defined(__i386__)
/* If a suspend request is already in progress, just return. */
ACPI_LOCK(acpi);
if (sc->acpi_next_sstate != 0) {
@@ -2321,6 +2325,7 @@ acpi_ReqSleepState(struct acpi_softc *sc, int state)
/* Record the pending state and notify all apm devices. */
sc->acpi_next_sstate = state;
+#if defined(__i386__)
STAILQ_FOREACH(clone, &sc->apm_cdevs, entries) {
clone->notify_status = APM_EV_NONE;
if ((clone->flags & ACPI_EVF_DEVD) == 0) {
@@ -2328,6 +2333,7 @@ acpi_ReqSleepState(struct acpi_softc *sc, int state)
KNOTE_UNLOCKED(&clone->sel_read.si_note, 0);
}
}
+#endif
/* If devd(8) is not running, immediately enter the sleep state. */
if (devctl_process_running() == FALSE) {
@@ -2352,6 +2358,10 @@ acpi_ReqSleepState(struct acpi_softc *sc, int state)
callout_reset(&sc->susp_force_to, 10 * hz, acpi_sleep_force, sc);
ACPI_UNLOCK(acpi);
return (0);
+#else
+ /* This platform does not support acpi suspend/resume. */
+ return (EOPNOTSUPP);
+#endif
}
/*
@@ -2364,14 +2374,10 @@ acpi_ReqSleepState(struct acpi_softc *sc, int state)
int
acpi_AckSleepState(struct apm_clone_data *clone, int error)
{
+#if defined(__amd64__) || defined(__i386__)
struct acpi_softc *sc;
int ret, sleeping;
-#if !defined(__i386__)
- /* This platform does not support acpi suspend/resume. */
- return (EOPNOTSUPP);
-#endif
-
/* If no pending sleep state, return an error. */
ACPI_LOCK(acpi);
sc = clone->acpi_sc;
@@ -2395,8 +2401,9 @@ acpi_AckSleepState(struct apm_clone_data *clone, int error)
* all devices, seeing if they agree yet. We only count devices that
* are writable since read-only devices couldn't ack the request.
*/
- clone->notify_status = APM_EV_ACKED;
sleeping = TRUE;
+#if defined(__i386__)
+ clone->notify_status = APM_EV_ACKED;
STAILQ_FOREACH(clone, &sc->apm_cdevs, entries) {
if ((clone->flags & ACPI_EVF_WRITE) != 0 &&
clone->notify_status != APM_EV_ACKED) {
@@ -2404,6 +2411,7 @@ acpi_AckSleepState(struct apm_clone_data *clone, int error)
break;
}
}
+#endif
/* If all devices have voted "yes", we will suspend now. */
if (sleeping)
@@ -2414,8 +2422,11 @@ acpi_AckSleepState(struct apm_clone_data *clone, int error)
if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
ret = ENODEV;
}
-
return (ret);
+#else
+ /* This platform does not support acpi suspend/resume. */
+ return (EOPNOTSUPP);
+#endif
}
static void
@@ -2459,11 +2470,18 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state)
sc->acpi_sleep_disabled = 1;
ACPI_UNLOCK(acpi);
+#ifdef SMP
+ thread_lock(curthread);
+ sched_bind(curthread, 0);
+ thread_unlock(curthread);
+#endif
+
/*
* Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
* drivers need this.
*/
mtx_lock(&Giant);
+
slp_state = ACPI_SS_NONE;
switch (state) {
case ACPI_STATE_S1:
@@ -2570,6 +2588,17 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state)
acpi_UserNotify("Resume", ACPI_ROOT_OBJECT, state);
mtx_unlock(&Giant);
+
+ /* Warm up timecounter again */
+ (void)timecounter->tc_get_timecount(timecounter);
+ (void)timecounter->tc_get_timecount(timecounter);
+
+#ifdef SMP
+ thread_lock(curthread);
+ sched_unbind(curthread);
+ thread_unlock(curthread);
+#endif
+
return_ACPI_STATUS (status);
}
diff --git a/sys/dev/acpica/acpi_ec.c b/sys/dev/acpica/acpi_ec.c
index bb752236ae93..696542cf21c5 100644
--- a/sys/dev/acpica/acpi_ec.c
+++ b/sys/dev/acpica/acpi_ec.c
@@ -747,7 +747,7 @@ EcSpaceHandler(UINT32 Function, ACPI_PHYSICAL_ADDRESS Address, UINT32 width,
* If booting, check if we need to run the query handler. If so, we
* we call it directly here since our thread taskq is not active yet.
*/
- if (cold || rebooting) {
+ if (cold || rebooting || sc->ec_suspending) {
if ((EC_GET_CSR(sc) & EC_EVENT_SCI)) {
CTR0(KTR_ACPI, "ec running gpe handler directly");
EcGpeQueryHandler(sc);
diff --git a/sys/i386/i386/i686_mem.c b/sys/i386/i386/i686_mem.c
index fc88be157c38..fe229cc3be20 100644
--- a/sys/i386/i386/i686_mem.c
+++ b/sys/i386/i386/i686_mem.c
@@ -73,11 +73,13 @@ static void i686_mrinit(struct mem_range_softc *sc);
static int i686_mrset(struct mem_range_softc *sc,
struct mem_range_desc *mrd, int *arg);
static void i686_mrAPinit(struct mem_range_softc *sc);
+static void i686_mrreinit(struct mem_range_softc *sc);
static struct mem_range_ops i686_mrops = {
i686_mrinit,
i686_mrset,
- i686_mrAPinit
+ i686_mrAPinit,
+ i686_mrreinit
};
/* XXX for AP startup hook */
@@ -668,6 +670,30 @@ i686_mrAPinit(struct mem_range_softc *sc)
wrmsr(MSR_MTRRdefType, mtrrdef);
}
+/*
+ * Re-initialise the MTRRs of the running CPU(s) to match the ranges in the
+ * descriptor list, by running i686_mrAPinit() with sc as its argument.
+ *
+ * XXX Must be called with interrupts enabled.
+ */
+static void
+i686_mrreinit(struct mem_range_softc *sc)
+{
+#ifdef SMP
+ /*
+ * We should use ipi_all_but_self() to call other CPUs into a
+ * locking gate, then call a target function to do this work.
+ * The "proper" solution involves a generalised locking gate
+ * implementation, not ready yet.
+ */
+ smp_rendezvous(NULL, (void *)i686_mrAPinit, NULL, sc);
+#else
+ disable_intr(); /* no interrupts while i686_mrAPinit() runs */
+ i686_mrAPinit(sc);
+ enable_intr(); /* unconditional; see the XXX note above */
+#endif
+}
+
static void
i686_mem_drvinit(void *unused)
{
diff --git a/sys/i386/i386/k6_mem.c b/sys/i386/i386/k6_mem.c
index 9cbacfeedefe..c99cf277489a 100644
--- a/sys/i386/i386/k6_mem.c
+++ b/sys/i386/i386/k6_mem.c
@@ -70,6 +70,7 @@ static struct mem_range_ops k6_mrops =
{
k6_mrinit,
k6_mrset,
+ NULL,
NULL
};
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index e807e485cbcf..1a3a6fa81bc8 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -262,6 +262,54 @@ stop_cpus(cpumask_t map)
return 1;
}
+#if defined(__amd64__)
+/*
+ * When called the executing CPU will send an IPI_SUSPEND to the CPUs in
+ * map, requesting that they suspend execution.
+ *
+ * Usually (but not necessarily) called with 'other_cpus' as its arg.
+ *
+ * - Signals all CPUs in map to suspend.
+ * - Waits for each to suspend.
+ *
+ * Returns:
+ * 0: SMP not started; no IPI was sent
+ * 1: IPI sent; also returned when the
+ * DIAGNOSTIC wait below times out
+ *
+ * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
+ * from executing at same time.
+ */
+int
+suspend_cpus(cpumask_t map)
+{
+ int i;
+
+ if (!smp_started)
+ return (0);
+
+ CTR1(KTR_SMP, "suspend_cpus(%x)", map);
+
+ /* send the suspend IPI to all CPUs in map */
+ ipi_selected(map, IPI_SUSPEND);
+
+ i = 0;
+ while ((stopped_cpus & map) != map) {
+ /* spin until every CPU in map has set its stopped_cpus bit */
+ cpu_spinwait();
+ i++;
+#ifdef DIAGNOSTIC
+ if (i == 100000) { /* give up waiting after 100000 spins */
+ printf("timeout suspending cpus\n");
+ break;
+ }
+#endif
+ }
+
+ return (1);
+}
+#endif
+
/*
* Called by a CPU to restart stopped CPUs.
*
diff --git a/sys/sys/memrange.h b/sys/sys/memrange.h
index ace778d5bfd4..c90104f75942 100644
--- a/sys/sys/memrange.h
+++ b/sys/sys/memrange.h
@@ -52,6 +52,7 @@ struct mem_range_ops
void (*init)(struct mem_range_softc *sc);
int (*set)(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg);
void (*initAP)(struct mem_range_softc *sc);
+ void (*reinit)(struct mem_range_softc *sc);
};
struct mem_range_softc
@@ -68,4 +69,3 @@ extern int mem_range_attr_get(struct mem_range_desc *mrd, int *arg);
extern int mem_range_attr_set(struct mem_range_desc *mrd, int *arg);
#endif
-
diff --git a/sys/sys/smp.h b/sys/sys/smp.h
index 8306d5259772..500516f6a1a8 100644
--- a/sys/sys/smp.h
+++ b/sys/sys/smp.h
@@ -122,6 +122,9 @@ void forward_signal(struct thread *);
void forward_roundrobin(void);
int restart_cpus(cpumask_t);
int stop_cpus(cpumask_t);
+#if defined(__amd64__)
+int suspend_cpus(cpumask_t);
+#endif
void smp_rendezvous_action(void);
extern struct mtx smp_ipi_mtx;