diff options
Diffstat (limited to 'sys/amd64/include')
94 files changed, 8627 insertions, 0 deletions
diff --git a/sys/amd64/include/_align.h b/sys/amd64/include/_align.h new file mode 100644 index 000000000000..506bc3c9e7f3 --- /dev/null +++ b/sys/amd64/include/_align.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/_align.h> diff --git a/sys/amd64/include/_bus.h b/sys/amd64/include/_bus.h new file mode 100644 index 000000000000..ce73118c9793 --- /dev/null +++ b/sys/amd64/include/_bus.h @@ -0,0 +1,28 @@ +/*- + * Copyright (c) 2005 The FreeBSD Foundation. + * + * SPDX-License-Identifier: BSD-2-Clause + * + * Derived in part from NetBSD's bus.h files by (alphabetically): + * Christopher G. Demetriou + * Charles M. Hannum + * Jason Thorpe + * The NetBSD Foundation. + */ + +#ifndef AMD64_INCLUDE__BUS_H +#define AMD64_INCLUDE__BUS_H + +/* + * Bus address and size types + */ +typedef uint64_t bus_addr_t; +typedef uint64_t bus_size_t; + +/* + * Access methods for bus resources and address space. + */ +typedef uint64_t bus_space_tag_t; +typedef uint64_t bus_space_handle_t; + +#endif /* AMD64_INCLUDE__BUS_H */ diff --git a/sys/amd64/include/_inttypes.h b/sys/amd64/include/_inttypes.h new file mode 100644 index 000000000000..2d0d28c29fe7 --- /dev/null +++ b/sys/amd64/include/_inttypes.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/_inttypes.h> diff --git a/sys/amd64/include/_limits.h b/sys/amd64/include/_limits.h new file mode 100644 index 000000000000..aa71de42e795 --- /dev/null +++ b/sys/amd64/include/_limits.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/_limits.h> diff --git a/sys/amd64/include/_pmap.h b/sys/amd64/include/_pmap.h new file mode 100644 index 000000000000..b4b1b5962838 --- /dev/null +++ b/sys/amd64/include/_pmap.h @@ -0,0 +1,39 @@ +/*- + * Copyright (c) 2014-2020 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by + * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MACHINE__PMAP_H_ +#define _MACHINE__PMAP_H_ + +struct pmap_pcid { + uint32_t pm_pcid; + uint32_t pm_gen; +}; + +#endif diff --git a/sys/amd64/include/_stdint.h b/sys/amd64/include/_stdint.h new file mode 100644 index 000000000000..033e31af2ccd --- /dev/null +++ b/sys/amd64/include/_stdint.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/_stdint.h> diff --git a/sys/amd64/include/_types.h b/sys/amd64/include/_types.h new file mode 100644 index 000000000000..8d6d2dfa0d72 --- /dev/null +++ b/sys/amd64/include/_types.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/_types.h> diff --git a/sys/amd64/include/acpica_machdep.h b/sys/amd64/include/acpica_machdep.h new file mode 100644 index 000000000000..21f797929cec --- /dev/null +++ b/sys/amd64/include/acpica_machdep.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/acpica_machdep.h> diff --git a/sys/amd64/include/apm_bios.h b/sys/amd64/include/apm_bios.h new file mode 100644 index 000000000000..9d31da47c3aa --- /dev/null +++ b/sys/amd64/include/apm_bios.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/apm_bios.h> diff --git a/sys/amd64/include/asan.h b/sys/amd64/include/asan.h new file mode 100644 index 000000000000..7d1d291aa06a --- /dev/null +++ b/sys/amd64/include/asan.h @@ -0,0 +1,77 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2020 The FreeBSD Foundation + * + * This software was developed by Mark Johnston under sponsorship from the + * FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MACHINE_ASAN_H_ +#define _MACHINE_ASAN_H_ + +#ifdef KASAN + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_page.h> +#include <machine/vmparam.h> + +static inline vm_offset_t +kasan_md_addr_to_shad(vm_offset_t addr) +{ + return (((addr - VM_MIN_KERNEL_ADDRESS) >> KASAN_SHADOW_SCALE_SHIFT) + + KASAN_MIN_ADDRESS); +} + +static inline bool +kasan_md_unsupported(vm_offset_t addr) +{ + vm_offset_t kernmin; + + /* + * The vm_page array is mapped at the beginning of the kernel map, but + * accesses to the array are not validated for now. Handle the fact + * that KASAN must validate accesses before the vm_page array is + * initialized. + */ + kernmin = vm_page_array == NULL ? VM_MIN_KERNEL_ADDRESS : + (vm_offset_t)(vm_page_array + vm_page_array_size); + return (addr < kernmin || addr >= VM_MAX_KERNEL_ADDRESS); +} + +static inline void +kasan_md_init(void) +{ +} + +static inline void +kasan_md_init_early(vm_offset_t bootstack, size_t size) +{ + kasan_shadow_map(bootstack, size); +} + +#endif /* KASAN */ + +#endif /* !_MACHINE_ASAN_H_ */ diff --git a/sys/amd64/include/asm.h b/sys/amd64/include/asm.h new file mode 100644 index 000000000000..6535e44d5cef --- /dev/null +++ b/sys/amd64/include/asm.h @@ -0,0 +1,115 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __i386__ +#include <i386/asm.h> +#else /* !__i386__ */ + +#ifndef _MACHINE_ASM_H_ +#define _MACHINE_ASM_H_ + +#include <sys/cdefs.h> + +#ifdef PIC +#define PIC_PLT(x) x@PLT +#define PIC_GOT(x) x@GOTPCREL(%rip) +#else +#define PIC_PLT(x) x +#endif + +/* + * CNAME and HIDENAME manage the relationship between symbol names in C + * and the equivalent assembly language names. CNAME is given a name as + * it would be used in a C program. It expands to the equivalent assembly + * language name. HIDENAME is given an assembly-language name, and expands + * to a possibly-modified form that will be invisible to C programs. + */ +#define CNAME(csym) csym +#define HIDENAME(asmsym) .asmsym + +#define _START_ENTRY .text; .p2align 4,0x90 + +#define _ENTRY(x) _START_ENTRY; \ + .globl CNAME(x); .type CNAME(x),@function; CNAME(x):; \ + .cfi_startproc + +#ifdef PROF +#define ALTENTRY(x) _ENTRY(x); \ + pushq %rbp; \ + .cfi_def_cfa_offset 16; \ + .cfi_offset %rbp, -16; \ + movq %rsp,%rbp; \ + call PIC_PLT(HIDENAME(mcount)); \ + popq %rbp; \ + .cfi_restore %rbp; \ + .cfi_def_cfa_offset 8; \ + jmp 9f +#define ENTRY(x) _ENTRY(x); \ + pushq %rbp; \ + .cfi_def_cfa_offset 16; \ + .cfi_offset %rbp, -16; \ + movq %rsp,%rbp; \ + call PIC_PLT(HIDENAME(mcount)); \ + popq %rbp; \ + .cfi_restore %rbp; \ + .cfi_def_cfa_offset 8; \ + 9: +#else +#define ALTENTRY(x) _ENTRY(x) +#define ENTRY(x) _ENTRY(x) +#endif + +#define END(x) .size x, . - x; .cfi_endproc +/* + * WEAK_REFERENCE(): create a weak reference alias from sym. + * The macro is not a general asm macro that takes arbitrary names, + * but one that takes only C names. It does the non-null name + * translation inside the macro. + */ +#define WEAK_REFERENCE(sym, alias) \ + .weak CNAME(alias); \ + .equ CNAME(alias),CNAME(sym) + +#define RCSID(x) .text; .asciz x + +#undef __FBSDID +#if !defined(STRIP_FBSDID) +#define __FBSDID(s) .ident s +#else +#define __FBSDID(s) /* nothing */ +#endif /* !STRIP_FBSDID */ + +#endif /* !_MACHINE_ASM_H_ */ + +#endif /* __i386__ */ diff --git a/sys/amd64/include/asmacros.h b/sys/amd64/include/asmacros.h new file mode 100644 index 000000000000..3f415a3f751b --- /dev/null +++ b/sys/amd64/include/asmacros.h @@ -0,0 +1,270 @@ +/* -*- mode: asm -*- */ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1993 The Regents of the University of California. + * All rights reserved. + * + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by + * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(__i386__) +#include <i386/asmacros.h> +#else /* !__i386__ */ + +#ifndef _MACHINE_ASMACROS_H_ +#define _MACHINE_ASMACROS_H_ + +#include <sys/cdefs.h> + +/* XXX too much duplication in various asm*.h's. */ + +/* + * CNAME is used to manage the relationship between symbol names in C + * and the equivalent assembly language names. CNAME is given a name as + * it would be used in a C program. It expands to the equivalent assembly + * language name. + */ +#define CNAME(csym) csym + +#define ALIGN_DATA .p2align 3 /* 8 byte alignment, zero filled */ +#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ +#define SUPERALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ + +#define GEN_ENTRY(name) ALIGN_TEXT; .globl CNAME(name); \ + .type CNAME(name),@function; CNAME(name): +#define ENTRY(name) GEN_ENTRY(name) +#define ALTENTRY(name) GEN_ENTRY(name) +#define END(name) .size name, . - name + +/* + * Convenience for adding frame pointers to hand-coded ASM. Useful for + * DTrace, HWPMC, and KDB. + */ +#define PUSH_FRAME_POINTER \ + pushq %rbp ; \ + movq %rsp, %rbp ; +#define POP_FRAME_POINTER \ + popq %rbp + +#ifdef LOCORE +/* + * Access per-CPU data. + */ +#define PCPU(member) %gs:PC_ ## member +#define PCPU_ADDR(member, reg) \ + movq %gs:PC_PRVSPACE, reg ; \ + addq $PC_ ## member, reg + +/* + * Convenience macro for declaring interrupt entry points. + */ +#define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \ + .type __CONCAT(X,name),@function; __CONCAT(X,name): + + .macro SAVE_SEGS + movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) + .endm + + .macro MOVE_STACKS qw + .L.offset=0 + .rept \qw + movq .L.offset(%rsp),%rdx + movq %rdx,.L.offset(%rax) + .L.offset=.L.offset+8 + .endr + .endm + + .macro PTI_UUENTRY has_err + movq PCPU(KCR3),%rax + movq %rax,%cr3 + movq PCPU(RSP0),%rax + subq $PTI_SIZE - 8 * (1 - \has_err),%rax + MOVE_STACKS ((PTI_SIZE / 8) - 1 + \has_err) + movq %rax,%rsp + popq %rdx + popq %rax + .endm + + .macro PTI_UENTRY has_err + swapgs + lfence + cmpq $~0,PCPU(UCR3) + je 1f + pushq %rax + pushq %rdx + PTI_UUENTRY \has_err +1: + .endm + + .macro PTI_ENTRY name, contk, contu, has_err=0 + ALIGN_TEXT + .globl X\name\()_pti + .type X\name\()_pti,@function +X\name\()_pti: + /* %rax, %rdx, and possibly err are not yet pushed */ + testb $SEL_RPL_MASK,PTI_CS-PTI_ERR-((1-\has_err)*8)(%rsp) + jz \contk + PTI_UENTRY \has_err + jmp \contu + .endm + + .macro PTI_INTRENTRY vec_name + SUPERALIGN_TEXT + .globl X\vec_name\()_pti + .type X\vec_name\()_pti,@function +X\vec_name\()_pti: + testb $SEL_RPL_MASK,PTI_CS-3*8(%rsp) /* err, %rax, %rdx not pushed */ + jz .L\vec_name\()_u + PTI_UENTRY has_err=0 + jmp .L\vec_name\()_u + .endm + + .macro INTR_PUSH_FRAME vec_name + SUPERALIGN_TEXT + .globl X\vec_name + .type X\vec_name,@function +X\vec_name: + testb $SEL_RPL_MASK,PTI_CS-3*8(%rsp) /* come from kernel? */ + jz .L\vec_name\()_u /* Yes, dont swapgs again */ + swapgs +.L\vec_name\()_u: + lfence + subq $TF_RIP,%rsp /* skip dummy tf_err and tf_trapno */ + movq %rdi,TF_RDI(%rsp) + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) + SAVE_SEGS + movl $TF_HASSEGS,TF_FLAGS(%rsp) + pushfq + andq $~(PSL_D|PSL_AC),(%rsp) + popfq + testb $SEL_RPL_MASK,TF_CS(%rsp) /* come from kernel ? */ + jz 1f /* yes, leave PCB_FULL_IRET alone */ + movq PCPU(CURPCB),%r8 + andl $~PCB_FULL_IRET,PCB_FLAGS(%r8) + call handle_ibrs_entry +1: + .endm + + .macro INTR_HANDLER vec_name + .text + PTI_INTRENTRY \vec_name + INTR_PUSH_FRAME \vec_name + .endm + + .macro RESTORE_REGS + movq TF_RDI(%rsp),%rdi + movq TF_RSI(%rsp),%rsi + movq TF_RDX(%rsp),%rdx + movq TF_RCX(%rsp),%rcx + movq TF_R8(%rsp),%r8 + movq TF_R9(%rsp),%r9 + movq TF_RAX(%rsp),%rax + movq TF_RBX(%rsp),%rbx + movq TF_RBP(%rsp),%rbp + movq TF_R10(%rsp),%r10 + movq TF_R11(%rsp),%r11 + movq TF_R12(%rsp),%r12 + movq TF_R13(%rsp),%r13 + movq TF_R14(%rsp),%r14 + movq TF_R15(%rsp),%r15 + .endm + +#ifdef KMSAN +/* + * The KMSAN runtime relies on a TLS block to track initialization and origin + * state for function parameters and return values. To keep this state + * consistent in the face of asynchronous kernel-mode traps, the runtime + * maintains a stack of blocks: when handling an exception or interrupt, + * kmsan_intr_enter() pushes the new block to be used until the handler is + * complete, at which point kmsan_intr_leave() restores the previous block. + * + * Thus, KMSAN_ENTER/LEAVE hooks are required only in handlers for events that + * may have happened while in kernel-mode. In particular, they are not required + * around amd64_syscall() or ast() calls. Otherwise, kmsan_intr_enter() can be + * called unconditionally, without distinguishing between entry from user-mode + * or kernel-mode. + */ +#define KMSAN_ENTER callq kmsan_intr_enter +#define KMSAN_LEAVE callq kmsan_intr_leave +#else +#define KMSAN_ENTER +#define KMSAN_LEAVE +#endif + +#endif /* LOCORE */ + +#ifdef __STDC__ +#define ELFNOTE(name, type, desctype, descdata...) \ +.pushsection .note.name, "a", @note ; \ + .align 4 ; \ + .long 2f - 1f /* namesz */ ; \ + .long 4f - 3f /* descsz */ ; \ + .long type ; \ +1:.asciz #name ; \ +2:.align 4 ; \ +3:desctype descdata ; \ +4:.align 4 ; \ +.popsection +#else /* !__STDC__, i.e. -traditional */ +#define ELFNOTE(name, type, desctype, descdata) \ +.pushsection .note.name, "a", @note ; \ + .align 4 ; \ + .long 2f - 1f /* namesz */ ; \ + .long 4f - 3f /* descsz */ ; \ + .long type ; \ +1:.asciz "name" ; \ +2:.align 4 ; \ +3:desctype descdata ; \ +4:.align 4 ; \ +.popsection +#endif /* __STDC__ */ + +#endif /* !_MACHINE_ASMACROS_H_ */ + +#endif /* __i386__ */ diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h new file mode 100644 index 000000000000..78717a8d6544 --- /dev/null +++ b/sys/amd64/include/atomic.h @@ -0,0 +1,598 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 1998 Doug Rabson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __i386__ +#include <i386/atomic.h> +#else /* !__i386__ */ + +#ifndef _MACHINE_ATOMIC_H_ +#define _MACHINE_ATOMIC_H_ + +/* + * To express interprocessor (as opposed to processor and device) memory + * ordering constraints, use the atomic_*() functions with acquire and release + * semantics rather than the *mb() functions. An architecture's memory + * ordering (or memory consistency) model governs the order in which a + * program's accesses to different locations may be performed by an + * implementation of that architecture. In general, for memory regions + * defined as writeback cacheable, the memory ordering implemented by amd64 + * processors preserves the program ordering of a load followed by a load, a + * load followed by a store, and a store followed by a store. Only a store + * followed by a load to a different memory location may be reordered. + * Therefore, except for special cases, like non-temporal memory accesses or + * memory regions defined as write combining, the memory ordering effects + * provided by the sfence instruction in the wmb() function and the lfence + * instruction in the rmb() function are redundant. In contrast, the + * atomic_*() functions with acquire and release semantics do not perform + * redundant instructions for ordinary cases of interprocessor memory + * ordering on any architecture. + */ +#define mb() __asm __volatile("mfence;" : : : "memory") +#define wmb() __asm __volatile("sfence;" : : : "memory") +#define rmb() __asm __volatile("lfence;" : : : "memory") + +#ifdef _KERNEL +/* + * OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf). + * + * The open-coded number is used instead of the symbolic expression to + * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers. + * An assertion in amd64/vm_machdep.c ensures that the value is correct. + */ +#define OFFSETOF_MONITORBUF 0x100 +#endif + +#if defined(SAN_NEEDS_INTERCEPTORS) && !defined(SAN_RUNTIME) +#include <sys/atomic_san.h> +#else +#include <sys/atomic_common.h> + +/* + * Various simple operations on memory, each of which is atomic in the + * presence of interrupts and multiple processors. + * + * atomic_set_char(P, V) (*(u_char *)(P) |= (V)) + * atomic_clear_char(P, V) (*(u_char *)(P) &= ~(V)) + * atomic_add_char(P, V) (*(u_char *)(P) += (V)) + * atomic_subtract_char(P, V) (*(u_char *)(P) -= (V)) + * + * atomic_set_short(P, V) (*(u_short *)(P) |= (V)) + * atomic_clear_short(P, V) (*(u_short *)(P) &= ~(V)) + * atomic_add_short(P, V) (*(u_short *)(P) += (V)) + * atomic_subtract_short(P, V) (*(u_short *)(P) -= (V)) + * + * atomic_set_int(P, V) (*(u_int *)(P) |= (V)) + * atomic_clear_int(P, V) (*(u_int *)(P) &= ~(V)) + * atomic_add_int(P, V) (*(u_int *)(P) += (V)) + * atomic_subtract_int(P, V) (*(u_int *)(P) -= (V)) + * atomic_swap_int(P, V) (return (*(u_int *)(P)); *(u_int *)(P) = (V);) + * atomic_readandclear_int(P) (return (*(u_int *)(P)); *(u_int *)(P) = 0;) + * + * atomic_set_long(P, V) (*(u_long *)(P) |= (V)) + * atomic_clear_long(P, V) (*(u_long *)(P) &= ~(V)) + * atomic_add_long(P, V) (*(u_long *)(P) += (V)) + * atomic_subtract_long(P, V) (*(u_long *)(P) -= (V)) + * atomic_swap_long(P, V) (return (*(u_long *)(P)); *(u_long *)(P) = (V);) + * atomic_readandclear_long(P) (return (*(u_long *)(P)); *(u_long *)(P) = 0;) + */ + +/* + * Always use lock prefixes. The result is slightly less optimal for + * UP systems, but it matters less now, and sometimes UP is emulated + * over SMP. + * + * The assembly is volatilized to avoid code chunk removal by the compiler. + * GCC aggressively reorders operations and memory clobbering is necessary + * in order to avoid that for memory barriers. + */ +#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ +static __inline void \ +atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ +{ \ + __asm __volatile("lock; " OP \ + : "+m" (*p) \ + : CONS (V) \ + : "cc"); \ +} \ + \ +static __inline void \ +atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ +{ \ + __asm __volatile("lock; " OP \ + : "+m" (*p) \ + : CONS (V) \ + : "memory", "cc"); \ +} \ +struct __hack + +/* + * Atomic compare and set, used by the mutex functions. + * + * cmpset: + * if (*dst == expect) + * *dst = src + * + * fcmpset: + * if (*dst == *expect) + * *dst = src + * else + * *expect = *dst + * + * Returns 0 on failure, non-zero on success. + */ +#define ATOMIC_CMPSET(TYPE) \ +static __inline int \ +atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \ +{ \ + u_char res; \ + \ + __asm __volatile( \ + " lock; cmpxchg %3,%1 ; " \ + "# atomic_cmpset_" #TYPE " " \ + : "=@cce" (res), /* 0 */ \ + "+m" (*dst), /* 1 */ \ + "+a" (expect) /* 2 */ \ + : "r" (src) /* 3 */ \ + : "memory", "cc"); \ + return (res); \ +} \ + \ +static __inline int \ +atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \ +{ \ + u_char res; \ + \ + __asm __volatile( \ + " lock; cmpxchg %3,%1 ; " \ + "# atomic_fcmpset_" #TYPE " " \ + : "=@cce" (res), /* 0 */ \ + "+m" (*dst), /* 1 */ \ + "+a" (*expect) /* 2 */ \ + : "r" (src) /* 3 */ \ + : "memory", "cc"); \ + return (res); \ +} + +ATOMIC_CMPSET(char); +ATOMIC_CMPSET(short); +ATOMIC_CMPSET(int); +ATOMIC_CMPSET(long); + +/* + * Atomically add the value of v to the integer pointed to by p and return + * the previous value of *p. + */ +static __inline u_int +atomic_fetchadd_int(volatile u_int *p, u_int v) +{ + + __asm __volatile( + " lock; xaddl %0,%1 ; " + "# atomic_fetchadd_int" + : "+r" (v), /* 0 */ + "+m" (*p) /* 1 */ + : : "cc"); + return (v); +} + +/* + * Atomically add the value of v to the long integer pointed to by p and return + * the previous value of *p. + */ +static __inline u_long +atomic_fetchadd_long(volatile u_long *p, u_long v) +{ + + __asm __volatile( + " lock; xaddq %0,%1 ; " + "# atomic_fetchadd_long" + : "+r" (v), /* 0 */ + "+m" (*p) /* 1 */ + : : "cc"); + return (v); +} + +static __inline int +atomic_testandset_int(volatile u_int *p, u_int v) +{ + u_char res; + + __asm __volatile( + " lock; btsl %2,%1 ; " + "# atomic_testandset_int" + : "=@ccc" (res), /* 0 */ + "+m" (*p) /* 1 */ + : "Ir" (v & 0x1f) /* 2 */ + : "cc"); + return (res); +} + +static __inline int +atomic_testandset_long(volatile u_long *p, u_int v) +{ + u_char res; + + __asm __volatile( + " lock; btsq %2,%1 ; " + "# atomic_testandset_long" + : "=@ccc" (res), /* 0 */ + "+m" (*p) /* 1 */ + : "Jr" ((u_long)(v & 0x3f)) /* 2 */ + : "cc"); + return (res); +} + +static __inline int +atomic_testandclear_int(volatile u_int *p, u_int v) +{ + u_char res; + + __asm __volatile( + " lock; btrl %2,%1 ; " + "# atomic_testandclear_int" + : "=@ccc" (res), /* 0 */ + "+m" (*p) /* 1 */ + : "Ir" (v & 0x1f) /* 2 */ + : "cc"); + return (res); +} + +static __inline int +atomic_testandclear_long(volatile u_long *p, u_int v) +{ + u_char res; + + __asm __volatile( + " lock; btrq %2,%1 ; " + "# atomic_testandclear_long" + : "=@ccc" (res), /* 0 */ + "+m" (*p) /* 1 */ + : "Jr" ((u_long)(v & 0x3f)) /* 2 */ + : "cc"); + return (res); +} + +/* + * We assume that a = b will do atomic loads and stores. Due to the + * IA32 memory model, a simple store guarantees release semantics. + * + * However, a load may pass a store if they are performed on distinct + * addresses, so we need a Store/Load barrier for sequentially + * consistent fences in SMP kernels. We use "lock addl $0,mem" for a + * Store/Load barrier, as recommended by the AMD Software Optimization + * Guide, and not mfence. To avoid false data dependencies, we use a + * special address for "mem". In the kernel, we use a private per-cpu + * cache line. In user space, we use a word in the stack's red zone + * (-8(%rsp)). + */ + +static __inline void +__storeload_barrier(void) +{ +#if defined(_KERNEL) + __asm __volatile("lock; addl $0,%%gs:%c0" + : : "i" (OFFSETOF_MONITORBUF) : "memory", "cc"); +#else /* !_KERNEL */ + __asm __volatile("lock; addl $0,-8(%%rsp)" : : : "memory", "cc"); +#endif /* _KERNEL*/ +} + +#define ATOMIC_LOAD(TYPE) \ +static __inline u_##TYPE \ +atomic_load_acq_##TYPE(const volatile u_##TYPE *p) \ +{ \ + u_##TYPE res; \ + \ + res = *p; \ + __compiler_membar(); \ + return (res); \ +} \ +struct __hack + +#define ATOMIC_STORE(TYPE) \ +static __inline void \ +atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) \ +{ \ + \ + __compiler_membar(); \ + *p = v; \ +} \ +struct __hack + +static __inline void +atomic_thread_fence_acq(void) +{ + + __compiler_membar(); +} + +static __inline void +atomic_thread_fence_rel(void) +{ + + __compiler_membar(); +} + +static __inline void +atomic_thread_fence_acq_rel(void) +{ + + __compiler_membar(); +} + +static __inline void +atomic_thread_fence_seq_cst(void) +{ + + __storeload_barrier(); +} + +ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v); +ATOMIC_ASM(clear, char, "andb %b1,%0", "iq", ~v); +ATOMIC_ASM(add, char, "addb %b1,%0", "iq", v); +ATOMIC_ASM(subtract, char, "subb %b1,%0", "iq", v); + +ATOMIC_ASM(set, short, "orw %w1,%0", "ir", v); +ATOMIC_ASM(clear, short, "andw %w1,%0", "ir", ~v); +ATOMIC_ASM(add, short, "addw %w1,%0", "ir", v); +ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir", v); + +ATOMIC_ASM(set, int, "orl %1,%0", "ir", v); +ATOMIC_ASM(clear, int, "andl %1,%0", "ir", ~v); +ATOMIC_ASM(add, int, "addl %1,%0", "ir", v); +ATOMIC_ASM(subtract, int, "subl %1,%0", "ir", v); + +ATOMIC_ASM(set, long, "orq %1,%0", "er", v); +ATOMIC_ASM(clear, long, "andq %1,%0", "er", ~v); +ATOMIC_ASM(add, long, "addq %1,%0", "er", v); +ATOMIC_ASM(subtract, long, "subq %1,%0", "er", v); + +#define ATOMIC_LOADSTORE(TYPE) \ + ATOMIC_LOAD(TYPE); \ + ATOMIC_STORE(TYPE) + +ATOMIC_LOADSTORE(char); +ATOMIC_LOADSTORE(short); +ATOMIC_LOADSTORE(int); +ATOMIC_LOADSTORE(long); + +#undef ATOMIC_ASM +#undef ATOMIC_LOAD +#undef ATOMIC_STORE +#undef ATOMIC_LOADSTORE + +/* Read the current value and store a new value in the destination. */ +static __inline u_int +atomic_swap_int(volatile u_int *p, u_int v) +{ + + __asm __volatile( + " xchgl %1,%0 ; " + "# atomic_swap_int" + : "+r" (v), /* 0 */ + "+m" (*p)); /* 1 */ + return (v); +} + +static __inline u_long +atomic_swap_long(volatile u_long *p, u_long v) +{ + + __asm __volatile( + " xchgq %1,%0 ; " + "# atomic_swap_long" + : "+r" (v), /* 0 */ + "+m" (*p)); /* 1 */ + return (v); +} + +#define atomic_set_acq_char atomic_set_barr_char +#define atomic_set_rel_char atomic_set_barr_char +#define atomic_clear_acq_char atomic_clear_barr_char +#define atomic_clear_rel_char atomic_clear_barr_char +#define atomic_add_acq_char atomic_add_barr_char +#define atomic_add_rel_char atomic_add_barr_char +#define atomic_subtract_acq_char atomic_subtract_barr_char +#define atomic_subtract_rel_char atomic_subtract_barr_char +#define atomic_cmpset_acq_char atomic_cmpset_char +#define atomic_cmpset_rel_char atomic_cmpset_char +#define atomic_fcmpset_acq_char atomic_fcmpset_char +#define atomic_fcmpset_rel_char atomic_fcmpset_char + +#define atomic_set_acq_short atomic_set_barr_short +#define atomic_set_rel_short atomic_set_barr_short +#define atomic_clear_acq_short atomic_clear_barr_short +#define atomic_clear_rel_short atomic_clear_barr_short +#define atomic_add_acq_short atomic_add_barr_short +#define atomic_add_rel_short atomic_add_barr_short +#define atomic_subtract_acq_short atomic_subtract_barr_short +#define atomic_subtract_rel_short atomic_subtract_barr_short +#define atomic_cmpset_acq_short atomic_cmpset_short +#define atomic_cmpset_rel_short atomic_cmpset_short +#define atomic_fcmpset_acq_short atomic_fcmpset_short +#define atomic_fcmpset_rel_short atomic_fcmpset_short + +#define atomic_set_acq_int atomic_set_barr_int +#define atomic_set_rel_int atomic_set_barr_int +#define atomic_clear_acq_int atomic_clear_barr_int +#define atomic_clear_rel_int atomic_clear_barr_int +#define atomic_add_acq_int atomic_add_barr_int +#define atomic_add_rel_int atomic_add_barr_int +#define atomic_subtract_acq_int atomic_subtract_barr_int +#define atomic_subtract_rel_int atomic_subtract_barr_int +#define atomic_cmpset_acq_int atomic_cmpset_int +#define atomic_cmpset_rel_int atomic_cmpset_int +#define atomic_fcmpset_acq_int atomic_fcmpset_int +#define atomic_fcmpset_rel_int atomic_fcmpset_int + +#define atomic_set_acq_long atomic_set_barr_long +#define atomic_set_rel_long atomic_set_barr_long +#define atomic_clear_acq_long atomic_clear_barr_long +#define atomic_clear_rel_long atomic_clear_barr_long +#define atomic_add_acq_long atomic_add_barr_long +#define atomic_add_rel_long atomic_add_barr_long +#define atomic_subtract_acq_long atomic_subtract_barr_long +#define atomic_subtract_rel_long atomic_subtract_barr_long +#define atomic_cmpset_acq_long atomic_cmpset_long +#define atomic_cmpset_rel_long atomic_cmpset_long +#define atomic_fcmpset_acq_long atomic_fcmpset_long +#define atomic_fcmpset_rel_long atomic_fcmpset_long + +#define atomic_readandclear_int(p) atomic_swap_int(p, 0) +#define atomic_readandclear_long(p) atomic_swap_long(p, 0) +#define atomic_testandset_acq_long atomic_testandset_long + +/* Operations on 8-bit bytes. */ +#define atomic_set_8 atomic_set_char +#define atomic_set_acq_8 atomic_set_acq_char +#define atomic_set_rel_8 atomic_set_rel_char +#define atomic_clear_8 atomic_clear_char +#define atomic_clear_acq_8 atomic_clear_acq_char +#define atomic_clear_rel_8 atomic_clear_rel_char +#define atomic_add_8 atomic_add_char +#define atomic_add_acq_8 atomic_add_acq_char +#define atomic_add_rel_8 atomic_add_rel_char +#define atomic_subtract_8 atomic_subtract_char +#define atomic_subtract_acq_8 atomic_subtract_acq_char +#define atomic_subtract_rel_8 atomic_subtract_rel_char +#define atomic_load_acq_8 atomic_load_acq_char +#define atomic_store_rel_8 atomic_store_rel_char +#define atomic_cmpset_8 atomic_cmpset_char +#define atomic_cmpset_acq_8 atomic_cmpset_acq_char +#define atomic_cmpset_rel_8 atomic_cmpset_rel_char +#define atomic_fcmpset_8 atomic_fcmpset_char +#define atomic_fcmpset_acq_8 atomic_fcmpset_acq_char +#define atomic_fcmpset_rel_8 atomic_fcmpset_rel_char + +/* Operations on 16-bit words. */ +#define atomic_set_16 atomic_set_short +#define atomic_set_acq_16 atomic_set_acq_short +#define atomic_set_rel_16 atomic_set_rel_short +#define atomic_clear_16 atomic_clear_short +#define atomic_clear_acq_16 atomic_clear_acq_short +#define atomic_clear_rel_16 atomic_clear_rel_short +#define atomic_add_16 atomic_add_short +#define atomic_add_acq_16 atomic_add_acq_short +#define atomic_add_rel_16 atomic_add_rel_short +#define atomic_subtract_16 atomic_subtract_short +#define atomic_subtract_acq_16 atomic_subtract_acq_short +#define atomic_subtract_rel_16 atomic_subtract_rel_short +#define atomic_load_acq_16 atomic_load_acq_short +#define atomic_store_rel_16 atomic_store_rel_short +#define atomic_cmpset_16 atomic_cmpset_short +#define atomic_cmpset_acq_16 atomic_cmpset_acq_short +#define atomic_cmpset_rel_16 atomic_cmpset_rel_short +#define atomic_fcmpset_16 atomic_fcmpset_short +#define atomic_fcmpset_acq_16 atomic_fcmpset_acq_short +#define atomic_fcmpset_rel_16 atomic_fcmpset_rel_short + +/* Operations on 32-bit double words. */ +#define atomic_set_32 atomic_set_int +#define atomic_set_acq_32 atomic_set_acq_int +#define atomic_set_rel_32 atomic_set_rel_int +#define atomic_clear_32 atomic_clear_int +#define atomic_clear_acq_32 atomic_clear_acq_int +#define atomic_clear_rel_32 atomic_clear_rel_int +#define atomic_add_32 atomic_add_int +#define atomic_add_acq_32 atomic_add_acq_int +#define atomic_add_rel_32 atomic_add_rel_int +#define atomic_subtract_32 atomic_subtract_int +#define atomic_subtract_acq_32 atomic_subtract_acq_int +#define atomic_subtract_rel_32 atomic_subtract_rel_int +#define atomic_load_acq_32 atomic_load_acq_int +#define atomic_store_rel_32 atomic_store_rel_int +#define atomic_cmpset_32 atomic_cmpset_int +#define atomic_cmpset_acq_32 atomic_cmpset_acq_int +#define atomic_cmpset_rel_32 atomic_cmpset_rel_int +#define atomic_fcmpset_32 atomic_fcmpset_int +#define atomic_fcmpset_acq_32 atomic_fcmpset_acq_int +#define atomic_fcmpset_rel_32 atomic_fcmpset_rel_int +#define atomic_swap_32 atomic_swap_int +#define atomic_readandclear_32 atomic_readandclear_int +#define atomic_fetchadd_32 atomic_fetchadd_int +#define atomic_testandset_32 atomic_testandset_int +#define atomic_testandclear_32 atomic_testandclear_int + +/* Operations on 64-bit quad words. */ +#define atomic_set_64 atomic_set_long +#define atomic_set_acq_64 atomic_set_acq_long +#define atomic_set_rel_64 atomic_set_rel_long +#define atomic_clear_64 atomic_clear_long +#define atomic_clear_acq_64 atomic_clear_acq_long +#define atomic_clear_rel_64 atomic_clear_rel_long +#define atomic_add_64 atomic_add_long +#define atomic_add_acq_64 atomic_add_acq_long +#define atomic_add_rel_64 atomic_add_rel_long +#define atomic_subtract_64 atomic_subtract_long +#define atomic_subtract_acq_64 atomic_subtract_acq_long +#define atomic_subtract_rel_64 atomic_subtract_rel_long +#define atomic_load_acq_64 atomic_load_acq_long +#define atomic_store_rel_64 atomic_store_rel_long +#define atomic_cmpset_64 atomic_cmpset_long +#define atomic_cmpset_acq_64 atomic_cmpset_acq_long +#define atomic_cmpset_rel_64 atomic_cmpset_rel_long +#define atomic_fcmpset_64 atomic_fcmpset_long +#define atomic_fcmpset_acq_64 atomic_fcmpset_acq_long +#define atomic_fcmpset_rel_64 atomic_fcmpset_rel_long +#define atomic_swap_64 atomic_swap_long +#define atomic_readandclear_64 atomic_readandclear_long +#define atomic_fetchadd_64 atomic_fetchadd_long +#define atomic_testandset_64 atomic_testandset_long +#define atomic_testandclear_64 atomic_testandclear_long + +/* Operations on pointers. */ +#define atomic_set_ptr atomic_set_long +#define atomic_set_acq_ptr atomic_set_acq_long +#define atomic_set_rel_ptr atomic_set_rel_long +#define atomic_clear_ptr atomic_clear_long +#define atomic_clear_acq_ptr atomic_clear_acq_long +#define atomic_clear_rel_ptr atomic_clear_rel_long +#define atomic_add_ptr atomic_add_long +#define atomic_add_acq_ptr atomic_add_acq_long +#define atomic_add_rel_ptr atomic_add_rel_long +#define atomic_subtract_ptr atomic_subtract_long +#define atomic_subtract_acq_ptr atomic_subtract_acq_long +#define atomic_subtract_rel_ptr atomic_subtract_rel_long +#define atomic_load_acq_ptr atomic_load_acq_long +#define atomic_store_rel_ptr atomic_store_rel_long +#define atomic_cmpset_ptr atomic_cmpset_long +#define atomic_cmpset_acq_ptr atomic_cmpset_acq_long +#define atomic_cmpset_rel_ptr atomic_cmpset_rel_long +#define atomic_fcmpset_ptr atomic_fcmpset_long +#define atomic_fcmpset_acq_ptr atomic_fcmpset_acq_long +#define atomic_fcmpset_rel_ptr atomic_fcmpset_rel_long +#define atomic_swap_ptr atomic_swap_long +#define atomic_readandclear_ptr atomic_readandclear_long +#define atomic_testandset_ptr atomic_testandset_long +#define atomic_testandclear_ptr atomic_testandclear_long + +#endif /* !SAN_NEEDS_INTERCEPTORS || SAN_RUNTIME */ + +#endif /* !_MACHINE_ATOMIC_H_ */ + +#endif /* __i386__ */ diff --git a/sys/amd64/include/bus.h b/sys/amd64/include/bus.h new file mode 100644 index 000000000000..a53fadb1995a --- /dev/null +++ b/sys/amd64/include/bus.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/bus.h> diff --git a/sys/amd64/include/bus_dma.h b/sys/amd64/include/bus_dma.h new file mode 100644 index 000000000000..30d819cc3425 --- /dev/null +++ b/sys/amd64/include/bus_dma.h @@ -0,0 +1,34 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2005 Scott Long + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _AMD64_BUS_DMA_H_ +#define _AMD64_BUS_DMA_H_ + +#include <x86/bus_dma.h> + +#endif /* _AMD64_BUS_DMA_H_ */ diff --git a/sys/amd64/include/clock.h b/sys/amd64/include/clock.h new file mode 100644 index 000000000000..57233651aef3 --- /dev/null +++ b/sys/amd64/include/clock.h @@ -0,0 +1,5 @@ +/* + * This file is in the public domain. + */ + +#include <x86/clock.h> diff --git a/sys/amd64/include/counter.h b/sys/amd64/include/counter.h new file mode 100644 index 000000000000..b8b2a19233aa --- /dev/null +++ b/sys/amd64/include/counter.h @@ -0,0 +1,97 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __i386__ +#include <i386/counter.h> +#else /* !__i386__ */ + +#ifndef __MACHINE_COUNTER_H__ +#define __MACHINE_COUNTER_H__ + +#include <sys/pcpu.h> +#include <sys/kassert.h> + +#define EARLY_COUNTER (void *)__offsetof(struct pcpu, pc_early_dummy_counter) + +#define counter_enter() do {} while (0) +#define counter_exit() do {} while (0) + +#ifdef IN_SUBR_COUNTER_C +static inline uint64_t +counter_u64_read_one(counter_u64_t c, int cpu) +{ + + MPASS(c != EARLY_COUNTER); + return (*zpcpu_get_cpu(c, cpu)); +} + +static inline uint64_t +counter_u64_fetch_inline(uint64_t *c) +{ + uint64_t r; + int cpu; + + r = 0; + CPU_FOREACH(cpu) + r += counter_u64_read_one(c, cpu); + + return (r); +} + +static void +counter_u64_zero_one_cpu(void *arg) +{ + counter_u64_t c; + + c = arg; + MPASS(c != EARLY_COUNTER); + *(zpcpu_get(c)) = 0; +} + +static inline void +counter_u64_zero_inline(counter_u64_t c) +{ + + smp_rendezvous(smp_no_rendezvous_barrier, counter_u64_zero_one_cpu, + smp_no_rendezvous_barrier, c); +} +#endif + +#define counter_u64_add_protected(c, i) counter_u64_add(c, i) + +static inline void +counter_u64_add(counter_u64_t c, int64_t inc) +{ + + KASSERT(IS_BSP() || c != EARLY_COUNTER, ("EARLY_COUNTER used on AP")); + zpcpu_add(c, inc); +} + +#endif /* ! __MACHINE_COUNTER_H__ */ + +#endif /* __i386__ */ diff --git a/sys/amd64/include/cpu.h b/sys/amd64/include/cpu.h new file mode 100644 index 000000000000..57f2a0b59bbb --- /dev/null +++ b/sys/amd64/include/cpu.h @@ -0,0 +1,99 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MACHINE_CPU_H_ +#define _MACHINE_CPU_H_ + +/* + * Definitions unique to i386 cpu support. + */ +#include <machine/psl.h> +#include <machine/frame.h> +#include <machine/segments.h> + +#define cpu_exec(p) /* nothing */ +#define cpu_swapin(p) /* nothing */ +#define cpu_getstack(td) ((td)->td_frame->tf_rsp) +#define cpu_setstack(td, ap) ((td)->td_frame->tf_rsp = (ap)) +#define cpu_spinwait() ia32_pause() + +#define TRAPF_USERMODE(framep) \ + (ISPL((framep)->tf_cs) == SEL_UPL) +#define TRAPF_PC(framep) ((framep)->tf_rip) + +#ifdef _KERNEL +/* + * Struct containing pointers to CPU management functions whose + * implementation is run time selectable. Selection can be made, + * for example, based on detection of a particular CPU variant or + * hypervisor environment. + */ +struct cpu_ops { + void (*cpu_init)(void); + void (*cpu_resume)(void); +}; + +extern struct cpu_ops cpu_ops; +extern char brwsection[]; +extern char btext[]; +extern char _end[]; +extern char etext[]; + +/* Suspend and resume hook for VMM. */ +extern void (*vmm_suspend_p)(void); +extern void (*vmm_resume_p)(void); + +void cpu_halt(void); +void cpu_lock_delay(void); +void cpu_reset(void); +void fork_trampoline(void); + +/* + * Return contents of in-cpu fast counter as a sort of "bogo-time" + * for random-harvesting purposes. + */ +static __inline u_int64_t +get_cyclecount(void) +{ + + return (rdtsc()); +} + +#define MEMSET_EARLY_FUNC memset_std +#define MEMCPY_EARLY_FUNC memcpy_std +#define MEMMOVE_EARLY_FUNC memmove_std + +#endif + +#endif /* !_MACHINE_CPU_H_ */ diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h new file mode 100644 index 000000000000..d180f5c76afb --- /dev/null +++ b/sys/amd64/include/cpufunc.h @@ -0,0 +1,977 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 2003 Peter Wemm. + * Copyright (c) 1993 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Functions to provide access to special i386 instructions. + * This in included in sys/systm.h, and that file should be + * used in preference to this. + */ + +#ifdef __i386__ +#include <i386/cpufunc.h> +#else /* !__i386__ */ + +#ifndef _MACHINE_CPUFUNC_H_ +#define _MACHINE_CPUFUNC_H_ + +struct region_descriptor; + +#define readb(va) (*(volatile uint8_t *) (va)) +#define readw(va) (*(volatile uint16_t *) (va)) +#define readl(va) (*(volatile uint32_t *) (va)) +#define readq(va) (*(volatile uint64_t *) (va)) + +#define writeb(va, d) (*(volatile uint8_t *) (va) = (d)) +#define writew(va, d) (*(volatile uint16_t *) (va) = (d)) +#define writel(va, d) (*(volatile uint32_t *) (va) = (d)) +#define writeq(va, d) (*(volatile uint64_t *) (va) = (d)) + +static __inline void +breakpoint(void) +{ + __asm __volatile("int $3"); +} + +#define bsfl(mask) __builtin_ctz(mask) + +#define bsfq(mask) __builtin_ctzl(mask) + +static __inline void +clflush(u_long addr) +{ + + __asm __volatile("clflush %0" : : "m" (*(char *)addr)); +} + +static __inline void +clflushopt(u_long addr) +{ + + __asm __volatile(".byte 0x66;clflush %0" : : "m" (*(char *)addr)); +} + +static __inline void +clwb(u_long addr) +{ + + __asm __volatile("clwb %0" : : "m" (*(char *)addr)); +} + +static __inline void +clts(void) +{ + + __asm __volatile("clts"); +} + +static __inline void +disable_intr(void) +{ + __asm __volatile("cli" : : : "memory"); +} + +static __inline void +do_cpuid(u_int ax, u_int *p) +{ + __asm __volatile("cpuid" + : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax)); +} + +static __inline void +cpuid_count(u_int ax, u_int cx, u_int *p) +{ + __asm __volatile("cpuid" + : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax), "c" (cx)); +} + +static __inline void +enable_intr(void) +{ + __asm __volatile("sti"); +} + +static __inline void +halt(void) +{ + __asm __volatile("hlt"); +} + +static __inline u_char +inb(u_int port) +{ + u_char data; + + __asm __volatile("inb %w1, %0" : "=a" (data) : "Nd" (port)); + return (data); +} + +static __inline u_int +inl(u_int port) +{ + u_int data; + + __asm __volatile("inl %w1, %0" : "=a" (data) : "Nd" (port)); + return (data); +} + +static __inline void +insb(u_int port, void *addr, size_t count) +{ + __asm __volatile("rep; insb" + : "+D" (addr), "+c" (count) + : "d" (port) + : "memory"); +} + +static __inline void +insw(u_int port, void *addr, size_t count) +{ + __asm __volatile("rep; insw" + : "+D" (addr), "+c" (count) + : "d" (port) + : "memory"); +} + +static __inline void +insl(u_int port, void *addr, size_t count) +{ + __asm __volatile("rep; insl" + : "+D" (addr), "+c" (count) + : "d" (port) + : "memory"); +} + +static __inline void +invd(void) +{ + __asm __volatile("invd"); +} + +static __inline u_short +inw(u_int port) +{ + u_short data; + + __asm __volatile("inw %w1, %0" : "=a" (data) : "Nd" (port)); + return (data); +} + +static __inline void +outb(u_int port, u_char data) +{ + __asm __volatile("outb %0, %w1" : : "a" (data), "Nd" (port)); +} + +static __inline void +outl(u_int port, u_int data) +{ + __asm __volatile("outl %0, %w1" : : "a" (data), "Nd" (port)); +} + +static __inline void +outsb(u_int port, const void *addr, size_t count) +{ + __asm __volatile("rep; outsb" + : "+S" (addr), "+c" (count) + : "d" (port)); +} + +static __inline void +outsw(u_int port, const void *addr, size_t count) +{ + __asm __volatile("rep; outsw" + : "+S" (addr), "+c" (count) + : "d" (port)); +} + +static __inline void +outsl(u_int port, const void *addr, size_t count) +{ + __asm __volatile("rep; outsl" + : "+S" (addr), "+c" (count) + : "d" (port)); +} + +static __inline void +outw(u_int port, u_short data) +{ + __asm __volatile("outw %0, %w1" : : "a" (data), "Nd" (port)); +} + +static __inline u_long +popcntq(u_long mask) +{ + u_long result; + + __asm __volatile("popcntq %1,%0" : "=r" (result) : "rm" (mask)); + return (result); +} + +static __inline void +lfence(void) +{ + + __asm __volatile("lfence" : : : "memory"); +} + +static __inline void +mfence(void) +{ + + __asm __volatile("mfence" : : : "memory"); +} + +static __inline void +sfence(void) +{ + + __asm __volatile("sfence" : : : "memory"); +} + +static __inline void +ia32_pause(void) +{ + __asm __volatile("pause"); +} + +static __inline u_long +read_rflags(void) +{ + u_long rf; + + __asm __volatile("pushfq; popq %0" : "=r" (rf)); + return (rf); +} + +static __inline uint64_t +rdmsr(u_int msr) +{ + uint32_t low, high; + + __asm __volatile("rdmsr" : "=a" (low), "=d" (high) : "c" (msr)); + return (low | ((uint64_t)high << 32)); +} + +static __inline uint32_t +rdmsr32(u_int msr) +{ + uint32_t low; + + __asm __volatile("rdmsr" : "=a" (low) : "c" (msr) : "rdx"); + return (low); +} + +static __inline uint64_t +rdpmc(u_int pmc) +{ + uint32_t low, high; + + __asm __volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (pmc)); + return (low | ((uint64_t)high << 32)); +} + +static __inline uint64_t +rdtsc(void) +{ + uint32_t low, high; + + __asm __volatile("rdtsc" : "=a" (low), "=d" (high)); + return (low | ((uint64_t)high << 32)); +} + +static __inline uint64_t +rdtsc_ordered_lfence(void) +{ + lfence(); + return (rdtsc()); +} + +static __inline uint64_t +rdtsc_ordered_mfence(void) +{ + mfence(); + return (rdtsc()); +} + +static __inline uint64_t +rdtscp(void) +{ + uint32_t low, high; + + __asm __volatile("rdtscp" : "=a" (low), "=d" (high) : : "ecx"); + return (low | ((uint64_t)high << 32)); +} + +static __inline uint64_t +rdtscp_aux(uint32_t *aux) +{ + uint32_t low, high; + + __asm __volatile("rdtscp" : "=a" (low), "=d" (high), "=c" (*aux)); + return (low | ((uint64_t)high << 32)); +} + +static __inline uint32_t +rdtsc32(void) +{ + uint32_t rv; + + __asm __volatile("rdtsc" : "=a" (rv) : : "edx"); + return (rv); +} + +static __inline uint32_t +rdtscp32(void) +{ + uint32_t rv; + + __asm __volatile("rdtscp" : "=a" (rv) : : "ecx", "edx"); + return (rv); +} + +static __inline void +wbinvd(void) +{ + __asm __volatile("wbinvd"); +} + +static __inline void +write_rflags(u_long rf) +{ + __asm __volatile("pushq %0; popfq" : : "r" (rf)); +} + +static __inline void +wrmsr(u_int msr, uint64_t newval) +{ + uint32_t low, high; + + low = newval; + high = newval >> 32; + __asm __volatile("wrmsr" : : "a" (low), "d" (high), "c" (msr)); +} + +static __inline void +load_cr0(u_long data) +{ + + __asm __volatile("movq %0,%%cr0" : : "r" (data)); +} + +static __inline u_long +rcr0(void) +{ + u_long data; + + __asm __volatile("movq %%cr0,%0" : "=r" (data)); + return (data); +} + +static __inline u_long +rcr2(void) +{ + u_long data; + + __asm __volatile("movq %%cr2,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_cr3(u_long data) +{ + + __asm __volatile("movq %0,%%cr3" : : "r" (data) : "memory"); +} + +static __inline u_long +rcr3(void) +{ + u_long data; + + __asm __volatile("movq %%cr3,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_cr4(u_long data) +{ + __asm __volatile("movq %0,%%cr4" : : "r" (data)); +} + +static __inline u_long +rcr4(void) +{ + u_long data; + + __asm __volatile("movq %%cr4,%0" : "=r" (data)); + return (data); +} + +static __inline u_long +rxcr(u_int reg) +{ + u_int low, high; + + __asm __volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (reg)); + return (low | ((uint64_t)high << 32)); +} + +static __inline void +load_xcr(u_int reg, u_long val) +{ + u_int low, high; + + low = val; + high = val >> 32; + __asm __volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high)); +} + +/* + * Global TLB flush (except for thise for pages marked PG_G) + */ +static __inline void +invltlb(void) +{ + + load_cr3(rcr3()); +} + +#ifndef CR4_PGE +#define CR4_PGE 0x00000080 /* Page global enable */ +#endif + +/* + * Perform the guaranteed invalidation of all TLB entries. This + * includes the global entries, and entries in all PCIDs, not only the + * current context. The function works both on non-PCID CPUs and CPUs + * with the PCID turned off or on. See IA-32 SDM Vol. 3a 4.10.4.1 + * Operations that Invalidate TLBs and Paging-Structure Caches. + */ +static __inline void +invltlb_glob(void) +{ + uint64_t cr4; + + cr4 = rcr4(); + load_cr4(cr4 & ~CR4_PGE); + /* + * Although preemption at this point could be detrimental to + * performance, it would not lead to an error. PG_G is simply + * ignored if CR4.PGE is clear. Moreover, in case this block + * is re-entered, the load_cr4() either above or below will + * modify CR4.PGE flushing the TLB. + */ + load_cr4(cr4 | CR4_PGE); +} + +/* + * TLB flush for an individual page (even if it has PG_G). + * Only works on 486+ CPUs (i386 does not have PG_G). + */ +static __inline void +invlpg(u_long addr) +{ + + __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); +} + +#define INVPCID_ADDR 0 +#define INVPCID_CTX 1 +#define INVPCID_CTXGLOB 2 +#define INVPCID_ALLCTX 3 + +struct invpcid_descr { + uint64_t pcid:12 __packed; + uint64_t pad:52 __packed; + uint64_t addr; +} __packed; + +static __inline void +invpcid(struct invpcid_descr *d, int type) +{ + + __asm __volatile("invpcid (%0),%1" + : : "r" (d), "r" ((u_long)type) : "memory"); +} + +#define INVLPGB_VA 0x0001 +#define INVLPGB_PCID 0x0002 +#define INVLPGB_ASID 0x0004 +#define INVLPGB_GLOB 0x0008 +#define INVLPGB_FIN 0x0010 +#define INVLPGB_NEST 0x0020 + +#define INVLPGB_DESCR(asid, pcid) (((pcid) << 16) | (asid)) + +#define INVLPGB_2M_CNT (1u << 31) + +static __inline void +invlpgb(uint64_t rax, uint32_t edx, uint32_t ecx) +{ + __asm __volatile("invlpgb" : : "a" (rax), "d" (edx), "c" (ecx)); +} + +static __inline void +tlbsync(void) +{ + __asm __volatile("tlbsync"); +} + +static __inline u_short +rfs(void) +{ + u_short sel; + __asm __volatile("movw %%fs,%0" : "=rm" (sel)); + return (sel); +} + +static __inline u_short +rgs(void) +{ + u_short sel; + __asm __volatile("movw %%gs,%0" : "=rm" (sel)); + return (sel); +} + +static __inline u_short +rss(void) +{ + u_short sel; + __asm __volatile("movw %%ss,%0" : "=rm" (sel)); + return (sel); +} + +static __inline void +load_ds(u_short sel) +{ + __asm __volatile("movw %0,%%ds" : : "rm" (sel)); +} + +static __inline void +load_es(u_short sel) +{ + __asm __volatile("movw %0,%%es" : : "rm" (sel)); +} + +static __inline void +cpu_monitor(const void *addr, u_long extensions, u_int hints) +{ + + __asm __volatile("monitor" + : : "a" (addr), "c" (extensions), "d" (hints)); +} + +static __inline void +cpu_mwait(u_long extensions, u_int hints) +{ + + __asm __volatile("mwait" : : "a" (hints), "c" (extensions)); +} + +static __inline uint32_t +rdpkru(void) +{ + uint32_t res; + + __asm __volatile("rdpkru" : "=a" (res) : "c" (0) : "edx"); + return (res); +} + +static __inline void +wrpkru(uint32_t mask) +{ + + __asm __volatile("wrpkru" : : "a" (mask), "c" (0), "d" (0)); +} + +#ifdef _KERNEL +/* This is defined in <machine/specialreg.h> but is too painful to get to */ +#ifndef MSR_FSBASE +#define MSR_FSBASE 0xc0000100 +#endif +static __inline void +load_fs(u_short sel) +{ + /* Preserve the fsbase value across the selector load */ + __asm __volatile("rdmsr; movw %0,%%fs; wrmsr" + : : "rm" (sel), "c" (MSR_FSBASE) : "eax", "edx"); +} + +#ifndef MSR_GSBASE +#define MSR_GSBASE 0xc0000101 +#endif +static __inline void +load_gs(u_short sel) +{ + /* + * Preserve the gsbase value across the selector load. + * Note that we have to disable interrupts because the gsbase + * being trashed happens to be the kernel gsbase at the time. + */ + __asm __volatile("pushfq; cli; rdmsr; movw %0,%%gs; wrmsr; popfq" + : : "rm" (sel), "c" (MSR_GSBASE) : "eax", "edx"); +} +#else +/* Usable by userland */ +static __inline void +load_fs(u_short sel) +{ + __asm __volatile("movw %0,%%fs" : : "rm" (sel)); +} + +static __inline void +load_gs(u_short sel) +{ + __asm __volatile("movw %0,%%gs" : : "rm" (sel)); +} +#endif + +static __inline uint64_t +rdfsbase(void) +{ + uint64_t x; + + __asm __volatile("rdfsbase %0" : "=r" (x)); + return (x); +} + +static __inline void +wrfsbase(uint64_t x) +{ + + __asm __volatile("wrfsbase %0" : : "r" (x)); +} + +static __inline uint64_t +rdgsbase(void) +{ + uint64_t x; + + __asm __volatile("rdgsbase %0" : "=r" (x)); + return (x); +} + +static __inline void +wrgsbase(uint64_t x) +{ + + __asm __volatile("wrgsbase %0" : : "r" (x)); +} + +static __inline void +bare_lgdt(struct region_descriptor *addr) +{ + __asm __volatile("lgdt (%0)" : : "r" (addr)); +} + +static __inline void +sgdt(struct region_descriptor *addr) +{ + char *loc; + + loc = (char *)addr; + __asm __volatile("sgdt %0" : "=m" (*loc) : : "memory"); +} + +static __inline void +lidt(struct region_descriptor *addr) +{ + __asm __volatile("lidt (%0)" : : "r" (addr)); +} + +static __inline void +sidt(struct region_descriptor *addr) +{ + char *loc; + + loc = (char *)addr; + __asm __volatile("sidt %0" : "=m" (*loc) : : "memory"); +} + +static __inline void +lldt(u_short sel) +{ + __asm __volatile("lldt %0" : : "r" (sel)); +} + +static __inline u_short +sldt(void) +{ + u_short sel; + + __asm __volatile("sldt %0" : "=r" (sel)); + return (sel); +} + +static __inline void +ltr(u_short sel) +{ + __asm __volatile("ltr %0" : : "r" (sel)); +} + +static __inline uint32_t +read_tr(void) +{ + u_short sel; + + __asm __volatile("str %0" : "=r" (sel)); + return (sel); +} + +static __inline uint64_t +rdr0(void) +{ + uint64_t data; + __asm __volatile("movq %%dr0,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_dr0(uint64_t dr0) +{ + __asm __volatile("movq %0,%%dr0" : : "r" (dr0)); +} + +static __inline uint64_t +rdr1(void) +{ + uint64_t data; + __asm __volatile("movq %%dr1,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_dr1(uint64_t dr1) +{ + __asm __volatile("movq %0,%%dr1" : : "r" (dr1)); +} + +static __inline uint64_t +rdr2(void) +{ + uint64_t data; + __asm __volatile("movq %%dr2,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_dr2(uint64_t dr2) +{ + __asm __volatile("movq %0,%%dr2" : : "r" (dr2)); +} + +static __inline uint64_t +rdr3(void) +{ + uint64_t data; + __asm __volatile("movq %%dr3,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_dr3(uint64_t dr3) +{ + __asm __volatile("movq %0,%%dr3" : : "r" (dr3)); +} + +static __inline uint64_t +rdr6(void) +{ + uint64_t data; + __asm __volatile("movq %%dr6,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_dr6(uint64_t dr6) +{ + __asm __volatile("movq %0,%%dr6" : : "r" (dr6)); +} + +static __inline uint64_t +rdr7(void) +{ + uint64_t data; + __asm __volatile("movq %%dr7,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_dr7(uint64_t dr7) +{ + __asm __volatile("movq %0,%%dr7" : : "r" (dr7)); +} + +static __inline register_t +intr_disable(void) +{ + register_t rflags; + + rflags = read_rflags(); + disable_intr(); + return (rflags); +} + +static __inline void +intr_restore(register_t rflags) +{ + write_rflags(rflags); +} + +static __inline void +stac(void) +{ + + __asm __volatile("stac" : : : "cc"); +} + +static __inline void +clac(void) +{ + + __asm __volatile("clac" : : : "cc"); +} + +enum { + SGX_ECREATE = 0x0, + SGX_EADD = 0x1, + SGX_EINIT = 0x2, + SGX_EREMOVE = 0x3, + SGX_EDGBRD = 0x4, + SGX_EDGBWR = 0x5, + SGX_EEXTEND = 0x6, + SGX_ELDU = 0x8, + SGX_EBLOCK = 0x9, + SGX_EPA = 0xA, + SGX_EWB = 0xB, + SGX_ETRACK = 0xC, +}; + +enum { + SGX_PT_SECS = 0x00, + SGX_PT_TCS = 0x01, + SGX_PT_REG = 0x02, + SGX_PT_VA = 0x03, + SGX_PT_TRIM = 0x04, +}; + +int sgx_encls(uint32_t eax, uint64_t rbx, uint64_t rcx, uint64_t rdx); + +static __inline int +sgx_ecreate(void *pginfo, void *secs) +{ + + return (sgx_encls(SGX_ECREATE, (uint64_t)pginfo, + (uint64_t)secs, 0)); +} + +static __inline int +sgx_eadd(void *pginfo, void *epc) +{ + + return (sgx_encls(SGX_EADD, (uint64_t)pginfo, + (uint64_t)epc, 0)); +} + +static __inline int +sgx_einit(void *sigstruct, void *secs, void *einittoken) +{ + + return (sgx_encls(SGX_EINIT, (uint64_t)sigstruct, + (uint64_t)secs, (uint64_t)einittoken)); +} + +static __inline int +sgx_eextend(void *secs, void *epc) +{ + + return (sgx_encls(SGX_EEXTEND, (uint64_t)secs, + (uint64_t)epc, 0)); +} + +static __inline int +sgx_epa(void *epc) +{ + + return (sgx_encls(SGX_EPA, SGX_PT_VA, (uint64_t)epc, 0)); +} + +static __inline int +sgx_eldu(uint64_t rbx, uint64_t rcx, + uint64_t rdx) +{ + + return (sgx_encls(SGX_ELDU, rbx, rcx, rdx)); +} + +static __inline int +sgx_eremove(void *epc) +{ + + return (sgx_encls(SGX_EREMOVE, 0, (uint64_t)epc, 0)); +} + +static __inline void +xrstors(uint8_t *save_area, uint64_t state_bitmap) +{ + uint32_t low, hi; + + low = state_bitmap; + hi = state_bitmap >> 32; + __asm __volatile("xrstors %0" : : "m"(*save_area), "a"(low), + "d"(hi)); +} + +static __inline void +xsaves(uint8_t *save_area, uint64_t state_bitmap) +{ + uint32_t low, hi; + + low = state_bitmap; + hi = state_bitmap >> 32; + __asm __volatile("xsaves %0" : "=m"(*save_area) : "a"(low), + "d"(hi) + : "memory"); +} + +void reset_dbregs(void); + +#ifdef _KERNEL +int rdmsr_safe(u_int msr, uint64_t *val); +int wrmsr_safe(u_int msr, uint64_t newval); +#endif + +#endif /* !_MACHINE_CPUFUNC_H_ */ + +#endif /* __i386__ */ diff --git a/sys/amd64/include/cputypes.h b/sys/amd64/include/cputypes.h new file mode 100644 index 000000000000..088692e9ad95 --- /dev/null +++ b/sys/amd64/include/cputypes.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/cputypes.h> diff --git a/sys/amd64/include/csan.h b/sys/amd64/include/csan.h new file mode 100644 index 000000000000..87ca97885232 --- /dev/null +++ b/sys/amd64/include/csan.h @@ -0,0 +1,81 @@ +/* $NetBSD: csan.h,v 1.2 2019/11/06 06:57:22 maxv Exp $ */ + +/* + * Copyright (c) 2019 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Maxime Villard. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/timetc.h> + +#include <machine/clock.h> +#include <machine/cpufunc.h> +#include <machine/stack.h> +#include <machine/vmparam.h> + +static inline bool +kcsan_md_unsupported(vm_offset_t addr) +{ + return false; +} + +static inline bool +kcsan_md_is_avail(void) +{ + return true; +} + +static inline void +kcsan_md_disable_intrs(uint64_t *state) +{ + + *state = intr_disable(); +} + +static inline void +kcsan_md_enable_intrs(uint64_t *state) +{ + + intr_restore(*state); +} + +static inline void +kcsan_md_delay(uint64_t us) +{ + /* + * Only call DELAY if not using the early delay code. The i8254 + * early delay function may cause us to recurse on a spin lock + * leading to a panic. + */ + if ((tsc_is_invariant && tsc_freq != 0) || + timecounter->tc_quality > 0) + DELAY(us); +} + +static void +kcsan_md_unwind(void) +{ +} diff --git a/sys/amd64/include/db_machdep.h b/sys/amd64/include/db_machdep.h new file mode 100644 index 000000000000..d354fc9ed607 --- /dev/null +++ b/sys/amd64/include/db_machdep.h @@ -0,0 +1,83 @@ +/*- + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _MACHINE_DB_MACHDEP_H_ +#define _MACHINE_DB_MACHDEP_H_ + +#include <machine/frame.h> +#include <machine/trap.h> + +typedef vm_offset_t db_addr_t; /* address - unsigned */ +typedef long db_expr_t; /* expression - signed */ + +#define PC_REGS() ((db_addr_t)kdb_thrctx->pcb_rip) + +#define BKPT_INST 0xcc /* breakpoint instruction */ +#define BKPT_SIZE (1) /* size of breakpoint inst */ +#define BKPT_SET(inst) (BKPT_INST) + +#define BKPT_SKIP \ +do { \ + kdb_frame->tf_rip += 1; \ + kdb_thrctx->pcb_rip += 1; \ +} while(0) + +#define FIXUP_PC_AFTER_BREAK \ +do { \ + kdb_frame->tf_rip -= 1; \ + kdb_thrctx->pcb_rip -= 1; \ +} while(0); + +#define db_clear_single_step kdb_cpu_clear_singlestep +#define db_set_single_step kdb_cpu_set_singlestep + +/* + * The debug exception type is copied from %dr6 to 'code' and used to + * disambiguate single step traps. Watchpoints have no special support. + * Our hardware breakpoints are not well integrated with ddb and are too + * different from watchpoints. ddb treats them as unknown traps with + * unknown addresses and doesn't turn them off while it is running. + */ +#define IS_BREAKPOINT_TRAP(type, code) ((type) == T_BPTFLT) +#define IS_SSTEP_TRAP(type, code) \ + ((type) == T_TRCTRAP && (code) & DBREG_DR6_BS) +#define IS_WATCHPOINT_TRAP(type, code) 0 + +#define I_CALL 0xe8 +#define I_CALLI 0xff +#define i_calli(ins) (((ins)&0xff) == I_CALLI && ((ins)&0x3800) == 0x1000) +#define I_RET 0xc3 +#define I_IRET 0xcf +#define i_rex(ins) (((ins) & 0xff) == 0x41 || ((ins) & 0xff) == 0x43) + +#define inst_trap_return(ins) (((ins)&0xff) == I_IRET) +#define inst_return(ins) (((ins)&0xff) == I_RET) +#define inst_call(ins) (((ins)&0xff) == I_CALL || i_calli(ins) || \ + (i_calli((ins) >> 8) && i_rex(ins))) +#define inst_load(ins) 0 +#define inst_store(ins) 0 + +#endif /* !_MACHINE_DB_MACHDEP_H_ */ diff --git a/sys/amd64/include/dump.h b/sys/amd64/include/dump.h new file mode 100644 index 000000000000..4dc14c6c3415 --- /dev/null +++ b/sys/amd64/include/dump.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/dump.h> diff --git a/sys/amd64/include/efi.h b/sys/amd64/include/efi.h new file mode 100644 index 000000000000..439f2f0b317d --- /dev/null +++ b/sys/amd64/include/efi.h @@ -0,0 +1,82 @@ +/*- + * Copyright (c) 2016 The FreeBSD Foundation + * + * This software was developed by Konstantin Belousov <kib@FreeBSD.org> + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef __AMD64_INCLUDE_EFI_H_ +#define __AMD64_INCLUDE_EFI_H_ + +#include <sys/types.h> + +/* + * XXX: from gcc 6.2 manual: + * Note, the ms_abi attribute for Microsoft Windows 64-bit targets + * currently requires the -maccumulate-outgoing-args option. + * + * Avoid EFIABI_ATTR declarations for compilers that don't support it. + * GCC support began in version 4.4. + */ +#if defined(__clang__) || defined(__GNUC__) && \ + (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4) +#define EFIABI_ATTR __attribute__((ms_abi)) +#endif + +#ifdef _KERNEL +#include <isa/rtc.h> +#define ARCH_MAY_USE_EFI + +#define EFI_TIME_LOCK() mtx_lock(&atrtc_time_lock) +#define EFI_TIME_UNLOCK() mtx_unlock(&atrtc_time_lock) +#define EFI_TIME_OWNED() mtx_assert(&atrtc_time_lock, MA_OWNED) + +#define EFI_RT_HANDLE_FAULTS_DEFAULT 1 + +#define EFI_MAP_BOOTTYPE_ALLOWED(type) (((efi_map_regs >> (type)) & 1) != 0) + +extern uint32_t efi_map_regs; +#endif + +struct efirt_callinfo { + const char *ec_name; + register_t ec_efi_status; + register_t ec_fptr; + register_t ec_argcnt; + register_t ec_arg1; + register_t ec_arg2; + register_t ec_arg3; + register_t ec_arg4; + register_t ec_arg5; + register_t ec_rbx; + register_t ec_rsp; + register_t ec_rbp; + register_t ec_r12; + register_t ec_r13; + register_t ec_r14; + register_t ec_r15; + register_t ec_rflags; +}; + +#endif /* __AMD64_INCLUDE_EFI_H_ */ diff --git a/sys/amd64/include/elf.h b/sys/amd64/include/elf.h new file mode 100644 index 000000000000..3b0807772920 --- /dev/null +++ b/sys/amd64/include/elf.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/elf.h> diff --git a/sys/amd64/include/endian.h b/sys/amd64/include/endian.h new file mode 100644 index 000000000000..a8e10b0a36f2 --- /dev/null +++ b/sys/amd64/include/endian.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/endian.h> diff --git a/sys/amd64/include/exec.h b/sys/amd64/include/exec.h new file mode 100644 index 000000000000..f48188fcefd3 --- /dev/null +++ b/sys/amd64/include/exec.h @@ -0,0 +1,37 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MACHINE_EXEC_H_ +#define _MACHINE_EXEC_H_ + +#define __LDPGSZ 4096 + +#endif /* !_MACHINE_EXEC_H_ */ diff --git a/sys/amd64/include/fdt.h b/sys/amd64/include/fdt.h new file mode 100644 index 000000000000..a7c73c541997 --- /dev/null +++ b/sys/amd64/include/fdt.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/fdt.h> diff --git a/sys/amd64/include/float.h b/sys/amd64/include/float.h new file mode 100644 index 000000000000..2cde8b6d60e4 --- /dev/null +++ b/sys/amd64/include/float.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/float.h> diff --git a/sys/amd64/include/floatingpoint.h b/sys/amd64/include/floatingpoint.h new file mode 100644 index 000000000000..f077ff1037a8 --- /dev/null +++ b/sys/amd64/include/floatingpoint.h @@ -0,0 +1,42 @@ +/*- + * SPDX-License-Identifier: BSD-4-Clause + * + * Copyright (c) 1993 Andrew Moore, Talke Studio + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _FLOATINGPOINT_H_ +#define _FLOATINGPOINT_H_ + +#include <sys/cdefs.h> +#include <machine/ieeefp.h> + +#endif /* !_FLOATINGPOINT_H_ */ diff --git a/sys/amd64/include/fpu.h b/sys/amd64/include/fpu.h new file mode 100644 index 000000000000..cb09f5d05e85 --- /dev/null +++ b/sys/amd64/include/fpu.h @@ -0,0 +1,93 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Floating Point Data Structures and Constants + * W. Jolitz 1/90 + */ + +#ifndef _MACHINE_FPU_H_ +#define _MACHINE_FPU_H_ + +#include <x86/fpu.h> + +#ifdef _KERNEL + +struct fpu_kern_ctx; + +#define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNFPU) == 0) + +#define XSAVE_AREA_ALIGN 64 + +void fpudna(void); +void fpudrop(void); +void fpuexit(struct thread *td); +int fpuformat(void); +int fpugetregs(struct thread *td); +void fpuinit(void); +void fpurestore(void *addr); +void fpuresume(void *addr); +void fpusave(void *addr); +int fpusetregs(struct thread *td, struct savefpu *addr, + char *xfpustate, size_t xfpustate_size); +int fpusetxstate(struct thread *td, char *xfpustate, + size_t xfpustate_size); +void fpususpend(void *addr); +int fputrap_sse(void); +int fputrap_x87(void); +void fpuuserinited(struct thread *td); +struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags); +struct fpu_kern_ctx *fpu_kern_alloc_ctx_domain(int domain, u_int flags); +void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx); +void fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, + u_int flags); +int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); +int fpu_kern_thread(u_int flags); +int is_fpu_kern_thread(u_int flags); + +struct savefpu *fpu_save_area_alloc(void); +void fpu_save_area_free(struct savefpu *fsa); +void fpu_save_area_reset(struct savefpu *fsa); + +/* + * Flags for fpu_kern_alloc_ctx(), fpu_kern_enter() and fpu_kern_thread(). + */ +#define FPU_KERN_NORMAL 0x0000 +#define FPU_KERN_NOWAIT 0x0001 +#define FPU_KERN_KTHR 0x0002 +#define FPU_KERN_NOCTX 0x0004 + +#endif + +#endif /* !_MACHINE_FPU_H_ */ diff --git a/sys/amd64/include/frame.h b/sys/amd64/include/frame.h new file mode 100644 index 000000000000..f3a885589527 --- /dev/null +++ b/sys/amd64/include/frame.h @@ -0,0 +1,52 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2018 The FreeBSD Foundation + * + * This software was developed by Konstantin Belousov <kib@FreeBSD.org> + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _AMD64_FRAME_H +#define _AMD64_FRAME_H + +#include <x86/frame.h> + +struct pti_frame { + register_t pti_rdx; + register_t pti_rax; + register_t pti_err; + register_t pti_rip; + register_t pti_cs; + register_t pti_rflags; + register_t pti_rsp; + register_t pti_ss; +}; + +#ifdef _KERNEL +#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) +#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) +#endif + +#endif diff --git a/sys/amd64/include/gdb_machdep.h b/sys/amd64/include/gdb_machdep.h new file mode 100644 index 000000000000..282a5f078da3 --- /dev/null +++ b/sys/amd64/include/gdb_machdep.h @@ -0,0 +1,75 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2004 Marcel Moolenaar + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MACHINE_GDB_MACHDEP_H_ +#define _MACHINE_GDB_MACHDEP_H_ + +#define GDB_BUFSZ 4096 +#define GDB_NREGS 56 +#define GDB_REG_RAX 0 +#define GDB_REG_RBX 1 +#define GDB_REG_RCX 2 +#define GDB_REG_RDX 3 +#define GDB_REG_RSI 4 +#define GDB_REG_RDI 5 +#define GDB_REG_RBP 6 +#define GDB_REG_RSP 7 +#define GDB_REG_R8 8 +#define GDB_REG_R9 9 +#define GDB_REG_R10 10 +#define GDB_REG_R11 11 +#define GDB_REG_R12 12 +#define GDB_REG_R13 13 +#define GDB_REG_R14 14 +#define GDB_REG_R15 15 +#define GDB_REG_PC 16 +#define GDB_REG_RFLAGS 17 +#define GDB_REG_CS 18 +#define GDB_REG_SS 19 +_Static_assert(GDB_BUFSZ >= (GDB_NREGS * 16), "buffer fits 'g' regs"); + +static __inline size_t +gdb_cpu_regsz(int regnum) +{ + return ((regnum > 16 && regnum < 24) ? 4 : 8); +} + +static __inline int +gdb_cpu_query(void) +{ + return (0); +} + +void *gdb_begin_write(void); +void *gdb_cpu_getreg(int, size_t *); +void gdb_cpu_setreg(int, void *); +int gdb_cpu_signal(int, int); +void gdb_end_write(void *); +void gdb_cpu_stop_reason(int, int); + +#endif /* !_MACHINE_GDB_MACHDEP_H_ */ diff --git a/sys/amd64/include/ieeefp.h b/sys/amd64/include/ieeefp.h new file mode 100644 index 000000000000..f9ec5bf83fd0 --- /dev/null +++ b/sys/amd64/include/ieeefp.h @@ -0,0 +1,208 @@ +/*- + * SPDX-License-Identifier: BSD-4-Clause + * + * Copyright (c) 2003 Peter Wemm. + * Copyright (c) 1990 Andrew Moore, Talke Studio + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MACHINE_IEEEFP_H_ +#define _MACHINE_IEEEFP_H_ + +/* Deprecated historical FPU control interface */ + +#include <x86/x86_ieeefp.h> + +/* + * IEEE floating point type, constant and function definitions. + * XXX: {FP,SSE}*FLD and {FP,SSE}*OFF are undocumented pollution. + */ + +/* + * SSE mxcsr register bit-field masks. + */ +#define SSE_STKY_FLD 0x3f /* exception flags */ +#define SSE_DAZ_FLD 0x40 /* Denormals are zero */ +#define SSE_MSKS_FLD 0x1f80 /* exception masks field */ +#define SSE_RND_FLD 0x6000 /* rounding control */ +#define SSE_FZ_FLD 0x8000 /* flush to zero on underflow */ + +/* + * SSE mxcsr register bit-field offsets (shift counts). + */ +#define SSE_STKY_OFF 0 /* exception flags offset */ +#define SSE_DAZ_OFF 6 /* DAZ exception mask offset */ +#define SSE_MSKS_OFF 7 /* other exception masks offset */ +#define SSE_RND_OFF 13 /* rounding control offset */ +#define SSE_FZ_OFF 15 /* flush to zero offset */ + +/* + * General notes about conflicting SSE vs FP status bits. + * This code assumes that software will not fiddle with the control + * bits of the SSE and x87 in such a way to get them out of sync and + * still expect this to work. Break this at your peril. + * Because I based this on the i386 port, the x87 state is used for + * the fpget*() functions, and is shadowed into the SSE state for + * the fpset*() functions. For dual source fpget*() functions, I + * merge the two together. I think. + */ + +static __inline fp_rnd_t +__fpgetround(void) +{ + unsigned short _cw; + + __fnstcw(&_cw); + return ((fp_rnd_t)((_cw & FP_RND_FLD) >> FP_RND_OFF)); +} + +static __inline fp_rnd_t +__fpsetround(fp_rnd_t _m) +{ + fp_rnd_t _p; + unsigned _mxcsr; + unsigned short _cw, _newcw; + + __fnstcw(&_cw); + _p = (fp_rnd_t)((_cw & FP_RND_FLD) >> FP_RND_OFF); + _newcw = _cw & ~FP_RND_FLD; + _newcw |= (_m << FP_RND_OFF) & FP_RND_FLD; + __fnldcw(_cw, _newcw); + __stmxcsr(&_mxcsr); + _mxcsr &= ~SSE_RND_FLD; + _mxcsr |= (_m << SSE_RND_OFF) & SSE_RND_FLD; + __ldmxcsr(&_mxcsr); + return (_p); +} + +/* + * Get or set the rounding precision for x87 arithmetic operations. + * There is no equivalent SSE mode or control. + */ + +static __inline fp_prec_t +__fpgetprec(void) +{ + unsigned short _cw; + + __fnstcw(&_cw); + return ((fp_prec_t)((_cw & FP_PRC_FLD) >> FP_PRC_OFF)); +} + +static __inline fp_prec_t +__fpsetprec(fp_prec_t _m) +{ + fp_prec_t _p; + unsigned short _cw, _newcw; + + __fnstcw(&_cw); + _p = (fp_prec_t)((_cw & FP_PRC_FLD) >> FP_PRC_OFF); + _newcw = _cw & ~FP_PRC_FLD; + _newcw |= (_m << FP_PRC_OFF) & FP_PRC_FLD; + __fnldcw(_cw, _newcw); + return (_p); +} + +/* + * Get or set the exception mask. + * Note that the x87 mask bits are inverted by the API -- a mask bit of 1 + * means disable for x87 and SSE, but for fp*mask() it means enable. + */ + +static __inline fp_except_t +__fpgetmask(void) +{ + unsigned short _cw; + + __fnstcw(&_cw); + return ((~_cw & FP_MSKS_FLD) >> FP_MSKS_OFF); +} + +static __inline fp_except_t +__fpsetmask(fp_except_t _m) +{ + fp_except_t _p; + unsigned _mxcsr; + unsigned short _cw, _newcw; + + __fnstcw(&_cw); + _p = (~_cw & FP_MSKS_FLD) >> FP_MSKS_OFF; + _newcw = _cw & ~FP_MSKS_FLD; + _newcw |= (~_m << FP_MSKS_OFF) & FP_MSKS_FLD; + __fnldcw(_cw, _newcw); + __stmxcsr(&_mxcsr); + /* XXX should we clear non-ieee SSE_DAZ_FLD and SSE_FZ_FLD ? */ + _mxcsr &= ~SSE_MSKS_FLD; + _mxcsr |= (~_m << SSE_MSKS_OFF) & SSE_MSKS_FLD; + __ldmxcsr(&_mxcsr); + return (_p); +} + +static __inline fp_except_t +__fpgetsticky(void) +{ + unsigned _ex, _mxcsr; + unsigned short _sw; + + __fnstsw(&_sw); + _ex = (_sw & FP_STKY_FLD) >> FP_STKY_OFF; + __stmxcsr(&_mxcsr); + _ex |= (_mxcsr & SSE_STKY_FLD) >> SSE_STKY_OFF; + return ((fp_except_t)_ex); +} + +#if !defined(__IEEEFP_NOINLINES__) + +#define fpgetmask() __fpgetmask() +#define fpgetprec() __fpgetprec() +#define fpgetround() __fpgetround() +#define fpgetsticky() __fpgetsticky() +#define fpsetmask(m) __fpsetmask(m) +#define fpsetprec(m) __fpsetprec(m) +#define fpsetround(m) __fpsetround(m) + +#else /* __IEEEFP_NOINLINES__ */ + +/* Augment the userland declarations. */ +__BEGIN_DECLS +extern fp_rnd_t fpgetround(void); +extern fp_rnd_t fpsetround(fp_rnd_t); +extern fp_except_t fpgetmask(void); +extern fp_except_t fpsetmask(fp_except_t); +extern fp_except_t fpgetsticky(void); +extern fp_except_t fpsetsticky(fp_except_t); +fp_prec_t fpgetprec(void); +fp_prec_t fpsetprec(fp_prec_t); +__END_DECLS + +#endif /* !__IEEEFP_NOINLINES__ */ + +#endif /* !_MACHINE_IEEEFP_H_ */ diff --git a/sys/amd64/include/in_cksum.h b/sys/amd64/include/in_cksum.h new file mode 100644 index 000000000000..7b649011f7d9 --- /dev/null +++ b/sys/amd64/include/in_cksum.h @@ -0,0 +1,49 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from tahoe: in_cksum.c 1.2 86/01/05 + * from: Id: in_cksum.c,v 1.8 1995/12/03 18:35:19 bde Exp + */ + +#ifndef _MACHINE_IN_CKSUM_H_ +#define _MACHINE_IN_CKSUM_H_ 1 + +#define in_cksum(m, len) in_cksum_skip(m, len, 0) + +#ifdef _KERNEL +#if defined(IPVERSION) && (IPVERSION == 4) +u_int in_cksum_hdr(const struct ip *ip); +#endif +u_short in_addword(u_short sum, u_short b); +u_short in_pseudo(u_int sum, u_int b, u_int c); +u_short in_cksum_skip(struct mbuf *m, int len, int skip); +#endif + +#endif /* _MACHINE_IN_CKSUM_H_ */ diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h new file mode 100644 index 000000000000..1b78875bf853 --- /dev/null +++ b/sys/amd64/include/intr_machdep.h @@ -0,0 +1,47 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef __MACHINE_INTR_MACHDEP_H__ +#define __MACHINE_INTR_MACHDEP_H__ + +#include <x86/intr_machdep.h> + +/* + * The following data structure holds per-cpu data, and is placed just + * above the top of the space used for the NMI and MC# stacks. + */ +struct nmi_pcpu { + register_t np_pcpu; + register_t __padding; /* pad to 16 bytes */ +}; + +#define DBLFAULT_STACK_SIZE PAGE_SIZE +#define NMI_STACK_SIZE PAGE_SIZE +#define MCE_STACK_SIZE PAGE_SIZE +#define DBG_STACK_SIZE PAGE_SIZE + +#endif /* !__MACHINE_INTR_MACHDEP_H__ */ diff --git a/sys/amd64/include/iodev.h b/sys/amd64/include/iodev.h new file mode 100644 index 000000000000..afba7a2b0867 --- /dev/null +++ b/sys/amd64/include/iodev.h @@ -0,0 +1,46 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2004 Mark R V Murray + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _MACHINE_IODEV_H_ +#define _MACHINE_IODEV_H_ + +#ifdef _KERNEL +#include <machine/cpufunc.h> + +#define iodev_read_1 inb +#define iodev_read_2 inw +#define iodev_read_4 inl +#define iodev_write_1 outb +#define iodev_write_2 outw +#define iodev_write_4 outl + +int iodev_open(struct thread *td); +int iodev_close(struct thread *td); +int iodev_ioctl(u_long cmd, caddr_t data); + +#endif /* _KERNEL */ +#endif /* _MACHINE_IODEV_H_ */ diff --git a/sys/amd64/include/iommu.h b/sys/amd64/include/iommu.h new file mode 100644 index 000000000000..ee9a3ac29847 --- /dev/null +++ b/sys/amd64/include/iommu.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/iommu.h> diff --git a/sys/amd64/include/kdb.h b/sys/amd64/include/kdb.h new file mode 100644 index 000000000000..e95d96cb3a3d --- /dev/null +++ b/sys/amd64/include/kdb.h @@ -0,0 +1,60 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2004 Marcel Moolenaar + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MACHINE_KDB_H_ +#define _MACHINE_KDB_H_ + +#include <machine/frame.h> +#include <machine/psl.h> + +int kdb_cpu_set_watchpoint(vm_offset_t addr, vm_size_t size, int access); +int kdb_cpu_clr_watchpoint(vm_offset_t addr, vm_size_t size); + +static __inline void +kdb_cpu_clear_singlestep(void) +{ + kdb_frame->tf_rflags &= ~PSL_T; +} + +static __inline void +kdb_cpu_set_singlestep(void) +{ + kdb_frame->tf_rflags |= PSL_T; +} + +static __inline void +kdb_cpu_sync_icache(unsigned char *addr, size_t size) +{ +} + +static __inline void +kdb_cpu_trap(int type, int code) +{ +} + +#endif /* _MACHINE_KDB_H_ */ diff --git a/sys/amd64/include/limits.h b/sys/amd64/include/limits.h new file mode 100644 index 000000000000..72dc3a64c23d --- /dev/null +++ b/sys/amd64/include/limits.h @@ -0,0 +1,39 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MACHINE_LIMITS_H_ +#define _MACHINE_LIMITS_H_ + +#warning "machine/limits.h is deprecated. Include sys/limits.h instead." + +#include <sys/limits.h> + +#endif /* !_MACHINE_LIMITS_H_ */ diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h new file mode 100644 index 000000000000..b6ddc6eaaebe --- /dev/null +++ b/sys/amd64/include/md_var.h @@ -0,0 +1,104 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1995 Bruce D. Evans. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __i386__ +#include <i386/md_var.h> +#else /* !__i386__ */ + +#ifndef _MACHINE_MD_VAR_H_ +#define _MACHINE_MD_VAR_H_ + +#include <x86/x86_var.h> + +extern char ctx_switch_xsave[]; +extern char ctx_switch_xsave32[]; +extern int hw_lower_amd64_sharedpage; +extern int hw_ibrs_disable; +extern int hw_ssb_disable; +extern int nmi_flush_l1d_sw; +extern int syscall_ret_l1d_flush_mode; + +extern vm_paddr_t intel_graphics_stolen_base; +extern vm_paddr_t intel_graphics_stolen_size; + +extern int la57; +extern int prefer_uva_la48; + +extern vm_paddr_t kernphys; +extern vm_paddr_t KERNend; + +extern bool efi_boot; + +struct __mcontext; +struct savefpu; +struct sysentvec; + +void amd64_conf_fast_syscall(void); +void amd64_db_resume_dbreg(void); +vm_paddr_t amd64_loadaddr(void); +void amd64_lower_shared_page(struct sysentvec *); +void amd64_bsp_pcpu_init1(struct pcpu *pc); +void amd64_bsp_pcpu_init2(uint64_t rsp0); +void amd64_bsp_ist_init(struct pcpu *pc); +void amd64_syscall(struct thread *td, int traced); +void amd64_syscall_ret_flush_l1d(int error); +void amd64_syscall_ret_flush_l1d_recalc(void); +void cpu_init_small_core(void); +void doreti_iret(void) __asm(__STRING(doreti_iret)); +void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault)); +void flush_l1d_sw_abi(void); +void ld_ds(void) __asm(__STRING(ld_ds)); +void ld_es(void) __asm(__STRING(ld_es)); +void ld_fs(void) __asm(__STRING(ld_fs)); +void ld_gs(void) __asm(__STRING(ld_gs)); +void ld_fsbase(void) __asm(__STRING(ld_fsbase)); +void ld_gsbase(void) __asm(__STRING(ld_gsbase)); +void ds_load_fault(void) __asm(__STRING(ds_load_fault)); +void es_load_fault(void) __asm(__STRING(es_load_fault)); +void fs_load_fault(void) __asm(__STRING(fs_load_fault)); +void gs_load_fault(void) __asm(__STRING(gs_load_fault)); +void fsbase_load_fault(void) __asm(__STRING(fsbase_load_fault)); +void gsbase_load_fault(void) __asm(__STRING(gsbase_load_fault)); +void fpstate_drop(struct thread *td); +void pagezero(void *addr); +void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist); +void set_top_of_stack_td(struct thread *td); +struct savefpu *get_pcb_user_save_td(struct thread *td); +struct savefpu *get_pcb_user_save_pcb(struct pcb *pcb); +void pci_early_quirks(void); +void get_fpcontext(struct thread *td, struct __mcontext *mcp, + char **xfpusave, size_t *xfpusave_len); +int set_fpcontext(struct thread *td, struct __mcontext *mcp, + char *xfpustate, size_t xfpustate_len); + +#endif /* !_MACHINE_MD_VAR_H_ */ + +#endif /* __i386__ */ diff --git a/sys/amd64/include/memdev.h b/sys/amd64/include/memdev.h new file mode 100644 index 000000000000..02dd5896b5b4 --- /dev/null +++ b/sys/amd64/include/memdev.h @@ -0,0 +1,40 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2004 Mark R V Murray + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MACHINE_MEMDEV_H_ +#define _MACHINE_MEMDEV_H_ + +#define CDEV_MINOR_MEM 0 +#define CDEV_MINOR_KMEM 1 + +d_open_t memopen; +d_read_t memrw; +d_ioctl_t memioctl_md; +d_mmap_t memmmap; + +#endif /* _MACHINE_MEMDEV_H_ */ diff --git a/sys/amd64/include/metadata.h b/sys/amd64/include/metadata.h new file mode 100644 index 000000000000..cd8c069c5bb0 --- /dev/null +++ b/sys/amd64/include/metadata.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/metadata.h> diff --git a/sys/amd64/include/minidump.h b/sys/amd64/include/minidump.h new file mode 100644 index 000000000000..3457ec18a848 --- /dev/null +++ b/sys/amd64/include/minidump.h @@ -0,0 +1,47 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2006 Peter Wemm + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MACHINE_MINIDUMP_H_ +#define _MACHINE_MINIDUMP_H_ 1 + +#define MINIDUMP_MAGIC "minidump FreeBSD/amd64" +#define MINIDUMP_VERSION 3 + +struct minidumphdr { + char magic[24]; + uint32_t version; + uint32_t msgbufsize; + uint32_t bitmapsize; + uint32_t pmapsize; + uint64_t kernbase; + uint64_t dmapbase; + uint64_t dmapend; + uint32_t dumpavailsize; +}; + +#endif /* _MACHINE_MINIDUMP_H_ */ diff --git a/sys/amd64/include/msan.h b/sys/amd64/include/msan.h new file mode 100644 index 000000000000..00b0b636d6d7 --- /dev/null +++ b/sys/amd64/include/msan.h @@ -0,0 +1,91 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2021 The FreeBSD Foundation + * + * This software was developed by Mark Johnston under sponsorship from the + * FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MACHINE_MSAN_H_ +#define _MACHINE_MSAN_H_ + +#ifdef KMSAN + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_page.h> +#include <machine/vmparam.h> + +typedef uint32_t msan_orig_t; + +/* + * Our 32-bit origin cells encode a 2-bit type and 30-bit pointer to a kernel + * instruction. The pointer is compressed by making it a positive offset + * relative to KERNBASE. + */ +#define KMSAN_ORIG_TYPE_SHIFT 30u +#define KMSAN_ORIG_PTR_MASK ((1u << KMSAN_ORIG_TYPE_SHIFT) - 1) + +static inline msan_orig_t +kmsan_md_orig_encode(int type, uintptr_t ptr) +{ + return ((type << KMSAN_ORIG_TYPE_SHIFT) | + ((ptr & KMSAN_ORIG_PTR_MASK))); +} + +static inline void +kmsan_md_orig_decode(msan_orig_t orig, int *type, uintptr_t *ptr) +{ + *type = orig >> KMSAN_ORIG_TYPE_SHIFT; + *ptr = (orig & KMSAN_ORIG_PTR_MASK) | KERNBASE; +} + +static inline vm_offset_t +kmsan_md_addr_to_shad(vm_offset_t addr) +{ + return (addr - VM_MIN_KERNEL_ADDRESS + KMSAN_SHAD_MIN_ADDRESS); +} + +static inline vm_offset_t +kmsan_md_addr_to_orig(vm_offset_t addr) +{ + return (addr - VM_MIN_KERNEL_ADDRESS + KMSAN_ORIG_MIN_ADDRESS); +} + +static inline bool +kmsan_md_unsupported(vm_offset_t addr) +{ + /* + * The kernel itself isn't shadowed: for most purposes global variables + * are always initialized, and because KMSAN kernels are large + * (GENERIC-KMSAN is ~80MB at the time of writing), shadowing would + * incur significant memory usage. + */ + return (addr < VM_MIN_KERNEL_ADDRESS || addr >= KERNBASE); +} + +#endif /* KMSAN */ + +#endif /* !_MACHINE_MSAN_H_ */ diff --git a/sys/amd64/include/nexusvar.h b/sys/amd64/include/nexusvar.h new file mode 100644 index 000000000000..5e73cf0932bf --- /dev/null +++ b/sys/amd64/include/nexusvar.h @@ -0,0 +1,43 @@ +/*- + * Copyright 1998 Massachusetts Institute of Technology + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MACHINE_NEXUSVAR_H_ +#define _MACHINE_NEXUSVAR_H_ + +struct nexus_device { + struct resource_list nx_resources; +}; + +DECLARE_CLASS(nexus_driver); + +extern struct rman irq_rman, drq_rman, port_rman, mem_rman; + +void nexus_init_resources(void); + +#endif /* !_MACHINE_NEXUSVAR_H_ */ diff --git a/sys/amd64/include/npx.h b/sys/amd64/include/npx.h new file mode 100644 index 000000000000..431cc00314f7 --- /dev/null +++ b/sys/amd64/include/npx.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/fpu.h> diff --git a/sys/amd64/include/ofw_machdep.h b/sys/amd64/include/ofw_machdep.h new file mode 100644 index 000000000000..81fe027ceb41 --- /dev/null +++ b/sys/amd64/include/ofw_machdep.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/ofw_machdep.h> diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h new file mode 100644 index 000000000000..5a9c3162e14c --- /dev/null +++ b/sys/amd64/include/param.h @@ -0,0 +1,155 @@ +/*- + * SPDX-License-Identifier: BSD-4-Clause + * + * Copyright (c) 2002 David E. O'Brien. All rights reserved. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and Ralph Campbell. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _AMD64_INCLUDE_PARAM_H_ +#define _AMD64_INCLUDE_PARAM_H_ + +#include <machine/_align.h> + +/* + * Machine dependent constants for AMD64. + */ + +#ifndef MACHINE +#define MACHINE "amd64" +#endif +#ifndef MACHINE_ARCH +#define MACHINE_ARCH "amd64" +#endif +#ifndef MACHINE_ARCH32 +#define MACHINE_ARCH32 "i386" +#endif + +#ifdef SMP +#ifndef MAXCPU +#define MAXCPU 1024 +#endif +#else +#define MAXCPU 1 +#endif + +#ifndef MAXMEMDOM +#define MAXMEMDOM 8 +#endif + +#define ALIGNBYTES _ALIGNBYTES +#define ALIGN(p) _ALIGN(p) +/* + * ALIGNED_POINTER is a boolean macro that checks whether an address + * is valid to fetch data elements of type t from on this architecture. + * This does not reflect the optimal alignment, just the possibility + * (within reasonable limits). + */ +#define ALIGNED_POINTER(p, t) 1 + +/* + * CACHE_LINE_SIZE is the compile-time maximum cache line size for an + * architecture. It should be used with appropriate caution. + */ +#define CACHE_LINE_SHIFT 6 +#define CACHE_LINE_SIZE (1 << CACHE_LINE_SHIFT) + +/* Size of the level 1 page table units */ +#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t))) +#define NPTEPGSHIFT 9 /* LOG2(NPTEPG) */ +#define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */ +#define PAGE_SIZE (1<<PAGE_SHIFT) /* bytes/page */ +#define PAGE_MASK (PAGE_SIZE-1) +/* Size of the level 2 page directory units */ +#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define NPDEPGSHIFT 9 /* LOG2(NPDEPG) */ +#define PDRSHIFT 21 /* LOG2(NBPDR) */ +#define NBPDR (1<<PDRSHIFT) /* bytes/page dir */ +#define PDRMASK (NBPDR-1) +/* Size of the level 3 page directory pointer table units */ +#define NPDPEPG (PAGE_SIZE/(sizeof (pdp_entry_t))) +#define NPDPEPGSHIFT 9 /* LOG2(NPDPEPG) */ +#define PDPSHIFT 30 /* LOG2(NBPDP) */ +#define NBPDP (1<<PDPSHIFT) /* bytes/page dir ptr table */ +#define PDPMASK (NBPDP-1) +/* Size of the level 4 page-map level-4 table units */ +#define NPML4EPG (PAGE_SIZE/(sizeof (pml4_entry_t))) +#define NPML4EPGSHIFT 9 /* LOG2(NPML4EPG) */ +#define PML4SHIFT 39 /* LOG2(NBPML4) */ +#define NBPML4 (1UL<<PML4SHIFT)/* bytes/page map lev4 table */ +#define PML4MASK (NBPML4-1) +/* Size of the level 5 page-map level-5 table units */ +#define NPML5EPG (PAGE_SIZE/(sizeof (pml5_entry_t))) +#define NPML5EPGSHIFT 9 /* LOG2(NPML5EPG) */ +#define PML5SHIFT 48 /* LOG2(NBPML5) */ +#define NBPML5 (1UL<<PML5SHIFT)/* bytes/page map lev5 table */ +#define PML5MASK (NBPML5-1) + +#define MAXPAGESIZES 3 /* maximum number of supported page sizes */ + +#define IOPAGES 2 /* pages of i/o permission bitmap */ +/* + * I/O permission bitmap has a bit for each I/O port plus an additional + * byte at the end with all bits set. See section "I/O Permission Bit Map" + * in the Intel SDM for more details. + */ +#define IOPERM_BITMAP_SIZE (IOPAGES * PAGE_SIZE + 1) + +#ifndef KSTACK_PAGES +#if defined(KASAN) || defined(KMSAN) +#define KSTACK_PAGES 6 +#else +#define KSTACK_PAGES 4 /* pages of kstack (with pcb) */ +#endif +#endif +#define KSTACK_GUARD_PAGES 1 /* pages of kstack guard; 0 disables */ + +/* + * Mach derived conversion macros + */ +#define trunc_2mpage(x) ((unsigned long)(x) & ~PDRMASK) +#define round_2mpage(x) ((((unsigned long)(x)) + PDRMASK) & ~PDRMASK) +#define trunc_1gpage(x) ((unsigned long)(x) & ~PDPMASK) + +#define amd64_btop(x) ((unsigned long)(x) >> PAGE_SHIFT) +#define amd64_ptob(x) ((unsigned long)(x) << PAGE_SHIFT) + +#define INKERNEL(va) \ + (((va) >= kva_layout.dmap_low && (va) < kva_layout.dmap_high) || \ + ((va) >= kva_layout.km_low && (va) < kva_layout.km_high)) + +#define SC_TABLESIZE 1024 /* Must be power of 2. */ + +#endif /* !_AMD64_INCLUDE_PARAM_H_ */ diff --git a/sys/amd64/include/pc/bios.h b/sys/amd64/include/pc/bios.h new file mode 100644 index 000000000000..fbc331d0c9d7 --- /dev/null +++ b/sys/amd64/include/pc/bios.h @@ -0,0 +1,92 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 1997 Michael Smith + * Copyright (c) 1998 Jonathan Lemon + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MACHINE_PC_BIOS_H_ +#define _MACHINE_PC_BIOS_H_ + +/* + * Int 15:E820 'SMAP' structure + */ +#define SMAP_SIG 0x534D4150 /* 'SMAP' */ + +#define SMAP_TYPE_MEMORY 1 +#define SMAP_TYPE_RESERVED 2 +#define SMAP_TYPE_ACPI_RECLAIM 3 +#define SMAP_TYPE_ACPI_NVS 4 +#define SMAP_TYPE_ACPI_ERROR 5 +#define SMAP_TYPE_DISABLED 6 +#define SMAP_TYPE_PMEM 7 +#define SMAP_TYPE_PRAM 12 + +#define SMAP_XATTR_ENABLED 0x00000001 +#define SMAP_XATTR_NON_VOLATILE 0x00000002 +#define SMAP_XATTR_MASK (SMAP_XATTR_ENABLED | SMAP_XATTR_NON_VOLATILE) + +struct bios_smap { + u_int64_t base; + u_int64_t length; + u_int32_t type; +} __packed; + +/* Structure extended to include extended attribute field in ACPI 3.0. */ +struct bios_smap_xattr { + u_int64_t base; + u_int64_t length; + u_int32_t type; + u_int32_t xattr; +} __packed; + +#ifdef _KERNEL +#define BIOS_PADDRTOVADDR(x) ((x) + KERNBASE) +#define BIOS_VADDRTOPADDR(x) ((x) - KERNBASE) + +struct bios_oem_signature { + char * anchor; /* search anchor string in BIOS memory */ + size_t offset; /* offset from anchor (may be negative) */ + size_t totlen; /* total length of BIOS string to copy */ +} __packed; + +struct bios_oem_range { + u_int from; /* shouldn't be below 0xe0000 */ + u_int to; /* shouldn't be above 0xfffff */ +} __packed; + +struct bios_oem { + struct bios_oem_range range; + struct bios_oem_signature signature[]; +} __packed; + +int bios_oem_strings(struct bios_oem *oem, u_char *buffer, size_t maxlen); +uint32_t bios_sigsearch(uint32_t start, u_char *sig, int siglen, int paralen, + int sigofs); +void bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize, + vm_paddr_t *physmap, int *physmap_idx); +#endif + +#endif /* _MACHINE_PC_BIOS_H_ */ diff --git a/sys/amd64/include/pc/display.h b/sys/amd64/include/pc/display.h new file mode 100644 index 000000000000..cab46e44e881 --- /dev/null +++ b/sys/amd64/include/pc/display.h @@ -0,0 +1,43 @@ +/* + * IBM PC display definitions + */ + +/* Color attributes for foreground text */ + +#define FG_BLACK 0 +#define FG_BLUE 1 +#define FG_GREEN 2 +#define FG_CYAN 3 +#define FG_RED 4 +#define FG_MAGENTA 5 +#define FG_BROWN 6 +#define FG_LIGHTGREY 7 +#define FG_DARKGREY 8 +#define FG_LIGHTBLUE 9 +#define FG_LIGHTGREEN 10 +#define FG_LIGHTCYAN 11 +#define FG_LIGHTRED 12 +#define FG_LIGHTMAGENTA 13 +#define FG_YELLOW 14 +#define FG_WHITE 15 +#define FG_BLINK 0x80 + +/* Color attributes for text background */ + +#define BG_BLACK 0x00 +#define BG_BLUE 0x10 +#define BG_GREEN 0x20 +#define BG_CYAN 0x30 +#define BG_RED 0x40 +#define BG_MAGENTA 0x50 +#define BG_BROWN 0x60 +#define BG_LIGHTGREY 0x70 + +/* Monochrome attributes for foreground text */ + +#define FG_UNDERLINE 0x01 +#define FG_INTENSE 0x08 + +/* Monochrome attributes for text background */ + +#define BG_INTENSE 0x10 diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h new file mode 100644 index 000000000000..27e1dce08ee1 --- /dev/null +++ b/sys/amd64/include/pcb.h @@ -0,0 +1,139 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 2003 Peter Wemm. + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _AMD64_PCB_H_ +#define _AMD64_PCB_H_ + +/* + * AMD64 process control block + */ +#include <machine/fpu.h> +#include <machine/segments.h> + +#ifdef __amd64__ +/* + * NB: The fields marked with (*) are used by kernel debuggers. Their + * ABI should be preserved. + */ +struct pcb { + register_t pcb_r15; /* (*) */ + register_t pcb_r14; /* (*) */ + register_t pcb_r13; /* (*) */ + register_t pcb_r12; /* (*) */ + register_t pcb_rbp; /* (*) */ + register_t pcb_rsp; /* (*) */ + register_t pcb_rbx; /* (*) */ + register_t pcb_rip; /* (*) */ + register_t pcb_fsbase; + register_t pcb_gsbase; + register_t pcb_kgsbase; + register_t pcb_cr0; + register_t pcb_cr2; + register_t pcb_cr3; + register_t pcb_cr4; + register_t pcb_dr0; + register_t pcb_dr1; + register_t pcb_dr2; + register_t pcb_dr3; + register_t pcb_dr6; + register_t pcb_dr7; + + struct region_descriptor pcb_gdt; + struct region_descriptor pcb_idt; + struct region_descriptor pcb_ldt; + uint16_t pcb_tr; + + u_int pcb_flags; +#define PCB_FULL_IRET 0x0001 /* full iret is required */ +#define PCB_DBREGS 0x0002 /* process using debug registers */ +#define PCB_KERNFPU 0x0004 /* kernel uses fpu */ +#define PCB_FPUINITDONE 0x0008 /* fpu state is initialized */ +#define PCB_USERFPUINITDONE 0x0010 /* fpu user state is initialized */ +#define PCB_KERNFPU_THR 0x0020 /* fpu_kern_thread() */ +#define PCB_32BIT 0x0040 /* process has 32 bit context (segs etc) */ +#define PCB_FPUNOSAVE 0x0080 /* no save area for current FPU ctx */ +#define PCB_TLSBASE 0x0100 /* tlsbase was set */ + + uint16_t pcb_initial_fpucw; + + /* copyin/out fault recovery */ + caddr_t pcb_onfault; + + uint64_t pcb_saved_ucr3; + + /* local tss, with i/o bitmap; NULL for common */ + struct amd64tss *pcb_tssp; + + /* model specific registers */ + register_t pcb_efer; + register_t pcb_star; + register_t pcb_lstar; + register_t pcb_cstar; + register_t pcb_sfmask; + + struct savefpu *pcb_save; + + register_t pcb_tlsbase; /* not same as pcb_fsbase */ + uint64_t pcb_pad[4]; +}; + +/* Per-CPU state saved during suspend and resume. */ +struct susppcb { + struct pcb sp_pcb; + + /* fpu context for suspend/resume */ + void *sp_fpususpend; +}; +#else /* 32bit */ +struct pcb { + uint64_t pcb_dummy[40]; +}; +#endif + +#ifdef _KERNEL +struct trapframe; + +void clear_pcb_flags(struct pcb *pcb, const u_int flags); +void makectx(struct trapframe *, struct pcb *); +void set_pcb_flags(struct pcb *pcb, const u_int flags); +void set_pcb_flags_raw(struct pcb *pcb, const u_int flags); +int savectx(struct pcb *) __returns_twice; +void resumectx(struct pcb *); + +/* Ensure that pcb_gsbase and pcb_fsbase are up to date */ +#define update_pcb_bases(pcb) set_pcb_flags((pcb), PCB_FULL_IRET) +#endif + +#endif /* _AMD64_PCB_H_ */ diff --git a/sys/amd64/include/pci_cfgreg.h b/sys/amd64/include/pci_cfgreg.h new file mode 100644 index 000000000000..651c71045353 --- /dev/null +++ b/sys/amd64/include/pci_cfgreg.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/pci_cfgreg.h> diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h new file mode 100644 index 000000000000..213790d245dc --- /dev/null +++ b/sys/amd64/include/pcpu.h @@ -0,0 +1,334 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) Peter Wemm <peter@netplex.com.au> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __i386__ +#include <i386/pcpu.h> +#else /* !__i386__ */ + +#ifndef _MACHINE_PCPU_H_ +#define _MACHINE_PCPU_H_ + +#include <machine/_pmap.h> +#include <machine/segments.h> +#include <machine/tss.h> + +#define PC_PTI_STACK_SZ 16 + +struct monitorbuf { + int idle_state; /* Used by cpu_idle_mwait. */ + int stop_state; /* Used by cpustop_handler. */ + char padding[128 - (2 * sizeof(int))]; +}; +_Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line"); + +/* + * The SMP parts are setup in pmap.c and locore.s for the BSP, and + * mp_machdep.c sets up the data for the AP's to "see" when they awake. + * The reason for doing it via a struct is so that an array of pointers + * to each CPU's data can be set up for things like "check curproc on all + * other processors" + */ +#define PCPU_MD_FIELDS \ + struct monitorbuf pc_monitorbuf __aligned(128); /* cache line */\ + struct pcpu *pc_prvspace; /* Self-reference */ \ + struct pmap *pc_curpmap; \ + struct amd64tss *pc_tssp; /* TSS segment active on CPU */ \ + void *pc_pad0; \ + uint64_t pc_kcr3; \ + uint64_t pc_ucr3; \ + uint64_t pc_saved_ucr3; \ + register_t pc_rsp0; \ + register_t pc_scratch_rsp; /* User %rsp in syscall */ \ + register_t pc_scratch_rax; \ + u_int pc_apic_id; \ + u_int pc_acpi_id; /* ACPI CPU id */ \ + /* Pointer to the CPU %fs descriptor */ \ + struct user_segment_descriptor *pc_fs32p; \ + /* Pointer to the CPU %gs descriptor */ \ + struct user_segment_descriptor *pc_gs32p; \ + /* Pointer to the CPU LDT descriptor */ \ + struct system_segment_descriptor *pc_ldt; \ + /* Pointer to the CPU TSS descriptor */ \ + struct system_segment_descriptor *pc_tss; \ + u_int pc_cmci_mask; /* MCx banks for CMCI */ \ + uint64_t pc_dbreg[16]; /* ddb debugging regs */ \ + uint64_t pc_pti_stack[PC_PTI_STACK_SZ]; \ + register_t pc_pti_rsp0; \ + int pc_dbreg_cmd; /* ddb debugging reg cmd */ \ + u_int pc_vcpu_id; /* Xen vCPU ID */ \ + uint32_t pc_pcid_next; \ + uint32_t pc_pcid_gen; \ + uint32_t pc_unused; \ + uint32_t pc_ibpb_set; \ + void *pc_mds_buf; \ + void *pc_mds_buf64; \ + uint32_t pc_pad[4]; \ + uint8_t pc_mds_tmp[64]; \ + u_int pc_ipi_bitmap; \ + struct amd64tss pc_common_tss; \ + struct user_segment_descriptor pc_gdt[NGDT]; \ + void *pc_smp_tlb_pmap; \ + uint64_t pc_smp_tlb_addr1; \ + uint64_t pc_smp_tlb_addr2; \ + uint32_t pc_smp_tlb_gen; \ + u_int pc_smp_tlb_op; \ + uint64_t pc_ucr3_load_mask; \ + u_int pc_small_core; \ + u_int pc_pcid_invlpg_workaround; \ + struct pmap_pcid pc_kpmap_store; \ + char __pad[2900] /* pad to UMA_PCPU_ALLOC_SIZE */ + +#define PC_DBREG_CMD_NONE 0 +#define PC_DBREG_CMD_LOAD 1 + +#ifdef _KERNEL + +#define MONITOR_STOPSTATE_RUNNING 0 +#define MONITOR_STOPSTATE_STOPPED 1 + +/* + * Evaluates to the type of the per-cpu variable name. + */ +#define __pcpu_type(name) \ + __typeof(((struct pcpu *)0)->name) + +#ifdef __SEG_GS +#define get_pcpu() __extension__ ({ \ + static struct pcpu __seg_gs *__pc = 0; \ + \ + __pc->pc_prvspace; \ +}) + +/* + * Evaluates to the address of the per-cpu variable name. + */ +#define __PCPU_PTR(name) __extension__ ({ \ + struct pcpu *__pc = get_pcpu(); \ + \ + &__pc->name; \ +}) + +/* + * Evaluates to the value of the per-cpu variable name. + */ +#define __PCPU_GET(name) __extension__ ({ \ + static struct pcpu __seg_gs *__pc = 0; \ + \ + __pc->name; \ +}) + +/* + * Adds the value to the per-cpu counter name. The implementation + * must be atomic with respect to interrupts. + */ +#define __PCPU_ADD(name, val) do { \ + static struct pcpu __seg_gs *__pc = 0; \ + __pcpu_type(name) __val; \ + \ + __val = (val); \ + if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ + sizeof(__val) == 4 || sizeof(__val) == 8) { \ + __pc->name += __val; \ + } else \ + *__PCPU_PTR(name) += __val; \ +} while (0) + +/* + * Sets the value of the per-cpu variable name to value val. + */ +#define __PCPU_SET(name, val) do { \ + static struct pcpu __seg_gs *__pc = 0; \ + __pcpu_type(name) __val; \ + \ + __val = (val); \ + if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ + sizeof(__val) == 4 || sizeof(__val) == 8) { \ + __pc->name = __val; \ + } else \ + *__PCPU_PTR(name) = __val; \ +} while (0) +#else /* !__SEG_GS */ +/* + * Evaluates to the byte offset of the per-cpu variable name. + */ +#define __pcpu_offset(name) \ + __offsetof(struct pcpu, name) + +/* + * Evaluates to the address of the per-cpu variable name. + */ +#define __PCPU_PTR(name) \ + (&get_pcpu()->name) + +/* + * Evaluates to the value of the per-cpu variable name. + */ +#define __PCPU_GET(name) __extension__ ({ \ + __pcpu_type(name) __res; \ + struct __s { \ + u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \ + }; \ + \ + if (sizeof(__res) == 1 || sizeof(__res) == 2 || \ + sizeof(__res) == 4 || sizeof(__res) == 8) { \ + __asm __volatile("mov %%gs:%c1,%0" \ + : "=r" (*(struct __s *)(void *)&__res) \ + : "i" (__pcpu_offset(name))); \ + } else { \ + __res = *__PCPU_PTR(name); \ + } \ + __res; \ +}) + +/* + * Adds the value to the per-cpu counter name. The implementation + * must be atomic with respect to interrupts. + */ +#define __PCPU_ADD(name, val) do { \ + __pcpu_type(name) __val; \ + struct __s { \ + u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \ + }; \ + \ + __val = (val); \ + if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ + sizeof(__val) == 4 || sizeof(__val) == 8) { \ + __asm __volatile("add %1,%%gs:%c0" \ + : \ + : "i" (__pcpu_offset(name)), \ + "r" (*(struct __s *)(void *)&__val) \ + : "cc", "memory"); \ + } else \ + *__PCPU_PTR(name) += __val; \ +} while (0) + +/* + * Sets the value of the per-cpu variable name to value val. + */ +#define __PCPU_SET(name, val) do { \ + __pcpu_type(name) __val; \ + struct __s { \ + u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \ + }; \ + \ + __val = (val); \ + if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ + sizeof(__val) == 4 || sizeof(__val) == 8) { \ + __asm __volatile("mov %1,%%gs:%c0" \ + : \ + : "i" (__pcpu_offset(name)), \ + "r" (*(struct __s *)(void *)&__val) \ + : "memory"); \ + } else { \ + *__PCPU_PTR(name) = __val; \ + } \ +} while (0) + +#define get_pcpu() __extension__ ({ \ + struct pcpu *__pc; \ + \ + __asm __volatile("movq %%gs:%c1,%0" \ + : "=r" (__pc) \ + : "i" (__pcpu_offset(pc_prvspace))); \ + __pc; \ +}) +#endif /* !__SEG_GS */ + +#define PCPU_GET(member) __PCPU_GET(pc_ ## member) +#define PCPU_ADD(member, val) __PCPU_ADD(pc_ ## member, val) +#define PCPU_PTR(member) __PCPU_PTR(pc_ ## member) +#define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val) + +#define IS_BSP() (PCPU_GET(cpuid) == 0) + +#define zpcpu_offset_cpu(cpu) ((uintptr_t)&__pcpu[0] + UMA_PCPU_ALLOC_SIZE * cpu) +#define zpcpu_base_to_offset(base) (void *)((uintptr_t)(base) - (uintptr_t)&__pcpu[0]) +#define zpcpu_offset_to_base(base) (void *)((uintptr_t)(base) + (uintptr_t)&__pcpu[0]) + +#define zpcpu_sub_protected(base, n) do { \ + ZPCPU_ASSERT_PROTECTED(); \ + zpcpu_sub(base, n); \ +} while (0) + +#define zpcpu_set_protected(base, n) do { \ + __typeof(*base) __n = (n); \ + ZPCPU_ASSERT_PROTECTED(); \ + switch (sizeof(*base)) { \ + case 4: \ + __asm __volatile("movl\t%1,%%gs:(%0)" \ + : : "r" (base), "ri" (__n) : "memory", "cc"); \ + break; \ + case 8: \ + __asm __volatile("movq\t%1,%%gs:(%0)" \ + : : "r" (base), "ri" (__n) : "memory", "cc"); \ + break; \ + default: \ + *zpcpu_get(base) = __n; \ + } \ +} while (0); + +#define zpcpu_add(base, n) do { \ + __typeof(*base) __n = (n); \ + CTASSERT(sizeof(*base) == 4 || sizeof(*base) == 8); \ + switch (sizeof(*base)) { \ + case 4: \ + __asm __volatile("addl\t%1,%%gs:(%0)" \ + : : "r" (base), "ri" (__n) : "memory", "cc"); \ + break; \ + case 8: \ + __asm __volatile("addq\t%1,%%gs:(%0)" \ + : : "r" (base), "ri" (__n) : "memory", "cc"); \ + break; \ + } \ +} while (0) + +#define zpcpu_add_protected(base, n) do { \ + ZPCPU_ASSERT_PROTECTED(); \ + zpcpu_add(base, n); \ +} while (0) + +#define zpcpu_sub(base, n) do { \ + __typeof(*base) __n = (n); \ + CTASSERT(sizeof(*base) == 4 || sizeof(*base) == 8); \ + switch (sizeof(*base)) { \ + case 4: \ + __asm __volatile("subl\t%1,%%gs:(%0)" \ + : : "r" (base), "ri" (__n) : "memory", "cc"); \ + break; \ + case 8: \ + __asm __volatile("subq\t%1,%%gs:(%0)" \ + : : "r" (base), "ri" (__n) : "memory", "cc"); \ + break; \ + } \ +} while (0); + +#endif /* _KERNEL */ + +#endif /* !_MACHINE_PCPU_H_ */ + +#endif /* __i386__ */ diff --git a/sys/amd64/include/pcpu_aux.h b/sys/amd64/include/pcpu_aux.h new file mode 100644 index 000000000000..6e27162e1fb4 --- /dev/null +++ b/sys/amd64/include/pcpu_aux.h @@ -0,0 +1,66 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2019 The FreeBSD Foundation + * + * This software was developed by Konstantin Belousov <kib@FreeBSD.org> + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __i386__ +#include <i386/pcpu_aux.h> +#else /* !__i386__ */ + +#ifndef _MACHINE_PCPU_AUX_H_ +#define _MACHINE_PCPU_AUX_H_ + +#ifndef _KERNEL +#error "Not for userspace" +#endif + +#ifndef _SYS_PCPU_H_ +#error "Do not include machine/pcpu_aux.h directly" +#endif + +/* Required for counters(9) to work on x86. */ +_Static_assert(sizeof(struct pcpu) == UMA_PCPU_ALLOC_SIZE, "fix pcpu size"); + +extern struct pcpu *__pcpu; +extern struct pcpu temp_bsp_pcpu; + +static __inline __pure2 struct thread * +__curthread(void) +{ + struct thread *td; + + __asm("movq %%gs:%c1,%0" : "=r" (td) + : "i" (offsetof(struct pcpu, pc_curthread))); + return (td); +} +#define curthread (__curthread()) +#define curpcb (&curthread->td_md.md_pcb) + +#endif /* _MACHINE_PCPU_AUX_H_ */ + +#endif /* __i386__ */ diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h new file mode 100644 index 000000000000..e2f97442c10f --- /dev/null +++ b/sys/amd64/include/pmap.h @@ -0,0 +1,581 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 2003 Peter Wemm. + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Derived from hp300 version by Mike Hibler, this version by William + * Jolitz uses a recursive map [a pde points to the page directory] to + * map the page tables using the pagetables themselves. This is done to + * reduce the impact on kernel virtual memory for lots of sparse address + * space, and to reduce the cost of memory to each process. + */ + +#ifdef __i386__ +#include <i386/pmap.h> +#else /* !__i386__ */ + +#ifndef _MACHINE_PMAP_H_ +#define _MACHINE_PMAP_H_ + +#include <machine/pte.h> + +/* + * Define the PG_xx macros in terms of the bits on x86 PTEs. + */ +#define PG_V X86_PG_V +#define PG_RW X86_PG_RW +#define PG_U X86_PG_U +#define PG_NC_PWT X86_PG_NC_PWT +#define PG_NC_PCD X86_PG_NC_PCD +#define PG_A X86_PG_A +#define PG_M X86_PG_M +#define PG_PS X86_PG_PS +#define PG_PTE_PAT X86_PG_PTE_PAT +#define PG_G X86_PG_G +#define PG_AVAIL1 X86_PG_AVAIL1 +#define PG_AVAIL2 X86_PG_AVAIL2 +#define PG_AVAIL3 X86_PG_AVAIL3 +#define PG_PDE_PAT X86_PG_PDE_PAT +#define PG_NX X86_PG_NX +#define PG_PDE_CACHE X86_PG_PDE_CACHE +#define PG_PTE_CACHE X86_PG_PTE_CACHE + +/* Our various interpretations of the above */ +#define PG_W X86_PG_AVAIL3 /* "Wired" pseudoflag */ +#define PG_MANAGED X86_PG_AVAIL2 +#define EPT_PG_EMUL_V X86_PG_AVAIL(52) +#define EPT_PG_EMUL_RW X86_PG_AVAIL(53) +#define PG_PROMOTED X86_PG_AVAIL(54) /* PDE only */ + +/* + * Promotion to a 2MB (PDE) page mapping requires that the corresponding 4KB + * (PTE) page mappings have identical settings for the following fields: + */ +#define PG_PTE_PROMOTE (PG_NX | PG_MANAGED | PG_W | PG_G | PG_PTE_CACHE | \ + PG_M | PG_U | PG_RW | PG_V | PG_PKU_MASK) + +/* + * undef the PG_xx macros that define bits in the regular x86 PTEs that + * have a different position in nested PTEs. This is done when compiling + * code that needs to be aware of the differences between regular x86 and + * nested PTEs. + * + * The appropriate bitmask will be calculated at runtime based on the pmap + * type. + */ +#ifdef AMD64_NPT_AWARE +#undef PG_AVAIL1 /* X86_PG_AVAIL1 aliases with EPT_PG_M */ +#undef PG_G +#undef PG_A +#undef PG_M +#undef PG_PDE_PAT +#undef PG_PDE_CACHE +#undef PG_PTE_PAT +#undef PG_PTE_CACHE +#undef PG_RW +#undef PG_V +#endif + +/* + * Pte related macros. This is complicated by having to deal with + * the sign extension of the 48th bit. + */ +#define KV4ADDR(l4, l3, l2, l1) KV5ADDR(-1, l4, l3, l2, l1) +#define KV5ADDR(l5, l4, l3, l2, l1) ( \ + ((unsigned long)-1 << 56) | \ + ((unsigned long)(l5) << PML5SHIFT) | \ + ((unsigned long)(l4) << PML4SHIFT) | \ + ((unsigned long)(l3) << PDPSHIFT) | \ + ((unsigned long)(l2) << PDRSHIFT) | \ + ((unsigned long)(l1) << PAGE_SHIFT)) + +#define UVADDR(l5, l4, l3, l2, l1) ( \ + ((unsigned long)(l5) << PML5SHIFT) | \ + ((unsigned long)(l4) << PML4SHIFT) | \ + ((unsigned long)(l3) << PDPSHIFT) | \ + ((unsigned long)(l2) << PDRSHIFT) | \ + ((unsigned long)(l1) << PAGE_SHIFT)) + +/* + * Number of kernel PML4 slots. Can be anywhere from 1 to 64 or so, + * but setting it larger than NDMPML4E makes no sense. + * + * Each slot provides .5 TB of kernel virtual space. + */ +#define NKPML4E 4 + +/* + * Number of PML4 slots for the KASAN shadow map. It requires 1 byte of memory + * for every 8 bytes of the kernel address space. + */ +#define NKASANPML4E ((NKPML4E + 7) / 8) + +/* + * Number of PML4 slots for the KMSAN shadow and origin maps. These are + * one-to-one with the kernel map. + */ +#define NKMSANSHADPML4E NKPML4E +#define NKMSANORIGPML4E NKPML4E + +/* + * We use the same numbering of the page table pages for 5-level and + * 4-level paging structures. + */ +#define NUPML5E (NPML5EPG / 2) /* number of userland PML5 + pages */ +#define NUPML4E (NUPML5E * NPML4EPG) /* number of userland PML4 + pages */ +#define NUPDPE (NUPML4E * NPDPEPG) /* number of userland PDP + pages */ +#define NUPDE (NUPDPE * NPDEPG) /* number of userland PD + entries */ +#define NUP4ML4E (NPML4EPG / 2) + +/* + * NDMPML4E is the maximum number of PML4 entries that will be + * used to implement the direct map. It must be a power of two, + * and should generally exceed NKPML4E. The maximum possible + * value is 64; using 128 will make the direct map intrude into + * the recursive page table map. + */ +#define NDMPML4E 8 +#define NDMPML5E 32 + +/* + * These values control the layout of virtual memory. The starting + * address of the direct map is controlled by DMPML4I on LA48 and + * DMPML5I on LA57. + * + * Note: KPML4I is the index of the (single) level 4 page that maps + * the KVA that holds KERNBASE, while KPML4BASE is the index of the + * first level 4 page that maps VM_MIN_KERNEL_ADDRESS. If NKPML4E + * is 1, these are the same, otherwise KPML4BASE < KPML4I and extra + * level 4 PDEs are needed to map from VM_MIN_KERNEL_ADDRESS up to + * KERNBASE. + * + * (KPML4I combines with KPDPI to choose where KERNBASE starts. + * Or, in other words, KPML4I provides bits 39..47 of KERNBASE, + * and KPDPI provides bits 30..38.) + */ +#define PML4PML4I (NPML4EPG / 2) /* Index of recursive pml4 mapping */ +#define PML5PML5I (NPML5EPG / 2) /* Index of recursive pml5 mapping */ + +#define KPML4BASE (NPML4EPG-NKPML4E) /* KVM at highest addresses */ +#define DMPML4I rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */ +#define DMPML5I (NPML5EPG / 2 + 1) + +#define KPML4I (NPML4EPG-1) +#define KPDPI (NPDPEPG-2) /* kernbase at -2GB */ + +#define KASANPML4I (DMPML4I - NKASANPML4E) /* Below the direct map */ + +#define KMSANSHADPML4I (KPML4BASE - NKMSANSHADPML4E) +#define KMSANORIGPML4I (DMPML4I - NKMSANORIGPML4E) + +/* + * Large map: index of the first and max last pml4/la48 and pml5/la57 + * entry. + */ +#define LMSPML4I (PML4PML4I + 1) +#define LMEPML4I (KASANPML4I - 1) +#define LMSPML5I (DMPML5I + NDMPML5E) +#define LMEPML5I (LMSPML5I + 32 - 1) /* 32 slots for large map */ + +/* + * XXX doesn't really belong here I guess... + */ +#define ISA_HOLE_START 0xa0000 +#define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START) + +#define PMAP_PCID_NONE 0xffffffff +#define PMAP_PCID_KERN 0 +#define PMAP_PCID_OVERMAX 0x1000 +#define PMAP_PCID_OVERMAX_KERN 0x800 +#define PMAP_PCID_USER_PT 0x800 + +#define PMAP_NO_CR3 0xffffffffffffffff +#define PMAP_UCR3_NOMASK 0xffffffffffffffff + +#ifndef LOCORE + +#include <sys/kassert.h> +#include <sys/queue.h> +#include <sys/_cpuset.h> +#include <sys/_lock.h> +#include <sys/_mutex.h> +#include <sys/_pctrie.h> +#include <machine/_pmap.h> +#include <sys/_pv_entry.h> +#include <sys/_rangeset.h> +#include <sys/_smr.h> + +#include <vm/_vm_radix.h> + +typedef u_int64_t pd_entry_t; +typedef u_int64_t pt_entry_t; +typedef u_int64_t pdp_entry_t; +typedef u_int64_t pml4_entry_t; +typedef u_int64_t pml5_entry_t; + +/* + * Address of current address space page table maps and directories. + */ +#ifdef _KERNEL +#define addr_P4Tmap (KV4ADDR(PML4PML4I, 0, 0, 0)) +#define addr_P4Dmap (KV4ADDR(PML4PML4I, PML4PML4I, 0, 0)) +#define addr_P4DPmap (KV4ADDR(PML4PML4I, PML4PML4I, PML4PML4I, 0)) +#define addr_P4ML4map (KV4ADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I)) +#define addr_P4ML4pml4e (addr_PML4map + (PML4PML4I * sizeof(pml4_entry_t))) +#define P4Tmap ((pt_entry_t *)(addr_P4Tmap)) +#define P4Dmap ((pd_entry_t *)(addr_P4Dmap)) + +#define addr_P5Tmap (KV5ADDR(PML5PML5I, 0, 0, 0, 0)) +#define addr_P5Dmap (KV5ADDR(PML5PML5I, PML5PML5I, 0, 0, 0)) +#define addr_P5DPmap (KV5ADDR(PML5PML5I, PML5PML5I, PML5PML5I, 0, 0)) +#define addr_P5ML4map (KV5ADDR(PML5PML5I, PML5PML5I, PML5PML5I, PML5PML5I, 0)) +#define addr_P5ML5map \ + (KVADDR(PML5PML5I, PML5PML5I, PML5PML5I, PML5PML5I, PML5PML5I)) +#define addr_P5ML5pml5e (addr_P5ML5map + (PML5PML5I * sizeof(pml5_entry_t))) +#define P5Tmap ((pt_entry_t *)(addr_P5Tmap)) +#define P5Dmap ((pd_entry_t *)(addr_P5Dmap)) + +extern int nkpt; /* Initial number of kernel page tables */ +extern u_int64_t KPML4phys; /* physical address of kernel level 4 */ +extern u_int64_t KPML5phys; /* physical address of kernel level 5 */ + +/* + * virtual address to page table entry and + * to physical address. + * Note: these work recursively, thus vtopte of a pte will give + * the corresponding pde that in turn maps it. + */ +pt_entry_t *vtopte(vm_offset_t); +#define vtophys(va) pmap_kextract(((vm_offset_t) (va))) + +#define pte_load_store(ptep, pte) atomic_swap_long(ptep, pte) +#define pte_load_clear(ptep) atomic_swap_long(ptep, 0) +#define pte_store(ptep, pte) do { \ + *(u_long *)(ptep) = (u_long)(pte); \ +} while (0) +#define pte_clear(ptep) pte_store(ptep, 0) + +#define pde_store(pdep, pde) pte_store(pdep, pde) + +extern pt_entry_t pg_nx; + +#endif /* _KERNEL */ + +/* + * Pmap stuff + */ + +/* + * Locks + * (p) PV list lock + */ +struct md_page { + TAILQ_HEAD(, pv_entry) pv_list; /* (p) */ + int pv_gen; /* (p) */ + int pat_mode; +}; + +enum pmap_type { + PT_X86, /* regular x86 page tables */ + PT_EPT, /* Intel's nested page tables */ + PT_RVI, /* AMD's nested page tables */ +}; + +/* + * The kernel virtual address (KVA) of the level 4 page table page is always + * within the direct map (DMAP) region. + */ +struct pmap { + struct mtx pm_mtx; + pml4_entry_t *pm_pmltop; /* KVA of top level page table */ + pml4_entry_t *pm_pmltopu; /* KVA of user top page table */ + uint64_t pm_cr3; + uint64_t pm_ucr3; + TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ + cpuset_t pm_active; /* active on cpus */ + enum pmap_type pm_type; /* regular or nested tables */ + struct pmap_statistics pm_stats; /* pmap statistics */ + struct vm_radix pm_root; /* spare page table pages */ + long pm_eptgen; /* EPT pmap generation id */ + smr_t pm_eptsmr; + int pm_flags; + struct pmap_pcid *pm_pcidp; + struct rangeset pm_pkru; +}; + +/* flags */ +#define PMAP_NESTED_IPIMASK 0xff +#define PMAP_PDE_SUPERPAGE (1 << 8) /* supports 2MB superpages */ +#define PMAP_EMULATE_AD_BITS (1 << 9) /* needs A/D bits emulation */ +#define PMAP_SUPPORTS_EXEC_ONLY (1 << 10) /* execute only mappings ok */ + +typedef struct pmap *pmap_t; + +#ifdef _KERNEL +extern struct pmap kernel_pmap_store; +#define kernel_pmap (&kernel_pmap_store) + +#define PMAP_LOCK(pmap) mtx_lock(&(pmap)->pm_mtx) +#define PMAP_LOCK_ASSERT(pmap, type) \ + mtx_assert(&(pmap)->pm_mtx, (type)) +#define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx) +#define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \ + NULL, MTX_DEF | MTX_DUPOK) +#define PMAP_LOCKED(pmap) mtx_owned(&(pmap)->pm_mtx) +#define PMAP_MTX(pmap) (&(pmap)->pm_mtx) +#define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx) +#define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx) + +int pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags); +int pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype); + +extern caddr_t CADDR1; +extern pt_entry_t *CMAP1; +extern vm_offset_t virtual_avail; +extern vm_offset_t virtual_end; +extern vm_paddr_t dmaplimit; +extern int pmap_pcid_enabled; +extern int invpcid_works; +extern int invlpgb_works; +extern int invlpgb_maxcnt; +extern int pmap_pcid_invlpg_workaround; +extern int pmap_pcid_invlpg_workaround_uena; + +#define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode) +#define pmap_page_is_write_mapped(m) (((m)->a.flags & PGA_WRITEABLE) != 0) +#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz)) + +#define pmap_vm_page_alloc_check(m) \ + KASSERT(m->phys_addr < kernphys || \ + m->phys_addr >= kernphys + (vm_offset_t)&_end - KERNSTART, \ + ("allocating kernel page %p pa %#lx kernphys %#lx end %p", \ + m, m->phys_addr, kernphys, &_end)); + +struct thread; + +void pmap_activate_boot(pmap_t pmap); +void pmap_activate_sw(struct thread *); +void pmap_allow_2m_x_ept_recalculate(void); +void pmap_bootstrap(vm_paddr_t *); +int pmap_cache_bits(pmap_t pmap, int mode, bool is_pde); +int pmap_change_attr(vm_offset_t, vm_size_t, int); +int pmap_change_prot(vm_offset_t, vm_size_t, vm_prot_t); +void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate); +void pmap_flush_cache_range(vm_offset_t, vm_offset_t); +void pmap_flush_cache_phys_range(vm_paddr_t, vm_paddr_t, vm_memattr_t); +void pmap_init_pat(void); +void pmap_kenter(vm_offset_t va, vm_paddr_t pa); +void *pmap_kenter_temporary(vm_paddr_t pa, int i); +vm_paddr_t pmap_kextract(vm_offset_t); +void pmap_kremove(vm_offset_t); +int pmap_large_map(vm_paddr_t, vm_size_t, void **, vm_memattr_t); +void pmap_large_map_wb(void *sva, vm_size_t len); +void pmap_large_unmap(void *sva, vm_size_t len); +void *pmap_mapbios(vm_paddr_t, vm_size_t); +void *pmap_mapdev(vm_paddr_t, vm_size_t); +void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int); +void *pmap_mapdev_pciecfg(vm_paddr_t pa, vm_size_t size); +bool pmap_not_in_di(void); +bool pmap_page_is_mapped(vm_page_t m); +void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); +void pmap_page_set_memattr_noflush(vm_page_t m, vm_memattr_t ma); +void pmap_pinit_pml4(vm_page_t); +void pmap_pinit_pml5(vm_page_t); +bool pmap_ps_enabled(pmap_t pmap); +void pmap_unmapdev(void *, vm_size_t); +void pmap_invalidate_page(pmap_t, vm_offset_t); +void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); +void pmap_invalidate_all(pmap_t); +void pmap_invalidate_cache(void); +void pmap_invalidate_cache_pages(vm_page_t *pages, int count); +void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva); +void pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva); +void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num); +bool pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, bool); +void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, bool); +void pmap_map_delete(pmap_t, vm_offset_t, vm_offset_t); +void pmap_pti_add_kva(vm_offset_t sva, vm_offset_t eva, bool exec); +void pmap_pti_remove_kva(vm_offset_t sva, vm_offset_t eva); +void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3); +void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va); +void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva, + vm_offset_t eva); +int pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva); +int pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, + u_int keyidx, int flags); +void pmap_thread_init_invl_gen(struct thread *td); +int pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap); +void pmap_page_array_startup(long count); +vm_page_t pmap_page_alloc_below_4g(bool zeroed); + +#if defined(KASAN) || defined(KMSAN) +void pmap_san_enter(vm_offset_t); +#endif + +/* + * Returns a pointer to a set of CPUs on which the pmap is currently active. + * Note that the set can be modified without any mutual exclusion, so a copy + * must be made if a stable value is required. + */ +static __inline volatile cpuset_t * +pmap_invalidate_cpu_mask(pmap_t pmap) +{ + return (&pmap->pm_active); +} + +#if defined(_SYS_PCPU_H_) && defined(_MACHINE_CPUFUNC_H_) +/* + * It seems that AlderLake+ small cores have some microarchitectural + * bug, which results in the INVLPG instruction failing to flush all + * global TLB entries when PCID is enabled. Work around it for now, + * by doing global invalidation on small cores instead of INVLPG. + */ +static __inline void +pmap_invlpg(pmap_t pmap, vm_offset_t va) +{ + if (pmap == kernel_pmap && PCPU_GET(pcid_invlpg_workaround)) { + struct invpcid_descr d = { 0 }; + + invpcid(&d, INVPCID_CTXGLOB); + } else { + invlpg(va); + } +} +#endif /* sys/pcpu.h && machine/cpufunc.h */ + +#if defined(_SYS_PCPU_H_) +/* Return pcid for the pmap pmap on current cpu */ +static __inline uint32_t +pmap_get_pcid(pmap_t pmap) +{ + struct pmap_pcid *pcidp; + + MPASS(pmap_pcid_enabled); + pcidp = zpcpu_get(pmap->pm_pcidp); + return (pcidp->pm_pcid); +} +#endif /* sys/pcpu.h */ + +/* + * Invalidation request. PCPU pc_smp_tlb_op uses u_int instead of the + * enum to avoid both namespace and ABI issues (with enums). + */ +enum invl_op_codes { + INVL_OP_TLB = 1, + INVL_OP_TLB_INVPCID = 2, + INVL_OP_TLB_INVPCID_PTI = 3, + INVL_OP_TLB_PCID = 4, + INVL_OP_PGRNG = 5, + INVL_OP_PGRNG_INVPCID = 6, + INVL_OP_PGRNG_PCID = 7, + INVL_OP_PG = 8, + INVL_OP_PG_INVPCID = 9, + INVL_OP_PG_PCID = 10, + INVL_OP_CACHE = 11, +}; + +typedef void (*smp_invl_local_cb_t)(struct pmap *, vm_offset_t addr1, + vm_offset_t addr2); +typedef void (*smp_targeted_tlb_shootdown_t)(pmap_t, vm_offset_t, vm_offset_t, + smp_invl_local_cb_t, enum invl_op_codes); + +void smp_targeted_tlb_shootdown_native(pmap_t, vm_offset_t, vm_offset_t, + smp_invl_local_cb_t, enum invl_op_codes); +extern smp_targeted_tlb_shootdown_t smp_targeted_tlb_shootdown; + +#endif /* _KERNEL */ + +/* Return various clipped indexes for a given VA */ +static __inline vm_pindex_t +pmap_pte_index(vm_offset_t va) +{ + + return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); +} + +static __inline vm_pindex_t +pmap_pde_index(vm_offset_t va) +{ + + return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); +} + +static __inline vm_pindex_t +pmap_pdpe_index(vm_offset_t va) +{ + + return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); +} + +static __inline vm_pindex_t +pmap_pml4e_index(vm_offset_t va) +{ + + return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); +} + +static __inline vm_pindex_t +pmap_pml5e_index(vm_offset_t va) +{ + + return ((va >> PML5SHIFT) & ((1ul << NPML5EPGSHIFT) - 1)); +} + +struct kva_layout_s { + vm_offset_t kva_min; + vm_offset_t kva_max; + vm_offset_t dmap_low; /* DMAP_MIN_ADDRESS */ + vm_offset_t dmap_high; /* DMAP_MAX_ADDRESS */ + vm_offset_t lm_low; /* LARGEMAP_MIN_ADDRESS */ + vm_offset_t lm_high; /* LARGEMAP_MAX_ADDRESS */ + vm_offset_t km_low; /* VM_MIN_KERNEL_ADDRESS */ + vm_offset_t km_high; /* VM_MAX_KERNEL_ADDRESS */ + vm_offset_t rec_pt; + vm_offset_t kasan_shadow_low; /* KASAN_MIN_ADDRESS */ + vm_offset_t kasan_shadow_high; /* KASAN_MAX_ADDRESS */ + vm_offset_t kmsan_shadow_low; /* KMSAN_SHAD_MIN_ADDRESS */ + vm_offset_t kmsan_shadow_high; /* KMSAN_SHAD_MAX_ADDRESS */ + vm_offset_t kmsan_origin_low; /* KMSAN_ORIG_MIN_ADDRESS */ + vm_offset_t kmsan_origin_high; /* KMSAN_ORIG_MAX_ADDRESS */ +}; +extern struct kva_layout_s kva_layout; + +#endif /* !LOCORE */ + +#endif /* !_MACHINE_PMAP_H_ */ + +#endif /* __i386__ */ diff --git a/sys/amd64/include/pmc_mdep.h b/sys/amd64/include/pmc_mdep.h new file mode 100644 index 000000000000..5c20d8473855 --- /dev/null +++ b/sys/amd64/include/pmc_mdep.h @@ -0,0 +1,135 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2003-2008 Joseph Koshy + * Copyright (c) 2007 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by A. Joseph Koshy under + * sponsorship from the FreeBSD Foundation and Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* Machine dependent interfaces */ + +#ifndef _MACHINE_PMC_MDEP_H +#define _MACHINE_PMC_MDEP_H 1 + +#ifdef _KERNEL +struct pmc_mdep; +#endif + +#include <dev/hwpmc/hwpmc_amd.h> +#include <dev/hwpmc/hwpmc_core.h> +#include <dev/hwpmc/hwpmc_tsc.h> +#include <dev/hwpmc/hwpmc_uncore.h> + +/* + * Intel processors implementing V2 and later of the Intel performance + * measurement architecture have PMCs of the following classes: TSC, + * IAF, IAP, UCF and UCP. + */ +#define PMC_MDEP_CLASS_INDEX_TSC 1 +#define PMC_MDEP_CLASS_INDEX_K8 2 +#define PMC_MDEP_CLASS_INDEX_P4 2 +#define PMC_MDEP_CLASS_INDEX_IAP 2 +#define PMC_MDEP_CLASS_INDEX_IAF 3 +#define PMC_MDEP_CLASS_INDEX_UCP 4 +#define PMC_MDEP_CLASS_INDEX_UCF 5 + +/* + * On the amd64 platform we support the following PMCs. + * + * TSC The timestamp counter + * K8 AMD Athlon64 and Opteron PMCs in 64 bit mode. + * PIV Intel P4/HTT and P4/EMT64 + * IAP Intel Core/Core2/Atom CPUs in 64 bits mode. + * IAF Intel fixed-function PMCs in Core2 and later CPUs. + * UCP Intel Uncore programmable PMCs. + * UCF Intel Uncore fixed-function PMCs. + */ + +union pmc_md_op_pmcallocate { + struct pmc_md_amd_op_pmcallocate pm_amd; + struct pmc_md_iap_op_pmcallocate pm_iap; + struct pmc_md_ucf_op_pmcallocate pm_ucf; + struct pmc_md_ucp_op_pmcallocate pm_ucp; + uint64_t __pad[4]; +}; + +/* Logging */ +#define PMCLOG_READADDR PMCLOG_READ64 +#define PMCLOG_EMITADDR PMCLOG_EMIT64 + +#ifdef _KERNEL + +union pmc_md_pmc { + struct pmc_md_amd_pmc pm_amd; + struct pmc_md_iaf_pmc pm_iaf; + struct pmc_md_iap_pmc pm_iap; + struct pmc_md_ucf_pmc pm_ucf; + struct pmc_md_ucp_pmc pm_ucp; +}; + +#define PMC_TRAPFRAME_TO_PC(TF) ((TF)->tf_rip) +#define PMC_TRAPFRAME_TO_FP(TF) ((TF)->tf_rbp) +#define PMC_TRAPFRAME_TO_USER_SP(TF) ((TF)->tf_rsp) +#define PMC_TRAPFRAME_TO_KERNEL_SP(TF) ((TF)->tf_rsp) + +#define PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(I) \ + (((I) & 0xffffffff) == 0xe5894855) /* pushq %rbp; movq %rsp,%rbp */ +#define PMC_AT_FUNCTION_PROLOGUE_MOV_SP_BP(I) \ + (((I) & 0x00ffffff) == 0x00e58948) /* movq %rsp,%rbp */ +#define PMC_AT_FUNCTION_EPILOGUE_RET(I) \ + (((I) & 0xFF) == 0xC3) /* ret */ + +#define PMC_IN_TRAP_HANDLER(PC) \ + ((PC) >= (uintptr_t) start_exceptions && \ + (PC) < (uintptr_t) end_exceptions) + +#define PMC_IN_KERNEL_STACK(va) kstack_contains(curthread, (va), sizeof(va)) +#define PMC_IN_KERNEL(va) INKERNEL(va) +#define PMC_IN_USERSPACE(va) ((va) <= VM_MAXUSER_ADDRESS) + +/* Build a fake kernel trapframe from current instruction pointer. */ +#define PMC_FAKE_TRAPFRAME(TF) \ + do { \ + (TF)->tf_cs = 0; (TF)->tf_rflags = 0; \ + __asm __volatile("movq %%rbp,%0" : "=r" ((TF)->tf_rbp)); \ + __asm __volatile("movq %%rsp,%0" : "=r" ((TF)->tf_rsp)); \ + __asm __volatile("call 1f \n\t1: pop %0" : "=r"((TF)->tf_rip)); \ + } while (0) + +/* + * Prototypes + */ + +void start_exceptions(void), end_exceptions(void); + +struct pmc_mdep *pmc_amd_initialize(void); +void pmc_amd_finalize(struct pmc_mdep *_md); +struct pmc_mdep *pmc_intel_initialize(void); +void pmc_intel_finalize(struct pmc_mdep *_md); + +#endif /* _KERNEL */ +#endif /* _MACHINE_PMC_MDEP_H */ diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h new file mode 100644 index 000000000000..ff547981d599 --- /dev/null +++ b/sys/amd64/include/proc.h @@ -0,0 +1,113 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __i386__ +#include <i386/proc.h> +#else /* !__i386__ */ + +#ifndef _MACHINE_PROC_H_ +#define _MACHINE_PROC_H_ + +#include <sys/queue.h> +#include <machine/pcb.h> +#include <machine/segments.h> + +/* + * List of locks + * c - proc lock + * k - only accessed by curthread + * pp - pmap.c:invl_gen_mtx + */ + +struct proc_ldt { + caddr_t ldt_base; + int ldt_refcnt; +}; + +#define PMAP_INVL_GEN_NEXT_INVALID 0x1ULL +struct pmap_invl_gen { + u_long gen; /* (k) */ + union { + LIST_ENTRY(pmap_invl_gen) link; /* (pp) */ + struct { + struct pmap_invl_gen *next; + u_char saved_pri; + }; + }; +} __aligned(16); + +/* + * Machine-dependent part of the proc structure for AMD64. + */ +struct mdthread { + int md_spinlock_count; /* (k) */ + register_t md_saved_flags; /* (k) */ + register_t md_spurflt_addr; /* (k) Spurious page fault address. */ + struct pmap_invl_gen md_invl_gen; + register_t md_efirt_tmp; /* (k) */ + int md_efirt_dis_pf; /* (k) */ + struct pcb md_pcb; + vm_offset_t md_stack_base; + void *md_usr_fpu_save; +}; + +struct mdproc { + struct proc_ldt *md_ldt; /* (t) per-process ldt */ + struct system_segment_descriptor md_ldt_sd; + u_int md_flags; /* (c) md process flags P_MD */ +}; + +#define P_MD_KPTI 0x00000001 /* Enable KPTI on exec */ +#define P_MD_LA48 0x00000002 /* Request LA48 after exec */ +#define P_MD_LA57 0x00000004 /* Request LA57 after exec */ + +#define KINFO_PROC_SIZE 1088 +#define KINFO_PROC32_SIZE 768 + +#ifdef _KERNEL + +struct proc_ldt *user_ldt_alloc(struct proc *, int); +void user_ldt_free(struct thread *); +struct sysarch_args; +int sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space); +int amd64_set_ldt_data(struct thread *td, int start, int num, + struct user_segment_descriptor *descs); + +extern struct mtx dt_lock; +extern int max_ldt_segment; + +#define NARGREGS 6 + +#endif /* _KERNEL */ + +#endif /* !_MACHINE_PROC_H_ */ + +#endif /* __i386__ */ diff --git a/sys/amd64/include/procctl.h b/sys/amd64/include/procctl.h new file mode 100644 index 000000000000..a3875dad78b7 --- /dev/null +++ b/sys/amd64/include/procctl.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/procctl.h> diff --git a/sys/amd64/include/profile.h b/sys/amd64/include/profile.h new file mode 100644 index 000000000000..ec8e90c106e9 --- /dev/null +++ b/sys/amd64/include/profile.h @@ -0,0 +1,121 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __i386__ +#include <i386/profile.h> +#else /* !__i386__ */ + +#ifndef _MACHINE_PROFILE_H_ +#define _MACHINE_PROFILE_H_ + +#ifndef _KERNEL + +#include <sys/cdefs.h> + +#define FUNCTION_ALIGNMENT 4 + +#define _MCOUNT_DECL \ +static void _mcount(uintfptr_t frompc, uintfptr_t selfpc) __used; \ +static void _mcount + +#define MCOUNT __asm(" \n\ + .text \n\ + .p2align 4,0x90 \n\ + .globl .mcount \n\ + .type .mcount,@function \n\ +.mcount: \n\ + pushq %rdi \n\ + pushq %rsi \n\ + pushq %rdx \n\ + pushq %rcx \n\ + pushq %r8 \n\ + pushq %r9 \n\ + pushq %rax \n\ + movq 8(%rbp),%rdi \n\ + movq 7*8(%rsp),%rsi \n\ + call _mcount \n\ + popq %rax \n\ + popq %r9 \n\ + popq %r8 \n\ + popq %rcx \n\ + popq %rdx \n\ + popq %rsi \n\ + popq %rdi \n\ + ret \n\ + .size .mcount, . - .mcount"); +#if 0 +/* + * We could use this, except it doesn't preserve the registers that were + * being passed with arguments to the function that we were inserted + * into. I've left it here as documentation of what the code above is + * supposed to do. + */ +#define MCOUNT \ +void \ +mcount() \ +{ \ + uintfptr_t selfpc, frompc; \ + /* \ + * Find the return address for mcount, \ + * and the return address for mcount's caller. \ + * \ + * selfpc = pc pushed by call to mcount \ + */ \ + __asm("movq 8(%%rbp),%0" : "=r" (selfpc)); \ + /* \ + * frompc = pc pushed by call to mcount's caller. \ + * The caller's stack frame has already been built, so %rbp is \ + * the caller's frame pointer. The caller's raddr is in the \ + * caller's frame following the caller's caller's frame pointer.\ + */ \ + __asm("movq (%%rbp),%0" : "=r" (frompc)); \ + frompc = ((uintfptr_t *)frompc)[1]; \ + _mcount(frompc, selfpc); \ +} +#endif + +typedef u_long uintfptr_t; + +/* + * An unsigned integral type that can hold non-negative difference between + * function pointers. + */ +typedef u_long fptrdiff_t; + +__BEGIN_DECLS +void mcount(void) __asm(".mcount"); +__END_DECLS + +#endif /* !_KERNEL */ + +#endif /* !_MACHINE_PROFILE_H_ */ + +#endif /* __i386__ */ diff --git a/sys/amd64/include/psl.h b/sys/amd64/include/psl.h new file mode 100644 index 000000000000..fd047b0ea72d --- /dev/null +++ b/sys/amd64/include/psl.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/psl.h> diff --git a/sys/amd64/include/pte.h b/sys/amd64/include/pte.h new file mode 100644 index 000000000000..5195bdc224f8 --- /dev/null +++ b/sys/amd64/include/pte.h @@ -0,0 +1,104 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 2003 Peter Wemm. + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Derived from hp300 version by Mike Hibler, this version by William + * Jolitz uses a recursive map [a pde points to the page directory] to + * map the page tables using the pagetables themselves. This is done to + * reduce the impact on kernel virtual memory for lots of sparse address + * space, and to reduce the cost of memory to each process. + */ + +#ifndef _MACHINE_PTE_H_ +#define _MACHINE_PTE_H_ + +/* + * Page-directory and page-table entries follow this format, with a few + * of the fields not present here and there, depending on a lot of things. + */ + /* ---- Intel Nomenclature ---- */ +#define X86_PG_V 0x001 /* P Valid */ +#define X86_PG_RW 0x002 /* R/W Read/Write */ +#define X86_PG_U 0x004 /* U/S User/Supervisor */ +#define X86_PG_NC_PWT 0x008 /* PWT Write through */ +#define X86_PG_NC_PCD 0x010 /* PCD Cache disable */ +#define X86_PG_A 0x020 /* A Accessed */ +#define X86_PG_M 0x040 /* D Dirty */ +#define X86_PG_PS 0x080 /* PS Page size (0=4k,1=2M) */ +#define X86_PG_PTE_PAT 0x080 /* PAT PAT index */ +#define X86_PG_G 0x100 /* G Global */ +#define X86_PG_AVAIL1 0x200 /* / Available for system */ +#define X86_PG_AVAIL2 0x400 /* < programmers use */ +#define X86_PG_AVAIL3 0x800 /* \ */ +#define X86_PG_PDE_PAT 0x1000 /* PAT PAT index */ +#define X86_PG_PKU(idx) ((pt_entry_t)idx << 59) +#define X86_PG_NX (1ul<<63) /* No-execute */ +#define X86_PG_AVAIL(x) (1ul << (x)) + +/* Page level cache control fields used to determine the PAT type */ +#define X86_PG_PDE_CACHE (X86_PG_PDE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD) +#define X86_PG_PTE_CACHE (X86_PG_PTE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD) + +/* Protection keys indexes */ +#define PMAP_MAX_PKRU_IDX 0xf +#define X86_PG_PKU_MASK X86_PG_PKU(PMAP_MAX_PKRU_IDX) + +/* + * Intel extended page table (EPT) bit definitions. + */ +#define EPT_PG_READ 0x001 /* R Read */ +#define EPT_PG_WRITE 0x002 /* W Write */ +#define EPT_PG_EXECUTE 0x004 /* X Execute */ +#define EPT_PG_IGNORE_PAT 0x040 /* IPAT Ignore PAT */ +#define EPT_PG_PS 0x080 /* PS Page size */ +#define EPT_PG_A 0x100 /* A Accessed */ +#define EPT_PG_M 0x200 /* D Dirty */ +#define EPT_PG_MEMORY_TYPE(x) ((x) << 3) /* MT Memory Type */ + +#define PG_FRAME (0x000ffffffffff000ul) +#define PG_PS_FRAME (0x000fffffffe00000ul) +#define PG_PS_PDP_FRAME (0x000fffffc0000000ul) + +/* + * Page Protection Exception bits + */ +#define PGEX_P 0x01 /* Protection violation vs. not present */ +#define PGEX_W 0x02 /* during a Write cycle */ +#define PGEX_U 0x04 /* access from User mode (UPL) */ +#define PGEX_RSV 0x08 /* reserved PTE field is non-zero */ +#define PGEX_I 0x10 /* during an instruction fetch */ +#define PGEX_PK 0x20 /* protection key violation */ +#define PGEX_SGX 0x8000 /* SGX-related */ + +#endif diff --git a/sys/amd64/include/ptrace.h b/sys/amd64/include/ptrace.h new file mode 100644 index 000000000000..7506c43f5168 --- /dev/null +++ b/sys/amd64/include/ptrace.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/ptrace.h> diff --git a/sys/amd64/include/pvclock.h b/sys/amd64/include/pvclock.h new file mode 100644 index 000000000000..91cb7604b171 --- /dev/null +++ b/sys/amd64/include/pvclock.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/pvclock.h> diff --git a/sys/amd64/include/reg.h b/sys/amd64/include/reg.h new file mode 100644 index 000000000000..37ddd32142b6 --- /dev/null +++ b/sys/amd64/include/reg.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/reg.h> diff --git a/sys/amd64/include/reloc.h b/sys/amd64/include/reloc.h new file mode 100644 index 000000000000..851019096f72 --- /dev/null +++ b/sys/amd64/include/reloc.h @@ -0,0 +1,48 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _I386_MACHINE_RELOC_H_ +#define _I386_MACHINE_RELOC_H_ + +/* Relocation format. */ +struct relocation_info { + int r_address; /* offset in text or data segment */ + unsigned int r_symbolnum : 24, /* ordinal number of add symbol */ + r_pcrel : 1, /* 1 if value should be pc-relative */ + r_length : 2, /* log base 2 of value's width */ + r_extern : 1, /* 1 if need to add symbol to value */ + r_baserel : 1, /* linkage table relative */ + r_jmptable : 1, /* relocate to jump table */ + r_relative : 1, /* load address relative */ + r_copy : 1; /* run time copy */ +}; + +#endif diff --git a/sys/amd64/include/resource.h b/sys/amd64/include/resource.h new file mode 100644 index 000000000000..d44fd653cc84 --- /dev/null +++ b/sys/amd64/include/resource.h @@ -0,0 +1,44 @@ +/*- + * Copyright 1998 Massachusetts Institute of Technology + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MACHINE_RESOURCE_H_ +#define _MACHINE_RESOURCE_H_ 1 + +/* + * Definitions of resource types for Intel Architecture machines + * with support for legacy ISA devices and drivers. + */ + +#define SYS_RES_IRQ 1 /* interrupt lines */ +#define SYS_RES_DRQ 2 /* isa dma lines */ +#define SYS_RES_MEMORY 3 /* i/o memory */ +#define SYS_RES_IOPORT 4 /* i/o ports */ +#define PCI_RES_BUS 5 /* PCI bus numbers */ + +#endif /* !_MACHINE_RESOURCE_H_ */ diff --git a/sys/amd64/include/sdt_machdep.h b/sys/amd64/include/sdt_machdep.h new file mode 100644 index 000000000000..497ca26d5277 --- /dev/null +++ b/sys/amd64/include/sdt_machdep.h @@ -0,0 +1,12 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Mark Johnston <markj@FreeBSD.org> + */ + +#ifndef _SYS_SDT_MACHDEP_H_ +#define _SYS_SDT_MACHDEP_H_ + +#define _SDT_ASM_PATCH_INSTR "nopw 0(%%rax,%%rax,1)" + +#endif diff --git a/sys/amd64/include/segments.h b/sys/amd64/include/segments.h new file mode 100644 index 000000000000..7706e4cdb032 --- /dev/null +++ b/sys/amd64/include/segments.h @@ -0,0 +1,110 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1989, 1990 William F. Jolitz + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __i386__ +#include <i386/segments.h> +#else /* !__i386__ */ + +#ifndef _MACHINE_SEGMENTS_H_ +#define _MACHINE_SEGMENTS_H_ + +/* + * AMD64 Segmentation Data Structures and definitions + */ + +#include <x86/segments.h> + +/* + * System segment descriptors (128 bit wide) + */ +struct system_segment_descriptor { + u_int64_t sd_lolimit:16; /* segment extent (lsb) */ + u_int64_t sd_lobase:24; /* segment base address (lsb) */ + u_int64_t sd_type:5; /* segment type */ + u_int64_t sd_dpl:2; /* segment descriptor priority level */ + u_int64_t sd_p:1; /* segment descriptor present */ + u_int64_t sd_hilimit:4; /* segment extent (msb) */ + u_int64_t sd_xx0:3; /* unused */ + u_int64_t sd_gran:1; /* limit granularity (byte/page units)*/ + u_int64_t sd_hibase:40 __packed;/* segment base address (msb) */ + u_int64_t sd_xx1:8; + u_int64_t sd_mbz:5; /* MUST be zero */ + u_int64_t sd_xx2:19; +} __packed; + +/* + * Software definitions are in this convenient format, + * which are translated into inconvenient segment descriptors + * when needed to be used by the 386 hardware + */ + +struct soft_segment_descriptor { + unsigned long ssd_base; /* segment base address */ + unsigned long ssd_limit; /* segment extent */ + unsigned long ssd_type:5; /* segment type */ + unsigned long ssd_dpl:2; /* segment descriptor priority level */ + unsigned long ssd_p:1; /* segment descriptor present */ + unsigned long ssd_long:1; /* long mode (for %cs) */ + unsigned long ssd_def32:1; /* default 32 vs 16 bit size */ + unsigned long ssd_gran:1; /* limit granularity (byte/page units)*/ +} __packed; + +/* + * region descriptors, used to load gdt/idt tables before segments yet exist. + */ +struct region_descriptor { + uint64_t rd_limit:16; /* segment extent */ + uint64_t rd_base:64 __packed; /* base address */ +} __packed; + +#ifdef _KERNEL +extern struct soft_segment_descriptor gdt_segs[]; +extern struct gate_descriptor *idt; +extern struct region_descriptor r_idt; + +void lgdt(struct region_descriptor *rdp); +void sdtossd(struct user_segment_descriptor *sdp, + struct soft_segment_descriptor *ssdp); +void ssdtosd(struct soft_segment_descriptor *ssdp, + struct user_segment_descriptor *sdp); +void ssdtosyssd(struct soft_segment_descriptor *ssdp, + struct system_segment_descriptor *sdp); +void update_gdt_gsbase(struct thread *td, uint32_t base); +void update_gdt_fsbase(struct thread *td, uint32_t base); +#endif /* _KERNEL */ + +#endif /* !_MACHINE_SEGMENTS_H_ */ + +#endif /* __i386__ */ diff --git a/sys/amd64/include/setjmp.h b/sys/amd64/include/setjmp.h new file mode 100644 index 000000000000..306fb8424274 --- /dev/null +++ b/sys/amd64/include/setjmp.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/setjmp.h> diff --git a/sys/amd64/include/sf_buf.h b/sys/amd64/include/sf_buf.h new file mode 100644 index 000000000000..4a64928ae141 --- /dev/null +++ b/sys/amd64/include/sf_buf.h @@ -0,0 +1,53 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2003, 2005 Alan L. Cox <alc@cs.rice.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MACHINE_SF_BUF_H_ +#define _MACHINE_SF_BUF_H_ + +#ifdef __amd64__ +/* + * On this machine, the only purpose for which sf_buf is used is to implement + * an opaque pointer required by the machine-independent parts of the kernel. + * That pointer references the vm_page that is "mapped" by the sf_buf. The + * actual mapping is provided by the direct virtual-to-physical mapping. + */ +static inline vm_offset_t +sf_buf_kva(struct sf_buf *sf) +{ + + return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS((vm_page_t)sf))); +} + +static inline vm_page_t +sf_buf_page(struct sf_buf *sf) +{ + + return ((vm_page_t)sf); +} +#endif /* __amd64__ */ +#endif /* !_MACHINE_SF_BUF_H_ */ diff --git a/sys/amd64/include/sgx.h b/sys/amd64/include/sgx.h new file mode 100644 index 000000000000..1e73385f74be --- /dev/null +++ b/sys/amd64/include/sgx.h @@ -0,0 +1,62 @@ +/*- + * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com> + * All rights reserved. + * + * This software was developed by BAE Systems, the University of Cambridge + * Computer Laboratory, and Memorial University under DARPA/AFRL contract + * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing + * (TC) research program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* User-visible header. */ + +#ifndef _MACHINE_SGX_H_ +#define _MACHINE_SGX_H_ + +#define SGX_MAGIC 0xA4 +#define SGX_IOC_ENCLAVE_CREATE \ + _IOW(SGX_MAGIC, 0x00, struct sgx_enclave_create) +#define SGX_IOC_ENCLAVE_ADD_PAGE \ + _IOW(SGX_MAGIC, 0x01, struct sgx_enclave_add_page) +#define SGX_IOC_ENCLAVE_INIT \ + _IOW(SGX_MAGIC, 0x02, struct sgx_enclave_init) + +struct sgx_enclave_create { + uint64_t src; +} __packed; + +struct sgx_enclave_add_page { + uint64_t addr; + uint64_t src; + uint64_t secinfo; + uint16_t mrmask; +} __packed; + +struct sgx_enclave_init { + uint64_t addr; + uint64_t sigstruct; + uint64_t einittoken; +} __packed; + +#endif /* !_MACHINE_SGX_H_ */ diff --git a/sys/amd64/include/sgxreg.h b/sys/amd64/include/sgxreg.h new file mode 100644 index 000000000000..180c0ae63933 --- /dev/null +++ b/sys/amd64/include/sgxreg.h @@ -0,0 +1,153 @@ +/*- + * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com> + * All rights reserved. + * + * This software was developed by BAE Systems, the University of Cambridge + * Computer Laboratory, and Memorial University under DARPA/AFRL contract + * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing + * (TC) research program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* Machine-defined variables. */ + +#ifndef _MACHINE_SGXREG_H_ +#define _MACHINE_SGXREG_H_ + +/* Error codes. */ +#define SGX_SUCCESS 0 +#define SGX_INVALID_SIG_STRUCT 1 /* EINIT */ +#define SGX_INVALID_ATTRIBUTE 2 /* EINIT, EGETKEY */ +#define SGX_BLSTATE 3 /* EBLOCK */ +#define SGX_INVALID_MEASUREMENT 4 /* EINIT */ +#define SGX_NOTBLOCKABLE 5 /* EBLOCK */ +#define SGX_PG_INVLD 6 /* EBLOCK */ +#define SGX_LOCKFAIL 7 /* EBLOCK, EMODPR, EMODT */ +#define SGX_INVALID_SIGNATURE 8 /* EINIT */ +#define SGX_MAC_COMPARE_FAIL 9 /* ELDB, ELDU */ +#define SGX_PAGE_NOT_BLOCKED 10 /* EWB */ +#define SGX_NOT_TRACKED 11 /* EWB, EACCEPT */ +#define SGX_VA_SLOT_OCCUPIED 12 /* EWB */ +#define SGX_CHILD_PRESENT 13 /* EWB, EREMOVE */ +#define SGX_ENCLAVE_ACT 14 /* EREMOVE */ +#define SGX_ENTRYEPOCH_LOCKED 15 /* EBLOCK */ +#define SGX_INVALID_EINIT_TOKEN 16 /* EINIT */ +#define SGX_PREV_TRK_INCMPL 17 /* ETRACK */ +#define SGX_PG_IS_SECS 18 /* EBLOCK */ +#define SGX_PAGE_ATTRIBUTES_MISMATCH 19 /* EACCEPT, EACCEPTCOPY */ +#define SGX_PAGE_NOT_MODIFIABLE 20 /* EMODPR, EMODT */ +#define SGX_INVALID_CPUSVN 32 /* EINIT, EGETKEY */ +#define SGX_INVALID_ISVSVN 64 /* EGETKEY */ +#define SGX_UNMASKED_EVENT 128 /* EINIT */ +#define SGX_INVALID_KEYNAME 256 /* EGETKEY */ + +/* + * 2.10 Page Information (PAGEINFO) + * PAGEINFO is an architectural data structure that is used as a parameter + * to the EPC-management instructions. It requires 32-Byte alignment. + */ +struct page_info { + uint64_t linaddr; + uint64_t srcpge; + union { + struct secinfo *secinfo; + uint64_t pcmd; + }; + uint64_t secs; +} __aligned(32); + +/* + * 2.11 Security Information (SECINFO) + * The SECINFO data structure holds meta-data about an enclave page. + */ +struct secinfo { + uint64_t flags; +#define SECINFO_FLAGS_PT_S 8 /* Page type shift */ +#define SECINFO_FLAGS_PT_M (0xff << SECINFO_FLAGS_PT_S) + uint64_t reserved[7]; +} __aligned(64); + +/* + * 2.7.1 ATTRIBUTES + * The ATTRIBUTES data structure is comprised of bit-granular fields that + * are used in the SECS, CPUID enumeration, the REPORT and the KEYREQUEST + * structures. + */ +struct secs_attr { + uint8_t reserved1: 1; + uint8_t debug: 1; + uint8_t mode64bit: 1; + uint8_t reserved2: 1; + uint8_t provisionkey: 1; + uint8_t einittokenkey: 1; + uint8_t reserved3: 2; +#define SECS_ATTR_RSV4_SIZE 7 + uint8_t reserved4[SECS_ATTR_RSV4_SIZE]; + uint64_t xfrm; /* X-Feature Request Mask */ +}; + +/* + * 2.7 SGX Enclave Control Structure (SECS) + * The SECS data structure requires 4K-Bytes alignment. + */ +struct secs { + uint64_t size; + uint64_t base; + uint32_t ssa_frame_size; + uint32_t misc_select; +#define SECS_RSV1_SIZE 24 + uint8_t reserved1[SECS_RSV1_SIZE]; + struct secs_attr attributes; + uint8_t mr_enclave[32]; +#define SECS_RSV2_SIZE 32 + uint8_t reserved2[SECS_RSV2_SIZE]; + uint8_t mr_signer[32]; +#define SECS_RSV3_SIZE 96 + uint8_t reserved3[SECS_RSV3_SIZE]; + uint16_t isv_prod_id; + uint16_t isv_svn; +#define SECS_RSV4_SIZE 3836 + uint8_t reserved4[SECS_RSV4_SIZE]; +}; + +/* + * 2.8 Thread Control Structure (TCS) + * Each executing thread in the enclave is associated with a + * Thread Control Structure. It requires 4K-Bytes alignment. + */ +struct tcs { + uint64_t reserved1; + uint64_t flags; + uint64_t ossa; + uint32_t cssa; + uint32_t nssa; + uint64_t oentry; + uint64_t reserved2; + uint64_t ofsbasgx; + uint64_t ogsbasgx; + uint32_t fslimit; + uint32_t gslimit; + uint64_t reserved3[503]; +}; + +#endif /* !_MACHINE_SGXREG_H_ */ diff --git a/sys/amd64/include/sigframe.h b/sys/amd64/include/sigframe.h new file mode 100644 index 000000000000..1bf3f9913ad0 --- /dev/null +++ b/sys/amd64/include/sigframe.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/sigframe.h> diff --git a/sys/amd64/include/signal.h b/sys/amd64/include/signal.h new file mode 100644 index 000000000000..8fb36cb1c94c --- /dev/null +++ b/sys/amd64/include/signal.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/signal.h> diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h new file mode 100644 index 000000000000..bff92570ff82 --- /dev/null +++ b/sys/amd64/include/smp.h @@ -0,0 +1,42 @@ +/*- + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * <phk@FreeBSD.org> wrote this file. As long as you retain this notice you + * can do whatever you want with this stuff. If we meet some day, and you think + * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp + * ---------------------------------------------------------------------------- + * + */ + +#ifndef _MACHINE_SMP_H_ +#define _MACHINE_SMP_H_ + +#ifdef _KERNEL + +#ifndef LOCORE + +#include <x86/x86_smp.h> + +/* global symbols in mpboot.S */ +extern char mptramp_start[]; +extern u_int32_t mptramp_pagetables; + +/* IPI handlers */ +inthand_t + IDTVEC(justreturn), /* interrupt CPU with minimum overhead */ + IDTVEC(justreturn1_pti), + IDTVEC(invlop_pti), + IDTVEC(invlop), + IDTVEC(ipi_intr_bitmap_handler_pti), + IDTVEC(ipi_swi_pti), + IDTVEC(cpustop_pti), + IDTVEC(cpususpend_pti), + IDTVEC(rendezvous_pti); + +void invlop_handler(void); +int start_all_aps(void); + +#endif /* !LOCORE */ + +#endif /* _KERNEL */ +#endif /* _MACHINE_SMP_H_ */ diff --git a/sys/amd64/include/specialreg.h b/sys/amd64/include/specialreg.h new file mode 100644 index 000000000000..39528555d328 --- /dev/null +++ b/sys/amd64/include/specialreg.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/specialreg.h> diff --git a/sys/amd64/include/stack.h b/sys/amd64/include/stack.h new file mode 100644 index 000000000000..848f35aad1a1 --- /dev/null +++ b/sys/amd64/include/stack.h @@ -0,0 +1,29 @@ +/* + * This file is in the public domain. + */ + +#ifndef _MACHINE_STACK_H_ +#define _MACHINE_STACK_H_ + +#include <x86/stack.h> + +#ifdef _SYS_PROC_H_ + +/* Get the current kernel thread stack usage. */ +#define GET_STACK_USAGE(total, used) do { \ + struct thread *td = curthread; \ + (total) = td->td_kstack_pages * PAGE_SIZE; \ + (used) = (char *)td->td_kstack + \ + td->td_kstack_pages * PAGE_SIZE - \ + (char *)&td; \ +} while (0) + +static __inline bool +kstack_contains(struct thread *td, vm_offset_t va, size_t len) +{ + return (va >= td->td_kstack && va + len >= va && + va + len <= td->td_kstack + td->td_kstack_pages * PAGE_SIZE); +} +#endif /* _SYS_PROC_H_ */ + +#endif diff --git a/sys/amd64/include/stdarg.h b/sys/amd64/include/stdarg.h new file mode 100644 index 000000000000..1739bbeb9ea0 --- /dev/null +++ b/sys/amd64/include/stdarg.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/stdarg.h> diff --git a/sys/amd64/include/sysarch.h b/sys/amd64/include/sysarch.h new file mode 100644 index 000000000000..47cecf1d27c3 --- /dev/null +++ b/sys/amd64/include/sysarch.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/sysarch.h> diff --git a/sys/amd64/include/tls.h b/sys/amd64/include/tls.h new file mode 100644 index 000000000000..15a4a407c0e0 --- /dev/null +++ b/sys/amd64/include/tls.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/tls.h> diff --git a/sys/amd64/include/trap.h b/sys/amd64/include/trap.h new file mode 100644 index 000000000000..20f898d15b94 --- /dev/null +++ b/sys/amd64/include/trap.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/trap.h> diff --git a/sys/amd64/include/tss.h b/sys/amd64/include/tss.h new file mode 100644 index 000000000000..8ce6bbc0ada7 --- /dev/null +++ b/sys/amd64/include/tss.h @@ -0,0 +1,65 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MACHINE_TSS_H_ +#define _MACHINE_TSS_H_ 1 + +/* + * amd64 Context Data Type + * + * The alignment is pretty messed up here due to reuse of the original 32 bit + * fields. It might be worth trying to set the tss on a +4 byte offset to + * make the 64 bit fields aligned in practice. + */ +struct amd64tss { + u_int32_t tss_rsvd0; + u_int64_t tss_rsp0 __packed; /* kernel stack pointer ring 0 */ + u_int64_t tss_rsp1 __packed; /* kernel stack pointer ring 1 */ + u_int64_t tss_rsp2 __packed; /* kernel stack pointer ring 2 */ + u_int32_t tss_rsvd1; + u_int32_t tss_rsvd2; + u_int64_t tss_ist1 __packed; /* Interrupt stack table 1 */ + u_int64_t tss_ist2 __packed; /* Interrupt stack table 2 */ + u_int64_t tss_ist3 __packed; /* Interrupt stack table 3 */ + u_int64_t tss_ist4 __packed; /* Interrupt stack table 4 */ + u_int64_t tss_ist5 __packed; /* Interrupt stack table 5 */ + u_int64_t tss_ist6 __packed; /* Interrupt stack table 6 */ + u_int64_t tss_ist7 __packed; /* Interrupt stack table 7 */ + u_int32_t tss_rsvd3; + u_int32_t tss_rsvd4; + u_int16_t tss_rsvd5; + u_int16_t tss_iobase; /* io bitmap offset */ +}; + +#endif /* _MACHINE_TSS_H_ */ diff --git a/sys/amd64/include/ucontext.h b/sys/amd64/include/ucontext.h new file mode 100644 index 000000000000..aa3a439bcdad --- /dev/null +++ b/sys/amd64/include/ucontext.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/ucontext.h> diff --git a/sys/amd64/include/vdso.h b/sys/amd64/include/vdso.h new file mode 100644 index 000000000000..77fc7655d55e --- /dev/null +++ b/sys/amd64/include/vdso.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/vdso.h> diff --git a/sys/amd64/include/vm.h b/sys/amd64/include/vm.h new file mode 100644 index 000000000000..2e156b1cb1be --- /dev/null +++ b/sys/amd64/include/vm.h @@ -0,0 +1,46 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2009 Hudson River Trading LLC + * Written by: John H. Baldwin <jhb@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MACHINE_VM_H_ +#define _MACHINE_VM_H_ + +#include <machine/specialreg.h> + +/* Memory attributes. */ +#define VM_MEMATTR_UNCACHEABLE ((vm_memattr_t)PAT_UNCACHEABLE) +#define VM_MEMATTR_WRITE_COMBINING ((vm_memattr_t)PAT_WRITE_COMBINING) +#define VM_MEMATTR_WRITE_THROUGH ((vm_memattr_t)PAT_WRITE_THROUGH) +#define VM_MEMATTR_WRITE_PROTECTED ((vm_memattr_t)PAT_WRITE_PROTECTED) +#define VM_MEMATTR_WRITE_BACK ((vm_memattr_t)PAT_WRITE_BACK) +#define VM_MEMATTR_WEAK_UNCACHEABLE ((vm_memattr_t)PAT_UNCACHED) + +#define VM_MEMATTR_DEFAULT VM_MEMATTR_WRITE_BACK +#define VM_MEMATTR_DEVICE VM_MEMATTR_UNCACHEABLE + +#endif /* !_MACHINE_VM_H_ */ diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h new file mode 100644 index 000000000000..0b3daed4f69e --- /dev/null +++ b/sys/amd64/include/vmm.h @@ -0,0 +1,788 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_H_ +#define _VMM_H_ + +#include <sys/cpuset.h> +#include <sys/sdt.h> +#include <x86/segments.h> + +struct vcpu; +struct vm_snapshot_meta; + +#ifdef _KERNEL +SDT_PROVIDER_DECLARE(vmm); +#endif + +enum vm_suspend_how { + VM_SUSPEND_NONE, + VM_SUSPEND_RESET, + VM_SUSPEND_POWEROFF, + VM_SUSPEND_HALT, + VM_SUSPEND_TRIPLEFAULT, + VM_SUSPEND_LAST +}; + +/* + * Identifiers for architecturally defined registers. + */ +enum vm_reg_name { + VM_REG_GUEST_RAX, + VM_REG_GUEST_RBX, + VM_REG_GUEST_RCX, + VM_REG_GUEST_RDX, + VM_REG_GUEST_RSI, + VM_REG_GUEST_RDI, + VM_REG_GUEST_RBP, + VM_REG_GUEST_R8, + VM_REG_GUEST_R9, + VM_REG_GUEST_R10, + VM_REG_GUEST_R11, + VM_REG_GUEST_R12, + VM_REG_GUEST_R13, + VM_REG_GUEST_R14, + VM_REG_GUEST_R15, + VM_REG_GUEST_CR0, + VM_REG_GUEST_CR3, + VM_REG_GUEST_CR4, + VM_REG_GUEST_DR7, + VM_REG_GUEST_RSP, + VM_REG_GUEST_RIP, + VM_REG_GUEST_RFLAGS, + VM_REG_GUEST_ES, + VM_REG_GUEST_CS, + VM_REG_GUEST_SS, + VM_REG_GUEST_DS, + VM_REG_GUEST_FS, + VM_REG_GUEST_GS, + VM_REG_GUEST_LDTR, + VM_REG_GUEST_TR, + VM_REG_GUEST_IDTR, + VM_REG_GUEST_GDTR, + VM_REG_GUEST_EFER, + VM_REG_GUEST_CR2, + VM_REG_GUEST_PDPTE0, + VM_REG_GUEST_PDPTE1, + VM_REG_GUEST_PDPTE2, + VM_REG_GUEST_PDPTE3, + VM_REG_GUEST_INTR_SHADOW, + VM_REG_GUEST_DR0, + VM_REG_GUEST_DR1, + VM_REG_GUEST_DR2, + VM_REG_GUEST_DR3, + VM_REG_GUEST_DR6, + VM_REG_GUEST_ENTRY_INST_LENGTH, + VM_REG_GUEST_FS_BASE, + VM_REG_GUEST_GS_BASE, + VM_REG_GUEST_KGS_BASE, + VM_REG_GUEST_TPR, + VM_REG_LAST +}; + +enum x2apic_state { + X2APIC_DISABLED, + X2APIC_ENABLED, + X2APIC_STATE_LAST +}; + +#define VM_INTINFO_VECTOR(info) ((info) & 0xff) +#define VM_INTINFO_DEL_ERRCODE 0x800 +#define VM_INTINFO_RSVD 0x7ffff000 +#define VM_INTINFO_VALID 0x80000000 +#define VM_INTINFO_TYPE 0x700 +#define VM_INTINFO_HWINTR (0 << 8) +#define VM_INTINFO_NMI (2 << 8) +#define VM_INTINFO_HWEXCEPTION (3 << 8) +#define VM_INTINFO_SWINTR (4 << 8) + +/* + * The VM name has to fit into the pathname length constraints of devfs, + * governed primarily by SPECNAMELEN. The length is the total number of + * characters in the full path, relative to the mount point and not + * including any leading '/' characters. + * A prefix and a suffix are added to the name specified by the user. + * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters + * longer for future use. + * The suffix is a string that identifies a bootrom image or some similar + * image that is attached to the VM. A separator character gets added to + * the suffix automatically when generating the full path, so it must be + * accounted for, reducing the effective length by 1. + * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37 + * bytes for FreeBSD 12. A minimum length is set for safety and supports + * a SPECNAMELEN as small as 32 on old systems. + */ +#define VM_MAX_PREFIXLEN 10 +#define VM_MAX_SUFFIXLEN 15 +#define VM_MIN_NAMELEN 6 +#define VM_MAX_NAMELEN \ + (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1) + +#ifdef _KERNEL +#include <sys/kassert.h> + +CTASSERT(VM_MAX_NAMELEN >= VM_MIN_NAMELEN); + +struct vm; +struct vm_exception; +struct vm_mem; +struct seg_desc; +struct vm_exit; +struct vm_run; +struct vhpet; +struct vioapic; +struct vlapic; +struct vmspace; +struct vm_object; +struct vm_guest_paging; +struct pmap; +enum snapshot_req; + +struct vm_eventinfo { + cpuset_t *rptr; /* rendezvous cookie */ + int *sptr; /* suspend cookie */ + int *iptr; /* reqidle cookie */ +}; + +typedef int (*vmm_init_func_t)(int ipinum); +typedef int (*vmm_cleanup_func_t)(void); +typedef void (*vmm_suspend_func_t)(void); +typedef void (*vmm_resume_func_t)(void); +typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap); +typedef int (*vmi_run_func_t)(void *vcpui, register_t rip, + struct pmap *pmap, struct vm_eventinfo *info); +typedef void (*vmi_cleanup_func_t)(void *vmi); +typedef void * (*vmi_vcpu_init_func_t)(void *vmi, struct vcpu *vcpu, + int vcpu_id); +typedef void (*vmi_vcpu_cleanup_func_t)(void *vcpui); +typedef int (*vmi_get_register_t)(void *vcpui, int num, uint64_t *retval); +typedef int (*vmi_set_register_t)(void *vcpui, int num, uint64_t val); +typedef int (*vmi_get_desc_t)(void *vcpui, int num, struct seg_desc *desc); +typedef int (*vmi_set_desc_t)(void *vcpui, int num, struct seg_desc *desc); +typedef int (*vmi_get_cap_t)(void *vcpui, int num, int *retval); +typedef int (*vmi_set_cap_t)(void *vcpui, int num, int val); +typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max); +typedef void (*vmi_vmspace_free)(struct vmspace *vmspace); +typedef struct vlapic * (*vmi_vlapic_init)(void *vcpui); +typedef void (*vmi_vlapic_cleanup)(struct vlapic *vlapic); +typedef int (*vmi_snapshot_vcpu_t)(void *vcpui, struct vm_snapshot_meta *meta); +typedef int (*vmi_restore_tsc_t)(void *vcpui, uint64_t now); + +struct vmm_ops { + vmm_init_func_t modinit; /* module wide initialization */ + vmm_cleanup_func_t modcleanup; + vmm_resume_func_t modsuspend; + vmm_resume_func_t modresume; + + vmi_init_func_t init; /* vm-specific initialization */ + vmi_run_func_t run; + vmi_cleanup_func_t cleanup; + vmi_vcpu_init_func_t vcpu_init; + vmi_vcpu_cleanup_func_t vcpu_cleanup; + vmi_get_register_t getreg; + vmi_set_register_t setreg; + vmi_get_desc_t getdesc; + vmi_set_desc_t setdesc; + vmi_get_cap_t getcap; + vmi_set_cap_t setcap; + vmi_vmspace_alloc vmspace_alloc; + vmi_vmspace_free vmspace_free; + vmi_vlapic_init vlapic_init; + vmi_vlapic_cleanup vlapic_cleanup; + + /* checkpoint operations */ + vmi_snapshot_vcpu_t vcpu_snapshot; + vmi_restore_tsc_t restore_tsc; +}; + +extern const struct vmm_ops vmm_ops_intel; +extern const struct vmm_ops vmm_ops_amd; + +extern u_int vm_maxcpu; /* maximum virtual cpus */ + +int vm_create(const char *name, struct vm **retvm); +struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); +void vm_disable_vcpu_creation(struct vm *vm); +void vm_slock_vcpus(struct vm *vm); +void vm_unlock_vcpus(struct vm *vm); +void vm_destroy(struct vm *vm); +int vm_reinit(struct vm *vm); +const char *vm_name(struct vm *vm); +uint16_t vm_get_maxcpus(struct vm *vm); +void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, + uint16_t *threads, uint16_t *maxcpus); +int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, + uint16_t threads, uint16_t maxcpus); + +int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); +int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len); +int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func); +int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func); + +int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval); +int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val); +int vm_get_seg_desc(struct vcpu *vcpu, int reg, + struct seg_desc *ret_desc); +int vm_set_seg_desc(struct vcpu *vcpu, int reg, + struct seg_desc *desc); +int vm_run(struct vcpu *vcpu); +int vm_suspend(struct vm *vm, enum vm_suspend_how how); +int vm_inject_nmi(struct vcpu *vcpu); +int vm_nmi_pending(struct vcpu *vcpu); +void vm_nmi_clear(struct vcpu *vcpu); +int vm_inject_extint(struct vcpu *vcpu); +int vm_extint_pending(struct vcpu *vcpu); +void vm_extint_clear(struct vcpu *vcpu); +int vcpu_vcpuid(struct vcpu *vcpu); +struct vm *vcpu_vm(struct vcpu *vcpu); +struct vcpu *vm_vcpu(struct vm *vm, int cpu); +struct vlapic *vm_lapic(struct vcpu *vcpu); +struct vioapic *vm_ioapic(struct vm *vm); +struct vhpet *vm_hpet(struct vm *vm); +int vm_get_capability(struct vcpu *vcpu, int type, int *val); +int vm_set_capability(struct vcpu *vcpu, int type, int val); +int vm_get_x2apic_state(struct vcpu *vcpu, enum x2apic_state *state); +int vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state); +int vm_apicid2vcpuid(struct vm *vm, int apicid); +int vm_activate_cpu(struct vcpu *vcpu); +int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu); +int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu); +int vm_restart_instruction(struct vcpu *vcpu); +struct vm_exit *vm_exitinfo(struct vcpu *vcpu); +cpuset_t *vm_exitinfo_cpuset(struct vcpu *vcpu); +void vm_exit_suspended(struct vcpu *vcpu, uint64_t rip); +void vm_exit_debug(struct vcpu *vcpu, uint64_t rip); +void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t rip); +void vm_exit_astpending(struct vcpu *vcpu, uint64_t rip); +void vm_exit_reqidle(struct vcpu *vcpu, uint64_t rip); +int vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta); +int vm_restore_time(struct vm *vm); + +#ifdef _SYS__CPUSET_H_ +/* + * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'. + * The rendezvous 'func(arg)' is not allowed to do anything that will + * cause the thread to be put to sleep. + * + * The caller cannot hold any locks when initiating the rendezvous. + * + * The implementation of this API may cause vcpus other than those specified + * by 'dest' to be stalled. The caller should not rely on any vcpus making + * forward progress when the rendezvous is in progress. + */ +typedef void (*vm_rendezvous_func_t)(struct vcpu *vcpu, void *arg); +int vm_smp_rendezvous(struct vcpu *vcpu, cpuset_t dest, + vm_rendezvous_func_t func, void *arg); + +cpuset_t vm_active_cpus(struct vm *vm); +cpuset_t vm_debug_cpus(struct vm *vm); +cpuset_t vm_suspended_cpus(struct vm *vm); +cpuset_t vm_start_cpus(struct vm *vm, const cpuset_t *tostart); +void vm_await_start(struct vm *vm, const cpuset_t *waiting); +#endif /* _SYS__CPUSET_H_ */ + +static __inline int +vcpu_rendezvous_pending(struct vcpu *vcpu, struct vm_eventinfo *info) +{ + /* + * This check isn't done with atomic operations or under a lock because + * there's no need to. If the vcpuid bit is set, the vcpu is part of a + * rendezvous and the bit won't be cleared until the vcpu enters the + * rendezvous. On rendezvous exit, the cpuset is cleared and the vcpu + * will see an empty cpuset. So, the races are harmless. + */ + return (CPU_ISSET(vcpu_vcpuid(vcpu), info->rptr)); +} + +static __inline int +vcpu_suspended(struct vm_eventinfo *info) +{ + + return (*info->sptr); +} + +static __inline int +vcpu_reqidle(struct vm_eventinfo *info) +{ + + return (*info->iptr); +} + +int vcpu_debugged(struct vcpu *vcpu); + +/* + * Return true if device indicated by bus/slot/func is supposed to be a + * pci passthrough device. + * + * Return false otherwise. + */ +bool vmm_is_pptdev(int bus, int slot, int func); + +void *vm_iommu_domain(struct vm *vm); + +enum vcpu_state { + VCPU_IDLE, + VCPU_FROZEN, + VCPU_RUNNING, + VCPU_SLEEPING, +}; + +int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle); +enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu); + +static int __inline +vcpu_is_running(struct vcpu *vcpu, int *hostcpu) +{ + return (vcpu_get_state(vcpu, hostcpu) == VCPU_RUNNING); +} + +#ifdef _SYS_PROC_H_ +static int __inline +vcpu_should_yield(struct vcpu *vcpu) +{ + struct thread *td; + + td = curthread; + return (td->td_ast != 0 || td->td_owepreempt != 0); +} +#endif + +void *vcpu_stats(struct vcpu *vcpu); +void vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr); +struct vmspace *vm_vmspace(struct vm *vm); +struct vm_mem *vm_mem(struct vm *vm); +struct vatpic *vm_atpic(struct vm *vm); +struct vatpit *vm_atpit(struct vm *vm); +struct vpmtmr *vm_pmtmr(struct vm *vm); +struct vrtc *vm_rtc(struct vm *vm); + +/* + * Inject exception 'vector' into the guest vcpu. This function returns 0 on + * success and non-zero on failure. + * + * Wrapper functions like 'vm_inject_gp()' should be preferred to calling + * this function directly because they enforce the trap-like or fault-like + * behavior of an exception. + * + * This function should only be called in the context of the thread that is + * executing this vcpu. + */ +int vm_inject_exception(struct vcpu *vcpu, int vector, int err_valid, + uint32_t errcode, int restart_instruction); + +/* + * This function is called after a VM-exit that occurred during exception or + * interrupt delivery through the IDT. The format of 'intinfo' is described + * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2. + * + * If a VM-exit handler completes the event delivery successfully then it + * should call vm_exit_intinfo() to extinguish the pending event. For e.g., + * if the task switch emulation is triggered via a task gate then it should + * call this function with 'intinfo=0' to indicate that the external event + * is not pending anymore. + * + * Return value is 0 on success and non-zero on failure. + */ +int vm_exit_intinfo(struct vcpu *vcpu, uint64_t intinfo); + +/* + * This function is called before every VM-entry to retrieve a pending + * event that should be injected into the guest. This function combines + * nested events into a double or triple fault. + * + * Returns 0 if there are no events that need to be injected into the guest + * and non-zero otherwise. + */ +int vm_entry_intinfo(struct vcpu *vcpu, uint64_t *info); + +int vm_get_intinfo(struct vcpu *vcpu, uint64_t *info1, uint64_t *info2); + +/* + * Function used to keep track of the guest's TSC offset. The + * offset is used by the virtualization extensions to provide a consistent + * value for the Time Stamp Counter to the guest. + */ +void vm_set_tsc_offset(struct vcpu *vcpu, uint64_t offset); + +enum vm_reg_name vm_segment_name(int seg_encoding); + +struct vm_copyinfo { + uint64_t gpa; + size_t len; + void *hva; + void *cookie; +}; + +/* + * Set up 'copyinfo[]' to copy to/from guest linear address space starting + * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for + * a copyin or PROT_WRITE for a copyout. + * + * retval is_fault Interpretation + * 0 0 Success + * 0 1 An exception was injected into the guest + * EFAULT N/A Unrecoverable error + * + * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if + * the return value is 0. The 'copyinfo[]' resources should be freed by calling + * 'vm_copy_teardown()' after the copy is done. + */ +int vm_copy_setup(struct vcpu *vcpu, struct vm_guest_paging *paging, + uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, + int num_copyinfo, int *is_fault); +void vm_copy_teardown(struct vm_copyinfo *copyinfo, int num_copyinfo); +void vm_copyin(struct vm_copyinfo *copyinfo, void *kaddr, size_t len); +void vm_copyout(const void *kaddr, struct vm_copyinfo *copyinfo, size_t len); + +int vcpu_trace_exceptions(struct vcpu *vcpu); +int vcpu_trap_wbinvd(struct vcpu *vcpu); +#endif /* KERNEL */ + +/* + * Identifiers for optional vmm capabilities + */ +enum vm_cap_type { + VM_CAP_HALT_EXIT, + VM_CAP_MTRAP_EXIT, + VM_CAP_PAUSE_EXIT, + VM_CAP_UNRESTRICTED_GUEST, + VM_CAP_ENABLE_INVPCID, + VM_CAP_BPT_EXIT, + VM_CAP_RDPID, + VM_CAP_RDTSCP, + VM_CAP_IPI_EXIT, + VM_CAP_MASK_HWINTR, + VM_CAP_RFLAGS_TF, + VM_CAP_MAX +}; + +enum vm_intr_trigger { + EDGE_TRIGGER, + LEVEL_TRIGGER +}; + +/* + * The 'access' field has the format specified in Table 21-2 of the Intel + * Architecture Manual vol 3b. + * + * XXX The contents of the 'access' field are architecturally defined except + * bit 16 - Segment Unusable. + */ +struct seg_desc { + uint64_t base; + uint32_t limit; + uint32_t access; +}; +#define SEG_DESC_TYPE(access) ((access) & 0x001f) +#define SEG_DESC_DPL(access) (((access) >> 5) & 0x3) +#define SEG_DESC_PRESENT(access) (((access) & 0x0080) ? 1 : 0) +#define SEG_DESC_DEF32(access) (((access) & 0x4000) ? 1 : 0) +#define SEG_DESC_GRANULARITY(access) (((access) & 0x8000) ? 1 : 0) +#define SEG_DESC_UNUSABLE(access) (((access) & 0x10000) ? 1 : 0) + +enum vm_cpu_mode { + CPU_MODE_REAL, + CPU_MODE_PROTECTED, + CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */ + CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */ +}; + +enum vm_paging_mode { + PAGING_MODE_FLAT, + PAGING_MODE_32, + PAGING_MODE_PAE, + PAGING_MODE_64, + PAGING_MODE_64_LA57, +}; + +struct vm_guest_paging { + uint64_t cr3; + int cpl; + enum vm_cpu_mode cpu_mode; + enum vm_paging_mode paging_mode; +}; + +/* + * The data structures 'vie' and 'vie_op' are meant to be opaque to the + * consumers of instruction decoding. The only reason why their contents + * need to be exposed is because they are part of the 'vm_exit' structure. + */ +struct vie_op { + uint8_t op_byte; /* actual opcode byte */ + uint8_t op_type; /* type of operation (e.g. MOV) */ + uint16_t op_flags; +}; +_Static_assert(sizeof(struct vie_op) == 4, "ABI"); +_Static_assert(_Alignof(struct vie_op) == 2, "ABI"); + +#define VIE_INST_SIZE 15 +struct vie { + uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */ + uint8_t num_valid; /* size of the instruction */ + +/* The following fields are all zeroed upon restart. */ +#define vie_startzero num_processed + uint8_t num_processed; + + uint8_t addrsize:4, opsize:4; /* address and operand sizes */ + uint8_t rex_w:1, /* REX prefix */ + rex_r:1, + rex_x:1, + rex_b:1, + rex_present:1, + repz_present:1, /* REP/REPE/REPZ prefix */ + repnz_present:1, /* REPNE/REPNZ prefix */ + opsize_override:1, /* Operand size override */ + addrsize_override:1, /* Address size override */ + segment_override:1; /* Segment override */ + + uint8_t mod:2, /* ModRM byte */ + reg:4, + rm:4; + + uint8_t ss:2, /* SIB byte */ + vex_present:1, /* VEX prefixed */ + vex_l:1, /* L bit */ + index:4, /* SIB byte */ + base:4; /* SIB byte */ + + uint8_t disp_bytes; + uint8_t imm_bytes; + + uint8_t scale; + + uint8_t vex_reg:4, /* vvvv: first source register specifier */ + vex_pp:2, /* pp */ + _sparebits:2; + + uint8_t _sparebytes[2]; + + int base_register; /* VM_REG_GUEST_xyz */ + int index_register; /* VM_REG_GUEST_xyz */ + int segment_register; /* VM_REG_GUEST_xyz */ + + int64_t displacement; /* optional addr displacement */ + int64_t immediate; /* optional immediate operand */ + + uint8_t decoded; /* set to 1 if successfully decoded */ + + uint8_t _sparebyte; + + struct vie_op op; /* opcode description */ +}; +_Static_assert(sizeof(struct vie) == 64, "ABI"); +_Static_assert(__offsetof(struct vie, disp_bytes) == 22, "ABI"); +_Static_assert(__offsetof(struct vie, scale) == 24, "ABI"); +_Static_assert(__offsetof(struct vie, base_register) == 28, "ABI"); + +enum vm_exitcode { + VM_EXITCODE_INOUT, + VM_EXITCODE_VMX, + VM_EXITCODE_BOGUS, + VM_EXITCODE_RDMSR, + VM_EXITCODE_WRMSR, + VM_EXITCODE_HLT, + VM_EXITCODE_MTRAP, + VM_EXITCODE_PAUSE, + VM_EXITCODE_PAGING, + VM_EXITCODE_INST_EMUL, + VM_EXITCODE_SPINUP_AP, + VM_EXITCODE_DEPRECATED1, /* used to be SPINDOWN_CPU */ + VM_EXITCODE_RENDEZVOUS, + VM_EXITCODE_IOAPIC_EOI, + VM_EXITCODE_SUSPENDED, + VM_EXITCODE_INOUT_STR, + VM_EXITCODE_TASK_SWITCH, + VM_EXITCODE_MONITOR, + VM_EXITCODE_MWAIT, + VM_EXITCODE_SVM, + VM_EXITCODE_REQIDLE, + VM_EXITCODE_DEBUG, + VM_EXITCODE_VMINSN, + VM_EXITCODE_BPT, + VM_EXITCODE_IPI, + VM_EXITCODE_DB, + VM_EXITCODE_MAX +}; + +struct vm_inout { + uint16_t bytes:3; /* 1 or 2 or 4 */ + uint16_t in:1; + uint16_t string:1; + uint16_t rep:1; + uint16_t port; + uint32_t eax; /* valid for out */ +}; + +struct vm_inout_str { + struct vm_inout inout; /* must be the first element */ + struct vm_guest_paging paging; + uint64_t rflags; + uint64_t cr0; + uint64_t index; + uint64_t count; /* rep=1 (%rcx), rep=0 (1) */ + int addrsize; + enum vm_reg_name seg_name; + struct seg_desc seg_desc; + int cs_d; + uint64_t cs_base; +}; + +enum task_switch_reason { + TSR_CALL, + TSR_IRET, + TSR_JMP, + TSR_IDT_GATE, /* task gate in IDT */ +}; + +struct vm_task_switch { + uint16_t tsssel; /* new TSS selector */ + int ext; /* task switch due to external event */ + uint32_t errcode; + int errcode_valid; /* push 'errcode' on the new stack */ + enum task_switch_reason reason; + struct vm_guest_paging paging; +}; + +struct vm_exit { + enum vm_exitcode exitcode; + int inst_length; /* 0 means unknown */ + uint64_t rip; + union { + struct vm_inout inout; + struct vm_inout_str inout_str; + struct { + uint64_t gpa; + int fault_type; + } paging; + struct { + uint64_t gpa; + uint64_t gla; + uint64_t cs_base; + int cs_d; /* CS.D */ + struct vm_guest_paging paging; + struct vie vie; + } inst_emul; + /* + * VMX specific payload. Used when there is no "better" + * exitcode to represent the VM-exit. + */ + struct { + int status; /* vmx inst status */ + /* + * 'exit_reason' and 'exit_qualification' are valid + * only if 'status' is zero. + */ + uint32_t exit_reason; + uint64_t exit_qualification; + /* + * 'inst_error' and 'inst_type' are valid + * only if 'status' is non-zero. + */ + int inst_type; + int inst_error; + } vmx; + /* + * SVM specific payload. + */ + struct { + uint64_t exitcode; + uint64_t exitinfo1; + uint64_t exitinfo2; + } svm; + struct { + int inst_length; + } bpt; + struct { + int trace_trap; + int pushf_intercept; + int tf_shadow_val; + struct vm_guest_paging paging; + } dbg; + struct { + uint32_t code; /* ecx value */ + uint64_t wval; + } msr; + struct { + int vcpu; + uint64_t rip; + } spinup_ap; + struct { + uint64_t rflags; + uint64_t intr_status; + } hlt; + struct { + int vector; + } ioapic_eoi; + struct { + enum vm_suspend_how how; + } suspended; + struct { + /* + * The destination vCPU mask is saved in vcpu->cpuset + * and is copied out to userspace separately to avoid + * ABI concerns. + */ + uint32_t mode; + uint8_t vector; + } ipi; + struct vm_task_switch task_switch; + } u; +}; + +/* APIs to inject faults into the guest */ +void vm_inject_fault(struct vcpu *vcpu, int vector, int errcode_valid, + int errcode); + +static __inline void +vm_inject_ud(struct vcpu *vcpu) +{ + vm_inject_fault(vcpu, IDT_UD, 0, 0); +} + +static __inline void +vm_inject_gp(struct vcpu *vcpu) +{ + vm_inject_fault(vcpu, IDT_GP, 1, 0); +} + +static __inline void +vm_inject_ac(struct vcpu *vcpu, int errcode) +{ + vm_inject_fault(vcpu, IDT_AC, 1, errcode); +} + +static __inline void +vm_inject_ss(struct vcpu *vcpu, int errcode) +{ + vm_inject_fault(vcpu, IDT_SS, 1, errcode); +} + +void vm_inject_pf(struct vcpu *vcpu, int error_code, uint64_t cr2); + +#endif /* _VMM_H_ */ diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h new file mode 100644 index 000000000000..441330fd57b8 --- /dev/null +++ b/sys/amd64/include/vmm_dev.h @@ -0,0 +1,469 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_DEV_H_ +#define _VMM_DEV_H_ + +#include <sys/domainset.h> + +#include <machine/vmm.h> +#include <machine/vmm_snapshot.h> + +struct vm_memmap { + vm_paddr_t gpa; + int segid; /* memory segment */ + vm_ooffset_t segoff; /* offset into memory segment */ + size_t len; /* mmap length */ + int prot; /* RWX */ + int flags; +}; +#define VM_MEMMAP_F_WIRED 0x01 +#define VM_MEMMAP_F_IOMMU 0x02 + +struct vm_munmap { + vm_paddr_t gpa; + size_t len; +}; + +#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL) +struct vm_memseg { + int segid; + size_t len; + char name[VM_MAX_SUFFIXLEN + 1]; + domainset_t *ds_mask; + size_t ds_mask_size; + int ds_policy; +}; + +struct vm_register { + int cpuid; + int regnum; /* enum vm_reg_name */ + uint64_t regval; +}; + +struct vm_seg_desc { /* data or code segment */ + int cpuid; + int regnum; /* enum vm_reg_name */ + struct seg_desc desc; +}; + +struct vm_register_set { + int cpuid; + unsigned int count; + const int *regnums; /* enum vm_reg_name */ + uint64_t *regvals; +}; + +struct vm_run { + int cpuid; + cpuset_t *cpuset; /* CPU set storage */ + size_t cpusetsize; + struct vm_exit *vm_exit; +}; + +struct vm_exception { + int cpuid; + int vector; + uint32_t error_code; + int error_code_valid; + int restart_instruction; +}; + +struct vm_lapic_msi { + uint64_t msg; + uint64_t addr; +}; + +struct vm_lapic_irq { + int cpuid; + int vector; +}; + +struct vm_ioapic_irq { + int irq; +}; + +struct vm_isa_irq { + int atpic_irq; + int ioapic_irq; +}; + +struct vm_isa_irq_trigger { + int atpic_irq; + enum vm_intr_trigger trigger; +}; + +struct vm_capability { + int cpuid; + enum vm_cap_type captype; + int capval; + int allcpus; +}; + +struct vm_pptdev { + int bus; + int slot; + int func; +}; + +struct vm_pptdev_mmio { + int bus; + int slot; + int func; + vm_paddr_t gpa; + vm_paddr_t hpa; + size_t len; +}; + +struct vm_pptdev_msi { + int vcpu; /* unused */ + int bus; + int slot; + int func; + int numvec; /* 0 means disabled */ + uint64_t msg; + uint64_t addr; +}; + +struct vm_pptdev_msix { + int vcpu; /* unused */ + int bus; + int slot; + int func; + int idx; + uint64_t msg; + uint32_t vector_control; + uint64_t addr; +}; + +struct vm_nmi { + int cpuid; +}; + +#define MAX_VM_STATS 64 +struct vm_stats { + int cpuid; /* in */ + int index; /* in */ + int num_entries; /* out */ + struct timeval tv; + uint64_t statbuf[MAX_VM_STATS]; +}; + +struct vm_stat_desc { + int index; /* in */ + char desc[128]; /* out */ +}; + +struct vm_x2apic { + int cpuid; + enum x2apic_state state; +}; + +struct vm_gpa_pte { + uint64_t gpa; /* in */ + uint64_t pte[4]; /* out */ + int ptenum; +}; + +struct vm_hpet_cap { + uint32_t capabilities; /* lower 32 bits of HPET capabilities */ +}; + +struct vm_suspend { + enum vm_suspend_how how; +}; + +struct vm_gla2gpa { + int vcpuid; /* inputs */ + int prot; /* PROT_READ or PROT_WRITE */ + uint64_t gla; + struct vm_guest_paging paging; + int fault; /* outputs */ + uint64_t gpa; +}; + +struct vm_activate_cpu { + int vcpuid; +}; + +struct vm_cpuset { + int which; + int cpusetsize; + cpuset_t *cpus; +}; +#define VM_ACTIVE_CPUS 0 +#define VM_SUSPENDED_CPUS 1 +#define VM_DEBUG_CPUS 2 + +struct vm_intinfo { + int vcpuid; + uint64_t info1; + uint64_t info2; +}; + +struct vm_rtc_time { + time_t secs; +}; + +struct vm_rtc_data { + int offset; + uint8_t value; +}; + +struct vm_cpu_topology { + uint16_t sockets; + uint16_t cores; + uint16_t threads; + uint16_t maxcpus; +}; + +struct vm_readwrite_kernemu_device { + int vcpuid; + unsigned access_width : 3; + unsigned _unused : 29; + uint64_t gpa; + uint64_t value; +}; +_Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI"); + +enum { + /* general routines */ + IOCNUM_ABIVERS = 0, + IOCNUM_RUN = 1, + IOCNUM_SET_CAPABILITY = 2, + IOCNUM_GET_CAPABILITY = 3, + IOCNUM_SUSPEND = 4, + IOCNUM_REINIT = 5, + + /* memory apis */ + IOCNUM_MAP_MEMORY = 10, /* deprecated */ + IOCNUM_GET_MEMORY_SEG = 11, /* deprecated */ + IOCNUM_GET_GPA_PMAP = 12, + IOCNUM_GLA2GPA = 13, + IOCNUM_ALLOC_MEMSEG = 14, + IOCNUM_GET_MEMSEG = 15, + IOCNUM_MMAP_MEMSEG = 16, + IOCNUM_MMAP_GETNEXT = 17, + IOCNUM_GLA2GPA_NOFAULT = 18, + IOCNUM_MUNMAP_MEMSEG = 19, + + /* register/state accessors */ + IOCNUM_SET_REGISTER = 20, + IOCNUM_GET_REGISTER = 21, + IOCNUM_SET_SEGMENT_DESCRIPTOR = 22, + IOCNUM_GET_SEGMENT_DESCRIPTOR = 23, + IOCNUM_SET_REGISTER_SET = 24, + IOCNUM_GET_REGISTER_SET = 25, + IOCNUM_GET_KERNEMU_DEV = 26, + IOCNUM_SET_KERNEMU_DEV = 27, + + /* interrupt injection */ + IOCNUM_GET_INTINFO = 28, + IOCNUM_SET_INTINFO = 29, + IOCNUM_INJECT_EXCEPTION = 30, + IOCNUM_LAPIC_IRQ = 31, + IOCNUM_INJECT_NMI = 32, + IOCNUM_IOAPIC_ASSERT_IRQ = 33, + IOCNUM_IOAPIC_DEASSERT_IRQ = 34, + IOCNUM_IOAPIC_PULSE_IRQ = 35, + IOCNUM_LAPIC_MSI = 36, + IOCNUM_LAPIC_LOCAL_IRQ = 37, + IOCNUM_IOAPIC_PINCOUNT = 38, + IOCNUM_RESTART_INSTRUCTION = 39, + + /* PCI pass-thru */ + IOCNUM_BIND_PPTDEV = 40, + IOCNUM_UNBIND_PPTDEV = 41, + IOCNUM_MAP_PPTDEV_MMIO = 42, + IOCNUM_PPTDEV_MSI = 43, + IOCNUM_PPTDEV_MSIX = 44, + IOCNUM_PPTDEV_DISABLE_MSIX = 45, + IOCNUM_UNMAP_PPTDEV_MMIO = 46, + + /* statistics */ + IOCNUM_VM_STATS = 50, + IOCNUM_VM_STAT_DESC = 51, + + /* kernel device state */ + IOCNUM_SET_X2APIC_STATE = 60, + IOCNUM_GET_X2APIC_STATE = 61, + IOCNUM_GET_HPET_CAPABILITIES = 62, + + /* CPU Topology */ + IOCNUM_SET_TOPOLOGY = 63, + IOCNUM_GET_TOPOLOGY = 64, + + /* legacy interrupt injection */ + IOCNUM_ISA_ASSERT_IRQ = 80, + IOCNUM_ISA_DEASSERT_IRQ = 81, + IOCNUM_ISA_PULSE_IRQ = 82, + IOCNUM_ISA_SET_IRQ_TRIGGER = 83, + + /* vm_cpuset */ + IOCNUM_ACTIVATE_CPU = 90, + IOCNUM_GET_CPUSET = 91, + IOCNUM_SUSPEND_CPU = 92, + IOCNUM_RESUME_CPU = 93, + + /* RTC */ + IOCNUM_RTC_READ = 100, + IOCNUM_RTC_WRITE = 101, + IOCNUM_RTC_SETTIME = 102, + IOCNUM_RTC_GETTIME = 103, + + /* checkpoint */ + IOCNUM_SNAPSHOT_REQ = 113, + + IOCNUM_RESTORE_TIME = 115 +}; + +#define VM_RUN \ + _IOW('v', IOCNUM_RUN, struct vm_run) +#define VM_SUSPEND \ + _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) +#define VM_REINIT \ + _IO('v', IOCNUM_REINIT) +#define VM_ALLOC_MEMSEG \ + _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg) +#define VM_GET_MEMSEG \ + _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg) +#define VM_MMAP_MEMSEG \ + _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap) +#define VM_MMAP_GETNEXT \ + _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap) +#define VM_MUNMAP_MEMSEG \ + _IOW('v', IOCNUM_MUNMAP_MEMSEG, struct vm_munmap) +#define VM_SET_REGISTER \ + _IOW('v', IOCNUM_SET_REGISTER, struct vm_register) +#define VM_GET_REGISTER \ + _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register) +#define VM_SET_SEGMENT_DESCRIPTOR \ + _IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc) +#define VM_GET_SEGMENT_DESCRIPTOR \ + _IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc) +#define VM_SET_REGISTER_SET \ + _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set) +#define VM_GET_REGISTER_SET \ + _IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set) +#define VM_SET_KERNEMU_DEV \ + _IOW('v', IOCNUM_SET_KERNEMU_DEV, \ + struct vm_readwrite_kernemu_device) +#define VM_GET_KERNEMU_DEV \ + _IOWR('v', IOCNUM_GET_KERNEMU_DEV, \ + struct vm_readwrite_kernemu_device) +#define VM_INJECT_EXCEPTION \ + _IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception) +#define VM_LAPIC_IRQ \ + _IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq) +#define VM_LAPIC_LOCAL_IRQ \ + _IOW('v', IOCNUM_LAPIC_LOCAL_IRQ, struct vm_lapic_irq) +#define VM_LAPIC_MSI \ + _IOW('v', IOCNUM_LAPIC_MSI, struct vm_lapic_msi) +#define VM_IOAPIC_ASSERT_IRQ \ + _IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq) +#define VM_IOAPIC_DEASSERT_IRQ \ + _IOW('v', IOCNUM_IOAPIC_DEASSERT_IRQ, struct vm_ioapic_irq) +#define VM_IOAPIC_PULSE_IRQ \ + _IOW('v', IOCNUM_IOAPIC_PULSE_IRQ, struct vm_ioapic_irq) +#define VM_IOAPIC_PINCOUNT \ + _IOR('v', IOCNUM_IOAPIC_PINCOUNT, int) +#define VM_ISA_ASSERT_IRQ \ + _IOW('v', IOCNUM_ISA_ASSERT_IRQ, struct vm_isa_irq) +#define VM_ISA_DEASSERT_IRQ \ + _IOW('v', IOCNUM_ISA_DEASSERT_IRQ, struct vm_isa_irq) +#define VM_ISA_PULSE_IRQ \ + _IOW('v', IOCNUM_ISA_PULSE_IRQ, struct vm_isa_irq) +#define VM_ISA_SET_IRQ_TRIGGER \ + _IOW('v', IOCNUM_ISA_SET_IRQ_TRIGGER, struct vm_isa_irq_trigger) +#define VM_SET_CAPABILITY \ + _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability) +#define VM_GET_CAPABILITY \ + _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability) +#define VM_BIND_PPTDEV \ + _IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev) +#define VM_UNBIND_PPTDEV \ + _IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev) +#define VM_MAP_PPTDEV_MMIO \ + _IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio) +#define VM_PPTDEV_MSI \ + _IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi) +#define VM_PPTDEV_MSIX \ + _IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix) +#define VM_PPTDEV_DISABLE_MSIX \ + _IOW('v', IOCNUM_PPTDEV_DISABLE_MSIX, struct vm_pptdev) +#define VM_UNMAP_PPTDEV_MMIO \ + _IOW('v', IOCNUM_UNMAP_PPTDEV_MMIO, struct vm_pptdev_mmio) +#define VM_INJECT_NMI \ + _IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi) +#define VM_STATS \ + _IOWR('v', IOCNUM_VM_STATS, struct vm_stats) +#define VM_STAT_DESC \ + _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc) +#define VM_SET_X2APIC_STATE \ + _IOW('v', IOCNUM_SET_X2APIC_STATE, struct vm_x2apic) +#define VM_GET_X2APIC_STATE \ + _IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic) +#define VM_GET_HPET_CAPABILITIES \ + _IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap) +#define VM_SET_TOPOLOGY \ + _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology) +#define VM_GET_TOPOLOGY \ + _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology) +#define VM_GET_GPA_PMAP \ + _IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte) +#define VM_GLA2GPA \ + _IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa) +#define VM_GLA2GPA_NOFAULT \ + _IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa) +#define VM_ACTIVATE_CPU \ + _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu) +#define VM_GET_CPUS \ + _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) +#define VM_SUSPEND_CPU \ + _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu) +#define VM_RESUME_CPU \ + _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu) +#define VM_SET_INTINFO \ + _IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo) +#define VM_GET_INTINFO \ + _IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo) +#define VM_RTC_WRITE \ + _IOW('v', IOCNUM_RTC_WRITE, struct vm_rtc_data) +#define VM_RTC_READ \ + _IOWR('v', IOCNUM_RTC_READ, struct vm_rtc_data) +#define VM_RTC_SETTIME \ + _IOW('v', IOCNUM_RTC_SETTIME, struct vm_rtc_time) +#define VM_RTC_GETTIME \ + _IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time) +#define VM_RESTART_INSTRUCTION \ + _IOW('v', IOCNUM_RESTART_INSTRUCTION, int) +#define VM_SNAPSHOT_REQ \ + _IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta) +#define VM_RESTORE_TIME \ + _IOWR('v', IOCNUM_RESTORE_TIME, int) +#endif diff --git a/sys/amd64/include/vmm_instruction_emul.h b/sys/amd64/include/vmm_instruction_emul.h new file mode 100644 index 000000000000..1fb0f97682a7 --- /dev/null +++ b/sys/amd64/include/vmm_instruction_emul.h @@ -0,0 +1,158 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_INSTRUCTION_EMUL_H_ +#define _VMM_INSTRUCTION_EMUL_H_ + +#include <sys/mman.h> + +/* struct vie_op.op_type */ +enum { + VIE_OP_TYPE_NONE = 0, + VIE_OP_TYPE_MOV, + VIE_OP_TYPE_MOVSX, + VIE_OP_TYPE_MOVZX, + VIE_OP_TYPE_AND, + VIE_OP_TYPE_OR, + VIE_OP_TYPE_SUB, + VIE_OP_TYPE_TWO_BYTE, + VIE_OP_TYPE_PUSH, + VIE_OP_TYPE_CMP, + VIE_OP_TYPE_POP, + VIE_OP_TYPE_MOVS, + VIE_OP_TYPE_GROUP1, + VIE_OP_TYPE_STOS, + VIE_OP_TYPE_BITTEST, + VIE_OP_TYPE_TWOB_GRP15, + VIE_OP_TYPE_ADD, + VIE_OP_TYPE_TEST, + VIE_OP_TYPE_BEXTR, + VIE_OP_TYPE_OUTS, + VIE_OP_TYPE_LAST +}; + +/* + * Callback functions to read and write memory regions. + */ +typedef int (*mem_region_read_t)(struct vcpu *vcpu, uint64_t gpa, + uint64_t *rval, int rsize, void *arg); + +typedef int (*mem_region_write_t)(struct vcpu *vcpu, uint64_t gpa, + uint64_t wval, int wsize, void *arg); + +/* + * Emulate the decoded 'vie' instruction. + * + * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region + * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the + * callback functions. + * + * 'void *vm' should be 'struct vm *' when called from kernel context and + * 'struct vmctx *' when called from user context. + * s + */ +int vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + struct vm_guest_paging *paging, mem_region_read_t mrr, + mem_region_write_t mrw, void *mrarg); + +int vie_update_register(struct vcpu *vcpu, enum vm_reg_name reg, + uint64_t val, int size); + +/* + * Returns 1 if an alignment check exception should be injected and 0 otherwise. + */ +int vie_alignment_check(int cpl, int operand_size, uint64_t cr0, + uint64_t rflags, uint64_t gla); + +/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */ +int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla); + +uint64_t vie_size2mask(int size); + +int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, + struct seg_desc *desc, uint64_t off, int length, int addrsize, int prot, + uint64_t *gla); + +#ifdef _KERNEL +/* + * APIs to fetch and decode the instruction from nested page fault handler. + * + * 'vie' must be initialized before calling 'vmm_fetch_instruction()' + */ +int vmm_fetch_instruction(struct vcpu *vcpu, + struct vm_guest_paging *guest_paging, + uint64_t rip, int inst_length, struct vie *vie, + int *is_fault); + +/* + * Translate the guest linear address 'gla' to a guest physical address. + * + * retval is_fault Interpretation + * 0 0 'gpa' contains result of the translation + * 0 1 An exception was injected into the guest + * EFAULT N/A An unrecoverable hypervisor error occurred + */ +int vm_gla2gpa(struct vcpu *vcpu, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault); + +/* + * Like vm_gla2gpa, but no exceptions are injected into the guest and + * PTEs are not changed. + */ +int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault); +#endif /* _KERNEL */ + +void vie_restart(struct vie *vie); +void vie_init(struct vie *vie, const char *inst_bytes, int inst_length); + +/* + * Decode the instruction fetched into 'vie' so it can be emulated. + * + * 'gla' is the guest linear address provided by the hardware assist + * that caused the nested page table fault. It is used to verify that + * the software instruction decoding is in agreement with the hardware. + * + * Some hardware assists do not provide the 'gla' to the hypervisor. + * To skip the 'gla' verification for this or any other reason pass + * in VIE_INVALID_GLA instead. + */ +#ifdef _KERNEL +#define VIE_INVALID_GLA (1UL << 63) /* a non-canonical address */ +int vmm_decode_instruction(struct vcpu *vcpu, uint64_t gla, + enum vm_cpu_mode cpu_mode, int csd, struct vie *vie); +#else /* !_KERNEL */ +/* + * Permit instruction decoding logic to be compiled outside of the kernel for + * rapid iteration and validation. No GLA validation is performed, obviously. + */ +int vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int csd, + struct vie *vie); +#endif /* _KERNEL */ + +#endif /* _VMM_INSTRUCTION_EMUL_H_ */ diff --git a/sys/amd64/include/vmm_snapshot.h b/sys/amd64/include/vmm_snapshot.h new file mode 100644 index 000000000000..b39c342bf6d9 --- /dev/null +++ b/sys/amd64/include/vmm_snapshot.h @@ -0,0 +1,132 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2016 Flavius Anton + * Copyright (c) 2016 Mihai Tiganus + * Copyright (c) 2016-2019 Mihai Carabas + * Copyright (c) 2017-2019 Darius Mihai + * Copyright (c) 2017-2019 Elena Mihailescu + * Copyright (c) 2018-2019 Sergiu Weisz + * All rights reserved. + * The bhyve-snapshot feature was developed under sponsorships + * from Matthew Grooms. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_SNAPSHOT_ +#define _VMM_SNAPSHOT_ + +#include <sys/errno.h> +#include <sys/types.h> +#ifndef _KERNEL +#include <stdbool.h> +#endif + +enum snapshot_req { + STRUCT_VIOAPIC = 1, + STRUCT_VM, + STRUCT_VLAPIC, + VM_MEM, + STRUCT_VHPET, + STRUCT_VMCX, + STRUCT_VATPIC, + STRUCT_VATPIT, + STRUCT_VPMTMR, + STRUCT_VRTC, +}; + +struct vm_snapshot_buffer { + /* + * R/O for device-specific functions; + * written by generic snapshot functions. + */ + uint8_t *const buf_start; + const size_t buf_size; + + /* + * R/W for device-specific functions used to keep track of buffer + * current position and remaining size. + */ + uint8_t *buf; + size_t buf_rem; + + /* + * Length of the snapshot is either determined as (buf_size - buf_rem) + * or (buf - buf_start) -- the second variation returns a signed value + * so it may not be appropriate. + * + * Use vm_get_snapshot_size(meta). + */ +}; + +enum vm_snapshot_op { + VM_SNAPSHOT_SAVE, + VM_SNAPSHOT_RESTORE, +}; + +struct vm_snapshot_meta { + void *dev_data; + const char *dev_name; /* identify userspace devices */ + enum snapshot_req dev_req; /* identify kernel structs */ + + struct vm_snapshot_buffer buffer; + + enum vm_snapshot_op op; +}; + +void vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op); +int vm_snapshot_buf(void *data, size_t data_size, + struct vm_snapshot_meta *meta); +size_t vm_get_snapshot_size(struct vm_snapshot_meta *meta); + +#define SNAPSHOT_BUF_OR_LEAVE(DATA, LEN, META, RES, LABEL) \ +do { \ + (RES) = vm_snapshot_buf((DATA), (LEN), (META)); \ + if ((RES) != 0) { \ + vm_snapshot_buf_err(#DATA, (META)->op); \ + goto LABEL; \ + } \ +} while (0) + +#define SNAPSHOT_VAR_OR_LEAVE(DATA, META, RES, LABEL) \ + SNAPSHOT_BUF_OR_LEAVE(&(DATA), sizeof(DATA), (META), (RES), LABEL) + +#ifndef _KERNEL +int vm_snapshot_buf_cmp(void *data, size_t data_size, + struct vm_snapshot_meta *meta); + +/* compare the value in the meta buffer with the data */ +#define SNAPSHOT_BUF_CMP_OR_LEAVE(DATA, LEN, META, RES, LABEL) \ +do { \ + (RES) = vm_snapshot_buf_cmp((DATA), (LEN), (META)); \ + if ((RES) != 0) { \ + vm_snapshot_buf_err(#DATA, (META)->op); \ + goto LABEL; \ + } \ +} while (0) + +#define SNAPSHOT_VAR_CMP_OR_LEAVE(DATA, META, RES, LABEL) \ + SNAPSHOT_BUF_CMP_OR_LEAVE(&(DATA), sizeof(DATA), (META), (RES), LABEL) + +#endif /* _KERNEL */ +#endif diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h new file mode 100644 index 000000000000..d2ac3c6648b2 --- /dev/null +++ b/sys/amd64/include/vmparam.h @@ -0,0 +1,307 @@ +/*- + * SPDX-License-Identifier: BSD-4-Clause + * + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 1994 John S. Dyson + * All rights reserved. + * Copyright (c) 2003 Peter Wemm + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __i386__ +#include <i386/vmparam.h> +#else /* !__i386__ */ + +#ifndef _MACHINE_VMPARAM_H_ +#define _MACHINE_VMPARAM_H_ 1 + +/* + * Machine dependent constants for AMD64. + */ + +/* + * Virtual memory related constants, all in bytes + */ +#define MAXTSIZ (32768UL*1024*1024) /* max text size */ +#ifndef DFLDSIZ +#define DFLDSIZ (32768UL*1024*1024) /* initial data size limit */ +#endif +#ifndef MAXDSIZ +#define MAXDSIZ (32768UL*1024*1024) /* max data size */ +#endif +#ifndef DFLSSIZ +#define DFLSSIZ (8UL*1024*1024) /* initial stack size limit */ +#endif +#ifndef MAXSSIZ +#define MAXSSIZ (512UL*1024*1024) /* max stack size */ +#endif +#ifndef SGROWSIZ +#define SGROWSIZ (128UL*1024) /* amount to grow stack */ +#endif + +/* + * We provide a single page allocator through the use of the + * direct mapped segment. This uses 2MB pages for reduced + * TLB pressure. + */ +#if !defined(KASAN) && !defined(KMSAN) +#define UMA_USE_DMAP +#endif + +/* + * The physical address space is densely populated. + */ +#define VM_PHYSSEG_DENSE + +/* + * The number of PHYSSEG entries must be one greater than the number + * of phys_avail entries because the phys_avail entry that spans the + * largest physical address that is accessible by ISA DMA is split + * into two PHYSSEG entries. + */ +#define VM_PHYSSEG_MAX 63 + +/* + * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool from + * which physical pages are allocated and VM_FREEPOOL_DIRECT is the pool from + * which physical pages for page tables and small UMA objects are allocated. + * VM_FREEPOOL_LAZYINIT is a special-purpose pool that is populated only during + * boot and is used to implement deferred initialization of page structures. + */ +#define VM_NFREEPOOL 3 +#define VM_FREEPOOL_LAZYINIT 0 +#define VM_FREEPOOL_DEFAULT 1 +#define VM_FREEPOOL_DIRECT 2 + +/* + * Create up to three free page lists: VM_FREELIST_DMA32 is for physical pages + * that have physical addresses below 4G but are not accessible by ISA DMA, + * and VM_FREELIST_ISADMA is for physical pages that are accessible by ISA + * DMA. + */ +#define VM_NFREELIST 3 +#define VM_FREELIST_DEFAULT 0 +#define VM_FREELIST_DMA32 1 +#define VM_FREELIST_LOWMEM 2 + +#define VM_LOWMEM_BOUNDARY (16 << 20) /* 16MB ISA DMA limit */ + +/* + * Create the DMA32 free list only if the number of physical pages above + * physical address 4G is at least 16M, which amounts to 64GB of physical + * memory. + */ +#define VM_DMA32_NPAGES_THRESHOLD 16777216 + +/* + * An allocation size of 16MB is supported in order to optimize the + * use of the direct map by UMA. Specifically, a cache line contains + * at most 8 PDEs, collectively mapping 16MB of physical memory. By + * reducing the number of distinct 16MB "pages" that are used by UMA, + * the physical memory allocator reduces the likelihood of both 2MB + * page TLB misses and cache misses caused by 2MB page TLB misses. + */ +#define VM_NFREEORDER 13 + +/* + * Enable superpage reservations: 1 level. + */ +#ifndef VM_NRESERVLEVEL +#define VM_NRESERVLEVEL 1 +#endif + +/* + * Level 0 reservations consist of 512 pages. + */ +#ifndef VM_LEVEL_0_ORDER +#define VM_LEVEL_0_ORDER 9 +#endif + +/* + * Kernel physical load address for non-UEFI boot and for legacy UEFI loader. + * Newer UEFI loader loads kernel anywhere below 4G, with memory allocated + * by boot services. + * Needs to be aligned at 2MB superpage boundary. + */ +#ifndef KERNLOAD +#define KERNLOAD 0x200000 +#endif + +/* + * Virtual addresses of things. Derived from the page directory and + * page table indexes from pmap.h for precision. + * + * LA48: + * 0x0000000000000000 - 0x00007fffffffffff user map + * 0x0000800000000000 - 0xffff7fffffffffff does not exist (hole) + * 0xffff800000000000 - 0xffff804020100fff recursive page table (512GB slot) + * 0xffff804020100fff - 0xffff807fffffffff unused + * 0xffff808000000000 - 0xffff847fffffffff large map (can be tuned up) + * 0xffff848000000000 - 0xfffff77fffffffff unused (large map extends there) + * 0xfffff60000000000 - 0xfffff7ffffffffff 2TB KMSAN origin map, optional + * 0xfffff78000000000 - 0xfffff7bfffffffff 512GB KASAN shadow map, optional + * 0xfffff80000000000 - 0xfffffbffffffffff 4TB direct map + * 0xfffffc0000000000 - 0xfffffdffffffffff 2TB KMSAN shadow map, optional + * 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map + * + * LA57: + * 0x0000000000000000 - 0x00ffffffffffffff user map + * 0x0100000000000000 - 0xf0ffffffffffffff does not exist (hole) + * 0xff00000000000000 - 0xff00ffffffffffff recursive page table (2048TB slot) + * 0xff01000000000000 - 0xff20ffffffffffff direct map (32 x 2048TB slots) + * 0xff21000000000000 - 0xff40ffffffffffff large map + * 0xff41000000000000 - 0xffff7fffffffffff unused + * 0xffff800000000000 - 0xfffff5ffffffffff unused (start of kernel pml4 entry) + * 0xfffff60000000000 - 0xfffff7ffffffffff 2TB KMSAN origin map, optional + * 0xfffff78000000000 - 0xfffff7bfffffffff 512GB KASAN shadow map, optional + * 0xfffff80000000000 - 0xfffffbffffffffff 4TB unused + * 0xfffffc0000000000 - 0xfffffdffffffffff 2TB KMSAN shadow map, optional + * 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map + * + * Within the kernel map: + * + * 0xfffffe0000000000 vm_page_array + * 0xffffffff80000000 KERNBASE + */ + +#define VM_MIN_KERNEL_ADDRESS_LA48 KV4ADDR(KPML4BASE, 0, 0, 0) +#define VM_MIN_KERNEL_ADDRESS kva_layout.km_low +#define VM_MAX_KERNEL_ADDRESS kva_layout.km_high + +#define KASAN_MIN_ADDRESS (kva_layout.kasan_shadow_low) +#define KASAN_MAX_ADDRESS (kva_layout.kasan_shadow_high) + +#define KMSAN_SHAD_MIN_ADDRESS (kva_layout.kmsan_shadow_low) +#define KMSAN_SHAD_MAX_ADDRESS (kva_layout.kmsan_shadow_high) + +#define KMSAN_ORIG_MIN_ADDRESS (kva_layout.kmsan_origin_low) +#define KMSAN_ORIG_MAX_ADDRESS (kva_layout.kmsan_origin_high) + +/* + * Formally kernel mapping starts at KERNBASE, but kernel linker + * script leaves first PDE reserved. For legacy BIOS boot, kernel is + * loaded at KERNLOAD = 2M, and initial kernel page table maps + * physical memory from zero to KERNend starting at KERNBASE. + * + * KERNSTART is where the first actual kernel page is mapped, after + * the compatibility mapping. + */ +#define KERNBASE KV4ADDR(KPML4I, KPDPI, 0, 0) +#define KERNSTART (KERNBASE + NBPDR) + +#define UPT_MAX_ADDRESS KV4ADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I) +#define UPT_MIN_ADDRESS KV4ADDR(PML4PML4I, 0, 0, 0) + +#define VM_MAXUSER_ADDRESS_LA57 UVADDR(NUPML5E, 0, 0, 0, 0) +#define VM_MAXUSER_ADDRESS_LA48 UVADDR(0, NUP4ML4E, 0, 0, 0) +#define VM_MAXUSER_ADDRESS VM_MAXUSER_ADDRESS_LA57 + +#define SHAREDPAGE_LA57 (VM_MAXUSER_ADDRESS_LA57 - PAGE_SIZE) +#define SHAREDPAGE_LA48 (VM_MAXUSER_ADDRESS_LA48 - PAGE_SIZE) +#define USRSTACK_LA57 SHAREDPAGE_LA57 +#define USRSTACK_LA48 SHAREDPAGE_LA48 +#define USRSTACK USRSTACK_LA48 +#define PS_STRINGS_LA57 (USRSTACK_LA57 - sizeof(struct ps_strings)) +#define PS_STRINGS_LA48 (USRSTACK_LA48 - sizeof(struct ps_strings)) + +#define VM_MAX_ADDRESS UPT_MAX_ADDRESS +#define VM_MIN_ADDRESS (0) + +/* + * XXX Allowing dmaplimit == 0 is a temporary workaround for vt(4) efifb's + * early use of PHYS_TO_DMAP before the mapping is actually setup. This works + * because the result is not actually accessed until later, but the early + * vt fb startup needs to be reworked. + */ +#define PHYS_IN_DMAP(pa) (dmaplimit == 0 || (pa) < dmaplimit) +#define VIRT_IN_DMAP(va) \ + ((va) >= kva_layout.dmap_low && (va) < kva_layout.dmap_low + dmaplimit) + +#define PMAP_HAS_DMAP 1 +#define PHYS_TO_DMAP(x) __extension__ ({ \ + KASSERT(PHYS_IN_DMAP(x), \ + ("physical address %#jx not covered by the DMAP", \ + (uintmax_t)x)); \ + (x) + kva_layout.dmap_low; }) + +#define DMAP_TO_PHYS(x) __extension__ ({ \ + KASSERT(VIRT_IN_DMAP(x), \ + ("virtual address %#jx not covered by the DMAP", \ + (uintmax_t)x)); \ + (x) - kva_layout.dmap_low; }) + +/* + * amd64 maps the page array into KVA so that it can be more easily + * allocated on the correct memory domains. + */ +#define PMAP_HAS_PAGE_ARRAY 1 + +/* + * How many physical pages per kmem arena virtual page. + */ +#ifndef VM_KMEM_SIZE_SCALE +#define VM_KMEM_SIZE_SCALE (1) +#endif + +/* + * Optional ceiling (in bytes) on the size of the kmem arena: 60% of the + * kernel map. + */ +#ifndef VM_KMEM_SIZE_MAX +#define VM_KMEM_SIZE_MAX ((VM_MAX_KERNEL_ADDRESS - \ + kva_layout.km_low + 1) * 3 / 5) +#endif + +/* initial pagein size of beginning of executable file */ +#ifndef VM_INITIAL_PAGEIN +#define VM_INITIAL_PAGEIN 16 +#endif + +#define ZERO_REGION_SIZE (2 * 1024 * 1024) /* 2MB */ + +/* + * The pmap can create non-transparent large page mappings. + */ +#define PMAP_HAS_LARGEPAGES 1 + +/* + * Need a page dump array for minidump. + */ +#define MINIDUMP_PAGE_TRACKING 1 +#define MINIDUMP_STARTUP_PAGE_TRACKING 1 + +#endif /* _MACHINE_VMPARAM_H_ */ + +#endif /* __i386__ */ diff --git a/sys/amd64/include/xen/arch-intr.h b/sys/amd64/include/xen/arch-intr.h new file mode 100644 index 000000000000..6ea4b79be03f --- /dev/null +++ b/sys/amd64/include/xen/arch-intr.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ + +#include <x86/include/xen/arch-intr.h> diff --git a/sys/amd64/include/xen/hypercall.h b/sys/amd64/include/xen/hypercall.h new file mode 100644 index 000000000000..48d60e345983 --- /dev/null +++ b/sys/amd64/include/xen/hypercall.h @@ -0,0 +1,390 @@ +/****************************************************************************** + * SPDX-License-Identifier: MIT OR GPL-2.0-only + * + * hypercall.h + * + * FreeBSD-specific hypervisor handling. + * + * Copyright (c) 2002-2004, K A Fraser + * + * 64-bit updates: + * Benjamin Liu <benjamin.liu@intel.com> + * Jun Nakajima <jun.nakajima@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __MACHINE_XEN_HYPERCALL_H__ +#define __MACHINE_XEN_HYPERCALL_H__ + +#include <sys/systm.h> + +#ifndef __XEN_HYPERVISOR_H__ +# error "please don't include this file directly" +#endif + +extern char *hypercall_page; + +#define __STR(x) #x +#define STR(x) __STR(x) +#define __must_check + +#define HYPERCALL_STR(name) \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)" + +#define _hypercall0(type, name) \ +({ \ + type __res; \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res) \ + : \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + type __res; \ + long __ign1; \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=D" (__ign1) \ + : "1" ((long)(a1)) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + type __res; \ + long __ign1, __ign2; \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2) \ + : "1" ((long)(a1)), "2" ((long)(a2)) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + type __res; \ + long __ign1, __ign2, __ign3; \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + type __res; \ + long __ign1, __ign2, __ign3; \ + register long __arg4 __asm__("r10") = (long)(a4); \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3), "+r" (__arg4) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + type __res; \ + long __ign1, __ign2, __ign3; \ + register long __arg4 __asm__("r10") = (long)(a4); \ + register long __arg5 __asm__("r8") = (long)(a5); \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3), "+r" (__arg4), "+r" (__arg5) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + __res; \ +}) + +static inline int +privcmd_hypercall(long op, long a1, long a2, long a3, long a4, long a5) +{ + int __res; + long __ign1, __ign2, __ign3; + register long __arg4 __asm__("r10") = (long)(a4); + register long __arg5 __asm__("r8") = (long)(a5); + long __call = (long)&hypercall_page + (op * 32); + + if (op >= PAGE_SIZE / 32) + return -EINVAL; + + __asm__ volatile ( + "call *%[call]" + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), + "=d" (__ign3), "+r" (__arg4), "+r" (__arg5) + : "1" ((long)(a1)), "2" ((long)(a2)), + "3" ((long)(a3)), [call] "a" (__call) + : "memory" ); + + return (__res); +} + +static inline int __must_check +HYPERVISOR_set_trap_table( + const trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int __must_check +HYPERVISOR_mmu_update( + mmu_update_t *req, unsigned int count, unsigned int *success_count, + domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int __must_check +HYPERVISOR_mmuext_op( + struct mmuext_op *op, unsigned int count, unsigned int *success_count, + domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int __must_check +HYPERVISOR_set_gdt( + unsigned long *frame_list, unsigned int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int __must_check +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int __must_check +HYPERVISOR_set_callbacks( + unsigned long event_address, unsigned long failsafe_address, + unsigned long syscall_address) +{ + return _hypercall3(int, set_callbacks, + event_address, failsafe_address, syscall_address); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int __must_check +HYPERVISOR_sched_op( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline long __must_check +HYPERVISOR_set_timer_op( + uint64_t timeout) +{ + return _hypercall1(long, set_timer_op, timeout); +} + +static inline int __must_check +HYPERVISOR_platform_op( + struct xen_platform_op *platform_op) +{ + platform_op->interface_version = XENPF_INTERFACE_VERSION; + return _hypercall1(int, platform_op, platform_op); +} + +static inline int __must_check +HYPERVISOR_set_debugreg( + unsigned int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long __must_check +HYPERVISOR_get_debugreg( + unsigned int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int __must_check +HYPERVISOR_update_descriptor( + unsigned long ma, unsigned long word) +{ + return _hypercall2(int, update_descriptor, ma, word); +} + +static inline int __must_check +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_multicall( + multicall_entry_t *call_list, unsigned int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int __must_check +HYPERVISOR_update_va_mapping( + unsigned long va, uint64_t new_val, unsigned long flags) +{ + return _hypercall3(int, update_va_mapping, va, new_val, flags); +} + +static inline int __must_check +HYPERVISOR_event_channel_op( + int cmd, void *arg) +{ + return _hypercall2(int, event_channel_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_console_io( + int cmd, unsigned int count, const char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int __must_check +HYPERVISOR_physdev_op( + int cmd, void *arg) +{ + return _hypercall2(int, physdev_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int __must_check +HYPERVISOR_update_va_mapping_otherdomain( + unsigned long va, uint64_t new_val, unsigned long flags, domid_t domid) +{ + return _hypercall4(int, update_va_mapping_otherdomain, va, + new_val, flags, domid); +} + +static inline int __must_check +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int __must_check +HYPERVISOR_vcpu_op( + int cmd, unsigned int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int __must_check +HYPERVISOR_set_segment_base( + int reg, unsigned long value) +{ + return _hypercall2(int, set_segment_base, reg, value); +} + +static inline int __must_check +HYPERVISOR_suspend( + unsigned long srec) +{ + struct sched_shutdown sched_shutdown = { + .reason = SHUTDOWN_suspend + }; + + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + &sched_shutdown, srec); +} + +static inline unsigned long __must_check +HYPERVISOR_hvm_op( + int op, void *arg) +{ + return _hypercall2(unsigned long, hvm_op, op, arg); +} + +static inline int __must_check +HYPERVISOR_callback_op( + int cmd, const void *arg) +{ + return _hypercall2(int, callback_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_xenoprof_op( + int op, void *arg) +{ + return _hypercall2(int, xenoprof_op, op, arg); +} + +static inline int __must_check +HYPERVISOR_kexec_op( + unsigned long op, void *args) +{ + return _hypercall2(int, kexec_op, op, args); +} + +static inline int __must_check +HYPERVISOR_dm_op( + domid_t domid, unsigned int nr_bufs, const void *bufs) +{ + return _hypercall3(int, dm_op, domid, nr_bufs, bufs); +} + +#undef __must_check + +#endif /* __MACHINE_XEN_HYPERCALL_H__ */ diff --git a/sys/amd64/include/xen/xen-os.h b/sys/amd64/include/xen/xen-os.h new file mode 100644 index 000000000000..ac9f66b2b300 --- /dev/null +++ b/sys/amd64/include/xen/xen-os.h @@ -0,0 +1,5 @@ +/*- + * This file is in the public domain. + */ + +#include <x86/xen/xen-os.h> |