Diffstat (limited to 'sys/arm64')
66 files changed, 3174 insertions, 702 deletions
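The change that threads through copyinout.S, support.S, exec_machdep.c, and pmap.c below is top-byte ignore (TBI) for userspace: the kernel clears the tag byte of a user address before range-checking it, while the access itself still uses the tagged address and so faults if TBI is off for the process. A minimal C sketch of that check follows; the exact TBI_ADDR_MASK value (tag in bits 63:56, as Arm defines TBI) is an assumption here, and the kernel performs this in assembly:

#include <stdbool.h>
#include <stdint.h>

#define TBI_ADDR_MASK 0xff00000000000000UL /* assumed: tag in bits 63:56 */

/* Mirrors check_user_access: mask the tag, detect wrap, compare to limit. */
static inline bool
user_range_ok(uint64_t uva, uint64_t len, uint64_t vm_maxuser)
{
	uint64_t start, end;

	start = uva & ~TBI_ADDR_MASK;	/* and x6, x<va>, #(~TBI_ADDR_MASK) */
	if (__builtin_add_overflow(start, len, &end))
		return (false);		/* adds x6, x6, x<len>; b.cs bad_access */
	return (end < vm_maxuser);	/* cmp x6, x7; b.cs bad_access */
}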
diff --git a/sys/arm64/apple/apple_pinctrl.c b/sys/arm64/apple/apple_pinctrl.c index ebaaccea1d99..c28b1c62d78c 100644 --- a/sys/arm64/apple/apple_pinctrl.c +++ b/sys/arm64/apple/apple_pinctrl.c @@ -171,12 +171,13 @@ apple_pinctrl_attach(device_t dev) OF_xref_from_node(ofw_bus_get_node(dev))); } - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { device_printf(dev, "failed to attach gpiobus\n"); goto error; } + bus_attach_children(dev); return (0); error: mtx_destroy(&sc->sc_mtx); diff --git a/sys/arm64/arm64/copyinout.S b/sys/arm64/arm64/copyinout.S index 26dd0b4cf14f..e41c4b5f6734 100644 --- a/sys/arm64/arm64/copyinout.S +++ b/sys/arm64/arm64/copyinout.S @@ -37,7 +37,14 @@ #include "assym.inc" .macro check_user_access user_arg, size_arg, bad_access_func - adds x6, x\user_arg, x\size_arg + /* + * TBI is enabled from FreeBSD 15.0. Clear the top byte of the userspace + * address before checking whether it's within the given limit. + * The later load/store instructions will fault if TBI is disabled + * for the current process. + */ + and x6, x\user_arg, #(~TBI_ADDR_MASK) + adds x6, x6, x\size_arg b.cs \bad_access_func ldr x7, =VM_MAXUSER_ADDRESS cmp x6, x7 @@ -100,13 +107,20 @@ ENTRY(copyinstr) adr x6, copyio_fault /* Get the handler address */ SET_FAULT_HANDLER(x6, x7) /* Set the handler */ + /* + * As in check_user_access, mask off the TBI bits for the cmp + * instruction. The load will still trap if TBI is disabled, but we + * need to check that the address didn't wrap. + */ + and x6, x0, #(~TBI_ADDR_MASK) ldr x7, =VM_MAXUSER_ADDRESS -1: cmp x0, x7 +1: cmp x6, x7 b.cs copyio_fault ldtrb w4, [x0] /* Load from uaddr */ add x0, x0, #1 /* Next char */ strb w4, [x1], #1 /* Store in kaddr */ add x5, x5, #1 /* count++ */ + add x6, x6, #1 /* Increment masked address */ cbz w4, 2f /* Break when NUL-terminated */ sub x2, x2, #1 /* len-- */ cbnz x2, 1b diff --git a/sys/arm64/arm64/cpu_errata.c b/sys/arm64/arm64/cpu_errata.c index 989924bc0567..b876703a2a15 100644 --- a/sys/arm64/arm64/cpu_errata.c +++ b/sys/arm64/arm64/cpu_errata.c @@ -52,56 +52,11 @@ struct cpu_quirks { u_int flags; }; -static enum { - SSBD_FORCE_ON, - SSBD_FORCE_OFF, - SSBD_KERNEL, -} ssbd_method = SSBD_KERNEL; - -static cpu_quirk_install install_psci_bp_hardening; -static cpu_quirk_install install_ssbd_workaround; static cpu_quirk_install install_thunderx_bcast_tlbi_workaround; static struct cpu_quirks cpu_quirks[] = { { .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A57,0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A72,0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A73,0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A75,0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = - CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX2, 0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = 0, - .midr_value = 0, - .quirk_install = 
install_ssbd_workaround, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, .midr_value = CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX, 0, 0), .quirk_install = install_thunderx_bcast_tlbi_workaround, @@ -114,57 +69,6 @@ static struct cpu_quirks cpu_quirks[] = { }, }; -static void -install_psci_bp_hardening(void) -{ - /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */ - if (!psci_present) - return; - - if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_1) != SMCCC_RET_SUCCESS) - return; - - PCPU_SET(bp_harden, smccc_arch_workaround_1); -} - -static void -install_ssbd_workaround(void) -{ - char *env; - - if (PCPU_GET(cpuid) == 0) { - env = kern_getenv("kern.cfg.ssbd"); - if (env != NULL) { - if (strcmp(env, "force-on") == 0) { - ssbd_method = SSBD_FORCE_ON; - } else if (strcmp(env, "force-off") == 0) { - ssbd_method = SSBD_FORCE_OFF; - } - } - } - - /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */ - if (!psci_present) - return; - - /* Enable the workaround on this CPU if it's enabled in the firmware */ - if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_2) != SMCCC_RET_SUCCESS) - return; - - switch(ssbd_method) { - case SSBD_FORCE_ON: - smccc_arch_workaround_2(1); - break; - case SSBD_FORCE_OFF: - smccc_arch_workaround_2(0); - break; - case SSBD_KERNEL: - default: - PCPU_SET(ssbd, smccc_arch_workaround_2); - break; - } -} - /* * Workaround Cavium erratum 27456. * diff --git a/sys/arm64/arm64/cpu_feat.c b/sys/arm64/arm64/cpu_feat.c index cc262394913d..94114d47f846 100644 --- a/sys/arm64/arm64/cpu_feat.c +++ b/sys/arm64/arm64/cpu_feat.c @@ -32,16 +32,21 @@ #include <machine/cpu.h> #include <machine/cpu_feat.h> +SYSCTL_NODE(_hw, OID_AUTO, feat, CTLFLAG_RD, 0, "CPU features/errata"); + /* TODO: Make this a list if we ever grow a callback other than smccc_errata */ static cpu_feat_errata_check_fn cpu_feat_check_cb = NULL; void enable_cpu_feat(uint32_t stage) { + char tunable[32]; struct cpu_feat **featp, *feat; uint32_t midr; u_int errata_count, *errata_list; cpu_feat_errata errata_status; + cpu_feat_en check_status; + bool val; MPASS((stage & ~CPU_FEAT_STAGE_MASK) == 0); @@ -49,6 +54,21 @@ enable_cpu_feat(uint32_t stage) SET_FOREACH(featp, cpu_feat_set) { feat = *featp; + /* Read any tunable the user may have set */ + if (stage == CPU_FEAT_EARLY_BOOT && PCPU_GET(cpuid) == 0) { + snprintf(tunable, sizeof(tunable), "hw.feat.%s", + feat->feat_name); + if (TUNABLE_BOOL_FETCH(tunable, &val)) { + if (val) { + feat->feat_flags |= + CPU_FEAT_USER_ENABLED; + } else { + feat->feat_flags |= + CPU_FEAT_USER_DISABLED; + } + } + } + /* Run the enablement code at the correct stage of boot */ if ((feat->feat_flags & CPU_FEAT_STAGE_MASK) != stage) continue; @@ -58,8 +78,26 @@ enable_cpu_feat(uint32_t stage) PCPU_GET(cpuid) != 0) continue; - if (feat->feat_check != NULL && !feat->feat_check(feat, midr)) - continue; + if (feat->feat_check != NULL) { + check_status = feat->feat_check(feat, midr); + } else { + check_status = FEAT_DEFAULT_ENABLE; + } + /* Ignore features that are not present */ + if (check_status == FEAT_ALWAYS_DISABLE) + goto next; + + /* The user disabled the feature */ + if ((feat->feat_flags & CPU_FEAT_USER_DISABLED) != 0) + goto next; + + /* + * If the feature was disabled by default and the user + * didn't enable it, skip it. 
+ */ + if (check_status == FEAT_DEFAULT_DISABLE && + (feat->feat_flags & CPU_FEAT_USER_ENABLED) == 0) + goto next; /* * Check if the feature has any errata that may need a @@ -97,8 +135,13 @@ enable_cpu_feat(uint32_t stage) /* Shouldn't be possible */ MPASS(errata_status != ERRATA_UNKNOWN); - feat->feat_enable(feat, errata_status, errata_list, - errata_count); + if (feat->feat_enable(feat, errata_status, errata_list, + errata_count)) + feat->feat_enabled = true; + +next: + if (!feat->feat_enabled && feat->feat_disabled != NULL) + feat->feat_disabled(feat); } } diff --git a/sys/arm64/arm64/db_disasm.c b/sys/arm64/arm64/db_disasm.c index ab1002560b20..14ae2acc2ce6 100644 --- a/sys/arm64/arm64/db_disasm.c +++ b/sys/arm64/arm64/db_disasm.c @@ -31,6 +31,7 @@ #include <ddb/db_access.h> #include <ddb/db_sym.h> +#include <machine/armreg.h> #include <machine/disassem.h> static u_int db_disasm_read_word(vm_offset_t); diff --git a/sys/arm64/arm64/efirt_machdep.c b/sys/arm64/arm64/efirt_machdep.c index 0f46e44f5d6a..bde0d4f784dc 100644 --- a/sys/arm64/arm64/efirt_machdep.c +++ b/sys/arm64/arm64/efirt_machdep.c @@ -106,7 +106,8 @@ efi_1t1_l3(vm_offset_t va) if (*l0 == 0) { m = efi_1t1_page(); mphys = VM_PAGE_TO_PHYS(m); - *l0 = PHYS_TO_PTE(mphys) | L0_TABLE; + *l0 = PHYS_TO_PTE(mphys) | TATTR_UXN_TABLE | + TATTR_AP_TABLE_NO_EL0 | L0_TABLE; } else { mphys = PTE_TO_PHYS(*l0); } @@ -117,7 +118,8 @@ efi_1t1_l3(vm_offset_t va) if (*l1 == 0) { m = efi_1t1_page(); mphys = VM_PAGE_TO_PHYS(m); - *l1 = PHYS_TO_PTE(mphys) | L1_TABLE; + *l1 = PHYS_TO_PTE(mphys) | TATTR_UXN_TABLE | + TATTR_AP_TABLE_NO_EL0 | L1_TABLE; } else { mphys = PTE_TO_PHYS(*l1); } @@ -128,7 +130,8 @@ efi_1t1_l3(vm_offset_t va) if (*l2 == 0) { m = efi_1t1_page(); mphys = VM_PAGE_TO_PHYS(m); - *l2 = PHYS_TO_PTE(mphys) | L2_TABLE; + *l2 = PHYS_TO_PTE(mphys) | TATTR_UXN_TABLE | + TATTR_AP_TABLE_NO_EL0 | L2_TABLE; } else { mphys = PTE_TO_PHYS(*l2); } @@ -218,8 +221,9 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) p->md_phys, mode, p->md_pages); } - l3_attr = ATTR_AF | pmap_sh_attr | ATTR_S1_IDX(mode) | - ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_nG | L3_PAGE; + l3_attr = ATTR_S1_UXN | ATTR_AF | pmap_sh_attr | + ATTR_S1_IDX(mode) | ATTR_S1_AP(ATTR_S1_AP_RW) | + ATTR_S1_nG | L3_PAGE; if (mode == VM_MEMATTR_DEVICE || p->md_attr & EFI_MD_ATTR_XP) l3_attr |= ATTR_S1_XN; @@ -241,6 +245,7 @@ fail: int efi_arch_enter(void) { + uint64_t tcr; CRITICAL_ASSERT(curthread); curthread->td_md.md_efirt_dis_pf = vm_fault_disable_pagefaults(); @@ -249,7 +254,17 @@ efi_arch_enter(void) * Temporarily switch to EFI's page table. However, we leave curpmap * unchanged in order to prevent its ASID from being reclaimed before * we switch back to its page table in efi_arch_leave(). + * + * UEFI doesn't care about TBI, so enable it. It's more likely + * userspace will have TBI on as it's only disabled for backwards + * compatibility. */ + tcr = READ_SPECIALREG(tcr_el1); + if ((tcr & MD_TCR_FIELDS) != TCR_TBI0) { + tcr &= ~MD_TCR_FIELDS; + tcr |= TCR_TBI0; + WRITE_SPECIALREG(tcr_el1, tcr); + } set_ttbr0(efi_ttbr0); if (PCPU_GET(bcast_tlbi_workaround) != 0) invalidate_local_icache(); @@ -260,6 +275,7 @@ efi_arch_enter(void) void efi_arch_leave(void) { + uint64_t proc_tcr, tcr; /* * Restore the pcpu pointer. 
Some UEFI implementations trash it and @@ -271,6 +287,13 @@ efi_arch_leave(void) __asm __volatile( "mrs x18, tpidr_el1 \n" ); + proc_tcr = curthread->td_proc->p_md.md_tcr; + tcr = READ_SPECIALREG(tcr_el1); + if ((tcr & MD_TCR_FIELDS) != proc_tcr) { + tcr &= ~MD_TCR_FIELDS; + tcr |= proc_tcr; + WRITE_SPECIALREG(tcr_el1, tcr); + } set_ttbr0(pmap_to_ttbr0(PCPU_GET(curpmap))); if (PCPU_GET(bcast_tlbi_workaround) != 0) invalidate_local_icache(); diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c index 7cd5327b9f1b..4cb8ee5f57ef 100644 --- a/sys/arm64/arm64/elf32_machdep.c +++ b/sys/arm64/arm64/elf32_machdep.c @@ -195,7 +195,7 @@ freebsd32_fetch_syscall_args(struct thread *td) register_t *ap; struct syscall_args *sa; int error, i, nap, narg; - unsigned int args[4]; + unsigned int args[6]; nap = 4; p = td->td_proc; @@ -210,7 +210,7 @@ freebsd32_fetch_syscall_args(struct thread *td) sa->code = *ap++; nap--; } else if (sa->code == SYS___syscall) { - sa->code = ap[1]; + sa->code = ap[_QUAD_LOWWORD]; nap -= 2; ap += 2; } @@ -225,7 +225,7 @@ freebsd32_fetch_syscall_args(struct thread *td) sa->args[i] = ap[i]; if (narg > nap) { if (narg - nap > nitems(args)) - panic("Too many system call arguiments"); + panic("Too many system call arguments"); error = copyin((void *)td->td_frame->tf_x[13], args, (narg - nap) * sizeof(int)); if (error != 0) diff --git a/sys/arm64/arm64/elf_machdep.c b/sys/arm64/arm64/elf_machdep.c index 970dba0ca7d9..207b37180a26 100644 --- a/sys/arm64/arm64/elf_machdep.c +++ b/sys/arm64/arm64/elf_machdep.c @@ -65,7 +65,13 @@ u_long __read_frequently linux_elf_hwcap2; u_long __read_frequently linux_elf_hwcap3; u_long __read_frequently linux_elf_hwcap4; -struct arm64_addr_mask elf64_addr_mask; +struct arm64_addr_mask elf64_addr_mask = { + .code = TBI_ADDR_MASK, + .data = TBI_ADDR_MASK, +}; +#ifdef COMPAT_FREEBSD14 +struct arm64_addr_mask elf64_addr_mask_14; +#endif static void arm64_exec_protect(struct image_params *, int); @@ -115,7 +121,7 @@ static struct sysentvec elf64_freebsd_sysvec = { }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); -static Elf64_Brandinfo freebsd_brand_info = { +static const Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_AARCH64, .compat_3_brand = "FreeBSD", @@ -125,8 +131,7 @@ static Elf64_Brandinfo freebsd_brand_info = { .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; - -SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, +C_SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info); static bool @@ -136,7 +141,14 @@ get_arm64_addr_mask(struct regset *rs, struct thread *td, void *buf, if (buf != NULL) { KASSERT(*sizep == sizeof(elf64_addr_mask), ("%s: invalid size", __func__)); +#ifdef COMPAT_FREEBSD14 + /* If running an old binary, use the old address mask */ + if (td->td_proc->p_osrel < TBI_VERSION) + memcpy(buf, &elf64_addr_mask_14, + sizeof(elf64_addr_mask_14)); + else +#endif + memcpy(buf, &elf64_addr_mask, sizeof(elf64_addr_mask)); } *sizep = sizeof(elf64_addr_mask); @@ -323,7 +335,7 @@ elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) return (0); } -static Elf_Note gnu_property_note = { +static const Elf_Note gnu_property_note = { .n_namesz = sizeof(GNU_ABI_VENDOR), .n_descsz = 16, .n_type = NT_GNU_PROPERTY_TYPE_0, diff --git a/sys/arm64/arm64/exception.S b/sys/arm64/arm64/exception.S index 13095def8b00..5a4181348a54 100644 --- 
a/sys/arm64/arm64/exception.S +++ b/sys/arm64/arm64/exception.S @@ -42,10 +42,9 @@ */ .macro save_registers_head el .if \el == 1 - mov x18, sp - stp x0, x1, [sp, #(TF_X - TF_SIZE - 128)]! + stp x0, x1, [sp, #-(TF_SIZE - TF_X + 128)]! .else - stp x0, x1, [sp, #(TF_X - TF_SIZE)]! + stp x0, x1, [sp, #-(TF_SIZE - TF_X)]! .endif stp x2, x3, [sp, #(2 * 8)] stp x4, x5, [sp, #(4 * 8)] @@ -61,7 +60,9 @@ stp x24, x25, [sp, #(24 * 8)] stp x26, x27, [sp, #(26 * 8)] stp x28, x29, [sp, #(28 * 8)] -.if \el == 0 +.if \el == 1 + add x18, sp, #(TF_SIZE - TF_X + 128) +.else mrs x18, sp_el0 .endif mrs x10, elr_el1 diff --git a/sys/arm64/arm64/exec_machdep.c b/sys/arm64/arm64/exec_machdep.c index 751329affd91..7c50dc93fdb4 100644 --- a/sys/arm64/arm64/exec_machdep.c +++ b/sys/arm64/arm64/exec_machdep.c @@ -51,6 +51,7 @@ #include <vm/vm_map.h> #include <machine/armreg.h> +#include <machine/elf.h> #include <machine/kdb.h> #include <machine/md_var.h> #include <machine/pcb.h> @@ -411,6 +412,7 @@ exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *tf = td->td_frame; struct pcb *pcb = td->td_pcb; + uint64_t new_tcr, tcr; memset(tf, 0, sizeof(struct trapframe)); @@ -433,6 +435,35 @@ exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) */ bzero(&pcb->pcb_dbg_regs, sizeof(pcb->pcb_dbg_regs)); + /* If the process is new enough enable TBI */ + if (td->td_proc->p_osrel >= TBI_VERSION) + new_tcr = TCR_TBI0; + else + new_tcr = 0; + td->td_proc->p_md.md_tcr = new_tcr; + + /* TODO: should create a pmap function for this... */ + tcr = READ_SPECIALREG(tcr_el1); + if ((tcr & MD_TCR_FIELDS) != new_tcr) { + uint64_t asid; + + tcr &= ~MD_TCR_FIELDS; + tcr |= new_tcr; + WRITE_SPECIALREG(tcr_el1, tcr); + isb(); + + /* + * TCR_EL1.TBI0 is permitted to be cached in the TLB, so + * we need to perform a TLB invalidation. 
+ */ + asid = READ_SPECIALREG(ttbr0_el1) & TTBR_ASID_MASK; + __asm __volatile( + "tlbi aside1is, %0 \n" + "dsb ish \n" + "isb \n" + : : "r" (asid)); + } + /* Generate new pointer authentication keys */ ptrauth_exec(td); } diff --git a/sys/arm64/arm64/genassym.c b/sys/arm64/arm64/genassym.c index e3977798b046..22696796e69d 100644 --- a/sys/arm64/arm64/genassym.c +++ b/sys/arm64/arm64/genassym.c @@ -64,6 +64,7 @@ ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); ASSYM(P_PID, offsetof(struct proc, p_pid)); +ASSYM(P_MD_TCR, offsetof(struct proc, p_md.md_tcr)); ASSYM(SF_UC, offsetof(struct sigframe, sf_uc)); diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c index bcacea43ad2f..2d07420bcdb0 100644 --- a/sys/arm64/arm64/identcpu.c +++ b/sys/arm64/arm64/identcpu.c @@ -232,6 +232,10 @@ static const struct cpu_parts cpu_parts_arm[] = { { CPU_PART_CORTEX_X2, "Cortex-X2" }, { CPU_PART_CORTEX_X3, "Cortex-X3" }, { CPU_PART_CORTEX_X4, "Cortex-X4" }, + { CPU_PART_C1_NANO, "C1-Nano" }, + { CPU_PART_C1_PRO, "C1-Pro" }, + { CPU_PART_C1_PREMIUM, "C1-Premium" }, + { CPU_PART_C1_ULTRA, "C1-Ultra" }, { CPU_PART_NEOVERSE_E1, "Neoverse-E1" }, { CPU_PART_NEOVERSE_N1, "Neoverse-N1" }, { CPU_PART_NEOVERSE_N2, "Neoverse-N2" }, @@ -2272,37 +2276,25 @@ static const struct mrs_user_reg user_regs[] = { static bool user_ctr_has_neoverse_n1_1542419(uint32_t midr, uint64_t ctr) { - /* Skip non-Neoverse-N1 */ - if (!CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_ARM, - CPU_PART_NEOVERSE_N1, 0, 0)) - return (false); - - switch (CPU_VAR(midr)) { - default: - break; - case 4: - /* Fixed in r4p1 */ - if (CPU_REV(midr) > 0) - break; - /* FALLTHROUGH */ - case 3: - /* If DIC is enabled (coherent icache) then we are affected */ - return (CTR_DIC_VAL(ctr) != 0); - } - - return (false); + /* + * Neoverse-N1 erratum 1542419 + * Present in r3p0 - r4p0 + * Fixed in r4p1 + */ + return (midr_check_var_part_range(midr, CPU_IMPL_ARM, + CPU_PART_NEOVERSE_N1, 3, 0, 4, 0) && CTR_DIC_VAL(ctr) != 0); } -static bool -user_ctr_check(const struct cpu_feat *feat __unused, u_int midr __unused) +static cpu_feat_en +user_ctr_check(const struct cpu_feat *feat __unused, u_int midr) { if (emulate_ctr) - return (true); + return (FEAT_DEFAULT_ENABLE); if (user_ctr_has_neoverse_n1_1542419(midr, READ_SPECIALREG(ctr_el0))) - return (true); + return (FEAT_DEFAULT_ENABLE); - return (false); + return (FEAT_ALWAYS_DISABLE); } static bool @@ -2320,7 +2312,7 @@ user_ctr_has_errata(const struct cpu_feat *feat __unused, u_int midr, return (false); } -static void +static bool user_ctr_enable(const struct cpu_feat *feat __unused, cpu_feat_errata errata_status, u_int *errata_list, u_int errata_count) { @@ -2356,16 +2348,13 @@ user_ctr_enable(const struct cpu_feat *feat __unused, WRITE_SPECIALREG(sctlr_el1, READ_SPECIALREG(sctlr_el1) & ~SCTLR_UCT); isb(); + + return (true); } -static struct cpu_feat user_ctr = { - .feat_name = "Trap CTR_EL0", - .feat_check = user_ctr_check, - .feat_has_errata = user_ctr_has_errata, - .feat_enable = user_ctr_enable, - .feat_flags = CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU, -}; -DATA_SET(cpu_feat_set, user_ctr); +CPU_FEAT(trap_ctr, "Trap CTR_EL0", + user_ctr_check, user_ctr_has_errata, user_ctr_enable, NULL, + CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU); static bool user_ctr_handler(uint64_t esr, struct trapframe *frame) diff --git a/sys/arm64/arm64/kexec_support.c b/sys/arm64/arm64/kexec_support.c new file mode 100644 index 000000000000..8b9719c05b67 --- /dev/null +++ 
b/sys/arm64/arm64/kexec_support.c @@ -0,0 +1,188 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/kexec.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vm_phys.h> +#include <vm/vm_radix.h> +#include <vm/pmap.h> +#include <vm/vm_page.h> + +#include <machine/armreg.h> +#include <machine/pmap.h> +#include <machine/pte.h> + +/* + * Idea behind this: + * + * kexec_load_md(): + * - Update boot page tables (identity map) to include all pages needed before + * disabling MMU. + * + * kexec_reboot_md(): + * - Copy pages into target(s) + * - Do "other stuff" + * - Does not return + */ + +extern pt_entry_t pagetable_l0_ttbr0_bootstrap[]; +extern unsigned long initstack_end[]; +void switch_stack(void *, void (*)(void *, void *, struct kexec_image *), void *); + +#define SCTLR_EL1_NO_MMU (SCTLR_RES1 | SCTLR_LSMAOE | SCTLR_nTLSMD | \ + SCTLR_EIS | SCTLR_TSCXT | SCTLR_EOS) +#define vm_page_offset(m) ((vm_offset_t)(m) - vm_page_base) +static inline vm_page_t +phys_vm_page(vm_page_t m, vm_offset_t vm_page_v, vm_paddr_t vm_page_p) +{ + return ((vm_page_t)((vm_offset_t)m - vm_page_v + vm_page_p)); +} + +/* First 2 args are filler for switch_stack() */ +static void __aligned(16) __dead2 +kexec_reboot_bottom( void *arg1 __unused, void *arg2 __unused, + struct kexec_image *image) +{ + void (*e)(void) = (void *)image->entry; + vm_offset_t vm_page_base = (vm_offset_t)vm_page_array; + vm_paddr_t vm_page_phys = pmap_kextract((vm_offset_t)vm_page_array); + struct kexec_segment_stage *phys_segs = + (void *)pmap_kextract((vm_offset_t)&image->segments); + vm_paddr_t from_pa, to_pa; + vm_size_t size; + vm_page_t first, m, mp; + struct pctrie_iter pct_i; + + /* + * Create a linked list of all pages in the object before we disable the + * MMU. Once the MMU is disabled we can't use the vm_radix iterators, + * as they rely on virtual address pointers. 
+ */ + first = NULL; + vm_radix_iter_init(&pct_i, &image->map_obj->rtree); + VM_RADIX_FORALL(m, &pct_i) { + if (first == NULL) + first = m; + else + SLIST_INSERT_AFTER(mp, m, plinks.s.ss); + mp = m; + } + + /* + * We're running out of the identity map now, disable the MMU before we + * continue. It's possible page tables can be overwritten, which would + * be very bad if we were running with the MMU enabled. + */ + WRITE_SPECIALREG(sctlr_el1, SCTLR_EL1_NO_MMU); + isb(); + for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) { + if (phys_segs[i].size == 0) + break; + to_pa = phys_segs[i].target; + /* Copy the segment here... */ + for (vm_page_t p = phys_segs[i].first_page; + p != NULL && to_pa - phys_segs[i].target < phys_segs[i].size; + p = SLIST_NEXT(p, plinks.s.ss)) { + p = phys_vm_page(p, vm_page_base, vm_page_phys); + from_pa = p->phys_addr; + if (p->phys_addr == to_pa) { + to_pa += PAGE_SIZE; + continue; + } + for (size = PAGE_SIZE / sizeof(register_t); + size > 0; --size) { + *(register_t *)to_pa = *(register_t *)from_pa; + to_pa += sizeof(register_t); + from_pa += sizeof(register_t); + } + } + } + invalidate_icache(); + e(); + while (1) + ; +} + +void +kexec_reboot_md(struct kexec_image *image) +{ + uintptr_t ptr; + register_t reg; + + for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) { + if (image->segments[i].size > 0) + cpu_dcache_inv_range((void *)PHYS_TO_DMAP(image->segments[i].target), + image->segments[i].size); + } + ptr = pmap_kextract((vm_offset_t)kexec_reboot_bottom); + serror_disable(); + + reg = pmap_kextract((vm_offset_t)pagetable_l0_ttbr0_bootstrap); + set_ttbr0(reg); + cpu_tlb_flushID(); + + typeof(kexec_reboot_bottom) *p = (void *)ptr; + switch_stack((void *)pmap_kextract((vm_offset_t)initstack_end), + p, image); + while (1) + ; +} + +int +kexec_load_md(struct kexec_image *image) +{ + vm_paddr_t tmp; + pt_entry_t *pte; + + /* Create L2 page blocks for the trampoline. L0/L1 are from the startup. */ + + /* + * There are exactly 2 pages before the pagetable_l0_ttbr0_bootstrap, so + * move there. + */ + pte = pagetable_l0_ttbr0_bootstrap; + pte -= (Ln_ENTRIES * 2); /* move to start of L2 pages */ + + /* + * Populate the identity map with symbols we know we'll need before we + * turn off the MMU. + */ + tmp = pmap_kextract((vm_offset_t)kexec_reboot_bottom); + pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN); + tmp = pmap_kextract((vm_offset_t)initstack_end); + pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN); + /* We'll need vm_page_array for doing offset calculations. */ + tmp = pmap_kextract((vm_offset_t)&vm_page_array); + pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN); + + return (0); +} diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S index bb323dbafd85..c22d5fe76468 100644 --- a/sys/arm64/arm64/locore.S +++ b/sys/arm64/arm64/locore.S @@ -39,6 +39,23 @@ #define VIRT_BITS 48 +/* + * Loads a 64-bit value into reg using 1 to 4 mov/movk instructions. + * This can be used early on when we don't know the CPU's endianness. + */ +.macro mov_q reg, val + mov \reg, :abs_g0_nc:\val +.if (\val >> 16) & 0xffff != 0 + movk \reg, :abs_g1_nc:\val +.endif +.if (\val >> 32) & 0xffff != 0 + movk \reg, :abs_g2_nc:\val +.endif +.if (\val >> 48) & 0xffff != 0 + movk \reg, :abs_g3:\val +.endif +.endm + #if PAGE_SIZE == PAGE_SIZE_16K /* * The number of level 3 tables to create. 
32 will allow for 1G of address @@ -308,6 +325,19 @@ mp_virtdone: b init_secondary LEND(mpentry_common) + +ENTRY(mp_cpu_spinloop) +0: + wfe + ldr x0, mp_cpu_spin_table_release_addr + cbz x0, 0b + blr x0 + .globl mp_cpu_spin_table_release_addr +mp_cpu_spin_table_release_addr: + .quad 0 + .globl mp_cpu_spinloop_end +mp_cpu_spinloop_end: +END(mp_cpu_spinloop) #endif /* @@ -319,22 +349,28 @@ LEND(mpentry_common) * - Configure EL2 to support running the kernel at EL1 and exit to that */ LENTRY(enter_kernel_el) -#define INIT_SCTLR_EL1 (SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_EIS | \ - SCTLR_TSCXT | SCTLR_EOS) mrs x23, CurrentEL and x23, x23, #(CURRENTEL_EL_MASK) cmp x23, #(CURRENTEL_EL_EL2) b.eq 1f - ldr x2, =INIT_SCTLR_EL1 + /* + * Ensure there are no memory operations here. If the boot loader + * enters the kernel in big-endian mode then loading sctlr will + * be incorrect. As instructions are the same in both endians it is + * safe to use mov instructions. + */ + mov_q x2, SCTLR_MMU_OFF msr sctlr_el1, x2 - /* SCTLR_EOS is set so eret is a context synchronizing event so we + /* + * SCTLR_EOS is set to make eret a context synchronizing event. We * need an isb here to ensure it's observed by later instructions, * but don't need it in the eret below. */ isb - /* Ensure SPSR_EL1 and pstate are in sync. The only wat to set the + /* + * Ensure SPSR_EL1 and pstate are in sync. The only way to set the * latter is to set the former and return from an exception with eret. */ mov x2, #(PSR_DAIF | PSR_M_EL1h) @@ -348,11 +384,19 @@ LENTRY(enter_kernel_el) * Set just the reserved bits in sctlr_el2. This will disable the * MMU which may have broken the kernel if we enter the kernel in * EL2, e.g. when using VHE. + * + * As with sctlr_el1 above use mov instructions to ensure there are + * no memory operations. */ - ldr x2, =(SCTLR_EL2_RES1 | SCTLR_EL2_EIS | SCTLR_EL2_EOS) + mov_q x2, (SCTLR_EL2_RES1 | SCTLR_EL2_EIS | SCTLR_EL2_EOS) msr sctlr_el2, x2 isb + /* + * The hardware is now in little-endian mode so memory operations + * are safe. + */ + /* Configure the Hypervisor */ ldr x2, =(HCR_RW | HCR_APK | HCR_API) msr hcr_el2, x2 @@ -370,11 +414,11 @@ LENTRY(enter_kernel_el) msr vmpidr_el2, x2 /* Set the initial sctlr_el1 */ - ldr x2, =INIT_SCTLR_EL1 + ldr x2, =SCTLR_MMU_OFF msr sctlr_el1, x2 /* Check for VHE */ - CHECK_CPU_FEAT(x2, ID_AA64MMFR1, VH, .Lno_vhe) + CHECK_CPU_FEAT(x2, ID_AA64MMFR1, VH, IMPL, .Lno_vhe) /* * The kernel will be running in EL2, route exceptions here rather @@ -387,7 +431,7 @@ LENTRY(enter_kernel_el) msr SCTLR_EL12_REG, x2 mov x2, xzr /* CPTR_EL2 is managed by vfp.c */ - ldr x3, =(CNTHCTL_E2H_EL1PCTEN | CNTHCTL_E2H_EL1PTEN) + ldr x3, =(CNTHCTL_E2H_EL1PCTEN_NOTRAP | CNTHCTL_E2H_EL1PTEN_NOTRAP) ldr x5, =(PSR_DAIF | PSR_M_EL2h) b .Ldone_vhe @@ -398,9 +442,13 @@ LENTRY(enter_kernel_el) msr vbar_el2, x2 ldr x2, =(CPTR_RES1) - ldr x3, =(CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) + ldr x3, =(CNTHCTL_EL1PCTEN_NOTRAP | CNTHCTL_EL1PCEN_NOTRAP) ldr x5, =(PSR_DAIF | PSR_M_EL1h) + /* Enable SPE at EL1 via Monitor Debug Configuration Register */ + mov x6, MDCR_EL2_E2PB_EL1_0_NO_TRAP + msr mdcr_el2, x6 + .Ldone_vhe: msr cptr_el2, x2 @@ -413,10 +461,9 @@ LENTRY(enter_kernel_el) * Configure the Extended Hypervisor register. This is only valid if * FEAT_HCX is enabled. 
*/ - CHECK_CPU_FEAT(x2, ID_AA64MMFR1, HCX, 2f) + CHECK_CPU_FEAT(x2, ID_AA64MMFR1, HCX, IMPL, 2f) /* Extended Hypervisor Configuration */ - mov x2, xzr - msr HCRX_EL2_REG, x2 + msr HCRX_EL2_REG, xzr isb 2: @@ -430,7 +477,7 @@ LENTRY(enter_kernel_el) msr vttbr_el2, xzr /* Check the CPU supports GIC, and configure the CPU interface */ - CHECK_CPU_FEAT(x2, ID_AA64PFR0, GIC, 3f) + CHECK_CPU_FEAT(x2, ID_AA64PFR0, GIC, CPUIF_EN, 3f) mrs x2, icc_sre_el2 orr x2, x2, #ICC_SRE_EL2_EN /* Enable access from insecure EL1 */ @@ -443,9 +490,31 @@ LENTRY(enter_kernel_el) isb eret -#undef INIT_SCTLR_EL1 LEND(enter_kernel_el) +/* Turn off the MMU. Install ttbr0 from the bootstrap page table, and go there. + * Does not return. + * - x0 - target address to jump to after stopping the MMU. + * - x1 - kernel load address + */ +ENTRY(stop_mmu) + mov x16, x0 /* Save target. */ + ldr x2, =(1f - KERNBASE) + add x17, x1, x2 + ldr x3, =(pagetable_l0_ttbr0_bootstrap - KERNBASE) + add x1, x1, x3 + msr ttbr0_el1, x1 + isb + br x17 +1: + BTI_J + mrs x0, sctlr_el1 + bic x0, x0, SCTLR_M + bic x0, x0, SCTLR_C + msr sctlr_el1, x0 + isb + br x16 +END(stop_mmu) /* * Get the physical address the kernel was loaded at. */ @@ -1029,7 +1098,7 @@ LENTRY(start_mmu) * HW management of dirty state is set in C code as it may * need to be disabled because of CPU errata. */ - CHECK_CPU_FEAT(x3, ID_AA64MMFR1, HAFDBS, 1f) + CHECK_CPU_FEAT(x3, ID_AA64MMFR1, HAFDBS, AF, 1f) orr x2, x2, #(TCR_HA) 1: @@ -1038,11 +1107,7 @@ LENTRY(start_mmu) /* * Setup SCTLR. */ - ldr x2, sctlr_set - ldr x3, sctlr_clear - mrs x1, sctlr_el1 - bic x1, x1, x3 /* Clear the required bits */ - orr x1, x1, x2 /* Set the required bits */ + ldr x1, =SCTLR_MMU_ON msr sctlr_el1, x1 isb @@ -1067,24 +1132,21 @@ tcr: .quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_TG | \ TCR_SH1_IS | TCR_ORGN1_WBWA | TCR_IRGN1_WBWA | \ TCR_SH0_IS | TCR_ORGN0_WBWA | TCR_IRGN0_WBWA) -sctlr_set: - /* Bits to set */ - .quad (SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_UCI | SCTLR_SPAN | \ - SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \ - SCTLR_I | SCTLR_SED | SCTLR_SA0 | SCTLR_SA | SCTLR_C | \ - SCTLR_M | SCTLR_CP15BEN | SCTLR_BT1 | SCTLR_BT0) -sctlr_clear: - /* Bits to clear */ - .quad (SCTLR_EE | SCTLR_E0E | SCTLR_IESB | SCTLR_WXN | SCTLR_UMA | \ - SCTLR_ITD | SCTLR_A) LEND(start_mmu) +ENTRY(switch_stack) + mov sp, x0 + mov x16, x1 + br x16 +END(switch_stack) + ENTRY(abort) b abort END(abort) .bss .align PAGE_SHIFT + .globl initstack_end initstack: .space BOOT_STACK_SIZE initstack_end: @@ -1101,6 +1163,7 @@ initstack_end: * L0 for user */ .globl pagetable_l0_ttbr1 + .globl pagetable_l0_ttbr0_bootstrap pagetable: pagetable_l3_ttbr1: .space (PAGE_SIZE * L3_PAGE_COUNT) diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c index 53856dd90cae..322bad273a08 100644 --- a/sys/arm64/arm64/machdep.c +++ b/sys/arm64/arm64/machdep.c @@ -173,16 +173,20 @@ SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL); SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL); #endif -static bool +static cpu_feat_en pan_check(const struct cpu_feat *feat __unused, u_int midr __unused) { uint64_t id_aa64mfr1; - id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1); - return (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE); + if (!get_kernel_reg(ID_AA64MMFR1_EL1, &id_aa64mfr1)) + return (FEAT_ALWAYS_DISABLE); + if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) == ID_AA64MMFR1_PAN_NONE) + return (FEAT_ALWAYS_DISABLE); + + return (FEAT_DEFAULT_ENABLE); } -static void +static bool 
pan_enable(const struct cpu_feat *feat __unused, cpu_feat_errata errata_status __unused, u_int *errata_list __unused, u_int errata_count __unused) @@ -200,15 +204,20 @@ pan_enable(const struct cpu_feat *feat __unused, ".arch_extension pan \n" "msr pan, #1 \n" ".arch_extension nopan \n"); + + return (true); } -static struct cpu_feat feat_pan = { - .feat_name = "FEAT_PAN", - .feat_check = pan_check, - .feat_enable = pan_enable, - .feat_flags = CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU, -}; -DATA_SET(cpu_feat_set, feat_pan); +static void +pan_disabled(const struct cpu_feat *feat __unused) +{ + if (PCPU_GET(cpuid) == 0) + update_special_reg(ID_AA64MMFR1_EL1, ID_AA64MMFR1_PAN_MASK, 0); +} + +CPU_FEAT(feat_pan, "Privileged access never", + pan_check, NULL, pan_enable, pan_disabled, + CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU); bool has_hyp(void) @@ -857,7 +866,7 @@ initarm(struct arm64_bootparams *abp) cninit(); set_ttbr0(abp->kern_ttbr0); - cpu_tlb_flushID(); + pmap_s1_invalidate_all_kernel(); if (!valid) panic("Invalid bus configuration: %s", diff --git a/sys/arm64/arm64/machdep_boot.c b/sys/arm64/arm64/machdep_boot.c index 83bd74ea7317..1c5e8189e436 100644 --- a/sys/arm64/arm64/machdep_boot.c +++ b/sys/arm64/arm64/machdep_boot.c @@ -106,7 +106,8 @@ fake_preload_metadata(void *dtb_ptr, size_t dtb_size) PRELOAD_PUSH_VALUE(uint32_t, MODINFO_SIZE); PRELOAD_PUSH_VALUE(uint32_t, sizeof(size_t)); - PRELOAD_PUSH_VALUE(uint64_t, (size_t)(&end - VM_MIN_KERNEL_ADDRESS)); + PRELOAD_PUSH_VALUE(uint64_t, + (size_t)((vm_offset_t)&end - VM_MIN_KERNEL_ADDRESS)); if (dtb_ptr != NULL) { /* Copy DTB to KVA space and insert it into module chain. */ diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c index e4d011df3a06..0bdd2ecfd8a7 100644 --- a/sys/arm64/arm64/mp_machdep.c +++ b/sys/arm64/arm64/mp_machdep.c @@ -60,6 +60,7 @@ #include <machine/debug_monitor.h> #include <machine/intr.h> #include <machine/smp.h> +#include <machine/vmparam.h> #ifdef VFP #include <machine/vfp.h> #endif @@ -103,6 +104,7 @@ static void ipi_hardclock(void *); static void ipi_preempt(void *); static void ipi_rendezvous(void *); static void ipi_stop(void *); +static void ipi_off(void *); #ifdef FDT static u_int fdt_cpuid; @@ -193,6 +195,7 @@ release_aps(void *dummy __unused) intr_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL); intr_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL); intr_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL); + intr_ipi_setup(IPI_OFF, "off", ipi_off, NULL); atomic_store_int(&aps_started, 0); atomic_store_rel_int(&aps_ready, 1); @@ -390,6 +393,34 @@ ipi_stop(void *dummy __unused) CTR0(KTR_SMP, "IPI_STOP (restart)"); } +void stop_mmu(vm_paddr_t, vm_paddr_t) __dead2; +extern uint32_t mp_cpu_spinloop[]; +extern uint32_t mp_cpu_spinloop_end[]; +extern uint64_t mp_cpu_spin_table_release_addr; +static void +ipi_off(void *dummy __unused) +{ + CTR0(KTR_SMP, "IPI_OFF"); + if (psci_present) + psci_cpu_off(); + else { + uint64_t release_addr; + vm_size_t size; + + size = (vm_offset_t)&mp_cpu_spin_table_release_addr - + (vm_offset_t)mp_cpu_spinloop; + release_addr = PCPU_GET(release_addr) - size; + isb(); + invalidate_icache(); + /* Go catatonic, don't take any interrupts. 
*/ + intr_disable(); + stop_mmu(release_addr, pmap_kextract(KERNBASE)); + + + } + CTR0(KTR_SMP, "IPI_OFF failed"); +} + struct cpu_group * cpu_topo(void) { @@ -511,6 +542,7 @@ start_cpu(u_int cpuid, uint64_t target_cpu, int domain, vm_paddr_t release_addr) pcpu_init(pcpup, cpuid, sizeof(struct pcpu)); pcpup->pc_mpidr = target_cpu & CPU_AFF_MASK; bootpcpu = pcpup; + pcpup->pc_release_addr = release_addr; dpcpu[cpuid - 1] = (void *)(pcpup + 1); dpcpu_init(dpcpu[cpuid - 1], cpuid); @@ -752,6 +784,52 @@ cpu_mp_start(void) } } +void +cpu_mp_stop(void) +{ + + /* Short-circuit for single-CPU */ + if (CPU_COUNT(&all_cpus) == 1) + return; + + KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), ("Not on the first CPU!\n")); + + /* + * If we use spin-table, assume U-boot method for now (single address + * shared by all CPUs). + */ + if (!psci_present) { + int cpu; + vm_paddr_t release_addr; + void *release_vaddr; + vm_size_t size; + + /* Find the shared release address. */ + CPU_FOREACH(cpu) { + release_addr = pcpu_find(cpu)->pc_release_addr; + if (release_addr != 0) + break; + } + /* No release address? No way of notifying other CPUs. */ + if (release_addr == 0) + return; + + size = (vm_offset_t)&mp_cpu_spinloop_end - + (vm_offset_t)&mp_cpu_spinloop; + + release_addr -= (vm_offset_t)&mp_cpu_spin_table_release_addr - + (vm_offset_t)mp_cpu_spinloop; + + release_vaddr = pmap_mapdev(release_addr, size); + bcopy(mp_cpu_spinloop, release_vaddr, size); + cpu_dcache_wbinv_range(release_vaddr, size); + pmap_unmapdev(release_vaddr, size); + invalidate_icache(); + } + ipi_all_but_self(IPI_OFF); + DELAY(1000000); +} + /* Introduce rest of cores to the world */ void cpu_mp_announce(void) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 2152f7fcc1c6..dbf5c820d20b 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -190,6 +190,8 @@ pt_entry_t __read_mostly pmap_gp_attr; #define PMAP_SAN_PTE_BITS (ATTR_AF | ATTR_S1_XN | pmap_sh_attr | \ ATTR_KERN_GP | ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | ATTR_S1_AP(ATTR_S1_AP_RW)) +static bool __read_mostly pmap_multiple_tlbi = false; + struct pmap_large_md_page { struct rwlock pv_lock; struct md_page pv_page; @@ -469,7 +471,7 @@ static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte); -static bool pmap_activate_int(pmap_t pmap); +static bool pmap_activate_int(struct thread *td, pmap_t pmap); static void pmap_alloc_asid(pmap_t pmap); static int pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, int mode, bool skip_unmapped); @@ -1297,7 +1299,7 @@ pmap_bootstrap_dmap(vm_size_t kernlen) } } - cpu_tlb_flushID(); + pmap_s1_invalidate_all_kernel(); bs_state.dmap_valid = true; @@ -1399,7 +1401,7 @@ pmap_bootstrap(void) /* And the l3 tables for the early devmap */ pmap_bootstrap_l3(VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE)); - cpu_tlb_flushID(); + pmap_s1_invalidate_all_kernel(); #define alloc_pages(var, np) \ (var) = bs_state.freemempos; \ @@ -1656,14 +1658,17 @@ pmap_init_pv_table(void) } } -static bool +static cpu_feat_en pmap_dbm_check(const struct cpu_feat *feat __unused, u_int midr __unused) { uint64_t id_aa64mmfr1; id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1); - return (ID_AA64MMFR1_HAFDBS_VAL(id_aa64mmfr1) >= - ID_AA64MMFR1_HAFDBS_AF_DBS); + if (ID_AA64MMFR1_HAFDBS_VAL(id_aa64mmfr1) >= + ID_AA64MMFR1_HAFDBS_AF_DBS) + return (FEAT_DEFAULT_ENABLE); + + return (FEAT_ALWAYS_DISABLE); } static bool @@ -1671,8 +1676,8 @@ 
pmap_dbm_has_errata(const struct cpu_feat *feat __unused, u_int midr, u_int **errata_list, u_int *errata_count) { /* Disable on Cortex-A55 for erratum 1024718 - all revisions */ - if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_ARM, - CPU_PART_CORTEX_A55, 0, 0)) { + if (CPU_IMPL(midr) == CPU_IMPL_ARM && + CPU_PART(midr) == CPU_PART_CORTEX_A55) { static u_int errata_id = 1024718; *errata_list = &errata_id; @@ -1681,21 +1686,19 @@ pmap_dbm_has_errata(const struct cpu_feat *feat __unused, u_int midr, } /* Disable on Cortex-A510 for erratum 2051678 - r0p0 to r0p2 */ - if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK | CPU_VAR_MASK, - CPU_IMPL_ARM, CPU_PART_CORTEX_A510, 0, 0)) { - if (CPU_REV(PCPU_GET(midr)) < 3) { - static u_int errata_id = 2051678; + if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A510, + 0, 0, 0, 2)) { + static u_int errata_id = 2051678; - *errata_list = &errata_id; - *errata_count = 1; - return (true); - } + *errata_list = &errata_id; + *errata_count = 1; + return (true); } return (false); } -static void +static bool pmap_dbm_enable(const struct cpu_feat *feat __unused, cpu_feat_errata errata_status, u_int *errata_list __unused, u_int errata_count) @@ -1704,7 +1707,7 @@ pmap_dbm_enable(const struct cpu_feat *feat __unused, /* Skip if there is an erratum affecting DBM */ if (errata_status != ERRATA_NONE) - return; + return (false); tcr = READ_SPECIALREG(tcr_el1) | TCR_HD; WRITE_SPECIALREG(tcr_el1, tcr); @@ -1714,16 +1717,58 @@ pmap_dbm_enable(const struct cpu_feat *feat __unused, __asm __volatile("tlbi vmalle1"); dsb(nsh); isb(); + + return (true); } -static struct cpu_feat feat_dbm = { - .feat_name = "FEAT_HAFDBS (DBM)", - .feat_check = pmap_dbm_check, - .feat_has_errata = pmap_dbm_has_errata, - .feat_enable = pmap_dbm_enable, - .feat_flags = CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU, -}; -DATA_SET(cpu_feat_set, feat_dbm); +CPU_FEAT(feat_hafdbs, "Hardware management of the Access flag and dirty state", + pmap_dbm_check, pmap_dbm_has_errata, pmap_dbm_enable, NULL, + CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU); + +static cpu_feat_en +pmap_multiple_tlbi_check(const struct cpu_feat *feat __unused, u_int midr) +{ + /* + * Cortex-A55 erratum 2441007 (Cat B rare) + * Present in all revisions + */ + if (CPU_IMPL(midr) == CPU_IMPL_ARM && + CPU_PART(midr) == CPU_PART_CORTEX_A55) + return (FEAT_DEFAULT_DISABLE); + + /* + * Cortex-A76 erratum 1286807 (Cat B rare) + * Present in r0p0 - r3p0 + * Fixed in r3p1 + */ + if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A76, + 0, 0, 3, 0)) + return (FEAT_DEFAULT_DISABLE); + + /* + * Cortex-A510 erratum 2441009 (Cat B rare) + * Present in r0p0 - r1p1 + * Fixed in r1p2 + */ + if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A510, + 0, 0, 1, 1)) + return (FEAT_DEFAULT_DISABLE); + + return (FEAT_ALWAYS_DISABLE); +} + +static bool +pmap_multiple_tlbi_enable(const struct cpu_feat *feat __unused, + cpu_feat_errata errata_status, u_int *errata_list __unused, + u_int errata_count __unused) +{ + pmap_multiple_tlbi = true; + return (true); +} + +CPU_FEAT(errata_multi_tlbi, "Multiple TLBI errata", + pmap_multiple_tlbi_check, NULL, pmap_multiple_tlbi_enable, NULL, + CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU); /* * Initialize the pmap module. 
@@ -1878,9 +1923,17 @@ pmap_s1_invalidate_page(pmap_t pmap, vm_offset_t va, bool final_only) r = TLBI_VA(va); if (pmap == kernel_pmap) { pmap_s1_invalidate_kernel(r, final_only); + if (pmap_multiple_tlbi) { + dsb(ish); + pmap_s1_invalidate_kernel(r, final_only); + } } else { r |= ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)); pmap_s1_invalidate_user(r, final_only); + if (pmap_multiple_tlbi) { + dsb(ish); + pmap_s1_invalidate_user(r, final_only); + } } dsb(ish); isb(); @@ -1922,12 +1975,24 @@ pmap_s1_invalidate_strided(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, end = TLBI_VA(eva); for (r = start; r < end; r += TLBI_VA(stride)) pmap_s1_invalidate_kernel(r, final_only); + + if (pmap_multiple_tlbi) { + dsb(ish); + for (r = start; r < end; r += TLBI_VA(stride)) + pmap_s1_invalidate_kernel(r, final_only); + } } else { start = end = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)); start |= TLBI_VA(sva); end |= TLBI_VA(eva); for (r = start; r < end; r += TLBI_VA(stride)) pmap_s1_invalidate_user(r, final_only); + + if (pmap_multiple_tlbi) { + dsb(ish); + for (r = start; r < end; r += TLBI_VA(stride)) + pmap_s1_invalidate_user(r, final_only); + } } dsb(ish); isb(); @@ -1963,6 +2028,19 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pmap_s2_invalidate_range(pmap, sva, eva, final_only); } +void +pmap_s1_invalidate_all_kernel(void) +{ + dsb(ishst); + __asm __volatile("tlbi vmalle1is"); + dsb(ish); + if (pmap_multiple_tlbi) { + __asm __volatile("tlbi vmalle1is"); + dsb(ish); + } + isb(); +} + /* * Invalidates all cached intermediate- and final-level TLB entries for the * given virtual address space. @@ -1977,9 +2055,17 @@ pmap_s1_invalidate_all(pmap_t pmap) dsb(ishst); if (pmap == kernel_pmap) { __asm __volatile("tlbi vmalle1is"); + if (pmap_multiple_tlbi) { + dsb(ish); + __asm __volatile("tlbi vmalle1is"); + } } else { r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)); __asm __volatile("tlbi aside1is, %0" : : "r" (r)); + if (pmap_multiple_tlbi) { + dsb(ish); + __asm __volatile("tlbi aside1is, %0" : : "r" (r)); + } } dsb(ish); isb(); @@ -2915,13 +3001,13 @@ retry: l1 = pmap_l1(pmap, va); if (l1 != NULL && (pmap_load(l1) & ATTR_DESCR_MASK) == L1_TABLE) { l2 = pmap_l1_to_l2(l1, va); - if (!ADDR_IS_KERNEL(va)) { + if (ADDR_IS_USER(va)) { /* Add a reference to the L2 page. */ l2pg = PTE_TO_VM_PAGE(pmap_load(l1)); l2pg->ref_count++; } else l2pg = NULL; - } else if (!ADDR_IS_KERNEL(va)) { + } else if (ADDR_IS_USER(va)) { /* Allocate a L2 page. */ l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT; l2pg = _pmap_alloc_l3(pmap, NUL2E + l2pindex, lockp); @@ -4082,7 +4168,7 @@ pmap_remove_l3_range(pmap_t pmap, pd_entry_t l2e, vm_offset_t sva, PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(rounddown2(sva, L2_SIZE) + L2_SIZE == roundup2(eva, L2_SIZE), ("pmap_remove_l3_range: range crosses an L3 page table boundary")); - l3pg = !ADDR_IS_KERNEL(sva) ? PTE_TO_VM_PAGE(l2e) : NULL; + l3pg = ADDR_IS_USER(sva) ? 
PTE_TO_VM_PAGE(l2e) : NULL; va = eva; for (l3 = pmap_l2_to_l3(&l2e, sva); sva != eva; l3++, sva += L3_SIZE) { old_l3 = pmap_load(l3); @@ -5310,7 +5396,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, if ((flags & PMAP_ENTER_WIRED) != 0) new_l3 |= ATTR_SW_WIRED; if (pmap->pm_stage == PM_STAGE1) { - if (!ADDR_IS_KERNEL(va)) + if (ADDR_IS_USER(va)) new_l3 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN; else new_l3 |= ATTR_S1_UXN; @@ -5401,7 +5487,7 @@ retry: pde = pmap_pde(pmap, va, &lvl); if (pde != NULL && lvl == 2) { l3 = pmap_l2_to_l3(pde, va); - if (!ADDR_IS_KERNEL(va) && mpte == NULL) { + if (ADDR_IS_USER(va) && mpte == NULL) { mpte = PTE_TO_VM_PAGE(pmap_load(pde)); mpte->ref_count++; } @@ -5411,7 +5497,7 @@ retry: if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK && (l3 = pmap_demote_l2_locked(pmap, l2, va, &lock)) != NULL) { l3 = &l3[pmap_l3_index(va)]; - if (!ADDR_IS_KERNEL(va)) { + if (ADDR_IS_USER(va)) { mpte = PTE_TO_VM_PAGE(pmap_load(l2)); mpte->ref_count++; } @@ -5419,7 +5505,7 @@ retry: } /* We need to allocate an L3 table. */ } - if (!ADDR_IS_KERNEL(va)) { + if (ADDR_IS_USER(va)) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; /* @@ -5657,7 +5743,7 @@ pmap_enter_l2_rx(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == VM_MEMATTR_DEVICE) new_l2 |= ATTR_S1_XN; - if (!ADDR_IS_KERNEL(va)) + if (ADDR_IS_USER(va)) new_l2 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN; else new_l2 |= ATTR_S1_UXN; @@ -5745,7 +5831,7 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, "pmap_enter_l2: no space for va %#lx" " in pmap %p", va, pmap); return (KERN_NO_SPACE); - } else if (!ADDR_IS_KERNEL(va) || + } else if (ADDR_IS_USER(va) || !pmap_every_pte_zero(PTE_TO_PHYS(old_l2))) { if (l2pg != NULL) l2pg->ref_count--; @@ -5796,7 +5882,7 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, } KASSERT(pmap_load(l2) == 0, ("pmap_enter_l2: non-zero L2 entry %p", l2)); - if (!ADDR_IS_KERNEL(va)) { + if (ADDR_IS_USER(va)) { vm_page_free_pages_toq(&free, true); } else { KASSERT(SLIST_EMPTY(&free), @@ -5916,7 +6002,7 @@ pmap_enter_l3c_rx(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *ml3p, if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == VM_MEMATTR_DEVICE) l3e |= ATTR_S1_XN; - if (!ADDR_IS_KERNEL(va)) + if (ADDR_IS_USER(va)) l3e |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN; else l3e |= ATTR_S1_UXN; @@ -5948,7 +6034,7 @@ pmap_enter_l3c(pmap_t pmap, vm_offset_t va, pt_entry_t l3e, u_int flags, /* * If the L3 PTP is not resident, we attempt to create it here. */ - if (!ADDR_IS_KERNEL(va)) { + if (ADDR_IS_USER(va)) { /* * Were we given the correct L3 PTP? If so, we can simply * increment its ref count. @@ -6224,7 +6310,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, * In the case that a page table page is not * resident, we are creating it here. 
*/ - if (!ADDR_IS_KERNEL(va)) { + if (ADDR_IS_USER(va)) { vm_pindex_t l2pindex; /* @@ -6310,7 +6396,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == VM_MEMATTR_DEVICE) l3_val |= ATTR_S1_XN; - if (!ADDR_IS_KERNEL(va)) + if (ADDR_IS_USER(va)) l3_val |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN; else l3_val |= ATTR_S1_UXN; @@ -7967,7 +8053,7 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size) pa += L2_SIZE; } if ((old_l2e & ATTR_DESCR_VALID) != 0) - pmap_s1_invalidate_all(kernel_pmap); + pmap_s1_invalidate_all_kernel(); else { /* * Because the old entries were invalid and the new @@ -8058,7 +8144,7 @@ pmap_unmapbios(void *p, vm_size_t size) } } if (preinit_map) { - pmap_s1_invalidate_all(kernel_pmap); + pmap_s1_invalidate_all_kernel(); return; } @@ -8528,7 +8614,7 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, * region and early kernel memory are the only parts of the * kernel address space that must be handled here. */ - KASSERT(!ADDR_IS_KERNEL(va) || VIRT_IN_DMAP(va) || + KASSERT(ADDR_IS_USER(va) || VIRT_IN_DMAP(va) || (va >= VM_MIN_KERNEL_ADDRESS && va < kernel_vm_end), ("pmap_demote_l2: No saved mpte for va %#lx", va)); @@ -8555,7 +8641,7 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, } ml3->pindex = pmap_l2_pindex(va); - if (!ADDR_IS_KERNEL(va)) { + if (ADDR_IS_USER(va)) { ml3->ref_count = NL3PG; pmap_resident_count_inc(pmap, 1); } @@ -9113,7 +9199,7 @@ pmap_init_cnp(void *dummy __unused) SYSINIT(pmap_init_cnp, SI_SUB_SMP, SI_ORDER_ANY, pmap_init_cnp, NULL); static bool -pmap_activate_int(pmap_t pmap) +pmap_activate_int(struct thread *td, pmap_t pmap) { struct asid_set *set; int epoch; @@ -9152,6 +9238,15 @@ pmap_activate_int(pmap_t pmap) pmap_alloc_asid(pmap); if (pmap->pm_stage == PM_STAGE1) { + uint64_t new_tcr, tcr; + + new_tcr = td->td_proc->p_md.md_tcr; + tcr = READ_SPECIALREG(tcr_el1); + if ((tcr & MD_TCR_FIELDS) != new_tcr) { + tcr &= ~MD_TCR_FIELDS; + tcr |= new_tcr; + WRITE_SPECIALREG(tcr_el1, tcr); + } set_ttbr0(pmap_to_ttbr0(pmap)); if (PCPU_GET(bcast_tlbi_workaround) != 0) invalidate_local_icache(); @@ -9165,7 +9260,7 @@ pmap_activate_vm(pmap_t pmap) PMAP_ASSERT_STAGE2(pmap); - (void)pmap_activate_int(pmap); + (void)pmap_activate_int(NULL, pmap); } void @@ -9176,7 +9271,7 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); PMAP_ASSERT_STAGE1(pmap); critical_enter(); - (void)pmap_activate_int(pmap); + (void)pmap_activate_int(td, pmap); critical_exit(); } @@ -9202,7 +9297,7 @@ pmap_switch(struct thread *new) * to a user process. */ - if (pmap_activate_int(vmspace_pmap(new->td_proc->p_vmspace))) { + if (pmap_activate_int(new, vmspace_pmap(new->td_proc->p_vmspace))) { /* * Stop userspace from training the branch predictor against * other processes. 
This will call into a CPU specific diff --git a/sys/arm64/arm64/ptrauth.c b/sys/arm64/arm64/ptrauth.c index 767b7e115479..ab40b72887e9 100644 --- a/sys/arm64/arm64/ptrauth.c +++ b/sys/arm64/arm64/ptrauth.c @@ -82,7 +82,7 @@ ptrauth_disable(void) return (false); } -static bool +static cpu_feat_en ptrauth_check(const struct cpu_feat *feat __unused, u_int midr __unused) { uint64_t isar; @@ -97,11 +97,11 @@ ptrauth_check(const struct cpu_feat *feat __unused, u_int midr __unused) if (!pac_enable) { if (boothowto & RB_VERBOSE) printf("Pointer authentication is disabled\n"); - goto out; + return (FEAT_ALWAYS_DISABLE); } if (ptrauth_disable()) - goto out; + return (FEAT_ALWAYS_DISABLE); /* * This assumes if there is pointer authentication on the boot CPU @@ -116,32 +116,21 @@ ptrauth_check(const struct cpu_feat *feat __unused, u_int midr __unused) if (get_kernel_reg(ID_AA64ISAR1_EL1, &isar)) { if (ID_AA64ISAR1_APA_VAL(isar) > 0 || ID_AA64ISAR1_API_VAL(isar) > 0) { - return (true); + return (FEAT_DEFAULT_ENABLE); } } /* The QARMA3 algorithm is reported in ID_AA64ISAR2_EL1. */ if (get_kernel_reg(ID_AA64ISAR2_EL1, &isar)) { if (ID_AA64ISAR2_APA3_VAL(isar) > 0) { - return (true); + return (FEAT_DEFAULT_ENABLE); } } -out: - /* - * Pointer authentication may be disabled, mask out the ID fields we - * expose to userspace and the rest of the kernel so they don't try - * to use it. - */ - update_special_reg(ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_MASK | - ID_AA64ISAR1_APA_MASK | ID_AA64ISAR1_GPA_MASK | - ID_AA64ISAR1_GPI_MASK, 0); - update_special_reg(ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_MASK, 0); - - return (false); + return (FEAT_ALWAYS_DISABLE); } -static void +static bool ptrauth_enable(const struct cpu_feat *feat __unused, cpu_feat_errata errata_status __unused, u_int *errata_list __unused, u_int errata_count __unused) @@ -149,16 +138,34 @@ ptrauth_enable(const struct cpu_feat *feat __unused, enable_ptrauth = true; elf64_addr_mask.code |= PAC_ADDR_MASK; elf64_addr_mask.data |= PAC_ADDR_MASK; +#ifdef COMPAT_FREEBSD14 + elf64_addr_mask_14.code |= PAC_ADDR_MASK_14; + elf64_addr_mask_14.data |= PAC_ADDR_MASK_14; +#endif + + return (true); } +static void +ptrauth_disabled(const struct cpu_feat *feat __unused) +{ + /* + * Pointer authentication may be disabled, mask out the ID fields we + * expose to userspace and the rest of the kernel so they don't try + * to use it. 
+ */ + if (PCPU_GET(cpuid) == 0) { + update_special_reg(ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_MASK | + ID_AA64ISAR1_APA_MASK | ID_AA64ISAR1_GPA_MASK | + ID_AA64ISAR1_GPI_MASK, 0); + update_special_reg(ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_MASK, 0); + } + +} -static struct cpu_feat feat_pauth = { - .feat_name = "FEAT_PAuth", - .feat_check = ptrauth_check, - .feat_enable = ptrauth_enable, - .feat_flags = CPU_FEAT_EARLY_BOOT | CPU_FEAT_SYSTEM, -}; -DATA_SET(cpu_feat_set, feat_pauth); +CPU_FEAT(feat_pauth, "Pointer Authentication", + ptrauth_check, NULL, ptrauth_enable, ptrauth_disabled, + CPU_FEAT_EARLY_BOOT | CPU_FEAT_SYSTEM); /* Copy the keys when forking a new process */ void diff --git a/sys/arm64/arm64/spec_workaround.c b/sys/arm64/arm64/spec_workaround.c new file mode 100644 index 000000000000..7f4f86cdb48c --- /dev/null +++ b/sys/arm64/arm64/spec_workaround.c @@ -0,0 +1,166 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Arm Ltd + * Copyright (c) 2018 Andrew Turner + * + * This software was developed by SRI International and the University of + * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 + * ("CTSRD"), as part of the DARPA CRASH research programme. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/pcpu.h> +#include <sys/systm.h> + +#include <machine/cpu.h> +#include <machine/cpu_feat.h> + +#include <dev/psci/psci.h> +#include <dev/psci/smccc.h> + +static enum { + SSBD_FORCE_ON, + SSBD_FORCE_OFF, + SSBD_KERNEL, +} ssbd_method = SSBD_KERNEL; + +struct psci_bp_hardening_impl { + u_int midr_mask; + u_int midr_value; +}; + +static struct psci_bp_hardening_impl psci_bp_hardening_impl[] = { + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A57,0,0), + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A72,0,0), + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A73,0,0), + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A75,0,0), + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = + CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX2, 0,0), + } +}; + +static cpu_feat_en +psci_bp_hardening_check(const struct cpu_feat *feat __unused, u_int midr) +{ + size_t i; + + for (i = 0; i < nitems(psci_bp_hardening_impl); i++) { + if ((midr & psci_bp_hardening_impl[i].midr_mask) == + psci_bp_hardening_impl[i].midr_value) { + /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */ + if (!psci_present) + return (FEAT_ALWAYS_DISABLE); + + if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_1) != + SMCCC_RET_SUCCESS) + return (FEAT_ALWAYS_DISABLE); + + return (FEAT_DEFAULT_ENABLE); + } + } + + return (FEAT_ALWAYS_DISABLE); +} + +static bool +psci_bp_hardening_enable(const struct cpu_feat *feat __unused, + cpu_feat_errata errata_status __unused, u_int *errata_list __unused, + u_int errata_count __unused) +{ + PCPU_SET(bp_harden, smccc_arch_workaround_1); + + return (true); +} + +CPU_FEAT(feat_csv2_missing, "Branch Predictor Hardening", + psci_bp_hardening_check, NULL, psci_bp_hardening_enable, NULL, + CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU); + +static cpu_feat_en +ssbd_workaround_check(const struct cpu_feat *feat __unused, u_int midr __unused) +{ + char *env; + + if (PCPU_GET(cpuid) == 0) { + env = kern_getenv("kern.cfg.ssbd"); + if (env != NULL) { + if (strcmp(env, "force-on") == 0) { + ssbd_method = SSBD_FORCE_ON; + } else if (strcmp(env, "force-off") == 0) { + ssbd_method = SSBD_FORCE_OFF; + } + } + } + + /* SMCCC depends on PSCI. 
If PSCI is missing so is SMCCC */ + if (!psci_present) + return (FEAT_ALWAYS_DISABLE); + + /* Enable the workaround on this CPU if it's enabled in the firmware */ + if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_2) != SMCCC_RET_SUCCESS) + return (FEAT_ALWAYS_DISABLE); + + return (FEAT_DEFAULT_ENABLE); +} + +static bool +ssbd_workaround_enable(const struct cpu_feat *feat __unused, + cpu_feat_errata errata_status __unused, u_int *errata_list __unused, + u_int errata_count __unused) +{ + switch(ssbd_method) { + case SSBD_FORCE_ON: + smccc_arch_workaround_2(1); + break; + case SSBD_FORCE_OFF: + smccc_arch_workaround_2(0); + break; + case SSBD_KERNEL: + default: + PCPU_SET(ssbd, smccc_arch_workaround_2); + break; + } + + return (true); +} + +CPU_FEAT(feat_ssbs_missing, "Speculative Store Bypass Disable Workaround", + ssbd_workaround_check, NULL, ssbd_workaround_enable, NULL, + CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU); diff --git a/sys/arm64/arm64/support.S b/sys/arm64/arm64/support.S index 2d067c7f7730..bf6fc931e4b0 100644 --- a/sys/arm64/arm64/support.S +++ b/sys/arm64/arm64/support.S @@ -39,8 +39,15 @@ #include "assym.inc" .macro check_user_access user_arg, limit, bad_addr_func + /* + * TBI is enabled from 15.0. Clear the top byte of the userspace + * address before checking whether it's within the given limit. + * The later load/store instructions will fault if TBI is disabled + * for the current process. + */ + and x6, x\user_arg, #(~TBI_ADDR_MASK) ldr x7, =(\limit) - cmp x\user_arg, x7 + cmp x6, x7 b.cs \bad_addr_func .endm diff --git a/sys/arm64/arm64/swtch.S b/sys/arm64/arm64/swtch.S index 7b6010a5f51f..a461fded929c 100644 --- a/sys/arm64/arm64/swtch.S +++ b/sys/arm64/arm64/swtch.S @@ -37,6 +37,8 @@ #include <machine/asm.h> #include <machine/armreg.h> +#include <machine/proc.h> + .macro clear_step_flag pcbflags, tmp tbz \pcbflags, #PCB_SINGLE_STEP_SHIFT, 999f mrs \tmp, mdscr_el1 @@ -239,6 +241,16 @@ ENTRY(fork_trampoline) msr daifset, #(DAIF_D | DAIF_INTR) ldr x0, [x18, #PC_CURTHREAD] + + /* Set the per-process tcr_el1 fields */ + ldr x10, [x0, #TD_PROC] + ldr x10, [x10, #P_MD_TCR] + mrs x11, tcr_el1 + and x11, x11, #(~MD_TCR_FIELDS) + orr x11, x11, x10 + msr tcr_el1, x11 + /* No isb as the eret below is the context-synchronising event */ + bl ptrauth_enter_el0 /* Restore sp, lr, elr, and spsr */ diff --git a/sys/arm64/arm64/trap.c b/sys/arm64/arm64/trap.c index bed58095201a..75c9b5f87892 100644 --- a/sys/arm64/arm64/trap.c +++ b/sys/arm64/arm64/trap.c @@ -246,6 +246,7 @@ external_abort(struct thread *td, struct trapframe *frame, uint64_t esr, print_registers(frame); print_gp_register("far", far); + printf(" esr: 0x%.16lx\n", esr); panic("Unhandled external data abort"); } diff --git a/sys/arm64/arm64/vm_machdep.c b/sys/arm64/arm64/vm_machdep.c index 38a126ff602f..0134feb65b6a 100644 --- a/sys/arm64/arm64/vm_machdep.c +++ b/sys/arm64/arm64/vm_machdep.c @@ -120,6 +120,9 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_daif = PSR_DAIF_DEFAULT; + + /* Copy the TCR_EL1 value */ + td2->td_proc->p_md.md_tcr = td1->td_proc->p_md.md_tcr; + #if defined(PERTHREAD_SSP) /* Set the new canary */ arc4random_buf(&td2->td_md.md_canary, sizeof(td2->td_md.md_canary)); diff --git a/sys/arm64/conf/std.arm b/sys/arm64/conf/std.arm index fb5561506531..309059a096eb 100644 --- a/sys/arm64/conf/std.arm +++ b/sys/arm64/conf/std.arm @@ -21,3 +21,6 @@ device arm_doorbell # ARM Message Handling Unit (MHU) options FDT device acpi + +# DTBs
+makeoptions MODULES_EXTRA+="dtb/arm" diff --git a/sys/arm64/conf/std.arm64 b/sys/arm64/conf/std.arm64 index c83e98c17a33..02bdd25f2d52 100644 --- a/sys/arm64/conf/std.arm64 +++ b/sys/arm64/conf/std.arm64 @@ -7,6 +7,7 @@ makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support options SCHED_ULE # ULE scheduler options NUMA # Non-Uniform Memory Architecture support options PREEMPTION # Enable kernel thread preemption +options EXTERR_STRINGS options VIMAGE # Subsystem virtualization, e.g. VNET options INET # InterNETworking options INET6 # IPv6 communications protocols @@ -105,3 +106,9 @@ device efirtc # EFI RTC # SMBIOS -- all EFI platforms device smbios + +# random(4) +device tpm # Trusted Platform Module +options RANDOM_ENABLE_TPM # enable entropy from TPM 2.0 +options RANDOM_ENABLE_KBD +options RANDOM_ENABLE_MOUSE diff --git a/sys/arm64/coresight/coresight.c b/sys/arm64/coresight/coresight.c index 5928c153f4ae..9b9d3c65ecc9 100644 --- a/sys/arm64/coresight/coresight.c +++ b/sys/arm64/coresight/coresight.c @@ -113,7 +113,7 @@ coresight_get_output_device(struct endpoint *endp, struct endpoint **out_endp) } static void -coresight_init(void) +coresight_init(void *dummy __unused) { mtx_init(&cs_mtx, "ARM Coresight", NULL, MTX_DEF); diff --git a/sys/arm64/include/_armreg.h b/sys/arm64/include/_armreg.h new file mode 100644 index 000000000000..0f5134e5a978 --- /dev/null +++ b/sys/arm64/include/_armreg.h @@ -0,0 +1,57 @@ +/*- + * Copyright (c) 2013, 2014 Andrew Turner + * Copyright (c) 2015,2021 The FreeBSD Foundation + * + * Portions of this software were developed by Andrew Turner + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#if !defined(_MACHINE_ARMREG_H_) && \ + !defined(_MACHINE_CPU_H_) && \ + !defined(_MACHINE_HYPERVISOR_H_) +#error Do not include this file directly +#endif + +#ifndef _MACHINE__ARMREG_H_ +#define _MACHINE__ARMREG_H_ + +#define __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \ + S##op0##_##op1##_C##crn##_C##crm##_##op2 +#define _MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \ + __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) +#define MRS_REG_ALT_NAME(reg) \ + _MRS_REG_ALT_NAME(reg##_op0, reg##_op1, reg##_CRn, reg##_CRm, reg##_op2) + + +#define READ_SPECIALREG(reg) \ +({ uint64_t _val; \ + __asm __volatile("mrs %0, " __STRING(reg) : "=&r" (_val)); \ + _val; \ +}) +#define WRITE_SPECIALREG(reg, _val) \ + __asm __volatile("msr " __STRING(reg) ", %0" : : "r"((uint64_t)_val)) + +#define UL(x) UINT64_C(x) + +#endif /* !_MACHINE__ARMREG_H_ */ diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h index 38b7f57f7853..27b02c44cd76 100644 --- a/sys/arm64/include/armreg.h +++ b/sys/arm64/include/armreg.h @@ -34,25 +34,9 @@ #ifndef _MACHINE_ARMREG_H_ #define _MACHINE_ARMREG_H_ -#define INSN_SIZE 4 - -#define __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \ - S##op0##_##op1##_C##crn##_C##crm##_##op2 -#define _MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \ - __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) -#define MRS_REG_ALT_NAME(reg) \ - _MRS_REG_ALT_NAME(reg##_op0, reg##_op1, reg##_CRn, reg##_CRm, reg##_op2) - +#include <machine/_armreg.h> -#define READ_SPECIALREG(reg) \ -({ uint64_t _val; \ - __asm __volatile("mrs %0, " __STRING(reg) : "=&r" (_val)); \ - _val; \ -}) -#define WRITE_SPECIALREG(reg, _val) \ - __asm __volatile("msr " __STRING(reg) ", %0" : : "r"((uint64_t)_val)) - -#define UL(x) UINT64_C(x) +#define INSN_SIZE 4 /* AFSR0_EL1 - Auxiliary Fault Status Register 0 */ #define AFSR0_EL1_REG MRS_REG_ALT_NAME(AFSR0_EL1) @@ -232,6 +216,14 @@ #define CNTP_CTL_IMASK (1 << 1) #define CNTP_CTL_ISTATUS (1 << 2) +/* CNTP_CTL_EL02 - Counter-timer Physical Timer Control register */ +#define CNTP_CTL_EL02_REG MRS_REG_ALT_NAME(CNTP_CTL_EL02) +#define CNTP_CTL_EL02_op0 3 +#define CNTP_CTL_EL02_op1 5 +#define CNTP_CTL_EL02_CRn 14 +#define CNTP_CTL_EL02_CRm 2 +#define CNTP_CTL_EL02_op2 1 + /* CNTP_CVAL_EL0 - Counter-timer Physical Timer CompareValue register */ #define CNTP_CVAL_EL0_op0 3 #define CNTP_CVAL_EL0_op1 3 @@ -239,6 +231,14 @@ #define CNTP_CVAL_EL0_CRm 2 #define CNTP_CVAL_EL0_op2 2 +/* CNTP_CVAL_EL02 - Counter-timer Physical Timer CompareValue register */ +#define CNTP_CVAL_EL02_REG MRS_REG_ALT_NAME(CNTP_CVAL_EL02) +#define CNTP_CVAL_EL02_op0 3 +#define CNTP_CVAL_EL02_op1 5 +#define CNTP_CVAL_EL02_CRn 14 +#define CNTP_CVAL_EL02_CRm 2 +#define CNTP_CVAL_EL02_op2 2 + /* CNTP_TVAL_EL0 - Counter-timer Physical Timer TimerValue register */ #define CNTP_TVAL_EL0_op0 3 #define CNTP_TVAL_EL0_op1 3 @@ -254,6 +254,14 @@ #define CNTPCT_EL0_CRm 0 #define CNTPCT_EL0_op2 1 +/* CNTPCTSS_EL0 - Counter-timer Self-Synchronized Physical Count register */ +#define CNTPCTSS_EL0_REG MRS_REG_ALT_NAME(CNTPCTSS_EL0) +#define CNTPCTSS_EL0_op0 3 +#define CNTPCTSS_EL0_op1 3 +#define CNTPCTSS_EL0_CRn 14 +#define CNTPCTSS_EL0_CRm 0 +#define CNTPCTSS_EL0_op2 5 + /* CNTV_CTL_EL0 - Counter-timer Virtual Timer Control register */ #define CNTV_CTL_EL0_op0 3 #define CNTV_CTL_EL0_op1 3 @@ -282,6 +290,14 @@ #define CNTV_CVAL_EL02_CRm 3 #define CNTV_CVAL_EL02_op2 2 +/* CNTVCTSS_EL0 - Counter-timer Self-Synchronized Virtual Count register */ +#define CNTVCTSS_EL0_REG MRS_REG_ALT_NAME(CNTVCTSS_EL0) +#define CNTVCTSS_EL0_op0 3 +#define 
CNTVCTSS_EL0_op1 3 +#define CNTVCTSS_EL0_CRn 14 +#define CNTVCTSS_EL0_CRm 0 +#define CNTVCTSS_EL0_op2 6 + /* CONTEXTIDR_EL1 - Context ID register */ #define CONTEXTIDR_EL1_REG MRS_REG_ALT_NAME(CONTEXTIDR_EL1) #define CONTEXTIDR_EL1_op0 3 @@ -2148,6 +2164,7 @@ #define OSLAR_EL1_CRn 1 #define OSLAR_EL1_CRm 0 #define OSLAR_EL1_op2 4 +#define OSLAR_OSLK (0x1ul << 0) /* OSLSR_EL1 */ #define OSLSR_EL1_op0 2 @@ -2155,6 +2172,10 @@ #define OSLSR_EL1_CRn 1 #define OSLSR_EL1_CRm 1 #define OSLSR_EL1_op2 4 +#define OSLSR_OSLM_1 (0x1ul << 3) +#define OSLSR_nTT (0x1ul << 2) +#define OSLSR_OSLK (0x1ul << 1) +#define OSLSR_OSLM_0 (0x1ul << 0) /* PAR_EL1 - Physical Address Register */ #define PAR_F_SHIFT 0 @@ -2230,6 +2251,7 @@ #define PMBSR_MSS_SHIFT 0 #define PMBSR_MSS_MASK (UL(0xffff) << PMBSR_MSS_SHIFT) #define PMBSR_MSS_BSC_MASK (UL(0x3f) << PMBSR_MSS_SHIFT) +#define PMBSR_MSS_BSC_BUFFER_FILLED (UL(0x01) << PMBSR_MSS_SHIFT) #define PMBSR_MSS_FSC_MASK (UL(0x3f) << PMBSR_MSS_SHIFT) #define PMBSR_COLL_SHIFT 16 #define PMBSR_COLL (UL(0x1) << PMBSR_COLL_SHIFT) @@ -2241,6 +2263,11 @@ #define PMBSR_DL (UL(0x1) << PMBSR_DL_SHIFT) #define PMBSR_EC_SHIFT 26 #define PMBSR_EC_MASK (UL(0x3f) << PMBSR_EC_SHIFT) +#define PMBSR_EC_VAL(x) (((x) & PMBSR_EC_MASK) >> PMBSR_EC_SHIFT) +#define PMBSR_EC_OTHER_BUF_MGMT 0x00 +#define PMBSR_EC_GRAN_PROT_CHK 0x1e +#define PMBSR_EC_STAGE1_DA 0x24 +#define PMBSR_EC_STAGE2_DA 0x25 /* PMCCFILTR_EL0 */ #define PMCCFILTR_EL0_op0 3 @@ -2476,6 +2503,15 @@ #define PMSIDR_FnE (UL(0x1) << PMSIDR_FnE_SHIFT) #define PMSIDR_Interval_SHIFT 8 #define PMSIDR_Interval_MASK (UL(0xf) << PMSIDR_Interval_SHIFT) +#define PMSIDR_Interval_VAL(x) (((x) & PMSIDR_Interval_MASK) >> PMSIDR_Interval_SHIFT) +#define PMSIDR_Interval_256 0 +#define PMSIDR_Interval_512 2 +#define PMSIDR_Interval_768 3 +#define PMSIDR_Interval_1024 4 +#define PMSIDR_Interval_1536 5 +#define PMSIDR_Interval_2048 6 +#define PMSIDR_Interval_3072 7 +#define PMSIDR_Interval_4096 8 #define PMSIDR_MaxSize_SHIFT 12 #define PMSIDR_MaxSize_MASK (UL(0xf) << PMSIDR_MaxSize_SHIFT) #define PMSIDR_CountSize_SHIFT 16 @@ -2608,6 +2644,28 @@ #define SCTLR_EnALS (UL(0x1) << 56) #define SCTLR_EPAN (UL(0x1) << 57) +#define SCTLR_MMU_OFF \ + (SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_EIS | SCTLR_TSCXT | SCTLR_EOS) +#define SCTLR_MMU_ON \ + (SCTLR_MMU_OFF | \ + SCTLR_EPAN | \ + SCTLR_BT1 | \ + SCTLR_BT0 | \ + SCTLR_UCI | \ + SCTLR_SPAN | \ + SCTLR_IESB | \ + SCTLR_nTWE | \ + SCTLR_nTWI | \ + SCTLR_UCT | \ + SCTLR_DZE | \ + SCTLR_I | \ + SCTLR_SED | \ + SCTLR_CP15BEN | \ + SCTLR_SA0 | \ + SCTLR_SA | \ + SCTLR_C | \ + SCTLR_M) + /* SCTLR_EL12 */ #define SCTLR_EL12_REG MRS_REG_ALT_NAME(SCTLR_EL12) #define SCTLR_EL12_op0 3 diff --git a/sys/arm64/include/asm.h b/sys/arm64/include/asm.h index 4f373dc4b7e1..f9a64f574fca 100644 --- a/sys/arm64/include/asm.h +++ b/sys/arm64/include/asm.h @@ -77,10 +77,11 @@ * to the given label. The tmp register should be a register able to hold the * temporary data. */ -#define CHECK_CPU_FEAT(tmp, feat_reg, feat, label) \ - mrs tmp, ##feat_reg##_el1; \ +#define CHECK_CPU_FEAT(tmp, feat_reg, feat, min_val, label) \ + mrs tmp, ##feat_reg##_el1; \ ubfx tmp, tmp, ##feat_reg##_##feat##_SHIFT, ##feat_reg##_##feat##_WIDTH; \ - cbz tmp, label + cmp tmp, #(##feat_reg##_##feat##_##min_val## >> ##feat_reg##_##feat##_SHIFT); \ + b.lt label /* * Sets the trap fault handler. 
The exception handler will return to the diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h index 935e3754bf25..b15210633d37 100644 --- a/sys/arm64/include/cpu.h +++ b/sys/arm64/include/cpu.h @@ -43,10 +43,10 @@ #define _MACHINE_CPU_H_ #if !defined(__ASSEMBLER__) +#include <machine/_armreg.h> #include <machine/atomic.h> #include <machine/frame.h> #endif -#include <machine/armreg.h> #define TRAPF_PC(tfp) ((tfp)->tf_elr) #define TRAPF_USERMODE(tfp) (((tfp)->tf_spsr & PSR_M_MASK) == PSR_M_EL0t) @@ -125,7 +125,11 @@ #define CPU_PART_NEOVERSE_V3 0xD84 #define CPU_PART_CORTEX_X925 0xD85 #define CPU_PART_CORTEX_A725 0xD87 +#define CPU_PART_C1_NANO 0xD8A +#define CPU_PART_C1_PRO 0xD8B +#define CPU_PART_C1_ULTRA 0xD8C #define CPU_PART_NEOVERSE_N3 0xD8E +#define CPU_PART_C1_PREMIUM 0xD90 /* Cavium Part numbers */ #define CPU_PART_THUNDERX 0x0A1 @@ -193,8 +197,30 @@ (((mask) & PCPU_GET(midr)) == \ ((mask) & CPU_ID_RAW((impl), (part), (var), (rev)))) -#define CPU_MATCH_RAW(mask, devid) \ - (((mask) & PCPU_GET(midr)) == ((mask) & (devid))) +#if !defined(__ASSEMBLER__) +static inline bool +midr_check_var_part_range(u_int midr, u_int impl, u_int part, u_int var_low, + u_int part_low, u_int var_high, u_int part_high) +{ + /* Check for the correct part */ + if (CPU_IMPL(midr) != impl || CPU_PART(midr) != part) + return (false); + + /* Check if the variant is between var_low and var_high inclusive */ + if (CPU_VAR(midr) < var_low || CPU_VAR(midr) > var_high) + return (false); + + /* If the variant is the low value, check if the part is high enough */ + if (CPU_VAR(midr) == var_low && CPU_PART(midr) < part_low) + return (false); + + /* If the variant is the high value, check if the part is low enough */ + if (CPU_VAR(midr) == var_high && CPU_PART(midr) > part_high) + return (false); + + return (true); +} +#endif /* * Chip-specific errata. This defines are intended to be @@ -226,6 +252,9 @@ extern uint64_t __cpu_affinity[]; struct arm64_addr_mask; extern struct arm64_addr_mask elf64_addr_mask; +#ifdef COMPAT_FREEBSD14 +extern struct arm64_addr_mask elf64_addr_mask_14; +#endif typedef void (*cpu_reset_hook_t)(void); extern cpu_reset_hook_t cpu_reset_hook; diff --git a/sys/arm64/include/cpu_feat.h b/sys/arm64/include/cpu_feat.h index 9fe6a9dd95d9..6a311d4000bb 100644 --- a/sys/arm64/include/cpu_feat.h +++ b/sys/arm64/include/cpu_feat.h @@ -29,6 +29,7 @@ #define _MACHINE_CPU_FEAT_H_ #include <sys/linker_set.h> +#include <sys/sysctl.h> typedef enum { ERRATA_UNKNOWN, /* Unknown erratum */ @@ -39,6 +40,31 @@ typedef enum { /* kernel component. */ } cpu_feat_errata; +typedef enum { + /* + * Don't implement the feature or erratum workaround, + * e.g. the feature is not implemented or the erratum is + * for another CPU. + */ + FEAT_ALWAYS_DISABLE, + + /* + * Disable by default, but allow the user to enable, + * e.g. for a rare erratum with a workaround, Arm + * Category B (rare) or similar. + */ + FEAT_DEFAULT_DISABLE, + + /* + * Enabled by default, but allow the user to disable, + * e.g. for a common erratum with a workaround, Arm + * Category A or B or similar. + */ + FEAT_DEFAULT_ENABLE, + + /* We could add FEAT_ALWAYS_ENABLE if a need was found. 
*/ +} cpu_feat_en; + #define CPU_FEAT_STAGE_MASK 0x00000001 #define CPU_FEAT_EARLY_BOOT 0x00000000 #define CPU_FEAT_AFTER_DEV 0x00000001 @@ -47,23 +73,45 @@ typedef enum { #define CPU_FEAT_PER_CPU 0x00000000 #define CPU_FEAT_SYSTEM 0x00000010 +#define CPU_FEAT_USER_ENABLED 0x40000000 +#define CPU_FEAT_USER_DISABLED 0x80000000 + struct cpu_feat; -typedef bool (cpu_feat_check)(const struct cpu_feat *, u_int); +typedef cpu_feat_en (cpu_feat_check)(const struct cpu_feat *, u_int); typedef bool (cpu_feat_has_errata)(const struct cpu_feat *, u_int, u_int **, u_int *); -typedef void (cpu_feat_enable)(const struct cpu_feat *, cpu_feat_errata, +typedef bool (cpu_feat_enable)(const struct cpu_feat *, cpu_feat_errata, u_int *, u_int); +typedef void (cpu_feat_disabled)(const struct cpu_feat *); struct cpu_feat { const char *feat_name; cpu_feat_check *feat_check; cpu_feat_has_errata *feat_has_errata; cpu_feat_enable *feat_enable; + cpu_feat_disabled *feat_disabled; uint32_t feat_flags; + bool feat_enabled; }; SET_DECLARE(cpu_feat_set, struct cpu_feat); +SYSCTL_DECL(_hw_feat); + +#define CPU_FEAT(name, descr, check, has_errata, enable, disabled, flags) \ +static struct cpu_feat name = { \ + .feat_name = #name, \ + .feat_check = check, \ + .feat_has_errata = has_errata, \ + .feat_enable = enable, \ + .feat_disabled = disabled, \ + .feat_flags = flags, \ + .feat_enabled = false, \ +}; \ +DATA_SET(cpu_feat_set, name); \ +SYSCTL_BOOL(_hw_feat, OID_AUTO, name, CTLFLAG_RD, &name.feat_enabled, \ + 0, descr) + /* * Allow drivers to mark an erratum as worked around, e.g. the Errata * Management ABI may know the workaround isn't needed on a given system. diff --git a/sys/arm64/include/cpufunc.h b/sys/arm64/include/cpufunc.h index e6e1f682794e..e9eee643216b 100644 --- a/sys/arm64/include/cpufunc.h +++ b/sys/arm64/include/cpufunc.h @@ -96,6 +96,13 @@ serror_enable(void) __asm __volatile("msr daifclr, #(" __XSTRING(DAIF_A) ")"); } +static __inline void +serror_disable(void) +{ + + __asm __volatile("msr daifset, #(" __XSTRING(DAIF_A) ")"); +} + static __inline register_t get_midr(void) { diff --git a/sys/arm64/include/db_machdep.h b/sys/arm64/include/db_machdep.h index 5dc496ca851d..3ef95f7802ea 100644 --- a/sys/arm64/include/db_machdep.h +++ b/sys/arm64/include/db_machdep.h @@ -31,7 +31,6 @@ #ifndef _MACHINE_DB_MACHDEP_H_ #define _MACHINE_DB_MACHDEP_H_ -#include <machine/armreg.h> #include <machine/frame.h> #include <machine/trap.h> diff --git a/sys/arm64/include/elf.h b/sys/arm64/include/elf.h index d6328c143585..81ee7392f866 100644 --- a/sys/arm64/include/elf.h +++ b/sys/arm64/include/elf.h @@ -93,6 +93,9 @@ __ElfType(Auxinfo); #define ET_DYN_LOAD_ADDR 0x100000 #endif +/* First __FreeBSD_version that supports Top Byte Ignore (TBI) */ +#define TBI_VERSION 1500058 + /* HWCAP */ #define HWCAP_FP (1 << 0) #define HWCAP_ASIMD (1 << 1) diff --git a/sys/arm64/include/hypervisor.h b/sys/arm64/include/hypervisor.h index e3a880afbe9c..f3d7027269c9 100644 --- a/sys/arm64/include/hypervisor.h +++ b/sys/arm64/include/hypervisor.h @@ -30,26 +30,85 @@ #ifndef _MACHINE_HYPERVISOR_H_ #define _MACHINE_HYPERVISOR_H_ +#include <machine/_armreg.h> + /* * These registers are only useful when in hypervisor context, * e.g. specific to EL2, or controlling the hypervisor. 
*/ /* CNTHCTL_EL2 - Counter-timer Hypervisor Control register */ -#define CNTHCTL_EVNTI_MASK (0xf << 4) /* Bit to trigger event stream */ /* Valid if HCR_EL2.E2H == 0 */ -#define CNTHCTL_EL1PCTEN (1 << 0) /* Allow physical counter access */ -#define CNTHCTL_EL1PCEN (1 << 1) /* Allow physical timer access */ +#define CNTHCTL_EL1PCTEN_SHIFT 0 +#define CNTHCTL_EL1PCTEN_MASK (0x1ul << CNTHCTL_EL1PCTEN_SHIFT) +#define CNTHCTL_EL1PCTEN_TRAP (0x0ul << CNTHCTL_EL1PCTEN_SHIFT) +#define CNTHCTL_EL1PCTEN_NOTRAP (0x1ul << CNTHCTL_EL1PCTEN_SHIFT) +#define CNTHCTL_EL1PCEN_SHIFT 1 +#define CNTHCTL_EL1PCEN_MASK (0x1ul << CNTHCTL_EL1PCEN_SHIFT) +#define CNTHCTL_EL1PCEN_TRAP (0x0ul << CNTHCTL_EL1PCEN_SHIFT) +#define CNTHCTL_EL1PCEN_NOTRAP (0x1ul << CNTHCTL_EL1PCEN_SHIFT) /* Valid if HCR_EL2.E2H == 1 */ -#define CNTHCTL_E2H_EL0PCTEN (1 << 0) /* Allow EL0 physical counter access */ -#define CNTHCTL_E2H_EL0VCTEN (1 << 1) /* Allow EL0 virtual counter access */ -#define CNTHCTL_E2H_EL0VTEN (1 << 8) -#define CNTHCTL_E2H_EL0PTEN (1 << 9) -#define CNTHCTL_E2H_EL1PCTEN (1 << 10) /* Allow physical counter access */ -#define CNTHCTL_E2H_EL1PTEN (1 << 11) /* Allow physical timer access */ +#define CNTHCTL_E2H_EL0PCTEN_SHIFT 0 +#define CNTHCTL_E2H_EL0PCTEN_MASK (0x1ul << CNTHCTL_E2H_EL0PCTEN_SHIFT) +#define CNTHCTL_E2H_EL0PCTEN_TRAP (0x0ul << CNTHCTL_E2H_EL0PCTEN_SHIFT) +#define CNTHCTL_E2H_EL0PCTEN_NOTRAP (0x1ul << CNTHCTL_E2H_EL0PCTEN_SHIFT) +#define CNTHCTL_E2H_EL0VCTEN_SHIFT 1 +#define CNTHCTL_E2H_EL0VCTEN_MASK (0x1ul << CNTHCTL_E2H_EL0VCTEN_SHIFT) +#define CNTHCTL_E2H_EL0VCTEN_TRAP (0x0ul << CNTHCTL_E2H_EL0VCTEN_SHIFT) +#define CNTHCTL_E2H_EL0VCTEN_NOTRAP (0x1ul << CNTHCTL_E2H_EL0VCTEN_SHIFT) +#define CNTHCTL_E2H_EL0VTEN_SHIFT 8 +#define CNTHCTL_E2H_EL0VTEN_MASK (0x1ul << CNTHCTL_E2H_EL0VTEN_SHIFT) +#define CNTHCTL_E2H_EL0VTEN_TRAP (0x0ul << CNTHCTL_E2H_EL0VTEN_SHIFT) +#define CNTHCTL_E2H_EL0VTEN_NOTRAP (0x1ul << CNTHCTL_E2H_EL0VTEN_SHIFT) +#define CNTHCTL_E2H_EL0PTEN_SHIFT 9 +#define CNTHCTL_E2H_EL0PTEN_MASK (0x1ul << CNTHCTL_E2H_EL0PTEN_SHIFT) +#define CNTHCTL_E2H_EL0PTEN_TRAP (0x0ul << CNTHCTL_E2H_EL0PTEN_SHIFT) +#define CNTHCTL_E2H_EL0PTEN_NOTRAP (0x1ul << CNTHCTL_E2H_EL0PTEN_SHIFT) +#define CNTHCTL_E2H_EL1PCTEN_SHIFT 10 +#define CNTHCTL_E2H_EL1PCTEN_MASK (0x1ul << CNTHCTL_E2H_EL1PCTEN_SHIFT) +#define CNTHCTL_E2H_EL1PCTEN_TRAP (0x0ul << CNTHCTL_E2H_EL1PCTEN_SHIFT) +#define CNTHCTL_E2H_EL1PCTEN_NOTRAP (0x1ul << CNTHCTL_E2H_EL1PCTEN_SHIFT) +#define CNTHCTL_E2H_EL1PTEN_SHIFT 11 +#define CNTHCTL_E2H_EL1PTEN_MASK (0x1ul << CNTHCTL_E2H_EL1PTEN_SHIFT) +#define CNTHCTL_E2H_EL1PTEN_TRAP (0x0ul << CNTHCTL_E2H_EL1PTEN_SHIFT) +#define CNTHCTL_E2H_EL1PTEN_NOTRAP (0x1ul << CNTHCTL_E2H_EL1PTEN_SHIFT) /* Unconditionally valid */ -#define CNTHCTL_EVNTDIR (1 << 3) /* Control transition trigger bit */ -#define CNTHCTL_EVNTEN (1 << 2) /* Enable event stream */ +#define CNTHCTL_EVNTEN_SHIFT 2 +#define CNTHCTL_EVNTEN_MASK (0x1ul << CNTHCTL_EVNTEN_SHIFT) +#define CNTHCTL_EVNTEN_DIS (0x0ul << CNTHCTL_EVNTEN_SHIFT) +#define CNTHCTL_EVNTEN_EN (0x1ul << CNTHCTL_EVNTEN_SHIFT) +#define CNTHCTL_EVNTDIR_SHIFT 3 +#define CNTHCTL_EVNTDIR_MASK (0x1ul << CNTHCTL_EVNTDIR_SHIFT) +#define CNTHCTL_EVNTDIR_HIGH (0x0ul << CNTHCTL_EVNTDIR_SHIFT) +#define CNTHCTL_EVNTDIR_LOW (0x1ul << CNTHCTL_EVNTDIR_SHIFT) +#define CNTHCTL_EVNTI_SHIFT 4 +#define CNTHCTL_EVNTI_MASK (0xful << CNTHCTL_EVNTI_SHIFT) +#define CNTHCTL_ECV_SHIFT 12 +#define CNTHCTL_ECV_MASK (0x1ul << CNTHCTL_ECV_SHIFT) +#define CNTHCTL_ECV_DIS (0x0ul << CNTHCTL_ECV_SHIFT) +#define 
CNTHCTL_ECV_EN (0x1ul << CNTHCTL_ECV_SHIFT) +#define CNTHCTL_EL1TVT_SHIFT 13 +#define CNTHCTL_EL1TVT_MASK (0x1ul << CNTHCTL_EL1TVT_SHIFT) +#define CNTHCTL_EL1TVT_NOTRAP (0x0ul << CNTHCTL_EL1TVT_SHIFT) +#define CNTHCTL_EL1TVT_TRAP (0x1ul << CNTHCTL_EL1TVT_SHIFT) +#define CNTHCTL_EL1TVCT_SHIFT 14 +#define CNTHCTL_EL1TVCT_MASK (0x1ul << CNTHCTL_EL1TVCT_SHIFT) +#define CNTHCTL_EL1TVCT_NOTRAP (0x0ul << CNTHCTL_EL1TVCT_SHIFT) +#define CNTHCTL_EL1TVCT_TRAP (0x1ul << CNTHCTL_EL1TVCT_SHIFT) +#define CNTHCTL_EL1NVPCT_SHIFT 15 +#define CNTHCTL_EL1NVPCT_MASK (0x1ul << CNTHCTL_EL1NVPCT_SHIFT) +#define CNTHCTL_EL1NVPCT_NOTRAP (0x0ul << CNTHCTL_EL1NVPCT_SHIFT) +#define CNTHCTL_EL1NVPCT_TRAP (0x1ul << CNTHCTL_EL1NVPCT_SHIFT) +#define CNTHCTL_EL1NVVCT_SHIFT 16 +#define CNTHCTL_EL1NVVCT_MASK (0x1ul << CNTHCTL_EL1NVVCT_SHIFT) +#define CNTHCTL_EL1NVVCT_NOTRAP (0x0ul << CNTHCTL_EL1NVVCT_SHIFT) +#define CNTHCTL_EL1NVVCT_TRAP (0x1ul << CNTHCTL_EL1NVVCT_SHIFT) +#define CNTHCTL_EVNTIS_SHIFT 17 +#define CNTHCTL_EVNTIS_MASK (0x1ul << CNTHCTL_EVNTIS_SHIFT) +#define CNTHCTL_CNTVMASK_SHIFT 18 +#define CNTHCTL_CNTVMASK_MASK (0x1ul << CNTHCTL_CNTVMASK_SHIFT) +#define CNTHCTL_CNTPMASK_SHIFT 19 +#define CNTHCTL_CNTPMASK_MASK (0x1ul << CNTHCTL_CNTPMASK_SHIFT) /* CNTPOFF_EL2 - Counter-timer Physical Offset Register */ #define CNTPOFF_EL2_REG MRS_REG_ALT_NAME(CNTPOFF_EL2) @@ -190,6 +249,55 @@ #define ICC_SRE_EL2_SRE (1UL << 0) #define ICC_SRE_EL2_EN (1UL << 3) +/* MDCR_EL2 - Hyp Debug Control Register */ +#define MDCR_EL2_HPMN_MASK 0x1f +#define MDCR_EL2_HPMN_SHIFT 0 +#define MDCR_EL2_TPMCR_SHIFT 5 +#define MDCR_EL2_TPMCR (0x1UL << MDCR_EL2_TPMCR_SHIFT) +#define MDCR_EL2_TPM_SHIFT 6 +#define MDCR_EL2_TPM (0x1UL << MDCR_EL2_TPM_SHIFT) +#define MDCR_EL2_HPME_SHIFT 7 +#define MDCR_EL2_HPME (0x1UL << MDCR_EL2_HPME_SHIFT) +#define MDCR_EL2_TDE_SHIFT 8 +#define MDCR_EL2_TDE (0x1UL << MDCR_EL2_TDE_SHIFT) +#define MDCR_EL2_TDA_SHIFT 9 +#define MDCR_EL2_TDA (0x1UL << MDCR_EL2_TDA_SHIFT) +#define MDCR_EL2_TDOSA_SHIFT 10 +#define MDCR_EL2_TDOSA (0x1UL << MDCR_EL2_TDOSA_SHIFT) +#define MDCR_EL2_TDRA_SHIFT 11 +#define MDCR_EL2_TDRA (0x1UL << MDCR_EL2_TDRA_SHIFT) +#define MDCR_EL2_E2PB_SHIFT 12 +#define MDCR_EL2_E2PB_MASK (0x3UL << MDCR_EL2_E2PB_SHIFT) +#define MDCR_EL2_E2PB_EL1_0_NO_TRAP (0x3UL << MDCR_EL2_E2PB_SHIFT) +#define MDCR_EL2_TPMS_SHIFT 14 +#define MDCR_EL2_TPMS (0x1UL << MDCR_EL2_TPMS_SHIFT) +#define MDCR_EL2_EnSPM_SHIFT 15 +#define MDCR_EL2_EnSPM (0x1UL << MDCR_EL2_EnSPM_SHIFT) +#define MDCR_EL2_HPMD_SHIFT 17 +#define MDCR_EL2_HPMD (0x1UL << MDCR_EL2_HPMD_SHIFT) +#define MDCR_EL2_TTRF_SHIFT 19 +#define MDCR_EL2_TTRF (0x1UL << MDCR_EL2_TTRF_SHIFT) +#define MDCR_EL2_HCCD_SHIFT 23 +#define MDCR_EL2_HCCD (0x1UL << MDCR_EL2_HCCD_SHIFT) +#define MDCR_EL2_E2TB_SHIFT 24 +#define MDCR_EL2_E2TB_MASK (0x3UL << MDCR_EL2_E2TB_SHIFT) +#define MDCR_EL2_HLP_SHIFT 26 +#define MDCR_EL2_HLP (0x1UL << MDCR_EL2_HLP_SHIFT) +#define MDCR_EL2_TDCC_SHIFT 27 +#define MDCR_EL2_TDCC (0x1UL << MDCR_EL2_TDCC_SHIFT) +#define MDCR_EL2_MTPME_SHIFT 28 +#define MDCR_EL2_MTPME (0x1UL << MDCR_EL2_MTPME_SHIFT) +#define MDCR_EL2_HPMFZO_SHIFT 29 +#define MDCR_EL2_HPMFZO (0x1UL << MDCR_EL2_HPMFZO_SHIFT) +#define MDCR_EL2_PMSSE_SHIFT 30 +#define MDCR_EL2_PMSSE_MASK (0x3UL << MDCR_EL2_PMSSE_SHIFT) +#define MDCR_EL2_HPMFZS_SHIFT 36 +#define MDCR_EL2_HPMFZS (0x1UL << MDCR_EL2_HPMFZS_SHIFT) +#define MDCR_EL2_PMEE_SHIFT 40 +#define MDCR_EL2_PMEE_MASK (0x3UL << MDCR_EL2_PMEE_SHIFT) +#define MDCR_EL2_EBWE_SHIFT 43 +#define MDCR_EL2_EBWE (0x1UL << MDCR_EL2_EBWE_SHIFT) + /* 
SCTLR_EL2 - System Control Register */ #define SCTLR_EL2_RES1 0x30c50830 #define SCTLR_EL2_M_SHIFT 0 @@ -299,52 +407,4 @@ /* Assumed to be 0 by locore.S */ #define VTTBR_HOST 0x0000000000000000 -/* MDCR_EL2 - Hyp Debug Control Register */ -#define MDCR_EL2_HPMN_MASK 0x1f -#define MDCR_EL2_HPMN_SHIFT 0 -#define MDCR_EL2_TPMCR_SHIFT 5 -#define MDCR_EL2_TPMCR (0x1UL << MDCR_EL2_TPMCR_SHIFT) -#define MDCR_EL2_TPM_SHIFT 6 -#define MDCR_EL2_TPM (0x1UL << MDCR_EL2_TPM_SHIFT) -#define MDCR_EL2_HPME_SHIFT 7 -#define MDCR_EL2_HPME (0x1UL << MDCR_EL2_HPME_SHIFT) -#define MDCR_EL2_TDE_SHIFT 8 -#define MDCR_EL2_TDE (0x1UL << MDCR_EL2_TDE_SHIFT) -#define MDCR_EL2_TDA_SHIFT 9 -#define MDCR_EL2_TDA (0x1UL << MDCR_EL2_TDA_SHIFT) -#define MDCR_EL2_TDOSA_SHIFT 10 -#define MDCR_EL2_TDOSA (0x1UL << MDCR_EL2_TDOSA_SHIFT) -#define MDCR_EL2_TDRA_SHIFT 11 -#define MDCR_EL2_TDRA (0x1UL << MDCR_EL2_TDRA_SHIFT) -#define MDCR_E2PB_SHIFT 12 -#define MDCR_E2PB_MASK (0x3UL << MDCR_E2PB_SHIFT) -#define MDCR_TPMS_SHIFT 14 -#define MDCR_TPMS (0x1UL << MDCR_TPMS_SHIFT) -#define MDCR_EnSPM_SHIFT 15 -#define MDCR_EnSPM (0x1UL << MDCR_EnSPM_SHIFT) -#define MDCR_HPMD_SHIFT 17 -#define MDCR_HPMD (0x1UL << MDCR_HPMD_SHIFT) -#define MDCR_TTRF_SHIFT 19 -#define MDCR_TTRF (0x1UL << MDCR_TTRF_SHIFT) -#define MDCR_HCCD_SHIFT 23 -#define MDCR_HCCD (0x1UL << MDCR_HCCD_SHIFT) -#define MDCR_E2TB_SHIFT 24 -#define MDCR_E2TB_MASK (0x3UL << MDCR_E2TB_SHIFT) -#define MDCR_HLP_SHIFT 26 -#define MDCR_HLP (0x1UL << MDCR_HLP_SHIFT) -#define MDCR_TDCC_SHIFT 27 -#define MDCR_TDCC (0x1UL << MDCR_TDCC_SHIFT) -#define MDCR_MTPME_SHIFT 28 -#define MDCR_MTPME (0x1UL << MDCR_MTPME_SHIFT) -#define MDCR_HPMFZO_SHIFT 29 -#define MDCR_HPMFZO (0x1UL << MDCR_HPMFZO_SHIFT) -#define MDCR_PMSSE_SHIFT 30 -#define MDCR_PMSSE_MASK (0x3UL << MDCR_PMSSE_SHIFT) -#define MDCR_HPMFZS_SHIFT 36 -#define MDCR_HPMFZS (0x1UL << MDCR_HPMFZS_SHIFT) -#define MDCR_PMEE_SHIFT 40 -#define MDCR_PMEE_MASK (0x3UL << MDCR_PMEE_SHIFT) -#define MDCR_EBWE_SHIFT 43 -#define MDCR_EBWE (0x1UL << MDCR_EBWE_SHIFT) - #endif /* !_MACHINE_HYPERVISOR_H_ */ diff --git a/sys/arm64/include/kexec.h b/sys/arm64/include/kexec.h new file mode 100644 index 000000000000..0a8c7a053331 --- /dev/null +++ b/sys/arm64/include/kexec.h @@ -0,0 +1,33 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _ARM64_KEXEC_H_ +#define _ARM64_KEXEC_H_ + +#define KEXEC_MD_PAGES(x) 0 + +#endif /* _ARM64_KEXEC_H_ */ diff --git a/sys/arm64/include/pcpu.h b/sys/arm64/include/pcpu.h index 09bd8fa8a966..73399d2c3f8c 100644 --- a/sys/arm64/include/pcpu.h +++ b/sys/arm64/include/pcpu.h @@ -50,7 +50,8 @@ struct debug_monitor_state; struct pmap *pc_curvmpmap; \ uint64_t pc_mpidr; \ u_int pc_bcast_tlbi_workaround; \ - char __pad[197] + uint64_t pc_release_addr; \ + char __pad[189] #ifdef _KERNEL diff --git a/sys/arm64/include/pmap.h b/sys/arm64/include/pmap.h index 0f23f200f0f6..406b6e2c5e0a 100644 --- a/sys/arm64/include/pmap.h +++ b/sys/arm64/include/pmap.h @@ -69,6 +69,7 @@ struct md_page { TAILQ_HEAD(,pv_entry) pv_list; int pv_gen; vm_memattr_t pv_memattr; + uint8_t pv_reserve[3]; }; enum pmap_stage { @@ -174,6 +175,8 @@ int pmap_fault(pmap_t, uint64_t, uint64_t); struct pcb *pmap_switch(struct thread *); +void pmap_s1_invalidate_all_kernel(void); + extern void (*pmap_clean_stage2_tlbi)(void); extern void (*pmap_stage2_invalidate_range)(uint64_t, vm_offset_t, vm_offset_t, bool); diff --git a/sys/arm64/include/proc.h b/sys/arm64/include/proc.h index dc2fa2df654d..b40990e89385 100644 --- a/sys/arm64/include/proc.h +++ b/sys/arm64/include/proc.h @@ -35,6 +35,7 @@ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ +#ifndef LOCORE struct ptrauth_key { uint64_t pa_key_lo; uint64_t pa_key_hi; @@ -73,8 +74,13 @@ struct mdthread { }; struct mdproc { - long md_dummy; + uint64_t md_tcr; /* TCR_EL1 fields to update */ + uint64_t md_reserved[2]; }; +#endif /* !LOCORE */ + +/* Fields that can be set in md_tcr */ +#define MD_TCR_FIELDS TCR_TBI0 #define KINFO_PROC_SIZE 1088 #define KINFO_PROC32_SIZE 816 diff --git a/sys/arm64/include/smp.h b/sys/arm64/include/smp.h index 500cd1ef4f02..4a5bfda3ac1c 100644 --- a/sys/arm64/include/smp.h +++ b/sys/arm64/include/smp.h @@ -40,6 +40,7 @@ enum { IPI_STOP, IPI_STOP_HARD, IPI_HARDCLOCK, + IPI_OFF, INTR_IPI_COUNT, }; diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h index 1d783cdacb0d..e67540eac66d 100644 --- a/sys/arm64/include/vmm.h +++ b/sys/arm64/include/vmm.h @@ -42,6 +42,7 @@ enum vm_suspend_how { VM_SUSPEND_RESET, VM_SUSPEND_POWEROFF, VM_SUSPEND_HALT, + VM_SUSPEND_DESTROY, VM_SUSPEND_LAST }; @@ -89,6 +90,7 @@ enum vm_reg_name { VM_REG_GUEST_TTBR1_EL1, VM_REG_GUEST_TCR_EL1, VM_REG_GUEST_TCR2_EL1, + VM_REG_GUEST_MPIDR_EL1, VM_REG_LAST }; @@ -104,27 +106,6 @@ enum vm_reg_name { #define VM_GUEST_BASE_IPA 0x80000000UL /* Guest kernel start ipa */ -/* - * The VM name has to fit into the pathname length constraints of devfs, - * governed primarily by SPECNAMELEN. The length is the total number of - * characters in the full path, relative to the mount point and not - * including any leading '/' characters. - * A prefix and a suffix are added to the name specified by the user. - * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters - * longer for future use. 
- * The suffix is a string that identifies a bootrom image or some similar - * image that is attached to the VM. A separator character gets added to - * the suffix automatically when generating the full path, so it must be - * accounted for, reducing the effective length by 1. - * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37 - * bytes for FreeBSD 12. A minimum length is set for safety and supports - * a SPECNAMELEN as small as 32 on old systems. - */ -#define VM_MAX_PREFIXLEN 10 -#define VM_MAX_SUFFIXLEN 15 -#define VM_MAX_NAMELEN \ - (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1) - #ifdef _KERNEL struct vm; struct vm_exception; @@ -141,10 +122,41 @@ struct vm_eventinfo { int *iptr; /* reqidle cookie */ }; +#define DECLARE_VMMOPS_FUNC(ret_type, opname, args) \ + ret_type vmmops_##opname args + +DECLARE_VMMOPS_FUNC(int, modinit, (int ipinum)); +DECLARE_VMMOPS_FUNC(int, modcleanup, (void)); +DECLARE_VMMOPS_FUNC(void *, init, (struct vm *vm, struct pmap *pmap)); +DECLARE_VMMOPS_FUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault)); +DECLARE_VMMOPS_FUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, + struct vm_eventinfo *info)); +DECLARE_VMMOPS_FUNC(void, cleanup, (void *vmi)); +DECLARE_VMMOPS_FUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, + int vcpu_id)); +DECLARE_VMMOPS_FUNC(void, vcpu_cleanup, (void *vcpui)); +DECLARE_VMMOPS_FUNC(int, exception, (void *vcpui, uint64_t esr, uint64_t far)); +DECLARE_VMMOPS_FUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)); +DECLARE_VMMOPS_FUNC(int, setreg, (void *vcpui, int num, uint64_t val)); +DECLARE_VMMOPS_FUNC(int, getcap, (void *vcpui, int num, int *retval)); +DECLARE_VMMOPS_FUNC(int, setcap, (void *vcpui, int num, int val)); +DECLARE_VMMOPS_FUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, + vm_offset_t max)); +DECLARE_VMMOPS_FUNC(void, vmspace_free, (struct vmspace *vmspace)); +#ifdef notyet +#ifdef BHYVE_SNAPSHOT +DECLARE_VMMOPS_FUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta *meta)); +DECLARE_VMMOPS_FUNC(int, vcpu_snapshot, (void *vcpui, + struct vm_snapshot_meta *meta)); +DECLARE_VMMOPS_FUNC(int, restore_tsc, (void *vcpui, uint64_t now)); +#endif +#endif + int vm_create(const char *name, struct vm **retvm); struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); void vm_disable_vcpu_creation(struct vm *vm); -void vm_slock_vcpus(struct vm *vm); +void vm_lock_vcpus(struct vm *vm); void vm_unlock_vcpus(struct vm *vm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); @@ -230,7 +242,6 @@ vcpu_should_yield(struct vcpu *vcpu) void *vcpu_stats(struct vcpu *vcpu); void vcpu_notify_event(struct vcpu *vcpu); -struct vmspace *vm_vmspace(struct vm *vm); struct vm_mem *vm_mem(struct vm *vm); enum vm_reg_name vm_segment_name(int seg_encoding); diff --git a/sys/arm64/include/vmm_dev.h b/sys/arm64/include/vmm_dev.h index 219f1116c728..289ff0fe1fc9 100644 --- a/sys/arm64/include/vmm_dev.h +++ b/sys/arm64/include/vmm_dev.h @@ -31,6 +31,8 @@ #include <machine/vmm.h> +#include <dev/vmm/vmm_param.h> + struct vm_memmap { vm_paddr_t gpa; int segid; /* memory segment */ diff --git a/sys/arm64/include/vmparam.h b/sys/arm64/include/vmparam.h index 349849845e73..781602306436 100644 --- a/sys/arm64/include/vmparam.h +++ b/sys/arm64/include/vmparam.h @@ -209,10 +209,16 @@ #define KMSAN_ORIG_MAX_ADDRESS (0xffff028000000000UL) /* The address bits that hold a pointer authentication code */ -#define PAC_ADDR_MASK 
(0xff7f000000000000UL) +#define PAC_ADDR_MASK (0x007f000000000000UL) +#define PAC_ADDR_MASK_14 (0xff7f000000000000UL) + +/* The top-byte ignore address bits */ +#define TBI_ADDR_MASK 0xff00000000000000UL /* If true addr is in the kernel address space */ #define ADDR_IS_KERNEL(addr) (((addr) & (1ul << 55)) == (1ul << 55)) +/* If true addr is in the user address space */ +#define ADDR_IS_USER(addr) (((addr) & (1ul << 55)) == 0) /* If true addr is in its canonical form (i.e. no TBI, PAC, etc.) */ #define ADDR_IS_CANONICAL(addr) \ (((addr) & 0xffff000000000000UL) == 0 || \ diff --git a/sys/arm64/linux/linux_sysvec.c b/sys/arm64/linux/linux_sysvec.c index 084b7a11b01f..ac05820f89bc 100644 --- a/sys/arm64/linux/linux_sysvec.c +++ b/sys/arm64/linux/linux_sysvec.c @@ -584,7 +584,7 @@ linux_vdso_reloc(char *mapping, Elf_Addr offset) } } -static Elf_Brandnote linux64_brandnote = { +static const Elf_Brandnote linux64_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, .hdr.n_type = 1, @@ -593,7 +593,7 @@ static Elf_Brandnote linux64_brandnote = { .trans_osrel = linux_trans_osrel }; -static Elf64_Brandinfo linux_glibc2brand = { +static const Elf64_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_AARCH64, .compat_3_brand = "Linux", @@ -604,7 +604,7 @@ static Elf64_Brandinfo linux_glibc2brand = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -Elf64_Brandinfo *linux_brandlist[] = { +const Elf64_Brandinfo *linux_brandlist[] = { &linux_glibc2brand, NULL }; @@ -612,8 +612,8 @@ Elf64_Brandinfo *linux_brandlist[] = { static int linux64_elf_modevent(module_t mod, int type, void *data) { - Elf64_Brandinfo **brandinfo; - struct linux_ioctl_handler**lihp; + const Elf64_Brandinfo **brandinfo; + struct linux_ioctl_handler **lihp; int error; error = 0; diff --git a/sys/arm64/nvidia/tegra210/max77620_regulators.c b/sys/arm64/nvidia/tegra210/max77620_regulators.c index af1a5af20ec3..d52aeaef1287 100644 --- a/sys/arm64/nvidia/tegra210/max77620_regulators.c +++ b/sys/arm64/nvidia/tegra210/max77620_regulators.c @@ -364,7 +364,7 @@ max77620_get_sel(struct max77620_reg_sc *sc, uint8_t *sel) rv = RD1(sc->base_sc, sc->def->volt_reg, sel); if (rv != 0) { - printf("%s: cannot read volatge selector: %d\n", + printf("%s: cannot read voltage selector: %d\n", regnode_get_name(sc->regnode), rv); return (rv); } @@ -384,7 +384,7 @@ max77620_set_sel(struct max77620_reg_sc *sc, uint8_t sel) rv = RM1(sc->base_sc, sc->def->volt_reg, sc->def->volt_vsel_mask, sel); if (rv != 0) { - printf("%s: cannot set volatge selector: %d\n", + printf("%s: cannot set voltage selector: %d\n", regnode_get_name(sc->regnode), rv); return (rv); } diff --git a/sys/arm64/rockchip/rk_gpio.c b/sys/arm64/rockchip/rk_gpio.c index 847bc7394dd0..8da37d516802 100644 --- a/sys/arm64/rockchip/rk_gpio.c +++ b/sys/arm64/rockchip/rk_gpio.c @@ -90,6 +90,11 @@ struct rk_pin_irqsrc { uint32_t mode; }; +struct rk_gpio_reg { + uint8_t single; + uint8_t offset; +}; + struct rk_gpio_softc { device_t sc_dev; device_t sc_busdev; @@ -103,7 +108,7 @@ struct rk_gpio_softc { uint32_t swporta_ddr; uint32_t version; struct pin_cached pin_cached[RK_GPIO_MAX_PINS]; - uint8_t regs[RK_GPIO_REGNUM]; + struct rk_gpio_reg regs[RK_GPIO_REGNUM]; void *ihandle; struct rk_pin_irqsrc isrcs[RK_GPIO_MAX_PINS]; }; @@ -138,14 +143,15 @@ static int rk_gpio_detach(device_t dev); static int rk_gpio_read_bit(struct rk_gpio_softc *sc, int reg, int bit) { - int offset = sc->regs[reg]; + struct rk_gpio_reg *rk_reg = &sc->regs[reg]; uint32_t value; - if (sc->version == 
RK_GPIO_TYPE_V1) { - value = RK_GPIO_READ(sc, offset); + if (rk_reg->single) { + value = RK_GPIO_READ(sc, rk_reg->offset); value >>= bit; } else { - value = RK_GPIO_READ(sc, bit > 15 ? offset + 4 : offset); + value = RK_GPIO_READ(sc, bit > 15 ? + rk_reg->offset + 4 : rk_reg->offset); value >>= (bit % 16); } return (value & 1); @@ -154,50 +160,53 @@ rk_gpio_read_bit(struct rk_gpio_softc *sc, int reg, int bit) static void rk_gpio_write_bit(struct rk_gpio_softc *sc, int reg, int bit, int data) { - int offset = sc->regs[reg]; + struct rk_gpio_reg *rk_reg = &sc->regs[reg]; uint32_t value; - if (sc->version == RK_GPIO_TYPE_V1) { - value = RK_GPIO_READ(sc, offset); + if (rk_reg->single) { + value = RK_GPIO_READ(sc, rk_reg->offset); if (data) value |= (1 << bit); else value &= ~(1 << bit); - RK_GPIO_WRITE(sc, offset, value); + RK_GPIO_WRITE(sc, rk_reg->offset, value); } else { if (data) value = (1 << (bit % 16)); else value = 0; value |= (1 << ((bit % 16) + 16)); - RK_GPIO_WRITE(sc, bit > 15 ? offset + 4 : offset, value); + RK_GPIO_WRITE(sc, bit > 15 ? + rk_reg->offset + 4 : rk_reg->offset, value); } } static uint32_t rk_gpio_read_4(struct rk_gpio_softc *sc, int reg) { - int offset = sc->regs[reg]; + struct rk_gpio_reg *rk_reg = &sc->regs[reg]; uint32_t value; - if (sc->version == RK_GPIO_TYPE_V1) - value = RK_GPIO_READ(sc, offset); + if (rk_reg->single) + value = RK_GPIO_READ(sc, rk_reg->offset); else - value = (RK_GPIO_READ(sc, offset) & 0xffff) | - (RK_GPIO_READ(sc, offset + 4) << 16); + value = (RK_GPIO_READ(sc, rk_reg->offset) & 0xffff) | + (RK_GPIO_READ(sc, rk_reg->offset + 4) << 16); return (value); } static void rk_gpio_write_4(struct rk_gpio_softc *sc, int reg, uint32_t value) { - int offset = sc->regs[reg]; + struct rk_gpio_reg *rk_reg = &sc->regs[reg]; - if (sc->version == RK_GPIO_TYPE_V1) - RK_GPIO_WRITE(sc, offset, value); + if (rk_reg->single) + RK_GPIO_WRITE(sc, rk_reg->offset, value); else { - RK_GPIO_WRITE(sc, offset, (value & 0xffff) | 0xffff0000); - RK_GPIO_WRITE(sc, offset + 4, (value >> 16) | 0xffff0000); + RK_GPIO_WRITE(sc, rk_reg->offset, + (value & 0xffff) | 0xffff0000); + RK_GPIO_WRITE(sc, rk_reg->offset + 4, + (value >> 16) | 0xffff0000); } } @@ -313,31 +322,31 @@ rk_gpio_attach(device_t dev) switch (sc->version) { case RK_GPIO_TYPE_V1: - sc->regs[RK_GPIO_SWPORTA_DR] = 0x00; - sc->regs[RK_GPIO_SWPORTA_DDR] = 0x04; - sc->regs[RK_GPIO_INTEN] = 0x30; - sc->regs[RK_GPIO_INTMASK] = 0x34; - sc->regs[RK_GPIO_INTTYPE_LEVEL] = 0x38; - sc->regs[RK_GPIO_INT_POLARITY] = 0x3c; - sc->regs[RK_GPIO_INT_STATUS] = 0x40; - sc->regs[RK_GPIO_INT_RAWSTATUS] = 0x44; - sc->regs[RK_GPIO_DEBOUNCE] = 0x48; - sc->regs[RK_GPIO_PORTA_EOI] = 0x4c; - sc->regs[RK_GPIO_EXT_PORTA] = 0x50; + sc->regs[RK_GPIO_SWPORTA_DR] = (struct rk_gpio_reg){ 1, 0x00 }; + sc->regs[RK_GPIO_SWPORTA_DDR] = (struct rk_gpio_reg){ 1, 0x04 }; + sc->regs[RK_GPIO_INTEN] = (struct rk_gpio_reg){ 1, 0x30 }; + sc->regs[RK_GPIO_INTMASK] = (struct rk_gpio_reg){ 1, 0x34 }; + sc->regs[RK_GPIO_INTTYPE_LEVEL] = (struct rk_gpio_reg){ 1, 0x38 }; + sc->regs[RK_GPIO_INT_POLARITY] = (struct rk_gpio_reg){ 1, 0x3c }; + sc->regs[RK_GPIO_INT_STATUS] = (struct rk_gpio_reg){ 1, 0x40 }; + sc->regs[RK_GPIO_INT_RAWSTATUS] = (struct rk_gpio_reg){ 1, 0x44 }; + sc->regs[RK_GPIO_DEBOUNCE] = (struct rk_gpio_reg){ 1, 0x48 }; + sc->regs[RK_GPIO_PORTA_EOI] = (struct rk_gpio_reg){ 1, 0x4c }; + sc->regs[RK_GPIO_EXT_PORTA] = (struct rk_gpio_reg){ 1, 0x50 }; break; case RK_GPIO_TYPE_V2: - sc->regs[RK_GPIO_SWPORTA_DR] = 0x00; - sc->regs[RK_GPIO_SWPORTA_DDR] = 0x08; - 
sc->regs[RK_GPIO_INTEN] = 0x10; - sc->regs[RK_GPIO_INTMASK] = 0x18; - sc->regs[RK_GPIO_INTTYPE_LEVEL] = 0x20; - sc->regs[RK_GPIO_INTTYPE_BOTH] = 0x30; - sc->regs[RK_GPIO_INT_POLARITY] = 0x28; - sc->regs[RK_GPIO_INT_STATUS] = 0x50; - sc->regs[RK_GPIO_INT_RAWSTATUS] = 0x58; - sc->regs[RK_GPIO_DEBOUNCE] = 0x38; - sc->regs[RK_GPIO_PORTA_EOI] = 0x60; - sc->regs[RK_GPIO_EXT_PORTA] = 0x70; + sc->regs[RK_GPIO_SWPORTA_DR] = (struct rk_gpio_reg){ 0, 0x00 }; + sc->regs[RK_GPIO_SWPORTA_DDR] = (struct rk_gpio_reg){ 0, 0x08 }; + sc->regs[RK_GPIO_INTEN] = (struct rk_gpio_reg){ 0, 0x10 }; + sc->regs[RK_GPIO_INTMASK] = (struct rk_gpio_reg){ 0, 0x18 }; + sc->regs[RK_GPIO_INTTYPE_LEVEL] = (struct rk_gpio_reg){ 0, 0x20 }; + sc->regs[RK_GPIO_INTTYPE_BOTH] = (struct rk_gpio_reg){ 0, 0x30 }; + sc->regs[RK_GPIO_INT_POLARITY] = (struct rk_gpio_reg){ 0, 0x28 }; + sc->regs[RK_GPIO_INT_STATUS] = (struct rk_gpio_reg){ 1, 0x50 }; + sc->regs[RK_GPIO_INT_RAWSTATUS] = (struct rk_gpio_reg){ 1, 0x58 }; + sc->regs[RK_GPIO_DEBOUNCE] = (struct rk_gpio_reg){ 0, 0x38 }; + sc->regs[RK_GPIO_PORTA_EOI] = (struct rk_gpio_reg){ 0, 0x60 }; + sc->regs[RK_GPIO_EXT_PORTA] = (struct rk_gpio_reg){ 1, 0x70 }; break; default: device_printf(dev, "Unknown gpio version %08x\n", sc->version); @@ -371,12 +380,13 @@ rk_gpio_attach(device_t dev) sc->swporta_ddr = rk_gpio_read_4(sc, RK_GPIO_SWPORTA_DDR); RK_GPIO_UNLOCK(sc); - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { rk_gpio_detach(dev); return (ENXIO); } + bus_attach_children(dev); return (0); } @@ -393,7 +403,7 @@ rk_gpio_detach(device_t dev) mtx_destroy(&sc->sc_mtx); clk_disable(sc->clk); - return(0); + return (0); } static device_t @@ -470,7 +480,7 @@ rk_gpio_pin_getcaps(device_t dev, uint32_t pin, uint32_t *caps) { if (pin >= RK_GPIO_MAX_PINS) - return EINVAL; + return (EINVAL); *caps = RK_GPIO_DEFAULT_CAPS; return (0); @@ -653,46 +663,108 @@ rk_gpio_get_node(device_t bus, device_t dev) } static int -rk_pic_map_intr(device_t dev, struct intr_map_data *data, - struct intr_irqsrc **isrcp) +rk_gpio_pic_map_fdt(struct rk_gpio_softc *sc, + struct intr_map_data_fdt *daf, + u_int *irqp, uint32_t *modep) { - struct rk_gpio_softc *sc = device_get_softc(dev); - struct intr_map_data_gpio *gdata; uint32_t irq; + uint32_t mode; - if (data->type != INTR_MAP_DATA_GPIO) { - device_printf(dev, "Wrong type\n"); - return (ENOTSUP); - } - gdata = (struct intr_map_data_gpio *)data; - irq = gdata->gpio_pin_num; + if (daf->ncells != 2) + return (EINVAL); + + irq = daf->cells[0]; + if (irq >= RK_GPIO_MAX_PINS) + return (EINVAL); + + /* Only reasonable modes are supported. 
*/ + if (daf->cells[1] == 1) + mode = GPIO_INTR_EDGE_RISING; + else if (daf->cells[1] == 2) + mode = GPIO_INTR_EDGE_FALLING; + else if (daf->cells[1] == 3) + mode = GPIO_INTR_EDGE_BOTH; + else if (daf->cells[1] == 4) + mode = GPIO_INTR_LEVEL_HIGH; + else if (daf->cells[1] == 8) + mode = GPIO_INTR_LEVEL_LOW; + else + return (EINVAL); + + *irqp = irq; + if (modep != NULL) + *modep = mode; + return (0); +} + +static int +rk_gpio_pic_map_gpio(struct rk_gpio_softc *sc, + struct intr_map_data_gpio *dag, + u_int *irqp, uint32_t *modep) +{ + uint32_t irq; + irq = dag->gpio_pin_num; if (irq >= RK_GPIO_MAX_PINS) { - device_printf(dev, "Invalid interrupt %u\n", irq); + device_printf(sc->sc_dev, "Invalid interrupt %u\n", + irq); return (EINVAL); } - *isrcp = RK_GPIO_ISRC(sc, irq); + + *irqp = irq; + if (modep != NULL) + *modep = dag->gpio_intr_mode; return (0); } static int +rk_gpio_pic_map(struct rk_gpio_softc *sc, struct intr_map_data *data, + u_int *irqp, uint32_t *modep) +{ + switch (data->type) { + case INTR_MAP_DATA_FDT: + return (rk_gpio_pic_map_fdt(sc, + (struct intr_map_data_fdt *)data, irqp, modep)); + case INTR_MAP_DATA_GPIO: + return (rk_gpio_pic_map_gpio(sc, + (struct intr_map_data_gpio *)data, irqp, modep)); + default: + device_printf(sc->sc_dev, "Wrong type\n"); + return (ENOTSUP); + } +} + +static int +rk_pic_map_intr(device_t dev, struct intr_map_data *data, + struct intr_irqsrc **isrcp) +{ + int error; + struct rk_gpio_softc *sc = device_get_softc(dev); + uint32_t irq; + + error = rk_gpio_pic_map(sc, data, &irq, NULL); + if (error == 0) + *isrcp = RK_GPIO_ISRC(sc, irq); + return (error); +} + +static int rk_pic_setup_intr(device_t dev, struct intr_irqsrc *isrc, struct resource *res, struct intr_map_data *data) { struct rk_gpio_softc *sc = device_get_softc(dev); struct rk_pin_irqsrc *rkisrc = (struct rk_pin_irqsrc *)isrc; - struct intr_map_data_gpio *gdata; uint32_t mode; - uint8_t pin; + uint32_t pin; if (!data) { device_printf(dev, "No map data\n"); return (ENOTSUP); } - gdata = (struct intr_map_data_gpio *)data; - mode = gdata->gpio_intr_mode; - pin = gdata->gpio_pin_num; - if (rkisrc->irq != gdata->gpio_pin_num) { + if (rk_gpio_pic_map(sc, data, &pin, &mode) != 0) + return (EINVAL); + + if (rkisrc->irq != pin) { device_printf(dev, "Interrupts don't match\n"); return (EINVAL); } @@ -779,6 +851,10 @@ static device_method_t rk_gpio_methods[] = { DEVMETHOD(device_attach, rk_gpio_attach), DEVMETHOD(device_detach, rk_gpio_detach), + /* Bus interface */ + DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), + DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), + /* GPIO protocol */ DEVMETHOD(gpio_get_bus, rk_gpio_get_bus), DEVMETHOD(gpio_pin_max, rk_gpio_pin_max), diff --git a/sys/arm64/rockchip/rk_grf_gpio.c b/sys/arm64/rockchip/rk_grf_gpio.c index 6818bd85bb95..6ac419889614 100644 --- a/sys/arm64/rockchip/rk_grf_gpio.c +++ b/sys/arm64/rockchip/rk_grf_gpio.c @@ -181,11 +181,12 @@ rk_grf_gpio_attach(device_t dev) return (ENXIO); } - sc->sc_busdev = gpiobus_attach_bus(dev); + sc->sc_busdev = gpiobus_add_bus(dev); if (sc->sc_busdev == NULL) { return (ENXIO); } + bus_attach_children(dev); return (0); } diff --git a/sys/arm64/rockchip/rk_i2s.c b/sys/arm64/rockchip/rk_i2s.c index 5f1b6bbdeabf..856fa20e6ce4 100644 --- a/sys/arm64/rockchip/rk_i2s.c +++ b/sys/arm64/rockchip/rk_i2s.c @@ -403,10 +403,10 @@ rk_i2s_dai_intr(device_t dev, struct snd_dbuf *play_buf, struct snd_dbuf *rec_bu count = sndbuf_getready(play_buf); if (count > FIFO_SIZE - 1) count = FIFO_SIZE - 1; - size = 
sndbuf_getsize(play_buf); + size = play_buf->bufsize; readyptr = sndbuf_getreadyptr(play_buf); - samples = (uint8_t*)sndbuf_getbuf(play_buf); + samples = play_buf->buf; written = 0; for (; level < count; level++) { val = (samples[readyptr++ % size] << 0); @@ -426,9 +426,9 @@ rk_i2s_dai_intr(device_t dev, struct snd_dbuf *play_buf, struct snd_dbuf *rec_bu uint8_t *samples; uint32_t count, size, freeptr, recorded; count = sndbuf_getfree(rec_buf); - size = sndbuf_getsize(rec_buf); + size = rec_buf->bufsize; freeptr = sndbuf_getfreeptr(rec_buf); - samples = (uint8_t*)sndbuf_getbuf(rec_buf); + samples = rec_buf->buf; recorded = 0; if (level > count / 4) level = count / 4; diff --git a/sys/arm64/rockchip/rk_tsadc.c b/sys/arm64/rockchip/rk_tsadc.c index e6cbad36f697..d83b09480a0c 100644 --- a/sys/arm64/rockchip/rk_tsadc.c +++ b/sys/arm64/rockchip/rk_tsadc.c @@ -484,7 +484,7 @@ tsadc_init_tsensor(struct tsadc_softc *sc, struct tsensor *sensor) WR4(sc, TSADC_INT_EN, val); /* Shutdown temperature */ - val = tsadc_raw_to_temp(sc, sc->shutdown_temp); + val = tsadc_temp_to_raw(sc, sc->shutdown_temp); WR4(sc, TSADC_COMP_SHUT(sensor->channel), val); val = RD4(sc, TSADC_AUTO_CON); val |= TSADC_AUTO_SRC_EN(sensor->channel); diff --git a/sys/arm64/spe/arm_spe.h b/sys/arm64/spe/arm_spe.h new file mode 100644 index 000000000000..5dba20673a77 --- /dev/null +++ b/sys/arm64/spe/arm_spe.h @@ -0,0 +1,77 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _ARM64_ARM_SPE_H_ +#define _ARM64_ARM_SPE_H_ + +/* kqueue events */ +#define ARM_SPE_KQ_BUF 138 +#define ARM_SPE_KQ_SHUTDOWN 139 +#define ARM_SPE_KQ_SIGNAL 140 + +/* spe_backend_read() u64 data encoding */ +#define KQ_BUF_POS_SHIFT 0 +#define KQ_BUF_POS (1 << KQ_BUF_POS_SHIFT) +#define KQ_PARTREC_SHIFT 1 +#define KQ_PARTREC (1 << KQ_PARTREC_SHIFT) +#define KQ_FINAL_BUF_SHIFT 2 +#define KQ_FINAL_BUF (1 << KQ_FINAL_BUF_SHIFT) + +enum arm_spe_ctx_field { + ARM_SPE_CTX_NONE, + ARM_SPE_CTX_PID, + ARM_SPE_CTX_CPU_ID +}; + +enum arm_spe_profiling_level { + ARM_SPE_KERNEL_AND_USER, + ARM_SPE_KERNEL_ONLY, + ARM_SPE_USER_ONLY +}; +struct arm_spe_config { + /* Minimum interval is IMP DEF, up to a maximum 24-bit value */ + uint32_t interval; + + /* Profile kernel (EL1), userspace (EL0) or both */ + enum arm_spe_profiling_level level; + + /* + * Configure context field in SPE records to store either the + * current PID, the CPU ID or neither + * + * In PID mode, kernel threads without a process context are + * logged as PID 0 + */ + enum arm_spe_ctx_field ctx_field; +}; + +struct arm_spe_svc_buf { + uint32_t ident; + uint8_t buf_idx : 1; +}; + +#endif /* _ARM64_ARM_SPE_H_ */ diff --git a/sys/arm64/spe/arm_spe_acpi.c b/sys/arm64/spe/arm_spe_acpi.c new file mode 100644 index 000000000000..b9f40448d940 --- /dev/null +++ b/sys/arm64/spe/arm_spe_acpi.c @@ -0,0 +1,146 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * Copyright (c) 2022 The FreeBSD Foundation + * + * Portions of this software were developed by Andrew Turner under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/module.h> +#include <sys/mutex.h> + +#include <contrib/dev/acpica/include/acpi.h> +#include <dev/acpica/acpivar.h> + +#include <arm64/spe/arm_spe_dev.h> + +static device_identify_t arm_spe_acpi_identify; +static device_probe_t arm_spe_acpi_probe; + +static device_method_t arm_spe_acpi_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, arm_spe_acpi_identify), + DEVMETHOD(device_probe, arm_spe_acpi_probe), + + DEVMETHOD_END, +}; + +DEFINE_CLASS_1(spe, arm_spe_acpi_driver, arm_spe_acpi_methods, + sizeof(struct arm_spe_softc), arm_spe_driver); + +DRIVER_MODULE(spe, acpi, arm_spe_acpi_driver, 0, 0); + +struct madt_data { + u_int irq; + bool found; + bool valid; +}; + +static void +madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) +{ + ACPI_MADT_GENERIC_INTERRUPT *intr; + struct madt_data *madt_data; + u_int irq; + + madt_data = (struct madt_data *)arg; + + /* Exit early if we have decided not to attach */ + if (!madt_data->valid) + return; + + switch(entry->Type) { + case ACPI_MADT_TYPE_GENERIC_INTERRUPT: + intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry; + irq = intr->SpeInterrupt; + + if (irq == 0) { + madt_data->valid = false; + } else if (!madt_data->found) { + madt_data->found = true; + madt_data->irq = irq; + } else if (madt_data->irq != irq) { + madt_data->valid = false; + } + break; + + default: + break; + } +} + +static void +arm_spe_acpi_identify(driver_t *driver, device_t parent) +{ + struct madt_data madt_data; + ACPI_TABLE_MADT *madt; + device_t dev; + vm_paddr_t physaddr; + + physaddr = acpi_find_table(ACPI_SIG_MADT); + if (physaddr == 0) + return; + + madt = acpi_map_table(physaddr, ACPI_SIG_MADT); + if (madt == NULL) { + device_printf(parent, "spe: Unable to map the MADT\n"); + return; + } + + madt_data.irq = 0; + madt_data.found = false; + madt_data.valid = true; + + acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, + madt_handler, &madt_data); + + if (!madt_data.found || !madt_data.valid) + goto out; + + MPASS(madt_data.irq != 0); + + dev = BUS_ADD_CHILD(parent, 0, "spe", -1); + if (dev == NULL) { + device_printf(parent, "add spe child failed\n"); + goto out; + } + + BUS_SET_RESOURCE(parent, dev, SYS_RES_IRQ, 0, madt_data.irq, 1); + +out: + acpi_unmap_table(madt); +} + +static int +arm_spe_acpi_probe(device_t dev) +{ + device_set_desc(dev, "ARM Statistical Profiling Extension"); + return (BUS_PROBE_NOWILDCARD); +} diff --git a/sys/arm64/spe/arm_spe_backend.c b/sys/arm64/spe/arm_spe_backend.c new file mode 100644 index 000000000000..b4e1132f9cbc --- /dev/null +++ b/sys/arm64/spe/arm_spe_backend.c @@ -0,0 +1,586 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * Copyright (c) 2022 The FreeBSD Foundation + * + * Portions of this software were developed by Andrew Turner under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Arm Statistical Profiling Extension (SPE) backend
+ *
+ * Basic SPE operation
+ *
+ * SPE is enabled and configured on a per-core basis, with each core requiring
+ * separate code to enable and configure. Each core also requires a separate
+ * buffer passed as config where the CPU will write profiling data. When the
+ * profiling buffer is full, an interrupt will be taken on the same CPU.
+ *
+ * Driver Design
+ *
+ * - HWT allocates a large single buffer per core. This buffer is split in half
+ * to create a 2 element circular buffer (aka ping-pong buffer) where the
+ * kernel writes to one half while userspace is copying the other half
+ * - SMP calls are used to enable and configure each core, with SPE initially
+ * configured to write to the first half of the buffer
+ * - When the first half of the buffer is full, a buffer full interrupt will
+ * immediately switch writing to the second half. The kernel adds the details
+ * of the half that needs copying to a FIFO STAILQ and notifies userspace via
+ * kqueue by sending an ARM_SPE_KQ_BUF kevent with how many buffers on the
+ * queue need servicing
+ * - The kernel responds to the HWT_IOC_BUFPTR_GET ioctl by sending details of
+ * the first item from the queue
+ * - The buffers pending copying will not be overwritten until an
+ * HWT_IOC_SVC_BUF ioctl is received from userspace confirming the data has
+ * been copied out
+ * - In the case where both halves of the buffer are full, profiling will be
+ * paused until notification via HWT_IOC_SVC_BUF is received
+ *
+ * Future improvements and limitations
+ *
+ * - Using large buffer sizes should minimise pauses and loss of profiling
+ * data while the kernel is waiting for userspace to copy out data. Since it
+ * is generally expected that consuming (copying) this data is faster than
+ * producing it, in practice this has not so far been an issue. If it does
+ * prove to be an issue even with large buffer sizes then additional
+ * buffering, i.e. n element circular buffers, might be required.
+ *
+ * - kqueue can only notify and queue one kevent of the same type, with
+ * subsequent events overwriting data in the first event. The kevent
+ * ARM_SPE_KQ_BUF can therefore only contain the number of buffers on the
+ * STAILQ, incrementing each time a new buffer is full. In this case kqueue
+ * serves just as a notification to userspace to wake up and query the kernel
+ * with the appropriate ioctl. An alternative might be custom kevents where
+ * the kevent identifier is encoded with something like n+cpu_id or n+tid. In
+ * this case data could be sent directly with kqueue via the kevent data and
+ * fflags elements, avoiding the extra ioctl.
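To make the flow just described concrete, here is a rough sketch of the userspace side (an illustration, not code from this commit). The kevent identifiers and the role of ev.data follow the design notes above; the HWT ioctl argument structures are deliberately left as comments because they belong to the HWT framework and are not defined here.

#include <sys/types.h>
#include <sys/event.h>

#include <stdbool.h>
/* ARM_SPE_KQ_* and the KQ_* data-word flags come from arm_spe.h */

static void
consume_spe_buffers(int kq, int hwt_fd)
{
	struct kevent ev;
	bool done = false;

	while (!done) {
		if (kevent(kq, NULL, 0, &ev, 1, NULL) != 1)
			break;
		switch (ev.ident) {
		case ARM_SPE_KQ_BUF:
			/*
			 * ev.data counts queued buffer halves; drain them
			 * all. For each one, HWT_IOC_BUFPTR_GET returns
			 * which half to read and how far it was written
			 * (the u64 data word packs KQ_BUF_POS, KQ_PARTREC
			 * and KQ_FINAL_BUF); copy the records out, then
			 * acknowledge with HWT_IOC_SVC_BUF so the kernel
			 * may reuse that half.
			 */
			break;
		case ARM_SPE_KQ_SHUTDOWN:
			done = true;
			break;
		}
	}
}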
+ * + */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/hwt.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mman.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/rman.h> +#include <sys/rwlock.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/systm.h> + +#include <machine/bus.h> + +#include <arm64/spe/arm_spe_dev.h> + +#include <dev/hwt/hwt_vm.h> +#include <dev/hwt/hwt_backend.h> +#include <dev/hwt/hwt_config.h> +#include <dev/hwt/hwt_context.h> +#include <dev/hwt/hwt_cpu.h> +#include <dev/hwt/hwt_thread.h> + +MALLOC_DECLARE(M_ARM_SPE); + +extern u_int mp_maxid; +extern struct taskqueue *taskqueue_arm_spe; + +int spe_backend_disable_smp(struct hwt_context *ctx); + +static device_t spe_dev; +static struct hwt_backend_ops spe_ops; +static struct hwt_backend backend = { + .ops = &spe_ops, + .name = "spe", + .kva_req = 1, +}; + +static struct arm_spe_info *spe_info; + +static int +spe_backend_init_thread(struct hwt_context *ctx) +{ + return (ENOTSUP); +} + +static void +spe_backend_init_cpu(struct hwt_context *ctx) +{ + struct arm_spe_info *info; + struct arm_spe_softc *sc = device_get_softc(spe_dev); + char lock_name[32]; + char *tmp = "Arm SPE lock/cpu/"; + int cpu_id; + + spe_info = malloc(sizeof(struct arm_spe_info) * mp_ncpus, + M_ARM_SPE, M_WAITOK | M_ZERO); + + sc->spe_info = spe_info; + + CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { + info = &spe_info[cpu_id]; + info->sc = sc; + info->ident = cpu_id; + info->buf_info[0].info = info; + info->buf_info[0].buf_idx = 0; + info->buf_info[1].info = info; + info->buf_info[1].buf_idx = 1; + snprintf(lock_name, sizeof(lock_name), "%s%d", tmp, cpu_id); + mtx_init(&info->lock, lock_name, NULL, MTX_SPIN); + } +} + +static int +spe_backend_init(struct hwt_context *ctx) +{ + struct arm_spe_softc *sc = device_get_softc(spe_dev); + int error = 0; + + /* + * HWT currently specifies buffer size must be a multiple of PAGE_SIZE, + * i.e. 
minimum 4KB, while the maximum PMBIDR.Align is 2KB, so this
+ * check should never fail; keep it as a sanity check
+ */
+ if (ctx->bufsize % sc->kva_align != 0)
+ return (EINVAL);
+
+ /*
+ * Since we're splitting the buffer in half and PMBLIMITR needs to be
+ * page aligned, the minimum buffer size is 2x PAGE_SIZE
+ */
+ if (ctx->bufsize < (2 * PAGE_SIZE))
+ return (EINVAL);
+
+ sc->ctx = ctx;
+ sc->kqueue_fd = ctx->kqueue_fd;
+ sc->hwt_td = ctx->hwt_td;
+
+ if (ctx->mode == HWT_MODE_THREAD)
+ error = spe_backend_init_thread(ctx);
+ else
+ spe_backend_init_cpu(ctx);
+
+ return (error);
+}
+
+#ifdef ARM_SPE_DEBUG
+static void
+hex_dump(uint8_t *buf, size_t len)
+{
+ size_t i;
+
+ printf("--------------------------------------------------------------\n");
+ for (i = 0; i < len; ++i) {
+ if (i % 8 == 0) {
+ printf(" ");
+ }
+ if (i % 16 == 0) {
+ if (i != 0) {
+ printf("\r\n");
+ }
+ printf("\t");
+ }
+ printf("%02X ", buf[i]);
+ }
+ printf("\r\n");
+}
+#endif
+
+static int
+spe_backend_deinit(struct hwt_context *ctx)
+{
+#ifdef ARM_SPE_DEBUG
+ struct arm_spe_info *info;
+ int cpu_id;
+
+ CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+ info = &spe_info[cpu_id];
+ hex_dump((void *)info->kvaddr, 128);
+ hex_dump((void *)(info->kvaddr + (info->buf_size/2)), 128);
+ }
+#endif
+
+ if (ctx->state == CTX_STATE_RUNNING) {
+ spe_backend_disable_smp(ctx);
+ ctx->state = CTX_STATE_STOPPED;
+ }
+
+ free(spe_info, M_ARM_SPE);
+
+ return (0);
+}
+
+static uint64_t
+arm_spe_min_interval(struct arm_spe_softc *sc)
+{
+ /* IMPLEMENTATION DEFINED */
+ switch (PMSIDR_Interval_VAL(sc->pmsidr)) {
+ case PMSIDR_Interval_256:
+ return (256);
+ case PMSIDR_Interval_512:
+ return (512);
+ case PMSIDR_Interval_768:
+ return (768);
+ case PMSIDR_Interval_1024:
+ return (1024);
+ case PMSIDR_Interval_1536:
+ return (1536);
+ case PMSIDR_Interval_2048:
+ return (2048);
+ case PMSIDR_Interval_3072:
+ return (3072);
+ case PMSIDR_Interval_4096:
+ return (4096);
+ default:
+ return (4096);
+ }
+}
+
+static inline void
+arm_spe_set_interval(struct arm_spe_info *info, uint64_t interval)
+{
+ uint64_t min_interval = arm_spe_min_interval(info->sc);
+
+ interval = MAX(interval, min_interval);
+ interval = MIN(interval, (1 << 24) - 1); /* max 24-bit value */
+
+ dprintf("%s %lu\n", __func__, interval);
+
+ info->pmsirr &= ~(PMSIRR_INTERVAL_MASK);
+ info->pmsirr |= (interval << PMSIRR_INTERVAL_SHIFT);
+}
+
+static int
+spe_backend_configure(struct hwt_context *ctx, int cpu_id, int session_id)
+{
+ struct arm_spe_info *info = &spe_info[cpu_id];
+ struct arm_spe_config *cfg;
+ int err = 0;
+
+ mtx_lock_spin(&info->lock);
+ info->ident = cpu_id;
+ /* Set defaults */
+ info->pmsfcr = 0;
+ info->pmsevfr = 0xFFFFFFFFFFFFFFFFUL;
+ info->pmslatfr = 0;
+ info->pmsirr =
+ (arm_spe_min_interval(info->sc) << PMSIRR_INTERVAL_SHIFT)
+ | PMSIRR_RND;
+ info->pmsicr = 0;
+ info->pmscr = PMSCR_TS | PMSCR_PA | PMSCR_CX | PMSCR_E1SPE | PMSCR_E0SPE;
+
+ if (ctx->config != NULL &&
+ ctx->config_size == sizeof(struct arm_spe_config) &&
+ ctx->config_version == 1) {
+ cfg = (struct arm_spe_config *)ctx->config;
+ if (cfg->interval)
+ arm_spe_set_interval(info, cfg->interval);
+ if (cfg->level == ARM_SPE_KERNEL_ONLY)
+ info->pmscr &= ~(PMSCR_E0SPE); /* turn off user */
+ if (cfg->level == ARM_SPE_USER_ONLY)
+ info->pmscr &= ~(PMSCR_E1SPE); /* turn off kern */
+ if (cfg->ctx_field)
+ info->ctx_field = cfg->ctx_field;
+ } else
+ err = EINVAL;
+ mtx_unlock_spin(&info->lock);
+
+ return (err);
+}
+
+static void
+arm_spe_enable(void *arg __unused)
+{
+ struct arm_spe_info *info
= &spe_info[PCPU_GET(cpuid)]; + uint64_t base, limit; + + dprintf("%s on cpu:%d\n", __func__, PCPU_GET(cpuid)); + + mtx_lock_spin(&info->lock); + + if (info->ctx_field == ARM_SPE_CTX_CPU_ID) + WRITE_SPECIALREG(CONTEXTIDR_EL1_REG, PCPU_GET(cpuid)); + + WRITE_SPECIALREG(PMSFCR_EL1_REG, info->pmsfcr); + WRITE_SPECIALREG(PMSEVFR_EL1_REG, info->pmsevfr); + WRITE_SPECIALREG(PMSLATFR_EL1_REG, info->pmslatfr); + + /* Set the sampling interval */ + WRITE_SPECIALREG(PMSIRR_EL1_REG, info->pmsirr); + isb(); + + /* Write 0 here before enabling sampling */ + WRITE_SPECIALREG(PMSICR_EL1_REG, info->pmsicr); + isb(); + + base = info->kvaddr; + limit = base + (info->buf_size/2); + /* Enable the buffer */ + limit &= PMBLIMITR_LIMIT_MASK; /* Zero lower 12 bits */ + limit |= PMBLIMITR_E; + /* Set the base and limit */ + WRITE_SPECIALREG(PMBPTR_EL1_REG, base); + WRITE_SPECIALREG(PMBLIMITR_EL1_REG, limit); + isb(); + + /* Enable sampling */ + WRITE_SPECIALREG(PMSCR_EL1_REG, info->pmscr); + isb(); + + info->enabled = true; + + mtx_unlock_spin(&info->lock); +} + +static int +spe_backend_enable_smp(struct hwt_context *ctx) +{ + struct arm_spe_info *info; + struct hwt_vm *vm; + int cpu_id; + + HWT_CTX_LOCK(ctx); + CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { + vm = hwt_cpu_get(ctx, cpu_id)->vm; + + info = &spe_info[cpu_id]; + + mtx_lock_spin(&info->lock); + info->kvaddr = vm->kvaddr; + info->buf_size = ctx->bufsize; + mtx_unlock_spin(&info->lock); + } + HWT_CTX_UNLOCK(ctx); + + cpu_id = CPU_FFS(&ctx->cpu_map) - 1; + info = &spe_info[cpu_id]; + if (info->ctx_field == ARM_SPE_CTX_PID) + arm64_pid_in_contextidr = true; + else + arm64_pid_in_contextidr = false; + + smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier, + arm_spe_enable, smp_no_rendezvous_barrier, NULL); + + return (0); +} + +void +arm_spe_disable(void *arg __unused) +{ + struct arm_spe_info *info = &spe_info[PCPU_GET(cpuid)]; + struct arm_spe_buf_info *buf = &info->buf_info[info->buf_idx]; + + if (!info->enabled) + return; + + dprintf("%s on cpu:%d\n", __func__, PCPU_GET(cpuid)); + + /* Disable profiling */ + WRITE_SPECIALREG(PMSCR_EL1_REG, 0x0); + isb(); + + /* Drain any remaining tracing data */ + psb_csync(); + dsb(nsh); + + /* Disable the profiling buffer */ + WRITE_SPECIALREG(PMBLIMITR_EL1_REG, 0); + isb(); + + /* Clear interrupt status reg */ + WRITE_SPECIALREG(PMBSR_EL1_REG, 0x0); + + /* Clear PID/CPU_ID from context ID reg */ + WRITE_SPECIALREG(CONTEXTIDR_EL1_REG, 0); + + mtx_lock_spin(&info->lock); + buf->pmbptr = READ_SPECIALREG(PMBPTR_EL1_REG); + info->enabled = false; + mtx_unlock_spin(&info->lock); +} + +int +spe_backend_disable_smp(struct hwt_context *ctx) +{ + struct kevent kev; + struct arm_spe_info *info; + struct arm_spe_buf_info *buf; + int cpu_id; + int ret; + + /* Disable and send out remaining data in bufs */ + smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier, + arm_spe_disable, smp_no_rendezvous_barrier, NULL); + + CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { + info = &spe_info[cpu_id]; + buf = &info->buf_info[info->buf_idx]; + arm_spe_send_buffer(buf, 0); + } + + arm64_pid_in_contextidr = false; + + /* + * Tracing on all CPUs has been disabled, and we've sent write ptr + * offsets for all bufs - let userspace know it can shutdown + */ + EV_SET(&kev, ARM_SPE_KQ_SHUTDOWN, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL); + ret = kqfd_register(ctx->kqueue_fd, &kev, ctx->hwt_td, M_WAITOK); + if (ret) + dprintf("%s kqfd_register ret:%d\n", __func__, ret); + + return (0); +} + +static void +spe_backend_stop(struct hwt_context *ctx) 
+{
+ spe_backend_disable_smp(ctx);
+}
+
+static void
+arm_spe_reenable(void *arg __unused)
+{
+ struct arm_spe_info *info = &spe_info[PCPU_GET(cpuid)];
+
+ WRITE_SPECIALREG(PMSCR_EL1_REG, info->pmscr);
+ isb();
+}
+
+static int
+spe_backend_svc_buf(struct hwt_context *ctx, void *data, size_t data_size,
+ int data_version)
+{
+ struct arm_spe_info *info;
+ struct arm_spe_buf_info *buf;
+ struct arm_spe_svc_buf *s;
+ int err = 0;
+ cpuset_t cpu_set;
+
+ if (data_size != sizeof(struct arm_spe_svc_buf))
+ return (E2BIG);
+
+ if (data_version != 1)
+ return (EINVAL);
+
+ s = (struct arm_spe_svc_buf *)data;
+ if (s->buf_idx > 1)
+ return (ENODEV);
+ if (s->ident >= mp_ncpus)
+ return (EINVAL);
+
+ info = &spe_info[s->ident];
+ mtx_lock_spin(&info->lock);
+
+ buf = &info->buf_info[s->buf_idx];
+
+ if (!info->enabled) {
+ err = ENXIO;
+ goto end;
+ }
+
+ /* Clear the flag that signals the buffer needs servicing */
+ buf->buf_svc = false;
+
+ /* Re-enable profiling if we've been waiting for this notification */
+ if (buf->buf_wait) {
+ CPU_SETOF(s->ident, &cpu_set);
+
+ mtx_unlock_spin(&info->lock);
+ smp_rendezvous_cpus(cpu_set, smp_no_rendezvous_barrier,
+ arm_spe_reenable, smp_no_rendezvous_barrier, NULL);
+ mtx_lock_spin(&info->lock);
+
+ buf->buf_wait = false;
+ }
+
+end:
+ mtx_unlock_spin(&info->lock);
+ return (err);
+}
+
+static int
+spe_backend_read(struct hwt_vm *vm, int *ident, vm_offset_t *offset,
+ uint64_t *data)
+{
+ struct arm_spe_queue *q;
+ struct arm_spe_softc *sc = device_get_softc(spe_dev);
+ int error = 0;
+
+ mtx_lock_spin(&sc->sc_lock);
+
+ /* Return the first pending buffer that needs servicing */
+ q = STAILQ_FIRST(&sc->pending);
+ if (q == NULL) {
+ error = ENOENT;
+ goto error;
+ }
+ *ident = q->ident;
+ *offset = q->offset;
+ *data = (q->buf_idx << KQ_BUF_POS_SHIFT) |
+ (q->partial_rec << KQ_PARTREC_SHIFT) |
+ (q->final_buf << KQ_FINAL_BUF_SHIFT);
+
+ STAILQ_REMOVE_HEAD(&sc->pending, next);
+ sc->npending--;
+
+error:
+ mtx_unlock_spin(&sc->sc_lock);
+ if (error)
+ return (error);
+
+ free(q, M_ARM_SPE);
+ return (0);
+}
+
+static struct hwt_backend_ops spe_ops = {
+ .hwt_backend_init = spe_backend_init,
+ .hwt_backend_deinit = spe_backend_deinit,
+
+ .hwt_backend_configure = spe_backend_configure,
+ .hwt_backend_svc_buf = spe_backend_svc_buf,
+ .hwt_backend_stop = spe_backend_stop,
+
+ .hwt_backend_enable_smp = spe_backend_enable_smp,
+ .hwt_backend_disable_smp = spe_backend_disable_smp,
+
+ .hwt_backend_read = spe_backend_read,
+};
+
+int
+spe_register(device_t dev)
+{
+ spe_dev = dev;
+
+ return (hwt_backend_register(&backend));
+}
diff --git a/sys/arm64/spe/arm_spe_dev.c b/sys/arm64/spe/arm_spe_dev.c
new file mode 100644
index 000000000000..8a834197eeef
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_dev.c
@@ -0,0 +1,324 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/event.h> +#include <sys/hwt.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/rman.h> +#include <sys/smp.h> +#include <sys/systm.h> +#include <sys/taskqueue.h> + +#include <machine/bus.h> + +#include <arm64/spe/arm_spe.h> +#include <arm64/spe/arm_spe_dev.h> + +MALLOC_DEFINE(M_ARM_SPE, "armspe", "Arm SPE tracing"); + +/* + * taskqueue(9) used for sleepable routines called from interrupt handlers + */ +TASKQUEUE_FAST_DEFINE_THREAD(arm_spe); + +void arm_spe_send_buffer(void *, int); +static void arm_spe_error(void *, int); +static int arm_spe_intr(void *); +device_attach_t arm_spe_attach; + +static device_method_t arm_spe_methods[] = { + /* Device interface */ + DEVMETHOD(device_attach, arm_spe_attach), + + DEVMETHOD_END, +}; + +DEFINE_CLASS_0(spe, arm_spe_driver, arm_spe_methods, + sizeof(struct arm_spe_softc)); + +#define ARM_SPE_KVA_MAX_ALIGN UL(2048) + +int +arm_spe_attach(device_t dev) +{ + struct arm_spe_softc *sc; + int error, rid; + + sc = device_get_softc(dev); + sc->dev = dev; + + sc->pmbidr = READ_SPECIALREG(PMBIDR_EL1_REG); + sc->pmsidr = READ_SPECIALREG(PMSIDR_EL1_REG); + device_printf(dev, "PMBIDR_EL1: %#lx\n", sc->pmbidr); + device_printf(dev, "PMSIDR_EL1: %#lx\n", sc->pmsidr); + if ((sc->pmbidr & PMBIDR_P) != 0) { + device_printf(dev, "Profiling Buffer is owned by a higher Exception level\n"); + return (EPERM); + } + + sc->kva_align = 1 << ((sc->pmbidr & PMBIDR_Align_MASK) >> PMBIDR_Align_SHIFT); + if (sc->kva_align > ARM_SPE_KVA_MAX_ALIGN) { + device_printf(dev, "Invalid PMBIDR.Align value of %d\n", sc->kva_align); + return (EINVAL); + } + + rid = 0; + sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, + RF_ACTIVE); + if (sc->sc_irq_res == NULL) { + device_printf(dev, "Unable to allocate interrupt\n"); + return (ENXIO); + } + error = bus_setup_intr(dev, sc->sc_irq_res, + INTR_TYPE_MISC | INTR_MPSAFE, arm_spe_intr, NULL, sc, + &sc->sc_irq_cookie); + if (error != 0) { + device_printf(dev, "Unable to set up interrupt\n"); + return (error); + } + + mtx_init(&sc->sc_lock, "Arm SPE lock", NULL, MTX_SPIN); + + STAILQ_INIT(&sc->pending); + sc->npending = 0; + + spe_register(dev); + + return (0); +} + +/* Interrupt handler runs on the same core that triggered the exception */ +static int +arm_spe_intr(void *arg) +{ + int cpu_id = PCPU_GET(cpuid); + struct arm_spe_softc *sc = arg; + uint64_t pmbsr; + uint64_t base, limit; + uint8_t ec; + struct arm_spe_info *info = &sc->spe_info[cpu_id]; + uint8_t i = info->buf_idx; + struct arm_spe_buf_info *buf = &info->buf_info[i]; + struct arm_spe_buf_info *prev_buf = &info->buf_info[!i]; 
+ device_t dev = sc->dev; + + /* Make sure the profiling data is visible to the CPU */ + psb_csync(); + dsb(nsh); + + /* Make sure any HW update of PMBPTR_EL1 is visible to the CPU */ + isb(); + + pmbsr = READ_SPECIALREG(PMBSR_EL1_REG); + + if (!(pmbsr & PMBSR_S)) + return (FILTER_STRAY); + + /* Event Class */ + ec = PMBSR_EC_VAL(pmbsr); + switch (ec) + { + case PMBSR_EC_OTHER_BUF_MGMT: /* Other buffer management event */ + break; + case PMBSR_EC_GRAN_PROT_CHK: /* Granule Protection Check fault */ + device_printf(dev, "PMBSR_EC_GRAN_PROT_CHK\n"); + break; + case PMBSR_EC_STAGE1_DA: /* Stage 1 Data Abort */ + device_printf(dev, "PMBSR_EC_STAGE1_DA\n"); + break; + case PMBSR_EC_STAGE2_DA: /* Stage 2 Data Abort */ + device_printf(dev, "PMBSR_EC_STAGE2_DA\n"); + break; + default: + /* Unknown EC */ + device_printf(dev, "unknown PMBSR_EC: %#x\n", ec); + arm_spe_disable(NULL); + TASK_INIT(&sc->task, 0, (task_fn_t *)arm_spe_error, sc->ctx); + taskqueue_enqueue(taskqueue_arm_spe, &sc->task); + return (FILTER_HANDLED); + } + + switch (ec) { + case PMBSR_EC_OTHER_BUF_MGMT: + /* Buffer Status Code = buffer filled */ + if ((pmbsr & PMBSR_MSS_BSC_MASK) == PMBSR_MSS_BSC_BUFFER_FILLED) { + dprintf("%s SPE buffer full event (cpu:%d)\n", + __func__, cpu_id); + break; + } + case PMBSR_EC_GRAN_PROT_CHK: + case PMBSR_EC_STAGE1_DA: + case PMBSR_EC_STAGE2_DA: + /* + * If we have one of these, we've messed up the + * programming somehow (e.g. passed invalid memory to + * SPE) and can't recover + */ + arm_spe_disable(NULL); + TASK_INIT(&sc->task, 0, (task_fn_t *)arm_spe_error, sc->ctx); + taskqueue_enqueue(taskqueue_arm_spe, &sc->task); + /* PMBPTR_EL1 is fault address if PMBSR_DL is 1 */ + device_printf(dev, "CPU:%d PMBSR_EL1:%#lx\n", cpu_id, pmbsr); + device_printf(dev, "PMBPTR_EL1:%#lx PMBLIMITR_EL1:%#lx\n", + READ_SPECIALREG(PMBPTR_EL1_REG), + READ_SPECIALREG(PMBLIMITR_EL1_REG)); + return (FILTER_HANDLED); + } + + mtx_lock_spin(&info->lock); + + /* + * Data Loss bit - pmbptr might not be pointing to the end of the last + * complete record + */ + if ((pmbsr & PMBSR_DL) == PMBSR_DL) + buf->partial_rec = 1; + buf->pmbptr = READ_SPECIALREG(PMBPTR_EL1_REG); + buf->buf_svc = true; + + /* Setup regs ready to start writing to the other half of the buffer */ + info->buf_idx = !info->buf_idx; + base = buf_start_addr(info->buf_idx, info); + limit = base + (info->buf_size/2); + limit &= PMBLIMITR_LIMIT_MASK; + limit |= PMBLIMITR_E; + WRITE_SPECIALREG(PMBPTR_EL1_REG, base); + WRITE_SPECIALREG(PMBLIMITR_EL1_REG, limit); + isb(); + + /* + * Notify userspace via kqueue that buffer is full and needs copying + * out - since kqueue can sleep, don't do this in the interrupt handler, + * add to a taskqueue to be scheduled later instead + */ + TASK_INIT(&info->task[i], 0, (task_fn_t *)arm_spe_send_buffer, buf); + taskqueue_enqueue(taskqueue_arm_spe, &info->task[i]); + + /* + * It's possible userspace hasn't yet notified us they've copied out the + * other half of the buffer + * + * This might be because: + * a) Kernel hasn't scheduled the task via taskqueue to notify + * userspace to copy out the data + * b) Userspace is still copying the buffer or hasn't notified us + * back via the HWT_IOC_SVC_BUF ioctl + * + * Either way we need to avoid overwriting uncopied data in the + * buffer, so disable profiling until we receive that SVC_BUF + * ioctl + * + * Using a larger buffer size should help to minimise these events and + * loss of profiling data while profiling is disabled + */ + if (prev_buf->buf_svc) { + device_printf(sc->dev, 
"cpu%d: buffer full interrupt, but other" + " half of buffer has not been copied out - consider" + " increasing buffer size to minimise loss of profiling data\n", + cpu_id); + WRITE_SPECIALREG(PMSCR_EL1_REG, 0x0); + prev_buf->buf_wait = true; + } + + mtx_unlock_spin(&info->lock); + + /* Clear Profiling Buffer Status Register */ + WRITE_SPECIALREG(PMBSR_EL1_REG, 0); + + isb(); + + return (FILTER_HANDLED); +} + +/* note: Scheduled and run via taskqueue, so can run on any CPU at any time */ +void +arm_spe_send_buffer(void *arg, int pending __unused) +{ + struct arm_spe_buf_info *buf = (struct arm_spe_buf_info *)arg; + struct arm_spe_info *info = buf->info; + struct arm_spe_queue *queue; + struct kevent kev; + int ret; + + queue = malloc(sizeof(struct arm_spe_queue), M_ARM_SPE, + M_WAITOK | M_ZERO); + + mtx_lock_spin(&info->lock); + + /* Add to queue for userspace to pickup */ + queue->ident = info->ident; + queue->offset = buf->pmbptr - buf_start_addr(buf->buf_idx, info); + queue->buf_idx = buf->buf_idx; + queue->final_buf = !info->enabled; + queue->partial_rec = buf->partial_rec; + mtx_unlock_spin(&info->lock); + + mtx_lock_spin(&info->sc->sc_lock); + STAILQ_INSERT_TAIL(&info->sc->pending, queue, next); + info->sc->npending++; + EV_SET(&kev, ARM_SPE_KQ_BUF, EVFILT_USER, 0, NOTE_TRIGGER, + info->sc->npending, NULL); + mtx_unlock_spin(&info->sc->sc_lock); + + /* Notify userspace */ + ret = kqfd_register(info->sc->kqueue_fd, &kev, info->sc->hwt_td, + M_WAITOK); + if (ret) { + dprintf("%s kqfd_register ret:%d\n", __func__, ret); + arm_spe_error(info->sc->ctx, 0); + } +} + +static void +arm_spe_error(void *arg, int pending __unused) +{ + struct hwt_context *ctx = arg; + struct kevent kev; + int ret; + + smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier, + arm_spe_disable, smp_no_rendezvous_barrier, NULL); + + EV_SET(&kev, ARM_SPE_KQ_SHUTDOWN, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL); + ret = kqfd_register(ctx->kqueue_fd, &kev, ctx->hwt_td, M_WAITOK); + if (ret) + dprintf("%s kqfd_register ret:%d\n", __func__, ret); +} + +MODULE_DEPEND(spe, hwt, 1, 1, 1); +MODULE_VERSION(spe, 1); diff --git a/sys/arm64/spe/arm_spe_dev.h b/sys/arm64/spe/arm_spe_dev.h new file mode 100644 index 000000000000..df88d98ef1c0 --- /dev/null +++ b/sys/arm64/spe/arm_spe_dev.h @@ -0,0 +1,162 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * Copyright (c) 2022 The FreeBSD Foundation + * + * Portions of this software were developed by Andrew Turner under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ARM64_ARM_SPE_DEV_H_
+#define _ARM64_ARM_SPE_DEV_H_
+
+#include <sys/mutex.h>
+#include <sys/taskqueue.h>
+
+#include <vm/vm.h>
+
+#include <arm64/spe/arm_spe.h>
+
+#include <dev/hwt/hwt_context.h>
+
+#define ARM_SPE_DEBUG
+#undef ARM_SPE_DEBUG
+
+#ifdef ARM_SPE_DEBUG
+#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#else
+#define dprintf(fmt, ...)
+#endif
+
+DECLARE_CLASS(arm_spe_driver);
+
+struct cdev;
+struct resource;
+
+extern bool arm64_pid_in_contextidr;
+
+int spe_register(device_t dev);
+void arm_spe_disable(void *arg __unused);
+int spe_backend_disable_smp(struct hwt_context *ctx);
+void arm_spe_send_buffer(void *arg, int pending __unused);
+
+/*
+ * PSB CSYNC is a Profiling Synchronization Barrier encoded in the hint space,
+ * so it is a NOP on earlier architectures.
+ */
+#define psb_csync() __asm __volatile("hint #17" ::: "memory")
+
+struct arm_spe_softc {
+ device_t dev;
+
+ struct resource *sc_irq_res;
+ void *sc_irq_cookie;
+ struct cdev *sc_cdev;
+ struct mtx sc_lock;
+ struct task task;
+
+ int64_t sc_pmsidr;
+ int kqueue_fd;
+ struct thread *hwt_td;
+ struct arm_spe_info *spe_info;
+ struct hwt_context *ctx;
+ STAILQ_HEAD(, arm_spe_queue) pending;
+ uint64_t npending;
+
+ uint64_t pmbidr;
+ uint64_t pmsidr;
+
+ uint16_t kva_align;
+};
+
+struct arm_spe_buf_info {
+ struct arm_spe_info *info;
+ uint64_t pmbptr;
+ uint8_t buf_idx : 1;
+ bool buf_svc : 1;
+ bool buf_wait : 1;
+ bool partial_rec : 1;
+};
+
+struct arm_spe_info {
+ int ident; /* tid or cpu_id */
+ struct mtx lock;
+ struct arm_spe_softc *sc;
+ struct task task[2];
+ bool enabled : 1;
+
+ /* buffer is split in half as a ping-pong buffer */
+ vm_object_t bufobj;
+ vm_offset_t kvaddr;
+ size_t buf_size;
+ uint8_t buf_idx : 1; /* 0 = first half of buf, 1 = 2nd half */
+ struct arm_spe_buf_info buf_info[2];
+
+ /* config */
+ enum arm_spe_profiling_level level;
+ enum arm_spe_ctx_field ctx_field;
+ /* filters */
+ uint64_t pmsfcr;
+ uint64_t pmsevfr;
+ uint64_t pmslatfr;
+ /* interval */
+ uint64_t pmsirr;
+ uint64_t pmsicr;
+ /* control */
+ uint64_t pmscr;
+};
+
+struct arm_spe_queue {
+ int ident;
+ u_int buf_idx : 1;
+ bool partial_rec : 1;
+ bool final_buf : 1;
+ vm_offset_t offset;
+ STAILQ_ENTRY(arm_spe_queue) next;
+};
+
+static inline vm_offset_t
+buf_start_addr(u_int buf_idx, struct arm_spe_info *info)
+{
+ vm_offset_t addr;
+
+ if (buf_idx == 0)
+ addr = info->kvaddr;
+ else
+ addr = info->kvaddr + (info->buf_size/2);
+
+ return (addr);
+}
+
+static inline vm_offset_t
+buf_end_addr(u_int buf_idx, struct arm_spe_info *info)
+{
+ vm_offset_t addr;
+
+ if (buf_idx == 0)
+ addr = info->kvaddr + (info->buf_size/2);
+ else
+ addr = info->kvaddr + info->buf_size;
+
+ return (addr);
+}
+
+#endif /* _ARM64_ARM_SPE_DEV_H_ */
diff --git a/sys/arm64/spe/arm_spe_fdt.c b/sys/arm64/spe/arm_spe_fdt.c
new file mode 100644
index 000000000000..d16f1dee2ac8
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_fdt.c
@@ -0,0 +1,75 @@
+/*-
+ * SPDX-License-Identifier:
BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * Copyright (c) 2022 The FreeBSD Foundation + * + * Portions of this software were developed by Andrew Turner under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/module.h> +#include <sys/mutex.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> +#include <dev/ofw/openfirm.h> + +#include <arm64/spe/arm_spe_dev.h> + +static device_probe_t arm_spe_fdt_probe; + +static struct ofw_compat_data compat_data[] = { + {"arm,statistical-profiling-extension-v1", true}, + {NULL, false}, +}; + +static device_method_t arm_spe_fdt_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, arm_spe_fdt_probe), + + DEVMETHOD_END, +}; + +DEFINE_CLASS_1(spe, arm_spe_fdt_driver, arm_spe_fdt_methods, + sizeof(struct arm_spe_softc), arm_spe_driver); + +DRIVER_MODULE(spe, simplebus, arm_spe_fdt_driver, 0, 0); + +static int +arm_spe_fdt_probe(device_t dev) +{ + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (!ofw_bus_search_compatible(dev, compat_data)->ocd_data) + return (ENXIO); + + device_set_desc(dev, "ARM Statistical Profiling Extension"); + return (BUS_PROBE_DEFAULT); +} diff --git a/sys/arm64/vmm/arm64.h b/sys/arm64/vmm/arm64.h index 6a0c4c78e568..f530dab05331 100644 --- a/sys/arm64/vmm/arm64.h +++ b/sys/arm64/vmm/arm64.h @@ -78,14 +78,16 @@ struct hypctx { uint64_t pmcr_el0; /* Performance Monitors Control Register */ uint64_t pmccntr_el0; uint64_t pmccfiltr_el0; + uint64_t pmuserenr_el0; + uint64_t pmselr_el0; + uint64_t pmxevcntr_el0; uint64_t pmcntenset_el0; uint64_t pmintenset_el1; uint64_t pmovsset_el0; - uint64_t pmselr_el0; - uint64_t pmuserenr_el0; uint64_t pmevcntr_el0[31]; uint64_t pmevtyper_el0[31]; + uint64_t dbgclaimset_el1; uint64_t dbgbcr_el1[16]; /* Debug Breakpoint Control Registers */ uint64_t dbgbvr_el1[16]; /* Debug Breakpoint Value Registers */ uint64_t dbgwcr_el1[16]; /* Debug Watchpoint Control Registers */ @@ -117,6 +119,7 @@ struct hypctx { struct vgic_v3_regs vgic_v3_regs; struct vgic_v3_cpu *vgic_cpu; bool has_exception; + bool dbg_oslock; }; struct hyp { @@ -125,42 +128,14 @@ struct hyp { uint64_t vmid_generation; 
uint64_t vttbr_el2; uint64_t el2_addr; /* The address of this in el2 space */ + uint64_t feats; /* Which features are enabled */ +#define HYP_FEAT_HCX (0x1ul << 0) +#define HYP_FEAT_ECV_POFF (0x1ul << 1) bool vgic_attached; struct vgic_v3 *vgic; struct hypctx *ctx[]; }; -#define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ - ret_type vmmops_##opname args; - -DEFINE_VMMOPS_IFUNC(int, modinit, (int ipinum)) -DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) -DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) -DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, - uint64_t gla, int prot, uint64_t *gpa, int *is_fault)) -DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, - struct vm_eventinfo *info)) -DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) -DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, - int vcpu_id)) -DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) -DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t esr, uint64_t far)) -DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) -DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) -DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) -DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) -DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, - vm_offset_t max)) -DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) -#ifdef notyet -#ifdef BHYVE_SNAPSHOT -DEFINE_VMMOPS_IFUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta *meta)) -DEFINE_VMMOPS_IFUNC(int, vcpu_snapshot, (void *vcpui, - struct vm_snapshot_meta *meta)) -DEFINE_VMMOPS_IFUNC(int, restore_tsc, (void *vcpui, uint64_t now)) -#endif -#endif - uint64_t vmm_call_hyp(uint64_t, ...); #if 0 diff --git a/sys/arm64/vmm/hyp.h b/sys/arm64/vmm/hyp.h index 0b2977c73960..0c8d0fb28b18 100644 --- a/sys/arm64/vmm/hyp.h +++ b/sys/arm64/vmm/hyp.h @@ -80,7 +80,6 @@ #define HYP_ENTER_GUEST 0x00000002 #define HYP_READ_REGISTER 0x00000003 #define HYP_REG_ICH_VTR 0x1 -#define HYP_REG_CNTHCTL 0x2 #define HYP_CLEAN_S2_TLBI 0x00000004 #define HYP_DC_CIVAC 0x00000005 #define HYP_EL2_TLBI 0x00000006 diff --git a/sys/arm64/vmm/io/vgic_v3.c b/sys/arm64/vmm/io/vgic_v3.c index 67afb3374815..023406c64182 100644 --- a/sys/arm64/vmm/io/vgic_v3.c +++ b/sys/arm64/vmm/io/vgic_v3.c @@ -47,7 +47,6 @@ #include <dev/ofw/openfirm.h> -#include <machine/armreg.h> #include <machine/atomic.h> #include <machine/bus.h> #include <machine/cpufunc.h> diff --git a/sys/arm64/vmm/io/vtimer.c b/sys/arm64/vmm/io/vtimer.c index f59d7ebc1ad4..7c7fbb49e691 100644 --- a/sys/arm64/vmm/io/vtimer.c +++ b/sys/arm64/vmm/io/vtimer.c @@ -36,6 +36,7 @@ #include <sys/module.h> #include <sys/mutex.h> #include <sys/rman.h> +#include <sys/sysctl.h> #include <sys/time.h> #include <sys/timeet.h> #include <sys/timetc.h> @@ -43,7 +44,6 @@ #include <machine/bus.h> #include <machine/machdep.h> #include <machine/vmm.h> -#include <machine/armreg.h> #include <arm64/vmm/arm64.h> @@ -55,11 +55,18 @@ #define timer_enabled(ctl) \ (!((ctl) & CNTP_CTL_IMASK) && ((ctl) & CNTP_CTL_ENABLE)) -static uint64_t cnthctl_el2_reg; static uint32_t tmr_frq; #define timer_condition_met(ctl) ((ctl) & CNTP_CTL_ISTATUS) +SYSCTL_DECL(_hw_vmm); +SYSCTL_NODE(_hw_vmm, OID_AUTO, vtimer, CTLFLAG_RW, NULL, NULL); + +static bool allow_ecv_phys = false; +SYSCTL_BOOL(_hw_vmm_vtimer, OID_AUTO, allow_ecv_phys, CTLFLAG_RW, + &allow_ecv_phys, 0, + "Enable hardware access to the physical timer 
if FEAT_ECV_POFF is supported"); + static void vtimer_schedule_irq(struct hypctx *hypctx, bool phys); static int @@ -111,9 +118,8 @@ out: } int -vtimer_init(uint64_t cnthctl_el2) +vtimer_init(void) { - cnthctl_el2_reg = cnthctl_el2; /* * The guest *MUST* use the same timer frequency as the host. The * register CNTFRQ_EL0 is accessible to the guest and a different value @@ -128,8 +134,12 @@ void vtimer_vminit(struct hyp *hyp) { uint64_t now; + bool ecv_poff; - hyp->vtimer.cnthctl_el2 = cnthctl_el2_reg; + ecv_poff = false; + + if (allow_ecv_phys && (hyp->feats & HYP_FEAT_ECV_POFF) != 0) + ecv_poff = true; /* * Configure the Counter-timer Hypervisor Control Register for the VM. @@ -137,35 +147,58 @@ vtimer_vminit(struct hyp *hyp) if (in_vhe()) { /* * CNTHCTL_E2H_EL0PCTEN: trap EL0 access to CNTP{CT,CTSS}_EL0 - * CNTHCTL_E2H_EL1VCTEN: don't trap EL0 access to - * CNTV{CT,CTSS}_EL0 + * CNTHCTL_E2H_EL0VCTEN: don't trap EL0 access to + * CNTV{CT,CTXX}_EL0 * CNTHCTL_E2H_EL0VTEN: don't trap EL0 access to * CNTV_{CTL,CVAL,TVAL}_EL0 * CNTHCTL_E2H_EL0PTEN: trap EL0 access to * CNTP_{CTL,CVAL,TVAL}_EL0 - * CNTHCTL_E2H_EL1PCEN: trap EL1 access to - CNTP_{CTL,CVAL,TVAL}_EL0 * CNTHCTL_E2H_EL1PCTEN: trap access to CNTPCT_EL0 + * CNTHCTL_E2H_EL1PTEN: trap access to + * CNTP_{CTL,CVAL,TVAL}_EL0 + * CNTHCTL_E2H_EL1VCTEN: don't trap EL0 access to + * CNTV{CT,CTSS}_EL0 + * CNTHCTL_E2H_EL1PCEN: trap EL1 access to + * CNTP_{CTL,CVAL,TVAL}_EL0 * * TODO: Don't trap when FEAT_ECV is present */ - hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_E2H_EL0PCTEN; - hyp->vtimer.cnthctl_el2 |= CNTHCTL_E2H_EL0VCTEN; - hyp->vtimer.cnthctl_el2 |= CNTHCTL_E2H_EL0VTEN; - hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_E2H_EL0PTEN; - - hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_E2H_EL1PTEN; - hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_E2H_EL1PCTEN; + hyp->vtimer.cnthctl_el2 = + CNTHCTL_E2H_EL0VCTEN_NOTRAP | + CNTHCTL_E2H_EL0VTEN_NOTRAP; + if (ecv_poff) { + hyp->vtimer.cnthctl_el2 |= + CNTHCTL_E2H_EL0PCTEN_NOTRAP | + CNTHCTL_E2H_EL0PTEN_NOTRAP | + CNTHCTL_E2H_EL1PCTEN_NOTRAP | + CNTHCTL_E2H_EL1PTEN_NOTRAP; + } else { + hyp->vtimer.cnthctl_el2 |= + CNTHCTL_E2H_EL0PCTEN_TRAP | + CNTHCTL_E2H_EL0PTEN_TRAP | + CNTHCTL_E2H_EL1PCTEN_TRAP | + CNTHCTL_E2H_EL1PTEN_TRAP; + } } else { /* * CNTHCTL_EL1PCEN: trap access to CNTP_{CTL, CVAL, TVAL}_EL0 * from EL1 * CNTHCTL_EL1PCTEN: trap access to CNTPCT_EL0 */ - hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_EL1PCEN; - hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_EL1PCTEN; + if (ecv_poff) { + hyp->vtimer.cnthctl_el2 = + CNTHCTL_EL1PCTEN_NOTRAP | + CNTHCTL_EL1PCEN_NOTRAP; + } else { + hyp->vtimer.cnthctl_el2 = + CNTHCTL_EL1PCTEN_TRAP | + CNTHCTL_EL1PCEN_TRAP; + } } + if (ecv_poff) + hyp->vtimer.cnthctl_el2 |= CNTHCTL_ECV_EN; + now = READ_SPECIALREG(cntpct_el0); hyp->vtimer.cntvoff_el2 = now; @@ -231,15 +264,10 @@ vtimer_cleanup(void) { } -void -vtimer_sync_hwstate(struct hypctx *hypctx) +static void +vtime_sync_timer(struct hypctx *hypctx, struct vtimer_timer *timer, + uint64_t cntpct_el0) { - struct vtimer_timer *timer; - uint64_t cntpct_el0; - - timer = &hypctx->vtimer_cpu.virt_timer; - cntpct_el0 = READ_SPECIALREG(cntpct_el0) - - hypctx->hyp->vtimer.cntvoff_el2; if (!timer_enabled(timer->cntx_ctl_el0)) { vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu), timer->irqid, false); @@ -253,6 +281,21 @@ vtimer_sync_hwstate(struct hypctx *hypctx) } } +void +vtimer_sync_hwstate(struct hypctx *hypctx) +{ + uint64_t cntpct_el0; + + cntpct_el0 = READ_SPECIALREG(cntpct_el0) - + hypctx->hyp->vtimer.cntvoff_el2; + vtime_sync_timer(hypctx, 
&hypctx->vtimer_cpu.virt_timer, cntpct_el0); + /* If FEAT_ECV_POFF is in use then we need to sync the physical timer */ + if ((hypctx->hyp->vtimer.cnthctl_el2 & CNTHCTL_ECV_EN) != 0) { + vtime_sync_timer(hypctx, &hypctx->vtimer_cpu.phys_timer, + cntpct_el0); + } +} + static void vtimer_inject_irq_callout_phys(void *context) { diff --git a/sys/arm64/vmm/io/vtimer.h b/sys/arm64/vmm/io/vtimer.h index 71a20344d05e..92ce025968d2 100644 --- a/sys/arm64/vmm/io/vtimer.h +++ b/sys/arm64/vmm/io/vtimer.h @@ -66,7 +66,7 @@ struct vtimer_cpu { uint32_t cntkctl_el1; }; -int vtimer_init(uint64_t cnthctl_el2); +int vtimer_init(void); void vtimer_vminit(struct hyp *); void vtimer_cpuinit(struct hypctx *); void vtimer_cpucleanup(struct hypctx *); diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c index 3082d2941221..31d2fb3f516b 100644 --- a/sys/arm64/vmm/vmm.c +++ b/sys/arm64/vmm/vmm.c @@ -33,7 +33,6 @@ #include <sys/linker.h> #include <sys/lock.h> #include <sys/malloc.h> -#include <sys/module.h> #include <sys/mutex.h> #include <sys/pcpu.h> #include <sys/proc.h> @@ -51,7 +50,6 @@ #include <vm/vm_extern.h> #include <vm/vm_param.h> -#include <machine/armreg.h> #include <machine/cpu.h> #include <machine/fpu.h> #include <machine/machdep.h> @@ -88,7 +86,6 @@ struct vcpu { struct vfpstate *guestfpu; /* (a,i) guest fpu state */ }; -#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) @@ -126,9 +123,8 @@ struct vm { bool dying; /* (o) is dying */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ - struct vmspace *vmspace; /* (o) guest's address space */ struct vm_mem mem; /* (i) guest memory */ - char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ + char name[VM_MAX_NAMELEN + 1]; /* (o) virtual machine name */ struct vcpu **vcpu; /* (i) guest vcpus */ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; /* (o) guest MMIO regions */ @@ -141,8 +137,6 @@ struct vm { struct sx vcpus_init_lock; /* (o) */ }; -static bool vmm_initialized = false; - static int vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu); @@ -211,10 +205,6 @@ static const struct vmm_regs vmm_arch_regs_masks = { /* Host registers masked by vmm_arch_regs_masks. */ static struct vmm_regs vmm_arch_regs; -u_int vm_maxcpu; -SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, - &vm_maxcpu, 0, "Maximum number of vCPUs"); - static void vcpu_notify_event_locked(struct vcpu *vcpu); /* global statistics */ @@ -234,12 +224,6 @@ VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception"); VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception"); VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); -/* - * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this - * is a safe value for now. 
- */ -#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) - static int vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks) { @@ -274,6 +258,7 @@ vcpu_cleanup(struct vcpu *vcpu, bool destroy) vmm_stat_free(vcpu->stats); fpu_save_area_free(vcpu->guestfpu); vcpu_lock_destroy(vcpu); + free(vcpu, M_VMM); } } @@ -325,20 +310,14 @@ vmm_unsupported_quirk(void) return (0); } -static int -vmm_init(void) +int +vmm_modinit(void) { int error; - vm_maxcpu = mp_ncpus; - TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); - - if (vm_maxcpu > VM_MAXCPU) { - printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); - vm_maxcpu = VM_MAXCPU; - } - if (vm_maxcpu == 0) - vm_maxcpu = 1; + error = vmm_unsupported_quirk(); + if (error != 0) + return (error); error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks); if (error != 0) @@ -347,67 +326,18 @@ vmm_init(void) return (vmmops_modinit(0)); } -static int -vmm_handler(module_t mod, int what, void *arg) +int +vmm_modcleanup(void) { - int error; - - switch (what) { - case MOD_LOAD: - error = vmm_unsupported_quirk(); - if (error != 0) - break; - error = vmmdev_init(); - if (error != 0) - break; - error = vmm_init(); - if (error == 0) - vmm_initialized = true; - else - (void)vmmdev_cleanup(); - break; - case MOD_UNLOAD: - error = vmmdev_cleanup(); - if (error == 0 && vmm_initialized) { - error = vmmops_modcleanup(); - if (error) { - /* - * Something bad happened - prevent new - * VMs from being created - */ - vmm_initialized = false; - } - } - break; - default: - error = 0; - break; - } - return (error); + return (vmmops_modcleanup()); } -static moduledata_t vmm_kmod = { - "vmm", - vmm_handler, - NULL -}; - -/* - * vmm initialization has the following dependencies: - * - * - HYP initialization requires smp_rendezvous() and therefore must happen - * after SMP is fully functional (after SI_SUB_SMP). - * - vmm device initialization requires an initialized devfs. - */ -DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY); -MODULE_VERSION(vmm, 1); - static void vm_init(struct vm *vm, bool create) { int i; - vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); + vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm))); MPASS(vm->cookie != NULL); CPU_ZERO(&vm->active_cpus); @@ -443,10 +373,6 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) return (NULL); - /* Some interrupt controllers may have a CPU limit */ - if (vcpuid >= vgic_max_cpu_count(vm->cookie)) - return (NULL); - vcpu = (struct vcpu *) atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); if (__predict_true(vcpu != NULL)) @@ -455,6 +381,12 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) sx_xlock(&vm->vcpus_init_lock); vcpu = vm->vcpu[vcpuid]; if (vcpu == NULL && !vm->dying) { + /* Some interrupt controllers may have a CPU limit */ + if (vcpuid >= vgic_max_cpu_count(vm->cookie)) { + sx_xunlock(&vm->vcpus_init_lock); + return (NULL); + } + vcpu = vcpu_alloc(vm, vcpuid); vcpu_init(vcpu); @@ -470,9 +402,9 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) } void -vm_slock_vcpus(struct vm *vm) +vm_lock_vcpus(struct vm *vm) { - sx_slock(&vm->vcpus_init_lock); + sx_xlock(&vm->vcpus_init_lock); } void @@ -485,26 +417,15 @@ int vm_create(const char *name, struct vm **retvm) { struct vm *vm; - struct vmspace *vmspace; - - /* - * If vmm.ko could not be successfully initialized then don't attempt - * to create the virtual machine. 
- */ - if (!vmm_initialized) - return (ENXIO); - - if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) - return (EINVAL); - - vmspace = vmmops_vmspace_alloc(0, 1ul << 39); - if (vmspace == NULL) - return (ENOMEM); + int error; vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); + error = vm_mem_init(&vm->mem, 0, 1ul << 39); + if (error != 0) { + free(vm, M_VMM); + return (error); + } strcpy(vm->name, name); - vm->vmspace = vmspace; - vm_mem_init(&vm->mem); sx_init(&vm->vcpus_init_lock, "vm vcpus"); vm->sockets = 1; @@ -558,7 +479,7 @@ vm_cleanup(struct vm *vm, bool destroy) if (destroy) { vm_xlock_memsegs(vm); - pmap = vmspace_pmap(vm->vmspace); + pmap = vmspace_pmap(vm_vmspace(vm)); sched_pin(); PCPU_SET(curvmpmap, NULL); sched_unpin(); @@ -582,11 +503,6 @@ vm_cleanup(struct vm *vm, bool destroy) if (destroy) { vm_mem_destroy(vm); - vmmops_vmspace_free(vm->vmspace); - vm->vmspace = NULL; - - for (i = 0; i < vm->maxcpus; i++) - free(vm->vcpu[i], M_VMM); free(vm->vcpu, M_VMM); sx_destroy(&vm->vcpus_init_lock); } @@ -651,6 +567,33 @@ vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg) return (0); } +static int +vmm_write_oslar_el1(struct vcpu *vcpu, uint64_t wval, void *arg) +{ + struct hypctx *hypctx; + + hypctx = vcpu_get_cookie(vcpu); + /* All other fields are RES0 & we don't do anything with this */ + /* TODO: Disable access to other debug state when locked */ + hypctx->dbg_oslock = (wval & OSLAR_OSLK) == OSLAR_OSLK; + return (0); +} + +static int +vmm_read_oslsr_el1(struct vcpu *vcpu, uint64_t *rval, void *arg) +{ + struct hypctx *hypctx; + uint64_t val; + + hypctx = vcpu_get_cookie(vcpu); + val = OSLSR_OSLM_1; + if (hypctx->dbg_oslock) + val |= OSLSR_OSLK; + *rval = val; + + return (0); +} + static const struct vmm_special_reg vmm_special_regs[] = { #define SPECIAL_REG(_reg, _read, _write) \ { \ @@ -707,6 +650,13 @@ static const struct vmm_special_reg vmm_special_regs[] = { SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read, vtimer_phys_tval_write), SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write), + + /* Debug registers */ + SPECIAL_REG(DBGPRCR_EL1, vmm_reg_raz, vmm_reg_wi), + SPECIAL_REG(OSDLR_EL1, vmm_reg_raz, vmm_reg_wi), + /* TODO: Exceptions on invalid access */ + SPECIAL_REG(OSLAR_EL1, vmm_reg_raz, vmm_write_oslar_el1), + SPECIAL_REG(OSLSR_EL1, vmm_read_oslsr_el1, vmm_reg_wi), #undef SPECIAL_REG }; @@ -1056,12 +1006,6 @@ vcpu_notify_event(struct vcpu *vcpu) vcpu_unlock(vcpu); } -struct vmspace * -vm_vmspace(struct vm *vm) -{ - return (vm->vmspace); -} - struct vm_mem * vm_mem(struct vm *vm) { @@ -1258,8 +1202,7 @@ vcpu_get_state(struct vcpu *vcpu, int *hostcpu) int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) { - - if (reg >= VM_REG_LAST) + if (reg < 0 || reg >= VM_REG_LAST) return (EINVAL); return (vmmops_getreg(vcpu->cookie, reg, retval)); @@ -1270,7 +1213,7 @@ vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) { int error; - if (reg >= VM_REG_LAST) + if (reg < 0 || reg >= VM_REG_LAST) return (EINVAL); error = vmmops_setreg(vcpu->cookie, reg, val); if (error || reg != VM_REG_GUEST_PC) @@ -1342,8 +1285,14 @@ vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) static int vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) { + struct vm *vm; + + vm = vcpu->vm; vcpu_lock(vcpu); while (1) { + if (vm->suspend) + break; + if (vgic_has_pending_irq(vcpu->cookie)) break; @@ -1376,7 +1325,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu) vme = &vcpu->exitinfo; - pmap = 
vmspace_pmap(vcpu->vm->vmspace); + pmap = vmspace_pmap(vm_vmspace(vcpu->vm)); addr = vme->u.paging.gpa; esr = vme->u.paging.esr; @@ -1393,7 +1342,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu) panic("%s: Invalid exception (esr = %lx)", __func__, esr); } - map = &vm->vmspace->vm_map; + map = &vm_vmspace(vm)->vm_map; rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL); if (rv != KERN_SUCCESS) return (EFAULT); @@ -1467,7 +1416,7 @@ vm_run(struct vcpu *vcpu) if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) return (EINVAL); - pmap = vmspace_pmap(vm->vmspace); + pmap = vmspace_pmap(vm_vmspace(vm)); vme = &vcpu->exitinfo; evinfo.rptr = NULL; evinfo.sptr = &vm->suspend; diff --git a/sys/arm64/vmm/vmm_arm64.c b/sys/arm64/vmm/vmm_arm64.c index de2425aae0a1..aa1361049f49 100644 --- a/sys/arm64/vmm/vmm_arm64.c +++ b/sys/arm64/vmm/vmm_arm64.c @@ -47,7 +47,6 @@ #include <vm/vm_page.h> #include <vm/vm_param.h> -#include <machine/armreg.h> #include <machine/vm.h> #include <machine/cpufunc.h> #include <machine/cpu.h> @@ -238,7 +237,6 @@ vmmops_modinit(int ipinum) vm_offset_t next_hyp_va; vm_paddr_t vmm_base; uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field; - uint64_t cnthctl_el2; int cpu, i; bool rv __diagused; @@ -444,10 +442,9 @@ vmmops_modinit(int ipinum) vmem_add(el2_mem_alloc, next_hyp_va, HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK); } - cnthctl_el2 = vmm_read_reg(HYP_REG_CNTHCTL); vgic_init(); - vtimer_init(cnthctl_el2); + vtimer_init(); return (0); } @@ -517,6 +514,7 @@ vmmops_init(struct vm *vm, pmap_t pmap) { struct hyp *hyp; vm_size_t size; + uint64_t idreg; size = el2_hyp_size(vm); hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); @@ -524,6 +522,16 @@ vmmops_init(struct vm *vm, pmap_t pmap) hyp->vm = vm; hyp->vgic_attached = false; + if (get_kernel_reg(ID_AA64MMFR0_EL1, &idreg)) { + if (ID_AA64MMFR0_ECV_VAL(idreg) >= ID_AA64MMFR0_ECV_POFF) + hyp->feats |= HYP_FEAT_ECV_POFF; + } + + if (get_kernel_reg(ID_AA64MMFR1_EL1, &idreg)) { + if (ID_AA64MMFR1_HCX_VAL(idreg) >= ID_AA64MMFR1_HCX_IMPL) + hyp->feats |= HYP_FEAT_HCX; + } + vtimer_vminit(hyp); vgic_vminit(hyp); @@ -1251,6 +1259,8 @@ hypctx_regptr(struct hypctx *hypctx, int reg) return (&hypctx->tcr_el1); case VM_REG_GUEST_TCR2_EL1: return (&hypctx->tcr2_el1); + case VM_REG_GUEST_MPIDR_EL1: + return (&hypctx->vmpidr_el2); default: break; } @@ -1354,7 +1364,7 @@ vmmops_setcap(void *vcpui, int num, int val) break; if (val != 0) hypctx->mdcr_el2 |= MDCR_EL2_TDE; - else + else if ((hypctx->setcaps & (1ul << VM_CAP_SS_EXIT)) == 0) hypctx->mdcr_el2 &= ~MDCR_EL2_TDE; break; case VM_CAP_SS_EXIT: @@ -1363,20 +1373,20 @@ vmmops_setcap(void *vcpui, int num, int val) if (val != 0) { hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS); - hypctx->debug_mdscr |= hypctx->mdscr_el1 & - (MDSCR_SS | MDSCR_KDE); + hypctx->debug_mdscr |= (hypctx->mdscr_el1 & MDSCR_SS); hypctx->tf.tf_spsr |= PSR_SS; - hypctx->mdscr_el1 |= MDSCR_SS | MDSCR_KDE; + hypctx->mdscr_el1 |= MDSCR_SS; hypctx->mdcr_el2 |= MDCR_EL2_TDE; } else { hypctx->tf.tf_spsr &= ~PSR_SS; hypctx->tf.tf_spsr |= hypctx->debug_spsr; hypctx->debug_spsr &= ~PSR_SS; - hypctx->mdscr_el1 &= ~(MDSCR_SS | MDSCR_KDE); + hypctx->mdscr_el1 &= ~MDSCR_SS; hypctx->mdscr_el1 |= hypctx->debug_mdscr; - hypctx->debug_mdscr &= ~(MDSCR_SS | MDSCR_KDE); - hypctx->mdcr_el2 &= ~MDCR_EL2_TDE; + hypctx->debug_mdscr &= ~MDSCR_SS; + if ((hypctx->setcaps & (1ul << VM_CAP_BRK_EXIT)) == 0) + hypctx->mdcr_el2 &= ~MDCR_EL2_TDE; } break; case VM_CAP_MASK_HWINTR: diff --git 
a/sys/arm64/vmm/vmm_dev_machdep.c b/sys/arm64/vmm/vmm_dev_machdep.c index 926a74fa528b..29d14e1ba952 100644 --- a/sys/arm64/vmm/vmm_dev_machdep.c +++ b/sys/arm64/vmm/vmm_dev_machdep.c @@ -68,19 +68,13 @@ int vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, int fflag, struct thread *td) { - struct vm_run *vmrun; - struct vm_vgic_version *vgv; - struct vm_vgic_descr *vgic; - struct vm_irq *vi; - struct vm_exception *vmexc; - struct vm_gla2gpa *gg; - struct vm_msi *vmsi; int error; error = 0; switch (cmd) { case VM_RUN: { struct vm_exit *vme; + struct vm_run *vmrun; vmrun = (struct vm_run *)data; vme = vm_exitinfo(vcpu); @@ -94,41 +88,62 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, break; break; } - case VM_INJECT_EXCEPTION: + case VM_INJECT_EXCEPTION: { + struct vm_exception *vmexc; + vmexc = (struct vm_exception *)data; error = vm_inject_exception(vcpu, vmexc->esr, vmexc->far); break; - case VM_GLA2GPA_NOFAULT: + } + case VM_GLA2GPA_NOFAULT: { + struct vm_gla2gpa *gg; + gg = (struct vm_gla2gpa *)data; error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, gg->prot, &gg->gpa, &gg->fault); KASSERT(error == 0 || error == EFAULT, ("%s: vm_gla2gpa unknown error %d", __func__, error)); break; - case VM_GET_VGIC_VERSION: + } + case VM_GET_VGIC_VERSION: { + struct vm_vgic_version *vgv; + vgv = (struct vm_vgic_version *)data; /* TODO: Query the vgic driver for this */ vgv->version = 3; vgv->flags = 0; error = 0; break; - case VM_ATTACH_VGIC: + } + case VM_ATTACH_VGIC: { + struct vm_vgic_descr *vgic; + vgic = (struct vm_vgic_descr *)data; error = vm_attach_vgic(vm, vgic); break; - case VM_RAISE_MSI: + } + case VM_RAISE_MSI: { + struct vm_msi *vmsi; + vmsi = (struct vm_msi *)data; error = vm_raise_msi(vm, vmsi->msg, vmsi->addr, vmsi->bus, vmsi->slot, vmsi->func); break; - case VM_ASSERT_IRQ: + } + case VM_ASSERT_IRQ: { + struct vm_irq *vi; + vi = (struct vm_irq *)data; error = vm_assert_irq(vm, vi->irq); break; - case VM_DEASSERT_IRQ: + } + case VM_DEASSERT_IRQ: { + struct vm_irq *vi; + vi = (struct vm_irq *)data; error = vm_deassert_irq(vm, vi->irq); break; + } default: error = ENOTTY; break; diff --git a/sys/arm64/vmm/vmm_hyp.c b/sys/arm64/vmm/vmm_hyp.c index d61885c15871..0ad7930e9a87 100644 --- a/sys/arm64/vmm/vmm_hyp.c +++ b/sys/arm64/vmm/vmm_hyp.c @@ -32,7 +32,6 @@ #include <sys/types.h> #include <sys/proc.h> -#include <machine/armreg.h> #include "arm64.h" #include "hyp.h" @@ -42,11 +41,11 @@ struct hypctx; uint64_t VMM_HYP_FUNC(do_call_guest)(struct hypctx *); static void -vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest) +vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest, + bool ecv_poff) { uint64_t dfr0; - /* Store the guest VFP registers */ if (guest) { /* Store the timer registers */ hypctx->vtimer_cpu.cntkctl_el1 = @@ -55,7 +54,20 @@ vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest) READ_SPECIALREG(EL0_REG(CNTV_CVAL)); hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0 = READ_SPECIALREG(EL0_REG(CNTV_CTL)); + } + if (guest_or_nonvhe(guest) && ecv_poff) { + /* + * If we have ECV then the guest could modify these registers. + * If VHE is enabled then the kernel will see a different view + * of the registers, so doesn't need to handle them. 
diff --git a/sys/arm64/vmm/vmm_hyp.c b/sys/arm64/vmm/vmm_hyp.c
index d61885c15871..0ad7930e9a87 100644
--- a/sys/arm64/vmm/vmm_hyp.c
+++ b/sys/arm64/vmm/vmm_hyp.c
@@ -32,7 +32,6 @@
 #include <sys/types.h>
 #include <sys/proc.h>
 
-#include <machine/armreg.h>
 
 #include "arm64.h"
 #include "hyp.h"
@@ -42,11 +41,11 @@ struct hypctx;
 
 uint64_t VMM_HYP_FUNC(do_call_guest)(struct hypctx *);
 
 static void
-vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest)
+vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest,
+    bool ecv_poff)
 {
 	uint64_t dfr0;
 
-	/* Store the guest VFP registers */
 	if (guest) {
 		/* Store the timer registers */
 		hypctx->vtimer_cpu.cntkctl_el1 =
@@ -55,7 +54,20 @@ vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest)
 		    READ_SPECIALREG(EL0_REG(CNTV_CVAL));
 		hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0 =
 		    READ_SPECIALREG(EL0_REG(CNTV_CTL));
+	}
+	if (guest_or_nonvhe(guest) && ecv_poff) {
+		/*
+		 * If we have ECV then the guest could modify these registers.
+		 * If VHE is enabled then the kernel will see a different view
+		 * of the registers, so doesn't need to handle them.
+		 */
+		hypctx->vtimer_cpu.phys_timer.cntx_cval_el0 =
+		    READ_SPECIALREG(EL0_REG(CNTP_CVAL));
+		hypctx->vtimer_cpu.phys_timer.cntx_ctl_el0 =
+		    READ_SPECIALREG(EL0_REG(CNTP_CTL));
+	}
+	if (guest) {
 		/* Store the GICv3 registers */
 		hypctx->vgic_v3_regs.ich_eisr_el2 =
 		    READ_SPECIALREG(ich_eisr_el2);
@@ -108,6 +120,8 @@ vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest)
 		}
 	}
 
+	hypctx->dbgclaimset_el1 = READ_SPECIALREG(dbgclaimset_el1);
+
 	dfr0 = READ_SPECIALREG(id_aa64dfr0_el1);
 	switch (ID_AA64DFR0_BRPs_VAL(dfr0) - 1) {
 #define STORE_DBG_BRP(x) \
@@ -167,10 +181,13 @@ vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest)
 	hypctx->pmcr_el0 = READ_SPECIALREG(pmcr_el0);
 	hypctx->pmccntr_el0 = READ_SPECIALREG(pmccntr_el0);
 	hypctx->pmccfiltr_el0 = READ_SPECIALREG(pmccfiltr_el0);
+	hypctx->pmuserenr_el0 = READ_SPECIALREG(pmuserenr_el0);
+	hypctx->pmselr_el0 = READ_SPECIALREG(pmselr_el0);
+	hypctx->pmxevcntr_el0 = READ_SPECIALREG(pmxevcntr_el0);
 	hypctx->pmcntenset_el0 = READ_SPECIALREG(pmcntenset_el0);
 	hypctx->pmintenset_el1 = READ_SPECIALREG(pmintenset_el1);
 	hypctx->pmovsset_el0 = READ_SPECIALREG(pmovsset_el0);
-	hypctx->pmuserenr_el0 = READ_SPECIALREG(pmuserenr_el0);
+
 	switch ((hypctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT) {
 #define STORE_PMU(x) \
 	case (x + 1): \
@@ -259,29 +276,20 @@ vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest)
 	hypctx->hcr_el2 = READ_SPECIALREG(hcr_el2);
 	hypctx->vpidr_el2 = READ_SPECIALREG(vpidr_el2);
 	hypctx->vmpidr_el2 = READ_SPECIALREG(vmpidr_el2);
-
-#ifndef VMM_VHE
-	/* hcrx_el2 depends on feat_hcx */
-	uint64_t mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
-	if (ID_AA64MMFR1_HCX_VAL(mmfr1) >> ID_AA64MMFR1_HCX_SHIFT) {
-		hypctx->hcrx_el2 = READ_SPECIALREG(MRS_REG_ALT_NAME(HCRX_EL2));
-	}
-#endif
 }
 
 static void
-vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest)
+vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest,
+    bool ecv_poff)
 {
 	uint64_t dfr0;
 
 	/* Restore the special registers */
 	WRITE_SPECIALREG(hcr_el2, hypctx->hcr_el2);
-	if (guest_or_nonvhe(guest)) {
-		uint64_t mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
-		if (ID_AA64MMFR1_HCX_VAL(mmfr1) >> ID_AA64MMFR1_HCX_SHIFT) {
-			WRITE_SPECIALREG(MRS_REG_ALT_NAME(HCRX_EL2), hypctx->hcrx_el2);
-		}
+	if (guest) {
+		if ((hyp->feats & HYP_FEAT_HCX) != 0)
+			WRITE_SPECIALREG(HCRX_EL2_REG, hypctx->hcrx_el2);
 	}
 	isb();
@@ -333,12 +341,15 @@ vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest)
 	WRITE_SPECIALREG(pmcr_el0, hypctx->pmcr_el0);
 	WRITE_SPECIALREG(pmccntr_el0, hypctx->pmccntr_el0);
 	WRITE_SPECIALREG(pmccfiltr_el0, hypctx->pmccfiltr_el0);
+	WRITE_SPECIALREG(pmuserenr_el0, hypctx->pmuserenr_el0);
+	WRITE_SPECIALREG(pmselr_el0, hypctx->pmselr_el0);
+	WRITE_SPECIALREG(pmxevcntr_el0, hypctx->pmxevcntr_el0);
 	/* Clear all events/interrupts then enable them */
-	WRITE_SPECIALREG(pmcntenclr_el0, 0xfffffffful);
+	WRITE_SPECIALREG(pmcntenclr_el0, ~0ul);
 	WRITE_SPECIALREG(pmcntenset_el0, hypctx->pmcntenset_el0);
-	WRITE_SPECIALREG(pmintenclr_el1, 0xfffffffful);
+	WRITE_SPECIALREG(pmintenclr_el1, ~0ul);
 	WRITE_SPECIALREG(pmintenset_el1, hypctx->pmintenset_el1);
-	WRITE_SPECIALREG(pmovsclr_el0, 0xfffffffful);
+	WRITE_SPECIALREG(pmovsclr_el0, ~0ul);
 	WRITE_SPECIALREG(pmovsset_el0, hypctx->pmovsset_el0);
 
 	switch ((hypctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT) {
@@ -384,6 +395,9 @@ vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest)
 #undef LOAD_PMU
 	}
 
+	WRITE_SPECIALREG(dbgclaimclr_el1, ~0ul);
+	WRITE_SPECIALREG(dbgclaimset_el1, hypctx->dbgclaimset_el1);
+
 	dfr0 = READ_SPECIALREG(id_aa64dfr0_el1);
 	switch (ID_AA64DFR0_BRPs_VAL(dfr0) - 1) {
 #define LOAD_DBG_BRP(x) \
@@ -450,6 +464,29 @@
 		WRITE_SPECIALREG(cnthctl_el2, hyp->vtimer.cnthctl_el2);
 		WRITE_SPECIALREG(cntvoff_el2, hyp->vtimer.cntvoff_el2);
 
+		if (ecv_poff) {
+			/*
+			 * Load the same offset as the virtual timer
+			 * to keep in sync.
+			 */
+			WRITE_SPECIALREG(CNTPOFF_EL2_REG,
+			    hyp->vtimer.cntvoff_el2);
+			isb();
+		}
+	}
+	if (guest_or_nonvhe(guest) && ecv_poff) {
+		/*
+		 * If we have ECV then the guest could modify these registers.
+		 * If VHE is enabled then the kernel will see a different view
+		 * of the registers, so doesn't need to handle them.
+		 */
+		WRITE_SPECIALREG(EL0_REG(CNTP_CVAL),
+		    hypctx->vtimer_cpu.phys_timer.cntx_cval_el0);
+		WRITE_SPECIALREG(EL0_REG(CNTP_CTL),
+		    hypctx->vtimer_cpu.phys_timer.cntx_ctl_el0);
+	}
+
+	if (guest) {
 		/* Load the GICv3 registers */
 		WRITE_SPECIALREG(ich_hcr_el2, hypctx->vgic_v3_regs.ich_hcr_el2);
 		WRITE_SPECIALREG(ich_vmcr_el2,
@@ -502,11 +539,19 @@ vmm_hyp_call_guest(struct hyp *hyp, struct hypctx *hypctx)
 	struct hypctx host_hypctx;
 	uint64_t cntvoff_el2;
 	uint64_t ich_hcr_el2, ich_vmcr_el2, cnthctl_el2, cntkctl_el1;
+#ifndef VMM_VHE
+	uint64_t hcrx_el2;
+#endif
 	uint64_t ret;
 	uint64_t s1e1r, hpfar_el2;
-	bool hpfar_valid;
+	bool ecv_poff, hpfar_valid;
 
-	vmm_hyp_reg_store(&host_hypctx, NULL, false);
+	ecv_poff = (hyp->vtimer.cnthctl_el2 & CNTHCTL_ECV_EN) != 0;
+	vmm_hyp_reg_store(&host_hypctx, NULL, false, ecv_poff);
+#ifndef VMM_VHE
+	if ((hyp->feats & HYP_FEAT_HCX) != 0)
+		hcrx_el2 = READ_SPECIALREG(MRS_REG_ALT_NAME(HCRX_EL2));
+#endif
 
 	/* Save the host special registers */
 	cnthctl_el2 = READ_SPECIALREG(cnthctl_el2);
@@ -516,7 +561,7 @@ vmm_hyp_call_guest(struct hyp *hyp, struct hypctx *hypctx)
 	ich_hcr_el2 = READ_SPECIALREG(ich_hcr_el2);
 	ich_vmcr_el2 = READ_SPECIALREG(ich_vmcr_el2);
 
-	vmm_hyp_reg_restore(hypctx, hyp, true);
+	vmm_hyp_reg_restore(hypctx, hyp, true, ecv_poff);
 
 	/* Load the common hypervisor registers */
 	WRITE_SPECIALREG(vttbr_el2, hyp->vttbr_el2);
@@ -532,7 +577,7 @@ vmm_hyp_call_guest(struct hyp *hyp, struct hypctx *hypctx)
 
 	/* Store the exit info */
 	hypctx->exit_info.far_el2 = READ_SPECIALREG(far_el2);
-	vmm_hyp_reg_store(hypctx, hyp, true);
+	vmm_hyp_reg_store(hypctx, hyp, true, ecv_poff);
 
 	hpfar_valid = true;
 	if (ret == EXCP_TYPE_EL1_SYNC) {
@@ -582,7 +627,12 @@ vmm_hyp_call_guest(struct hyp *hyp, struct hypctx *hypctx)
 		}
 	}
 
-	vmm_hyp_reg_restore(&host_hypctx, NULL, false);
+	vmm_hyp_reg_restore(&host_hypctx, NULL, false, ecv_poff);
+
+#ifndef VMM_VHE
+	if ((hyp->feats & HYP_FEAT_HCX) != 0)
+		WRITE_SPECIALREG(MRS_REG_ALT_NAME(HCRX_EL2), hcrx_el2);
+#endif
 
 	/* Restore the host special registers */
 	WRITE_SPECIALREG(ich_hcr_el2, ich_hcr_el2);
@@ -613,8 +663,6 @@ VMM_HYP_FUNC(read_reg)(uint64_t reg)
 	switch (reg) {
 	case HYP_REG_ICH_VTR:
 		return (READ_SPECIALREG(ich_vtr_el2));
-	case HYP_REG_CNTHCTL:
-		return (READ_SPECIALREG(cnthctl_el2));
 	}
 
 	return (0);
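
The restore paths above consistently use the "clear everything, then set the saved bits" idiom on set/clear register pairs (pmcntenclr/pmcntenset, pmintenclr/pmintenset, pmovsclr/pmovsset, and now dbgclaimclr/dbgclaimset): writing ~0 to the clear register wipes whatever the previous context left behind, after which the saved mask re-establishes exactly the recorded state. A small model of the idiom using a plain struct instead of system registers; all names here are illustrative:

/*
 * Model of a set/clear register pair: writing a mask to the "clr" view
 * clears those bits, writing a mask to the "set" view sets them, so
 * clear-then-set restores saved state without inheriting stale bits.
 */
#include <stdint.h>

struct setclr_reg {
	uint64_t	bits;
};

static void
reg_clr(struct setclr_reg *r, uint64_t mask)
{
	r->bits &= ~mask;
}

static void
reg_set(struct setclr_reg *r, uint64_t mask)
{
	r->bits |= mask;
}

static void
reg_restore(struct setclr_reg *r, uint64_t saved)
{
	reg_clr(r, ~0ul);	/* drop the previous context's bits */
	reg_set(r, saved);	/* re-establish only the saved bits */
}
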
diff --git a/sys/arm64/vmm/vmm_reset.c b/sys/arm64/vmm/vmm_reset.c
index 79d022cf33e8..0e4910ea87b4 100644
--- a/sys/arm64/vmm/vmm_reset.c
+++ b/sys/arm64/vmm/vmm_reset.c
@@ -31,7 +31,6 @@
 #include <sys/kernel.h>
 #include <sys/lock.h>
 
-#include <machine/armreg.h>
 #include <machine/cpu.h>
 #include <machine/hypervisor.h>
 
@@ -100,10 +99,12 @@ reset_vm_el01_regs(void *vcpu)
 	el2ctx->pmcr_el0 |= PMCR_LC;
 	set_arch_unknown(el2ctx->pmccntr_el0);
 	set_arch_unknown(el2ctx->pmccfiltr_el0);
+	set_arch_unknown(el2ctx->pmuserenr_el0);
+	set_arch_unknown(el2ctx->pmselr_el0);
+	set_arch_unknown(el2ctx->pmxevcntr_el0);
 	set_arch_unknown(el2ctx->pmcntenset_el0);
 	set_arch_unknown(el2ctx->pmintenset_el1);
 	set_arch_unknown(el2ctx->pmovsset_el0);
-	set_arch_unknown(el2ctx->pmuserenr_el0);
 	memset(el2ctx->pmevcntr_el0, 0, sizeof(el2ctx->pmevcntr_el0));
 	memset(el2ctx->pmevtyper_el0, 0, sizeof(el2ctx->pmevtyper_el0));
 }
@@ -143,7 +144,8 @@ reset_vm_el2_regs(void *vcpu)
 	/* Set the Extended Hypervisor Configuration Register */
 	el2ctx->hcrx_el2 = 0;
 	/* TODO: Trap all extensions we don't support */
-	el2ctx->mdcr_el2 = 0;
+	el2ctx->mdcr_el2 = MDCR_EL2_TDOSA | MDCR_EL2_TDRA | MDCR_EL2_TPMS |
+	    MDCR_EL2_TTRF;
 	/* PMCR_EL0.N is read from MDCR_EL2.HPMN */
 	el2ctx->mdcr_el2 |= (el2ctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT;
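
The reset_vm_el2_regs() hunk above also relies on the MDCR_EL2.HPMN field sitting at bit 0: the guest-visible counter count is extracted from PMCR_EL0.N (bits 15:11) with a mask and shift and OR-ed straight into MDCR_EL2 with no further shift. A standalone sketch of that field move; the mask and shift match the architectural field positions, while mdcr_with_hpmn() itself is only illustrative:

/*
 * PMCR_EL0.N occupies bits [15:11]; MDCR_EL2.HPMN occupies bits [4:0],
 * so the extracted value can be OR-ed in directly.
 */
#include <stdint.h>

#define	PMCR_N_SHIFT	11
#define	PMCR_N_MASK	(0x1ful << PMCR_N_SHIFT)

static uint64_t
mdcr_with_hpmn(uint64_t mdcr_el2, uint64_t pmcr_el0)
{
	return (mdcr_el2 | ((pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT));
}
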
