Diffstat (limited to 'sys/arm64')
-rw-r--r--  sys/arm64/apple/apple_pinctrl.c | 3
-rw-r--r--  sys/arm64/arm64/copyinout.S | 18
-rw-r--r--  sys/arm64/arm64/cpu_errata.c | 96
-rw-r--r--  sys/arm64/arm64/cpu_feat.c | 51
-rw-r--r--  sys/arm64/arm64/db_disasm.c | 1
-rw-r--r--  sys/arm64/arm64/efirt_machdep.c | 33
-rw-r--r--  sys/arm64/arm64/elf32_machdep.c | 6
-rw-r--r--  sys/arm64/arm64/elf_machdep.c | 24
-rw-r--r--  sys/arm64/arm64/exception.S | 9
-rw-r--r--  sys/arm64/arm64/exec_machdep.c | 31
-rw-r--r--  sys/arm64/arm64/genassym.c | 1
-rw-r--r--  sys/arm64/arm64/identcpu.c | 55
-rw-r--r--  sys/arm64/arm64/kexec_support.c | 188
-rw-r--r--  sys/arm64/arm64/locore.S | 125
-rw-r--r--  sys/arm64/arm64/machdep.c | 33
-rw-r--r--  sys/arm64/arm64/machdep_boot.c | 3
-rw-r--r--  sys/arm64/arm64/mp_machdep.c | 78
-rw-r--r--  sys/arm64/arm64/pmap.c | 191
-rw-r--r--  sys/arm64/arm64/ptrauth.c | 57
-rw-r--r--  sys/arm64/arm64/spec_workaround.c | 166
-rw-r--r--  sys/arm64/arm64/support.S | 9
-rw-r--r--  sys/arm64/arm64/swtch.S | 12
-rw-r--r--  sys/arm64/arm64/trap.c | 1
-rw-r--r--  sys/arm64/arm64/vm_machdep.c | 3
-rw-r--r--  sys/arm64/conf/std.arm | 3
-rw-r--r--  sys/arm64/conf/std.arm64 | 7
-rw-r--r--  sys/arm64/coresight/coresight.c | 2
-rw-r--r--  sys/arm64/include/_armreg.h | 57
-rw-r--r--  sys/arm64/include/armreg.h | 94
-rw-r--r--  sys/arm64/include/asm.h | 7
-rw-r--r--  sys/arm64/include/cpu.h | 35
-rw-r--r--  sys/arm64/include/cpu_feat.h | 52
-rw-r--r--  sys/arm64/include/cpufunc.h | 7
-rw-r--r--  sys/arm64/include/db_machdep.h | 1
-rw-r--r--  sys/arm64/include/elf.h | 3
-rw-r--r--  sys/arm64/include/hypervisor.h | 178
-rw-r--r--  sys/arm64/include/kexec.h | 33
-rw-r--r--  sys/arm64/include/pcpu.h | 3
-rw-r--r--  sys/arm64/include/pmap.h | 3
-rw-r--r--  sys/arm64/include/proc.h | 8
-rw-r--r--  sys/arm64/include/smp.h | 1
-rw-r--r--  sys/arm64/include/vmm.h | 57
-rw-r--r--  sys/arm64/include/vmm_dev.h | 2
-rw-r--r--  sys/arm64/include/vmparam.h | 8
-rw-r--r--  sys/arm64/linux/linux_sysvec.c | 10
-rw-r--r--  sys/arm64/nvidia/tegra210/max77620_regulators.c | 4
-rw-r--r--  sys/arm64/rockchip/rk_gpio.c | 204
-rw-r--r--  sys/arm64/rockchip/rk_grf_gpio.c | 3
-rw-r--r--  sys/arm64/rockchip/rk_i2s.c | 8
-rw-r--r--  sys/arm64/rockchip/rk_tsadc.c | 2
-rw-r--r--  sys/arm64/spe/arm_spe.h | 77
-rw-r--r--  sys/arm64/spe/arm_spe_acpi.c | 146
-rw-r--r--  sys/arm64/spe/arm_spe_backend.c | 586
-rw-r--r--  sys/arm64/spe/arm_spe_dev.c | 324
-rw-r--r--  sys/arm64/spe/arm_spe_dev.h | 162
-rw-r--r--  sys/arm64/spe/arm_spe_fdt.c | 75
-rw-r--r--  sys/arm64/vmm/arm64.h | 41
-rw-r--r--  sys/arm64/vmm/hyp.h | 1
-rw-r--r--  sys/arm64/vmm/io/vgic_v3.c | 1
-rw-r--r--  sys/arm64/vmm/io/vtimer.c | 95
-rw-r--r--  sys/arm64/vmm/io/vtimer.h | 2
-rw-r--r--  sys/arm64/vmm/vmm.c | 193
-rw-r--r--  sys/arm64/vmm/vmm_arm64.c | 32
-rw-r--r--  sys/arm64/vmm/vmm_dev_machdep.c | 43
-rw-r--r--  sys/arm64/vmm/vmm_hyp.c | 104
-rw-r--r--  sys/arm64/vmm/vmm_reset.c | 8
66 files changed, 3174 insertions(+), 702 deletions(-)
diff --git a/sys/arm64/apple/apple_pinctrl.c b/sys/arm64/apple/apple_pinctrl.c
index ebaaccea1d99..c28b1c62d78c 100644
--- a/sys/arm64/apple/apple_pinctrl.c
+++ b/sys/arm64/apple/apple_pinctrl.c
@@ -171,12 +171,13 @@ apple_pinctrl_attach(device_t dev)
OF_xref_from_node(ofw_bus_get_node(dev)));
}
- sc->sc_busdev = gpiobus_attach_bus(dev);
+ sc->sc_busdev = gpiobus_add_bus(dev);
if (sc->sc_busdev == NULL) {
device_printf(dev, "failed to attach gpiobus\n");
goto error;
}
+ bus_attach_children(dev);
return (0);
error:
mtx_destroy(&sc->sc_mtx);
diff --git a/sys/arm64/arm64/copyinout.S b/sys/arm64/arm64/copyinout.S
index 26dd0b4cf14f..e41c4b5f6734 100644
--- a/sys/arm64/arm64/copyinout.S
+++ b/sys/arm64/arm64/copyinout.S
@@ -37,7 +37,14 @@
#include "assym.inc"
.macro check_user_access user_arg, size_arg, bad_access_func
- adds x6, x\user_arg, x\size_arg
+ /*
+ * TBI is enabled from 15.0. Clear the top byte of the userspace
+ * address before checking whether it's within the given limit.
+ * The later load/store instructions will fault if TBI is disabled
+ * for the current process.
+ */
+ and x6, x\user_arg, #(~TBI_ADDR_MASK)
+ adds x6, x6, x\size_arg
b.cs \bad_access_func
ldr x7, =VM_MAXUSER_ADDRESS
cmp x6, x7
@@ -100,13 +107,20 @@ ENTRY(copyinstr)
adr x6, copyio_fault /* Get the handler address */
SET_FAULT_HANDLER(x6, x7) /* Set the handler */
+ /*
+ * As in check_user_access, mask off the TBI bits for the cmp
+ * instruction. The load will fault if TBI is disabled, but we
+ * need to check the address didn't wrap.
+ */
+ and x6, x0, #(~TBI_ADDR_MASK)
ldr x7, =VM_MAXUSER_ADDRESS
-1: cmp x0, x7
+1: cmp x6, x7
b.cs copyio_fault
ldtrb w4, [x0] /* Load from uaddr */
add x0, x0, #1 /* Next char */
strb w4, [x1], #1 /* Store in kaddr */
add x5, x5, #1 /* count++ */
+ add x6, x6, #1 /* Increment masked address */
cbz w4, 2f /* Break when NUL-terminated */
sub x2, x2, #1 /* len-- */
cbnz x2, 1b
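
For readers cross-checking the assembly above, the following is a rough C rendering of the access check that check_user_access and copyinstr now perform. It is a sketch, not code from this patch; the mask and limit values are assumptions (TBI_ADDR_MASK is taken to be the tag byte, bits 63:56).

/*
 * Sketch only: mirrors the masked range check done in copyinout.S.
 */
#include <stdbool.h>
#include <stdint.h>

#define TBI_ADDR_MASK		0xff00000000000000ULL	/* assumed: tag byte */
#define VM_MAXUSER_ADDRESS	0x0000ffff00000000ULL	/* illustrative limit */

static bool
user_range_ok(uint64_t uaddr, uint64_t len)
{
	uint64_t end;

	uaddr &= ~TBI_ADDR_MASK;	/* drop the tag before comparing */
	end = uaddr + len;
	if (end < uaddr)		/* "adds ... b.cs": the add wrapped */
		return (false);
	return (end < VM_MAXUSER_ADDRESS);	/* "cmp ... b.cs" vs the limit */
}
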
diff --git a/sys/arm64/arm64/cpu_errata.c b/sys/arm64/arm64/cpu_errata.c
index 989924bc0567..b876703a2a15 100644
--- a/sys/arm64/arm64/cpu_errata.c
+++ b/sys/arm64/arm64/cpu_errata.c
@@ -52,56 +52,11 @@ struct cpu_quirks {
u_int flags;
};
-static enum {
- SSBD_FORCE_ON,
- SSBD_FORCE_OFF,
- SSBD_KERNEL,
-} ssbd_method = SSBD_KERNEL;
-
-static cpu_quirk_install install_psci_bp_hardening;
-static cpu_quirk_install install_ssbd_workaround;
static cpu_quirk_install install_thunderx_bcast_tlbi_workaround;
static struct cpu_quirks cpu_quirks[] = {
{
.midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
- .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A57,0,0),
- .quirk_install = install_psci_bp_hardening,
- .flags = CPU_QUIRK_POST_DEVICE,
- },
- {
- .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
- .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A72,0,0),
- .quirk_install = install_psci_bp_hardening,
- .flags = CPU_QUIRK_POST_DEVICE,
- },
- {
- .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
- .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A73,0,0),
- .quirk_install = install_psci_bp_hardening,
- .flags = CPU_QUIRK_POST_DEVICE,
- },
- {
- .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
- .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A75,0,0),
- .quirk_install = install_psci_bp_hardening,
- .flags = CPU_QUIRK_POST_DEVICE,
- },
- {
- .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
- .midr_value =
- CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX2, 0,0),
- .quirk_install = install_psci_bp_hardening,
- .flags = CPU_QUIRK_POST_DEVICE,
- },
- {
- .midr_mask = 0,
- .midr_value = 0,
- .quirk_install = install_ssbd_workaround,
- .flags = CPU_QUIRK_POST_DEVICE,
- },
- {
- .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
.midr_value =
CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX, 0, 0),
.quirk_install = install_thunderx_bcast_tlbi_workaround,
@@ -114,57 +69,6 @@ static struct cpu_quirks cpu_quirks[] = {
},
};
-static void
-install_psci_bp_hardening(void)
-{
- /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */
- if (!psci_present)
- return;
-
- if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_1) != SMCCC_RET_SUCCESS)
- return;
-
- PCPU_SET(bp_harden, smccc_arch_workaround_1);
-}
-
-static void
-install_ssbd_workaround(void)
-{
- char *env;
-
- if (PCPU_GET(cpuid) == 0) {
- env = kern_getenv("kern.cfg.ssbd");
- if (env != NULL) {
- if (strcmp(env, "force-on") == 0) {
- ssbd_method = SSBD_FORCE_ON;
- } else if (strcmp(env, "force-off") == 0) {
- ssbd_method = SSBD_FORCE_OFF;
- }
- }
- }
-
- /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */
- if (!psci_present)
- return;
-
- /* Enable the workaround on this CPU if it's enabled in the firmware */
- if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_2) != SMCCC_RET_SUCCESS)
- return;
-
- switch(ssbd_method) {
- case SSBD_FORCE_ON:
- smccc_arch_workaround_2(1);
- break;
- case SSBD_FORCE_OFF:
- smccc_arch_workaround_2(0);
- break;
- case SSBD_KERNEL:
- default:
- PCPU_SET(ssbd, smccc_arch_workaround_2);
- break;
- }
-}
-
/*
* Workaround Cavium erratum 27456.
*
diff --git a/sys/arm64/arm64/cpu_feat.c b/sys/arm64/arm64/cpu_feat.c
index cc262394913d..94114d47f846 100644
--- a/sys/arm64/arm64/cpu_feat.c
+++ b/sys/arm64/arm64/cpu_feat.c
@@ -32,16 +32,21 @@
#include <machine/cpu.h>
#include <machine/cpu_feat.h>
+SYSCTL_NODE(_hw, OID_AUTO, feat, CTLFLAG_RD, 0, "CPU features/errata");
+
/* TODO: Make this a list if we ever grow a callback other than smccc_errata */
static cpu_feat_errata_check_fn cpu_feat_check_cb = NULL;
void
enable_cpu_feat(uint32_t stage)
{
+ char tunable[32];
struct cpu_feat **featp, *feat;
uint32_t midr;
u_int errata_count, *errata_list;
cpu_feat_errata errata_status;
+ cpu_feat_en check_status;
+ bool val;
MPASS((stage & ~CPU_FEAT_STAGE_MASK) == 0);
@@ -49,6 +54,21 @@ enable_cpu_feat(uint32_t stage)
SET_FOREACH(featp, cpu_feat_set) {
feat = *featp;
+ /* Read any tunable the user may have set */
+ if (stage == CPU_FEAT_EARLY_BOOT && PCPU_GET(cpuid) == 0) {
+ snprintf(tunable, sizeof(tunable), "hw.feat.%s",
+ feat->feat_name);
+ if (TUNABLE_BOOL_FETCH(tunable, &val)) {
+ if (val) {
+ feat->feat_flags |=
+ CPU_FEAT_USER_ENABLED;
+ } else {
+ feat->feat_flags |=
+ CPU_FEAT_USER_DISABLED;
+ }
+ }
+ }
+
/* Run the enablement code at the correct stage of boot */
if ((feat->feat_flags & CPU_FEAT_STAGE_MASK) != stage)
continue;
@@ -58,8 +78,26 @@ enable_cpu_feat(uint32_t stage)
PCPU_GET(cpuid) != 0)
continue;
- if (feat->feat_check != NULL && !feat->feat_check(feat, midr))
- continue;
+ if (feat->feat_check != NULL) {
+ check_status = feat->feat_check(feat, midr);
+ } else {
+ check_status = FEAT_DEFAULT_ENABLE;
+ }
+ /* Ignore features that are not present */
+ if (check_status == FEAT_ALWAYS_DISABLE)
+ goto next;
+
+ /* The user disabled the feature */
+ if ((feat->feat_flags & CPU_FEAT_USER_DISABLED) != 0)
+ goto next;
+
+ /*
+ * The feature was disabled by default and the user
+ * didn't enable it then skip.
+ */
+ if (check_status == FEAT_DEFAULT_DISABLE &&
+ (feat->feat_flags & CPU_FEAT_USER_ENABLED) == 0)
+ goto next;
/*
* Check if the feature has any errata that may need a
@@ -97,8 +135,13 @@ enable_cpu_feat(uint32_t stage)
/* Shouldn't be possible */
MPASS(errata_status != ERRATA_UNKNOWN);
- feat->feat_enable(feat, errata_status, errata_list,
- errata_count);
+ if (feat->feat_enable(feat, errata_status, errata_list,
+ errata_count))
+ feat->feat_enabled = true;
+
+next:
+ if (!feat->feat_enabled && feat->feat_disabled != NULL)
+ feat->feat_disabled(feat);
}
}
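
With the tunable hook above, each feature or workaround can be forced on or off from loader.conf via hw.feat.<feat_name> (for example hw.feat.trap_ctr="0", assuming feat_name matches the first argument of the CPU_FEAT() declarations elsewhere in this diff). A minimal sketch of the resulting decision, with assumed flag values:

/*
 * Sketch only: mirrors the enable decision made in enable_cpu_feat().
 * The flag bit values are assumptions; the enum order follows the patch.
 */
#include <stdbool.h>
#include <stdint.h>

typedef enum {
	FEAT_ALWAYS_DISABLE,	/* not present / never enable */
	FEAT_DEFAULT_DISABLE,	/* present, opt-in via tunable */
	FEAT_DEFAULT_ENABLE,	/* present, enable unless disabled */
} cpu_feat_en;

#define	CPU_FEAT_USER_ENABLED	0x0100	/* assumed flag bits */
#define	CPU_FEAT_USER_DISABLED	0x0200

static bool
feat_should_enable(cpu_feat_en check, uint32_t flags)
{
	if (check == FEAT_ALWAYS_DISABLE)
		return (false);
	if ((flags & CPU_FEAT_USER_DISABLED) != 0)	/* hw.feat.<name>=0 */
		return (false);
	if (check == FEAT_DEFAULT_DISABLE &&
	    (flags & CPU_FEAT_USER_ENABLED) == 0)	/* needs explicit opt-in */
		return (false);
	return (true);
}
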
diff --git a/sys/arm64/arm64/db_disasm.c b/sys/arm64/arm64/db_disasm.c
index ab1002560b20..14ae2acc2ce6 100644
--- a/sys/arm64/arm64/db_disasm.c
+++ b/sys/arm64/arm64/db_disasm.c
@@ -31,6 +31,7 @@
#include <ddb/db_access.h>
#include <ddb/db_sym.h>
+#include <machine/armreg.h>
#include <machine/disassem.h>
static u_int db_disasm_read_word(vm_offset_t);
diff --git a/sys/arm64/arm64/efirt_machdep.c b/sys/arm64/arm64/efirt_machdep.c
index 0f46e44f5d6a..bde0d4f784dc 100644
--- a/sys/arm64/arm64/efirt_machdep.c
+++ b/sys/arm64/arm64/efirt_machdep.c
@@ -106,7 +106,8 @@ efi_1t1_l3(vm_offset_t va)
if (*l0 == 0) {
m = efi_1t1_page();
mphys = VM_PAGE_TO_PHYS(m);
- *l0 = PHYS_TO_PTE(mphys) | L0_TABLE;
+ *l0 = PHYS_TO_PTE(mphys) | TATTR_UXN_TABLE |
+ TATTR_AP_TABLE_NO_EL0 | L0_TABLE;
} else {
mphys = PTE_TO_PHYS(*l0);
}
@@ -117,7 +118,8 @@ efi_1t1_l3(vm_offset_t va)
if (*l1 == 0) {
m = efi_1t1_page();
mphys = VM_PAGE_TO_PHYS(m);
- *l1 = PHYS_TO_PTE(mphys) | L1_TABLE;
+ *l1 = PHYS_TO_PTE(mphys) | TATTR_UXN_TABLE |
+ TATTR_AP_TABLE_NO_EL0 | L1_TABLE;
} else {
mphys = PTE_TO_PHYS(*l1);
}
@@ -128,7 +130,8 @@ efi_1t1_l3(vm_offset_t va)
if (*l2 == 0) {
m = efi_1t1_page();
mphys = VM_PAGE_TO_PHYS(m);
- *l2 = PHYS_TO_PTE(mphys) | L2_TABLE;
+ *l2 = PHYS_TO_PTE(mphys) | TATTR_UXN_TABLE |
+ TATTR_AP_TABLE_NO_EL0 | L2_TABLE;
} else {
mphys = PTE_TO_PHYS(*l2);
}
@@ -218,8 +221,9 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz)
p->md_phys, mode, p->md_pages);
}
- l3_attr = ATTR_AF | pmap_sh_attr | ATTR_S1_IDX(mode) |
- ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_nG | L3_PAGE;
+ l3_attr = ATTR_S1_UXN | ATTR_AF | pmap_sh_attr |
+ ATTR_S1_IDX(mode) | ATTR_S1_AP(ATTR_S1_AP_RW) |
+ ATTR_S1_nG | L3_PAGE;
if (mode == VM_MEMATTR_DEVICE || p->md_attr & EFI_MD_ATTR_XP)
l3_attr |= ATTR_S1_XN;
@@ -241,6 +245,7 @@ fail:
int
efi_arch_enter(void)
{
+ uint64_t tcr;
CRITICAL_ASSERT(curthread);
curthread->td_md.md_efirt_dis_pf = vm_fault_disable_pagefaults();
@@ -249,7 +254,17 @@ efi_arch_enter(void)
* Temporarily switch to EFI's page table. However, we leave curpmap
* unchanged in order to prevent its ASID from being reclaimed before
* we switch back to its page table in efi_arch_leave().
+ *
+ * UEFI doesn't care about TBI, so enable it. It's more likely
+ * userspace will have TBI on as it's only disabled for backwards
+ * compatibility.
*/
+ tcr = READ_SPECIALREG(tcr_el1);
+ if ((tcr & MD_TCR_FIELDS) != TCR_TBI0) {
+ tcr &= ~MD_TCR_FIELDS;
+ tcr |= TCR_TBI0;
+ WRITE_SPECIALREG(tcr_el1, tcr);
+ }
set_ttbr0(efi_ttbr0);
if (PCPU_GET(bcast_tlbi_workaround) != 0)
invalidate_local_icache();
@@ -260,6 +275,7 @@ efi_arch_enter(void)
void
efi_arch_leave(void)
{
+ uint64_t proc_tcr, tcr;
/*
* Restore the pcpu pointer. Some UEFI implementations trash it and
@@ -271,6 +287,13 @@ efi_arch_leave(void)
__asm __volatile(
"mrs x18, tpidr_el1 \n"
);
+ proc_tcr = curthread->td_proc->p_md.md_tcr;
+ tcr = READ_SPECIALREG(tcr_el1);
+ if ((tcr & MD_TCR_FIELDS) != proc_tcr) {
+ tcr &= ~MD_TCR_FIELDS;
+ tcr |= proc_tcr;
+ WRITE_SPECIALREG(tcr_el1, tcr);
+ }
set_ttbr0(pmap_to_ttbr0(PCPU_GET(curpmap)));
if (PCPU_GET(bcast_tlbi_workaround) != 0)
invalidate_local_icache();
diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c
index 7cd5327b9f1b..4cb8ee5f57ef 100644
--- a/sys/arm64/arm64/elf32_machdep.c
+++ b/sys/arm64/arm64/elf32_machdep.c
@@ -195,7 +195,7 @@ freebsd32_fetch_syscall_args(struct thread *td)
register_t *ap;
struct syscall_args *sa;
int error, i, nap, narg;
- unsigned int args[4];
+ unsigned int args[6];
nap = 4;
p = td->td_proc;
@@ -210,7 +210,7 @@ freebsd32_fetch_syscall_args(struct thread *td)
sa->code = *ap++;
nap--;
} else if (sa->code == SYS___syscall) {
- sa->code = ap[1];
+ sa->code = ap[_QUAD_LOWWORD];
nap -= 2;
ap += 2;
}
@@ -225,7 +225,7 @@ freebsd32_fetch_syscall_args(struct thread *td)
sa->args[i] = ap[i];
if (narg > nap) {
if (narg - nap > nitems(args))
- panic("Too many system call arguiments");
+ panic("Too many system call arguments");
error = copyin((void *)td->td_frame->tf_x[13], args,
(narg - nap) * sizeof(int));
if (error != 0)
diff --git a/sys/arm64/arm64/elf_machdep.c b/sys/arm64/arm64/elf_machdep.c
index 970dba0ca7d9..207b37180a26 100644
--- a/sys/arm64/arm64/elf_machdep.c
+++ b/sys/arm64/arm64/elf_machdep.c
@@ -65,7 +65,13 @@ u_long __read_frequently linux_elf_hwcap2;
u_long __read_frequently linux_elf_hwcap3;
u_long __read_frequently linux_elf_hwcap4;
-struct arm64_addr_mask elf64_addr_mask;
+struct arm64_addr_mask elf64_addr_mask = {
+ .code = TBI_ADDR_MASK,
+ .data = TBI_ADDR_MASK,
+};
+#ifdef COMPAT_FREEBSD14
+struct arm64_addr_mask elf64_addr_mask_14;
+#endif
static void arm64_exec_protect(struct image_params *, int);
@@ -115,7 +121,7 @@ static struct sysentvec elf64_freebsd_sysvec = {
};
INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec);
-static Elf64_Brandinfo freebsd_brand_info = {
+static const Elf64_Brandinfo freebsd_brand_info = {
.brand = ELFOSABI_FREEBSD,
.machine = EM_AARCH64,
.compat_3_brand = "FreeBSD",
@@ -125,8 +131,7 @@ static Elf64_Brandinfo freebsd_brand_info = {
.brand_note = &elf64_freebsd_brandnote,
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
-
-SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST,
+C_SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST,
(sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info);
static bool
@@ -136,7 +141,14 @@ get_arm64_addr_mask(struct regset *rs, struct thread *td, void *buf,
if (buf != NULL) {
KASSERT(*sizep == sizeof(elf64_addr_mask),
("%s: invalid size", __func__));
- memcpy(buf, &elf64_addr_mask, sizeof(elf64_addr_mask));
+#ifdef COMPAT_FREEBSD14
+ /* If running an old binary, use the old address mask */
+ if (td->td_proc->p_osrel < TBI_VERSION)
+ memcpy(buf, &elf64_addr_mask_14,
+ sizeof(elf64_addr_mask_14));
+ else
+#endif
+ memcpy(buf, &elf64_addr_mask, sizeof(elf64_addr_mask));
}
*sizep = sizeof(elf64_addr_mask);
@@ -323,7 +335,7 @@ elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused)
return (0);
}
-static Elf_Note gnu_property_note = {
+static const Elf_Note gnu_property_note = {
.n_namesz = sizeof(GNU_ABI_VENDOR),
.n_descsz = 16,
.n_type = NT_GNU_PROPERTY_TYPE_0,
diff --git a/sys/arm64/arm64/exception.S b/sys/arm64/arm64/exception.S
index 13095def8b00..5a4181348a54 100644
--- a/sys/arm64/arm64/exception.S
+++ b/sys/arm64/arm64/exception.S
@@ -42,10 +42,9 @@
*/
.macro save_registers_head el
.if \el == 1
- mov x18, sp
- stp x0, x1, [sp, #(TF_X - TF_SIZE - 128)]!
+ stp x0, x1, [sp, #-(TF_SIZE - TF_X + 128)]!
.else
- stp x0, x1, [sp, #(TF_X - TF_SIZE)]!
+ stp x0, x1, [sp, #-(TF_SIZE - TF_X)]!
.endif
stp x2, x3, [sp, #(2 * 8)]
stp x4, x5, [sp, #(4 * 8)]
@@ -61,7 +60,9 @@
stp x24, x25, [sp, #(24 * 8)]
stp x26, x27, [sp, #(26 * 8)]
stp x28, x29, [sp, #(28 * 8)]
-.if \el == 0
+.if \el == 1
+ add x18, sp, #(TF_SIZE - TF_X + 128)
+.else
mrs x18, sp_el0
.endif
mrs x10, elr_el1
diff --git a/sys/arm64/arm64/exec_machdep.c b/sys/arm64/arm64/exec_machdep.c
index 751329affd91..7c50dc93fdb4 100644
--- a/sys/arm64/arm64/exec_machdep.c
+++ b/sys/arm64/arm64/exec_machdep.c
@@ -51,6 +51,7 @@
#include <vm/vm_map.h>
#include <machine/armreg.h>
+#include <machine/elf.h>
#include <machine/kdb.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
@@ -411,6 +412,7 @@ exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
{
struct trapframe *tf = td->td_frame;
struct pcb *pcb = td->td_pcb;
+ uint64_t new_tcr, tcr;
memset(tf, 0, sizeof(struct trapframe));
@@ -433,6 +435,35 @@ exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
*/
bzero(&pcb->pcb_dbg_regs, sizeof(pcb->pcb_dbg_regs));
+ /* If the process is new enough enable TBI */
+ if (td->td_proc->p_osrel >= TBI_VERSION)
+ new_tcr = TCR_TBI0;
+ else
+ new_tcr = 0;
+ td->td_proc->p_md.md_tcr = new_tcr;
+
+ /* TODO: should create a pmap function for this... */
+ tcr = READ_SPECIALREG(tcr_el1);
+ if ((tcr & MD_TCR_FIELDS) != new_tcr) {
+ uint64_t asid;
+
+ tcr &= ~MD_TCR_FIELDS;
+ tcr |= new_tcr;
+ WRITE_SPECIALREG(tcr_el1, tcr);
+ isb();
+
+ /*
+ * TCR_EL1.TBI0 is permitted to be cached in the TLB, so
+ * we need to perform a TLB invalidation.
+ */
+ asid = READ_SPECIALREG(ttbr0_el1) & TTBR_ASID_MASK;
+ __asm __volatile(
+ "tlbi aside1is, %0 \n"
+ "dsb ish \n"
+ "isb \n"
+ : : "r" (asid));
+ }
+
/* Generate new pointer authentication keys */
ptrauth_exec(td);
}
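
The TODO above suggests moving this block into pmap; a sketch of what such a helper could look like, reusing the register and macro names the patch already uses. The helper name is hypothetical and the register accessors assume <machine/armreg.h> and <machine/cpufunc.h>.

/*
 * Hypothetical helper wrapping the per-process TCR update above.
 */
static void
pmap_update_tcr_fields(uint64_t new_tcr)
{
	uint64_t asid, tcr;

	tcr = READ_SPECIALREG(tcr_el1);
	if ((tcr & MD_TCR_FIELDS) == new_tcr)
		return;			/* nothing to change */
	tcr &= ~MD_TCR_FIELDS;
	tcr |= new_tcr;
	WRITE_SPECIALREG(tcr_el1, tcr);
	isb();

	/* TCR_EL1.TBI0 may be cached in the TLB, so flush this ASID. */
	asid = READ_SPECIALREG(ttbr0_el1) & TTBR_ASID_MASK;
	__asm __volatile(
	    "tlbi aside1is, %0	\n"
	    "dsb  ish		\n"
	    "isb		\n"
	    : : "r" (asid));
}
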
diff --git a/sys/arm64/arm64/genassym.c b/sys/arm64/arm64/genassym.c
index e3977798b046..22696796e69d 100644
--- a/sys/arm64/arm64/genassym.c
+++ b/sys/arm64/arm64/genassym.c
@@ -64,6 +64,7 @@ ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
ASSYM(P_PID, offsetof(struct proc, p_pid));
+ASSYM(P_MD_TCR, offsetof(struct proc, p_md.md_tcr));
ASSYM(SF_UC, offsetof(struct sigframe, sf_uc));
diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c
index bcacea43ad2f..2d07420bcdb0 100644
--- a/sys/arm64/arm64/identcpu.c
+++ b/sys/arm64/arm64/identcpu.c
@@ -232,6 +232,10 @@ static const struct cpu_parts cpu_parts_arm[] = {
{ CPU_PART_CORTEX_X2, "Cortex-X2" },
{ CPU_PART_CORTEX_X3, "Cortex-X3" },
{ CPU_PART_CORTEX_X4, "Cortex-X4" },
+ { CPU_PART_C1_NANO, "C1-Nano" },
+ { CPU_PART_C1_PRO, "C1-Pro" },
+ { CPU_PART_C1_PREMIUM, "C1-Premium" },
+ { CPU_PART_C1_ULTRA, "C1-Ultra" },
{ CPU_PART_NEOVERSE_E1, "Neoverse-E1" },
{ CPU_PART_NEOVERSE_N1, "Neoverse-N1" },
{ CPU_PART_NEOVERSE_N2, "Neoverse-N2" },
@@ -2272,37 +2276,25 @@ static const struct mrs_user_reg user_regs[] = {
static bool
user_ctr_has_neoverse_n1_1542419(uint32_t midr, uint64_t ctr)
{
- /* Skip non-Neoverse-N1 */
- if (!CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_ARM,
- CPU_PART_NEOVERSE_N1, 0, 0))
- return (false);
-
- switch (CPU_VAR(midr)) {
- default:
- break;
- case 4:
- /* Fixed in r4p1 */
- if (CPU_REV(midr) > 0)
- break;
- /* FALLTHROUGH */
- case 3:
- /* If DIC is enabled (coherent icache) then we are affected */
- return (CTR_DIC_VAL(ctr) != 0);
- }
-
- return (false);
+ /*
+ * Neoverse-N1 erratum 1542419
+ * Present in r3p0 - r4p0
+ * Fixed in r4p1
+ */
+ return (midr_check_var_part_range(midr, CPU_IMPL_ARM,
+ CPU_PART_NEOVERSE_N1, 3, 0, 4, 0) && CTR_DIC_VAL(ctr) != 0);
}
-static bool
-user_ctr_check(const struct cpu_feat *feat __unused, u_int midr __unused)
+static cpu_feat_en
+user_ctr_check(const struct cpu_feat *feat __unused, u_int midr)
{
if (emulate_ctr)
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
if (user_ctr_has_neoverse_n1_1542419(midr, READ_SPECIALREG(ctr_el0)))
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
- return (false);
+ return (FEAT_ALWAYS_DISABLE);
}
static bool
@@ -2320,7 +2312,7 @@ user_ctr_has_errata(const struct cpu_feat *feat __unused, u_int midr,
return (false);
}
-static void
+static bool
user_ctr_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status, u_int *errata_list, u_int errata_count)
{
@@ -2356,16 +2348,13 @@ user_ctr_enable(const struct cpu_feat *feat __unused,
WRITE_SPECIALREG(sctlr_el1,
READ_SPECIALREG(sctlr_el1) & ~SCTLR_UCT);
isb();
+
+ return (true);
}
-static struct cpu_feat user_ctr = {
- .feat_name = "Trap CTR_EL0",
- .feat_check = user_ctr_check,
- .feat_has_errata = user_ctr_has_errata,
- .feat_enable = user_ctr_enable,
- .feat_flags = CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU,
-};
-DATA_SET(cpu_feat_set, user_ctr);
+CPU_FEAT(trap_ctr, "Trap CTR_EL0",
+ user_ctr_check, user_ctr_has_errata, user_ctr_enable, NULL,
+ CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
static bool
user_ctr_handler(uint64_t esr, struct trapframe *frame)
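
The rewritten erratum checks in this file and in pmap.c lean on midr_check_var_part_range(), whose definition is outside this diff. From the call sites it takes an inclusive r<variant>p<revision> range, so an illustrative equivalent looks like the following (field offsets per the Arm ARM; not copied from the tree):

/*
 * Sketch only: a variant/revision range test equivalent to the new
 * erratum checks, e.g. "present in r3p0 - r4p0" becomes
 * (3, 0) <= (Variant, Revision) <= (4, 0).
 */
#include <stdbool.h>
#include <stdint.h>

static bool
midr_var_rev_in_range(uint32_t midr, uint32_t impl, uint32_t part,
    unsigned min_var, unsigned min_rev, unsigned max_var, unsigned max_rev)
{
	unsigned var = (midr >> 20) & 0xf;	/* MIDR_EL1.Variant */
	unsigned rev = midr & 0xf;		/* MIDR_EL1.Revision */

	if (((midr >> 24) & 0xff) != impl || ((midr >> 4) & 0xfff) != part)
		return (false);
	if (var < min_var || (var == min_var && rev < min_rev))
		return (false);
	if (var > max_var || (var == max_var && rev > max_rev))
		return (false);
	return (true);
}
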
diff --git a/sys/arm64/arm64/kexec_support.c b/sys/arm64/arm64/kexec_support.c
new file mode 100644
index 000000000000..8b9719c05b67
--- /dev/null
+++ b/sys/arm64/arm64/kexec_support.c
@@ -0,0 +1,188 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/kexec.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_radix.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+
+#include <machine/armreg.h>
+#include <machine/pmap.h>
+#include <machine/pte.h>
+
+/*
+ * Idea behind this:
+ *
+ * kexec_load_md():
+ * - Update boot page tables (identity map) to include all pages needed before
+ * disabling MMU.
+ *
+ * kexec_reboot_md():
+ * - Copy pages into target(s)
+ * - Do "other stuff"
+ * - Does not return
+ */
+
+extern pt_entry_t pagetable_l0_ttbr0_bootstrap[];
+extern unsigned long initstack_end[];
+void switch_stack(void *, void (*)(void *, void *, struct kexec_image *), void *);
+
+#define SCTLR_EL1_NO_MMU (SCTLR_RES1 | SCTLR_LSMAOE | SCTLR_nTLSMD | \
+ SCTLR_EIS | SCTLR_TSCXT | SCTLR_EOS)
+#define vm_page_offset(m) ((vm_offset_t)(m) - vm_page_base)
+static inline vm_page_t
+phys_vm_page(vm_page_t m, vm_offset_t vm_page_v, vm_paddr_t vm_page_p)
+{
+ return ((vm_page_t)((vm_offset_t)m - vm_page_v + vm_page_p));
+}
+
+/* First 2 args are filler for switch_stack() */
+static void __aligned(16) __dead2
+kexec_reboot_bottom( void *arg1 __unused, void *arg2 __unused,
+ struct kexec_image *image)
+{
+ void (*e)(void) = (void *)image->entry;
+ vm_offset_t vm_page_base = (vm_offset_t)vm_page_array;
+ vm_paddr_t vm_page_phys = pmap_kextract((vm_offset_t)vm_page_array);
+ struct kexec_segment_stage *phys_segs =
+ (void *)pmap_kextract((vm_offset_t)&image->segments);
+ vm_paddr_t from_pa, to_pa;
+ vm_size_t size;
+ vm_page_t first, m, mp;
+ struct pctrie_iter pct_i;
+
+ /*
+ * Create a linked list of all pages in the object before we disable the
+ * MMU. Once the MMU is disabled we can't use the vm_radix iterators,
+ * as they rely on virtual address pointers.
+ */
+ first = NULL;
+ vm_radix_iter_init(&pct_i, &image->map_obj->rtree);
+ VM_RADIX_FORALL(m, &pct_i) {
+ if (first == NULL)
+ first = m;
+ else
+ SLIST_INSERT_AFTER(mp, m, plinks.s.ss);
+ mp = m;
+ }
+
+ /*
+ * We're running out of the identity map now, disable the MMU before we
+ * continue. It's possible page tables can be overwritten, which would
+ * be very bad if we were running with the MMU enabled.
+ */
+ WRITE_SPECIALREG(sctlr_el1, SCTLR_EL1_NO_MMU);
+ isb();
+ for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) {
+ if (phys_segs[i].size == 0)
+ break;
+ to_pa = phys_segs[i].target;
+ /* Copy the segment here... */
+ for (vm_page_t p = phys_segs[i].first_page;
+ p != NULL && to_pa - phys_segs[i].target < phys_segs[i].size;
+ p = SLIST_NEXT(p, plinks.s.ss)) {
+ p = phys_vm_page(p, vm_page_base, vm_page_phys);
+ from_pa = p->phys_addr;
+ if (p->phys_addr == to_pa) {
+ to_pa += PAGE_SIZE;
+ continue;
+ }
+ for (size = PAGE_SIZE / sizeof(register_t);
+ size > 0; --size) {
+ *(register_t *)to_pa = *(register_t *)from_pa;
+ to_pa += sizeof(register_t);
+ from_pa += sizeof(register_t);
+ }
+ }
+ }
+ invalidate_icache();
+ e();
+ while (1)
+ ;
+}
+
+void
+kexec_reboot_md(struct kexec_image *image)
+{
+ uintptr_t ptr;
+ register_t reg;
+
+ for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) {
+ if (image->segments[i].size > 0)
+ cpu_dcache_inv_range((void *)PHYS_TO_DMAP(image->segments[i].target),
+ image->segments[i].size);
+ }
+ ptr = pmap_kextract((vm_offset_t)kexec_reboot_bottom);
+ serror_disable();
+
+ reg = pmap_kextract((vm_offset_t)pagetable_l0_ttbr0_bootstrap);
+ set_ttbr0(reg);
+ cpu_tlb_flushID();
+
+ typeof(kexec_reboot_bottom) *p = (void *)ptr;
+ switch_stack((void *)pmap_kextract((vm_offset_t)initstack_end),
+ p, image);
+ while (1)
+ ;
+}
+
+int
+kexec_load_md(struct kexec_image *image)
+{
+ vm_paddr_t tmp;
+ pt_entry_t *pte;
+
+ /* Create L2 page blocks for the trampoline. L0/L1 are from the startup. */
+
+ /*
+ * There are exactly 2 pages before pagetable_l0_ttbr0_bootstrap, so move
+ * back to the start of them.
+ */
+ pte = pagetable_l0_ttbr0_bootstrap;
+ pte -= (Ln_ENTRIES * 2); /* move to start of L2 pages */
+
+ /*
+ * Populate the identity map with symbols we know we'll need before we
+ * turn off the MMU.
+ */
+ tmp = pmap_kextract((vm_offset_t)kexec_reboot_bottom);
+ pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN);
+ tmp = pmap_kextract((vm_offset_t)initstack_end);
+ pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN);
+ /* We'll need vm_page_array for doing offset calculations. */
+ tmp = pmap_kextract((vm_offset_t)&vm_page_array);
+ pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN);
+
+ return (0);
+}
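
kexec_load_md() above maps each symbol it needs with a 2 MiB L2 block in the bootstrap identity map. As a standalone illustration of the index arithmetic (4 KiB granule assumed; the constants are the conventional values, not copied from pte.h):

/*
 * Sketch only: which L2 entry a physical address selects.
 */
#include <stdint.h>

#define	L2_SHIFT	21		/* 2 MiB blocks */
#define	Ln_ENTRIES	512
#define	Ln_ADDR_MASK	(Ln_ENTRIES - 1)

static unsigned
l2_index(uint64_t pa)
{
	return ((pa >> L2_SHIFT) & Ln_ADDR_MASK);
}
/* e.g. pa 0x80240000 lies in the 0x80200000 block and selects index 1. */
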
diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S
index bb323dbafd85..c22d5fe76468 100644
--- a/sys/arm64/arm64/locore.S
+++ b/sys/arm64/arm64/locore.S
@@ -39,6 +39,23 @@
#define VIRT_BITS 48
+/*
+ * Loads a 64-bit value into reg using 1 to 4 mov/movk instructions.
+ * This can be used early on when we don't know the CPUs endianness.
+ */
+.macro mov_q reg, val
+ mov \reg, :abs_g0_nc:\val
+.if (\val >> 16) & 0xffff != 0
+ movk \reg, :abs_g1_nc:\val
+.endif
+.if (\val >> 32) & 0xffff != 0
+ movk \reg, :abs_g2_nc:\val
+.endif
+.if (\val >> 48) & 0xffff != 0
+ movk \reg, :abs_g3:\val
+.endif
+.endm
+
#if PAGE_SIZE == PAGE_SIZE_16K
/*
* The number of level 3 tables to create. 32 will allow for 1G of address
@@ -308,6 +325,19 @@ mp_virtdone:
b init_secondary
LEND(mpentry_common)
+
+ENTRY(mp_cpu_spinloop)
+0:
+ wfe
+ ldr x0, mp_cpu_spin_table_release_addr
+ cbz x0, 0b
+ blr x0
+ .globl mp_cpu_spin_table_release_addr
+mp_cpu_spin_table_release_addr:
+ .quad 0
+ .globl mp_cpu_spinloop_end
+mp_cpu_spinloop_end:
+END(mp_cpu_spinloop)
#endif
/*
@@ -319,22 +349,28 @@ LEND(mpentry_common)
* - Configure EL2 to support running the kernel at EL1 and exit to that
*/
LENTRY(enter_kernel_el)
-#define INIT_SCTLR_EL1 (SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_EIS | \
- SCTLR_TSCXT | SCTLR_EOS)
mrs x23, CurrentEL
and x23, x23, #(CURRENTEL_EL_MASK)
cmp x23, #(CURRENTEL_EL_EL2)
b.eq 1f
- ldr x2, =INIT_SCTLR_EL1
+ /*
+ * Ensure there are no memory operations here. If the boot loader
+ * enters the kernel in big-endian mode then loading sctlr will
+ * be incorrect. As instructions are the same in both endians it is
+ * safe to use mov instructions.
+ */
+ mov_q x2, SCTLR_MMU_OFF
msr sctlr_el1, x2
- /* SCTLR_EOS is set so eret is a context synchronizing event so we
+ /*
+ * SCTLR_EOS is set to make eret a context synchronizing event. We
* need an isb here to ensure it's observed by later instructions,
* but don't need it in the eret below.
*/
isb
- /* Ensure SPSR_EL1 and pstate are in sync. The only wat to set the
+ /*
+ * Ensure SPSR_EL1 and pstate are in sync. The only way to set the
* latter is to set the former and return from an exception with eret.
*/
mov x2, #(PSR_DAIF | PSR_M_EL1h)
@@ -348,11 +384,19 @@ LENTRY(enter_kernel_el)
* Set just the reserved bits in sctlr_el2. This will disable the
* MMU which may have broken the kernel if we enter the kernel in
* EL2, e.g. when using VHE.
+ *
+ * As with sctlr_el1 above use mov instructions to ensure there are
+ * no memory operations.
*/
- ldr x2, =(SCTLR_EL2_RES1 | SCTLR_EL2_EIS | SCTLR_EL2_EOS)
+ mov_q x2, (SCTLR_EL2_RES1 | SCTLR_EL2_EIS | SCTLR_EL2_EOS)
msr sctlr_el2, x2
isb
+ /*
+ * The hardware is now in little-endian mode so memory operations
+ * are safe.
+ */
+
/* Configure the Hypervisor */
ldr x2, =(HCR_RW | HCR_APK | HCR_API)
msr hcr_el2, x2
@@ -370,11 +414,11 @@ LENTRY(enter_kernel_el)
msr vmpidr_el2, x2
/* Set the initial sctlr_el1 */
- ldr x2, =INIT_SCTLR_EL1
+ ldr x2, =SCTLR_MMU_OFF
msr sctlr_el1, x2
/* Check for VHE */
- CHECK_CPU_FEAT(x2, ID_AA64MMFR1, VH, .Lno_vhe)
+ CHECK_CPU_FEAT(x2, ID_AA64MMFR1, VH, IMPL, .Lno_vhe)
/*
* The kernel will be running in EL2, route exceptions here rather
@@ -387,7 +431,7 @@ LENTRY(enter_kernel_el)
msr SCTLR_EL12_REG, x2
mov x2, xzr /* CPTR_EL2 is managed by vfp.c */
- ldr x3, =(CNTHCTL_E2H_EL1PCTEN | CNTHCTL_E2H_EL1PTEN)
+ ldr x3, =(CNTHCTL_E2H_EL1PCTEN_NOTRAP | CNTHCTL_E2H_EL1PTEN_NOTRAP)
ldr x5, =(PSR_DAIF | PSR_M_EL2h)
b .Ldone_vhe
@@ -398,9 +442,13 @@ LENTRY(enter_kernel_el)
msr vbar_el2, x2
ldr x2, =(CPTR_RES1)
- ldr x3, =(CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN)
+ ldr x3, =(CNTHCTL_EL1PCTEN_NOTRAP | CNTHCTL_EL1PCEN_NOTRAP)
ldr x5, =(PSR_DAIF | PSR_M_EL1h)
+ /* Enable SPE at EL1 via Monitor Debug Configuration Register */
+ mov x6, MDCR_EL2_E2PB_EL1_0_NO_TRAP
+ msr mdcr_el2, x6
+
.Ldone_vhe:
msr cptr_el2, x2
@@ -413,10 +461,9 @@ LENTRY(enter_kernel_el)
* Configure the Extended Hypervisor register. This is only valid if
* FEAT_HCX is enabled.
*/
- CHECK_CPU_FEAT(x2, ID_AA64MMFR1, HCX, 2f)
+ CHECK_CPU_FEAT(x2, ID_AA64MMFR1, HCX, IMPL, 2f)
/* Extended Hypervisor Configuration */
- mov x2, xzr
- msr HCRX_EL2_REG, x2
+ msr HCRX_EL2_REG, xzr
isb
2:
@@ -430,7 +477,7 @@ LENTRY(enter_kernel_el)
msr vttbr_el2, xzr
/* Check the CPU supports GIC, and configure the CPU interface */
- CHECK_CPU_FEAT(x2, ID_AA64PFR0, GIC, 3f)
+ CHECK_CPU_FEAT(x2, ID_AA64PFR0, GIC, CPUIF_EN, 3f)
mrs x2, icc_sre_el2
orr x2, x2, #ICC_SRE_EL2_EN /* Enable access from insecure EL1 */
@@ -443,9 +490,31 @@ LENTRY(enter_kernel_el)
isb
eret
-#undef INIT_SCTLR_EL1
LEND(enter_kernel_el)
+/* Turn off the MMU. Install ttbr0 from the bootstrap page table, and go there.
+ * Does not return.
+ * - x0 - target address to jump to after stopping the MMU.
+ * - x1 - kernel load address
+ */
+ENTRY(stop_mmu)
+ mov x16, x0 /* Save target. */
+ ldr x2, =(1f - KERNBASE)
+ add x17, x1, x2
+ ldr x3, =(pagetable_l0_ttbr0_bootstrap - KERNBASE)
+ add x1, x1, x3
+ msr ttbr0_el1, x1
+ isb
+ br x17
+1:
+ BTI_J
+ mrs x0, sctlr_el1
+ bic x0, x0, SCTLR_M
+ bic x0, x0, SCTLR_C
+ msr sctlr_el1, x0
+ isb
+ br x16
+END(stop_mmu)
/*
* Get the physical address the kernel was loaded at.
*/
@@ -1029,7 +1098,7 @@ LENTRY(start_mmu)
* HW management of dirty state is set in C code as it may
* need to be disabled because of CPU errata.
*/
- CHECK_CPU_FEAT(x3, ID_AA64MMFR1, HAFDBS, 1f)
+ CHECK_CPU_FEAT(x3, ID_AA64MMFR1, HAFDBS, AF, 1f)
orr x2, x2, #(TCR_HA)
1:
@@ -1038,11 +1107,7 @@ LENTRY(start_mmu)
/*
* Setup SCTLR.
*/
- ldr x2, sctlr_set
- ldr x3, sctlr_clear
- mrs x1, sctlr_el1
- bic x1, x1, x3 /* Clear the required bits */
- orr x1, x1, x2 /* Set the required bits */
+ ldr x1, =SCTLR_MMU_ON
msr sctlr_el1, x1
isb
@@ -1067,24 +1132,21 @@ tcr:
.quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_TG | \
TCR_SH1_IS | TCR_ORGN1_WBWA | TCR_IRGN1_WBWA | \
TCR_SH0_IS | TCR_ORGN0_WBWA | TCR_IRGN0_WBWA)
-sctlr_set:
- /* Bits to set */
- .quad (SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_UCI | SCTLR_SPAN | \
- SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \
- SCTLR_I | SCTLR_SED | SCTLR_SA0 | SCTLR_SA | SCTLR_C | \
- SCTLR_M | SCTLR_CP15BEN | SCTLR_BT1 | SCTLR_BT0)
-sctlr_clear:
- /* Bits to clear */
- .quad (SCTLR_EE | SCTLR_E0E | SCTLR_IESB | SCTLR_WXN | SCTLR_UMA | \
- SCTLR_ITD | SCTLR_A)
LEND(start_mmu)
+ENTRY(switch_stack)
+ mov sp, x0
+ mov x16, x1
+ br x16
+END(switch_stack)
+
ENTRY(abort)
b abort
END(abort)
.bss
.align PAGE_SHIFT
+ .globl initstack_end
initstack:
.space BOOT_STACK_SIZE
initstack_end:
@@ -1101,6 +1163,7 @@ initstack_end:
* L0 for user
*/
.globl pagetable_l0_ttbr1
+ .globl pagetable_l0_ttbr0_bootstrap
pagetable:
pagetable_l3_ttbr1:
.space (PAGE_SIZE * L3_PAGE_COUNT)
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
index 53856dd90cae..322bad273a08 100644
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -173,16 +173,20 @@ SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
-static bool
+static cpu_feat_en
pan_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t id_aa64mfr1;
- id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
- return (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE);
+ if (!get_kernel_reg(ID_AA64MMFR1_EL1, &id_aa64mfr1))
+ return (FEAT_ALWAYS_DISABLE);
+ if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) == ID_AA64MMFR1_PAN_NONE)
+ return (FEAT_ALWAYS_DISABLE);
+
+ return (FEAT_DEFAULT_ENABLE);
}
-static void
+static bool
pan_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
u_int errata_count __unused)
@@ -200,15 +204,20 @@ pan_enable(const struct cpu_feat *feat __unused,
".arch_extension pan \n"
"msr pan, #1 \n"
".arch_extension nopan \n");
+
+ return (true);
}
-static struct cpu_feat feat_pan = {
- .feat_name = "FEAT_PAN",
- .feat_check = pan_check,
- .feat_enable = pan_enable,
- .feat_flags = CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU,
-};
-DATA_SET(cpu_feat_set, feat_pan);
+static void
+pan_disabled(const struct cpu_feat *feat __unused)
+{
+ if (PCPU_GET(cpuid) == 0)
+ update_special_reg(ID_AA64MMFR1_EL1, ID_AA64MMFR1_PAN_MASK, 0);
+}
+
+CPU_FEAT(feat_pan, "Privileged access never",
+ pan_check, NULL, pan_enable, pan_disabled,
+ CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
bool
has_hyp(void)
@@ -857,7 +866,7 @@ initarm(struct arm64_bootparams *abp)
cninit();
set_ttbr0(abp->kern_ttbr0);
- cpu_tlb_flushID();
+ pmap_s1_invalidate_all_kernel();
if (!valid)
panic("Invalid bus configuration: %s",
diff --git a/sys/arm64/arm64/machdep_boot.c b/sys/arm64/arm64/machdep_boot.c
index 83bd74ea7317..1c5e8189e436 100644
--- a/sys/arm64/arm64/machdep_boot.c
+++ b/sys/arm64/arm64/machdep_boot.c
@@ -106,7 +106,8 @@ fake_preload_metadata(void *dtb_ptr, size_t dtb_size)
PRELOAD_PUSH_VALUE(uint32_t, MODINFO_SIZE);
PRELOAD_PUSH_VALUE(uint32_t, sizeof(size_t));
- PRELOAD_PUSH_VALUE(uint64_t, (size_t)(&end - VM_MIN_KERNEL_ADDRESS));
+ PRELOAD_PUSH_VALUE(uint64_t,
+ (size_t)((vm_offset_t)&end - VM_MIN_KERNEL_ADDRESS));
if (dtb_ptr != NULL) {
/* Copy DTB to KVA space and insert it into module chain. */
diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c
index e4d011df3a06..0bdd2ecfd8a7 100644
--- a/sys/arm64/arm64/mp_machdep.c
+++ b/sys/arm64/arm64/mp_machdep.c
@@ -60,6 +60,7 @@
#include <machine/debug_monitor.h>
#include <machine/intr.h>
#include <machine/smp.h>
+#include <machine/vmparam.h>
#ifdef VFP
#include <machine/vfp.h>
#endif
@@ -103,6 +104,7 @@ static void ipi_hardclock(void *);
static void ipi_preempt(void *);
static void ipi_rendezvous(void *);
static void ipi_stop(void *);
+static void ipi_off(void *);
#ifdef FDT
static u_int fdt_cpuid;
@@ -193,6 +195,7 @@ release_aps(void *dummy __unused)
intr_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL);
intr_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL);
intr_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL);
+ intr_ipi_setup(IPI_OFF, "off", ipi_off, NULL);
atomic_store_int(&aps_started, 0);
atomic_store_rel_int(&aps_ready, 1);
@@ -390,6 +393,34 @@ ipi_stop(void *dummy __unused)
CTR0(KTR_SMP, "IPI_STOP (restart)");
}
+void stop_mmu(vm_paddr_t, vm_paddr_t) __dead2;
+extern uint32_t mp_cpu_spinloop[];
+extern uint32_t mp_cpu_spinloop_end[];
+extern uint64_t mp_cpu_spin_table_release_addr;
+static void
+ipi_off(void *dummy __unused)
+{
+ CTR0(KTR_SMP, "IPI_OFF");
+ if (psci_present)
+ psci_cpu_off();
+ else {
+ uint64_t release_addr;
+ vm_size_t size;
+
+ size = (vm_offset_t)&mp_cpu_spin_table_release_addr -
+ (vm_offset_t)mp_cpu_spinloop;
+ release_addr = PCPU_GET(release_addr) - size;
+ isb();
+ invalidate_icache();
+ /* Go catatonic, don't take any interrupts. */
+ intr_disable();
+ stop_mmu(release_addr, pmap_kextract(KERNBASE));
+
+
+ }
+ CTR0(KTR_SMP, "IPI_OFF failed");
+}
+
struct cpu_group *
cpu_topo(void)
{
@@ -511,6 +542,7 @@ start_cpu(u_int cpuid, uint64_t target_cpu, int domain, vm_paddr_t release_addr)
pcpu_init(pcpup, cpuid, sizeof(struct pcpu));
pcpup->pc_mpidr = target_cpu & CPU_AFF_MASK;
bootpcpu = pcpup;
+ pcpup->pc_release_addr = release_addr;
dpcpu[cpuid - 1] = (void *)(pcpup + 1);
dpcpu_init(dpcpu[cpuid - 1], cpuid);
@@ -752,6 +784,52 @@ cpu_mp_start(void)
}
}
+void
+cpu_mp_stop(void)
+{
+
+ /* Short-circuit for single-CPU */
+ if (CPU_COUNT(&all_cpus) == 1)
+ return;
+
+ KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), ("Not on the first CPU!\n"));
+
+ /*
+ * If we use spin-table, assume U-boot method for now (single address
+ * shared by all CPUs).
+ */
+ if (!psci_present) {
+ int cpu;
+ vm_paddr_t release_addr;
+ void *release_vaddr;
+ vm_size_t size;
+
+ /* Find the shared release address. */
+ CPU_FOREACH(cpu) {
+ release_addr = pcpu_find(cpu)->pc_release_addr;
+ if (release_addr != 0)
+ break;
+ }
+ /* No release address? No way of notifying other CPUs. */
+ if (release_addr == 0)
+ return;
+
+ size = (vm_offset_t)&mp_cpu_spinloop_end -
+ (vm_offset_t)&mp_cpu_spinloop;
+
+ release_addr -= (vm_offset_t)&mp_cpu_spin_table_release_addr -
+ (vm_offset_t)mp_cpu_spinloop;
+
+ release_vaddr = pmap_mapdev(release_addr, size);
+ bcopy(mp_cpu_spinloop, release_vaddr, size);
+ cpu_dcache_wbinv_range(release_vaddr, size);
+ pmap_unmapdev(release_vaddr, size);
+ invalidate_icache();
+ }
+ ipi_all_but_self(IPI_OFF);
+ DELAY(1000000);
+}
+
/* Introduce rest of cores to the world */
void
cpu_mp_announce(void)
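
For the non-PSCI (spin-table) path, cpu_mp_stop() and ipi_off() rely on the same offset arithmetic: the spin-loop blob is copied so that its embedded release-address quad lands exactly on the address the secondary CPUs are already polling, and each AP then jumps to the start of that copy. A sketch of the calculation (symbol names as in the patch, the struct is purely illustrative):

/*
 * Illustrative only: base + (quad - start) == the firmware spin-table
 * release address, so the copied wfe loop reads its quad from the
 * location the APs already watch.
 */
#include <stdint.h>

struct spinloop_syms {
	uintptr_t start;	/* mp_cpu_spinloop */
	uintptr_t quad;		/* mp_cpu_spin_table_release_addr */
	uintptr_t end;		/* mp_cpu_spinloop_end */
};

static uintptr_t
spinloop_base(const struct spinloop_syms *s, uintptr_t release_addr)
{
	/* cpu_mp_stop() copies [start, end) here; ipi_off() jumps here. */
	return (release_addr - (s->quad - s->start));
}
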
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 2152f7fcc1c6..dbf5c820d20b 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -190,6 +190,8 @@ pt_entry_t __read_mostly pmap_gp_attr;
#define PMAP_SAN_PTE_BITS (ATTR_AF | ATTR_S1_XN | pmap_sh_attr | \
ATTR_KERN_GP | ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | ATTR_S1_AP(ATTR_S1_AP_RW))
+static bool __read_mostly pmap_multiple_tlbi = false;
+
struct pmap_large_md_page {
struct rwlock pv_lock;
struct md_page pv_page;
@@ -469,7 +471,7 @@ static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
vm_offset_t va);
static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte);
-static bool pmap_activate_int(pmap_t pmap);
+static bool pmap_activate_int(struct thread *td, pmap_t pmap);
static void pmap_alloc_asid(pmap_t pmap);
static int pmap_change_props_locked(vm_offset_t va, vm_size_t size,
vm_prot_t prot, int mode, bool skip_unmapped);
@@ -1297,7 +1299,7 @@ pmap_bootstrap_dmap(vm_size_t kernlen)
}
}
- cpu_tlb_flushID();
+ pmap_s1_invalidate_all_kernel();
bs_state.dmap_valid = true;
@@ -1399,7 +1401,7 @@ pmap_bootstrap(void)
/* And the l3 tables for the early devmap */
pmap_bootstrap_l3(VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE));
- cpu_tlb_flushID();
+ pmap_s1_invalidate_all_kernel();
#define alloc_pages(var, np) \
(var) = bs_state.freemempos; \
@@ -1656,14 +1658,17 @@ pmap_init_pv_table(void)
}
}
-static bool
+static cpu_feat_en
pmap_dbm_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t id_aa64mmfr1;
id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
- return (ID_AA64MMFR1_HAFDBS_VAL(id_aa64mmfr1) >=
- ID_AA64MMFR1_HAFDBS_AF_DBS);
+ if (ID_AA64MMFR1_HAFDBS_VAL(id_aa64mmfr1) >=
+ ID_AA64MMFR1_HAFDBS_AF_DBS)
+ return (FEAT_DEFAULT_ENABLE);
+
+ return (FEAT_ALWAYS_DISABLE);
}
static bool
@@ -1671,8 +1676,8 @@ pmap_dbm_has_errata(const struct cpu_feat *feat __unused, u_int midr,
u_int **errata_list, u_int *errata_count)
{
/* Disable on Cortex-A55 for erratum 1024718 - all revisions */
- if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_ARM,
- CPU_PART_CORTEX_A55, 0, 0)) {
+ if (CPU_IMPL(midr) == CPU_IMPL_ARM &&
+ CPU_PART(midr) == CPU_PART_CORTEX_A55) {
static u_int errata_id = 1024718;
*errata_list = &errata_id;
@@ -1681,21 +1686,19 @@ pmap_dbm_has_errata(const struct cpu_feat *feat __unused, u_int midr,
}
/* Disable on Cortex-A510 for erratum 2051678 - r0p0 to r0p2 */
- if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK | CPU_VAR_MASK,
- CPU_IMPL_ARM, CPU_PART_CORTEX_A510, 0, 0)) {
- if (CPU_REV(PCPU_GET(midr)) < 3) {
- static u_int errata_id = 2051678;
+ if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A510,
+ 0, 0, 0, 2)) {
+ static u_int errata_id = 2051678;
- *errata_list = &errata_id;
- *errata_count = 1;
- return (true);
- }
+ *errata_list = &errata_id;
+ *errata_count = 1;
+ return (true);
}
return (false);
}
-static void
+static bool
pmap_dbm_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status, u_int *errata_list __unused,
u_int errata_count)
@@ -1704,7 +1707,7 @@ pmap_dbm_enable(const struct cpu_feat *feat __unused,
/* Skip if there is an erratum affecting DBM */
if (errata_status != ERRATA_NONE)
- return;
+ return (false);
tcr = READ_SPECIALREG(tcr_el1) | TCR_HD;
WRITE_SPECIALREG(tcr_el1, tcr);
@@ -1714,16 +1717,58 @@ pmap_dbm_enable(const struct cpu_feat *feat __unused,
__asm __volatile("tlbi vmalle1");
dsb(nsh);
isb();
+
+ return (true);
}
-static struct cpu_feat feat_dbm = {
- .feat_name = "FEAT_HAFDBS (DBM)",
- .feat_check = pmap_dbm_check,
- .feat_has_errata = pmap_dbm_has_errata,
- .feat_enable = pmap_dbm_enable,
- .feat_flags = CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU,
-};
-DATA_SET(cpu_feat_set, feat_dbm);
+CPU_FEAT(feat_hafdbs, "Hardware management of the Access flag and dirty state",
+ pmap_dbm_check, pmap_dbm_has_errata, pmap_dbm_enable, NULL,
+ CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
+
+static cpu_feat_en
+pmap_multiple_tlbi_check(const struct cpu_feat *feat __unused, u_int midr)
+{
+ /*
+ * Cortex-A55 erratum 2441007 (Cat B rare)
+ * Present in all revisions
+ */
+ if (CPU_IMPL(midr) == CPU_IMPL_ARM &&
+ CPU_PART(midr) == CPU_PART_CORTEX_A55)
+ return (FEAT_DEFAULT_DISABLE);
+
+ /*
+ * Cortex-A76 erratum 1286807 (Cat B rare)
+ * Present in r0p0 - r3p0
+ * Fixed in r3p1
+ */
+ if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A76,
+ 0, 0, 3, 0))
+ return (FEAT_DEFAULT_DISABLE);
+
+ /*
+ * Cortex-A510 erratum 2441009 (Cat B rare)
+ * Present in r0p0 - r1p1
+ * Fixed in r1p2
+ */
+ if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A510,
+ 0, 0, 1, 1))
+ return (FEAT_DEFAULT_DISABLE);
+
+ return (FEAT_ALWAYS_DISABLE);
+}
+
+static bool
+pmap_multiple_tlbi_enable(const struct cpu_feat *feat __unused,
+ cpu_feat_errata errata_status, u_int *errata_list __unused,
+ u_int errata_count __unused)
+{
+ pmap_multiple_tlbi = true;
+ return (true);
+}
+
+CPU_FEAT(errata_multi_tlbi, "Multiple TLBI errata",
+ pmap_multiple_tlbi_check, NULL, pmap_multiple_tlbi_enable, NULL,
+ CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU);
/*
* Initialize the pmap module.
@@ -1878,9 +1923,17 @@ pmap_s1_invalidate_page(pmap_t pmap, vm_offset_t va, bool final_only)
r = TLBI_VA(va);
if (pmap == kernel_pmap) {
pmap_s1_invalidate_kernel(r, final_only);
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ pmap_s1_invalidate_kernel(r, final_only);
+ }
} else {
r |= ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
pmap_s1_invalidate_user(r, final_only);
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ pmap_s1_invalidate_user(r, final_only);
+ }
}
dsb(ish);
isb();
@@ -1922,12 +1975,24 @@ pmap_s1_invalidate_strided(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
end = TLBI_VA(eva);
for (r = start; r < end; r += TLBI_VA(stride))
pmap_s1_invalidate_kernel(r, final_only);
+
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ for (r = start; r < end; r += TLBI_VA(stride))
+ pmap_s1_invalidate_kernel(r, final_only);
+ }
} else {
start = end = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
start |= TLBI_VA(sva);
end |= TLBI_VA(eva);
for (r = start; r < end; r += TLBI_VA(stride))
pmap_s1_invalidate_user(r, final_only);
+
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ for (r = start; r < end; r += TLBI_VA(stride))
+ pmap_s1_invalidate_user(r, final_only);
+ }
}
dsb(ish);
isb();
@@ -1963,6 +2028,19 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
pmap_s2_invalidate_range(pmap, sva, eva, final_only);
}
+void
+pmap_s1_invalidate_all_kernel(void)
+{
+ dsb(ishst);
+ __asm __volatile("tlbi vmalle1is");
+ dsb(ish);
+ if (pmap_multiple_tlbi) {
+ __asm __volatile("tlbi vmalle1is");
+ dsb(ish);
+ }
+ isb();
+}
+
/*
* Invalidates all cached intermediate- and final-level TLB entries for the
* given virtual address space.
@@ -1977,9 +2055,17 @@ pmap_s1_invalidate_all(pmap_t pmap)
dsb(ishst);
if (pmap == kernel_pmap) {
__asm __volatile("tlbi vmalle1is");
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ __asm __volatile("tlbi vmalle1is");
+ }
} else {
r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
__asm __volatile("tlbi aside1is, %0" : : "r" (r));
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ __asm __volatile("tlbi aside1is, %0" : : "r" (r));
+ }
}
dsb(ish);
isb();
@@ -2915,13 +3001,13 @@ retry:
l1 = pmap_l1(pmap, va);
if (l1 != NULL && (pmap_load(l1) & ATTR_DESCR_MASK) == L1_TABLE) {
l2 = pmap_l1_to_l2(l1, va);
- if (!ADDR_IS_KERNEL(va)) {
+ if (ADDR_IS_USER(va)) {
/* Add a reference to the L2 page. */
l2pg = PTE_TO_VM_PAGE(pmap_load(l1));
l2pg->ref_count++;
} else
l2pg = NULL;
- } else if (!ADDR_IS_KERNEL(va)) {
+ } else if (ADDR_IS_USER(va)) {
/* Allocate a L2 page. */
l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT;
l2pg = _pmap_alloc_l3(pmap, NUL2E + l2pindex, lockp);
@@ -4082,7 +4168,7 @@ pmap_remove_l3_range(pmap_t pmap, pd_entry_t l2e, vm_offset_t sva,
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(rounddown2(sva, L2_SIZE) + L2_SIZE == roundup2(eva, L2_SIZE),
("pmap_remove_l3_range: range crosses an L3 page table boundary"));
- l3pg = !ADDR_IS_KERNEL(sva) ? PTE_TO_VM_PAGE(l2e) : NULL;
+ l3pg = ADDR_IS_USER(sva) ? PTE_TO_VM_PAGE(l2e) : NULL;
va = eva;
for (l3 = pmap_l2_to_l3(&l2e, sva); sva != eva; l3++, sva += L3_SIZE) {
old_l3 = pmap_load(l3);
@@ -5310,7 +5396,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if ((flags & PMAP_ENTER_WIRED) != 0)
new_l3 |= ATTR_SW_WIRED;
if (pmap->pm_stage == PM_STAGE1) {
- if (!ADDR_IS_KERNEL(va))
+ if (ADDR_IS_USER(va))
new_l3 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
else
new_l3 |= ATTR_S1_UXN;
@@ -5401,7 +5487,7 @@ retry:
pde = pmap_pde(pmap, va, &lvl);
if (pde != NULL && lvl == 2) {
l3 = pmap_l2_to_l3(pde, va);
- if (!ADDR_IS_KERNEL(va) && mpte == NULL) {
+ if (ADDR_IS_USER(va) && mpte == NULL) {
mpte = PTE_TO_VM_PAGE(pmap_load(pde));
mpte->ref_count++;
}
@@ -5411,7 +5497,7 @@ retry:
if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK &&
(l3 = pmap_demote_l2_locked(pmap, l2, va, &lock)) != NULL) {
l3 = &l3[pmap_l3_index(va)];
- if (!ADDR_IS_KERNEL(va)) {
+ if (ADDR_IS_USER(va)) {
mpte = PTE_TO_VM_PAGE(pmap_load(l2));
mpte->ref_count++;
}
@@ -5419,7 +5505,7 @@ retry:
}
/* We need to allocate an L3 table. */
}
- if (!ADDR_IS_KERNEL(va)) {
+ if (ADDR_IS_USER(va)) {
nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
/*
@@ -5657,7 +5743,7 @@ pmap_enter_l2_rx(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if ((prot & VM_PROT_EXECUTE) == 0 ||
m->md.pv_memattr == VM_MEMATTR_DEVICE)
new_l2 |= ATTR_S1_XN;
- if (!ADDR_IS_KERNEL(va))
+ if (ADDR_IS_USER(va))
new_l2 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
else
new_l2 |= ATTR_S1_UXN;
@@ -5745,7 +5831,7 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags,
"pmap_enter_l2: no space for va %#lx"
" in pmap %p", va, pmap);
return (KERN_NO_SPACE);
- } else if (!ADDR_IS_KERNEL(va) ||
+ } else if (ADDR_IS_USER(va) ||
!pmap_every_pte_zero(PTE_TO_PHYS(old_l2))) {
if (l2pg != NULL)
l2pg->ref_count--;
@@ -5796,7 +5882,7 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags,
}
KASSERT(pmap_load(l2) == 0,
("pmap_enter_l2: non-zero L2 entry %p", l2));
- if (!ADDR_IS_KERNEL(va)) {
+ if (ADDR_IS_USER(va)) {
vm_page_free_pages_toq(&free, true);
} else {
KASSERT(SLIST_EMPTY(&free),
@@ -5916,7 +6002,7 @@ pmap_enter_l3c_rx(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *ml3p,
if ((prot & VM_PROT_EXECUTE) == 0 ||
m->md.pv_memattr == VM_MEMATTR_DEVICE)
l3e |= ATTR_S1_XN;
- if (!ADDR_IS_KERNEL(va))
+ if (ADDR_IS_USER(va))
l3e |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
else
l3e |= ATTR_S1_UXN;
@@ -5948,7 +6034,7 @@ pmap_enter_l3c(pmap_t pmap, vm_offset_t va, pt_entry_t l3e, u_int flags,
/*
* If the L3 PTP is not resident, we attempt to create it here.
*/
- if (!ADDR_IS_KERNEL(va)) {
+ if (ADDR_IS_USER(va)) {
/*
* Were we given the correct L3 PTP? If so, we can simply
* increment its ref count.
@@ -6224,7 +6310,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
* In the case that a page table page is not
* resident, we are creating it here.
*/
- if (!ADDR_IS_KERNEL(va)) {
+ if (ADDR_IS_USER(va)) {
vm_pindex_t l2pindex;
/*
@@ -6310,7 +6396,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
if ((prot & VM_PROT_EXECUTE) == 0 ||
m->md.pv_memattr == VM_MEMATTR_DEVICE)
l3_val |= ATTR_S1_XN;
- if (!ADDR_IS_KERNEL(va))
+ if (ADDR_IS_USER(va))
l3_val |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
else
l3_val |= ATTR_S1_UXN;
@@ -7967,7 +8053,7 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size)
pa += L2_SIZE;
}
if ((old_l2e & ATTR_DESCR_VALID) != 0)
- pmap_s1_invalidate_all(kernel_pmap);
+ pmap_s1_invalidate_all_kernel();
else {
/*
* Because the old entries were invalid and the new
@@ -8058,7 +8144,7 @@ pmap_unmapbios(void *p, vm_size_t size)
}
}
if (preinit_map) {
- pmap_s1_invalidate_all(kernel_pmap);
+ pmap_s1_invalidate_all_kernel();
return;
}
@@ -8528,7 +8614,7 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
* region and early kernel memory are the only parts of the
* kernel address space that must be handled here.
*/
- KASSERT(!ADDR_IS_KERNEL(va) || VIRT_IN_DMAP(va) ||
+ KASSERT(ADDR_IS_USER(va) || VIRT_IN_DMAP(va) ||
(va >= VM_MIN_KERNEL_ADDRESS && va < kernel_vm_end),
("pmap_demote_l2: No saved mpte for va %#lx", va));
@@ -8555,7 +8641,7 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
}
ml3->pindex = pmap_l2_pindex(va);
- if (!ADDR_IS_KERNEL(va)) {
+ if (ADDR_IS_USER(va)) {
ml3->ref_count = NL3PG;
pmap_resident_count_inc(pmap, 1);
}
@@ -9113,7 +9199,7 @@ pmap_init_cnp(void *dummy __unused)
SYSINIT(pmap_init_cnp, SI_SUB_SMP, SI_ORDER_ANY, pmap_init_cnp, NULL);
static bool
-pmap_activate_int(pmap_t pmap)
+pmap_activate_int(struct thread *td, pmap_t pmap)
{
struct asid_set *set;
int epoch;
@@ -9152,6 +9238,15 @@ pmap_activate_int(pmap_t pmap)
pmap_alloc_asid(pmap);
if (pmap->pm_stage == PM_STAGE1) {
+ uint64_t new_tcr, tcr;
+
+ new_tcr = td->td_proc->p_md.md_tcr;
+ tcr = READ_SPECIALREG(tcr_el1);
+ if ((tcr & MD_TCR_FIELDS) != new_tcr) {
+ tcr &= ~MD_TCR_FIELDS;
+ tcr |= new_tcr;
+ WRITE_SPECIALREG(tcr_el1, tcr);
+ }
set_ttbr0(pmap_to_ttbr0(pmap));
if (PCPU_GET(bcast_tlbi_workaround) != 0)
invalidate_local_icache();
@@ -9165,7 +9260,7 @@ pmap_activate_vm(pmap_t pmap)
PMAP_ASSERT_STAGE2(pmap);
- (void)pmap_activate_int(pmap);
+ (void)pmap_activate_int(NULL, pmap);
}
void
@@ -9176,7 +9271,7 @@ pmap_activate(struct thread *td)
pmap = vmspace_pmap(td->td_proc->p_vmspace);
PMAP_ASSERT_STAGE1(pmap);
critical_enter();
- (void)pmap_activate_int(pmap);
+ (void)pmap_activate_int(td, pmap);
critical_exit();
}
@@ -9202,7 +9297,7 @@ pmap_switch(struct thread *new)
* to a user process.
*/
- if (pmap_activate_int(vmspace_pmap(new->td_proc->p_vmspace))) {
+ if (pmap_activate_int(new, vmspace_pmap(new->td_proc->p_vmspace))) {
/*
* Stop userspace from training the branch predictor against
* other processes. This will call into a CPU specific
diff --git a/sys/arm64/arm64/ptrauth.c b/sys/arm64/arm64/ptrauth.c
index 767b7e115479..ab40b72887e9 100644
--- a/sys/arm64/arm64/ptrauth.c
+++ b/sys/arm64/arm64/ptrauth.c
@@ -82,7 +82,7 @@ ptrauth_disable(void)
return (false);
}
-static bool
+static cpu_feat_en
ptrauth_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t isar;
@@ -97,11 +97,11 @@ ptrauth_check(const struct cpu_feat *feat __unused, u_int midr __unused)
if (!pac_enable) {
if (boothowto & RB_VERBOSE)
printf("Pointer authentication is disabled\n");
- goto out;
+ return (FEAT_ALWAYS_DISABLE);
}
if (ptrauth_disable())
- goto out;
+ return (FEAT_ALWAYS_DISABLE);
/*
* This assumes if there is pointer authentication on the boot CPU
@@ -116,32 +116,21 @@ ptrauth_check(const struct cpu_feat *feat __unused, u_int midr __unused)
if (get_kernel_reg(ID_AA64ISAR1_EL1, &isar)) {
if (ID_AA64ISAR1_APA_VAL(isar) > 0 ||
ID_AA64ISAR1_API_VAL(isar) > 0) {
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
}
}
/* The QARMA3 algorithm is reported in ID_AA64ISAR2_EL1. */
if (get_kernel_reg(ID_AA64ISAR2_EL1, &isar)) {
if (ID_AA64ISAR2_APA3_VAL(isar) > 0) {
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
}
}
-out:
- /*
- * Pointer authentication may be disabled, mask out the ID fields we
- * expose to userspace and the rest of the kernel so they don't try
- * to use it.
- */
- update_special_reg(ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_MASK |
- ID_AA64ISAR1_APA_MASK | ID_AA64ISAR1_GPA_MASK |
- ID_AA64ISAR1_GPI_MASK, 0);
- update_special_reg(ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_MASK, 0);
-
- return (false);
+ return (FEAT_ALWAYS_DISABLE);
}
-static void
+static bool
ptrauth_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
u_int errata_count __unused)
@@ -149,16 +138,34 @@ ptrauth_enable(const struct cpu_feat *feat __unused,
enable_ptrauth = true;
elf64_addr_mask.code |= PAC_ADDR_MASK;
elf64_addr_mask.data |= PAC_ADDR_MASK;
+#ifdef COMPAT_FREEBSD14
+ elf64_addr_mask_14.code |= PAC_ADDR_MASK_14;
+ elf64_addr_mask_14.data |= PAC_ADDR_MASK_14;
+#endif
+
+ return (true);
}
+static void
+ptrauth_disabled(const struct cpu_feat *feat __unused)
+{
+ /*
+ * Pointer authentication may be disabled, mask out the ID fields we
+ * expose to userspace and the rest of the kernel so they don't try
+ * to use it.
+ */
+ if (PCPU_GET(cpuid) == 0) {
+ update_special_reg(ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_MASK |
+ ID_AA64ISAR1_APA_MASK | ID_AA64ISAR1_GPA_MASK |
+ ID_AA64ISAR1_GPI_MASK, 0);
+ update_special_reg(ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_MASK, 0);
+ }
+}
-static struct cpu_feat feat_pauth = {
- .feat_name = "FEAT_PAuth",
- .feat_check = ptrauth_check,
- .feat_enable = ptrauth_enable,
- .feat_flags = CPU_FEAT_EARLY_BOOT | CPU_FEAT_SYSTEM,
-};
-DATA_SET(cpu_feat_set, feat_pauth);
+CPU_FEAT(feat_pauth, "Pointer Authentication",
+ ptrauth_check, NULL, ptrauth_enable, ptrauth_disabled,
+ CPU_FEAT_EARLY_BOOT | CPU_FEAT_SYSTEM);
/* Copy the keys when forking a new process */
void
diff --git a/sys/arm64/arm64/spec_workaround.c b/sys/arm64/arm64/spec_workaround.c
new file mode 100644
index 000000000000..7f4f86cdb48c
--- /dev/null
+++ b/sys/arm64/arm64/spec_workaround.c
@@ -0,0 +1,166 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Arm Ltd
+ * Copyright (c) 2018 Andrew Turner
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
+ * ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+
+#include <machine/cpu.h>
+#include <machine/cpu_feat.h>
+
+#include <dev/psci/psci.h>
+#include <dev/psci/smccc.h>
+
+static enum {
+ SSBD_FORCE_ON,
+ SSBD_FORCE_OFF,
+ SSBD_KERNEL,
+} ssbd_method = SSBD_KERNEL;
+
+struct psci_bp_hardening_impl {
+ u_int midr_mask;
+ u_int midr_value;
+};
+
+static struct psci_bp_hardening_impl psci_bp_hardening_impl[] = {
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A57,0,0),
+ },
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A72,0,0),
+ },
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A73,0,0),
+ },
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A75,0,0),
+ },
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value =
+ CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX2, 0,0),
+ }
+};
+
+static cpu_feat_en
+psci_bp_hardening_check(const struct cpu_feat *feat __unused, u_int midr)
+{
+ size_t i;
+
+ for (i = 0; i < nitems(psci_bp_hardening_impl); i++) {
+ if ((midr & psci_bp_hardening_impl[i].midr_mask) ==
+ psci_bp_hardening_impl[i].midr_value) {
+ /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */
+ if (!psci_present)
+ return (FEAT_ALWAYS_DISABLE);
+
+ if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_1) !=
+ SMCCC_RET_SUCCESS)
+ return (FEAT_ALWAYS_DISABLE);
+
+ return (FEAT_DEFAULT_ENABLE);
+ }
+ }
+
+ return (FEAT_ALWAYS_DISABLE);
+}
+
+static bool
+psci_bp_hardening_enable(const struct cpu_feat *feat __unused,
+ cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
+ u_int errata_count __unused)
+{
+ PCPU_SET(bp_harden, smccc_arch_workaround_1);
+
+ return (true);
+}
+
+CPU_FEAT(feat_csv2_missing, "Branch Predictor Hardening",
+ psci_bp_hardening_check, NULL, psci_bp_hardening_enable, NULL,
+ CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
+
+static cpu_feat_en
+ssbd_workaround_check(const struct cpu_feat *feat __unused, u_int midr __unused)
+{
+ char *env;
+
+ if (PCPU_GET(cpuid) == 0) {
+ env = kern_getenv("kern.cfg.ssbd");
+ if (env != NULL) {
+ if (strcmp(env, "force-on") == 0) {
+ ssbd_method = SSBD_FORCE_ON;
+ } else if (strcmp(env, "force-off") == 0) {
+ ssbd_method = SSBD_FORCE_OFF;
+ }
+ }
+ }
+
+ /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */
+ if (!psci_present)
+ return (FEAT_ALWAYS_DISABLE);
+
+ /* Enable the workaround on this CPU if it's enabled in the firmware */
+ if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_2) != SMCCC_RET_SUCCESS)
+ return (FEAT_ALWAYS_DISABLE);
+
+ return (FEAT_DEFAULT_ENABLE);
+}
+
+static bool
+ssbd_workaround_enable(const struct cpu_feat *feat __unused,
+ cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
+ u_int errata_count __unused)
+{
+ switch (ssbd_method) {
+ case SSBD_FORCE_ON:
+ smccc_arch_workaround_2(1);
+ break;
+ case SSBD_FORCE_OFF:
+ smccc_arch_workaround_2(0);
+ break;
+ case SSBD_KERNEL:
+ default:
+ PCPU_SET(ssbd, smccc_arch_workaround_2);
+ break;
+ }
+
+ return (true);
+}
+
+CPU_FEAT(feat_ssbs_missing, "Speculative Store Bypass Disable Workaround",
+ ssbd_workaround_check, NULL, ssbd_workaround_enable, NULL,
+ CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
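Note: the SSBD policy parsed by ssbd_workaround_check() remains tunable. Setting kern.cfg.ssbd to "force-on" or "force-off" (for example in loader.conf(5)) makes ssbd_workaround_enable() invoke smccc_arch_workaround_2() once per CPU during boot, while the default "kernel" mode installs the per-CPU ssbd hook so the kernel can toggle the mitigation dynamically.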
diff --git a/sys/arm64/arm64/support.S b/sys/arm64/arm64/support.S
index 2d067c7f7730..bf6fc931e4b0 100644
--- a/sys/arm64/arm64/support.S
+++ b/sys/arm64/arm64/support.S
@@ -39,8 +39,15 @@
#include "assym.inc"
.macro check_user_access user_arg, limit, bad_addr_func
+ /*
+ * TBI is enabled from FreeBSD 15.0. Clear the top byte of the userspace
+ * address before checking whether it's within the given limit.
+ * The later load/store instructions will fault if TBI is disabled
+ * for the current process.
+ */
+ and x6, x\user_arg, #(~TBI_ADDR_MASK)
ldr x7, =(\limit)
- cmp x\user_arg, x7
+ cmp x6, x7
b.cs \bad_addr_func
.endm
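The masking mirrors what Top Byte Ignore does in hardware: bits 63:56 of a user pointer carry a tag and are ignored for translation, so the limit check has to ignore them as well. A minimal, self-contained sketch of the same arithmetic, reusing the TBI_ADDR_MASK value added to vmparam.h in this change; the limit and pointer values are illustrative only:

#include <stdint.h>
#include <stdio.h>

#define TBI_ADDR_MASK	0xff00000000000000UL	/* top-byte-ignore bits */
#define USER_LIMIT	0x0001000000000000UL	/* illustrative limit only */

int
main(void)
{
	uint64_t tagged = 0x3a0000aabbccdde0UL;	/* user pointer, tag 0x3a */
	uint64_t clean = tagged & ~TBI_ADDR_MASK;

	/* The raw value fails a naive range check because of the tag... */
	printf("raw:    %s\n", tagged < USER_LIMIT ? "accept" : "reject");
	/* ...but passes once the ignored top byte is cleared. */
	printf("masked: %s\n", clean < USER_LIMIT ? "accept" : "reject");
	return (0);
}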
diff --git a/sys/arm64/arm64/swtch.S b/sys/arm64/arm64/swtch.S
index 7b6010a5f51f..a461fded929c 100644
--- a/sys/arm64/arm64/swtch.S
+++ b/sys/arm64/arm64/swtch.S
@@ -37,6 +37,8 @@
#include <machine/asm.h>
#include <machine/armreg.h>
+#include <machine/proc.h>
+
.macro clear_step_flag pcbflags, tmp
tbz \pcbflags, #PCB_SINGLE_STEP_SHIFT, 999f
mrs \tmp, mdscr_el1
@@ -239,6 +241,16 @@ ENTRY(fork_trampoline)
msr daifset, #(DAIF_D | DAIF_INTR)
ldr x0, [x18, #PC_CURTHREAD]
+
+ /* Set the per-process tcr_el1 fields */
+ ldr x10, [x0, #TD_PROC]
+ ldr x10, [x10, #P_MD_TCR]
+ mrs x11, tcr_el1
+ and x11, x11, #(~MD_TCR_FIELDS)
+ orr x11, x11, x10
+ msr tcr_el1, x11
+ /* No isb as the eret below is the context-synchronising event */
+
bl ptrauth_enter_el0
/* Restore sp, lr, elr, and spsr */
diff --git a/sys/arm64/arm64/trap.c b/sys/arm64/arm64/trap.c
index bed58095201a..75c9b5f87892 100644
--- a/sys/arm64/arm64/trap.c
+++ b/sys/arm64/arm64/trap.c
@@ -246,6 +246,7 @@ external_abort(struct thread *td, struct trapframe *frame, uint64_t esr,
print_registers(frame);
print_gp_register("far", far);
+ printf(" esr: 0x%.16lx\n", esr);
panic("Unhandled external data abort");
}
diff --git a/sys/arm64/arm64/vm_machdep.c b/sys/arm64/arm64/vm_machdep.c
index 38a126ff602f..0134feb65b6a 100644
--- a/sys/arm64/arm64/vm_machdep.c
+++ b/sys/arm64/arm64/vm_machdep.c
@@ -120,6 +120,9 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
td2->td_md.md_spinlock_count = 1;
td2->td_md.md_saved_daif = PSR_DAIF_DEFAULT;
+ /* Copy the TCR_EL1 value */
+ td2->td_proc->p_md.md_tcr = td1->td_proc->p_md.md_tcr;
+
#if defined(PERTHREAD_SSP)
/* Set the new canary */
arc4random_buf(&td2->td_md.md_canary, sizeof(td2->td_md.md_canary));
diff --git a/sys/arm64/conf/std.arm b/sys/arm64/conf/std.arm
index fb5561506531..309059a096eb 100644
--- a/sys/arm64/conf/std.arm
+++ b/sys/arm64/conf/std.arm
@@ -21,3 +21,6 @@ device arm_doorbell # ARM Message Handling Unit (MHU)
options FDT
device acpi
+
+# DTBs
+makeoptions MODULES_EXTRA+="dtb/arm"
diff --git a/sys/arm64/conf/std.arm64 b/sys/arm64/conf/std.arm64
index c83e98c17a33..02bdd25f2d52 100644
--- a/sys/arm64/conf/std.arm64
+++ b/sys/arm64/conf/std.arm64
@@ -7,6 +7,7 @@ makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support
options SCHED_ULE # ULE scheduler
options NUMA # Non-Uniform Memory Architecture support
options PREEMPTION # Enable kernel thread preemption
+options EXTERR_STRINGS
options VIMAGE # Subsystem virtualization, e.g. VNET
options INET # InterNETworking
options INET6 # IPv6 communications protocols
@@ -105,3 +106,9 @@ device efirtc # EFI RTC
# SMBIOS -- all EFI platforms
device smbios
+
+# random(4)
+device tpm # Trusted Platform Module
+options RANDOM_ENABLE_TPM # enable entropy from TPM 2.0
+options RANDOM_ENABLE_KBD
+options RANDOM_ENABLE_MOUSE
diff --git a/sys/arm64/coresight/coresight.c b/sys/arm64/coresight/coresight.c
index 5928c153f4ae..9b9d3c65ecc9 100644
--- a/sys/arm64/coresight/coresight.c
+++ b/sys/arm64/coresight/coresight.c
@@ -113,7 +113,7 @@ coresight_get_output_device(struct endpoint *endp, struct endpoint **out_endp)
}
static void
-coresight_init(void)
+coresight_init(void *dummy __unused)
{
mtx_init(&cs_mtx, "ARM Coresight", NULL, MTX_DEF);
diff --git a/sys/arm64/include/_armreg.h b/sys/arm64/include/_armreg.h
new file mode 100644
index 000000000000..0f5134e5a978
--- /dev/null
+++ b/sys/arm64/include/_armreg.h
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 2013, 2014 Andrew Turner
+ * Copyright (c) 2015,2021 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if !defined(_MACHINE_ARMREG_H_) && \
+ !defined(_MACHINE_CPU_H_) && \
+ !defined(_MACHINE_HYPERVISOR_H_)
+#error Do not include this file directly
+#endif
+
+#ifndef _MACHINE__ARMREG_H_
+#define _MACHINE__ARMREG_H_
+
+#define __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \
+ S##op0##_##op1##_C##crn##_C##crm##_##op2
+#define _MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \
+ __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2)
+#define MRS_REG_ALT_NAME(reg) \
+ _MRS_REG_ALT_NAME(reg##_op0, reg##_op1, reg##_CRn, reg##_CRm, reg##_op2)
+
+
+#define READ_SPECIALREG(reg) \
+({ uint64_t _val; \
+ __asm __volatile("mrs %0, " __STRING(reg) : "=&r" (_val)); \
+ _val; \
+})
+#define WRITE_SPECIALREG(reg, _val) \
+ __asm __volatile("msr " __STRING(reg) ", %0" : : "r"((uint64_t)_val))
+
+#define UL(x) UINT64_C(x)
+
+#endif /* !_MACHINE__ARMREG_H_ */
diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h
index 38b7f57f7853..27b02c44cd76 100644
--- a/sys/arm64/include/armreg.h
+++ b/sys/arm64/include/armreg.h
@@ -34,25 +34,9 @@
#ifndef _MACHINE_ARMREG_H_
#define _MACHINE_ARMREG_H_
-#define INSN_SIZE 4
-
-#define __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \
- S##op0##_##op1##_C##crn##_C##crm##_##op2
-#define _MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \
- __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2)
-#define MRS_REG_ALT_NAME(reg) \
- _MRS_REG_ALT_NAME(reg##_op0, reg##_op1, reg##_CRn, reg##_CRm, reg##_op2)
-
+#include <machine/_armreg.h>
-#define READ_SPECIALREG(reg) \
-({ uint64_t _val; \
- __asm __volatile("mrs %0, " __STRING(reg) : "=&r" (_val)); \
- _val; \
-})
-#define WRITE_SPECIALREG(reg, _val) \
- __asm __volatile("msr " __STRING(reg) ", %0" : : "r"((uint64_t)_val))
-
-#define UL(x) UINT64_C(x)
+#define INSN_SIZE 4
/* AFSR0_EL1 - Auxiliary Fault Status Register 0 */
#define AFSR0_EL1_REG MRS_REG_ALT_NAME(AFSR0_EL1)
@@ -232,6 +216,14 @@
#define CNTP_CTL_IMASK (1 << 1)
#define CNTP_CTL_ISTATUS (1 << 2)
+/* CNTP_CTL_EL02 - Counter-timer Physical Timer Control register */
+#define CNTP_CTL_EL02_REG MRS_REG_ALT_NAME(CNTP_CTL_EL02)
+#define CNTP_CTL_EL02_op0 3
+#define CNTP_CTL_EL02_op1 5
+#define CNTP_CTL_EL02_CRn 14
+#define CNTP_CTL_EL02_CRm 2
+#define CNTP_CTL_EL02_op2 1
+
/* CNTP_CVAL_EL0 - Counter-timer Physical Timer CompareValue register */
#define CNTP_CVAL_EL0_op0 3
#define CNTP_CVAL_EL0_op1 3
@@ -239,6 +231,14 @@
#define CNTP_CVAL_EL0_CRm 2
#define CNTP_CVAL_EL0_op2 2
+/* CNTP_CVAL_EL02 - Counter-timer Physical Timer CompareValue register */
+#define CNTP_CVAL_EL02_REG MRS_REG_ALT_NAME(CNTP_CVAL_EL02)
+#define CNTP_CVAL_EL02_op0 3
+#define CNTP_CVAL_EL02_op1 5
+#define CNTP_CVAL_EL02_CRn 14
+#define CNTP_CVAL_EL02_CRm 2
+#define CNTP_CVAL_EL02_op2 2
+
/* CNTP_TVAL_EL0 - Counter-timer Physical Timer TimerValue register */
#define CNTP_TVAL_EL0_op0 3
#define CNTP_TVAL_EL0_op1 3
@@ -254,6 +254,14 @@
#define CNTPCT_EL0_CRm 0
#define CNTPCT_EL0_op2 1
+/* CNTPCTSS_EL0 - Counter-timer Self-Synchronized Physical Count register */
+#define CNTPCTSS_EL0_REG MRS_REG_ALT_NAME(CNTPCTSS_EL0)
+#define CNTPCTSS_EL0_op0 3
+#define CNTPCTSS_EL0_op1 3
+#define CNTPCTSS_EL0_CRn 14
+#define CNTPCTSS_EL0_CRm 0
+#define CNTPCTSS_EL0_op2 5
+
/* CNTV_CTL_EL0 - Counter-timer Virtual Timer Control register */
#define CNTV_CTL_EL0_op0 3
#define CNTV_CTL_EL0_op1 3
@@ -282,6 +290,14 @@
#define CNTV_CVAL_EL02_CRm 3
#define CNTV_CVAL_EL02_op2 2
+/* CNTVCTSS_EL0 - Counter-timer Self-Synchronized Virtual Count register */
+#define CNTVCTSS_EL0_REG MRS_REG_ALT_NAME(CNTVCTSS_EL0)
+#define CNTVCTSS_EL0_op0 3
+#define CNTVCTSS_EL0_op1 3
+#define CNTVCTSS_EL0_CRn 14
+#define CNTVCTSS_EL0_CRm 0
+#define CNTVCTSS_EL0_op2 6
+
/* CONTEXTIDR_EL1 - Context ID register */
#define CONTEXTIDR_EL1_REG MRS_REG_ALT_NAME(CONTEXTIDR_EL1)
#define CONTEXTIDR_EL1_op0 3
@@ -2148,6 +2164,7 @@
#define OSLAR_EL1_CRn 1
#define OSLAR_EL1_CRm 0
#define OSLAR_EL1_op2 4
+#define OSLAR_OSLK (0x1ul << 0)
/* OSLSR_EL1 */
#define OSLSR_EL1_op0 2
@@ -2155,6 +2172,10 @@
#define OSLSR_EL1_CRn 1
#define OSLSR_EL1_CRm 1
#define OSLSR_EL1_op2 4
+#define OSLSR_OSLM_1 (0x1ul << 3)
+#define OSLSR_nTT (0x1ul << 2)
+#define OSLSR_OSLK (0x1ul << 1)
+#define OSLSR_OSLM_0 (0x1ul << 0)
/* PAR_EL1 - Physical Address Register */
#define PAR_F_SHIFT 0
@@ -2230,6 +2251,7 @@
#define PMBSR_MSS_SHIFT 0
#define PMBSR_MSS_MASK (UL(0xffff) << PMBSR_MSS_SHIFT)
#define PMBSR_MSS_BSC_MASK (UL(0x3f) << PMBSR_MSS_SHIFT)
+#define PMBSR_MSS_BSC_BUFFER_FILLED (UL(0x01) << PMBSR_MSS_SHIFT)
#define PMBSR_MSS_FSC_MASK (UL(0x3f) << PMBSR_MSS_SHIFT)
#define PMBSR_COLL_SHIFT 16
#define PMBSR_COLL (UL(0x1) << PMBSR_COLL_SHIFT)
@@ -2241,6 +2263,11 @@
#define PMBSR_DL (UL(0x1) << PMBSR_DL_SHIFT)
#define PMBSR_EC_SHIFT 26
#define PMBSR_EC_MASK (UL(0x3f) << PMBSR_EC_SHIFT)
+#define PMBSR_EC_VAL(x) (((x) & PMBSR_EC_MASK) >> PMBSR_EC_SHIFT)
+#define PMBSR_EC_OTHER_BUF_MGMT 0x00
+#define PMBSR_EC_GRAN_PROT_CHK 0x1e
+#define PMBSR_EC_STAGE1_DA 0x24
+#define PMBSR_EC_STAGE2_DA 0x25
/* PMCCFILTR_EL0 */
#define PMCCFILTR_EL0_op0 3
@@ -2476,6 +2503,15 @@
#define PMSIDR_FnE (UL(0x1) << PMSIDR_FnE_SHIFT)
#define PMSIDR_Interval_SHIFT 8
#define PMSIDR_Interval_MASK (UL(0xf) << PMSIDR_Interval_SHIFT)
+#define PMSIDR_Interval_VAL(x) (((x) & PMSIDR_Interval_MASK) >> PMSIDR_Interval_SHIFT)
+#define PMSIDR_Interval_256 0
+#define PMSIDR_Interval_512 2
+#define PMSIDR_Interval_768 3
+#define PMSIDR_Interval_1024 4
+#define PMSIDR_Interval_1536 5
+#define PMSIDR_Interval_2048 6
+#define PMSIDR_Interval_3072 7
+#define PMSIDR_Interval_4096 8
#define PMSIDR_MaxSize_SHIFT 12
#define PMSIDR_MaxSize_MASK (UL(0xf) << PMSIDR_MaxSize_SHIFT)
#define PMSIDR_CountSize_SHIFT 16
@@ -2608,6 +2644,28 @@
#define SCTLR_EnALS (UL(0x1) << 56)
#define SCTLR_EPAN (UL(0x1) << 57)
+#define SCTLR_MMU_OFF \
+ (SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_EIS | SCTLR_TSCXT | SCTLR_EOS)
+#define SCTLR_MMU_ON \
+ (SCTLR_MMU_OFF | \
+ SCTLR_EPAN | \
+ SCTLR_BT1 | \
+ SCTLR_BT0 | \
+ SCTLR_UCI | \
+ SCTLR_SPAN | \
+ SCTLR_IESB | \
+ SCTLR_nTWE | \
+ SCTLR_nTWI | \
+ SCTLR_UCT | \
+ SCTLR_DZE | \
+ SCTLR_I | \
+ SCTLR_SED | \
+ SCTLR_CP15BEN | \
+ SCTLR_SA0 | \
+ SCTLR_SA | \
+ SCTLR_C | \
+ SCTLR_M)
+
/* SCTLR_EL12 */
#define SCTLR_EL12_REG MRS_REG_ALT_NAME(SCTLR_EL12)
#define SCTLR_EL12_op0 3
diff --git a/sys/arm64/include/asm.h b/sys/arm64/include/asm.h
index 4f373dc4b7e1..f9a64f574fca 100644
--- a/sys/arm64/include/asm.h
+++ b/sys/arm64/include/asm.h
@@ -77,10 +77,11 @@
* to the given label. The tmp register should be a register able to hold the
* temporary data.
*/
-#define CHECK_CPU_FEAT(tmp, feat_reg, feat, label) \
- mrs tmp, ##feat_reg##_el1; \
+#define CHECK_CPU_FEAT(tmp, feat_reg, feat, min_val, label) \
+ mrs tmp, ##feat_reg##_el1; \
ubfx tmp, tmp, ##feat_reg##_##feat##_SHIFT, ##feat_reg##_##feat##_WIDTH; \
- cbz tmp, label
+ cmp tmp, #(##feat_reg##_##feat##_##min_val## >> ##feat_reg##_##feat##_SHIFT); \
+ b.lt label
/*
* Sets the trap fault handler. The exception handler will return to the
diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h
index 935e3754bf25..b15210633d37 100644
--- a/sys/arm64/include/cpu.h
+++ b/sys/arm64/include/cpu.h
@@ -43,10 +43,10 @@
#define _MACHINE_CPU_H_
#if !defined(__ASSEMBLER__)
+#include <machine/_armreg.h>
#include <machine/atomic.h>
#include <machine/frame.h>
#endif
-#include <machine/armreg.h>
#define TRAPF_PC(tfp) ((tfp)->tf_elr)
#define TRAPF_USERMODE(tfp) (((tfp)->tf_spsr & PSR_M_MASK) == PSR_M_EL0t)
@@ -125,7 +125,11 @@
#define CPU_PART_NEOVERSE_V3 0xD84
#define CPU_PART_CORTEX_X925 0xD85
#define CPU_PART_CORTEX_A725 0xD87
+#define CPU_PART_C1_NANO 0xD8A
+#define CPU_PART_C1_PRO 0xD8B
+#define CPU_PART_C1_ULTRA 0xD8C
#define CPU_PART_NEOVERSE_N3 0xD8E
+#define CPU_PART_C1_PREMIUM 0xD90
/* Cavium Part numbers */
#define CPU_PART_THUNDERX 0x0A1
@@ -193,8 +197,30 @@
(((mask) & PCPU_GET(midr)) == \
((mask) & CPU_ID_RAW((impl), (part), (var), (rev))))
-#define CPU_MATCH_RAW(mask, devid) \
- (((mask) & PCPU_GET(midr)) == ((mask) & (devid)))
+#if !defined(__ASSEMBLER__)
+static inline bool
+midr_check_var_part_range(u_int midr, u_int impl, u_int part, u_int var_low,
+ u_int part_low, u_int var_high, u_int part_high)
+{
+ /* Check for the correct part */
+ if (CPU_IMPL(midr) != impl || CPU_PART(midr) != part)
+ return (false);
+
+ /* Check if the variant is between var_low and var_high inclusive */
+ if (CPU_VAR(midr) < var_low || CPU_VAR(midr) > var_high)
+ return (false);
+
+ /* If the variant is the low value, check if the part is high enough */
+ if (CPU_VAR(midr) == var_low && CPU_PART(midr) < part_low)
+ return (false);
+
+ /* If the variant is the high value, check if the part is low enough */
+ if (CPU_VAR(midr) == var_high && CPU_PART(midr) > part_high)
+ return (false);
+
+ return (true);
+}
+#endif
/*
* Chip-specific errata. This defines are intended to be
@@ -226,6 +252,9 @@ extern uint64_t __cpu_affinity[];
struct arm64_addr_mask;
extern struct arm64_addr_mask elf64_addr_mask;
+#ifdef COMPAT_FREEBSD14
+extern struct arm64_addr_mask elf64_addr_mask_14;
+#endif
typedef void (*cpu_reset_hook_t)(void);
extern cpu_reset_hook_t cpu_reset_hook;
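midr_check_var_part_range() gives errata code a single helper for the "affected between these two revisions" ranges that Arm publishes errata with, instead of open-coded MIDR comparisons. A hedged sketch of how a feat_check routine might use it; the part and range values are illustrative and not taken from a real erratum:

static cpu_feat_en
example_erratum_check(const struct cpu_feat *feat __unused, u_int midr)
{
	/*
	 * Illustrative only: apply the workaround when the CPU is an
	 * Arm Cortex-A72 whose MIDR falls inside the affected range.
	 */
	if (midr_check_var_part_range(midr, CPU_IMPL_ARM,
	    CPU_PART_CORTEX_A72, 0, 1, 1, 2))
		return (FEAT_DEFAULT_ENABLE);

	return (FEAT_ALWAYS_DISABLE);
}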
diff --git a/sys/arm64/include/cpu_feat.h b/sys/arm64/include/cpu_feat.h
index 9fe6a9dd95d9..6a311d4000bb 100644
--- a/sys/arm64/include/cpu_feat.h
+++ b/sys/arm64/include/cpu_feat.h
@@ -29,6 +29,7 @@
#define _MACHINE_CPU_FEAT_H_
#include <sys/linker_set.h>
+#include <sys/sysctl.h>
typedef enum {
ERRATA_UNKNOWN, /* Unknown erratum */
@@ -39,6 +40,31 @@ typedef enum {
/* kernel component. */
} cpu_feat_errata;
+typedef enum {
+ /*
+ * Don't implement the feature or erratum workaround,
+ * e.g. the feature is not implemented or erratum is
+ * for another CPU.
+ */
+ FEAT_ALWAYS_DISABLE,
+
+ /*
+ * Disable by default, but allow the user to enable,
+ * e.g. for a rare erratum with a workaround, Arm
+ * Category B (rare) or similar.
+ */
+ FEAT_DEFAULT_DISABLE,
+
+ /*
+ * Enabled by default, but allow the user to disable,
+ * e.g. for a common erratum with a workaround, Arm
+ * Category A or B or similar.
+ */
+ FEAT_DEFAULT_ENABLE,
+
+ /* We could add FEAT_ALWAYS_ENABLE if a need was found. */
+} cpu_feat_en;
+
#define CPU_FEAT_STAGE_MASK 0x00000001
#define CPU_FEAT_EARLY_BOOT 0x00000000
#define CPU_FEAT_AFTER_DEV 0x00000001
@@ -47,23 +73,45 @@ typedef enum {
#define CPU_FEAT_PER_CPU 0x00000000
#define CPU_FEAT_SYSTEM 0x00000010
+#define CPU_FEAT_USER_ENABLED 0x40000000
+#define CPU_FEAT_USER_DISABLED 0x80000000
+
struct cpu_feat;
-typedef bool (cpu_feat_check)(const struct cpu_feat *, u_int);
+typedef cpu_feat_en (cpu_feat_check)(const struct cpu_feat *, u_int);
typedef bool (cpu_feat_has_errata)(const struct cpu_feat *, u_int,
u_int **, u_int *);
-typedef void (cpu_feat_enable)(const struct cpu_feat *, cpu_feat_errata,
+typedef bool (cpu_feat_enable)(const struct cpu_feat *, cpu_feat_errata,
u_int *, u_int);
+typedef void (cpu_feat_disabled)(const struct cpu_feat *);
struct cpu_feat {
const char *feat_name;
cpu_feat_check *feat_check;
cpu_feat_has_errata *feat_has_errata;
cpu_feat_enable *feat_enable;
+ cpu_feat_disabled *feat_disabled;
uint32_t feat_flags;
+ bool feat_enabled;
};
SET_DECLARE(cpu_feat_set, struct cpu_feat);
+SYSCTL_DECL(_hw_feat);
+
+#define CPU_FEAT(name, descr, check, has_errata, enable, disabled, flags) \
+static struct cpu_feat name = { \
+ .feat_name = #name, \
+ .feat_check = check, \
+ .feat_has_errata = has_errata, \
+ .feat_enable = enable, \
+ .feat_disabled = disabled, \
+ .feat_flags = flags, \
+ .feat_enabled = false, \
+}; \
+DATA_SET(cpu_feat_set, name); \
+SYSCTL_BOOL(_hw_feat, OID_AUTO, name, CTLFLAG_RD, &name.feat_enabled, \
+ 0, descr)
+
/*
* Allow drivers to mark an erratum as worked around, e.g. the Errata
* Management ABI may know the workaround isn't needed on a given system.
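Compared with the old pattern of a hand-rolled struct cpu_feat plus DATA_SET(), CPU_FEAT() also publishes the final decision as a read-only hw.feat.<name> sysctl via feat_enabled. A hedged sketch of a minimal consumer of the new interface; the feature itself is invented for illustration:

static cpu_feat_en
example_feat_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
	/* Decide FEAT_ALWAYS_DISABLE / FEAT_DEFAULT_DISABLE / _ENABLE here. */
	return (FEAT_DEFAULT_ENABLE);
}

static bool
example_feat_enable(const struct cpu_feat *feat __unused,
    cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
    u_int errata_count __unused)
{
	/* Program the hardware; returning true marks the feature enabled. */
	return (true);
}

static void
example_feat_disabled(const struct cpu_feat *feat __unused)
{
	/* Optional: mask ID register fields when the feature stays off. */
}

CPU_FEAT(feat_example, "Example feature (illustrative)",
    example_feat_check, NULL, example_feat_enable, example_feat_disabled,
    CPU_FEAT_EARLY_BOOT | CPU_FEAT_SYSTEM);

Whether a given feature ended up enabled on the running system can then be inspected with sysctl hw.feat.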
diff --git a/sys/arm64/include/cpufunc.h b/sys/arm64/include/cpufunc.h
index e6e1f682794e..e9eee643216b 100644
--- a/sys/arm64/include/cpufunc.h
+++ b/sys/arm64/include/cpufunc.h
@@ -96,6 +96,13 @@ serror_enable(void)
__asm __volatile("msr daifclr, #(" __XSTRING(DAIF_A) ")");
}
+static __inline void
+serror_disable(void)
+{
+
+ __asm __volatile("msr daifset, #(" __XSTRING(DAIF_A) ")");
+}
+
static __inline register_t
get_midr(void)
{
diff --git a/sys/arm64/include/db_machdep.h b/sys/arm64/include/db_machdep.h
index 5dc496ca851d..3ef95f7802ea 100644
--- a/sys/arm64/include/db_machdep.h
+++ b/sys/arm64/include/db_machdep.h
@@ -31,7 +31,6 @@
#ifndef _MACHINE_DB_MACHDEP_H_
#define _MACHINE_DB_MACHDEP_H_
-#include <machine/armreg.h>
#include <machine/frame.h>
#include <machine/trap.h>
diff --git a/sys/arm64/include/elf.h b/sys/arm64/include/elf.h
index d6328c143585..81ee7392f866 100644
--- a/sys/arm64/include/elf.h
+++ b/sys/arm64/include/elf.h
@@ -93,6 +93,9 @@ __ElfType(Auxinfo);
#define ET_DYN_LOAD_ADDR 0x100000
#endif
+/* First __FreeBSD_version that supports Top Byte Ignore (TBI) */
+#define TBI_VERSION 1500058
+
/* HWCAP */
#define HWCAP_FP (1 << 0)
#define HWCAP_ASIMD (1 << 1)
diff --git a/sys/arm64/include/hypervisor.h b/sys/arm64/include/hypervisor.h
index e3a880afbe9c..f3d7027269c9 100644
--- a/sys/arm64/include/hypervisor.h
+++ b/sys/arm64/include/hypervisor.h
@@ -30,26 +30,85 @@
#ifndef _MACHINE_HYPERVISOR_H_
#define _MACHINE_HYPERVISOR_H_
+#include <machine/_armreg.h>
+
/*
* These registers are only useful when in hypervisor context,
* e.g. specific to EL2, or controlling the hypervisor.
*/
/* CNTHCTL_EL2 - Counter-timer Hypervisor Control register */
-#define CNTHCTL_EVNTI_MASK (0xf << 4) /* Bit to trigger event stream */
/* Valid if HCR_EL2.E2H == 0 */
-#define CNTHCTL_EL1PCTEN (1 << 0) /* Allow physical counter access */
-#define CNTHCTL_EL1PCEN (1 << 1) /* Allow physical timer access */
+#define CNTHCTL_EL1PCTEN_SHIFT 0
+#define CNTHCTL_EL1PCTEN_MASK (0x1ul << CNTHCTL_EL1PCTEN_SHIFT)
+#define CNTHCTL_EL1PCTEN_TRAP (0x0ul << CNTHCTL_EL1PCTEN_SHIFT)
+#define CNTHCTL_EL1PCTEN_NOTRAP (0x1ul << CNTHCTL_EL1PCTEN_SHIFT)
+#define CNTHCTL_EL1PCEN_SHIFT 1
+#define CNTHCTL_EL1PCEN_MASK (0x1ul << CNTHCTL_EL1PCEN_SHIFT)
+#define CNTHCTL_EL1PCEN_TRAP (0x0ul << CNTHCTL_EL1PCEN_SHIFT)
+#define CNTHCTL_EL1PCEN_NOTRAP (0x1ul << CNTHCTL_EL1PCEN_SHIFT)
/* Valid if HCR_EL2.E2H == 1 */
-#define CNTHCTL_E2H_EL0PCTEN (1 << 0) /* Allow EL0 physical counter access */
-#define CNTHCTL_E2H_EL0VCTEN (1 << 1) /* Allow EL0 virtual counter access */
-#define CNTHCTL_E2H_EL0VTEN (1 << 8)
-#define CNTHCTL_E2H_EL0PTEN (1 << 9)
-#define CNTHCTL_E2H_EL1PCTEN (1 << 10) /* Allow physical counter access */
-#define CNTHCTL_E2H_EL1PTEN (1 << 11) /* Allow physical timer access */
+#define CNTHCTL_E2H_EL0PCTEN_SHIFT 0
+#define CNTHCTL_E2H_EL0PCTEN_MASK (0x1ul << CNTHCTL_E2H_EL0PCTEN_SHIFT)
+#define CNTHCTL_E2H_EL0PCTEN_TRAP (0x0ul << CNTHCTL_E2H_EL0PCTEN_SHIFT)
+#define CNTHCTL_E2H_EL0PCTEN_NOTRAP (0x1ul << CNTHCTL_E2H_EL0PCTEN_SHIFT)
+#define CNTHCTL_E2H_EL0VCTEN_SHIFT 1
+#define CNTHCTL_E2H_EL0VCTEN_MASK (0x1ul << CNTHCTL_E2H_EL0VCTEN_SHIFT)
+#define CNTHCTL_E2H_EL0VCTEN_TRAP (0x0ul << CNTHCTL_E2H_EL0VCTEN_SHIFT)
+#define CNTHCTL_E2H_EL0VCTEN_NOTRAP (0x1ul << CNTHCTL_E2H_EL0VCTEN_SHIFT)
+#define CNTHCTL_E2H_EL0VTEN_SHIFT 8
+#define CNTHCTL_E2H_EL0VTEN_MASK (0x1ul << CNTHCTL_E2H_EL0VTEN_SHIFT)
+#define CNTHCTL_E2H_EL0VTEN_TRAP (0x0ul << CNTHCTL_E2H_EL0VTEN_SHIFT)
+#define CNTHCTL_E2H_EL0VTEN_NOTRAP (0x1ul << CNTHCTL_E2H_EL0VTEN_SHIFT)
+#define CNTHCTL_E2H_EL0PTEN_SHIFT 9
+#define CNTHCTL_E2H_EL0PTEN_MASK (0x1ul << CNTHCTL_E2H_EL0PTEN_SHIFT)
+#define CNTHCTL_E2H_EL0PTEN_TRAP (0x0ul << CNTHCTL_E2H_EL0PTEN_SHIFT)
+#define CNTHCTL_E2H_EL0PTEN_NOTRAP (0x1ul << CNTHCTL_E2H_EL0PTEN_SHIFT)
+#define CNTHCTL_E2H_EL1PCTEN_SHIFT 10
+#define CNTHCTL_E2H_EL1PCTEN_MASK (0x1ul << CNTHCTL_E2H_EL1PCTEN_SHIFT)
+#define CNTHCTL_E2H_EL1PCTEN_TRAP (0x0ul << CNTHCTL_E2H_EL1PCTEN_SHIFT)
+#define CNTHCTL_E2H_EL1PCTEN_NOTRAP (0x1ul << CNTHCTL_E2H_EL1PCTEN_SHIFT)
+#define CNTHCTL_E2H_EL1PTEN_SHIFT 11
+#define CNTHCTL_E2H_EL1PTEN_MASK (0x1ul << CNTHCTL_E2H_EL1PTEN_SHIFT)
+#define CNTHCTL_E2H_EL1PTEN_TRAP (0x0ul << CNTHCTL_E2H_EL1PTEN_SHIFT)
+#define CNTHCTL_E2H_EL1PTEN_NOTRAP (0x1ul << CNTHCTL_E2H_EL1PTEN_SHIFT)
/* Unconditionally valid */
-#define CNTHCTL_EVNTDIR (1 << 3) /* Control transition trigger bit */
-#define CNTHCTL_EVNTEN (1 << 2) /* Enable event stream */
+#define CNTHCTL_EVNTEN_SHIFT 2
+#define CNTHCTL_EVNTEN_MASK (0x1ul << CNTHCTL_EVNTEN_SHIFT)
+#define CNTHCTL_EVNTEN_DIS (0x0ul << CNTHCTL_EVNTEN_SHIFT)
+#define CNTHCTL_EVNTEN_EN (0x1ul << CNTHCTL_EVNTEN_SHIFT)
+#define CNTHCTL_EVNTDIR_SHIFT 3
+#define CNTHCTL_EVNTDIR_MASK (0x1ul << CNTHCTL_EVNTDIR_SHIFT)
+#define CNTHCTL_EVNTDIR_HIGH (0x0ul << CNTHCTL_EVNTDIR_SHIFT)
+#define CNTHCTL_EVNTDIR_LOW (0x1ul << CNTHCTL_EVNTDIR_SHIFT)
+#define CNTHCTL_EVNTI_SHIFT 4
+#define CNTHCTL_EVNTI_MASK (0xful << CNTHCTL_EVNTI_SHIFT)
+#define CNTHCTL_ECV_SHIFT 12
+#define CNTHCTL_ECV_MASK (0x1ul << CNTHCTL_ECV_SHIFT)
+#define CNTHCTL_ECV_DIS (0x0ul << CNTHCTL_ECV_SHIFT)
+#define CNTHCTL_ECV_EN (0x1ul << CNTHCTL_ECV_SHIFT)
+#define CNTHCTL_EL1TVT_SHIFT 13
+#define CNTHCTL_EL1TVT_MASK (0x1ul << CNTHCTL_EL1TVT_SHIFT)
+#define CNTHCTL_EL1TVT_NOTRAP (0x0ul << CNTHCTL_EL1TVT_SHIFT)
+#define CNTHCTL_EL1TVT_TRAP (0x1ul << CNTHCTL_EL1TVT_SHIFT)
+#define CNTHCTL_EL1TVCT_SHIFT 14
+#define CNTHCTL_EL1TVCT_MASK (0x1ul << CNTHCTL_EL1TVCT_SHIFT)
+#define CNTHCTL_EL1TVCT_NOTRAP (0x0ul << CNTHCTL_EL1TVCT_SHIFT)
+#define CNTHCTL_EL1TVCT_TRAP (0x1ul << CNTHCTL_EL1TVCT_SHIFT)
+#define CNTHCTL_EL1NVPCT_SHIFT 15
+#define CNTHCTL_EL1NVPCT_MASK (0x1ul << CNTHCTL_EL1NVPCT_SHIFT)
+#define CNTHCTL_EL1NVPCT_NOTRAP (0x0ul << CNTHCTL_EL1NVPCT_SHIFT)
+#define CNTHCTL_EL1NVPCT_TRAP (0x1ul << CNTHCTL_EL1NVPCT_SHIFT)
+#define CNTHCTL_EL1NVVCT_SHIFT 16
+#define CNTHCTL_EL1NVVCT_MASK (0x1ul << CNTHCTL_EL1NVVCT_SHIFT)
+#define CNTHCTL_EL1NVVCT_NOTRAP (0x0ul << CNTHCTL_EL1NVVCT_SHIFT)
+#define CNTHCTL_EL1NVVCT_TRAP (0x1ul << CNTHCTL_EL1NVVCT_SHIFT)
+#define CNTHCTL_EVNTIS_SHIFT 17
+#define CNTHCTL_EVNTIS_MASK (0x1ul << CNTHCTL_EVNTIS_SHIFT)
+#define CNTHCTL_CNTVMASK_SHIFT 18
+#define CNTHCTL_CNTVMASK_MASK (0x1ul << CNTHCTL_CNTVMASK_SHIFT)
+#define CNTHCTL_CNTPMASK_SHIFT 19
+#define CNTHCTL_CNTPMASK_MASK (0x1ul << CNTHCTL_CNTPMASK_SHIFT)
/* CNTPOFF_EL2 - Counter-timer Physical Offset Register */
#define CNTPOFF_EL2_REG MRS_REG_ALT_NAME(CNTPOFF_EL2)
@@ -190,6 +249,55 @@
#define ICC_SRE_EL2_SRE (1UL << 0)
#define ICC_SRE_EL2_EN (1UL << 3)
+/* MDCR_EL2 - Hyp Debug Control Register */
+#define MDCR_EL2_HPMN_MASK 0x1f
+#define MDCR_EL2_HPMN_SHIFT 0
+#define MDCR_EL2_TPMCR_SHIFT 5
+#define MDCR_EL2_TPMCR (0x1UL << MDCR_EL2_TPMCR_SHIFT)
+#define MDCR_EL2_TPM_SHIFT 6
+#define MDCR_EL2_TPM (0x1UL << MDCR_EL2_TPM_SHIFT)
+#define MDCR_EL2_HPME_SHIFT 7
+#define MDCR_EL2_HPME (0x1UL << MDCR_EL2_HPME_SHIFT)
+#define MDCR_EL2_TDE_SHIFT 8
+#define MDCR_EL2_TDE (0x1UL << MDCR_EL2_TDE_SHIFT)
+#define MDCR_EL2_TDA_SHIFT 9
+#define MDCR_EL2_TDA (0x1UL << MDCR_EL2_TDA_SHIFT)
+#define MDCR_EL2_TDOSA_SHIFT 10
+#define MDCR_EL2_TDOSA (0x1UL << MDCR_EL2_TDOSA_SHIFT)
+#define MDCR_EL2_TDRA_SHIFT 11
+#define MDCR_EL2_TDRA (0x1UL << MDCR_EL2_TDRA_SHIFT)
+#define MDCR_EL2_E2PB_SHIFT 12
+#define MDCR_EL2_E2PB_MASK (0x3UL << MDCR_EL2_E2PB_SHIFT)
+#define MDCR_EL2_E2PB_EL1_0_NO_TRAP (0x3UL << MDCR_EL2_E2PB_SHIFT)
+#define MDCR_EL2_TPMS_SHIFT 14
+#define MDCR_EL2_TPMS (0x1UL << MDCR_EL2_TPMS_SHIFT)
+#define MDCR_EL2_EnSPM_SHIFT 15
+#define MDCR_EL2_EnSPM (0x1UL << MDCR_EL2_EnSPM_SHIFT)
+#define MDCR_EL2_HPMD_SHIFT 17
+#define MDCR_EL2_HPMD (0x1UL << MDCR_EL2_HPMD_SHIFT)
+#define MDCR_EL2_TTRF_SHIFT 19
+#define MDCR_EL2_TTRF (0x1UL << MDCR_EL2_TTRF_SHIFT)
+#define MDCR_EL2_HCCD_SHIFT 23
+#define MDCR_EL2_HCCD (0x1UL << MDCR_EL2_HCCD_SHIFT)
+#define MDCR_EL2_E2TB_SHIFT 24
+#define MDCR_EL2_E2TB_MASK (0x3UL << MDCR_EL2_E2TB_SHIFT)
+#define MDCR_EL2_HLP_SHIFT 26
+#define MDCR_EL2_HLP (0x1UL << MDCR_EL2_HLP_SHIFT)
+#define MDCR_EL2_TDCC_SHIFT 27
+#define MDCR_EL2_TDCC (0x1UL << MDCR_EL2_TDCC_SHIFT)
+#define MDCR_EL2_MTPME_SHIFT 28
+#define MDCR_EL2_MTPME (0x1UL << MDCR_EL2_MTPME_SHIFT)
+#define MDCR_EL2_HPMFZO_SHIFT 29
+#define MDCR_EL2_HPMFZO (0x1UL << MDCR_EL2_HPMFZO_SHIFT)
+#define MDCR_EL2_PMSSE_SHIFT 30
+#define MDCR_EL2_PMSSE_MASK (0x3UL << MDCR_EL2_PMSSE_SHIFT)
+#define MDCR_EL2_HPMFZS_SHIFT 36
+#define MDCR_EL2_HPMFZS (0x1UL << MDCR_EL2_HPMFZS_SHIFT)
+#define MDCR_EL2_PMEE_SHIFT 40
+#define MDCR_EL2_PMEE_MASK (0x3UL << MDCR_EL2_PMEE_SHIFT)
+#define MDCR_EL2_EBWE_SHIFT 43
+#define MDCR_EL2_EBWE (0x1UL << MDCR_EL2_EBWE_SHIFT)
+
/* SCTLR_EL2 - System Control Register */
#define SCTLR_EL2_RES1 0x30c50830
#define SCTLR_EL2_M_SHIFT 0
@@ -299,52 +407,4 @@
/* Assumed to be 0 by locore.S */
#define VTTBR_HOST 0x0000000000000000
-/* MDCR_EL2 - Hyp Debug Control Register */
-#define MDCR_EL2_HPMN_MASK 0x1f
-#define MDCR_EL2_HPMN_SHIFT 0
-#define MDCR_EL2_TPMCR_SHIFT 5
-#define MDCR_EL2_TPMCR (0x1UL << MDCR_EL2_TPMCR_SHIFT)
-#define MDCR_EL2_TPM_SHIFT 6
-#define MDCR_EL2_TPM (0x1UL << MDCR_EL2_TPM_SHIFT)
-#define MDCR_EL2_HPME_SHIFT 7
-#define MDCR_EL2_HPME (0x1UL << MDCR_EL2_HPME_SHIFT)
-#define MDCR_EL2_TDE_SHIFT 8
-#define MDCR_EL2_TDE (0x1UL << MDCR_EL2_TDE_SHIFT)
-#define MDCR_EL2_TDA_SHIFT 9
-#define MDCR_EL2_TDA (0x1UL << MDCR_EL2_TDA_SHIFT)
-#define MDCR_EL2_TDOSA_SHIFT 10
-#define MDCR_EL2_TDOSA (0x1UL << MDCR_EL2_TDOSA_SHIFT)
-#define MDCR_EL2_TDRA_SHIFT 11
-#define MDCR_EL2_TDRA (0x1UL << MDCR_EL2_TDRA_SHIFT)
-#define MDCR_E2PB_SHIFT 12
-#define MDCR_E2PB_MASK (0x3UL << MDCR_E2PB_SHIFT)
-#define MDCR_TPMS_SHIFT 14
-#define MDCR_TPMS (0x1UL << MDCR_TPMS_SHIFT)
-#define MDCR_EnSPM_SHIFT 15
-#define MDCR_EnSPM (0x1UL << MDCR_EnSPM_SHIFT)
-#define MDCR_HPMD_SHIFT 17
-#define MDCR_HPMD (0x1UL << MDCR_HPMD_SHIFT)
-#define MDCR_TTRF_SHIFT 19
-#define MDCR_TTRF (0x1UL << MDCR_TTRF_SHIFT)
-#define MDCR_HCCD_SHIFT 23
-#define MDCR_HCCD (0x1UL << MDCR_HCCD_SHIFT)
-#define MDCR_E2TB_SHIFT 24
-#define MDCR_E2TB_MASK (0x3UL << MDCR_E2TB_SHIFT)
-#define MDCR_HLP_SHIFT 26
-#define MDCR_HLP (0x1UL << MDCR_HLP_SHIFT)
-#define MDCR_TDCC_SHIFT 27
-#define MDCR_TDCC (0x1UL << MDCR_TDCC_SHIFT)
-#define MDCR_MTPME_SHIFT 28
-#define MDCR_MTPME (0x1UL << MDCR_MTPME_SHIFT)
-#define MDCR_HPMFZO_SHIFT 29
-#define MDCR_HPMFZO (0x1UL << MDCR_HPMFZO_SHIFT)
-#define MDCR_PMSSE_SHIFT 30
-#define MDCR_PMSSE_MASK (0x3UL << MDCR_PMSSE_SHIFT)
-#define MDCR_HPMFZS_SHIFT 36
-#define MDCR_HPMFZS (0x1UL << MDCR_HPMFZS_SHIFT)
-#define MDCR_PMEE_SHIFT 40
-#define MDCR_PMEE_MASK (0x3UL << MDCR_PMEE_SHIFT)
-#define MDCR_EBWE_SHIFT 43
-#define MDCR_EBWE (0x1UL << MDCR_EBWE_SHIFT)
-
#endif /* !_MACHINE_HYPERVISOR_H_ */
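With each CNTHCTL_EL2 field spelled out as a _TRAP/_NOTRAP (or _EN/_DIS) pair, timer-trap policy can be written without bare shifts; the _TRAP variants expand to zero, so they exist purely to document intent. A hedged sketch for the HCR_EL2.E2H == 1 layout; the particular policy is illustrative, not necessarily what the hypervisor programs:

	uint64_t cnthctl;

	/* Illustrative: let EL1/EL0 read both counters directly... */
	cnthctl = CNTHCTL_E2H_EL1PCTEN_NOTRAP | CNTHCTL_E2H_EL0PCTEN_NOTRAP |
	    CNTHCTL_E2H_EL0VCTEN_NOTRAP;
	/* ...but trap EL1 physical timer accesses to EL2. */
	cnthctl |= CNTHCTL_E2H_EL1PTEN_TRAP;

	WRITE_SPECIALREG(cnthctl_el2, cnthctl);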
diff --git a/sys/arm64/include/kexec.h b/sys/arm64/include/kexec.h
new file mode 100644
index 000000000000..0a8c7a053331
--- /dev/null
+++ b/sys/arm64/include/kexec.h
@@ -0,0 +1,33 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ARM64_KEXEC_H_
+#define _ARM64_KEXEC_H_
+
+#define KEXEC_MD_PAGES(x) 0
+
+#endif /* _ARM64_KEXEC_H_ */
diff --git a/sys/arm64/include/pcpu.h b/sys/arm64/include/pcpu.h
index 09bd8fa8a966..73399d2c3f8c 100644
--- a/sys/arm64/include/pcpu.h
+++ b/sys/arm64/include/pcpu.h
@@ -50,7 +50,8 @@ struct debug_monitor_state;
struct pmap *pc_curvmpmap; \
uint64_t pc_mpidr; \
u_int pc_bcast_tlbi_workaround; \
- char __pad[197]
+ uint64_t pc_release_addr; \
+ char __pad[189]
#ifdef _KERNEL
diff --git a/sys/arm64/include/pmap.h b/sys/arm64/include/pmap.h
index 0f23f200f0f6..406b6e2c5e0a 100644
--- a/sys/arm64/include/pmap.h
+++ b/sys/arm64/include/pmap.h
@@ -69,6 +69,7 @@ struct md_page {
TAILQ_HEAD(,pv_entry) pv_list;
int pv_gen;
vm_memattr_t pv_memattr;
+ uint8_t pv_reserve[3];
};
enum pmap_stage {
@@ -174,6 +175,8 @@ int pmap_fault(pmap_t, uint64_t, uint64_t);
struct pcb *pmap_switch(struct thread *);
+void pmap_s1_invalidate_all_kernel(void);
+
extern void (*pmap_clean_stage2_tlbi)(void);
extern void (*pmap_stage2_invalidate_range)(uint64_t, vm_offset_t, vm_offset_t,
bool);
diff --git a/sys/arm64/include/proc.h b/sys/arm64/include/proc.h
index dc2fa2df654d..b40990e89385 100644
--- a/sys/arm64/include/proc.h
+++ b/sys/arm64/include/proc.h
@@ -35,6 +35,7 @@
#ifndef _MACHINE_PROC_H_
#define _MACHINE_PROC_H_
+#ifndef LOCORE
struct ptrauth_key {
uint64_t pa_key_lo;
uint64_t pa_key_hi;
@@ -73,8 +74,13 @@ struct mdthread {
};
struct mdproc {
- long md_dummy;
+ uint64_t md_tcr; /* TCR_EL1 fields to update */
+ uint64_t md_reserved[2];
};
+#endif /* !LOCORE */
+
+/* Fields that can be set in md_tcr */
+#define MD_TCR_FIELDS TCR_TBI0
#define KINFO_PROC_SIZE 1088
#define KINFO_PROC32_SIZE 816
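md_tcr carries the per-process TCR_EL1 bits (currently only TCR_TBI0, per MD_TCR_FIELDS) that pmap_activate_int() and the fork trampoline merge into the live register, so enabling Top Byte Ignore for one process cannot leak into another. A hedged sketch of what opting a process in could look like; the helper name is invented for illustration:

static void
example_proc_enable_tbi(struct proc *p)
{
	/*
	 * Illustrative only: the switch path computes
	 * (tcr_el1 & ~MD_TCR_FIELDS) | p->p_md.md_tcr, so this takes
	 * effect the next time a thread of the process is scheduled.
	 */
	p->p_md.md_tcr = TCR_TBI0;
}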
diff --git a/sys/arm64/include/smp.h b/sys/arm64/include/smp.h
index 500cd1ef4f02..4a5bfda3ac1c 100644
--- a/sys/arm64/include/smp.h
+++ b/sys/arm64/include/smp.h
@@ -40,6 +40,7 @@ enum {
IPI_STOP,
IPI_STOP_HARD,
IPI_HARDCLOCK,
+ IPI_OFF,
INTR_IPI_COUNT,
};
diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h
index 1d783cdacb0d..e67540eac66d 100644
--- a/sys/arm64/include/vmm.h
+++ b/sys/arm64/include/vmm.h
@@ -42,6 +42,7 @@ enum vm_suspend_how {
VM_SUSPEND_RESET,
VM_SUSPEND_POWEROFF,
VM_SUSPEND_HALT,
+ VM_SUSPEND_DESTROY,
VM_SUSPEND_LAST
};
@@ -89,6 +90,7 @@ enum vm_reg_name {
VM_REG_GUEST_TTBR1_EL1,
VM_REG_GUEST_TCR_EL1,
VM_REG_GUEST_TCR2_EL1,
+ VM_REG_GUEST_MPIDR_EL1,
VM_REG_LAST
};
@@ -104,27 +106,6 @@ enum vm_reg_name {
#define VM_GUEST_BASE_IPA 0x80000000UL /* Guest kernel start ipa */
-/*
- * The VM name has to fit into the pathname length constraints of devfs,
- * governed primarily by SPECNAMELEN. The length is the total number of
- * characters in the full path, relative to the mount point and not
- * including any leading '/' characters.
- * A prefix and a suffix are added to the name specified by the user.
- * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters
- * longer for future use.
- * The suffix is a string that identifies a bootrom image or some similar
- * image that is attached to the VM. A separator character gets added to
- * the suffix automatically when generating the full path, so it must be
- * accounted for, reducing the effective length by 1.
- * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37
- * bytes for FreeBSD 12. A minimum length is set for safety and supports
- * a SPECNAMELEN as small as 32 on old systems.
- */
-#define VM_MAX_PREFIXLEN 10
-#define VM_MAX_SUFFIXLEN 15
-#define VM_MAX_NAMELEN \
- (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1)
-
#ifdef _KERNEL
struct vm;
struct vm_exception;
@@ -141,10 +122,41 @@ struct vm_eventinfo {
int *iptr; /* reqidle cookie */
};
+#define DECLARE_VMMOPS_FUNC(ret_type, opname, args) \
+ ret_type vmmops_##opname args
+
+DECLARE_VMMOPS_FUNC(int, modinit, (int ipinum));
+DECLARE_VMMOPS_FUNC(int, modcleanup, (void));
+DECLARE_VMMOPS_FUNC(void *, init, (struct vm *vm, struct pmap *pmap));
+DECLARE_VMMOPS_FUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault));
+DECLARE_VMMOPS_FUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap,
+ struct vm_eventinfo *info));
+DECLARE_VMMOPS_FUNC(void, cleanup, (void *vmi));
+DECLARE_VMMOPS_FUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu,
+ int vcpu_id));
+DECLARE_VMMOPS_FUNC(void, vcpu_cleanup, (void *vcpui));
+DECLARE_VMMOPS_FUNC(int, exception, (void *vcpui, uint64_t esr, uint64_t far));
+DECLARE_VMMOPS_FUNC(int, getreg, (void *vcpui, int num, uint64_t *retval));
+DECLARE_VMMOPS_FUNC(int, setreg, (void *vcpui, int num, uint64_t val));
+DECLARE_VMMOPS_FUNC(int, getcap, (void *vcpui, int num, int *retval));
+DECLARE_VMMOPS_FUNC(int, setcap, (void *vcpui, int num, int val));
+DECLARE_VMMOPS_FUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min,
+ vm_offset_t max));
+DECLARE_VMMOPS_FUNC(void, vmspace_free, (struct vmspace *vmspace));
+#ifdef notyet
+#ifdef BHYVE_SNAPSHOT
+DECLARE_VMMOPS_FUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta *meta));
+DECLARE_VMMOPS_FUNC(int, vcpu_snapshot, (void *vcpui,
+ struct vm_snapshot_meta *meta));
+DECLARE_VMMOPS_FUNC(int, restore_tsc, (void *vcpui, uint64_t now));
+#endif
+#endif
+
int vm_create(const char *name, struct vm **retvm);
struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
void vm_disable_vcpu_creation(struct vm *vm);
-void vm_slock_vcpus(struct vm *vm);
+void vm_lock_vcpus(struct vm *vm);
void vm_unlock_vcpus(struct vm *vm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
@@ -230,7 +242,6 @@ vcpu_should_yield(struct vcpu *vcpu)
void *vcpu_stats(struct vcpu *vcpu);
void vcpu_notify_event(struct vcpu *vcpu);
-struct vmspace *vm_vmspace(struct vm *vm);
struct vm_mem *vm_mem(struct vm *vm);
enum vm_reg_name vm_segment_name(int seg_encoding);
diff --git a/sys/arm64/include/vmm_dev.h b/sys/arm64/include/vmm_dev.h
index 219f1116c728..289ff0fe1fc9 100644
--- a/sys/arm64/include/vmm_dev.h
+++ b/sys/arm64/include/vmm_dev.h
@@ -31,6 +31,8 @@
#include <machine/vmm.h>
+#include <dev/vmm/vmm_param.h>
+
struct vm_memmap {
vm_paddr_t gpa;
int segid; /* memory segment */
diff --git a/sys/arm64/include/vmparam.h b/sys/arm64/include/vmparam.h
index 349849845e73..781602306436 100644
--- a/sys/arm64/include/vmparam.h
+++ b/sys/arm64/include/vmparam.h
@@ -209,10 +209,16 @@
#define KMSAN_ORIG_MAX_ADDRESS (0xffff028000000000UL)
/* The address bits that hold a pointer authentication code */
-#define PAC_ADDR_MASK (0xff7f000000000000UL)
+#define PAC_ADDR_MASK (0x007f000000000000UL)
+#define PAC_ADDR_MASK_14 (0xff7f000000000000UL)
+
+/* The top-byte ignore address bits */
+#define TBI_ADDR_MASK 0xff00000000000000UL
/* If true addr is in the kernel address space */
#define ADDR_IS_KERNEL(addr) (((addr) & (1ul << 55)) == (1ul << 55))
+/* If true addr is in the user address space */
+#define ADDR_IS_USER(addr) (((addr) & (1ul << 55)) == 0)
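ADDR_IS_USER() is the complement of ADDR_IS_KERNEL(): both test only bit 55, which TBI tags and pointer authentication codes leave intact, so the split works even for non-canonical pointers. A small self-contained demonstration; the addresses are illustrative:

#include <stdint.h>
#include <stdio.h>

#define ADDR_IS_KERNEL(addr)	(((addr) & (1ul << 55)) == (1ul << 55))
#define ADDR_IS_USER(addr)	(((addr) & (1ul << 55)) == 0)

int
main(void)
{
	uint64_t user = 0x3a0000aabbccdde0UL;	/* TBI-tagged user pointer */
	uint64_t kern = 0xff84800012345678UL;	/* PAC-signed kernel pointer */

	printf("user? %d  kernel? %d\n", ADDR_IS_USER(user),
	    ADDR_IS_KERNEL(kern));
	return (0);
}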
/* If true addr is in its canonical form (i.e. no TBI, PAC, etc.) */
#define ADDR_IS_CANONICAL(addr) \
(((addr) & 0xffff000000000000UL) == 0 || \
diff --git a/sys/arm64/linux/linux_sysvec.c b/sys/arm64/linux/linux_sysvec.c
index 084b7a11b01f..ac05820f89bc 100644
--- a/sys/arm64/linux/linux_sysvec.c
+++ b/sys/arm64/linux/linux_sysvec.c
@@ -584,7 +584,7 @@ linux_vdso_reloc(char *mapping, Elf_Addr offset)
}
}
-static Elf_Brandnote linux64_brandnote = {
+static const Elf_Brandnote linux64_brandnote = {
.hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
.hdr.n_descsz = 16,
.hdr.n_type = 1,
@@ -593,7 +593,7 @@ static Elf_Brandnote linux64_brandnote = {
.trans_osrel = linux_trans_osrel
};
-static Elf64_Brandinfo linux_glibc2brand = {
+static const Elf64_Brandinfo linux_glibc2brand = {
.brand = ELFOSABI_LINUX,
.machine = EM_AARCH64,
.compat_3_brand = "Linux",
@@ -604,7 +604,7 @@ static Elf64_Brandinfo linux_glibc2brand = {
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
-Elf64_Brandinfo *linux_brandlist[] = {
+const Elf64_Brandinfo *linux_brandlist[] = {
&linux_glibc2brand,
NULL
};
@@ -612,8 +612,8 @@ Elf64_Brandinfo *linux_brandlist[] = {
static int
linux64_elf_modevent(module_t mod, int type, void *data)
{
- Elf64_Brandinfo **brandinfo;
- struct linux_ioctl_handler**lihp;
+ const Elf64_Brandinfo **brandinfo;
+ struct linux_ioctl_handler **lihp;
int error;
error = 0;
diff --git a/sys/arm64/nvidia/tegra210/max77620_regulators.c b/sys/arm64/nvidia/tegra210/max77620_regulators.c
index af1a5af20ec3..d52aeaef1287 100644
--- a/sys/arm64/nvidia/tegra210/max77620_regulators.c
+++ b/sys/arm64/nvidia/tegra210/max77620_regulators.c
@@ -364,7 +364,7 @@ max77620_get_sel(struct max77620_reg_sc *sc, uint8_t *sel)
rv = RD1(sc->base_sc, sc->def->volt_reg, sel);
if (rv != 0) {
- printf("%s: cannot read volatge selector: %d\n",
+ printf("%s: cannot read voltage selector: %d\n",
regnode_get_name(sc->regnode), rv);
return (rv);
}
@@ -384,7 +384,7 @@ max77620_set_sel(struct max77620_reg_sc *sc, uint8_t sel)
rv = RM1(sc->base_sc, sc->def->volt_reg,
sc->def->volt_vsel_mask, sel);
if (rv != 0) {
- printf("%s: cannot set volatge selector: %d\n",
+ printf("%s: cannot set voltage selector: %d\n",
regnode_get_name(sc->regnode), rv);
return (rv);
}
diff --git a/sys/arm64/rockchip/rk_gpio.c b/sys/arm64/rockchip/rk_gpio.c
index 847bc7394dd0..8da37d516802 100644
--- a/sys/arm64/rockchip/rk_gpio.c
+++ b/sys/arm64/rockchip/rk_gpio.c
@@ -90,6 +90,11 @@ struct rk_pin_irqsrc {
uint32_t mode;
};
+struct rk_gpio_reg {
+ uint8_t single;
+ uint8_t offset;
+};
+
struct rk_gpio_softc {
device_t sc_dev;
device_t sc_busdev;
@@ -103,7 +108,7 @@ struct rk_gpio_softc {
uint32_t swporta_ddr;
uint32_t version;
struct pin_cached pin_cached[RK_GPIO_MAX_PINS];
- uint8_t regs[RK_GPIO_REGNUM];
+ struct rk_gpio_reg regs[RK_GPIO_REGNUM];
void *ihandle;
struct rk_pin_irqsrc isrcs[RK_GPIO_MAX_PINS];
};
@@ -138,14 +143,15 @@ static int rk_gpio_detach(device_t dev);
static int
rk_gpio_read_bit(struct rk_gpio_softc *sc, int reg, int bit)
{
- int offset = sc->regs[reg];
+ struct rk_gpio_reg *rk_reg = &sc->regs[reg];
uint32_t value;
- if (sc->version == RK_GPIO_TYPE_V1) {
- value = RK_GPIO_READ(sc, offset);
+ if (rk_reg->single) {
+ value = RK_GPIO_READ(sc, rk_reg->offset);
value >>= bit;
} else {
- value = RK_GPIO_READ(sc, bit > 15 ? offset + 4 : offset);
+ value = RK_GPIO_READ(sc, bit > 15 ?
+ rk_reg->offset + 4 : rk_reg->offset);
value >>= (bit % 16);
}
return (value & 1);
@@ -154,50 +160,53 @@ rk_gpio_read_bit(struct rk_gpio_softc *sc, int reg, int bit)
static void
rk_gpio_write_bit(struct rk_gpio_softc *sc, int reg, int bit, int data)
{
- int offset = sc->regs[reg];
+ struct rk_gpio_reg *rk_reg = &sc->regs[reg];
uint32_t value;
- if (sc->version == RK_GPIO_TYPE_V1) {
- value = RK_GPIO_READ(sc, offset);
+ if (rk_reg->single) {
+ value = RK_GPIO_READ(sc, rk_reg->offset);
if (data)
value |= (1 << bit);
else
value &= ~(1 << bit);
- RK_GPIO_WRITE(sc, offset, value);
+ RK_GPIO_WRITE(sc, rk_reg->offset, value);
} else {
if (data)
value = (1 << (bit % 16));
else
value = 0;
value |= (1 << ((bit % 16) + 16));
- RK_GPIO_WRITE(sc, bit > 15 ? offset + 4 : offset, value);
+ RK_GPIO_WRITE(sc, bit > 15 ?
+ rk_reg->offset + 4 : rk_reg->offset, value);
}
}
static uint32_t
rk_gpio_read_4(struct rk_gpio_softc *sc, int reg)
{
- int offset = sc->regs[reg];
+ struct rk_gpio_reg *rk_reg = &sc->regs[reg];
uint32_t value;
- if (sc->version == RK_GPIO_TYPE_V1)
- value = RK_GPIO_READ(sc, offset);
+ if (rk_reg->single)
+ value = RK_GPIO_READ(sc, rk_reg->offset);
else
- value = (RK_GPIO_READ(sc, offset) & 0xffff) |
- (RK_GPIO_READ(sc, offset + 4) << 16);
+ value = (RK_GPIO_READ(sc, rk_reg->offset) & 0xffff) |
+ (RK_GPIO_READ(sc, rk_reg->offset + 4) << 16);
return (value);
}
static void
rk_gpio_write_4(struct rk_gpio_softc *sc, int reg, uint32_t value)
{
- int offset = sc->regs[reg];
+ struct rk_gpio_reg *rk_reg = &sc->regs[reg];
- if (sc->version == RK_GPIO_TYPE_V1)
- RK_GPIO_WRITE(sc, offset, value);
+ if (rk_reg->single)
+ RK_GPIO_WRITE(sc, rk_reg->offset, value);
else {
- RK_GPIO_WRITE(sc, offset, (value & 0xffff) | 0xffff0000);
- RK_GPIO_WRITE(sc, offset + 4, (value >> 16) | 0xffff0000);
+ RK_GPIO_WRITE(sc, rk_reg->offset,
+ (value & 0xffff) | 0xffff0000);
+ RK_GPIO_WRITE(sc, rk_reg->offset + 4,
+ (value >> 16) | 0xffff0000);
}
}
@@ -313,31 +322,31 @@ rk_gpio_attach(device_t dev)
switch (sc->version) {
case RK_GPIO_TYPE_V1:
- sc->regs[RK_GPIO_SWPORTA_DR] = 0x00;
- sc->regs[RK_GPIO_SWPORTA_DDR] = 0x04;
- sc->regs[RK_GPIO_INTEN] = 0x30;
- sc->regs[RK_GPIO_INTMASK] = 0x34;
- sc->regs[RK_GPIO_INTTYPE_LEVEL] = 0x38;
- sc->regs[RK_GPIO_INT_POLARITY] = 0x3c;
- sc->regs[RK_GPIO_INT_STATUS] = 0x40;
- sc->regs[RK_GPIO_INT_RAWSTATUS] = 0x44;
- sc->regs[RK_GPIO_DEBOUNCE] = 0x48;
- sc->regs[RK_GPIO_PORTA_EOI] = 0x4c;
- sc->regs[RK_GPIO_EXT_PORTA] = 0x50;
+ sc->regs[RK_GPIO_SWPORTA_DR] = (struct rk_gpio_reg){ 1, 0x00 };
+ sc->regs[RK_GPIO_SWPORTA_DDR] = (struct rk_gpio_reg){ 1, 0x04 };
+ sc->regs[RK_GPIO_INTEN] = (struct rk_gpio_reg){ 1, 0x30 };
+ sc->regs[RK_GPIO_INTMASK] = (struct rk_gpio_reg){ 1, 0x34 };
+ sc->regs[RK_GPIO_INTTYPE_LEVEL] = (struct rk_gpio_reg){ 1, 0x38 };
+ sc->regs[RK_GPIO_INT_POLARITY] = (struct rk_gpio_reg){ 1, 0x3c };
+ sc->regs[RK_GPIO_INT_STATUS] = (struct rk_gpio_reg){ 1, 0x40 };
+ sc->regs[RK_GPIO_INT_RAWSTATUS] = (struct rk_gpio_reg){ 1, 0x44 };
+ sc->regs[RK_GPIO_DEBOUNCE] = (struct rk_gpio_reg){ 1, 0x48 };
+ sc->regs[RK_GPIO_PORTA_EOI] = (struct rk_gpio_reg){ 1, 0x4c };
+ sc->regs[RK_GPIO_EXT_PORTA] = (struct rk_gpio_reg){ 1, 0x50 };
break;
case RK_GPIO_TYPE_V2:
- sc->regs[RK_GPIO_SWPORTA_DR] = 0x00;
- sc->regs[RK_GPIO_SWPORTA_DDR] = 0x08;
- sc->regs[RK_GPIO_INTEN] = 0x10;
- sc->regs[RK_GPIO_INTMASK] = 0x18;
- sc->regs[RK_GPIO_INTTYPE_LEVEL] = 0x20;
- sc->regs[RK_GPIO_INTTYPE_BOTH] = 0x30;
- sc->regs[RK_GPIO_INT_POLARITY] = 0x28;
- sc->regs[RK_GPIO_INT_STATUS] = 0x50;
- sc->regs[RK_GPIO_INT_RAWSTATUS] = 0x58;
- sc->regs[RK_GPIO_DEBOUNCE] = 0x38;
- sc->regs[RK_GPIO_PORTA_EOI] = 0x60;
- sc->regs[RK_GPIO_EXT_PORTA] = 0x70;
+ sc->regs[RK_GPIO_SWPORTA_DR] = (struct rk_gpio_reg){ 0, 0x00 };
+ sc->regs[RK_GPIO_SWPORTA_DDR] = (struct rk_gpio_reg){ 0, 0x08 };
+ sc->regs[RK_GPIO_INTEN] = (struct rk_gpio_reg){ 0, 0x10 };
+ sc->regs[RK_GPIO_INTMASK] = (struct rk_gpio_reg){ 0, 0x18 };
+ sc->regs[RK_GPIO_INTTYPE_LEVEL] = (struct rk_gpio_reg){ 0, 0x20 };
+ sc->regs[RK_GPIO_INTTYPE_BOTH] = (struct rk_gpio_reg){ 0, 0x30 };
+ sc->regs[RK_GPIO_INT_POLARITY] = (struct rk_gpio_reg){ 0, 0x28 };
+ sc->regs[RK_GPIO_INT_STATUS] = (struct rk_gpio_reg){ 1, 0x50 };
+ sc->regs[RK_GPIO_INT_RAWSTATUS] = (struct rk_gpio_reg){ 1, 0x58 };
+ sc->regs[RK_GPIO_DEBOUNCE] = (struct rk_gpio_reg){ 0, 0x38 };
+ sc->regs[RK_GPIO_PORTA_EOI] = (struct rk_gpio_reg){ 0, 0x60 };
+ sc->regs[RK_GPIO_EXT_PORTA] = (struct rk_gpio_reg){ 1, 0x70 };
break;
default:
device_printf(dev, "Unknown gpio version %08x\n", sc->version);
@@ -371,12 +380,13 @@ rk_gpio_attach(device_t dev)
sc->swporta_ddr = rk_gpio_read_4(sc, RK_GPIO_SWPORTA_DDR);
RK_GPIO_UNLOCK(sc);
- sc->sc_busdev = gpiobus_attach_bus(dev);
+ sc->sc_busdev = gpiobus_add_bus(dev);
if (sc->sc_busdev == NULL) {
rk_gpio_detach(dev);
return (ENXIO);
}
+ bus_attach_children(dev);
return (0);
}
@@ -393,7 +403,7 @@ rk_gpio_detach(device_t dev)
mtx_destroy(&sc->sc_mtx);
clk_disable(sc->clk);
- return(0);
+ return (0);
}
static device_t
@@ -470,7 +480,7 @@ rk_gpio_pin_getcaps(device_t dev, uint32_t pin, uint32_t *caps)
{
if (pin >= RK_GPIO_MAX_PINS)
- return EINVAL;
+ return (EINVAL);
*caps = RK_GPIO_DEFAULT_CAPS;
return (0);
@@ -653,46 +663,108 @@ rk_gpio_get_node(device_t bus, device_t dev)
}
static int
-rk_pic_map_intr(device_t dev, struct intr_map_data *data,
- struct intr_irqsrc **isrcp)
+rk_gpio_pic_map_fdt(struct rk_gpio_softc *sc,
+ struct intr_map_data_fdt *daf,
+ u_int *irqp, uint32_t *modep)
{
- struct rk_gpio_softc *sc = device_get_softc(dev);
- struct intr_map_data_gpio *gdata;
uint32_t irq;
+ uint32_t mode;
- if (data->type != INTR_MAP_DATA_GPIO) {
- device_printf(dev, "Wrong type\n");
- return (ENOTSUP);
- }
- gdata = (struct intr_map_data_gpio *)data;
- irq = gdata->gpio_pin_num;
+ if (daf->ncells != 2)
+ return (EINVAL);
+
+ irq = daf->cells[0];
+ if (irq >= RK_GPIO_MAX_PINS)
+ return (EINVAL);
+
+ /* Only reasonable modes are supported. */
+ if (daf->cells[1] == 1)
+ mode = GPIO_INTR_EDGE_RISING;
+ else if (daf->cells[1] == 2)
+ mode = GPIO_INTR_EDGE_FALLING;
+ else if (daf->cells[1] == 3)
+ mode = GPIO_INTR_EDGE_BOTH;
+ else if (daf->cells[1] == 4)
+ mode = GPIO_INTR_LEVEL_HIGH;
+ else if (daf->cells[1] == 8)
+ mode = GPIO_INTR_LEVEL_LOW;
+ else
+ return (EINVAL);
+
+ *irqp = irq;
+ if (modep != NULL)
+ *modep = mode;
+ return (0);
+}
+
+static int
+rk_gpio_pic_map_gpio(struct rk_gpio_softc *sc,
+ struct intr_map_data_gpio *dag,
+ u_int *irqp, uint32_t *modep)
+{
+ uint32_t irq;
+ irq = dag->gpio_pin_num;
if (irq >= RK_GPIO_MAX_PINS) {
- device_printf(dev, "Invalid interrupt %u\n", irq);
+ device_printf(sc->sc_dev, "Invalid interrupt %u\n",
+ irq);
return (EINVAL);
}
- *isrcp = RK_GPIO_ISRC(sc, irq);
+
+ *irqp = irq;
+ if (modep != NULL)
+ *modep = dag->gpio_intr_mode;
return (0);
}
static int
+rk_gpio_pic_map(struct rk_gpio_softc *sc, struct intr_map_data *data,
+ u_int *irqp, uint32_t *modep)
+{
+ switch (data->type) {
+ case INTR_MAP_DATA_FDT:
+ return (rk_gpio_pic_map_fdt(sc,
+ (struct intr_map_data_fdt *)data, irqp, modep));
+ case INTR_MAP_DATA_GPIO:
+ return (rk_gpio_pic_map_gpio(sc,
+ (struct intr_map_data_gpio *)data, irqp, modep));
+ default:
+ device_printf(sc->sc_dev, "Wrong type\n");
+ return (ENOTSUP);
+ }
+}
+
+static int
+rk_pic_map_intr(device_t dev, struct intr_map_data *data,
+ struct intr_irqsrc **isrcp)
+{
+ int error;
+ struct rk_gpio_softc *sc = device_get_softc(dev);
+ uint32_t irq;
+
+ error = rk_gpio_pic_map(sc, data, &irq, NULL);
+ if (error == 0)
+ *isrcp = RK_GPIO_ISRC(sc, irq);
+ return (error);
+}
+
+static int
rk_pic_setup_intr(device_t dev, struct intr_irqsrc *isrc,
struct resource *res, struct intr_map_data *data)
{
struct rk_gpio_softc *sc = device_get_softc(dev);
struct rk_pin_irqsrc *rkisrc = (struct rk_pin_irqsrc *)isrc;
- struct intr_map_data_gpio *gdata;
uint32_t mode;
- uint8_t pin;
+ uint32_t pin;
if (!data) {
device_printf(dev, "No map data\n");
return (ENOTSUP);
}
- gdata = (struct intr_map_data_gpio *)data;
- mode = gdata->gpio_intr_mode;
- pin = gdata->gpio_pin_num;
- if (rkisrc->irq != gdata->gpio_pin_num) {
+ if (rk_gpio_pic_map(sc, data, &pin, &mode) != 0)
+ return (EINVAL);
+
+ if (rkisrc->irq != pin) {
device_printf(dev, "Interrupts don't match\n");
return (EINVAL);
}
@@ -779,6 +851,10 @@ static device_method_t rk_gpio_methods[] = {
DEVMETHOD(device_attach, rk_gpio_attach),
DEVMETHOD(device_detach, rk_gpio_detach),
+ /* Bus interface */
+ DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
+ DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
+
/* GPIO protocol */
DEVMETHOD(gpio_get_bus, rk_gpio_get_bus),
DEVMETHOD(gpio_pin_max, rk_gpio_pin_max),
diff --git a/sys/arm64/rockchip/rk_grf_gpio.c b/sys/arm64/rockchip/rk_grf_gpio.c
index 6818bd85bb95..6ac419889614 100644
--- a/sys/arm64/rockchip/rk_grf_gpio.c
+++ b/sys/arm64/rockchip/rk_grf_gpio.c
@@ -181,11 +181,12 @@ rk_grf_gpio_attach(device_t dev)
return (ENXIO);
}
- sc->sc_busdev = gpiobus_attach_bus(dev);
+ sc->sc_busdev = gpiobus_add_bus(dev);
if (sc->sc_busdev == NULL) {
return (ENXIO);
}
+ bus_attach_children(dev);
return (0);
}
diff --git a/sys/arm64/rockchip/rk_i2s.c b/sys/arm64/rockchip/rk_i2s.c
index 5f1b6bbdeabf..856fa20e6ce4 100644
--- a/sys/arm64/rockchip/rk_i2s.c
+++ b/sys/arm64/rockchip/rk_i2s.c
@@ -403,10 +403,10 @@ rk_i2s_dai_intr(device_t dev, struct snd_dbuf *play_buf, struct snd_dbuf *rec_bu
count = sndbuf_getready(play_buf);
if (count > FIFO_SIZE - 1)
count = FIFO_SIZE - 1;
- size = sndbuf_getsize(play_buf);
+ size = play_buf->bufsize;
readyptr = sndbuf_getreadyptr(play_buf);
- samples = (uint8_t*)sndbuf_getbuf(play_buf);
+ samples = play_buf->buf;
written = 0;
for (; level < count; level++) {
val = (samples[readyptr++ % size] << 0);
@@ -426,9 +426,9 @@ rk_i2s_dai_intr(device_t dev, struct snd_dbuf *play_buf, struct snd_dbuf *rec_bu
uint8_t *samples;
uint32_t count, size, freeptr, recorded;
count = sndbuf_getfree(rec_buf);
- size = sndbuf_getsize(rec_buf);
+ size = rec_buf->bufsize;
freeptr = sndbuf_getfreeptr(rec_buf);
- samples = (uint8_t*)sndbuf_getbuf(rec_buf);
+ samples = rec_buf->buf;
recorded = 0;
if (level > count / 4)
level = count / 4;
diff --git a/sys/arm64/rockchip/rk_tsadc.c b/sys/arm64/rockchip/rk_tsadc.c
index e6cbad36f697..d83b09480a0c 100644
--- a/sys/arm64/rockchip/rk_tsadc.c
+++ b/sys/arm64/rockchip/rk_tsadc.c
@@ -484,7 +484,7 @@ tsadc_init_tsensor(struct tsadc_softc *sc, struct tsensor *sensor)
WR4(sc, TSADC_INT_EN, val);
/* Shutdown temperature */
- val = tsadc_raw_to_temp(sc, sc->shutdown_temp);
+ val = tsadc_temp_to_raw(sc, sc->shutdown_temp);
WR4(sc, TSADC_COMP_SHUT(sensor->channel), val);
val = RD4(sc, TSADC_AUTO_CON);
val |= TSADC_AUTO_SRC_EN(sensor->channel);
diff --git a/sys/arm64/spe/arm_spe.h b/sys/arm64/spe/arm_spe.h
new file mode 100644
index 000000000000..5dba20673a77
--- /dev/null
+++ b/sys/arm64/spe/arm_spe.h
@@ -0,0 +1,77 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ARM64_ARM_SPE_H_
+#define _ARM64_ARM_SPE_H_
+
+/* kqueue events */
+#define ARM_SPE_KQ_BUF 138
+#define ARM_SPE_KQ_SHUTDOWN 139
+#define ARM_SPE_KQ_SIGNAL 140
+
+/* spe_backend_read() u64 data encoding */
+#define KQ_BUF_POS_SHIFT 0
+#define KQ_BUF_POS (1 << KQ_BUF_POS_SHIFT)
+#define KQ_PARTREC_SHIFT 1
+#define KQ_PARTREC (1 << KQ_PARTREC_SHIFT)
+#define KQ_FINAL_BUF_SHIFT 2
+#define KQ_FINAL_BUF (1 << KQ_FINAL_BUF_SHIFT)
+
+enum arm_spe_ctx_field {
+ ARM_SPE_CTX_NONE,
+ ARM_SPE_CTX_PID,
+ ARM_SPE_CTX_CPU_ID
+};
+
+enum arm_spe_profiling_level {
+ ARM_SPE_KERNEL_AND_USER,
+ ARM_SPE_KERNEL_ONLY,
+ ARM_SPE_USER_ONLY
+};
+struct arm_spe_config {
+ /* Minimum interval is IMP DEF up to maximum 24 bit value */
+ uint32_t interval;
+
+ /* Profile kernel (EL1), userspace (EL0) or both */
+ enum arm_spe_profiling_level level;
+
+ /*
+ * Configure context field in SPE records to store either the
+ * current PID, the CPU ID or neither
+ *
+ * In PID mode, kernel threads without a process context are
+ * logged as PID 0
+ */
+ enum arm_spe_ctx_field ctx_field;
+};
+
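+/*
+ * Passed from userspace with the HWT_IOC_SVC_BUF ioctl to mark a buffer
+ * half as copied out
+ */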
+struct arm_spe_svc_buf {
+ uint32_t ident;
+ uint8_t buf_idx : 1;
+};
+
+#endif /* _ARM64_ARM_SPE_H_ */
diff --git a/sys/arm64/spe/arm_spe_acpi.c b/sys/arm64/spe/arm_spe_acpi.c
new file mode 100644
index 000000000000..b9f40448d940
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_acpi.c
@@ -0,0 +1,146 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+
+#include <contrib/dev/acpica/include/acpi.h>
+#include <dev/acpica/acpivar.h>
+
+#include <arm64/spe/arm_spe_dev.h>
+
+static device_identify_t arm_spe_acpi_identify;
+static device_probe_t arm_spe_acpi_probe;
+
+static device_method_t arm_spe_acpi_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_identify, arm_spe_acpi_identify),
+ DEVMETHOD(device_probe, arm_spe_acpi_probe),
+
+ DEVMETHOD_END,
+};
+
+DEFINE_CLASS_1(spe, arm_spe_acpi_driver, arm_spe_acpi_methods,
+ sizeof(struct arm_spe_softc), arm_spe_driver);
+
+DRIVER_MODULE(spe, acpi, arm_spe_acpi_driver, 0, 0);
+
+struct madt_data {
+ u_int irq;
+ bool found;
+ bool valid;
+};
+
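+/*
+ * Record the SPE interrupt from each GICC entry in the MADT; every CPU
+ * must report the same non-zero IRQ or the device is not added
+ */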
+static void
+madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
+{
+ ACPI_MADT_GENERIC_INTERRUPT *intr;
+ struct madt_data *madt_data;
+ u_int irq;
+
+ madt_data = (struct madt_data *)arg;
+
+ /* Exit early if we have decided not to attach */
+ if (!madt_data->valid)
+ return;
+
+ switch(entry->Type) {
+ case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
+ intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry;
+ irq = intr->SpeInterrupt;
+
+ if (irq == 0) {
+ madt_data->valid = false;
+ } else if (!madt_data->found) {
+ madt_data->found = true;
+ madt_data->irq = irq;
+ } else if (madt_data->irq != irq) {
+ madt_data->valid = false;
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+arm_spe_acpi_identify(driver_t *driver, device_t parent)
+{
+ struct madt_data madt_data;
+ ACPI_TABLE_MADT *madt;
+ device_t dev;
+ vm_paddr_t physaddr;
+
+ physaddr = acpi_find_table(ACPI_SIG_MADT);
+ if (physaddr == 0)
+ return;
+
+ madt = acpi_map_table(physaddr, ACPI_SIG_MADT);
+ if (madt == NULL) {
+ device_printf(parent, "spe: Unable to map the MADT\n");
+ return;
+ }
+
+ madt_data.irq = 0;
+ madt_data.found = false;
+ madt_data.valid = true;
+
+ acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
+ madt_handler, &madt_data);
+
+ if (!madt_data.found || !madt_data.valid)
+ goto out;
+
+ MPASS(madt_data.irq != 0);
+
+ dev = BUS_ADD_CHILD(parent, 0, "spe", -1);
+ if (dev == NULL) {
+ device_printf(parent, "add spe child failed\n");
+ goto out;
+ }
+
+ BUS_SET_RESOURCE(parent, dev, SYS_RES_IRQ, 0, madt_data.irq, 1);
+
+out:
+ acpi_unmap_table(madt);
+}
+
+static int
+arm_spe_acpi_probe(device_t dev)
+{
+ device_set_desc(dev, "ARM Statistical Profiling Extension");
+ return (BUS_PROBE_NOWILDCARD);
+}
diff --git a/sys/arm64/spe/arm_spe_backend.c b/sys/arm64/spe/arm_spe_backend.c
new file mode 100644
index 000000000000..b4e1132f9cbc
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_backend.c
@@ -0,0 +1,586 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Arm Statistical Profiling Extension (SPE) backend
+ *
+ * Basic SPE operation
+ *
+ * SPE is enabled and configured on a per-core basis, with each core requiring
+ * separate code to enable and configure. Each core also requires a separate
+ * buffer passed as config where the CPU will write profiling data. When the
+ * profiling buffer is full, an interrupt will be taken on the same CPU.
+ *
+ * Driver Design
+ *
+ * - HWT allocates a large single buffer per core. This buffer is split in half
+ * to create a 2 element circular buffer (aka ping-pong buffer) where the
+ * kernel writes to one half while userspace is copying the other half
+ * - SMP calls are used to enable and configure each core, with SPE initially
+ * configured to write to the first half of the buffer
+ * - When the first half of the buffer is full, a buffer full interrupt will
+ * immediately switch writing to the second half. The kernel adds the details
+ * of the half that needs copying to a FIFO STAILQ and notifies userspace via
+ * kqueue by sending a ARM_SPE_KQ_BUF kevent with how many buffers on the
+ * queue need servicing
+ * - The kernel responds to HWT_IOC_BUFPTR_GET ioctl by sending details of the
+ * first item from the queue
+ * - The buffers pending copying will not be overwritten until an
+ * HWT_IOC_SVC_BUF ioctl is received from userspace confirming the data has
+ * been copied out
+ * - In the case where both halves of the buffer are full, profiling will be
+ * paused until notification via HWT_IOC_SVC_BUF is received
+ *
+ * Future improvements and limitations
+ *
+ * - Using large buffer sizes should minimise pauses and loss of profiling
+ * data while the kernel is waiting for userspace to copy out data. Since it is
+ * generally expected that consuming (copying) this data is faster than
+ * producing it, in practice this has not so far been an issue. If it does
+ * prove to be an issue even with large buffer sizes then additional buffering
+ * i.e. n element circular buffers might be required.
+ *
+ * - kqueue can only notify and queue one kevent of the same type, with
+ * subsequent events overwriting data in the first event. The kevent
+ * ARM_SPE_KQ_BUF can therefore only contain the number of buffers on the
+ * STAILQ, incrementing each time a new buffer is full. In this case kqueue
+ * serves just as a notification to userspace to wake up and query the kernel
+ * with the appropriate ioctl. An alternative might be custom kevents where
+ * the kevent identifier is encoded with something like n+cpu_id or n+tid. In
+ * this case data could be sent directly with kqueue via the kevent data and
+ * fflags elements, avoiding the extra ioctl.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/hwt.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <machine/bus.h>
+
+#include <arm64/spe/arm_spe_dev.h>
+
+#include <dev/hwt/hwt_vm.h>
+#include <dev/hwt/hwt_backend.h>
+#include <dev/hwt/hwt_config.h>
+#include <dev/hwt/hwt_context.h>
+#include <dev/hwt/hwt_cpu.h>
+#include <dev/hwt/hwt_thread.h>
+
+MALLOC_DECLARE(M_ARM_SPE);
+
+extern u_int mp_maxid;
+extern struct taskqueue *taskqueue_arm_spe;
+
+int spe_backend_disable_smp(struct hwt_context *ctx);
+
+static device_t spe_dev;
+static struct hwt_backend_ops spe_ops;
+static struct hwt_backend backend = {
+ .ops = &spe_ops,
+ .name = "spe",
+ .kva_req = 1,
+};
+
+static struct arm_spe_info *spe_info;
+
+static int
+spe_backend_init_thread(struct hwt_context *ctx)
+{
+ return (ENOTSUP);
+}
+
+static void
+spe_backend_init_cpu(struct hwt_context *ctx)
+{
+ struct arm_spe_info *info;
+ struct arm_spe_softc *sc = device_get_softc(spe_dev);
+ char lock_name[32];
+ char *tmp = "Arm SPE lock/cpu/";
+ int cpu_id;
+
+ spe_info = malloc(sizeof(struct arm_spe_info) * mp_ncpus,
+ M_ARM_SPE, M_WAITOK | M_ZERO);
+
+ sc->spe_info = spe_info;
+
+ CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+ info = &spe_info[cpu_id];
+ info->sc = sc;
+ info->ident = cpu_id;
+ info->buf_info[0].info = info;
+ info->buf_info[0].buf_idx = 0;
+ info->buf_info[1].info = info;
+ info->buf_info[1].buf_idx = 1;
+ snprintf(lock_name, sizeof(lock_name), "%s%d", tmp, cpu_id);
+ mtx_init(&info->lock, lock_name, NULL, MTX_SPIN);
+ }
+}
+
+static int
+spe_backend_init(struct hwt_context *ctx)
+{
+ struct arm_spe_softc *sc = device_get_softc(spe_dev);
+ int error = 0;
+
+ /*
+ * HWT currently specifies that the buffer size must be a multiple of
+ * PAGE_SIZE, i.e. a minimum of 4KB, while the maximum PMBIDR.Align is
+ * 2KB, so a misaligned buffer should never happen - but it's good to
+ * sanity check.
+ */
+ if (ctx->bufsize % sc->kva_align != 0)
+ return (EINVAL);
+
+ /*
+ * Since we're splitting the buffer in half and PMBLIMITR needs to be
+ * page aligned, the minimum buffer size needs to be 2x PAGE_SIZE
+ */
+ if (ctx->bufsize < (2 * PAGE_SIZE))
+ return (EINVAL);
+
+ sc->ctx = ctx;
+ sc->kqueue_fd = ctx->kqueue_fd;
+ sc->hwt_td = ctx->hwt_td;
+
+ if (ctx->mode == HWT_MODE_THREAD)
+ error = spe_backend_init_thread(ctx);
+ else
+ spe_backend_init_cpu(ctx);
+
+ return (error);
+}
+
+#ifdef ARM_SPE_DEBUG
+static void hex_dump(uint8_t *buf, size_t len)
+{
+ size_t i;
+
+ printf("--------------------------------------------------------------\n");
+ for (i = 0; i < len; ++i) {
+ if (i % 8 == 0) {
+ printf(" ");
+ }
+ if (i % 16 == 0) {
+ if (i != 0) {
+ printf("\r\n");
+ }
+ printf("\t");
+ }
+ printf("%02X ", buf[i]);
+ }
+ printf("\r\n");
+}
+#endif
+
+static int
+spe_backend_deinit(struct hwt_context *ctx)
+{
+#ifdef ARM_SPE_DEBUG
+ struct arm_spe_info *info;
+ int cpu_id;
+
+ CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+ info = &spe_info[cpu_id];
+ hex_dump((void *)info->kvaddr, 128);
+ hex_dump((void *)(info->kvaddr + (info->buf_size/2)), 128);
+ }
+#endif
+
+ if (ctx->state == CTX_STATE_RUNNING) {
+ spe_backend_disable_smp(ctx);
+ ctx->state = CTX_STATE_STOPPED;
+ }
+
+ free(spe_info, M_ARM_SPE);
+
+ return (0);
+}
+
+static uint64_t
+arm_spe_min_interval(struct arm_spe_softc *sc)
+{
+ /* IMPLEMENTATION DEFINED */
+ switch (PMSIDR_Interval_VAL(sc->pmsidr))
+ {
+ case PMSIDR_Interval_256:
+ return (256);
+ case PMSIDR_Interval_512:
+ return (512);
+ case PMSIDR_Interval_768:
+ return (768);
+ case PMSIDR_Interval_1024:
+ return (1024);
+ case PMSIDR_Interval_1536:
+ return (1536);
+ case PMSIDR_Interval_2048:
+ return (2048);
+ case PMSIDR_Interval_3072:
+ return (3072);
+ case PMSIDR_Interval_4096:
+ return (4096);
+ default:
+ return (4096);
+ }
+}
+
+static inline void
+arm_spe_set_interval(struct arm_spe_info *info, uint64_t interval)
+{
+ uint64_t min_interval = arm_spe_min_interval(info->sc);
+
+ interval = MAX(interval, min_interval);
+ interval = MIN(interval, 1 << 24); /* max 24 bits */
+
+ dprintf("%s %lu\n", __func__, interval);
+
+ info->pmsirr &= ~(PMSIRR_INTERVAL_MASK);
+ info->pmsirr |= (interval << PMSIRR_INTERVAL_SHIFT);
+}
+
+static int
+spe_backend_configure(struct hwt_context *ctx, int cpu_id, int session_id)
+{
+ struct arm_spe_info *info = &spe_info[cpu_id];
+ struct arm_spe_config *cfg;
+ int err = 0;
+
+ mtx_lock_spin(&info->lock);
+ info->ident = cpu_id;
+ /* Set defaults */
+ info->pmsfcr = 0;
+ info->pmsevfr = 0xFFFFFFFFFFFFFFFFUL;
+ info->pmslatfr = 0;
+ info->pmsirr =
+ (arm_spe_min_interval(info->sc) << PMSIRR_INTERVAL_SHIFT)
+ | PMSIRR_RND;
+ info->pmsicr = 0;
+ info->pmscr = PMSCR_TS | PMSCR_PA | PMSCR_CX | PMSCR_E1SPE | PMSCR_E0SPE;
+
+ if (ctx->config != NULL &&
+ ctx->config_size == sizeof(struct arm_spe_config) &&
+ ctx->config_version == 1) {
+ cfg = (struct arm_spe_config *)ctx->config;
+ if (cfg->interval)
+ arm_spe_set_interval(info, cfg->interval);
+ if (cfg->level == ARM_SPE_KERNEL_ONLY)
+ info->pmscr &= ~(PMSCR_E0SPE); /* turn off user */
+ if (cfg->level == ARM_SPE_USER_ONLY)
+ info->pmscr &= ~(PMSCR_E1SPE); /* turn off kern */
+ if (cfg->ctx_field)
+ info->ctx_field = cfg->ctx_field;
+ } else
+ err = (EINVAL);
+ mtx_unlock_spin(&info->lock);
+
+ return (err);
+}
+
+
+static void
+arm_spe_enable(void *arg __unused)
+{
+ struct arm_spe_info *info = &spe_info[PCPU_GET(cpuid)];
+ uint64_t base, limit;
+
+ dprintf("%s on cpu:%d\n", __func__, PCPU_GET(cpuid));
+
+ mtx_lock_spin(&info->lock);
+
+ if (info->ctx_field == ARM_SPE_CTX_CPU_ID)
+ WRITE_SPECIALREG(CONTEXTIDR_EL1_REG, PCPU_GET(cpuid));
+
+ WRITE_SPECIALREG(PMSFCR_EL1_REG, info->pmsfcr);
+ WRITE_SPECIALREG(PMSEVFR_EL1_REG, info->pmsevfr);
+ WRITE_SPECIALREG(PMSLATFR_EL1_REG, info->pmslatfr);
+
+ /* Set the sampling interval */
+ WRITE_SPECIALREG(PMSIRR_EL1_REG, info->pmsirr);
+ isb();
+
+ /* Write 0 here before enabling sampling */
+ WRITE_SPECIALREG(PMSICR_EL1_REG, info->pmsicr);
+ isb();
+
+ base = info->kvaddr;
+ limit = base + (info->buf_size/2);
+ /* Enable the buffer */
+ limit &= PMBLIMITR_LIMIT_MASK; /* Zero lower 12 bits */
+ limit |= PMBLIMITR_E;
+ /* Set the base and limit */
+ WRITE_SPECIALREG(PMBPTR_EL1_REG, base);
+ WRITE_SPECIALREG(PMBLIMITR_EL1_REG, limit);
+ isb();
+
+ /* Enable sampling */
+ WRITE_SPECIALREG(PMSCR_EL1_REG, info->pmscr);
+ isb();
+
+ info->enabled = true;
+
+ mtx_unlock_spin(&info->lock);
+}
+
+static int
+spe_backend_enable_smp(struct hwt_context *ctx)
+{
+ struct arm_spe_info *info;
+ struct hwt_vm *vm;
+ int cpu_id;
+
+ HWT_CTX_LOCK(ctx);
+ CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+ vm = hwt_cpu_get(ctx, cpu_id)->vm;
+
+ info = &spe_info[cpu_id];
+
+ mtx_lock_spin(&info->lock);
+ info->kvaddr = vm->kvaddr;
+ info->buf_size = ctx->bufsize;
+ mtx_unlock_spin(&info->lock);
+ }
+ HWT_CTX_UNLOCK(ctx);
+
+ cpu_id = CPU_FFS(&ctx->cpu_map) - 1;
+ info = &spe_info[cpu_id];
+ if (info->ctx_field == ARM_SPE_CTX_PID)
+ arm64_pid_in_contextidr = true;
+ else
+ arm64_pid_in_contextidr = false;
+
+ smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier,
+ arm_spe_enable, smp_no_rendezvous_barrier, NULL);
+
+ return (0);
+}
+
+void
+arm_spe_disable(void *arg __unused)
+{
+ struct arm_spe_info *info = &spe_info[PCPU_GET(cpuid)];
+ struct arm_spe_buf_info *buf = &info->buf_info[info->buf_idx];
+
+ if (!info->enabled)
+ return;
+
+ dprintf("%s on cpu:%d\n", __func__, PCPU_GET(cpuid));
+
+ /* Disable profiling */
+ WRITE_SPECIALREG(PMSCR_EL1_REG, 0x0);
+ isb();
+
+ /* Drain any remaining tracing data */
+ psb_csync();
+ dsb(nsh);
+
+ /* Disable the profiling buffer */
+ WRITE_SPECIALREG(PMBLIMITR_EL1_REG, 0);
+ isb();
+
+ /* Clear interrupt status reg */
+ WRITE_SPECIALREG(PMBSR_EL1_REG, 0x0);
+
+ /* Clear PID/CPU_ID from context ID reg */
+ WRITE_SPECIALREG(CONTEXTIDR_EL1_REG, 0);
+
+ mtx_lock_spin(&info->lock);
+ buf->pmbptr = READ_SPECIALREG(PMBPTR_EL1_REG);
+ info->enabled = false;
+ mtx_unlock_spin(&info->lock);
+}
+
+int
+spe_backend_disable_smp(struct hwt_context *ctx)
+{
+ struct kevent kev;
+ struct arm_spe_info *info;
+ struct arm_spe_buf_info *buf;
+ int cpu_id;
+ int ret;
+
+ /* Disable and send out remaining data in bufs */
+ smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier,
+ arm_spe_disable, smp_no_rendezvous_barrier, NULL);
+
+ CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+ info = &spe_info[cpu_id];
+ buf = &info->buf_info[info->buf_idx];
+ arm_spe_send_buffer(buf, 0);
+ }
+
+ arm64_pid_in_contextidr = false;
+
+ /*
+ * Tracing on all CPUs has been disabled, and we've sent write ptr
+ * offsets for all bufs - let userspace know it can shutdown
+ */
+ EV_SET(&kev, ARM_SPE_KQ_SHUTDOWN, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
+ ret = kqfd_register(ctx->kqueue_fd, &kev, ctx->hwt_td, M_WAITOK);
+ if (ret)
+ dprintf("%s kqfd_register ret:%d\n", __func__, ret);
+
+ return (0);
+}
+
+static void
+spe_backend_stop(struct hwt_context *ctx)
+{
+ spe_backend_disable_smp(ctx);
+}
+
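+/*
+ * Runs on the target CPU via smp_rendezvous_cpus() once userspace has
+ * serviced the buffer half that stalled profiling
+ */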
+static void
+arm_spe_reenable(void *arg __unused)
+{
+ struct arm_spe_info *info = &spe_info[PCPU_GET(cpuid)];
+
+ WRITE_SPECIALREG(PMSCR_EL1_REG, info->pmscr);
+ isb();
+}
+
+static int
+spe_backend_svc_buf(struct hwt_context *ctx, void *data, size_t data_size,
+ int data_version)
+{
+ struct arm_spe_info *info;
+ struct arm_spe_buf_info *buf;
+ struct arm_spe_svc_buf *s;
+ int err = 0;
+ cpuset_t cpu_set;
+
+ if (data_size != sizeof(struct arm_spe_svc_buf))
+ return (E2BIG);
+
+ if (data_version != 1)
+ return (EINVAL);
+
+ s = (struct arm_spe_svc_buf *)data;
+ if (s->buf_idx > 1)
+ return (ENODEV);
+ if (s->ident >= mp_ncpus)
+ return (EINVAL);
+
+ info = &spe_info[s->ident];
+ mtx_lock_spin(&info->lock);
+
+ buf = &info->buf_info[s->buf_idx];
+
+ if (!info->enabled) {
+ err = ENXIO;
+ goto end;
+ }
+
+ /* Clear the flag that signals the buffer needs servicing */
+ buf->buf_svc = false;
+
+ /* Re-enable profiling if we've been waiting for this notification */
+ if (buf->buf_wait) {
+ CPU_SETOF(s->ident, &cpu_set);
+
+ mtx_unlock_spin(&info->lock);
+ smp_rendezvous_cpus(cpu_set, smp_no_rendezvous_barrier,
+ arm_spe_reenable, smp_no_rendezvous_barrier, NULL);
+ mtx_lock_spin(&info->lock);
+
+ buf->buf_wait = false;
+ }
+
+end:
+ mtx_unlock_spin(&info->lock);
+ return (err);
+}
+
+static int
+spe_backend_read(struct hwt_vm *vm, int *ident, vm_offset_t *offset,
+ uint64_t *data)
+{
+ struct arm_spe_queue *q;
+ struct arm_spe_softc *sc = device_get_softc(spe_dev);
+ int error = 0;
+
+ mtx_lock_spin(&sc->sc_lock);
+
+ /* Return the first pending buffer that needs servicing */
+ q = STAILQ_FIRST(&sc->pending);
+ if (q == NULL) {
+ error = ENOENT;
+ goto error;
+ }
+ *ident = q->ident;
+ *offset = q->offset;
+ *data = (q->buf_idx << KQ_BUF_POS_SHIFT) |
+ (q->partial_rec << KQ_PARTREC_SHIFT) |
+ (q->final_buf << KQ_FINAL_BUF_SHIFT);
+
+ STAILQ_REMOVE_HEAD(&sc->pending, next);
+ sc->npending--;
+
+error:
+ mtx_unlock_spin(&sc->sc_lock);
+ if (error)
+ return (error);
+
+ free(q, M_ARM_SPE);
+ return (0);
+}
+
+static struct hwt_backend_ops spe_ops = {
+ .hwt_backend_init = spe_backend_init,
+ .hwt_backend_deinit = spe_backend_deinit,
+
+ .hwt_backend_configure = spe_backend_configure,
+ .hwt_backend_svc_buf = spe_backend_svc_buf,
+ .hwt_backend_stop = spe_backend_stop,
+
+ .hwt_backend_enable_smp = spe_backend_enable_smp,
+ .hwt_backend_disable_smp = spe_backend_disable_smp,
+
+ .hwt_backend_read = spe_backend_read,
+};
+
+int
+spe_register(device_t dev)
+{
+ spe_dev = dev;
+
+ return (hwt_backend_register(&backend));
+}
diff --git a/sys/arm64/spe/arm_spe_dev.c b/sys/arm64/spe/arm_spe_dev.c
new file mode 100644
index 000000000000..8a834197eeef
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_dev.c
@@ -0,0 +1,324 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/event.h>
+#include <sys/hwt.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/smp.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+
+#include <machine/bus.h>
+
+#include <arm64/spe/arm_spe.h>
+#include <arm64/spe/arm_spe_dev.h>
+
+MALLOC_DEFINE(M_ARM_SPE, "armspe", "Arm SPE tracing");
+
+/*
+ * taskqueue(9) used for sleepable routines called from interrupt handlers
+ */
+TASKQUEUE_FAST_DEFINE_THREAD(arm_spe);
+
+void arm_spe_send_buffer(void *, int);
+static void arm_spe_error(void *, int);
+static int arm_spe_intr(void *);
+device_attach_t arm_spe_attach;
+
+static device_method_t arm_spe_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_attach, arm_spe_attach),
+
+ DEVMETHOD_END,
+};
+
+DEFINE_CLASS_0(spe, arm_spe_driver, arm_spe_methods,
+ sizeof(struct arm_spe_softc));
+
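+/* The maximum architectural PMBIDR_EL1.Align is 2KB */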
+#define ARM_SPE_KVA_MAX_ALIGN UL(2048)
+
+int
+arm_spe_attach(device_t dev)
+{
+ struct arm_spe_softc *sc;
+ int error, rid;
+
+ sc = device_get_softc(dev);
+ sc->dev = dev;
+
+ sc->pmbidr = READ_SPECIALREG(PMBIDR_EL1_REG);
+ sc->pmsidr = READ_SPECIALREG(PMSIDR_EL1_REG);
+ device_printf(dev, "PMBIDR_EL1: %#lx\n", sc->pmbidr);
+ device_printf(dev, "PMSIDR_EL1: %#lx\n", sc->pmsidr);
+ if ((sc->pmbidr & PMBIDR_P) != 0) {
+ device_printf(dev, "Profiling Buffer is owned by a higher Exception level\n");
+ return (EPERM);
+ }
+
+ sc->kva_align = 1 << ((sc->pmbidr & PMBIDR_Align_MASK) >> PMBIDR_Align_SHIFT);
+ if (sc->kva_align > ARM_SPE_KVA_MAX_ALIGN) {
+ device_printf(dev, "Invalid PMBIDR.Align value of %d\n", sc->kva_align);
+ return (EINVAL);
+ }
+
+ rid = 0;
+ sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
+ RF_ACTIVE);
+ if (sc->sc_irq_res == NULL) {
+ device_printf(dev, "Unable to allocate interrupt\n");
+ return (ENXIO);
+ }
+ error = bus_setup_intr(dev, sc->sc_irq_res,
+ INTR_TYPE_MISC | INTR_MPSAFE, arm_spe_intr, NULL, sc,
+ &sc->sc_irq_cookie);
+ if (error != 0) {
+ device_printf(dev, "Unable to set up interrupt\n");
+ return (error);
+ }
+
+ mtx_init(&sc->sc_lock, "Arm SPE lock", NULL, MTX_SPIN);
+
+ STAILQ_INIT(&sc->pending);
+ sc->npending = 0;
+
+ spe_register(dev);
+
+ return (0);
+}
+
+/* Interrupt handler runs on the same core that triggered the exception */
+static int
+arm_spe_intr(void *arg)
+{
+ int cpu_id = PCPU_GET(cpuid);
+ struct arm_spe_softc *sc = arg;
+ uint64_t pmbsr;
+ uint64_t base, limit;
+ uint8_t ec;
+ struct arm_spe_info *info = &sc->spe_info[cpu_id];
+ uint8_t i = info->buf_idx;
+ struct arm_spe_buf_info *buf = &info->buf_info[i];
+ struct arm_spe_buf_info *prev_buf = &info->buf_info[!i];
+ device_t dev = sc->dev;
+
+ /* Make sure the profiling data is visible to the CPU */
+ psb_csync();
+ dsb(nsh);
+
+ /* Make sure any HW update of PMBPTR_EL1 is visible to the CPU */
+ isb();
+
+ pmbsr = READ_SPECIALREG(PMBSR_EL1_REG);
+
+ if (!(pmbsr & PMBSR_S))
+ return (FILTER_STRAY);
+
+ /* Event Class */
+ ec = PMBSR_EC_VAL(pmbsr);
+ switch (ec)
+ {
+ case PMBSR_EC_OTHER_BUF_MGMT: /* Other buffer management event */
+ break;
+ case PMBSR_EC_GRAN_PROT_CHK: /* Granule Protection Check fault */
+ device_printf(dev, "PMBSR_EC_GRAN_PROT_CHK\n");
+ break;
+ case PMBSR_EC_STAGE1_DA: /* Stage 1 Data Abort */
+ device_printf(dev, "PMBSR_EC_STAGE1_DA\n");
+ break;
+ case PMBSR_EC_STAGE2_DA: /* Stage 2 Data Abort */
+ device_printf(dev, "PMBSR_EC_STAGE2_DA\n");
+ break;
+ default:
+ /* Unknown EC */
+ device_printf(dev, "unknown PMBSR_EC: %#x\n", ec);
+ arm_spe_disable(NULL);
+ TASK_INIT(&sc->task, 0, (task_fn_t *)arm_spe_error, sc->ctx);
+ taskqueue_enqueue(taskqueue_arm_spe, &sc->task);
+ return (FILTER_HANDLED);
+ }
+
+ switch (ec) {
+ case PMBSR_EC_OTHER_BUF_MGMT:
+ /* Buffer Status Code = buffer filled */
+ if ((pmbsr & PMBSR_MSS_BSC_MASK) == PMBSR_MSS_BSC_BUFFER_FILLED) {
+ dprintf("%s SPE buffer full event (cpu:%d)\n",
+ __func__, cpu_id);
+ break;
+ }
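+ /* FALLTHROUGH */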
+ case PMBSR_EC_GRAN_PROT_CHK:
+ case PMBSR_EC_STAGE1_DA:
+ case PMBSR_EC_STAGE2_DA:
+ /*
+ * If we have one of these, we've messed up the
+ * programming somehow (e.g. passed invalid memory to
+ * SPE) and can't recover
+ */
+ arm_spe_disable(NULL);
+ TASK_INIT(&sc->task, 0, (task_fn_t *)arm_spe_error, sc->ctx);
+ taskqueue_enqueue(taskqueue_arm_spe, &sc->task);
+ /* PMBPTR_EL1 is fault address if PMBSR_DL is 1 */
+ device_printf(dev, "CPU:%d PMBSR_EL1:%#lx\n", cpu_id, pmbsr);
+ device_printf(dev, "PMBPTR_EL1:%#lx PMBLIMITR_EL1:%#lx\n",
+ READ_SPECIALREG(PMBPTR_EL1_REG),
+ READ_SPECIALREG(PMBLIMITR_EL1_REG));
+ return (FILTER_HANDLED);
+ }
+
+ mtx_lock_spin(&info->lock);
+
+ /*
+ * Data Loss bit - pmbptr might not be pointing to the end of the last
+ * complete record
+ */
+ if ((pmbsr & PMBSR_DL) == PMBSR_DL)
+ buf->partial_rec = 1;
+ buf->pmbptr = READ_SPECIALREG(PMBPTR_EL1_REG);
+ buf->buf_svc = true;
+
+ /* Setup regs ready to start writing to the other half of the buffer */
+ info->buf_idx = !info->buf_idx;
+ base = buf_start_addr(info->buf_idx, info);
+ limit = base + (info->buf_size/2);
+ limit &= PMBLIMITR_LIMIT_MASK;
+ limit |= PMBLIMITR_E;
+ WRITE_SPECIALREG(PMBPTR_EL1_REG, base);
+ WRITE_SPECIALREG(PMBLIMITR_EL1_REG, limit);
+ isb();
+
+ /*
+ * Notify userspace via kqueue that buffer is full and needs copying
+ * out - since kqueue can sleep, don't do this in the interrupt handler,
+ * add to a taskqueue to be scheduled later instead
+ */
+ TASK_INIT(&info->task[i], 0, (task_fn_t *)arm_spe_send_buffer, buf);
+ taskqueue_enqueue(taskqueue_arm_spe, &info->task[i]);
+
+ /*
+ * It's possible userspace hasn't yet notified us they've copied out the
+ * other half of the buffer
+ *
+ * This might be because:
+ * a) Kernel hasn't scheduled the task via taskqueue to notify
+ * userspace to copy out the data
+ * b) Userspace is still copying the buffer or hasn't notified us
+ * back via the HWT_IOC_SVC_BUF ioctl
+ *
+ * Either way we need to avoid overwriting uncopied data in the
+ * buffer, so disable profiling until we receive that SVC_BUF
+ * ioctl
+ *
+ * Using a larger buffer size should help to minimise these events and
+ * loss of profiling data while profiling is disabled
+ */
+ if (prev_buf->buf_svc) {
+ device_printf(sc->dev, "cpu%d: buffer full interrupt, but other"
+ " half of buffer has not been copied out - consider"
+ " increasing buffer size to minimise loss of profiling data\n",
+ cpu_id);
+ WRITE_SPECIALREG(PMSCR_EL1_REG, 0x0);
+ prev_buf->buf_wait = true;
+ }
+
+ mtx_unlock_spin(&info->lock);
+
+ /* Clear Profiling Buffer Status Register */
+ WRITE_SPECIALREG(PMBSR_EL1_REG, 0);
+
+ isb();
+
+ return (FILTER_HANDLED);
+}
+
+/* note: Scheduled and run via taskqueue, so can run on any CPU at any time */
+void
+arm_spe_send_buffer(void *arg, int pending __unused)
+{
+ struct arm_spe_buf_info *buf = (struct arm_spe_buf_info *)arg;
+ struct arm_spe_info *info = buf->info;
+ struct arm_spe_queue *queue;
+ struct kevent kev;
+ int ret;
+
+ queue = malloc(sizeof(struct arm_spe_queue), M_ARM_SPE,
+ M_WAITOK | M_ZERO);
+
+ mtx_lock_spin(&info->lock);
+
+ /* Add to queue for userspace to pickup */
+ queue->ident = info->ident;
+ queue->offset = buf->pmbptr - buf_start_addr(buf->buf_idx, info);
+ queue->buf_idx = buf->buf_idx;
+ queue->final_buf = !info->enabled;
+ queue->partial_rec = buf->partial_rec;
+ mtx_unlock_spin(&info->lock);
+
+ mtx_lock_spin(&info->sc->sc_lock);
+ STAILQ_INSERT_TAIL(&info->sc->pending, queue, next);
+ info->sc->npending++;
+ EV_SET(&kev, ARM_SPE_KQ_BUF, EVFILT_USER, 0, NOTE_TRIGGER,
+ info->sc->npending, NULL);
+ mtx_unlock_spin(&info->sc->sc_lock);
+
+ /* Notify userspace */
+ ret = kqfd_register(info->sc->kqueue_fd, &kev, info->sc->hwt_td,
+ M_WAITOK);
+ if (ret) {
+ dprintf("%s kqfd_register ret:%d\n", __func__, ret);
+ arm_spe_error(info->sc->ctx, 0);
+ }
+}
+
+static void
+arm_spe_error(void *arg, int pending __unused)
+{
+ struct hwt_context *ctx = arg;
+ struct kevent kev;
+ int ret;
+
+ smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier,
+ arm_spe_disable, smp_no_rendezvous_barrier, NULL);
+
+ EV_SET(&kev, ARM_SPE_KQ_SHUTDOWN, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
+ ret = kqfd_register(ctx->kqueue_fd, &kev, ctx->hwt_td, M_WAITOK);
+ if (ret)
+ dprintf("%s kqfd_register ret:%d\n", __func__, ret);
+}
+
+MODULE_DEPEND(spe, hwt, 1, 1, 1);
+MODULE_VERSION(spe, 1);
diff --git a/sys/arm64/spe/arm_spe_dev.h b/sys/arm64/spe/arm_spe_dev.h
new file mode 100644
index 000000000000..df88d98ef1c0
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_dev.h
@@ -0,0 +1,162 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ARM64_ARM_SPE_DEV_H_
+#define _ARM64_ARM_SPE_DEV_H_
+
+#include <sys/mutex.h>
+#include <sys/taskqueue.h>
+
+#include <vm/vm.h>
+
+#include <arm64/spe/arm_spe.h>
+
+#include <dev/hwt/hwt_context.h>
+
+#define ARM_SPE_DEBUG
+#undef ARM_SPE_DEBUG
+
+#ifdef ARM_SPE_DEBUG
+#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#else
+#define dprintf(fmt, ...)
+#endif
+
+DECLARE_CLASS(arm_spe_driver);
+
+struct cdev;
+struct resource;
+
+extern bool arm64_pid_in_contextidr;
+
+int spe_register(device_t dev);
+void arm_spe_disable(void *arg __unused);
+int spe_backend_disable_smp(struct hwt_context *ctx);
+void arm_spe_send_buffer(void *arg, int pending __unused);
+
+/*
+ * PSB CSYNC is a Profiling Synchronization Barrier encoded in the hint space
+ * so it is a NOP on earlier architectures.
+ */
+#define psb_csync() __asm __volatile("hint #17" ::: "memory");
+
+struct arm_spe_softc {
+ device_t dev;
+
+ struct resource *sc_irq_res;
+ void *sc_irq_cookie;
+ struct cdev *sc_cdev;
+ struct mtx sc_lock;
+ struct task task;
+
+ int64_t sc_pmsidr;
+ int kqueue_fd;
+ struct thread *hwt_td;
+ struct arm_spe_info *spe_info;
+ struct hwt_context *ctx;
+ STAILQ_HEAD(, arm_spe_queue) pending;
+ uint64_t npending;
+
+ uint64_t pmbidr;
+ uint64_t pmsidr;
+
+ uint16_t kva_align;
+};
+
+struct arm_spe_buf_info {
+ struct arm_spe_info *info;
+ uint64_t pmbptr;
+ uint8_t buf_idx : 1;
+ bool buf_svc : 1;
+ bool buf_wait : 1;
+ bool partial_rec : 1;
+};
+
+struct arm_spe_info {
+ int ident; /* tid or cpu_id */
+ struct mtx lock;
+ struct arm_spe_softc *sc;
+ struct task task[2];
+ bool enabled : 1;
+
+ /* buffer is split in half as a ping-pong buffer */
+ vm_object_t bufobj;
+ vm_offset_t kvaddr;
+ size_t buf_size;
+ uint8_t buf_idx : 1; /* 0 = first half of buf, 1 = 2nd half */
+ struct arm_spe_buf_info buf_info[2];
+
+ /* config */
+ enum arm_spe_profiling_level level;
+ enum arm_spe_ctx_field ctx_field;
+ /* filters */
+ uint64_t pmsfcr;
+ uint64_t pmsevfr;
+ uint64_t pmslatfr;
+ /* interval */
+ uint64_t pmsirr;
+ uint64_t pmsicr;
+ /* control */
+ uint64_t pmscr;
+};
+
+struct arm_spe_queue {
+ int ident;
+ u_int buf_idx : 1;
+ bool partial_rec : 1;
+ bool final_buf : 1;
+ vm_offset_t offset;
+ STAILQ_ENTRY(arm_spe_queue) next;
+};
+
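+/* Kernel VA of the start of the given half of the ping-pong buffer */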
+static inline vm_offset_t buf_start_addr(u_int buf_idx, struct arm_spe_info *info)
+{
+ vm_offset_t addr;
+ if (buf_idx == 0)
+ addr = info->kvaddr;
+ if (buf_idx == 1)
+ addr = info->kvaddr + (info->buf_size/2);
+
+ return (addr);
+}
+
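+/* Kernel VA just past the end of the given half of the ping-pong buffer */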
+static inline vm_offset_t buf_end_addr(u_int buf_idx, struct arm_spe_info *info)
+{
+ vm_offset_t addr;
+ if (buf_idx == 0)
+ addr = info->kvaddr + (info->buf_size/2);
+ if (buf_idx == 1)
+ addr = info->kvaddr + info->buf_size;
+
+ return (addr);
+}
+
+#endif /* _ARM64_ARM_SPE_DEV_H_ */
diff --git a/sys/arm64/spe/arm_spe_fdt.c b/sys/arm64/spe/arm_spe_fdt.c
new file mode 100644
index 000000000000..d16f1dee2ac8
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_fdt.c
@@ -0,0 +1,75 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+#include <dev/ofw/openfirm.h>
+
+#include <arm64/spe/arm_spe_dev.h>
+
+static device_probe_t arm_spe_fdt_probe;
+
+static struct ofw_compat_data compat_data[] = {
+ {"arm,statistical-profiling-extension-v1", true},
+ {NULL, false},
+};
+
+static device_method_t arm_spe_fdt_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, arm_spe_fdt_probe),
+
+ DEVMETHOD_END,
+};
+
+DEFINE_CLASS_1(spe, arm_spe_fdt_driver, arm_spe_fdt_methods,
+ sizeof(struct arm_spe_softc), arm_spe_driver);
+
+DRIVER_MODULE(spe, simplebus, arm_spe_fdt_driver, 0, 0);
+
+static int
+arm_spe_fdt_probe(device_t dev)
+{
+ if (!ofw_bus_status_okay(dev))
+ return (ENXIO);
+
+ if (!ofw_bus_search_compatible(dev, compat_data)->ocd_data)
+ return (ENXIO);
+
+ device_set_desc(dev, "ARM Statistical Profiling Extension");
+ return (BUS_PROBE_DEFAULT);
+}
diff --git a/sys/arm64/vmm/arm64.h b/sys/arm64/vmm/arm64.h
index 6a0c4c78e568..f530dab05331 100644
--- a/sys/arm64/vmm/arm64.h
+++ b/sys/arm64/vmm/arm64.h
@@ -78,14 +78,16 @@ struct hypctx {
uint64_t pmcr_el0; /* Performance Monitors Control Register */
uint64_t pmccntr_el0;
uint64_t pmccfiltr_el0;
+ uint64_t pmuserenr_el0;
+ uint64_t pmselr_el0;
+ uint64_t pmxevcntr_el0;
uint64_t pmcntenset_el0;
uint64_t pmintenset_el1;
uint64_t pmovsset_el0;
- uint64_t pmselr_el0;
- uint64_t pmuserenr_el0;
uint64_t pmevcntr_el0[31];
uint64_t pmevtyper_el0[31];
+ uint64_t dbgclaimset_el1;
uint64_t dbgbcr_el1[16]; /* Debug Breakpoint Control Registers */
uint64_t dbgbvr_el1[16]; /* Debug Breakpoint Value Registers */
uint64_t dbgwcr_el1[16]; /* Debug Watchpoint Control Registers */
@@ -117,6 +119,7 @@ struct hypctx {
struct vgic_v3_regs vgic_v3_regs;
struct vgic_v3_cpu *vgic_cpu;
bool has_exception;
+ bool dbg_oslock;
};
struct hyp {
@@ -125,42 +128,14 @@ struct hyp {
uint64_t vmid_generation;
uint64_t vttbr_el2;
uint64_t el2_addr; /* The address of this in el2 space */
+ uint64_t feats; /* Which features are enabled */
+#define HYP_FEAT_HCX (0x1ul << 0)
+#define HYP_FEAT_ECV_POFF (0x1ul << 1)
bool vgic_attached;
struct vgic_v3 *vgic;
struct hypctx *ctx[];
};
-#define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \
- ret_type vmmops_##opname args;
-
-DEFINE_VMMOPS_IFUNC(int, modinit, (int ipinum))
-DEFINE_VMMOPS_IFUNC(int, modcleanup, (void))
-DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap))
-DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging,
- uint64_t gla, int prot, uint64_t *gpa, int *is_fault))
-DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap,
- struct vm_eventinfo *info))
-DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi))
-DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu,
- int vcpu_id))
-DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui))
-DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t esr, uint64_t far))
-DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval))
-DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val))
-DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval))
-DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val))
-DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min,
- vm_offset_t max))
-DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace))
-#ifdef notyet
-#ifdef BHYVE_SNAPSHOT
-DEFINE_VMMOPS_IFUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta *meta))
-DEFINE_VMMOPS_IFUNC(int, vcpu_snapshot, (void *vcpui,
- struct vm_snapshot_meta *meta))
-DEFINE_VMMOPS_IFUNC(int, restore_tsc, (void *vcpui, uint64_t now))
-#endif
-#endif
-
uint64_t vmm_call_hyp(uint64_t, ...);
#if 0
diff --git a/sys/arm64/vmm/hyp.h b/sys/arm64/vmm/hyp.h
index 0b2977c73960..0c8d0fb28b18 100644
--- a/sys/arm64/vmm/hyp.h
+++ b/sys/arm64/vmm/hyp.h
@@ -80,7 +80,6 @@
#define HYP_ENTER_GUEST 0x00000002
#define HYP_READ_REGISTER 0x00000003
#define HYP_REG_ICH_VTR 0x1
-#define HYP_REG_CNTHCTL 0x2
#define HYP_CLEAN_S2_TLBI 0x00000004
#define HYP_DC_CIVAC 0x00000005
#define HYP_EL2_TLBI 0x00000006
diff --git a/sys/arm64/vmm/io/vgic_v3.c b/sys/arm64/vmm/io/vgic_v3.c
index 67afb3374815..023406c64182 100644
--- a/sys/arm64/vmm/io/vgic_v3.c
+++ b/sys/arm64/vmm/io/vgic_v3.c
@@ -47,7 +47,6 @@
#include <dev/ofw/openfirm.h>
-#include <machine/armreg.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpufunc.h>
diff --git a/sys/arm64/vmm/io/vtimer.c b/sys/arm64/vmm/io/vtimer.c
index f59d7ebc1ad4..7c7fbb49e691 100644
--- a/sys/arm64/vmm/io/vtimer.c
+++ b/sys/arm64/vmm/io/vtimer.c
@@ -36,6 +36,7 @@
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/rman.h>
+#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/timeet.h>
#include <sys/timetc.h>
@@ -43,7 +44,6 @@
#include <machine/bus.h>
#include <machine/machdep.h>
#include <machine/vmm.h>
-#include <machine/armreg.h>
#include <arm64/vmm/arm64.h>
@@ -55,11 +55,18 @@
#define timer_enabled(ctl) \
(!((ctl) & CNTP_CTL_IMASK) && ((ctl) & CNTP_CTL_ENABLE))
-static uint64_t cnthctl_el2_reg;
static uint32_t tmr_frq;
#define timer_condition_met(ctl) ((ctl) & CNTP_CTL_ISTATUS)
+SYSCTL_DECL(_hw_vmm);
+SYSCTL_NODE(_hw_vmm, OID_AUTO, vtimer, CTLFLAG_RW, NULL, NULL);
+
+static bool allow_ecv_phys = false;
+SYSCTL_BOOL(_hw_vmm_vtimer, OID_AUTO, allow_ecv_phys, CTLFLAG_RW,
+ &allow_ecv_phys, 0,
+ "Enable hardware access to the physical timer if FEAT_ECV_POFF is supported");
+
static void vtimer_schedule_irq(struct hypctx *hypctx, bool phys);
static int
@@ -111,9 +118,8 @@ out:
}
int
-vtimer_init(uint64_t cnthctl_el2)
+vtimer_init(void)
{
- cnthctl_el2_reg = cnthctl_el2;
/*
* The guest *MUST* use the same timer frequency as the host. The
* register CNTFRQ_EL0 is accessible to the guest and a different value
@@ -128,8 +134,12 @@ void
vtimer_vminit(struct hyp *hyp)
{
uint64_t now;
+ bool ecv_poff;
- hyp->vtimer.cnthctl_el2 = cnthctl_el2_reg;
+ ecv_poff = false;
+
+ if (allow_ecv_phys && (hyp->feats & HYP_FEAT_ECV_POFF) != 0)
+ ecv_poff = true;
/*
* Configure the Counter-timer Hypervisor Control Register for the VM.
@@ -137,35 +147,58 @@ vtimer_vminit(struct hyp *hyp)
if (in_vhe()) {
/*
* CNTHCTL_E2H_EL0PCTEN: trap EL0 access to CNTP{CT,CTSS}_EL0
- * CNTHCTL_E2H_EL1VCTEN: don't trap EL0 access to
- * CNTV{CT,CTSS}_EL0
+ * CNTHCTL_E2H_EL0VCTEN: don't trap EL0 access to
+ * CNTV{CT,CTSS}_EL0
* CNTHCTL_E2H_EL0VTEN: don't trap EL0 access to
* CNTV_{CTL,CVAL,TVAL}_EL0
* CNTHCTL_E2H_EL0PTEN: trap EL0 access to
* CNTP_{CTL,CVAL,TVAL}_EL0
- * CNTHCTL_E2H_EL1PCEN: trap EL1 access to
- CNTP_{CTL,CVAL,TVAL}_EL0
* CNTHCTL_E2H_EL1PCTEN: trap access to CNTPCT_EL0
+ * CNTHCTL_E2H_EL1PTEN: trap access to
+ * CNTP_{CTL,CVAL,TVAL}_EL0
+ * CNTHCTL_E2H_EL1VCTEN: don't trap EL0 access to
+ * CNTV{CT,CTSS}_EL0
+ * CNTHCTL_E2H_EL1PCEN: trap EL1 access to
+ * CNTP_{CTL,CVAL,TVAL}_EL0
*
* TODO: Don't trap when FEAT_ECV is present
*/
- hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_E2H_EL0PCTEN;
- hyp->vtimer.cnthctl_el2 |= CNTHCTL_E2H_EL0VCTEN;
- hyp->vtimer.cnthctl_el2 |= CNTHCTL_E2H_EL0VTEN;
- hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_E2H_EL0PTEN;
-
- hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_E2H_EL1PTEN;
- hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_E2H_EL1PCTEN;
+ hyp->vtimer.cnthctl_el2 =
+ CNTHCTL_E2H_EL0VCTEN_NOTRAP |
+ CNTHCTL_E2H_EL0VTEN_NOTRAP;
+ if (ecv_poff) {
+ hyp->vtimer.cnthctl_el2 |=
+ CNTHCTL_E2H_EL0PCTEN_NOTRAP |
+ CNTHCTL_E2H_EL0PTEN_NOTRAP |
+ CNTHCTL_E2H_EL1PCTEN_NOTRAP |
+ CNTHCTL_E2H_EL1PTEN_NOTRAP;
+ } else {
+ hyp->vtimer.cnthctl_el2 |=
+ CNTHCTL_E2H_EL0PCTEN_TRAP |
+ CNTHCTL_E2H_EL0PTEN_TRAP |
+ CNTHCTL_E2H_EL1PCTEN_TRAP |
+ CNTHCTL_E2H_EL1PTEN_TRAP;
+ }
} else {
/*
* CNTHCTL_EL1PCEN: trap access to CNTP_{CTL, CVAL, TVAL}_EL0
* from EL1
* CNTHCTL_EL1PCTEN: trap access to CNTPCT_EL0
*/
- hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_EL1PCEN;
- hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_EL1PCTEN;
+ if (ecv_poff) {
+ hyp->vtimer.cnthctl_el2 =
+ CNTHCTL_EL1PCTEN_NOTRAP |
+ CNTHCTL_EL1PCEN_NOTRAP;
+ } else {
+ hyp->vtimer.cnthctl_el2 =
+ CNTHCTL_EL1PCTEN_TRAP |
+ CNTHCTL_EL1PCEN_TRAP;
+ }
}
+ if (ecv_poff)
+ hyp->vtimer.cnthctl_el2 |= CNTHCTL_ECV_EN;
+
now = READ_SPECIALREG(cntpct_el0);
hyp->vtimer.cntvoff_el2 = now;
@@ -231,15 +264,10 @@ vtimer_cleanup(void)
{
}
-void
-vtimer_sync_hwstate(struct hypctx *hypctx)
+static void
+vtime_sync_timer(struct hypctx *hypctx, struct vtimer_timer *timer,
+ uint64_t cntpct_el0)
{
- struct vtimer_timer *timer;
- uint64_t cntpct_el0;
-
- timer = &hypctx->vtimer_cpu.virt_timer;
- cntpct_el0 = READ_SPECIALREG(cntpct_el0) -
- hypctx->hyp->vtimer.cntvoff_el2;
if (!timer_enabled(timer->cntx_ctl_el0)) {
vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
timer->irqid, false);
@@ -253,6 +281,21 @@ vtimer_sync_hwstate(struct hypctx *hypctx)
}
}
+void
+vtimer_sync_hwstate(struct hypctx *hypctx)
+{
+ uint64_t cntpct_el0;
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) -
+ hypctx->hyp->vtimer.cntvoff_el2;
+ vtime_sync_timer(hypctx, &hypctx->vtimer_cpu.virt_timer, cntpct_el0);
+ /* If FEAT_ECV_POFF is in use then we need to sync the physical timer */
+ if ((hypctx->hyp->vtimer.cnthctl_el2 & CNTHCTL_ECV_EN) != 0) {
+ vtime_sync_timer(hypctx, &hypctx->vtimer_cpu.phys_timer,
+ cntpct_el0);
+ }
+}
+
static void
vtimer_inject_irq_callout_phys(void *context)
{
diff --git a/sys/arm64/vmm/io/vtimer.h b/sys/arm64/vmm/io/vtimer.h
index 71a20344d05e..92ce025968d2 100644
--- a/sys/arm64/vmm/io/vtimer.h
+++ b/sys/arm64/vmm/io/vtimer.h
@@ -66,7 +66,7 @@ struct vtimer_cpu {
uint32_t cntkctl_el1;
};
-int vtimer_init(uint64_t cnthctl_el2);
+int vtimer_init(void);
void vtimer_vminit(struct hyp *);
void vtimer_cpuinit(struct hypctx *);
void vtimer_cpucleanup(struct hypctx *);
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
index 3082d2941221..31d2fb3f516b 100644
--- a/sys/arm64/vmm/vmm.c
+++ b/sys/arm64/vmm/vmm.c
@@ -33,7 +33,6 @@
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
-#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
@@ -51,7 +50,6 @@
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
-#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
@@ -88,7 +86,6 @@ struct vcpu {
struct vfpstate *guestfpu; /* (a,i) guest fpu state */
};
-#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx))
#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
@@ -126,9 +123,8 @@ struct vm {
bool dying; /* (o) is dying */
volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
- struct vmspace *vmspace; /* (o) guest's address space */
struct vm_mem mem; /* (i) guest memory */
- char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
+ char name[VM_MAX_NAMELEN + 1]; /* (o) virtual machine name */
struct vcpu **vcpu; /* (i) guest vcpus */
struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
/* (o) guest MMIO regions */
@@ -141,8 +137,6 @@ struct vm {
struct sx vcpus_init_lock; /* (o) */
};
-static bool vmm_initialized = false;
-
static int vm_handle_wfi(struct vcpu *vcpu,
struct vm_exit *vme, bool *retu);
@@ -211,10 +205,6 @@ static const struct vmm_regs vmm_arch_regs_masks = {
/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;
-u_int vm_maxcpu;
-SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
- &vm_maxcpu, 0, "Maximum number of vCPUs");
-
static void vcpu_notify_event_locked(struct vcpu *vcpu);
/* global statistics */
@@ -234,12 +224,6 @@ VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");
-/*
- * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
- * is a safe value for now.
- */
-#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE)
-
static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
@@ -274,6 +258,7 @@ vcpu_cleanup(struct vcpu *vcpu, bool destroy)
vmm_stat_free(vcpu->stats);
fpu_save_area_free(vcpu->guestfpu);
vcpu_lock_destroy(vcpu);
+ free(vcpu, M_VMM);
}
}
@@ -325,20 +310,14 @@ vmm_unsupported_quirk(void)
return (0);
}
-static int
-vmm_init(void)
+int
+vmm_modinit(void)
{
int error;
- vm_maxcpu = mp_ncpus;
- TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
-
- if (vm_maxcpu > VM_MAXCPU) {
- printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
- vm_maxcpu = VM_MAXCPU;
- }
- if (vm_maxcpu == 0)
- vm_maxcpu = 1;
+ error = vmm_unsupported_quirk();
+ if (error != 0)
+ return (error);
error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
if (error != 0)
@@ -347,67 +326,18 @@ vmm_init(void)
return (vmmops_modinit(0));
}
-static int
-vmm_handler(module_t mod, int what, void *arg)
+int
+vmm_modcleanup(void)
{
- int error;
-
- switch (what) {
- case MOD_LOAD:
- error = vmm_unsupported_quirk();
- if (error != 0)
- break;
- error = vmmdev_init();
- if (error != 0)
- break;
- error = vmm_init();
- if (error == 0)
- vmm_initialized = true;
- else
- (void)vmmdev_cleanup();
- break;
- case MOD_UNLOAD:
- error = vmmdev_cleanup();
- if (error == 0 && vmm_initialized) {
- error = vmmops_modcleanup();
- if (error) {
- /*
- * Something bad happened - prevent new
- * VMs from being created
- */
- vmm_initialized = false;
- }
- }
- break;
- default:
- error = 0;
- break;
- }
- return (error);
+ return (vmmops_modcleanup());
}
-static moduledata_t vmm_kmod = {
- "vmm",
- vmm_handler,
- NULL
-};
-
-/*
- * vmm initialization has the following dependencies:
- *
- * - HYP initialization requires smp_rendezvous() and therefore must happen
- * after SMP is fully functional (after SI_SUB_SMP).
- * - vmm device initialization requires an initialized devfs.
- */
-DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
-MODULE_VERSION(vmm, 1);
-
static void
vm_init(struct vm *vm, bool create)
{
int i;
- vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
+ vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm)));
MPASS(vm->cookie != NULL);
CPU_ZERO(&vm->active_cpus);
@@ -443,10 +373,6 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
return (NULL);
- /* Some interrupt controllers may have a CPU limit */
- if (vcpuid >= vgic_max_cpu_count(vm->cookie))
- return (NULL);
-
vcpu = (struct vcpu *)
atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
if (__predict_true(vcpu != NULL))
@@ -455,6 +381,12 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
sx_xlock(&vm->vcpus_init_lock);
vcpu = vm->vcpu[vcpuid];
if (vcpu == NULL && !vm->dying) {
+ /* Some interrupt controllers may have a CPU limit */
+ if (vcpuid >= vgic_max_cpu_count(vm->cookie)) {
+ sx_xunlock(&vm->vcpus_init_lock);
+ return (NULL);
+ }
+
vcpu = vcpu_alloc(vm, vcpuid);
vcpu_init(vcpu);
@@ -470,9 +402,9 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
}
void
-vm_slock_vcpus(struct vm *vm)
+vm_lock_vcpus(struct vm *vm)
{
- sx_slock(&vm->vcpus_init_lock);
+ sx_xlock(&vm->vcpus_init_lock);
}
void
@@ -485,26 +417,15 @@ int
vm_create(const char *name, struct vm **retvm)
{
struct vm *vm;
- struct vmspace *vmspace;
-
- /*
- * If vmm.ko could not be successfully initialized then don't attempt
- * to create the virtual machine.
- */
- if (!vmm_initialized)
- return (ENXIO);
-
- if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
- return (EINVAL);
-
- vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
- if (vmspace == NULL)
- return (ENOMEM);
+ int error;
vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
+ error = vm_mem_init(&vm->mem, 0, 1ul << 39);
+ if (error != 0) {
+ free(vm, M_VMM);
+ return (error);
+ }
strcpy(vm->name, name);
- vm->vmspace = vmspace;
- vm_mem_init(&vm->mem);
sx_init(&vm->vcpus_init_lock, "vm vcpus");
vm->sockets = 1;
@@ -558,7 +479,7 @@ vm_cleanup(struct vm *vm, bool destroy)
if (destroy) {
vm_xlock_memsegs(vm);
- pmap = vmspace_pmap(vm->vmspace);
+ pmap = vmspace_pmap(vm_vmspace(vm));
sched_pin();
PCPU_SET(curvmpmap, NULL);
sched_unpin();
@@ -582,11 +503,6 @@ vm_cleanup(struct vm *vm, bool destroy)
if (destroy) {
vm_mem_destroy(vm);
- vmmops_vmspace_free(vm->vmspace);
- vm->vmspace = NULL;
-
- for (i = 0; i < vm->maxcpus; i++)
- free(vm->vcpu[i], M_VMM);
free(vm->vcpu, M_VMM);
sx_destroy(&vm->vcpus_init_lock);
}
@@ -651,6 +567,33 @@ vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
return (0);
}
+static int
+vmm_write_oslar_el1(struct vcpu *vcpu, uint64_t wval, void *arg)
+{
+ struct hypctx *hypctx;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ /* All other fields are RES0 & we don't do anything with this */
+ /* TODO: Disable access to other debug state when locked */
+ hypctx->dbg_oslock = (wval & OSLAR_OSLK) == OSLAR_OSLK;
+ return (0);
+}
+
+static int
+vmm_read_oslsr_el1(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ struct hypctx *hypctx;
+ uint64_t val;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ val = OSLSR_OSLM_1;
+ if (hypctx->dbg_oslock)
+ val |= OSLSR_OSLK;
+ *rval = val;
+
+ return (0);
+}
+
static const struct vmm_special_reg vmm_special_regs[] = {
#define SPECIAL_REG(_reg, _read, _write) \
{ \
@@ -707,6 +650,13 @@ static const struct vmm_special_reg vmm_special_regs[] = {
SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
vtimer_phys_tval_write),
SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
+
+ /* Debug registers */
+ SPECIAL_REG(DBGPRCR_EL1, vmm_reg_raz, vmm_reg_wi),
+ SPECIAL_REG(OSDLR_EL1, vmm_reg_raz, vmm_reg_wi),
+ /* TODO: Exceptions on invalid access */
+ SPECIAL_REG(OSLAR_EL1, vmm_reg_raz, vmm_write_oslar_el1),
+ SPECIAL_REG(OSLSR_EL1, vmm_read_oslsr_el1, vmm_reg_wi),
#undef SPECIAL_REG
};
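
The OSLAR_EL1/OSLSR_EL1 handlers above only track the OS Lock bit per vCPU and reflect it back on reads. A minimal guest-side sketch of the behaviour this gives (illustrative only, not code from the change; it assumes the guest's accesses trap to EL2 and are routed through the vmm_special_regs table, and that the usual machine/armreg.h and machine/cpufunc.h accessors are available):

#include <machine/armreg.h>
#include <machine/cpufunc.h>

/*
 * Illustrative guest-side sequence exercising the emulated OS Lock.
 * The MSR/MRS accesses trap to EL2 and land in the handlers above.
 */
static int
guest_take_os_lock(void)
{
	uint64_t oslsr;

	WRITE_SPECIALREG(oslar_el1, OSLAR_OSLK);	/* vmm_write_oslar_el1() records the lock */
	isb();
	oslsr = READ_SPECIALREG(oslsr_el1);		/* vmm_read_oslsr_el1() reports it back */
	return ((oslsr & OSLSR_OSLK) != 0);
}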
@@ -1056,12 +1006,6 @@ vcpu_notify_event(struct vcpu *vcpu)
vcpu_unlock(vcpu);
}
-struct vmspace *
-vm_vmspace(struct vm *vm)
-{
- return (vm->vmspace);
-}
-
struct vm_mem *
vm_mem(struct vm *vm)
{
@@ -1258,8 +1202,7 @@ vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{
-
- if (reg >= VM_REG_LAST)
+ if (reg < 0 || reg >= VM_REG_LAST)
return (EINVAL);
return (vmmops_getreg(vcpu->cookie, reg, retval));
@@ -1270,7 +1213,7 @@ vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
int error;
- if (reg >= VM_REG_LAST)
+ if (reg < 0 || reg >= VM_REG_LAST)
return (EINVAL);
error = vmmops_setreg(vcpu->cookie, reg, val);
if (error || reg != VM_REG_GUEST_PC)
@@ -1342,8 +1285,14 @@ vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
+ struct vm *vm;
+
+ vm = vcpu->vm;
vcpu_lock(vcpu);
while (1) {
+ if (vm->suspend)
+ break;
+
if (vgic_has_pending_irq(vcpu->cookie))
break;
@@ -1376,7 +1325,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu)
vme = &vcpu->exitinfo;
- pmap = vmspace_pmap(vcpu->vm->vmspace);
+ pmap = vmspace_pmap(vm_vmspace(vcpu->vm));
addr = vme->u.paging.gpa;
esr = vme->u.paging.esr;
@@ -1393,7 +1342,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu)
panic("%s: Invalid exception (esr = %lx)", __func__, esr);
}
- map = &vm->vmspace->vm_map;
+ map = &vm_vmspace(vm)->vm_map;
rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
if (rv != KERN_SUCCESS)
return (EFAULT);
@@ -1467,7 +1416,7 @@ vm_run(struct vcpu *vcpu)
if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
return (EINVAL);
- pmap = vmspace_pmap(vm->vmspace);
+ pmap = vmspace_pmap(vm_vmspace(vm));
vme = &vcpu->exitinfo;
evinfo.rptr = NULL;
evinfo.sptr = &vm->suspend;
diff --git a/sys/arm64/vmm/vmm_arm64.c b/sys/arm64/vmm/vmm_arm64.c
index de2425aae0a1..aa1361049f49 100644
--- a/sys/arm64/vmm/vmm_arm64.c
+++ b/sys/arm64/vmm/vmm_arm64.c
@@ -47,7 +47,6 @@
#include <vm/vm_page.h>
#include <vm/vm_param.h>
-#include <machine/armreg.h>
#include <machine/vm.h>
#include <machine/cpufunc.h>
#include <machine/cpu.h>
@@ -238,7 +237,6 @@ vmmops_modinit(int ipinum)
vm_offset_t next_hyp_va;
vm_paddr_t vmm_base;
uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field;
- uint64_t cnthctl_el2;
int cpu, i;
bool rv __diagused;
@@ -444,10 +442,9 @@ vmmops_modinit(int ipinum)
vmem_add(el2_mem_alloc, next_hyp_va,
HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK);
}
- cnthctl_el2 = vmm_read_reg(HYP_REG_CNTHCTL);
vgic_init();
- vtimer_init(cnthctl_el2);
+ vtimer_init();
return (0);
}
@@ -517,6 +514,7 @@ vmmops_init(struct vm *vm, pmap_t pmap)
{
struct hyp *hyp;
vm_size_t size;
+ uint64_t idreg;
size = el2_hyp_size(vm);
hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
@@ -524,6 +522,16 @@ vmmops_init(struct vm *vm, pmap_t pmap)
hyp->vm = vm;
hyp->vgic_attached = false;
+ if (get_kernel_reg(ID_AA64MMFR0_EL1, &idreg)) {
+ if (ID_AA64MMFR0_ECV_VAL(idreg) >= ID_AA64MMFR0_ECV_POFF)
+ hyp->feats |= HYP_FEAT_ECV_POFF;
+ }
+
+ if (get_kernel_reg(ID_AA64MMFR1_EL1, &idreg)) {
+ if (ID_AA64MMFR1_HCX_VAL(idreg) >= ID_AA64MMFR1_HCX_IMPL)
+ hyp->feats |= HYP_FEAT_HCX;
+ }
+
vtimer_vminit(hyp);
vgic_vminit(hyp);
@@ -1251,6 +1259,8 @@ hypctx_regptr(struct hypctx *hypctx, int reg)
return (&hypctx->tcr_el1);
case VM_REG_GUEST_TCR2_EL1:
return (&hypctx->tcr2_el1);
+ case VM_REG_GUEST_MPIDR_EL1:
+ return (&hypctx->vmpidr_el2);
default:
break;
}
@@ -1354,7 +1364,7 @@ vmmops_setcap(void *vcpui, int num, int val)
break;
if (val != 0)
hypctx->mdcr_el2 |= MDCR_EL2_TDE;
- else
+ else if ((hypctx->setcaps & (1ul << VM_CAP_SS_EXIT)) == 0)
hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
break;
case VM_CAP_SS_EXIT:
@@ -1363,20 +1373,20 @@ vmmops_setcap(void *vcpui, int num, int val)
if (val != 0) {
hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS);
- hypctx->debug_mdscr |= hypctx->mdscr_el1 &
- (MDSCR_SS | MDSCR_KDE);
+ hypctx->debug_mdscr |= (hypctx->mdscr_el1 & MDSCR_SS);
hypctx->tf.tf_spsr |= PSR_SS;
- hypctx->mdscr_el1 |= MDSCR_SS | MDSCR_KDE;
+ hypctx->mdscr_el1 |= MDSCR_SS;
hypctx->mdcr_el2 |= MDCR_EL2_TDE;
} else {
hypctx->tf.tf_spsr &= ~PSR_SS;
hypctx->tf.tf_spsr |= hypctx->debug_spsr;
hypctx->debug_spsr &= ~PSR_SS;
- hypctx->mdscr_el1 &= ~(MDSCR_SS | MDSCR_KDE);
+ hypctx->mdscr_el1 &= ~MDSCR_SS;
hypctx->mdscr_el1 |= hypctx->debug_mdscr;
- hypctx->debug_mdscr &= ~(MDSCR_SS | MDSCR_KDE);
- hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
+ hypctx->debug_mdscr &= ~MDSCR_SS;
+ if ((hypctx->setcaps & (1ul << VM_CAP_BRK_EXIT)) == 0)
+ hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
}
break;
case VM_CAP_MASK_HWINTR:
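
The VM_CAP_BRK_EXIT and VM_CAP_SS_EXIT branches above now share ownership of MDCR_EL2.TDE: disabling one capability only clears the trap bit if the other is not present in setcaps. A restatement of that rule as a hypothetical helper (my sketch, not part of the change; hypctx_wants_tde is an invented name):

#include <machine/vmm.h>

#include "arm64.h"

/* TDE must stay set while either debug-exit capability remains enabled. */
static int
hypctx_wants_tde(const struct hypctx *hypctx)
{
	uint64_t debug_caps;

	debug_caps = (1ul << VM_CAP_BRK_EXIT) | (1ul << VM_CAP_SS_EXIT);
	return ((hypctx->setcaps & debug_caps) != 0);
}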
diff --git a/sys/arm64/vmm/vmm_dev_machdep.c b/sys/arm64/vmm/vmm_dev_machdep.c
index 926a74fa528b..29d14e1ba952 100644
--- a/sys/arm64/vmm/vmm_dev_machdep.c
+++ b/sys/arm64/vmm/vmm_dev_machdep.c
@@ -68,19 +68,13 @@ int
vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data,
int fflag, struct thread *td)
{
- struct vm_run *vmrun;
- struct vm_vgic_version *vgv;
- struct vm_vgic_descr *vgic;
- struct vm_irq *vi;
- struct vm_exception *vmexc;
- struct vm_gla2gpa *gg;
- struct vm_msi *vmsi;
int error;
error = 0;
switch (cmd) {
case VM_RUN: {
struct vm_exit *vme;
+ struct vm_run *vmrun;
vmrun = (struct vm_run *)data;
vme = vm_exitinfo(vcpu);
@@ -94,41 +88,62 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data,
break;
break;
}
- case VM_INJECT_EXCEPTION:
+ case VM_INJECT_EXCEPTION: {
+ struct vm_exception *vmexc;
+
vmexc = (struct vm_exception *)data;
error = vm_inject_exception(vcpu, vmexc->esr, vmexc->far);
break;
- case VM_GLA2GPA_NOFAULT:
+ }
+ case VM_GLA2GPA_NOFAULT: {
+ struct vm_gla2gpa *gg;
+
gg = (struct vm_gla2gpa *)data;
error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla,
gg->prot, &gg->gpa, &gg->fault);
KASSERT(error == 0 || error == EFAULT,
("%s: vm_gla2gpa unknown error %d", __func__, error));
break;
- case VM_GET_VGIC_VERSION:
+ }
+ case VM_GET_VGIC_VERSION: {
+ struct vm_vgic_version *vgv;
+
vgv = (struct vm_vgic_version *)data;
/* TODO: Query the vgic driver for this */
vgv->version = 3;
vgv->flags = 0;
error = 0;
break;
- case VM_ATTACH_VGIC:
+ }
+ case VM_ATTACH_VGIC: {
+ struct vm_vgic_descr *vgic;
+
vgic = (struct vm_vgic_descr *)data;
error = vm_attach_vgic(vm, vgic);
break;
- case VM_RAISE_MSI:
+ }
+ case VM_RAISE_MSI: {
+ struct vm_msi *vmsi;
+
vmsi = (struct vm_msi *)data;
error = vm_raise_msi(vm, vmsi->msg, vmsi->addr, vmsi->bus,
vmsi->slot, vmsi->func);
break;
- case VM_ASSERT_IRQ:
+ }
+ case VM_ASSERT_IRQ: {
+ struct vm_irq *vi;
+
vi = (struct vm_irq *)data;
error = vm_assert_irq(vm, vi->irq);
break;
- case VM_DEASSERT_IRQ:
+ }
+ case VM_DEASSERT_IRQ: {
+ struct vm_irq *vi;
+
vi = (struct vm_irq *)data;
error = vm_deassert_irq(vm, vi->irq);
break;
+ }
default:
error = ENOTTY;
break;
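
For reference, the per-case locals above mirror how userspace drives these ioctls. A hypothetical caller of VM_RAISE_MSI might look like the sketch below; the /dev/vmm/<name> device path and the header set are assumptions, only the ioctl name and the struct vm_msi fields come from the handler itself:

#include <sys/types.h>
#include <sys/ioctl.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Inject an MSI into a running VM (sketch; assumes a /dev/vmm/<name> node). */
static int
raise_msi(const char *vmname, uint64_t addr, uint64_t msg)
{
	struct vm_msi vmsi;
	char path[64];
	int error, fd;

	snprintf(path, sizeof(path), "/dev/vmm/%s", vmname);
	fd = open(path, O_RDWR);
	if (fd == -1)
		return (-1);

	memset(&vmsi, 0, sizeof(vmsi));
	vmsi.addr = addr;	/* doorbell address the device would write */
	vmsi.msg = msg;		/* MSI data payload */
	vmsi.bus = 0;		/* example PCI location of the source device */
	vmsi.slot = 3;
	vmsi.func = 0;

	/* Dispatched to vm_raise_msi() by vmmdev_machdep_ioctl() above. */
	error = ioctl(fd, VM_RAISE_MSI, &vmsi);
	close(fd);
	return (error);
}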
diff --git a/sys/arm64/vmm/vmm_hyp.c b/sys/arm64/vmm/vmm_hyp.c
index d61885c15871..0ad7930e9a87 100644
--- a/sys/arm64/vmm/vmm_hyp.c
+++ b/sys/arm64/vmm/vmm_hyp.c
@@ -32,7 +32,6 @@
#include <sys/types.h>
#include <sys/proc.h>
-#include <machine/armreg.h>
#include "arm64.h"
#include "hyp.h"
@@ -42,11 +41,11 @@ struct hypctx;
uint64_t VMM_HYP_FUNC(do_call_guest)(struct hypctx *);
static void
-vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest)
+vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest,
+ bool ecv_poff)
{
uint64_t dfr0;
- /* Store the guest VFP registers */
if (guest) {
/* Store the timer registers */
hypctx->vtimer_cpu.cntkctl_el1 =
@@ -55,7 +54,20 @@ vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest)
READ_SPECIALREG(EL0_REG(CNTV_CVAL));
hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0 =
READ_SPECIALREG(EL0_REG(CNTV_CTL));
+ }
+ if (guest_or_nonvhe(guest) && ecv_poff) {
+ /*
+ * If we have ECV then the guest could modify these registers.
+ * If VHE is enabled then the kernel will see a different view
+		 * of the registers, so it doesn't need to handle them.
+ */
+ hypctx->vtimer_cpu.phys_timer.cntx_cval_el0 =
+ READ_SPECIALREG(EL0_REG(CNTP_CVAL));
+ hypctx->vtimer_cpu.phys_timer.cntx_ctl_el0 =
+ READ_SPECIALREG(EL0_REG(CNTP_CTL));
+ }
+ if (guest) {
/* Store the GICv3 registers */
hypctx->vgic_v3_regs.ich_eisr_el2 =
READ_SPECIALREG(ich_eisr_el2);
@@ -108,6 +120,8 @@ vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest)
}
}
+ hypctx->dbgclaimset_el1 = READ_SPECIALREG(dbgclaimset_el1);
+
dfr0 = READ_SPECIALREG(id_aa64dfr0_el1);
switch (ID_AA64DFR0_BRPs_VAL(dfr0) - 1) {
#define STORE_DBG_BRP(x) \
@@ -167,10 +181,13 @@ vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest)
hypctx->pmcr_el0 = READ_SPECIALREG(pmcr_el0);
hypctx->pmccntr_el0 = READ_SPECIALREG(pmccntr_el0);
hypctx->pmccfiltr_el0 = READ_SPECIALREG(pmccfiltr_el0);
+ hypctx->pmuserenr_el0 = READ_SPECIALREG(pmuserenr_el0);
+ hypctx->pmselr_el0 = READ_SPECIALREG(pmselr_el0);
+ hypctx->pmxevcntr_el0 = READ_SPECIALREG(pmxevcntr_el0);
hypctx->pmcntenset_el0 = READ_SPECIALREG(pmcntenset_el0);
hypctx->pmintenset_el1 = READ_SPECIALREG(pmintenset_el1);
hypctx->pmovsset_el0 = READ_SPECIALREG(pmovsset_el0);
- hypctx->pmuserenr_el0 = READ_SPECIALREG(pmuserenr_el0);
+
switch ((hypctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT) {
#define STORE_PMU(x) \
case (x + 1): \
@@ -259,29 +276,20 @@ vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest)
hypctx->hcr_el2 = READ_SPECIALREG(hcr_el2);
hypctx->vpidr_el2 = READ_SPECIALREG(vpidr_el2);
hypctx->vmpidr_el2 = READ_SPECIALREG(vmpidr_el2);
-
-#ifndef VMM_VHE
- /* hcrx_el2 depends on feat_hcx */
- uint64_t mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
- if (ID_AA64MMFR1_HCX_VAL(mmfr1) >> ID_AA64MMFR1_HCX_SHIFT) {
- hypctx->hcrx_el2 = READ_SPECIALREG(MRS_REG_ALT_NAME(HCRX_EL2));
- }
-#endif
}
static void
-vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest)
+vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest,
+ bool ecv_poff)
{
uint64_t dfr0;
/* Restore the special registers */
WRITE_SPECIALREG(hcr_el2, hypctx->hcr_el2);
- if (guest_or_nonvhe(guest)) {
- uint64_t mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
- if (ID_AA64MMFR1_HCX_VAL(mmfr1) >> ID_AA64MMFR1_HCX_SHIFT) {
- WRITE_SPECIALREG(MRS_REG_ALT_NAME(HCRX_EL2), hypctx->hcrx_el2);
- }
+ if (guest) {
+ if ((hyp->feats & HYP_FEAT_HCX) != 0)
+ WRITE_SPECIALREG(HCRX_EL2_REG, hypctx->hcrx_el2);
}
isb();
@@ -333,12 +341,15 @@ vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest)
WRITE_SPECIALREG(pmcr_el0, hypctx->pmcr_el0);
WRITE_SPECIALREG(pmccntr_el0, hypctx->pmccntr_el0);
WRITE_SPECIALREG(pmccfiltr_el0, hypctx->pmccfiltr_el0);
+ WRITE_SPECIALREG(pmuserenr_el0, hypctx->pmuserenr_el0);
+ WRITE_SPECIALREG(pmselr_el0, hypctx->pmselr_el0);
+ WRITE_SPECIALREG(pmxevcntr_el0, hypctx->pmxevcntr_el0);
/* Clear all events/interrupts then enable them */
- WRITE_SPECIALREG(pmcntenclr_el0, 0xfffffffful);
+ WRITE_SPECIALREG(pmcntenclr_el0, ~0ul);
WRITE_SPECIALREG(pmcntenset_el0, hypctx->pmcntenset_el0);
- WRITE_SPECIALREG(pmintenclr_el1, 0xfffffffful);
+ WRITE_SPECIALREG(pmintenclr_el1, ~0ul);
WRITE_SPECIALREG(pmintenset_el1, hypctx->pmintenset_el1);
- WRITE_SPECIALREG(pmovsclr_el0, 0xfffffffful);
+ WRITE_SPECIALREG(pmovsclr_el0, ~0ul);
WRITE_SPECIALREG(pmovsset_el0, hypctx->pmovsset_el0);
switch ((hypctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT) {
@@ -384,6 +395,9 @@ vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest)
#undef LOAD_PMU
}
+ WRITE_SPECIALREG(dbgclaimclr_el1, ~0ul);
+ WRITE_SPECIALREG(dbgclaimclr_el1, hypctx->dbgclaimset_el1);
+
dfr0 = READ_SPECIALREG(id_aa64dfr0_el1);
switch (ID_AA64DFR0_BRPs_VAL(dfr0) - 1) {
#define LOAD_DBG_BRP(x) \
@@ -450,6 +464,29 @@ vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest)
WRITE_SPECIALREG(cnthctl_el2, hyp->vtimer.cnthctl_el2);
WRITE_SPECIALREG(cntvoff_el2, hyp->vtimer.cntvoff_el2);
+ if (ecv_poff) {
+ /*
+		 * Load the same offset as the virtual timer so the guest's
+		 * physical and virtual time stay in sync.
+ */
+ WRITE_SPECIALREG(CNTPOFF_EL2_REG,
+ hyp->vtimer.cntvoff_el2);
+ isb();
+ }
+ }
+ if (guest_or_nonvhe(guest) && ecv_poff) {
+ /*
+ * If we have ECV then the guest could modify these registers.
+ * If VHE is enabled then the kernel will see a different view
+		 * of the registers, so it doesn't need to handle them.
+ */
+ WRITE_SPECIALREG(EL0_REG(CNTP_CVAL),
+ hypctx->vtimer_cpu.phys_timer.cntx_cval_el0);
+ WRITE_SPECIALREG(EL0_REG(CNTP_CTL),
+ hypctx->vtimer_cpu.phys_timer.cntx_ctl_el0);
+ }
+
+ if (guest) {
/* Load the GICv3 registers */
WRITE_SPECIALREG(ich_hcr_el2, hypctx->vgic_v3_regs.ich_hcr_el2);
WRITE_SPECIALREG(ich_vmcr_el2,
@@ -502,11 +539,19 @@ vmm_hyp_call_guest(struct hyp *hyp, struct hypctx *hypctx)
struct hypctx host_hypctx;
uint64_t cntvoff_el2;
uint64_t ich_hcr_el2, ich_vmcr_el2, cnthctl_el2, cntkctl_el1;
+#ifndef VMM_VHE
+ uint64_t hcrx_el2;
+#endif
uint64_t ret;
uint64_t s1e1r, hpfar_el2;
- bool hpfar_valid;
+ bool ecv_poff, hpfar_valid;
- vmm_hyp_reg_store(&host_hypctx, NULL, false);
+ ecv_poff = (hyp->vtimer.cnthctl_el2 & CNTHCTL_ECV_EN) != 0;
+ vmm_hyp_reg_store(&host_hypctx, NULL, false, ecv_poff);
+#ifndef VMM_VHE
+ if ((hyp->feats & HYP_FEAT_HCX) != 0)
+ hcrx_el2 = READ_SPECIALREG(MRS_REG_ALT_NAME(HCRX_EL2));
+#endif
/* Save the host special registers */
cnthctl_el2 = READ_SPECIALREG(cnthctl_el2);
@@ -516,7 +561,7 @@ vmm_hyp_call_guest(struct hyp *hyp, struct hypctx *hypctx)
ich_hcr_el2 = READ_SPECIALREG(ich_hcr_el2);
ich_vmcr_el2 = READ_SPECIALREG(ich_vmcr_el2);
- vmm_hyp_reg_restore(hypctx, hyp, true);
+ vmm_hyp_reg_restore(hypctx, hyp, true, ecv_poff);
/* Load the common hypervisor registers */
WRITE_SPECIALREG(vttbr_el2, hyp->vttbr_el2);
@@ -532,7 +577,7 @@ vmm_hyp_call_guest(struct hyp *hyp, struct hypctx *hypctx)
/* Store the exit info */
hypctx->exit_info.far_el2 = READ_SPECIALREG(far_el2);
- vmm_hyp_reg_store(hypctx, hyp, true);
+ vmm_hyp_reg_store(hypctx, hyp, true, ecv_poff);
hpfar_valid = true;
if (ret == EXCP_TYPE_EL1_SYNC) {
@@ -582,7 +627,12 @@ vmm_hyp_call_guest(struct hyp *hyp, struct hypctx *hypctx)
}
}
- vmm_hyp_reg_restore(&host_hypctx, NULL, false);
+ vmm_hyp_reg_restore(&host_hypctx, NULL, false, ecv_poff);
+
+#ifndef VMM_VHE
+ if ((hyp->feats & HYP_FEAT_HCX) != 0)
+ WRITE_SPECIALREG(MRS_REG_ALT_NAME(HCRX_EL2), hcrx_el2);
+#endif
/* Restore the host special registers */
WRITE_SPECIALREG(ich_hcr_el2, ich_hcr_el2);
@@ -613,8 +663,6 @@ VMM_HYP_FUNC(read_reg)(uint64_t reg)
switch (reg) {
case HYP_REG_ICH_VTR:
return (READ_SPECIALREG(ich_vtr_el2));
- case HYP_REG_CNTHCTL:
- return (READ_SPECIALREG(cnthctl_el2));
}
return (0);
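
The ecv_poff paths added earlier in this file program CNTPOFF_EL2 with the same value as CNTVOFF_EL2. A sketch of the resulting counter arithmetic, assuming the architectural FEAT_ECV offset behaviour (hypothetical helpers, not code from the change):

#include "arm64.h"

/* Guest-visible counter values for a given host counter reading. */
static inline uint64_t
guest_cntvct(const struct hyp *hyp, uint64_t host_cntpct)
{
	/* Guest CNTVCT_EL0 = host CNTPCT_EL0 - CNTVOFF_EL2. */
	return (host_cntpct - hyp->vtimer.cntvoff_el2);
}

static inline uint64_t
guest_cntpct(const struct hyp *hyp, uint64_t host_cntpct)
{
	/*
	 * With FEAT_ECV, guest CNTPCT_EL0 = host CNTPCT_EL0 - CNTPOFF_EL2.
	 * CNTPOFF_EL2 is loaded with cntvoff_el2 above, so the guest's
	 * physical and virtual counters read the same value.
	 */
	return (host_cntpct - hyp->vtimer.cntvoff_el2);
}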
diff --git a/sys/arm64/vmm/vmm_reset.c b/sys/arm64/vmm/vmm_reset.c
index 79d022cf33e8..0e4910ea87b4 100644
--- a/sys/arm64/vmm/vmm_reset.c
+++ b/sys/arm64/vmm/vmm_reset.c
@@ -31,7 +31,6 @@
#include <sys/kernel.h>
#include <sys/lock.h>
-#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/hypervisor.h>
@@ -100,10 +99,12 @@ reset_vm_el01_regs(void *vcpu)
el2ctx->pmcr_el0 |= PMCR_LC;
set_arch_unknown(el2ctx->pmccntr_el0);
set_arch_unknown(el2ctx->pmccfiltr_el0);
+ set_arch_unknown(el2ctx->pmuserenr_el0);
+ set_arch_unknown(el2ctx->pmselr_el0);
+ set_arch_unknown(el2ctx->pmxevcntr_el0);
set_arch_unknown(el2ctx->pmcntenset_el0);
set_arch_unknown(el2ctx->pmintenset_el1);
set_arch_unknown(el2ctx->pmovsset_el0);
- set_arch_unknown(el2ctx->pmuserenr_el0);
memset(el2ctx->pmevcntr_el0, 0, sizeof(el2ctx->pmevcntr_el0));
memset(el2ctx->pmevtyper_el0, 0, sizeof(el2ctx->pmevtyper_el0));
}
@@ -143,7 +144,8 @@ reset_vm_el2_regs(void *vcpu)
/* Set the Extended Hypervisor Configuration Register */
el2ctx->hcrx_el2 = 0;
/* TODO: Trap all extensions we don't support */
- el2ctx->mdcr_el2 = 0;
+ el2ctx->mdcr_el2 = MDCR_EL2_TDOSA | MDCR_EL2_TDRA | MDCR_EL2_TPMS |
+ MDCR_EL2_TTRF;
/* PMCR_EL0.N is read from MDCR_EL2.HPMN */
el2ctx->mdcr_el2 |= (el2ctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT;
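
The new MDCR_EL2 reset value traps the debug and trace state the hypervisor does not yet context-switch. The same assignment with the architectural meaning of each trap bit spelled out (annotation only, not part of the patch):

	el2ctx->mdcr_el2 =
	    MDCR_EL2_TDOSA |	/* trap OS Lock / OS DoubleLock register accesses */
	    MDCR_EL2_TDRA |	/* trap debug ROM address register accesses */
	    MDCR_EL2_TPMS |	/* trap Statistical Profiling (SPE) controls */
	    MDCR_EL2_TTRF;	/* trap the trace filter control, TRFCR_EL1 */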