Diffstat (limited to 'sys/arm64')
-rw-r--r-- sys/arm64/arm64/db_disasm.c | 1
-rw-r--r-- sys/arm64/arm64/elf32_machdep.c | 2
-rw-r--r-- sys/arm64/arm64/gic_v3.c | 2
-rw-r--r-- sys/arm64/arm64/gic_v3_var.h | 2
-rw-r--r-- sys/arm64/arm64/gicv3_its.c | 4
-rw-r--r-- sys/arm64/arm64/kexec_support.c | 188
-rw-r--r-- sys/arm64/arm64/locore.S | 48
-rw-r--r-- sys/arm64/arm64/machdep_boot.c | 3
-rw-r--r-- sys/arm64/arm64/mp_machdep.c | 82
-rw-r--r-- sys/arm64/arm64/nexus.c | 85
-rw-r--r-- sys/arm64/conf/std.arm | 3
-rw-r--r-- sys/arm64/conf/std.arm64 | 6
-rw-r--r-- sys/arm64/conf/std.broadcom | 3
-rw-r--r-- sys/arm64/include/_armreg.h | 57
-rw-r--r-- sys/arm64/include/armreg.h | 35
-rw-r--r-- sys/arm64/include/cpu.h | 2
-rw-r--r-- sys/arm64/include/cpufunc.h | 7
-rw-r--r-- sys/arm64/include/db_machdep.h | 1
-rw-r--r-- sys/arm64/include/hypervisor.h | 3
-rw-r--r-- sys/arm64/include/kexec.h | 33
-rw-r--r-- sys/arm64/include/pcpu.h | 3
-rw-r--r-- sys/arm64/include/smp.h | 1
-rw-r--r-- sys/arm64/include/vmm.h | 21
-rw-r--r-- sys/arm64/include/vmm_dev.h | 2
-rw-r--r-- sys/arm64/nvidia/tegra210/max77620_regulators.c | 4
-rw-r--r-- sys/arm64/rockchip/rk_i2s.c | 8
-rw-r--r-- sys/arm64/spe/arm_spe.h | 77
-rw-r--r-- sys/arm64/spe/arm_spe_acpi.c | 146
-rw-r--r-- sys/arm64/spe/arm_spe_backend.c | 586
-rw-r--r-- sys/arm64/spe/arm_spe_dev.c | 324
-rw-r--r-- sys/arm64/spe/arm_spe_dev.h | 162
-rw-r--r-- sys/arm64/spe/arm_spe_fdt.c | 75
-rw-r--r-- sys/arm64/vmm/io/vgic_v3.c | 1
-rw-r--r-- sys/arm64/vmm/io/vtimer.c | 1
-rw-r--r-- sys/arm64/vmm/vmm.c | 112
-rw-r--r-- sys/arm64/vmm/vmm_arm64.c | 15
-rw-r--r-- sys/arm64/vmm/vmm_dev_machdep.c | 43
-rw-r--r-- sys/arm64/vmm/vmm_hyp.c | 1
-rw-r--r-- sys/arm64/vmm/vmm_reset.c | 1
39 files changed, 1972 insertions, 178 deletions
diff --git a/sys/arm64/arm64/db_disasm.c b/sys/arm64/arm64/db_disasm.c
index ab1002560b20..14ae2acc2ce6 100644
--- a/sys/arm64/arm64/db_disasm.c
+++ b/sys/arm64/arm64/db_disasm.c
@@ -31,6 +31,7 @@
#include <ddb/db_access.h>
#include <ddb/db_sym.h>
+#include <machine/armreg.h>
#include <machine/disassem.h>
static u_int db_disasm_read_word(vm_offset_t);
diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c
index 8f8a934ad520..4cb8ee5f57ef 100644
--- a/sys/arm64/arm64/elf32_machdep.c
+++ b/sys/arm64/arm64/elf32_machdep.c
@@ -210,7 +210,7 @@ freebsd32_fetch_syscall_args(struct thread *td)
sa->code = *ap++;
nap--;
} else if (sa->code == SYS___syscall) {
- sa->code = ap[1];
+ sa->code = ap[_QUAD_LOWWORD];
nap -= 2;
ap += 2;
}
diff --git a/sys/arm64/arm64/gic_v3.c b/sys/arm64/arm64/gic_v3.c
index 201cdae6de09..641b6d6dbc5e 100644
--- a/sys/arm64/arm64/gic_v3.c
+++ b/sys/arm64/arm64/gic_v3.c
@@ -494,7 +494,7 @@ gic_v3_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
case GICV3_IVAR_REDIST:
*result = (uintptr_t)&sc->gic_redists.pcpu[PCPU_GET(cpuid)];
return (0);
- case GICV3_IVAR_SUPPORT_LPIS:
+ case GIC_IVAR_SUPPORT_LPIS:
*result =
(gic_d_read(sc, 4, GICD_TYPER) & GICD_TYPER_LPIS) != 0;
return (0);
diff --git a/sys/arm64/arm64/gic_v3_var.h b/sys/arm64/arm64/gic_v3_var.h
index 8bc0f456d91e..2570834c2818 100644
--- a/sys/arm64/arm64/gic_v3_var.h
+++ b/sys/arm64/arm64/gic_v3_var.h
@@ -108,11 +108,9 @@ MALLOC_DECLARE(M_GIC_V3);
#define GICV3_IVAR_NIRQS 1000
/* 1001 was GICV3_IVAR_REDIST_VADDR */
#define GICV3_IVAR_REDIST 1002
-#define GICV3_IVAR_SUPPORT_LPIS 1003
__BUS_ACCESSOR(gicv3, nirqs, GICV3, NIRQS, u_int);
__BUS_ACCESSOR(gicv3, redist, GICV3, REDIST, void *);
-__BUS_ACCESSOR(gicv3, support_lpis, GICV3, SUPPORT_LPIS, bool);
/* Device methods */
int gic_v3_attach(device_t dev);
diff --git a/sys/arm64/arm64/gicv3_its.c b/sys/arm64/arm64/gicv3_its.c
index 546a225abf09..7821b1512083 100644
--- a/sys/arm64/arm64/gicv3_its.c
+++ b/sys/arm64/arm64/gicv3_its.c
@@ -2222,7 +2222,7 @@ gicv3_its_fdt_probe(device_t dev)
if (!ofw_bus_is_compatible(dev, "arm,gic-v3-its"))
return (ENXIO);
- if (!gicv3_get_support_lpis(dev))
+ if (!gic_get_support_lpis(dev))
return (ENXIO);
device_set_desc(dev, "ARM GIC Interrupt Translation Service");
@@ -2294,7 +2294,7 @@ gicv3_its_acpi_probe(device_t dev)
if (gic_get_hw_rev(dev) < 3)
return (EINVAL);
- if (!gicv3_get_support_lpis(dev))
+ if (!gic_get_support_lpis(dev))
return (ENXIO);
device_set_desc(dev, "ARM GIC Interrupt Translation Service");
diff --git a/sys/arm64/arm64/kexec_support.c b/sys/arm64/arm64/kexec_support.c
new file mode 100644
index 000000000000..8b9719c05b67
--- /dev/null
+++ b/sys/arm64/arm64/kexec_support.c
@@ -0,0 +1,188 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/kexec.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_radix.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+
+#include <machine/armreg.h>
+#include <machine/pmap.h>
+#include <machine/pte.h>
+
+/*
+ * Idea behind this:
+ *
+ * kexec_load_md():
+ * - Update boot page tables (identity map) to include all pages needed before
+ * disabling MMU.
+ *
+ * kexec_reboot_md():
+ * - Copy pages into target(s)
+ * - Do "other stuff"
+ * - Does not return
+ */
+
+extern pt_entry_t pagetable_l0_ttbr0_bootstrap[];
+extern unsigned long initstack_end[];
+void switch_stack(void *, void (*)(void *, void *, struct kexec_image *), void *);
+
+#define SCTLR_EL1_NO_MMU (SCTLR_RES1 | SCTLR_LSMAOE | SCTLR_nTLSMD | \
+ SCTLR_EIS | SCTLR_TSCXT | SCTLR_EOS)
+#define vm_page_offset(m) ((vm_offset_t)(m) - vm_page_base)
+static inline vm_page_t
+phys_vm_page(vm_page_t m, vm_offset_t vm_page_v, vm_paddr_t vm_page_p)
+{
+ return ((vm_page_t)((vm_offset_t)m - vm_page_v + vm_page_p));
+}
+
+/* First 2 args are filler for switch_stack() */
+static void __aligned(16) __dead2
+kexec_reboot_bottom(void *arg1 __unused, void *arg2 __unused,
+ struct kexec_image *image)
+{
+ void (*e)(void) = (void *)image->entry;
+ vm_offset_t vm_page_base = (vm_offset_t)vm_page_array;
+ vm_paddr_t vm_page_phys = pmap_kextract((vm_offset_t)vm_page_array);
+ struct kexec_segment_stage *phys_segs =
+ (void *)pmap_kextract((vm_offset_t)&image->segments);
+ vm_paddr_t from_pa, to_pa;
+ vm_size_t size;
+ vm_page_t first, m, mp;
+ struct pctrie_iter pct_i;
+
+ /*
+ * Create a linked list of all pages in the object before we disable the
+ * MMU. Once the MMU is disabled we can't use the vm_radix iterators,
+ * as they rely on virtual address pointers.
+ */
+ first = NULL;
+ vm_radix_iter_init(&pct_i, &image->map_obj->rtree);
+ VM_RADIX_FORALL(m, &pct_i) {
+ if (first == NULL)
+ first = m;
+ else
+ SLIST_INSERT_AFTER(mp, m, plinks.s.ss);
+ mp = m;
+ }
+
+ /*
+ * We're running out of the identity map now, disable the MMU before we
+ * continue. It's possible page tables can be overwritten, which would
+ * be very bad if we were running with the MMU enabled.
+ */
+ WRITE_SPECIALREG(sctlr_el1, SCTLR_EL1_NO_MMU);
+ isb();
+ for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) {
+ if (phys_segs[i].size == 0)
+ break;
+ to_pa = phys_segs[i].target;
+ /* Copy the segment here... */
+ for (vm_page_t p = phys_segs[i].first_page;
+ p != NULL && to_pa - phys_segs[i].target < phys_segs[i].size;
+ p = SLIST_NEXT(p, plinks.s.ss)) {
+ p = phys_vm_page(p, vm_page_base, vm_page_phys);
+ from_pa = p->phys_addr;
+ if (p->phys_addr == to_pa) {
+ to_pa += PAGE_SIZE;
+ continue;
+ }
+ for (size = PAGE_SIZE / sizeof(register_t);
+ size > 0; --size) {
+ *(register_t *)to_pa = *(register_t *)from_pa;
+ to_pa += sizeof(register_t);
+ from_pa += sizeof(register_t);
+ }
+ }
+ }
+ invalidate_icache();
+ e();
+ while (1)
+ ;
+}
+
+void
+kexec_reboot_md(struct kexec_image *image)
+{
+ uintptr_t ptr;
+ register_t reg;
+
+ for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) {
+ if (image->segments[i].size > 0)
+ cpu_dcache_inv_range((void *)PHYS_TO_DMAP(image->segments[i].target),
+ image->segments[i].size);
+ }
+ ptr = pmap_kextract((vm_offset_t)kexec_reboot_bottom);
+ serror_disable();
+
+ reg = pmap_kextract((vm_offset_t)pagetable_l0_ttbr0_bootstrap);
+ set_ttbr0(reg);
+ cpu_tlb_flushID();
+
+ typeof(kexec_reboot_bottom) *p = (void *)ptr;
+ switch_stack((void *)pmap_kextract((vm_offset_t)initstack_end),
+ p, image);
+ while (1)
+ ;
+}
+
+int
+kexec_load_md(struct kexec_image *image)
+{
+ vm_paddr_t tmp;
+ pt_entry_t *pte;
+
+ /* Create L2 page blocks for the trampoline. L0/L1 are from the startup. */
+
+	/*
+	 * There are exactly 2 pages of L2 tables before
+	 * pagetable_l0_ttbr0_bootstrap, so move back to them.
+	 */
+ pte = pagetable_l0_ttbr0_bootstrap;
+ pte -= (Ln_ENTRIES * 2); /* move to start of L2 pages */
+
+ /*
+ * Populate the identity map with symbols we know we'll need before we
+ * turn off the MMU.
+ */
+ tmp = pmap_kextract((vm_offset_t)kexec_reboot_bottom);
+ pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN);
+ tmp = pmap_kextract((vm_offset_t)initstack_end);
+ pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN);
+ /* We'll need vm_page_array for doing offset calculations. */
+ tmp = pmap_kextract((vm_offset_t)&vm_page_array);
+ pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN);
+
+ return (0);
+}
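kexec_load_md() above installs 2MiB L2 block mappings selected by pmap_l2_index(). As a rough standalone illustration of that indexing (a sketch assuming the standard arm64 4KB-granule constants from sys/arm64/include/pte.h; not part of the patch):

	#include <stdint.h>
	#include <stdio.h>

	#define L2_SHIFT	21			/* an L2 block maps 2MiB */
	#define Ln_ENTRIES	512			/* entries per table level */
	#define Ln_ADDR_MASK	(Ln_ENTRIES - 1)

	/* Mirrors pmap_l2_index(): which L2 slot an address falls in. */
	static unsigned
	l2_index(uint64_t pa)
	{
		return ((pa >> L2_SHIFT) & Ln_ADDR_MASK);
	}

	int
	main(void)
	{
		uint64_t pa = 0x83200000UL;	/* hypothetical trampoline address */

		/* 0x83200000 >> 21 == 0x419; 0x419 & 0x1ff == 25 */
		printf("L2 index %u maps %#lx-%#lx\n", l2_index(pa),
		    pa & ~((1UL << L2_SHIFT) - 1),
		    pa | ((1UL << L2_SHIFT) - 1));
		return (0);
	}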
diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S
index d35e334905a7..c22d5fe76468 100644
--- a/sys/arm64/arm64/locore.S
+++ b/sys/arm64/arm64/locore.S
@@ -325,6 +325,19 @@ mp_virtdone:
b init_secondary
LEND(mpentry_common)
+
+ENTRY(mp_cpu_spinloop)
+0:
+ wfe
+ ldr x0, mp_cpu_spin_table_release_addr
+ cbz x0, 0b
+ blr x0
+ .globl mp_cpu_spin_table_release_addr
+mp_cpu_spin_table_release_addr:
+ .quad 0
+ .globl mp_cpu_spinloop_end
+mp_cpu_spinloop_end:
+END(mp_cpu_spinloop)
#endif
/*
@@ -432,6 +445,10 @@ LENTRY(enter_kernel_el)
ldr x3, =(CNTHCTL_EL1PCTEN_NOTRAP | CNTHCTL_EL1PCEN_NOTRAP)
ldr x5, =(PSR_DAIF | PSR_M_EL1h)
+ /* Enable SPE at EL1 via Monitor Debug Configuration Register */
+ mov x6, MDCR_EL2_E2PB_EL1_0_NO_TRAP
+ msr mdcr_el2, x6
+
.Ldone_vhe:
msr cptr_el2, x2
@@ -475,6 +492,29 @@ LENTRY(enter_kernel_el)
eret
LEND(enter_kernel_el)
+/*
+ * Turn off the MMU. Install ttbr0 from the bootstrap page table, and go there.
+ * Does not return.
+ * - x0 - target address to jump to after stopping the MMU.
+ * - x1 - kernel load address
+ */
+ENTRY(stop_mmu)
+ mov x16, x0 /* Save target. */
+ ldr x2, =(1f - KERNBASE)
+ add x17, x1, x2
+ ldr x3, =(pagetable_l0_ttbr0_bootstrap - KERNBASE)
+ add x1, x1, x3
+ msr ttbr0_el1, x1
+ isb
+ br x17
+1:
+ BTI_J
+ mrs x0, sctlr_el1
+ bic x0, x0, SCTLR_M
+ bic x0, x0, SCTLR_C
+ msr sctlr_el1, x0
+ isb
+ br x16
+END(stop_mmu)
/*
* Get the physical address the kernel was loaded at.
*/
@@ -1094,12 +1134,19 @@ tcr:
TCR_SH0_IS | TCR_ORGN0_WBWA | TCR_IRGN0_WBWA)
LEND(start_mmu)
+ENTRY(switch_stack)
+ mov sp, x0
+ mov x16, x1
+ br x16
+END(switch_stack)
+
ENTRY(abort)
b abort
END(abort)
.bss
.align PAGE_SHIFT
+ .globl initstack_end
initstack:
.space BOOT_STACK_SIZE
initstack_end:
@@ -1116,6 +1163,7 @@ initstack_end:
* L0 for user
*/
.globl pagetable_l0_ttbr1
+ .globl pagetable_l0_ttbr0_bootstrap
pagetable:
pagetable_l3_ttbr1:
.space (PAGE_SIZE * L3_PAGE_COUNT)
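A note on switch_stack() above: it moves x0 into sp and branches to x1, leaving x2 untouched, which is why kexec_support.c declares the callback with two filler arguments -- the real payload rides through in x2. Illustrative call, matching kexec_reboot_md() earlier in this commit:

	/* fn is called as fn(stack, fn, image); the first two are ignored. */
	switch_stack(new_stack_top, kexec_reboot_bottom, image);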
diff --git a/sys/arm64/arm64/machdep_boot.c b/sys/arm64/arm64/machdep_boot.c
index 83bd74ea7317..1c5e8189e436 100644
--- a/sys/arm64/arm64/machdep_boot.c
+++ b/sys/arm64/arm64/machdep_boot.c
@@ -106,7 +106,8 @@ fake_preload_metadata(void *dtb_ptr, size_t dtb_size)
PRELOAD_PUSH_VALUE(uint32_t, MODINFO_SIZE);
PRELOAD_PUSH_VALUE(uint32_t, sizeof(size_t));
- PRELOAD_PUSH_VALUE(uint64_t, (size_t)(&end - VM_MIN_KERNEL_ADDRESS));
+ PRELOAD_PUSH_VALUE(uint64_t,
+ (size_t)((vm_offset_t)&end - VM_MIN_KERNEL_ADDRESS));
if (dtb_ptr != NULL) {
/* Copy DTB to KVA space and insert it into module chain. */
diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c
index e4d011df3a06..ba673ce9d6ee 100644
--- a/sys/arm64/arm64/mp_machdep.c
+++ b/sys/arm64/arm64/mp_machdep.c
@@ -60,6 +60,7 @@
#include <machine/debug_monitor.h>
#include <machine/intr.h>
#include <machine/smp.h>
+#include <machine/vmparam.h>
#ifdef VFP
#include <machine/vfp.h>
#endif
@@ -103,6 +104,7 @@ static void ipi_hardclock(void *);
static void ipi_preempt(void *);
static void ipi_rendezvous(void *);
static void ipi_stop(void *);
+static void ipi_off(void *);
#ifdef FDT
static u_int fdt_cpuid;
@@ -193,6 +195,7 @@ release_aps(void *dummy __unused)
intr_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL);
intr_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL);
intr_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL);
+ intr_ipi_setup(IPI_OFF, "off", ipi_off, NULL);
atomic_store_int(&aps_started, 0);
atomic_store_rel_int(&aps_ready, 1);
@@ -267,6 +270,8 @@ init_secondary(uint64_t cpu)
install_cpu_errata();
enable_cpu_feat(CPU_FEAT_AFTER_DEV);
+ intr_pic_init_secondary();
+
/* Signal we are done */
atomic_add_int(&aps_started, 1);
@@ -285,8 +290,6 @@ init_secondary(uint64_t cpu)
("pmap0 doesn't match cpu %ld's ttbr0", cpu));
pcpup->pc_curpmap = pmap0;
- intr_pic_init_secondary();
-
/* Start per-CPU event timers. */
cpu_initclocks_ap();
@@ -390,6 +393,34 @@ ipi_stop(void *dummy __unused)
CTR0(KTR_SMP, "IPI_STOP (restart)");
}
+void stop_mmu(vm_paddr_t, vm_paddr_t) __dead2;
+extern uint32_t mp_cpu_spinloop[];
+extern uint32_t mp_cpu_spinloop_end[];
+extern uint64_t mp_cpu_spin_table_release_addr;
+static void
+ipi_off(void *dummy __unused)
+{
+ CTR0(KTR_SMP, "IPI_OFF");
+	if (psci_present) {
+		psci_cpu_off();
+	} else {
+ uint64_t release_addr;
+ vm_size_t size;
+
+ size = (vm_offset_t)&mp_cpu_spin_table_release_addr -
+ (vm_offset_t)mp_cpu_spinloop;
+ release_addr = PCPU_GET(release_addr) - size;
+ isb();
+ invalidate_icache();
+ /* Go catatonic, don't take any interrupts. */
+ intr_disable();
+		stop_mmu(release_addr, pmap_kextract(KERNBASE));
+	}
+ CTR0(KTR_SMP, "IPI_OFF failed");
+}
+
struct cpu_group *
cpu_topo(void)
{
@@ -511,6 +542,7 @@ start_cpu(u_int cpuid, uint64_t target_cpu, int domain, vm_paddr_t release_addr)
pcpu_init(pcpup, cpuid, sizeof(struct pcpu));
pcpup->pc_mpidr = target_cpu & CPU_AFF_MASK;
bootpcpu = pcpup;
+ pcpup->pc_release_addr = release_addr;
dpcpu[cpuid - 1] = (void *)(pcpup + 1);
dpcpu_init(dpcpu[cpuid - 1], cpuid);
@@ -752,6 +784,52 @@ cpu_mp_start(void)
}
}
+void
+cpu_mp_stop(void)
+{
+
+ /* Short-circuit for single-CPU */
+ if (CPU_COUNT(&all_cpus) == 1)
+ return;
+
+ KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), ("Not on the first CPU!\n"));
+
+ /*
+ * If we use spin-table, assume U-boot method for now (single address
+ * shared by all CPUs).
+ */
+ if (!psci_present) {
+ int cpu;
+ vm_paddr_t release_addr;
+ void *release_vaddr;
+ vm_size_t size;
+
+ /* Find the shared release address. */
+ CPU_FOREACH(cpu) {
+ release_addr = pcpu_find(cpu)->pc_release_addr;
+ if (release_addr != 0)
+ break;
+ }
+ /* No release address? No way of notifying other CPUs. */
+ if (release_addr == 0)
+ return;
+
+ size = (vm_offset_t)&mp_cpu_spinloop_end -
+ (vm_offset_t)&mp_cpu_spinloop;
+
+ release_addr -= (vm_offset_t)&mp_cpu_spin_table_release_addr -
+ (vm_offset_t)mp_cpu_spinloop;
+
+ release_vaddr = pmap_mapdev(release_addr, size);
+ bcopy(mp_cpu_spinloop, release_vaddr, size);
+ cpu_dcache_wbinv_range(release_vaddr, size);
+ pmap_unmapdev(release_vaddr, size);
+ invalidate_icache();
+ }
+ ipi_all_but_self(IPI_OFF);
+ DELAY(1000000);
+}
+
/* Introduce rest of cores to the world */
void
cpu_mp_announce(void)
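The spin-table path in cpu_mp_stop()/mp_cpu_spinloop above parks secondary CPUs in WFE until a non-zero entry point appears at mp_cpu_spin_table_release_addr. For reference, the releasing side of the standard arm64 spin-table protocol looks roughly like this (a sketch, not part of the patch):

	/* Publish an entry point and wake CPUs parked in the WFE loop. */
	static inline void
	spin_table_release(volatile uint64_t *release_addr, uint64_t entry_pa)
	{
		*release_addr = entry_pa;	/* non-zero value releases the CPU */
		__asm __volatile("dsb sy");	/* ensure the store is visible */
		__asm __volatile("sev");	/* wake CPUs sleeping in wfe */
	}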
diff --git a/sys/arm64/arm64/nexus.c b/sys/arm64/arm64/nexus.c
index 26b3389db172..012bf859eb3c 100644
--- a/sys/arm64/arm64/nexus.c
+++ b/sys/arm64/arm64/nexus.c
@@ -72,6 +72,8 @@
#include "acpi_bus_if.h"
#endif
+#include "pcib_if.h"
+
extern struct bus_space memmap_bus;
static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device");
@@ -123,6 +125,15 @@ static bus_get_bus_tag_t nexus_get_bus_tag;
#ifdef FDT
static ofw_bus_map_intr_t nexus_ofw_map_intr;
+/*
+ * PCIB interface
+ */
+static pcib_alloc_msi_t nexus_fdt_pcib_alloc_msi;
+static pcib_release_msi_t nexus_fdt_pcib_release_msi;
+static pcib_alloc_msix_t nexus_fdt_pcib_alloc_msix;
+static pcib_release_msix_t nexus_fdt_pcib_release_msix;
+static pcib_map_msi_t nexus_fdt_pcib_map_msi;
+
#endif
static device_method_t nexus_methods[] = {
@@ -441,6 +452,13 @@ static device_method_t nexus_fdt_methods[] = {
/* OFW interface */
DEVMETHOD(ofw_bus_map_intr, nexus_ofw_map_intr),
+ /* PCIB interface */
+ DEVMETHOD(pcib_alloc_msi, nexus_fdt_pcib_alloc_msi),
+ DEVMETHOD(pcib_release_msi, nexus_fdt_pcib_release_msi),
+ DEVMETHOD(pcib_alloc_msix, nexus_fdt_pcib_alloc_msix),
+ DEVMETHOD(pcib_release_msix, nexus_fdt_pcib_release_msix),
+ DEVMETHOD(pcib_map_msi, nexus_fdt_pcib_map_msi),
+
DEVMETHOD_END,
};
@@ -518,6 +536,73 @@ nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells,
irq = intr_map_irq(NULL, iparent, (struct intr_map_data *)fdt_data);
return (irq);
}
+
+static int
+nexus_fdt_pcib_alloc_msi(device_t dev, device_t child, int count, int maxcount,
+ int *irqs)
+{
+ phandle_t msi_parent;
+ int error;
+
+ error = ofw_bus_msimap(ofw_bus_get_node(child), 0, &msi_parent, NULL);
+ if (error != 0)
+ return (error);
+
+ return (intr_alloc_msi(dev, child, msi_parent, count, maxcount, irqs));
+}
+
+static int
+nexus_fdt_pcib_release_msi(device_t dev, device_t child, int count, int *irqs)
+{
+ phandle_t msi_parent;
+ int error;
+
+ error = ofw_bus_msimap(ofw_bus_get_node(child), 0, &msi_parent, NULL);
+ if (error != 0)
+ return (error);
+
+ return (intr_release_msi(dev, child, msi_parent, count, irqs));
+}
+
+static int
+nexus_fdt_pcib_alloc_msix(device_t dev, device_t child, int *irq)
+{
+ phandle_t msi_parent;
+ int error;
+
+ error = ofw_bus_msimap(ofw_bus_get_node(child), 0, &msi_parent, NULL);
+ if (error != 0)
+ return (error);
+
+ return (intr_alloc_msix(dev, child, msi_parent, irq));
+}
+
+static int
+nexus_fdt_pcib_release_msix(device_t dev, device_t child, int irq)
+{
+ phandle_t msi_parent;
+ int error;
+
+ error = ofw_bus_msimap(ofw_bus_get_node(child), 0, &msi_parent, NULL);
+ if (error != 0)
+ return (error);
+
+ return (intr_release_msix(dev, child, msi_parent, irq));
+}
+
+static int
+nexus_fdt_pcib_map_msi(device_t dev, device_t child, int irq, uint64_t *addr,
+ uint32_t *data)
+{
+ phandle_t msi_parent;
+ int error;
+
+ error = ofw_bus_msimap(ofw_bus_get_node(child), 0, &msi_parent, NULL);
+ if (error != 0)
+ return (error);
+
+ return (intr_map_msi(dev, child, msi_parent, irq, addr, data));
+}
#endif
#ifdef DEV_ACPI
diff --git a/sys/arm64/conf/std.arm b/sys/arm64/conf/std.arm
index fb5561506531..309059a096eb 100644
--- a/sys/arm64/conf/std.arm
+++ b/sys/arm64/conf/std.arm
@@ -21,3 +21,6 @@ device arm_doorbell # ARM Message Handling Unit (MHU)
options FDT
device acpi
+
+# DTBs
+makeoptions MODULES_EXTRA+="dtb/arm"
diff --git a/sys/arm64/conf/std.arm64 b/sys/arm64/conf/std.arm64
index a0568466cfaf..02bdd25f2d52 100644
--- a/sys/arm64/conf/std.arm64
+++ b/sys/arm64/conf/std.arm64
@@ -106,3 +106,9 @@ device efirtc # EFI RTC
# SMBIOS -- all EFI platforms
device smbios
+
+# random(4)
+device tpm # Trusted Platform Module
+options RANDOM_ENABLE_TPM # enable entropy from TPM 2.0
+options RANDOM_ENABLE_KBD
+options RANDOM_ENABLE_MOUSE
diff --git a/sys/arm64/conf/std.broadcom b/sys/arm64/conf/std.broadcom
index 3332aaac0826..65bee16e315d 100644
--- a/sys/arm64/conf/std.broadcom
+++ b/sys/arm64/conf/std.broadcom
@@ -33,5 +33,8 @@ device sdhci
options FDT
device acpi
+# Sound support
+device vchiq
+
# DTBs
makeoptions MODULES_EXTRA+="dtb/rpi"
diff --git a/sys/arm64/include/_armreg.h b/sys/arm64/include/_armreg.h
new file mode 100644
index 000000000000..0f5134e5a978
--- /dev/null
+++ b/sys/arm64/include/_armreg.h
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 2013, 2014 Andrew Turner
+ * Copyright (c) 2015,2021 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if !defined(_MACHINE_ARMREG_H_) && \
+ !defined(_MACHINE_CPU_H_) && \
+ !defined(_MACHINE_HYPERVISOR_H_)
+#error Do not include this file directly
+#endif
+
+#ifndef _MACHINE__ARMREG_H_
+#define _MACHINE__ARMREG_H_
+
+#define __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \
+ S##op0##_##op1##_C##crn##_C##crm##_##op2
+#define _MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \
+ __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2)
+#define MRS_REG_ALT_NAME(reg) \
+ _MRS_REG_ALT_NAME(reg##_op0, reg##_op1, reg##_CRn, reg##_CRm, reg##_op2)
+
+#define READ_SPECIALREG(reg) \
+({ uint64_t _val; \
+ __asm __volatile("mrs %0, " __STRING(reg) : "=&r" (_val)); \
+ _val; \
+})
+#define WRITE_SPECIALREG(reg, _val) \
+ __asm __volatile("msr " __STRING(reg) ", %0" : : "r"((uint64_t)_val))
+
+#define UL(x) UINT64_C(x)
+
+#endif /* !_MACHINE__ARMREG_H_ */
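The accessors moved into this header are used like so (illustrative only; both register names already appear elsewhere in the tree):

	uint64_t midr;

	midr = READ_SPECIALREG(midr_el1);	/* read the Main ID Register */
	WRITE_SPECIALREG(contextidr_el1, 0);	/* clear CONTEXTIDR_EL1 */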
diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h
index 393d6d89da0c..27b02c44cd76 100644
--- a/sys/arm64/include/armreg.h
+++ b/sys/arm64/include/armreg.h
@@ -34,25 +34,9 @@
#ifndef _MACHINE_ARMREG_H_
#define _MACHINE_ARMREG_H_
-#define INSN_SIZE 4
-
-#define __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \
- S##op0##_##op1##_C##crn##_C##crm##_##op2
-#define _MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \
- __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2)
-#define MRS_REG_ALT_NAME(reg) \
- _MRS_REG_ALT_NAME(reg##_op0, reg##_op1, reg##_CRn, reg##_CRm, reg##_op2)
-
+#include <machine/_armreg.h>
-#define READ_SPECIALREG(reg) \
-({ uint64_t _val; \
- __asm __volatile("mrs %0, " __STRING(reg) : "=&r" (_val)); \
- _val; \
-})
-#define WRITE_SPECIALREG(reg, _val) \
- __asm __volatile("msr " __STRING(reg) ", %0" : : "r"((uint64_t)_val))
-
-#define UL(x) UINT64_C(x)
+#define INSN_SIZE 4
/* AFSR0_EL1 - Auxiliary Fault Status Register 0 */
#define AFSR0_EL1_REG MRS_REG_ALT_NAME(AFSR0_EL1)
@@ -2267,6 +2251,7 @@
#define PMBSR_MSS_SHIFT 0
#define PMBSR_MSS_MASK (UL(0xffff) << PMBSR_MSS_SHIFT)
#define PMBSR_MSS_BSC_MASK (UL(0x3f) << PMBSR_MSS_SHIFT)
+#define PMBSR_MSS_BSC_BUFFER_FILLED (UL(0x01) << PMBSR_MSS_SHIFT)
#define PMBSR_MSS_FSC_MASK (UL(0x3f) << PMBSR_MSS_SHIFT)
#define PMBSR_COLL_SHIFT 16
#define PMBSR_COLL (UL(0x1) << PMBSR_COLL_SHIFT)
@@ -2278,6 +2263,11 @@
#define PMBSR_DL (UL(0x1) << PMBSR_DL_SHIFT)
#define PMBSR_EC_SHIFT 26
#define PMBSR_EC_MASK (UL(0x3f) << PMBSR_EC_SHIFT)
+#define PMBSR_EC_VAL(x) (((x) & PMBSR_EC_MASK) >> PMBSR_EC_SHIFT)
+#define PMBSR_EC_OTHER_BUF_MGMT 0x00
+#define PMBSR_EC_GRAN_PROT_CHK 0x1e
+#define PMBSR_EC_STAGE1_DA 0x24
+#define PMBSR_EC_STAGE2_DA 0x25
/* PMCCFILTR_EL0 */
#define PMCCFILTR_EL0_op0 3
@@ -2513,6 +2503,15 @@
#define PMSIDR_FnE (UL(0x1) << PMSIDR_FnE_SHIFT)
#define PMSIDR_Interval_SHIFT 8
#define PMSIDR_Interval_MASK (UL(0xf) << PMSIDR_Interval_SHIFT)
+#define PMSIDR_Interval_VAL(x) (((x) & PMSIDR_Interval_MASK) >> PMSIDR_Interval_SHIFT)
+#define PMSIDR_Interval_256 0
+#define PMSIDR_Interval_512 2
+#define PMSIDR_Interval_768 3
+#define PMSIDR_Interval_1024 4
+#define PMSIDR_Interval_1536 5
+#define PMSIDR_Interval_2048 6
+#define PMSIDR_Interval_3072 7
+#define PMSIDR_Interval_4096 8
#define PMSIDR_MaxSize_SHIFT 12
#define PMSIDR_MaxSize_MASK (UL(0xf) << PMSIDR_MaxSize_SHIFT)
#define PMSIDR_CountSize_SHIFT 16
diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h
index 124da8c215ed..b15210633d37 100644
--- a/sys/arm64/include/cpu.h
+++ b/sys/arm64/include/cpu.h
@@ -43,10 +43,10 @@
#define _MACHINE_CPU_H_
#if !defined(__ASSEMBLER__)
+#include <machine/_armreg.h>
#include <machine/atomic.h>
#include <machine/frame.h>
#endif
-#include <machine/armreg.h>
#define TRAPF_PC(tfp) ((tfp)->tf_elr)
#define TRAPF_USERMODE(tfp) (((tfp)->tf_spsr & PSR_M_MASK) == PSR_M_EL0t)
diff --git a/sys/arm64/include/cpufunc.h b/sys/arm64/include/cpufunc.h
index e6e1f682794e..e9eee643216b 100644
--- a/sys/arm64/include/cpufunc.h
+++ b/sys/arm64/include/cpufunc.h
@@ -96,6 +96,13 @@ serror_enable(void)
__asm __volatile("msr daifclr, #(" __XSTRING(DAIF_A) ")");
}
+static __inline void
+serror_disable(void)
+{
+
+ __asm __volatile("msr daifset, #(" __XSTRING(DAIF_A) ")");
+}
+
static __inline register_t
get_midr(void)
{
diff --git a/sys/arm64/include/db_machdep.h b/sys/arm64/include/db_machdep.h
index 5dc496ca851d..3ef95f7802ea 100644
--- a/sys/arm64/include/db_machdep.h
+++ b/sys/arm64/include/db_machdep.h
@@ -31,7 +31,6 @@
#ifndef _MACHINE_DB_MACHDEP_H_
#define _MACHINE_DB_MACHDEP_H_
-#include <machine/armreg.h>
#include <machine/frame.h>
#include <machine/trap.h>
diff --git a/sys/arm64/include/hypervisor.h b/sys/arm64/include/hypervisor.h
index 8feabd2b981b..f3d7027269c9 100644
--- a/sys/arm64/include/hypervisor.h
+++ b/sys/arm64/include/hypervisor.h
@@ -30,6 +30,8 @@
#ifndef _MACHINE_HYPERVISOR_H_
#define _MACHINE_HYPERVISOR_H_
+#include <machine/_armreg.h>
+
/*
* These registers are only useful when in hypervisor context,
* e.g. specific to EL2, or controlling the hypervisor.
@@ -266,6 +268,7 @@
#define MDCR_EL2_TDRA (0x1UL << MDCR_EL2_TDRA_SHIFT)
#define MDCR_EL2_E2PB_SHIFT 12
#define MDCR_EL2_E2PB_MASK (0x3UL << MDCR_EL2_E2PB_SHIFT)
+#define MDCR_EL2_E2PB_EL1_0_NO_TRAP (0x3UL << MDCR_EL2_E2PB_SHIFT)
#define MDCR_EL2_TPMS_SHIFT 14
#define MDCR_EL2_TPMS (0x1UL << MDCR_EL2_TPMS_SHIFT)
#define MDCR_EL2_EnSPM_SHIFT 15
diff --git a/sys/arm64/include/kexec.h b/sys/arm64/include/kexec.h
new file mode 100644
index 000000000000..0a8c7a053331
--- /dev/null
+++ b/sys/arm64/include/kexec.h
@@ -0,0 +1,33 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ARM64_KEXEC_H_
+#define _ARM64_KEXEC_H_
+
+#define KEXEC_MD_PAGES(x) 0
+
+#endif /* _ARM64_KEXEC_H_ */
diff --git a/sys/arm64/include/pcpu.h b/sys/arm64/include/pcpu.h
index 09bd8fa8a966..73399d2c3f8c 100644
--- a/sys/arm64/include/pcpu.h
+++ b/sys/arm64/include/pcpu.h
@@ -50,7 +50,8 @@ struct debug_monitor_state;
struct pmap *pc_curvmpmap; \
uint64_t pc_mpidr; \
u_int pc_bcast_tlbi_workaround; \
- char __pad[197]
+ uint64_t pc_release_addr; \
+ char __pad[189]
#ifdef _KERNEL
diff --git a/sys/arm64/include/smp.h b/sys/arm64/include/smp.h
index 500cd1ef4f02..4a5bfda3ac1c 100644
--- a/sys/arm64/include/smp.h
+++ b/sys/arm64/include/smp.h
@@ -40,6 +40,7 @@ enum {
IPI_STOP,
IPI_STOP_HARD,
IPI_HARDCLOCK,
+ IPI_OFF,
INTR_IPI_COUNT,
};
diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h
index 696a69669a2a..e67540eac66d 100644
--- a/sys/arm64/include/vmm.h
+++ b/sys/arm64/include/vmm.h
@@ -106,27 +106,6 @@ enum vm_reg_name {
#define VM_GUEST_BASE_IPA 0x80000000UL /* Guest kernel start ipa */
-/*
- * The VM name has to fit into the pathname length constraints of devfs,
- * governed primarily by SPECNAMELEN. The length is the total number of
- * characters in the full path, relative to the mount point and not
- * including any leading '/' characters.
- * A prefix and a suffix are added to the name specified by the user.
- * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters
- * longer for future use.
- * The suffix is a string that identifies a bootrom image or some similar
- * image that is attached to the VM. A separator character gets added to
- * the suffix automatically when generating the full path, so it must be
- * accounted for, reducing the effective length by 1.
- * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37
- * bytes for FreeBSD 12. A minimum length is set for safety and supports
- * a SPECNAMELEN as small as 32 on old systems.
- */
-#define VM_MAX_PREFIXLEN 10
-#define VM_MAX_SUFFIXLEN 15
-#define VM_MAX_NAMELEN \
- (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1)
-
#ifdef _KERNEL
struct vm;
struct vm_exception;
diff --git a/sys/arm64/include/vmm_dev.h b/sys/arm64/include/vmm_dev.h
index 219f1116c728..289ff0fe1fc9 100644
--- a/sys/arm64/include/vmm_dev.h
+++ b/sys/arm64/include/vmm_dev.h
@@ -31,6 +31,8 @@
#include <machine/vmm.h>
+#include <dev/vmm/vmm_param.h>
+
struct vm_memmap {
vm_paddr_t gpa;
int segid; /* memory segment */
diff --git a/sys/arm64/nvidia/tegra210/max77620_regulators.c b/sys/arm64/nvidia/tegra210/max77620_regulators.c
index af1a5af20ec3..d52aeaef1287 100644
--- a/sys/arm64/nvidia/tegra210/max77620_regulators.c
+++ b/sys/arm64/nvidia/tegra210/max77620_regulators.c
@@ -364,7 +364,7 @@ max77620_get_sel(struct max77620_reg_sc *sc, uint8_t *sel)
rv = RD1(sc->base_sc, sc->def->volt_reg, sel);
if (rv != 0) {
- printf("%s: cannot read volatge selector: %d\n",
+ printf("%s: cannot read voltage selector: %d\n",
regnode_get_name(sc->regnode), rv);
return (rv);
}
@@ -384,7 +384,7 @@ max77620_set_sel(struct max77620_reg_sc *sc, uint8_t sel)
rv = RM1(sc->base_sc, sc->def->volt_reg,
sc->def->volt_vsel_mask, sel);
if (rv != 0) {
- printf("%s: cannot set volatge selector: %d\n",
+ printf("%s: cannot set voltage selector: %d\n",
regnode_get_name(sc->regnode), rv);
return (rv);
}
diff --git a/sys/arm64/rockchip/rk_i2s.c b/sys/arm64/rockchip/rk_i2s.c
index 5f1b6bbdeabf..856fa20e6ce4 100644
--- a/sys/arm64/rockchip/rk_i2s.c
+++ b/sys/arm64/rockchip/rk_i2s.c
@@ -403,10 +403,10 @@ rk_i2s_dai_intr(device_t dev, struct snd_dbuf *play_buf, struct snd_dbuf *rec_bu
count = sndbuf_getready(play_buf);
if (count > FIFO_SIZE - 1)
count = FIFO_SIZE - 1;
- size = sndbuf_getsize(play_buf);
+ size = play_buf->bufsize;
readyptr = sndbuf_getreadyptr(play_buf);
- samples = (uint8_t*)sndbuf_getbuf(play_buf);
+ samples = play_buf->buf;
written = 0;
for (; level < count; level++) {
val = (samples[readyptr++ % size] << 0);
@@ -426,9 +426,9 @@ rk_i2s_dai_intr(device_t dev, struct snd_dbuf *play_buf, struct snd_dbuf *rec_bu
uint8_t *samples;
uint32_t count, size, freeptr, recorded;
count = sndbuf_getfree(rec_buf);
- size = sndbuf_getsize(rec_buf);
+ size = rec_buf->bufsize;
freeptr = sndbuf_getfreeptr(rec_buf);
- samples = (uint8_t*)sndbuf_getbuf(rec_buf);
+ samples = rec_buf->buf;
recorded = 0;
if (level > count / 4)
level = count / 4;
diff --git a/sys/arm64/spe/arm_spe.h b/sys/arm64/spe/arm_spe.h
new file mode 100644
index 000000000000..5dba20673a77
--- /dev/null
+++ b/sys/arm64/spe/arm_spe.h
@@ -0,0 +1,77 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ARM64_ARM_SPE_H_
+#define _ARM64_ARM_SPE_H_
+
+/* kqueue events */
+#define ARM_SPE_KQ_BUF 138
+#define ARM_SPE_KQ_SHUTDOWN 139
+#define ARM_SPE_KQ_SIGNAL 140
+
+/* spe_backend_read() u64 data encoding */
+#define KQ_BUF_POS_SHIFT 0
+#define KQ_BUF_POS (1 << KQ_BUF_POS_SHIFT)
+#define KQ_PARTREC_SHIFT 1
+#define KQ_PARTREC (1 << KQ_PARTREC_SHIFT)
+#define KQ_FINAL_BUF_SHIFT 2
+#define KQ_FINAL_BUF (1 << KQ_FINAL_BUF_SHIFT)
+
+enum arm_spe_ctx_field {
+ ARM_SPE_CTX_NONE,
+ ARM_SPE_CTX_PID,
+ ARM_SPE_CTX_CPU_ID
+};
+
+enum arm_spe_profiling_level {
+ ARM_SPE_KERNEL_AND_USER,
+ ARM_SPE_KERNEL_ONLY,
+ ARM_SPE_USER_ONLY
+};
+
+struct arm_spe_config {
+ /* Minimum interval is IMP DEF up to maximum 24 bit value */
+ uint32_t interval;
+
+ /* Profile kernel (EL1), userspace (EL0) or both */
+ enum arm_spe_profiling_level level;
+
+ /*
+ * Configure context field in SPE records to store either the
+ * current PID, the CPU ID or neither
+ *
+ * In PID mode, kernel threads without a process context are
+ * logged as PID 0
+ */
+ enum arm_spe_ctx_field ctx_field;
+};
+
+struct arm_spe_svc_buf {
+ uint32_t ident;
+ uint8_t buf_idx : 1;
+};
+
+#endif /* _ARM64_ARM_SPE_H_ */
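A consumer of this header would fill struct arm_spe_config and pass it through HWT's config mechanism (config_version 1, as validated by spe_backend_configure() later in this commit). A minimal sketch:

	struct arm_spe_config cfg = {
		.interval = 4096,		/* clamped to [IMP DEF min, 2^24] */
		.level = ARM_SPE_KERNEL_ONLY,	/* profile EL1 only */
		.ctx_field = ARM_SPE_CTX_PID,	/* record PIDs in SPE records */
	};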
diff --git a/sys/arm64/spe/arm_spe_acpi.c b/sys/arm64/spe/arm_spe_acpi.c
new file mode 100644
index 000000000000..b9f40448d940
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_acpi.c
@@ -0,0 +1,146 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+
+#include <contrib/dev/acpica/include/acpi.h>
+#include <dev/acpica/acpivar.h>
+
+#include <arm64/spe/arm_spe_dev.h>
+
+static device_identify_t arm_spe_acpi_identify;
+static device_probe_t arm_spe_acpi_probe;
+
+static device_method_t arm_spe_acpi_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_identify, arm_spe_acpi_identify),
+ DEVMETHOD(device_probe, arm_spe_acpi_probe),
+
+ DEVMETHOD_END,
+};
+
+DEFINE_CLASS_1(spe, arm_spe_acpi_driver, arm_spe_acpi_methods,
+ sizeof(struct arm_spe_softc), arm_spe_driver);
+
+DRIVER_MODULE(spe, acpi, arm_spe_acpi_driver, 0, 0);
+
+struct madt_data {
+ u_int irq;
+ bool found;
+ bool valid;
+};
+
+static void
+madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
+{
+ ACPI_MADT_GENERIC_INTERRUPT *intr;
+ struct madt_data *madt_data;
+ u_int irq;
+
+ madt_data = (struct madt_data *)arg;
+
+	/* Exit early if we have decided not to attach */
+ if (!madt_data->valid)
+ return;
+
+	switch (entry->Type) {
+ case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
+ intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry;
+ irq = intr->SpeInterrupt;
+
+ if (irq == 0) {
+ madt_data->valid = false;
+ } else if (!madt_data->found) {
+ madt_data->found = true;
+ madt_data->irq = irq;
+ } else if (madt_data->irq != irq) {
+ madt_data->valid = false;
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+arm_spe_acpi_identify(driver_t *driver, device_t parent)
+{
+ struct madt_data madt_data;
+ ACPI_TABLE_MADT *madt;
+ device_t dev;
+ vm_paddr_t physaddr;
+
+ physaddr = acpi_find_table(ACPI_SIG_MADT);
+ if (physaddr == 0)
+ return;
+
+ madt = acpi_map_table(physaddr, ACPI_SIG_MADT);
+ if (madt == NULL) {
+ device_printf(parent, "spe: Unable to map the MADT\n");
+ return;
+ }
+
+ madt_data.irq = 0;
+ madt_data.found = false;
+ madt_data.valid = true;
+
+ acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
+ madt_handler, &madt_data);
+
+ if (!madt_data.found || !madt_data.valid)
+ goto out;
+
+ MPASS(madt_data.irq != 0);
+
+ dev = BUS_ADD_CHILD(parent, 0, "spe", -1);
+ if (dev == NULL) {
+ device_printf(parent, "add spe child failed\n");
+ goto out;
+ }
+
+ BUS_SET_RESOURCE(parent, dev, SYS_RES_IRQ, 0, madt_data.irq, 1);
+
+out:
+ acpi_unmap_table(madt);
+}
+
+static int
+arm_spe_acpi_probe(device_t dev)
+{
+ device_set_desc(dev, "ARM Statistical Profiling Extension");
+ return (BUS_PROBE_NOWILDCARD);
+}
diff --git a/sys/arm64/spe/arm_spe_backend.c b/sys/arm64/spe/arm_spe_backend.c
new file mode 100644
index 000000000000..b4e1132f9cbc
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_backend.c
@@ -0,0 +1,586 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Arm Statistical Profiling Extension (SPE) backend
+ *
+ * Basic SPE operation
+ *
+ * SPE is enabled and configured on a per-core basis, with each core requiring
+ * separate code to enable and configure. Each core also requires a separate
+ * buffer passed as config where the CPU will write profiling data. When the
+ * profiling buffer is full, an interrupt will be taken on the same CPU.
+ *
+ * Driver Design
+ *
+ * - HWT allocates a large single buffer per core. This buffer is split in half
+ * to create a 2 element circular buffer (aka ping-pong buffer) where the
+ * kernel writes to one half while userspace is copying the other half
+ * - SMP calls are used to enable and configure each core, with SPE initially
+ * configured to write to the first half of the buffer
+ * - When the first half of the buffer is full, a buffer full interrupt will
+ * immediately switch writing to the second half. The kernel adds the details
+ * of the half that needs copying to a FIFO STAILQ and notifies userspace via
+ * kqueue by sending a ARM_SPE_KQ_BUF kevent with how many buffers on the
+ * queue need servicing
+ * - The kernel responds to HWT_IOC_BUFPTR_GET ioctl by sending details of the
+ * first item from the queue
+ * - The buffers pending copying will not be overwritten until an
+ * HWT_IOC_SVC_BUF ioctl is received from userspace confirming the data has
+ * been copied out
+ * - In the case where both halves of the buffer are full, profiling will be
+ * paused until notification via HWT_IOC_SVC_BUF is received
+ *
+ * Future improvements and limitations
+ *
+ * - Using large buffer sizes should minimise pauses and loss of profiling
+ * data while the kernel is waiting for userspace to copy out data. Since it is
+ * generally expected that consuming (copying) this data is faster than
+ * producing it, in practice this has not so far been an issue. If it does
+ * prove to be an issue even with large buffer sizes, then additional
+ * buffering, i.e. n-element circular buffers, might be required.
+ *
+ * - kqueue can only notify and queue one kevent of the same type, with
+ * subsequent events overwriting data in the first event. The kevent
+ * ARM_SPE_KQ_BUF can therefore only contain the number of buffers on the
+ * STAILQ, incrementing each time a new buffer is full. In this case kqueue
+ * serves just as a notification to userspace to wake up and query the kernel
+ * with the appropriate ioctl. An alternative might be custom kevents where
+ * the kevent identifier is encoded with something like n+cpu_id or n+tid. In
+ * this case data could be sent directly with kqueue via the kevent data and
+ * fflags elements, avoiding the extra ioctl.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/hwt.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <machine/bus.h>
+
+#include <arm64/spe/arm_spe_dev.h>
+
+#include <dev/hwt/hwt_vm.h>
+#include <dev/hwt/hwt_backend.h>
+#include <dev/hwt/hwt_config.h>
+#include <dev/hwt/hwt_context.h>
+#include <dev/hwt/hwt_cpu.h>
+#include <dev/hwt/hwt_thread.h>
+
+MALLOC_DECLARE(M_ARM_SPE);
+
+extern u_int mp_maxid;
+extern struct taskqueue *taskqueue_arm_spe;
+
+int spe_backend_disable_smp(struct hwt_context *ctx);
+
+static device_t spe_dev;
+static struct hwt_backend_ops spe_ops;
+static struct hwt_backend backend = {
+ .ops = &spe_ops,
+ .name = "spe",
+ .kva_req = 1,
+};
+
+static struct arm_spe_info *spe_info;
+
+static int
+spe_backend_init_thread(struct hwt_context *ctx)
+{
+ return (ENOTSUP);
+}
+
+static void
+spe_backend_init_cpu(struct hwt_context *ctx)
+{
+ struct arm_spe_info *info;
+ struct arm_spe_softc *sc = device_get_softc(spe_dev);
+ char lock_name[32];
+ char *tmp = "Arm SPE lock/cpu/";
+ int cpu_id;
+
+ spe_info = malloc(sizeof(struct arm_spe_info) * mp_ncpus,
+ M_ARM_SPE, M_WAITOK | M_ZERO);
+
+ sc->spe_info = spe_info;
+
+ CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+ info = &spe_info[cpu_id];
+ info->sc = sc;
+ info->ident = cpu_id;
+ info->buf_info[0].info = info;
+ info->buf_info[0].buf_idx = 0;
+ info->buf_info[1].info = info;
+ info->buf_info[1].buf_idx = 1;
+ snprintf(lock_name, sizeof(lock_name), "%s%d", tmp, cpu_id);
+ mtx_init(&info->lock, lock_name, NULL, MTX_SPIN);
+ }
+}
+
+static int
+spe_backend_init(struct hwt_context *ctx)
+{
+ struct arm_spe_softc *sc = device_get_softc(spe_dev);
+ int error = 0;
+
+	/*
+	 * HWT currently specifies that the buffer size must be a multiple of
+	 * PAGE_SIZE (i.e. minimum 4KB), and the maximum PMBIDR.Align is 2KB,
+	 * so this should never fail, but it's good to sanity check.
+	 */
+ if (ctx->bufsize % sc->kva_align != 0)
+ return (EINVAL);
+
+	/*
+	 * Since we're splitting the buffer in half and PMBLIMITR needs to be
+	 * page aligned, the minimum buffer size is 2x PAGE_SIZE.
+	 */
+ if (ctx->bufsize < (2 * PAGE_SIZE))
+ return (EINVAL);
+
+ sc->ctx = ctx;
+ sc->kqueue_fd = ctx->kqueue_fd;
+ sc->hwt_td = ctx->hwt_td;
+
+ if (ctx->mode == HWT_MODE_THREAD)
+ error = spe_backend_init_thread(ctx);
+ else
+ spe_backend_init_cpu(ctx);
+
+ return (error);
+}
+
+#ifdef ARM_SPE_DEBUG
+static void hex_dump(uint8_t *buf, size_t len)
+{
+ size_t i;
+
+ printf("--------------------------------------------------------------\n");
+ for (i = 0; i < len; ++i) {
+ if (i % 8 == 0) {
+ printf(" ");
+ }
+ if (i % 16 == 0) {
+ if (i != 0) {
+ printf("\r\n");
+ }
+ printf("\t");
+ }
+ printf("%02X ", buf[i]);
+ }
+ printf("\r\n");
+}
+#endif
+
+static int
+spe_backend_deinit(struct hwt_context *ctx)
+{
+#ifdef ARM_SPE_DEBUG
+ struct arm_spe_info *info;
+ int cpu_id;
+
+ CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+ info = &spe_info[cpu_id];
+ hex_dump((void *)info->kvaddr, 128);
+ hex_dump((void *)(info->kvaddr + (info->buf_size/2)), 128);
+ }
+#endif
+
+ if (ctx->state == CTX_STATE_RUNNING) {
+ spe_backend_disable_smp(ctx);
+ ctx->state = CTX_STATE_STOPPED;
+ }
+
+ free(spe_info, M_ARM_SPE);
+
+ return (0);
+}
+
+static uint64_t
+arm_spe_min_interval(struct arm_spe_softc *sc)
+{
+ /* IMPLEMENTATION DEFINED */
+	switch (PMSIDR_Interval_VAL(sc->pmsidr)) {
+ case PMSIDR_Interval_256:
+ return (256);
+ case PMSIDR_Interval_512:
+ return (512);
+ case PMSIDR_Interval_768:
+ return (768);
+ case PMSIDR_Interval_1024:
+ return (1024);
+ case PMSIDR_Interval_1536:
+ return (1536);
+ case PMSIDR_Interval_2048:
+ return (2048);
+ case PMSIDR_Interval_3072:
+ return (3072);
+ case PMSIDR_Interval_4096:
+ return (4096);
+ default:
+ return (4096);
+ }
+}
+
+static inline void
+arm_spe_set_interval(struct arm_spe_info *info, uint64_t interval)
+{
+ uint64_t min_interval = arm_spe_min_interval(info->sc);
+
+ interval = MAX(interval, min_interval);
+ interval = MIN(interval, 1 << 24); /* max 24 bits */
+
+ dprintf("%s %lu\n", __func__, interval);
+
+ info->pmsirr &= ~(PMSIRR_INTERVAL_MASK);
+ info->pmsirr |= (interval << PMSIRR_INTERVAL_SHIFT);
+}
+
+static int
+spe_backend_configure(struct hwt_context *ctx, int cpu_id, int session_id)
+{
+ struct arm_spe_info *info = &spe_info[cpu_id];
+ struct arm_spe_config *cfg;
+ int err = 0;
+
+ mtx_lock_spin(&info->lock);
+ info->ident = cpu_id;
+ /* Set defaults */
+ info->pmsfcr = 0;
+ info->pmsevfr = 0xFFFFFFFFFFFFFFFFUL;
+ info->pmslatfr = 0;
+ info->pmsirr =
+ (arm_spe_min_interval(info->sc) << PMSIRR_INTERVAL_SHIFT)
+ | PMSIRR_RND;
+ info->pmsicr = 0;
+ info->pmscr = PMSCR_TS | PMSCR_PA | PMSCR_CX | PMSCR_E1SPE | PMSCR_E0SPE;
+
+ if (ctx->config != NULL &&
+ ctx->config_size == sizeof(struct arm_spe_config) &&
+ ctx->config_version == 1) {
+ cfg = (struct arm_spe_config *)ctx->config;
+ if (cfg->interval)
+ arm_spe_set_interval(info, cfg->interval);
+ if (cfg->level == ARM_SPE_KERNEL_ONLY)
+ info->pmscr &= ~(PMSCR_E0SPE); /* turn off user */
+ if (cfg->level == ARM_SPE_USER_ONLY)
+ info->pmscr &= ~(PMSCR_E1SPE); /* turn off kern */
+ if (cfg->ctx_field)
+ info->ctx_field = cfg->ctx_field;
+ } else
+ err = (EINVAL);
+ mtx_unlock_spin(&info->lock);
+
+ return (err);
+}
+
+static void
+arm_spe_enable(void *arg __unused)
+{
+ struct arm_spe_info *info = &spe_info[PCPU_GET(cpuid)];
+ uint64_t base, limit;
+
+ dprintf("%s on cpu:%d\n", __func__, PCPU_GET(cpuid));
+
+ mtx_lock_spin(&info->lock);
+
+ if (info->ctx_field == ARM_SPE_CTX_CPU_ID)
+ WRITE_SPECIALREG(CONTEXTIDR_EL1_REG, PCPU_GET(cpuid));
+
+ WRITE_SPECIALREG(PMSFCR_EL1_REG, info->pmsfcr);
+ WRITE_SPECIALREG(PMSEVFR_EL1_REG, info->pmsevfr);
+ WRITE_SPECIALREG(PMSLATFR_EL1_REG, info->pmslatfr);
+
+ /* Set the sampling interval */
+ WRITE_SPECIALREG(PMSIRR_EL1_REG, info->pmsirr);
+ isb();
+
+ /* Write 0 here before enabling sampling */
+ WRITE_SPECIALREG(PMSICR_EL1_REG, info->pmsicr);
+ isb();
+
+ base = info->kvaddr;
+ limit = base + (info->buf_size/2);
+ /* Enable the buffer */
+ limit &= PMBLIMITR_LIMIT_MASK; /* Zero lower 12 bits */
+ limit |= PMBLIMITR_E;
+ /* Set the base and limit */
+ WRITE_SPECIALREG(PMBPTR_EL1_REG, base);
+ WRITE_SPECIALREG(PMBLIMITR_EL1_REG, limit);
+ isb();
+
+ /* Enable sampling */
+ WRITE_SPECIALREG(PMSCR_EL1_REG, info->pmscr);
+ isb();
+
+ info->enabled = true;
+
+ mtx_unlock_spin(&info->lock);
+}
+
+static int
+spe_backend_enable_smp(struct hwt_context *ctx)
+{
+ struct arm_spe_info *info;
+ struct hwt_vm *vm;
+ int cpu_id;
+
+ HWT_CTX_LOCK(ctx);
+ CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+ vm = hwt_cpu_get(ctx, cpu_id)->vm;
+
+ info = &spe_info[cpu_id];
+
+ mtx_lock_spin(&info->lock);
+ info->kvaddr = vm->kvaddr;
+ info->buf_size = ctx->bufsize;
+ mtx_unlock_spin(&info->lock);
+ }
+ HWT_CTX_UNLOCK(ctx);
+
+ cpu_id = CPU_FFS(&ctx->cpu_map) - 1;
+ info = &spe_info[cpu_id];
+ if (info->ctx_field == ARM_SPE_CTX_PID)
+ arm64_pid_in_contextidr = true;
+ else
+ arm64_pid_in_contextidr = false;
+
+ smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier,
+ arm_spe_enable, smp_no_rendezvous_barrier, NULL);
+
+ return (0);
+}
+
+void
+arm_spe_disable(void *arg __unused)
+{
+ struct arm_spe_info *info = &spe_info[PCPU_GET(cpuid)];
+ struct arm_spe_buf_info *buf = &info->buf_info[info->buf_idx];
+
+ if (!info->enabled)
+ return;
+
+ dprintf("%s on cpu:%d\n", __func__, PCPU_GET(cpuid));
+
+ /* Disable profiling */
+ WRITE_SPECIALREG(PMSCR_EL1_REG, 0x0);
+ isb();
+
+ /* Drain any remaining tracing data */
+ psb_csync();
+ dsb(nsh);
+
+ /* Disable the profiling buffer */
+ WRITE_SPECIALREG(PMBLIMITR_EL1_REG, 0);
+ isb();
+
+ /* Clear interrupt status reg */
+ WRITE_SPECIALREG(PMBSR_EL1_REG, 0x0);
+
+ /* Clear PID/CPU_ID from context ID reg */
+ WRITE_SPECIALREG(CONTEXTIDR_EL1_REG, 0);
+
+ mtx_lock_spin(&info->lock);
+ buf->pmbptr = READ_SPECIALREG(PMBPTR_EL1_REG);
+ info->enabled = false;
+ mtx_unlock_spin(&info->lock);
+}
+
+int
+spe_backend_disable_smp(struct hwt_context *ctx)
+{
+ struct kevent kev;
+ struct arm_spe_info *info;
+ struct arm_spe_buf_info *buf;
+ int cpu_id;
+ int ret;
+
+ /* Disable and send out remaining data in bufs */
+ smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier,
+ arm_spe_disable, smp_no_rendezvous_barrier, NULL);
+
+ CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+ info = &spe_info[cpu_id];
+ buf = &info->buf_info[info->buf_idx];
+ arm_spe_send_buffer(buf, 0);
+ }
+
+ arm64_pid_in_contextidr = false;
+
+ /*
+ * Tracing on all CPUs has been disabled, and we've sent write ptr
+ * offsets for all bufs - let userspace know it can shutdown
+ */
+ EV_SET(&kev, ARM_SPE_KQ_SHUTDOWN, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
+ ret = kqfd_register(ctx->kqueue_fd, &kev, ctx->hwt_td, M_WAITOK);
+ if (ret)
+ dprintf("%s kqfd_register ret:%d\n", __func__, ret);
+
+ return (0);
+}
+
+static void
+spe_backend_stop(struct hwt_context *ctx)
+{
+ spe_backend_disable_smp(ctx);
+}
+
+static void
+arm_spe_reenable(void *arg __unused)
+{
+	struct arm_spe_info *info = &spe_info[PCPU_GET(cpuid)];
+
+ WRITE_SPECIALREG(PMSCR_EL1_REG, info->pmscr);
+ isb();
+}
+
+static int
+spe_backend_svc_buf(struct hwt_context *ctx, void *data, size_t data_size,
+ int data_version)
+{
+ struct arm_spe_info *info;
+ struct arm_spe_buf_info *buf;
+ struct arm_spe_svc_buf *s;
+ int err = 0;
+ cpuset_t cpu_set;
+
+ if (data_size != sizeof(struct arm_spe_svc_buf))
+ return (E2BIG);
+
+ if (data_version != 1)
+ return (EINVAL);
+
+ s = (struct arm_spe_svc_buf *)data;
+ if (s->buf_idx > 1)
+ return (ENODEV);
+ if (s->ident >= mp_ncpus)
+ return (EINVAL);
+
+ info = &spe_info[s->ident];
+ mtx_lock_spin(&info->lock);
+
+ buf = &info->buf_info[s->buf_idx];
+
+ if (!info->enabled) {
+ err = ENXIO;
+ goto end;
+ }
+
+	/* Clear the flag that signals the buffer needs servicing */
+ buf->buf_svc = false;
+
+ /* Re-enable profiling if we've been waiting for this notification */
+ if (buf->buf_wait) {
+ CPU_SETOF(s->ident, &cpu_set);
+
+ mtx_unlock_spin(&info->lock);
+ smp_rendezvous_cpus(cpu_set, smp_no_rendezvous_barrier,
+ arm_spe_reenable, smp_no_rendezvous_barrier, NULL);
+ mtx_lock_spin(&info->lock);
+
+ buf->buf_wait = false;
+ }
+
+end:
+ mtx_unlock_spin(&info->lock);
+ return (err);
+}
+
+static int
+spe_backend_read(struct hwt_vm *vm, int *ident, vm_offset_t *offset,
+ uint64_t *data)
+{
+ struct arm_spe_queue *q;
+ struct arm_spe_softc *sc = device_get_softc(spe_dev);
+ int error = 0;
+
+ mtx_lock_spin(&sc->sc_lock);
+
+ /* Return the first pending buffer that needs servicing */
+ q = STAILQ_FIRST(&sc->pending);
+ if (q == NULL) {
+ error = ENOENT;
+ goto error;
+ }
+ *ident = q->ident;
+ *offset = q->offset;
+ *data = (q->buf_idx << KQ_BUF_POS_SHIFT) |
+ (q->partial_rec << KQ_PARTREC_SHIFT) |
+ (q->final_buf << KQ_FINAL_BUF_SHIFT);
+
+ STAILQ_REMOVE_HEAD(&sc->pending, next);
+ sc->npending--;
+
+error:
+ mtx_unlock_spin(&sc->sc_lock);
+ if (error)
+ return (error);
+
+ free(q, M_ARM_SPE);
+ return (0);
+}
+
+static struct hwt_backend_ops spe_ops = {
+ .hwt_backend_init = spe_backend_init,
+ .hwt_backend_deinit = spe_backend_deinit,
+
+ .hwt_backend_configure = spe_backend_configure,
+ .hwt_backend_svc_buf = spe_backend_svc_buf,
+ .hwt_backend_stop = spe_backend_stop,
+
+ .hwt_backend_enable_smp = spe_backend_enable_smp,
+ .hwt_backend_disable_smp = spe_backend_disable_smp,
+
+ .hwt_backend_read = spe_backend_read,
+};
+
+int
+spe_register(device_t dev)
+{
+ spe_dev = dev;
+
+ return (hwt_backend_register(&backend));
+}
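To tie the backend's kqueue/ioctl flow together, a userspace consumer loop might look like the following sketch. hwt_bufptr_get(), hwt_svc_buf() and copy_out_half() are hypothetical helpers (the first two wrapping the HWT_IOC_BUFPTR_GET and HWT_IOC_SVC_BUF ioctls named in the design notes); the data-word decoding matches spe_backend_read() above. Assumes <sys/event.h> and the arm_spe.h definitions from this commit:

	static void
	consume(int kq, int hwt_fd)
	{
		struct kevent ev;
		struct arm_spe_svc_buf svc;
		vm_offset_t offset;
		uint64_t data;
		int ident;

		for (;;) {
			if (kevent(kq, NULL, 0, &ev, 1, NULL) != 1)
				break;
			if (ev.ident == ARM_SPE_KQ_SHUTDOWN)
				break;	/* tracing disabled, final offsets sent */
			/* ev.data counts pending buffers; drain the queue. */
			while (hwt_bufptr_get(hwt_fd, &ident, &offset, &data) == 0) {
				copy_out_half(ident, data & KQ_BUF_POS, offset);
				/* Ack so the driver may overwrite (and un-pause). */
				svc.ident = ident;
				svc.buf_idx = data & KQ_BUF_POS;
				hwt_svc_buf(hwt_fd, &svc);
				if (data & KQ_FINAL_BUF)
					return;
			}
		}
	}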
diff --git a/sys/arm64/spe/arm_spe_dev.c b/sys/arm64/spe/arm_spe_dev.c
new file mode 100644
index 000000000000..8a834197eeef
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_dev.c
@@ -0,0 +1,324 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/event.h>
+#include <sys/hwt.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/smp.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+
+#include <machine/bus.h>
+
+#include <arm64/spe/arm_spe.h>
+#include <arm64/spe/arm_spe_dev.h>
+
+MALLOC_DEFINE(M_ARM_SPE, "armspe", "Arm SPE tracing");
+
+/*
+ * taskqueue(9) used for sleepable routines called from interrupt handlers
+ */
+TASKQUEUE_FAST_DEFINE_THREAD(arm_spe);
+
+void arm_spe_send_buffer(void *, int);
+static void arm_spe_error(void *, int);
+static int arm_spe_intr(void *);
+device_attach_t arm_spe_attach;
+
+static device_method_t arm_spe_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_attach, arm_spe_attach),
+
+	DEVMETHOD_END
+};
+
+DEFINE_CLASS_0(spe, arm_spe_driver, arm_spe_methods,
+ sizeof(struct arm_spe_softc));
+
+#define ARM_SPE_KVA_MAX_ALIGN UL(2048)
+
+int
+arm_spe_attach(device_t dev)
+{
+ struct arm_spe_softc *sc;
+ int error, rid;
+
+ sc = device_get_softc(dev);
+ sc->dev = dev;
+
+ sc->pmbidr = READ_SPECIALREG(PMBIDR_EL1_REG);
+ sc->pmsidr = READ_SPECIALREG(PMSIDR_EL1_REG);
+ device_printf(dev, "PMBIDR_EL1: %#lx\n", sc->pmbidr);
+ device_printf(dev, "PMSIDR_EL1: %#lx\n", sc->pmsidr);
+ if ((sc->pmbidr & PMBIDR_P) != 0) {
+ device_printf(dev, "Profiling Buffer is owned by a higher Exception level\n");
+ return (EPERM);
+ }
+
+ sc->kva_align = 1 << ((sc->pmbidr & PMBIDR_Align_MASK) >> PMBIDR_Align_SHIFT);
+ if (sc->kva_align > ARM_SPE_KVA_MAX_ALIGN) {
+		device_printf(dev, "Unsupported PMB buffer alignment of %d bytes\n", sc->kva_align);
+ return (EINVAL);
+ }
+
+ rid = 0;
+ sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
+ RF_ACTIVE);
+ if (sc->sc_irq_res == NULL) {
+ device_printf(dev, "Unable to allocate interrupt\n");
+ return (ENXIO);
+ }
+	error = bus_setup_intr(dev, sc->sc_irq_res,
+	    INTR_TYPE_MISC | INTR_MPSAFE, arm_spe_intr, NULL, sc,
+	    &sc->sc_irq_cookie);
+	if (error != 0) {
+		device_printf(dev, "Unable to set up interrupt\n");
+		bus_release_resource(dev, SYS_RES_IRQ, rid, sc->sc_irq_res);
+		return (error);
+	}
+
+ mtx_init(&sc->sc_lock, "Arm SPE lock", NULL, MTX_SPIN);
+
+ STAILQ_INIT(&sc->pending);
+ sc->npending = 0;
+
+	return (spe_register(dev));
+}
+
+/* Interrupt handler runs on the same core that triggered the exception */
+static int
+arm_spe_intr(void *arg)
+{
+ int cpu_id = PCPU_GET(cpuid);
+ struct arm_spe_softc *sc = arg;
+ uint64_t pmbsr;
+ uint64_t base, limit;
+ uint8_t ec;
+ struct arm_spe_info *info = &sc->spe_info[cpu_id];
+ uint8_t i = info->buf_idx;
+ struct arm_spe_buf_info *buf = &info->buf_info[i];
+ struct arm_spe_buf_info *prev_buf = &info->buf_info[!i];
+ device_t dev = sc->dev;
+
+ /* Make sure the profiling data is visible to the CPU */
+ psb_csync();
+ dsb(nsh);
+
+ /* Make sure any HW update of PMBPTR_EL1 is visible to the CPU */
+ isb();
+
+ pmbsr = READ_SPECIALREG(PMBSR_EL1_REG);
+
+ if (!(pmbsr & PMBSR_S))
+ return (FILTER_STRAY);
+
+ /* Event Class */
+ ec = PMBSR_EC_VAL(pmbsr);
+	switch (ec) {
+ case PMBSR_EC_OTHER_BUF_MGMT: /* Other buffer management event */
+ break;
+ case PMBSR_EC_GRAN_PROT_CHK: /* Granule Protection Check fault */
+ device_printf(dev, "PMBSR_EC_GRAN_PROT_CHK\n");
+ break;
+ case PMBSR_EC_STAGE1_DA: /* Stage 1 Data Abort */
+ device_printf(dev, "PMBSR_EC_STAGE1_DA\n");
+ break;
+ case PMBSR_EC_STAGE2_DA: /* Stage 2 Data Abort */
+ device_printf(dev, "PMBSR_EC_STAGE2_DA\n");
+ break;
+ default:
+ /* Unknown EC */
+ device_printf(dev, "unknown PMBSR_EC: %#x\n", ec);
+ arm_spe_disable(NULL);
+ TASK_INIT(&sc->task, 0, (task_fn_t *)arm_spe_error, sc->ctx);
+ taskqueue_enqueue(taskqueue_arm_spe, &sc->task);
+ return (FILTER_HANDLED);
+ }
+
+ switch (ec) {
+ case PMBSR_EC_OTHER_BUF_MGMT:
+ /* Buffer Status Code = buffer filled */
+ if ((pmbsr & PMBSR_MSS_BSC_MASK) == PMBSR_MSS_BSC_BUFFER_FILLED) {
+ dprintf("%s SPE buffer full event (cpu:%d)\n",
+ __func__, cpu_id);
+ break;
+ }
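+		/* FALLTHROUGH */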
+ case PMBSR_EC_GRAN_PROT_CHK:
+ case PMBSR_EC_STAGE1_DA:
+ case PMBSR_EC_STAGE2_DA:
+ /*
+ * If we have one of these, we've messed up the
+ * programming somehow (e.g. passed invalid memory to
+ * SPE) and can't recover
+ */
+ arm_spe_disable(NULL);
+ TASK_INIT(&sc->task, 0, (task_fn_t *)arm_spe_error, sc->ctx);
+ taskqueue_enqueue(taskqueue_arm_spe, &sc->task);
+ /* PMBPTR_EL1 is fault address if PMBSR_DL is 1 */
+ device_printf(dev, "CPU:%d PMBSR_EL1:%#lx\n", cpu_id, pmbsr);
+ device_printf(dev, "PMBPTR_EL1:%#lx PMBLIMITR_EL1:%#lx\n",
+ READ_SPECIALREG(PMBPTR_EL1_REG),
+ READ_SPECIALREG(PMBLIMITR_EL1_REG));
+ return (FILTER_HANDLED);
+ }
+
+ mtx_lock_spin(&info->lock);
+
+ /*
+ * Data Loss bit - pmbptr might not be pointing to the end of the last
+ * complete record
+ */
+ if ((pmbsr & PMBSR_DL) == PMBSR_DL)
+ buf->partial_rec = 1;
+ buf->pmbptr = READ_SPECIALREG(PMBPTR_EL1_REG);
+ buf->buf_svc = true;
+
+ /* Setup regs ready to start writing to the other half of the buffer */
+ info->buf_idx = !info->buf_idx;
+ base = buf_start_addr(info->buf_idx, info);
+ limit = base + (info->buf_size/2);
+ limit &= PMBLIMITR_LIMIT_MASK;
+ limit |= PMBLIMITR_E;
+ WRITE_SPECIALREG(PMBPTR_EL1_REG, base);
+ WRITE_SPECIALREG(PMBLIMITR_EL1_REG, limit);
+ isb();
+
+	/*
+	 * Notify userspace via kqueue that the buffer is full and needs
+	 * copying out - since kqueue can sleep, don't do this in the
+	 * interrupt handler; enqueue a task to do it later instead
+	 */
+ TASK_INIT(&info->task[i], 0, (task_fn_t *)arm_spe_send_buffer, buf);
+ taskqueue_enqueue(taskqueue_arm_spe, &info->task[i]);
+
+ /*
+ * It's possible userspace hasn't yet notified us they've copied out the
+ * other half of the buffer
+ *
+ * This might be because:
+ * a) Kernel hasn't scheduled the task via taskqueue to notify
+ * userspace to copy out the data
+ * b) Userspace is still copying the buffer or hasn't notified us
+ * back via the HWT_IOC_SVC_BUF ioctl
+ *
+ * Either way we need to avoid overwriting uncopied data in the
+ * buffer, so disable profiling until we receive that SVC_BUF
+ * ioctl
+ *
+ * Using a larger buffer size should help to minimise these events and
+ * loss of profiling data while profiling is disabled
+ */
+ if (prev_buf->buf_svc) {
+ device_printf(sc->dev, "cpu%d: buffer full interrupt, but other"
+ " half of buffer has not been copied out - consider"
+ " increasing buffer size to minimise loss of profiling data\n",
+ cpu_id);
+ WRITE_SPECIALREG(PMSCR_EL1_REG, 0x0);
+ prev_buf->buf_wait = true;
+ }
+
+ mtx_unlock_spin(&info->lock);
+
+ /* Clear Profiling Buffer Status Register */
+ WRITE_SPECIALREG(PMBSR_EL1_REG, 0);
+
+ isb();
+
+ return (FILTER_HANDLED);
+}
+
+/* Note: scheduled and run via taskqueue, so can run on any CPU at any time */
+void
+arm_spe_send_buffer(void *arg, int pending __unused)
+{
+ struct arm_spe_buf_info *buf = (struct arm_spe_buf_info *)arg;
+ struct arm_spe_info *info = buf->info;
+ struct arm_spe_queue *queue;
+ struct kevent kev;
+ int ret;
+
+ queue = malloc(sizeof(struct arm_spe_queue), M_ARM_SPE,
+ M_WAITOK | M_ZERO);
+
+ mtx_lock_spin(&info->lock);
+
+ /* Add to queue for userspace to pickup */
+ queue->ident = info->ident;
+ queue->offset = buf->pmbptr - buf_start_addr(buf->buf_idx, info);
+ queue->buf_idx = buf->buf_idx;
+ queue->final_buf = !info->enabled;
+ queue->partial_rec = buf->partial_rec;
+ mtx_unlock_spin(&info->lock);
+
+ mtx_lock_spin(&info->sc->sc_lock);
+ STAILQ_INSERT_TAIL(&info->sc->pending, queue, next);
+ info->sc->npending++;
+ EV_SET(&kev, ARM_SPE_KQ_BUF, EVFILT_USER, 0, NOTE_TRIGGER,
+ info->sc->npending, NULL);
+ mtx_unlock_spin(&info->sc->sc_lock);
+
+ /* Notify userspace */
+ ret = kqfd_register(info->sc->kqueue_fd, &kev, info->sc->hwt_td,
+ M_WAITOK);
+ if (ret) {
+ dprintf("%s kqfd_register ret:%d\n", __func__, ret);
+ arm_spe_error(info->sc->ctx, 0);
+ }
+}
+
+static void
+arm_spe_error(void *arg, int pending __unused)
+{
+ struct hwt_context *ctx = arg;
+ struct kevent kev;
+ int ret;
+
+ smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier,
+ arm_spe_disable, smp_no_rendezvous_barrier, NULL);
+
+ EV_SET(&kev, ARM_SPE_KQ_SHUTDOWN, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
+ ret = kqfd_register(ctx->kqueue_fd, &kev, ctx->hwt_td, M_WAITOK);
+ if (ret)
+ dprintf("%s kqfd_register ret:%d\n", __func__, ret);
+}
+
+MODULE_DEPEND(spe, hwt, 1, 1, 1);
+MODULE_VERSION(spe, 1);
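
Isolated from the surrounding interrupt bookkeeping, the re-arm step in
arm_spe_intr() is the whole of the ping-pong scheme. As a sketch (the
name arm_spe_rearm() is hypothetical; the register writes mirror the
handler above):

	/* Flip halves and point the Profiling Buffer at the idle one. */
	static void
	arm_spe_rearm(struct arm_spe_info *info)
	{
		uint64_t base, limit;

		info->buf_idx = !info->buf_idx;
		base = buf_start_addr(info->buf_idx, info);
		limit = (base + info->buf_size / 2) & PMBLIMITR_LIMIT_MASK;
		WRITE_SPECIALREG(PMBPTR_EL1_REG, base);
		WRITE_SPECIALREG(PMBLIMITR_EL1_REG, limit | PMBLIMITR_E);
		isb();
	}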
diff --git a/sys/arm64/spe/arm_spe_dev.h b/sys/arm64/spe/arm_spe_dev.h
new file mode 100644
index 000000000000..df88d98ef1c0
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_dev.h
@@ -0,0 +1,162 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ARM64_ARM_SPE_DEV_H_
+#define _ARM64_ARM_SPE_DEV_H_
+
+#include <sys/mutex.h>
+#include <sys/taskqueue.h>
+
+#include <vm/vm.h>
+
+#include <arm64/spe/arm_spe.h>
+
+#include <dev/hwt/hwt_context.h>
+
+#define ARM_SPE_DEBUG
+#undef ARM_SPE_DEBUG
+
+#ifdef ARM_SPE_DEBUG
+#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#else
+#define dprintf(fmt, ...)
+#endif
+
+DECLARE_CLASS(arm_spe_driver);
+
+struct cdev;
+struct resource;
+
+extern bool arm64_pid_in_contextidr;
+
+int spe_register(device_t dev);
+void arm_spe_disable(void *arg __unused);
+int spe_backend_disable_smp(struct hwt_context *ctx);
+void arm_spe_send_buffer(void *arg, int pending __unused);
+
+/*
+ * PSB CSYNC is a Profiling Synchronization Barrier encoded in the hint
+ * space, so it is a NOP on earlier architectures.
+ */
+#define psb_csync() __asm __volatile("hint #17" ::: "memory")
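+
+/*
+ * The profiling data must be drained before PMBPTR_EL1 is read; the
+ * sequence used in arm_spe_intr() is:
+ *
+ *	psb_csync();
+ *	dsb(nsh);
+ *	isb();
+ */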
+
+struct arm_spe_softc {
+ device_t dev;
+
+ struct resource *sc_irq_res;
+ void *sc_irq_cookie;
+ struct cdev *sc_cdev;
+ struct mtx sc_lock;
+ struct task task;
+
+ int64_t sc_pmsidr;
+ int kqueue_fd;
+ struct thread *hwt_td;
+ struct arm_spe_info *spe_info;
+ struct hwt_context *ctx;
+ STAILQ_HEAD(, arm_spe_queue) pending;
+ uint64_t npending;
+
+ uint64_t pmbidr;
+ uint64_t pmsidr;
+
+ uint16_t kva_align;
+};
+
+struct arm_spe_buf_info {
+ struct arm_spe_info *info;
+ uint64_t pmbptr;
+ uint8_t buf_idx : 1;
+ bool buf_svc : 1;
+ bool buf_wait : 1;
+ bool partial_rec : 1;
+};
+
+struct arm_spe_info {
+ int ident; /* tid or cpu_id */
+ struct mtx lock;
+ struct arm_spe_softc *sc;
+ struct task task[2];
+ bool enabled : 1;
+
+ /* buffer is split in half as a ping-pong buffer */
+ vm_object_t bufobj;
+ vm_offset_t kvaddr;
+ size_t buf_size;
+ uint8_t buf_idx : 1; /* 0 = first half of buf, 1 = 2nd half */
+ struct arm_spe_buf_info buf_info[2];
+
+ /* config */
+ enum arm_spe_profiling_level level;
+ enum arm_spe_ctx_field ctx_field;
+ /* filters */
+ uint64_t pmsfcr;
+ uint64_t pmsevfr;
+ uint64_t pmslatfr;
+ /* interval */
+ uint64_t pmsirr;
+ uint64_t pmsicr;
+ /* control */
+ uint64_t pmscr;
+};
+
+struct arm_spe_queue {
+ int ident;
+ u_int buf_idx : 1;
+ bool partial_rec : 1;
+ bool final_buf : 1;
+ vm_offset_t offset;
+ STAILQ_ENTRY(arm_spe_queue) next;
+};
+
+static inline vm_offset_t
+buf_start_addr(u_int buf_idx, struct arm_spe_info *info)
+{
+	vm_offset_t addr;
+
+	if (buf_idx == 0)
+		addr = info->kvaddr;
+	else
+		addr = info->kvaddr + (info->buf_size / 2);
+
+	return (addr);
+}
+
+static inline vm_offset_t
+buf_end_addr(u_int buf_idx, struct arm_spe_info *info)
+{
+	vm_offset_t addr;
+
+	if (buf_idx == 0)
+		addr = info->kvaddr + (info->buf_size / 2);
+	else
+		addr = info->kvaddr + info->buf_size;
+
+	return (addr);
+}
+
+#endif /* _ARM64_ARM_SPE_DEV_H_ */
diff --git a/sys/arm64/spe/arm_spe_fdt.c b/sys/arm64/spe/arm_spe_fdt.c
new file mode 100644
index 000000000000..d16f1dee2ac8
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_fdt.c
@@ -0,0 +1,75 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+#include <dev/ofw/openfirm.h>
+
+#include <arm64/spe/arm_spe_dev.h>
+
+static device_probe_t arm_spe_fdt_probe;
+
+static struct ofw_compat_data compat_data[] = {
+ {"arm,statistical-profiling-extension-v1", true},
+ {NULL, false},
+};
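+
+/*
+ * A matching device tree node looks roughly like this (the interrupt
+ * is a board-specific PPI; shown here as PPI 5, i.e. INTID 21):
+ *
+ *	spe {
+ *		compatible = "arm,statistical-profiling-extension-v1";
+ *		interrupts = <GIC_PPI 5 IRQ_TYPE_LEVEL_HIGH>;
+ *	};
+ */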
+
+static device_method_t arm_spe_fdt_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, arm_spe_fdt_probe),
+
+	DEVMETHOD_END
+};
+
+DEFINE_CLASS_1(spe, arm_spe_fdt_driver, arm_spe_fdt_methods,
+ sizeof(struct arm_spe_softc), arm_spe_driver);
+
+DRIVER_MODULE(spe, simplebus, arm_spe_fdt_driver, 0, 0);
+
+static int
+arm_spe_fdt_probe(device_t dev)
+{
+ if (!ofw_bus_status_okay(dev))
+ return (ENXIO);
+
+ if (!ofw_bus_search_compatible(dev, compat_data)->ocd_data)
+ return (ENXIO);
+
+ device_set_desc(dev, "ARM Statistical Profiling Extension");
+ return (BUS_PROBE_DEFAULT);
+}
diff --git a/sys/arm64/vmm/io/vgic_v3.c b/sys/arm64/vmm/io/vgic_v3.c
index 67afb3374815..023406c64182 100644
--- a/sys/arm64/vmm/io/vgic_v3.c
+++ b/sys/arm64/vmm/io/vgic_v3.c
@@ -47,7 +47,6 @@
#include <dev/ofw/openfirm.h>
-#include <machine/armreg.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpufunc.h>
diff --git a/sys/arm64/vmm/io/vtimer.c b/sys/arm64/vmm/io/vtimer.c
index da0f0d96c431..7c7fbb49e691 100644
--- a/sys/arm64/vmm/io/vtimer.c
+++ b/sys/arm64/vmm/io/vtimer.c
@@ -44,7 +44,6 @@
#include <machine/bus.h>
#include <machine/machdep.h>
#include <machine/vmm.h>
-#include <machine/armreg.h>
#include <arm64/vmm/arm64.h>
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
index bf52dc0fe916..31d2fb3f516b 100644
--- a/sys/arm64/vmm/vmm.c
+++ b/sys/arm64/vmm/vmm.c
@@ -33,7 +33,6 @@
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
-#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
@@ -51,7 +50,6 @@
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
-#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
@@ -126,7 +124,7 @@ struct vm {
volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
struct vm_mem mem; /* (i) guest memory */
- char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
+ char name[VM_MAX_NAMELEN + 1]; /* (o) virtual machine name */
struct vcpu **vcpu; /* (i) guest vcpus */
struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
/* (o) guest MMIO regions */
@@ -139,8 +137,6 @@ struct vm {
struct sx vcpus_init_lock; /* (o) */
};
-static bool vmm_initialized = false;
-
static int vm_handle_wfi(struct vcpu *vcpu,
struct vm_exit *vme, bool *retu);
@@ -209,10 +205,6 @@ static const struct vmm_regs vmm_arch_regs_masks = {
/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;
-u_int vm_maxcpu;
-SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
- &vm_maxcpu, 0, "Maximum number of vCPUs");
-
static void vcpu_notify_event_locked(struct vcpu *vcpu);
/* global statistics */
@@ -232,12 +224,6 @@ VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");
-/*
- * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
- * is a safe value for now.
- */
-#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE)
-
static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
@@ -324,20 +310,14 @@ vmm_unsupported_quirk(void)
return (0);
}
-static int
-vmm_init(void)
+int
+vmm_modinit(void)
{
int error;
- vm_maxcpu = mp_ncpus;
- TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
-
- if (vm_maxcpu > VM_MAXCPU) {
- printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
- vm_maxcpu = VM_MAXCPU;
- }
- if (vm_maxcpu == 0)
- vm_maxcpu = 1;
+ error = vmm_unsupported_quirk();
+ if (error != 0)
+ return (error);
error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
if (error != 0)
@@ -346,61 +326,12 @@ vmm_init(void)
return (vmmops_modinit(0));
}
-static int
-vmm_handler(module_t mod, int what, void *arg)
+int
+vmm_modcleanup(void)
{
- int error;
-
- switch (what) {
- case MOD_LOAD:
- error = vmm_unsupported_quirk();
- if (error != 0)
- break;
- error = vmmdev_init();
- if (error != 0)
- break;
- error = vmm_init();
- if (error == 0)
- vmm_initialized = true;
- else
- (void)vmmdev_cleanup();
- break;
- case MOD_UNLOAD:
- error = vmmdev_cleanup();
- if (error == 0 && vmm_initialized) {
- error = vmmops_modcleanup();
- if (error) {
- /*
- * Something bad happened - prevent new
- * VMs from being created
- */
- vmm_initialized = false;
- }
- }
- break;
- default:
- error = 0;
- break;
- }
- return (error);
+ return (vmmops_modcleanup());
}
-static moduledata_t vmm_kmod = {
- "vmm",
- vmm_handler,
- NULL
-};
-
-/*
- * vmm initialization has the following dependencies:
- *
- * - HYP initialization requires smp_rendezvous() and therefore must happen
- * after SMP is fully functional (after SI_SUB_SMP).
- * - vmm device initialization requires an initialized devfs.
- */
-DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
-MODULE_VERSION(vmm, 1);
-
static void
vm_init(struct vm *vm, bool create)
{
@@ -442,10 +373,6 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
return (NULL);
- /* Some interrupt controllers may have a CPU limit */
- if (vcpuid >= vgic_max_cpu_count(vm->cookie))
- return (NULL);
-
vcpu = (struct vcpu *)
atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
if (__predict_true(vcpu != NULL))
@@ -454,6 +381,12 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
sx_xlock(&vm->vcpus_init_lock);
vcpu = vm->vcpu[vcpuid];
if (vcpu == NULL && !vm->dying) {
+ /* Some interrupt controllers may have a CPU limit */
+ if (vcpuid >= vgic_max_cpu_count(vm->cookie)) {
+ sx_xunlock(&vm->vcpus_init_lock);
+ return (NULL);
+ }
+
vcpu = vcpu_alloc(vm, vcpuid);
vcpu_init(vcpu);
@@ -486,16 +419,6 @@ vm_create(const char *name, struct vm **retvm)
struct vm *vm;
int error;
- /*
- * If vmm.ko could not be successfully initialized then don't attempt
- * to create the virtual machine.
- */
- if (!vmm_initialized)
- return (ENXIO);
-
- if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
- return (EINVAL);
-
vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
error = vm_mem_init(&vm->mem, 0, 1ul << 39);
if (error != 0) {
@@ -1279,8 +1202,7 @@ vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{
-
- if (reg >= VM_REG_LAST)
+ if (reg < 0 || reg >= VM_REG_LAST)
return (EINVAL);
return (vmmops_getreg(vcpu->cookie, reg, retval));
@@ -1291,7 +1213,7 @@ vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
int error;
- if (reg >= VM_REG_LAST)
+ if (reg < 0 || reg >= VM_REG_LAST)
return (EINVAL);
error = vmmops_setreg(vcpu->cookie, reg, val);
if (error || reg != VM_REG_GUEST_PC)
diff --git a/sys/arm64/vmm/vmm_arm64.c b/sys/arm64/vmm/vmm_arm64.c
index 618f4afaf8ee..aa1361049f49 100644
--- a/sys/arm64/vmm/vmm_arm64.c
+++ b/sys/arm64/vmm/vmm_arm64.c
@@ -47,7 +47,6 @@
#include <vm/vm_page.h>
#include <vm/vm_param.h>
-#include <machine/armreg.h>
#include <machine/vm.h>
#include <machine/cpufunc.h>
#include <machine/cpu.h>
@@ -1365,7 +1364,7 @@ vmmops_setcap(void *vcpui, int num, int val)
break;
if (val != 0)
hypctx->mdcr_el2 |= MDCR_EL2_TDE;
- else
+ else if ((hypctx->setcaps & (1ul << VM_CAP_SS_EXIT)) == 0)
hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
break;
case VM_CAP_SS_EXIT:
@@ -1374,20 +1373,20 @@ vmmops_setcap(void *vcpui, int num, int val)
if (val != 0) {
hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS);
- hypctx->debug_mdscr |= hypctx->mdscr_el1 &
- (MDSCR_SS | MDSCR_KDE);
+ hypctx->debug_mdscr |= (hypctx->mdscr_el1 & MDSCR_SS);
hypctx->tf.tf_spsr |= PSR_SS;
- hypctx->mdscr_el1 |= MDSCR_SS | MDSCR_KDE;
+ hypctx->mdscr_el1 |= MDSCR_SS;
hypctx->mdcr_el2 |= MDCR_EL2_TDE;
} else {
hypctx->tf.tf_spsr &= ~PSR_SS;
hypctx->tf.tf_spsr |= hypctx->debug_spsr;
hypctx->debug_spsr &= ~PSR_SS;
- hypctx->mdscr_el1 &= ~(MDSCR_SS | MDSCR_KDE);
+ hypctx->mdscr_el1 &= ~MDSCR_SS;
hypctx->mdscr_el1 |= hypctx->debug_mdscr;
- hypctx->debug_mdscr &= ~(MDSCR_SS | MDSCR_KDE);
- hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
+ hypctx->debug_mdscr &= ~MDSCR_SS;
+ if ((hypctx->setcaps & (1ul << VM_CAP_BRK_EXIT)) == 0)
+ hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
}
break;
case VM_CAP_MASK_HWINTR:
diff --git a/sys/arm64/vmm/vmm_dev_machdep.c b/sys/arm64/vmm/vmm_dev_machdep.c
index 926a74fa528b..29d14e1ba952 100644
--- a/sys/arm64/vmm/vmm_dev_machdep.c
+++ b/sys/arm64/vmm/vmm_dev_machdep.c
@@ -68,19 +68,13 @@ int
vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data,
int fflag, struct thread *td)
{
- struct vm_run *vmrun;
- struct vm_vgic_version *vgv;
- struct vm_vgic_descr *vgic;
- struct vm_irq *vi;
- struct vm_exception *vmexc;
- struct vm_gla2gpa *gg;
- struct vm_msi *vmsi;
int error;
error = 0;
switch (cmd) {
case VM_RUN: {
struct vm_exit *vme;
+ struct vm_run *vmrun;
vmrun = (struct vm_run *)data;
vme = vm_exitinfo(vcpu);
@@ -94,41 +88,62 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data,
break;
break;
}
- case VM_INJECT_EXCEPTION:
+ case VM_INJECT_EXCEPTION: {
+ struct vm_exception *vmexc;
+
vmexc = (struct vm_exception *)data;
error = vm_inject_exception(vcpu, vmexc->esr, vmexc->far);
break;
- case VM_GLA2GPA_NOFAULT:
+ }
+ case VM_GLA2GPA_NOFAULT: {
+ struct vm_gla2gpa *gg;
+
gg = (struct vm_gla2gpa *)data;
error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla,
gg->prot, &gg->gpa, &gg->fault);
KASSERT(error == 0 || error == EFAULT,
("%s: vm_gla2gpa unknown error %d", __func__, error));
break;
- case VM_GET_VGIC_VERSION:
+ }
+ case VM_GET_VGIC_VERSION: {
+ struct vm_vgic_version *vgv;
+
vgv = (struct vm_vgic_version *)data;
/* TODO: Query the vgic driver for this */
vgv->version = 3;
vgv->flags = 0;
error = 0;
break;
- case VM_ATTACH_VGIC:
+ }
+ case VM_ATTACH_VGIC: {
+ struct vm_vgic_descr *vgic;
+
vgic = (struct vm_vgic_descr *)data;
error = vm_attach_vgic(vm, vgic);
break;
- case VM_RAISE_MSI:
+ }
+ case VM_RAISE_MSI: {
+ struct vm_msi *vmsi;
+
vmsi = (struct vm_msi *)data;
error = vm_raise_msi(vm, vmsi->msg, vmsi->addr, vmsi->bus,
vmsi->slot, vmsi->func);
break;
- case VM_ASSERT_IRQ:
+ }
+ case VM_ASSERT_IRQ: {
+ struct vm_irq *vi;
+
vi = (struct vm_irq *)data;
error = vm_assert_irq(vm, vi->irq);
break;
- case VM_DEASSERT_IRQ:
+ }
+ case VM_DEASSERT_IRQ: {
+ struct vm_irq *vi;
+
vi = (struct vm_irq *)data;
error = vm_deassert_irq(vm, vi->irq);
break;
+ }
default:
error = ENOTTY;
break;
diff --git a/sys/arm64/vmm/vmm_hyp.c b/sys/arm64/vmm/vmm_hyp.c
index b8c6d2ab7a9a..0ad7930e9a87 100644
--- a/sys/arm64/vmm/vmm_hyp.c
+++ b/sys/arm64/vmm/vmm_hyp.c
@@ -32,7 +32,6 @@
#include <sys/types.h>
#include <sys/proc.h>
-#include <machine/armreg.h>
#include "arm64.h"
#include "hyp.h"
diff --git a/sys/arm64/vmm/vmm_reset.c b/sys/arm64/vmm/vmm_reset.c
index 1240c3ed16ec..0e4910ea87b4 100644
--- a/sys/arm64/vmm/vmm_reset.c
+++ b/sys/arm64/vmm/vmm_reset.c
@@ -31,7 +31,6 @@
#include <sys/kernel.h>
#include <sys/lock.h>
-#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/hypervisor.h>