diff options
Diffstat (limited to 'sys/arm64')
39 files changed, 1972 insertions, 178 deletions
diff --git a/sys/arm64/arm64/db_disasm.c b/sys/arm64/arm64/db_disasm.c index ab1002560b20..14ae2acc2ce6 100644 --- a/sys/arm64/arm64/db_disasm.c +++ b/sys/arm64/arm64/db_disasm.c @@ -31,6 +31,7 @@ #include <ddb/db_access.h> #include <ddb/db_sym.h> +#include <machine/armreg.h> #include <machine/disassem.h> static u_int db_disasm_read_word(vm_offset_t); diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c index 8f8a934ad520..4cb8ee5f57ef 100644 --- a/sys/arm64/arm64/elf32_machdep.c +++ b/sys/arm64/arm64/elf32_machdep.c @@ -210,7 +210,7 @@ freebsd32_fetch_syscall_args(struct thread *td) sa->code = *ap++; nap--; } else if (sa->code == SYS___syscall) { - sa->code = ap[1]; + sa->code = ap[_QUAD_LOWWORD]; nap -= 2; ap += 2; } diff --git a/sys/arm64/arm64/gic_v3.c b/sys/arm64/arm64/gic_v3.c index 201cdae6de09..641b6d6dbc5e 100644 --- a/sys/arm64/arm64/gic_v3.c +++ b/sys/arm64/arm64/gic_v3.c @@ -494,7 +494,7 @@ gic_v3_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) case GICV3_IVAR_REDIST: *result = (uintptr_t)&sc->gic_redists.pcpu[PCPU_GET(cpuid)]; return (0); - case GICV3_IVAR_SUPPORT_LPIS: + case GIC_IVAR_SUPPORT_LPIS: *result = (gic_d_read(sc, 4, GICD_TYPER) & GICD_TYPER_LPIS) != 0; return (0); diff --git a/sys/arm64/arm64/gic_v3_var.h b/sys/arm64/arm64/gic_v3_var.h index 8bc0f456d91e..2570834c2818 100644 --- a/sys/arm64/arm64/gic_v3_var.h +++ b/sys/arm64/arm64/gic_v3_var.h @@ -108,11 +108,9 @@ MALLOC_DECLARE(M_GIC_V3); #define GICV3_IVAR_NIRQS 1000 /* 1001 was GICV3_IVAR_REDIST_VADDR */ #define GICV3_IVAR_REDIST 1002 -#define GICV3_IVAR_SUPPORT_LPIS 1003 __BUS_ACCESSOR(gicv3, nirqs, GICV3, NIRQS, u_int); __BUS_ACCESSOR(gicv3, redist, GICV3, REDIST, void *); -__BUS_ACCESSOR(gicv3, support_lpis, GICV3, SUPPORT_LPIS, bool); /* Device methods */ int gic_v3_attach(device_t dev); diff --git a/sys/arm64/arm64/gicv3_its.c b/sys/arm64/arm64/gicv3_its.c index 546a225abf09..7821b1512083 100644 --- a/sys/arm64/arm64/gicv3_its.c +++ 
b/sys/arm64/arm64/gicv3_its.c @@ -2222,7 +2222,7 @@ gicv3_its_fdt_probe(device_t dev) if (!ofw_bus_is_compatible(dev, "arm,gic-v3-its")) return (ENXIO); - if (!gicv3_get_support_lpis(dev)) + if (!gic_get_support_lpis(dev)) return (ENXIO); device_set_desc(dev, "ARM GIC Interrupt Translation Service"); @@ -2294,7 +2294,7 @@ gicv3_its_acpi_probe(device_t dev) if (gic_get_hw_rev(dev) < 3) return (EINVAL); - if (!gicv3_get_support_lpis(dev)) + if (!gic_get_support_lpis(dev)) return (ENXIO); device_set_desc(dev, "ARM GIC Interrupt Translation Service"); diff --git a/sys/arm64/arm64/kexec_support.c b/sys/arm64/arm64/kexec_support.c new file mode 100644 index 000000000000..8b9719c05b67 --- /dev/null +++ b/sys/arm64/arm64/kexec_support.c @@ -0,0 +1,188 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/kexec.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vm_phys.h> +#include <vm/vm_radix.h> +#include <vm/pmap.h> +#include <vm/vm_page.h> + +#include <machine/armreg.h> +#include <machine/pmap.h> +#include <machine/pte.h> + +/* + * Idea behind this: + * + * kexec_load_md(): + * - Update boot page tables (identity map) to include all pages needed before + * disabling MMU. 
+ * + * kexec_reboot_md(): + * - Copy pages into target(s) + * - Do "other stuff" + * - Does not return + */ + +extern pt_entry_t pagetable_l0_ttbr0_bootstrap[]; +extern unsigned long initstack_end[]; +void switch_stack(void *, void (*)(void *, void *, struct kexec_image *), void *); + +#define SCTLR_EL1_NO_MMU (SCTLR_RES1 | SCTLR_LSMAOE | SCTLR_nTLSMD | \ + SCTLR_EIS | SCTLR_TSCXT | SCTLR_EOS) +#define vm_page_offset(m) ((vm_offset_t)(m) - vm_page_base) +static inline vm_page_t +phys_vm_page(vm_page_t m, vm_offset_t vm_page_v, vm_paddr_t vm_page_p) +{ + return ((vm_page_t)((vm_offset_t)m - vm_page_v + vm_page_p)); +} + +/* First 2 args are filler for switch_stack() */ +static void __aligned(16) __dead2 +kexec_reboot_bottom( void *arg1 __unused, void *arg2 __unused, + struct kexec_image *image) +{ + void (*e)(void) = (void *)image->entry; + vm_offset_t vm_page_base = (vm_offset_t)vm_page_array; + vm_paddr_t vm_page_phys = pmap_kextract((vm_offset_t)vm_page_array); + struct kexec_segment_stage *phys_segs = + (void *)pmap_kextract((vm_offset_t)&image->segments); + vm_paddr_t from_pa, to_pa; + vm_size_t size; + vm_page_t first, m, mp; + struct pctrie_iter pct_i; + + /* + * Create a linked list of all pages in the object before we disable the + * MMU. Once the MMU is disabled we can't use the vm_radix iterators, + * as they rely on virtual address pointers. + */ + first = NULL; + vm_radix_iter_init(&pct_i, &image->map_obj->rtree); + VM_RADIX_FORALL(m, &pct_i) { + if (first == NULL) + first = m; + else + SLIST_INSERT_AFTER(mp, m, plinks.s.ss); + mp = m; + } + + /* + * We're running out of the identity map now, disable the MMU before we + * continue. It's possible page tables can be overwritten, which would + * be very bad if we were running with the MMU enabled. + */ + WRITE_SPECIALREG(sctlr_el1, SCTLR_EL1_NO_MMU); + isb(); + for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) { + if (phys_segs[i].size == 0) + break; + to_pa = phys_segs[i].target; + /* Copy the segment here... 
*/ + for (vm_page_t p = phys_segs[i].first_page; + p != NULL && to_pa - phys_segs[i].target < phys_segs[i].size; + p = SLIST_NEXT(p, plinks.s.ss)) { + p = phys_vm_page(p, vm_page_base, vm_page_phys); + from_pa = p->phys_addr; + if (p->phys_addr == to_pa) { + to_pa += PAGE_SIZE; + continue; + } + for (size = PAGE_SIZE / sizeof(register_t); + size > 0; --size) { + *(register_t *)to_pa = *(register_t *)from_pa; + to_pa += sizeof(register_t); + from_pa += sizeof(register_t); + } + } + } + invalidate_icache(); + e(); + while (1) + ; +} + +void +kexec_reboot_md(struct kexec_image *image) +{ + uintptr_t ptr; + register_t reg; + + for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) { + if (image->segments[i].size > 0) + cpu_dcache_inv_range((void *)PHYS_TO_DMAP(image->segments[i].target), + image->segments[i].size); + } + ptr = pmap_kextract((vm_offset_t)kexec_reboot_bottom); + serror_disable(); + + reg = pmap_kextract((vm_offset_t)pagetable_l0_ttbr0_bootstrap); + set_ttbr0(reg); + cpu_tlb_flushID(); + + typeof(kexec_reboot_bottom) *p = (void *)ptr; + switch_stack((void *)pmap_kextract((vm_offset_t)initstack_end), + p, image); + while (1) + ; +} + +int +kexec_load_md(struct kexec_image *image) +{ + vm_paddr_t tmp; + pt_entry_t *pte; + + /* Create L2 page blocks for the trampoline. L0/L1 are from the startup. */ + + /* + * There are exactly 2 pages before the pagetable_l0_ttbr0_bootstrap, so + * move to there. + */ + pte = pagetable_l0_ttbr0_bootstrap; + pte -= (Ln_ENTRIES * 2); /* move to start of L2 pages */ + + /* + * Populate the identity map with symbols we know we'll need before we + * turn off the MMU. + */ + tmp = pmap_kextract((vm_offset_t)kexec_reboot_bottom); + pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN); + tmp = pmap_kextract((vm_offset_t)initstack_end); + pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN); + /* We'll need vm_page_array for doing offset calculations. 
*/ + tmp = pmap_kextract((vm_offset_t)&vm_page_array); + pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN); + + return (0); +} diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S index d35e334905a7..c22d5fe76468 100644 --- a/sys/arm64/arm64/locore.S +++ b/sys/arm64/arm64/locore.S @@ -325,6 +325,19 @@ mp_virtdone: b init_secondary LEND(mpentry_common) + +ENTRY(mp_cpu_spinloop) +0: + wfe + ldr x0, mp_cpu_spin_table_release_addr + cbz x0, 0b + blr x0 + .globl mp_cpu_spin_table_release_addr +mp_cpu_spin_table_release_addr: + .quad 0 + .globl mp_cpu_spinloop_end +mp_cpu_spinloop_end: +END(mp_cpu_spinloop) #endif /* @@ -432,6 +445,10 @@ LENTRY(enter_kernel_el) ldr x3, =(CNTHCTL_EL1PCTEN_NOTRAP | CNTHCTL_EL1PCEN_NOTRAP) ldr x5, =(PSR_DAIF | PSR_M_EL1h) + /* Enable SPE at EL1 via Monitor Debug Configuration Register */ + mov x6, MDCR_EL2_E2PB_EL1_0_NO_TRAP + msr mdcr_el2, x6 + .Ldone_vhe: msr cptr_el2, x2 @@ -475,6 +492,29 @@ LENTRY(enter_kernel_el) eret LEND(enter_kernel_el) +/* Turn off the MMU. Install ttbr0 from the bootstrap page table, and go there. + * Does not return. + * - x0 - target address to jump to after stopping the MMU. + * - x1 - kernel load address + */ +ENTRY(stop_mmu) + mov x16, x0 /* Save target. */ + ldr x2, =(1f - KERNBASE) + add x17, x1, x2 + ldr x3, =(pagetable_l0_ttbr0_bootstrap - KERNBASE) + add x1, x1, x3 + msr ttbr0_el1, x1 + isb + br x17 +1: + BTI_J + mrs x0, sctlr_el1 + bic x0, x0, SCTLR_M + bic x0, x0, SCTLR_C + msr sctlr_el1, x0 + isb + br x16 +END(stop_mmu) /* * Get the physical address the kernel was loaded at. 
*/ @@ -1094,12 +1134,19 @@ tcr: TCR_SH0_IS | TCR_ORGN0_WBWA | TCR_IRGN0_WBWA) LEND(start_mmu) +ENTRY(switch_stack) + mov sp, x0 + mov x16, x1 + br x16 +END(switch_stack) + ENTRY(abort) b abort END(abort) .bss .align PAGE_SHIFT + .globl initstack_end initstack: .space BOOT_STACK_SIZE initstack_end: @@ -1116,6 +1163,7 @@ initstack_end: * L0 for user */ .globl pagetable_l0_ttbr1 + .globl pagetable_l0_ttbr0_bootstrap pagetable: pagetable_l3_ttbr1: .space (PAGE_SIZE * L3_PAGE_COUNT) diff --git a/sys/arm64/arm64/machdep_boot.c b/sys/arm64/arm64/machdep_boot.c index 83bd74ea7317..1c5e8189e436 100644 --- a/sys/arm64/arm64/machdep_boot.c +++ b/sys/arm64/arm64/machdep_boot.c @@ -106,7 +106,8 @@ fake_preload_metadata(void *dtb_ptr, size_t dtb_size) PRELOAD_PUSH_VALUE(uint32_t, MODINFO_SIZE); PRELOAD_PUSH_VALUE(uint32_t, sizeof(size_t)); - PRELOAD_PUSH_VALUE(uint64_t, (size_t)(&end - VM_MIN_KERNEL_ADDRESS)); + PRELOAD_PUSH_VALUE(uint64_t, + (size_t)((vm_offset_t)&end - VM_MIN_KERNEL_ADDRESS)); if (dtb_ptr != NULL) { /* Copy DTB to KVA space and insert it into module chain. 
*/ diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c index e4d011df3a06..ba673ce9d6ee 100644 --- a/sys/arm64/arm64/mp_machdep.c +++ b/sys/arm64/arm64/mp_machdep.c @@ -60,6 +60,7 @@ #include <machine/debug_monitor.h> #include <machine/intr.h> #include <machine/smp.h> +#include <machine/vmparam.h> #ifdef VFP #include <machine/vfp.h> #endif @@ -103,6 +104,7 @@ static void ipi_hardclock(void *); static void ipi_preempt(void *); static void ipi_rendezvous(void *); static void ipi_stop(void *); +static void ipi_off(void *); #ifdef FDT static u_int fdt_cpuid; @@ -193,6 +195,7 @@ release_aps(void *dummy __unused) intr_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL); intr_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL); intr_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL); + intr_ipi_setup(IPI_OFF, "off", ipi_off, NULL); atomic_store_int(&aps_started, 0); atomic_store_rel_int(&aps_ready, 1); @@ -267,6 +270,8 @@ init_secondary(uint64_t cpu) install_cpu_errata(); enable_cpu_feat(CPU_FEAT_AFTER_DEV); + intr_pic_init_secondary(); + /* Signal we are done */ atomic_add_int(&aps_started, 1); @@ -285,8 +290,6 @@ init_secondary(uint64_t cpu) ("pmap0 doesn't match cpu %ld's ttbr0", cpu)); pcpup->pc_curpmap = pmap0; - intr_pic_init_secondary(); - /* Start per-CPU event timers. */ cpu_initclocks_ap(); @@ -390,6 +393,34 @@ ipi_stop(void *dummy __unused) CTR0(KTR_SMP, "IPI_STOP (restart)"); } +void stop_mmu(vm_paddr_t, vm_paddr_t) __dead2; +extern uint32_t mp_cpu_spinloop[]; +extern uint32_t mp_cpu_spinloop_end[]; +extern uint64_t mp_cpu_spin_table_release_addr; +static void +ipi_off(void *dummy __unused) +{ + CTR0(KTR_SMP, "IPI_OFF"); + if (psci_present) + psci_cpu_off(); + else { + uint64_t release_addr; + vm_size_t size; + + size = (vm_offset_t)&mp_cpu_spin_table_release_addr - + (vm_offset_t)mp_cpu_spinloop; + release_addr = PCPU_GET(release_addr) - size; + isb(); + invalidate_icache(); + /* Go catatonic, don't take any interrupts. 
*/ + intr_disable(); + stop_mmu(release_addr, pmap_kextract(KERNBASE)); + + + } + CTR0(KTR_SMP, "IPI_OFF failed"); +} + struct cpu_group * cpu_topo(void) { @@ -511,6 +542,7 @@ start_cpu(u_int cpuid, uint64_t target_cpu, int domain, vm_paddr_t release_addr) pcpu_init(pcpup, cpuid, sizeof(struct pcpu)); pcpup->pc_mpidr = target_cpu & CPU_AFF_MASK; bootpcpu = pcpup; + pcpup->pc_release_addr = release_addr; dpcpu[cpuid - 1] = (void *)(pcpup + 1); dpcpu_init(dpcpu[cpuid - 1], cpuid); @@ -752,6 +784,52 @@ cpu_mp_start(void) } } +void +cpu_mp_stop(void) +{ + + /* Short-circuit for single-CPU */ + if (CPU_COUNT(&all_cpus) == 1) + return; + + KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), ("Not on the first CPU!\n")); + + /* + * If we use spin-table, assume U-boot method for now (single address + * shared by all CPUs). + */ + if (!psci_present) { + int cpu; + vm_paddr_t release_addr; + void *release_vaddr; + vm_size_t size; + + /* Find the shared release address. */ + CPU_FOREACH(cpu) { + release_addr = pcpu_find(cpu)->pc_release_addr; + if (release_addr != 0) + break; + } + /* No release address? No way of notifying other CPUs. 
*/ + if (release_addr == 0) + return; + + size = (vm_offset_t)&mp_cpu_spinloop_end - + (vm_offset_t)&mp_cpu_spinloop; + + release_addr -= (vm_offset_t)&mp_cpu_spin_table_release_addr - + (vm_offset_t)mp_cpu_spinloop; + + release_vaddr = pmap_mapdev(release_addr, size); + bcopy(mp_cpu_spinloop, release_vaddr, size); + cpu_dcache_wbinv_range(release_vaddr, size); + pmap_unmapdev(release_vaddr, size); + invalidate_icache(); + } + ipi_all_but_self(IPI_OFF); + DELAY(1000000); +} + /* Introduce rest of cores to the world */ void cpu_mp_announce(void) diff --git a/sys/arm64/arm64/nexus.c b/sys/arm64/arm64/nexus.c index 26b3389db172..012bf859eb3c 100644 --- a/sys/arm64/arm64/nexus.c +++ b/sys/arm64/arm64/nexus.c @@ -72,6 +72,8 @@ #include "acpi_bus_if.h" #endif +#include "pcib_if.h" + extern struct bus_space memmap_bus; static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device"); @@ -123,6 +125,15 @@ static bus_get_bus_tag_t nexus_get_bus_tag; #ifdef FDT static ofw_bus_map_intr_t nexus_ofw_map_intr; +/* + * PCIB interface + */ +static pcib_alloc_msi_t nexus_fdt_pcib_alloc_msi; +static pcib_release_msi_t nexus_fdt_pcib_release_msi; +static pcib_alloc_msix_t nexus_fdt_pcib_alloc_msix; +static pcib_release_msix_t nexus_fdt_pcib_release_msix; +static pcib_map_msi_t nexus_fdt_pcib_map_msi; + #endif static device_method_t nexus_methods[] = { @@ -441,6 +452,13 @@ static device_method_t nexus_fdt_methods[] = { /* OFW interface */ DEVMETHOD(ofw_bus_map_intr, nexus_ofw_map_intr), + /* PCIB interface */ + DEVMETHOD(pcib_alloc_msi, nexus_fdt_pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, nexus_fdt_pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, nexus_fdt_pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, nexus_fdt_pcib_release_msix), + DEVMETHOD(pcib_map_msi, nexus_fdt_pcib_map_msi), + DEVMETHOD_END, }; @@ -518,6 +536,73 @@ nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells, irq = intr_map_irq(NULL, iparent, (struct intr_map_data *)fdt_data); return (irq); } 
+ +static int +nexus_fdt_pcib_alloc_msi(device_t dev, device_t child, int count, int maxcount, + int *irqs) +{ + phandle_t msi_parent; + int error; + + error = ofw_bus_msimap(ofw_bus_get_node(child), 0, &msi_parent, NULL); + if (error != 0) + return (error); + + return (intr_alloc_msi(dev, child, msi_parent, count, maxcount, irqs)); +} + +static int +nexus_fdt_pcib_release_msi(device_t dev, device_t child, int count, int *irqs) +{ + phandle_t msi_parent; + int error; + + error = ofw_bus_msimap(ofw_bus_get_node(child), 0, &msi_parent, NULL); + if (error != 0) + return (error); + + return (intr_release_msi(dev, child, msi_parent, count, irqs)); +} + +static int +nexus_fdt_pcib_alloc_msix(device_t dev, device_t child, int *irq) +{ + phandle_t msi_parent; + int error; + + error = ofw_bus_msimap(ofw_bus_get_node(child), 0, &msi_parent, NULL); + if (error != 0) + return (error); + + return (intr_alloc_msix(dev, child, msi_parent, irq)); +} + +static int +nexus_fdt_pcib_release_msix(device_t dev, device_t child, int irq) +{ + phandle_t msi_parent; + int error; + + error = ofw_bus_msimap(ofw_bus_get_node(child), 0, &msi_parent, NULL); + if (error != 0) + return (error); + + return (intr_release_msix(dev, child, msi_parent, irq)); +} + +static int +nexus_fdt_pcib_map_msi(device_t dev, device_t child, int irq, uint64_t *addr, + uint32_t *data) +{ + phandle_t msi_parent; + int error; + + error = ofw_bus_msimap(ofw_bus_get_node(child), 0, &msi_parent, NULL); + if (error != 0) + return (error); + + return (intr_map_msi(dev, child, msi_parent, irq, addr, data)); +} #endif #ifdef DEV_ACPI diff --git a/sys/arm64/conf/std.arm b/sys/arm64/conf/std.arm index fb5561506531..309059a096eb 100644 --- a/sys/arm64/conf/std.arm +++ b/sys/arm64/conf/std.arm @@ -21,3 +21,6 @@ device arm_doorbell # ARM Message Handling Unit (MHU) options FDT device acpi + +# DTBs +makeoptions MODULES_EXTRA+="dtb/arm" diff --git a/sys/arm64/conf/std.arm64 b/sys/arm64/conf/std.arm64 index 
a0568466cfaf..02bdd25f2d52 100644 --- a/sys/arm64/conf/std.arm64 +++ b/sys/arm64/conf/std.arm64 @@ -106,3 +106,9 @@ device efirtc # EFI RTC # SMBIOS -- all EFI platforms device smbios + +# random(4) +device tpm # Trusted Platform Module +options RANDOM_ENABLE_TPM # enable entropy from TPM 2.0 +options RANDOM_ENABLE_KBD +options RANDOM_ENABLE_MOUSE diff --git a/sys/arm64/conf/std.broadcom b/sys/arm64/conf/std.broadcom index 3332aaac0826..65bee16e315d 100644 --- a/sys/arm64/conf/std.broadcom +++ b/sys/arm64/conf/std.broadcom @@ -33,5 +33,8 @@ device sdhci options FDT device acpi +# Sound support +device vchiq + # DTBs makeoptions MODULES_EXTRA+="dtb/rpi" diff --git a/sys/arm64/include/_armreg.h b/sys/arm64/include/_armreg.h new file mode 100644 index 000000000000..0f5134e5a978 --- /dev/null +++ b/sys/arm64/include/_armreg.h @@ -0,0 +1,57 @@ +/*- + * Copyright (c) 2013, 2014 Andrew Turner + * Copyright (c) 2015,2021 The FreeBSD Foundation + * + * Portions of this software were developed by Andrew Turner + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if !defined(_MACHINE_ARMREG_H_) && \ + !defined(_MACHINE_CPU_H_) && \ + !defined(_MACHINE_HYPERVISOR_H_) +#error Do not include this file directly +#endif + +#ifndef _MACHINE__ARMREG_H_ +#define _MACHINE__ARMREG_H_ + +#define __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \ + S##op0##_##op1##_C##crn##_C##crm##_##op2 +#define _MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \ + __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) +#define MRS_REG_ALT_NAME(reg) \ + _MRS_REG_ALT_NAME(reg##_op0, reg##_op1, reg##_CRn, reg##_CRm, reg##_op2) + + +#define READ_SPECIALREG(reg) \ +({ uint64_t _val; \ + __asm __volatile("mrs %0, " __STRING(reg) : "=&r" (_val)); \ + _val; \ +}) +#define WRITE_SPECIALREG(reg, _val) \ + __asm __volatile("msr " __STRING(reg) ", %0" : : "r"((uint64_t)_val)) + +#define UL(x) UINT64_C(x) + +#endif /* !_MACHINE__ARMREG_H_ */ diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h index 393d6d89da0c..27b02c44cd76 100644 --- a/sys/arm64/include/armreg.h +++ b/sys/arm64/include/armreg.h @@ -34,25 +34,9 @@ #ifndef _MACHINE_ARMREG_H_ #define _MACHINE_ARMREG_H_ -#define INSN_SIZE 4 - -#define __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \ - S##op0##_##op1##_C##crn##_C##crm##_##op2 -#define _MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \ - __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) -#define MRS_REG_ALT_NAME(reg) \ - _MRS_REG_ALT_NAME(reg##_op0, reg##_op1, reg##_CRn, reg##_CRm, reg##_op2) - +#include <machine/_armreg.h> -#define 
READ_SPECIALREG(reg) \ -({ uint64_t _val; \ - __asm __volatile("mrs %0, " __STRING(reg) : "=&r" (_val)); \ - _val; \ -}) -#define WRITE_SPECIALREG(reg, _val) \ - __asm __volatile("msr " __STRING(reg) ", %0" : : "r"((uint64_t)_val)) - -#define UL(x) UINT64_C(x) +#define INSN_SIZE 4 /* AFSR0_EL1 - Auxiliary Fault Status Register 0 */ #define AFSR0_EL1_REG MRS_REG_ALT_NAME(AFSR0_EL1) @@ -2267,6 +2251,7 @@ #define PMBSR_MSS_SHIFT 0 #define PMBSR_MSS_MASK (UL(0xffff) << PMBSR_MSS_SHIFT) #define PMBSR_MSS_BSC_MASK (UL(0x3f) << PMBSR_MSS_SHIFT) +#define PMBSR_MSS_BSC_BUFFER_FILLED (UL(0x01) << PMBSR_MSS_SHIFT) #define PMBSR_MSS_FSC_MASK (UL(0x3f) << PMBSR_MSS_SHIFT) #define PMBSR_COLL_SHIFT 16 #define PMBSR_COLL (UL(0x1) << PMBSR_COLL_SHIFT) @@ -2278,6 +2263,11 @@ #define PMBSR_DL (UL(0x1) << PMBSR_DL_SHIFT) #define PMBSR_EC_SHIFT 26 #define PMBSR_EC_MASK (UL(0x3f) << PMBSR_EC_SHIFT) +#define PMBSR_EC_VAL(x) (((x) & PMBSR_EC_MASK) >> PMBSR_EC_SHIFT) +#define PMBSR_EC_OTHER_BUF_MGMT 0x00 +#define PMBSR_EC_GRAN_PROT_CHK 0x1e +#define PMBSR_EC_STAGE1_DA 0x24 +#define PMBSR_EC_STAGE2_DA 0x25 /* PMCCFILTR_EL0 */ #define PMCCFILTR_EL0_op0 3 @@ -2513,6 +2503,15 @@ #define PMSIDR_FnE (UL(0x1) << PMSIDR_FnE_SHIFT) #define PMSIDR_Interval_SHIFT 8 #define PMSIDR_Interval_MASK (UL(0xf) << PMSIDR_Interval_SHIFT) +#define PMSIDR_Interval_VAL(x) (((x) & PMSIDR_Interval_MASK) >> PMSIDR_Interval_SHIFT) +#define PMSIDR_Interval_256 0 +#define PMSIDR_Interval_512 2 +#define PMSIDR_Interval_768 3 +#define PMSIDR_Interval_1024 4 +#define PMSIDR_Interval_1536 5 +#define PMSIDR_Interval_2048 6 +#define PMSIDR_Interval_3072 7 +#define PMSIDR_Interval_4096 8 #define PMSIDR_MaxSize_SHIFT 12 #define PMSIDR_MaxSize_MASK (UL(0xf) << PMSIDR_MaxSize_SHIFT) #define PMSIDR_CountSize_SHIFT 16 diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h index 124da8c215ed..b15210633d37 100644 --- a/sys/arm64/include/cpu.h +++ b/sys/arm64/include/cpu.h @@ -43,10 +43,10 @@ #define _MACHINE_CPU_H_ #if 
!defined(__ASSEMBLER__) +#include <machine/_armreg.h> #include <machine/atomic.h> #include <machine/frame.h> #endif -#include <machine/armreg.h> #define TRAPF_PC(tfp) ((tfp)->tf_elr) #define TRAPF_USERMODE(tfp) (((tfp)->tf_spsr & PSR_M_MASK) == PSR_M_EL0t) diff --git a/sys/arm64/include/cpufunc.h b/sys/arm64/include/cpufunc.h index e6e1f682794e..e9eee643216b 100644 --- a/sys/arm64/include/cpufunc.h +++ b/sys/arm64/include/cpufunc.h @@ -96,6 +96,13 @@ serror_enable(void) __asm __volatile("msr daifclr, #(" __XSTRING(DAIF_A) ")"); } +static __inline void +serror_disable(void) +{ + + __asm __volatile("msr daifset, #(" __XSTRING(DAIF_A) ")"); +} + static __inline register_t get_midr(void) { diff --git a/sys/arm64/include/db_machdep.h b/sys/arm64/include/db_machdep.h index 5dc496ca851d..3ef95f7802ea 100644 --- a/sys/arm64/include/db_machdep.h +++ b/sys/arm64/include/db_machdep.h @@ -31,7 +31,6 @@ #ifndef _MACHINE_DB_MACHDEP_H_ #define _MACHINE_DB_MACHDEP_H_ -#include <machine/armreg.h> #include <machine/frame.h> #include <machine/trap.h> diff --git a/sys/arm64/include/hypervisor.h b/sys/arm64/include/hypervisor.h index 8feabd2b981b..f3d7027269c9 100644 --- a/sys/arm64/include/hypervisor.h +++ b/sys/arm64/include/hypervisor.h @@ -30,6 +30,8 @@ #ifndef _MACHINE_HYPERVISOR_H_ #define _MACHINE_HYPERVISOR_H_ +#include <machine/_armreg.h> + /* * These registers are only useful when in hypervisor context, * e.g. specific to EL2, or controlling the hypervisor. 
@@ -266,6 +268,7 @@ #define MDCR_EL2_TDRA (0x1UL << MDCR_EL2_TDRA_SHIFT) #define MDCR_EL2_E2PB_SHIFT 12 #define MDCR_EL2_E2PB_MASK (0x3UL << MDCR_EL2_E2PB_SHIFT) +#define MDCR_EL2_E2PB_EL1_0_NO_TRAP (0x3UL << MDCR_EL2_E2PB_SHIFT) #define MDCR_EL2_TPMS_SHIFT 14 #define MDCR_EL2_TPMS (0x1UL << MDCR_EL2_TPMS_SHIFT) #define MDCR_EL2_EnSPM_SHIFT 15 diff --git a/sys/arm64/include/kexec.h b/sys/arm64/include/kexec.h new file mode 100644 index 000000000000..0a8c7a053331 --- /dev/null +++ b/sys/arm64/include/kexec.h @@ -0,0 +1,33 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _ARM64_KEXEC_H_ +#define _ARM64_KEXEC_H_ + +#define KEXEC_MD_PAGES(x) 0 + +#endif /* _ARM64_KEXEC_H_ */ diff --git a/sys/arm64/include/pcpu.h b/sys/arm64/include/pcpu.h index 09bd8fa8a966..73399d2c3f8c 100644 --- a/sys/arm64/include/pcpu.h +++ b/sys/arm64/include/pcpu.h @@ -50,7 +50,8 @@ struct debug_monitor_state; struct pmap *pc_curvmpmap; \ uint64_t pc_mpidr; \ u_int pc_bcast_tlbi_workaround; \ - char __pad[197] + uint64_t pc_release_addr; \ + char __pad[189] #ifdef _KERNEL diff --git a/sys/arm64/include/smp.h b/sys/arm64/include/smp.h index 500cd1ef4f02..4a5bfda3ac1c 100644 --- a/sys/arm64/include/smp.h +++ b/sys/arm64/include/smp.h @@ -40,6 +40,7 @@ enum { IPI_STOP, IPI_STOP_HARD, IPI_HARDCLOCK, + IPI_OFF, INTR_IPI_COUNT, }; diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h index 696a69669a2a..e67540eac66d 100644 --- a/sys/arm64/include/vmm.h +++ b/sys/arm64/include/vmm.h @@ -106,27 +106,6 @@ enum vm_reg_name { #define VM_GUEST_BASE_IPA 0x80000000UL /* Guest kernel start ipa */ -/* - * The VM name has to fit into the pathname length constraints of devfs, - * governed primarily by SPECNAMELEN. The length is the total number of - * characters in the full path, relative to the mount point and not - * including any leading '/' characters. - * A prefix and a suffix are added to the name specified by the user. - * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters - * longer for future use. - * The suffix is a string that identifies a bootrom image or some similar - * image that is attached to the VM. A separator character gets added to - * the suffix automatically when generating the full path, so it must be - * accounted for, reducing the effective length by 1. - * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37 - * bytes for FreeBSD 12. A minimum length is set for safety and supports - * a SPECNAMELEN as small as 32 on old systems. 
- */ -#define VM_MAX_PREFIXLEN 10 -#define VM_MAX_SUFFIXLEN 15 -#define VM_MAX_NAMELEN \ - (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1) - #ifdef _KERNEL struct vm; struct vm_exception; diff --git a/sys/arm64/include/vmm_dev.h b/sys/arm64/include/vmm_dev.h index 219f1116c728..289ff0fe1fc9 100644 --- a/sys/arm64/include/vmm_dev.h +++ b/sys/arm64/include/vmm_dev.h @@ -31,6 +31,8 @@ #include <machine/vmm.h> +#include <dev/vmm/vmm_param.h> + struct vm_memmap { vm_paddr_t gpa; int segid; /* memory segment */ diff --git a/sys/arm64/nvidia/tegra210/max77620_regulators.c b/sys/arm64/nvidia/tegra210/max77620_regulators.c index af1a5af20ec3..d52aeaef1287 100644 --- a/sys/arm64/nvidia/tegra210/max77620_regulators.c +++ b/sys/arm64/nvidia/tegra210/max77620_regulators.c @@ -364,7 +364,7 @@ max77620_get_sel(struct max77620_reg_sc *sc, uint8_t *sel) rv = RD1(sc->base_sc, sc->def->volt_reg, sel); if (rv != 0) { - printf("%s: cannot read volatge selector: %d\n", + printf("%s: cannot read voltage selector: %d\n", regnode_get_name(sc->regnode), rv); return (rv); } @@ -384,7 +384,7 @@ max77620_set_sel(struct max77620_reg_sc *sc, uint8_t sel) rv = RM1(sc->base_sc, sc->def->volt_reg, sc->def->volt_vsel_mask, sel); if (rv != 0) { - printf("%s: cannot set volatge selector: %d\n", + printf("%s: cannot set voltage selector: %d\n", regnode_get_name(sc->regnode), rv); return (rv); } diff --git a/sys/arm64/rockchip/rk_i2s.c b/sys/arm64/rockchip/rk_i2s.c index 5f1b6bbdeabf..856fa20e6ce4 100644 --- a/sys/arm64/rockchip/rk_i2s.c +++ b/sys/arm64/rockchip/rk_i2s.c @@ -403,10 +403,10 @@ rk_i2s_dai_intr(device_t dev, struct snd_dbuf *play_buf, struct snd_dbuf *rec_bu count = sndbuf_getready(play_buf); if (count > FIFO_SIZE - 1) count = FIFO_SIZE - 1; - size = sndbuf_getsize(play_buf); + size = play_buf->bufsize; readyptr = sndbuf_getreadyptr(play_buf); - samples = (uint8_t*)sndbuf_getbuf(play_buf); + samples = play_buf->buf; written = 0; for (; level < count; level++) { val = 
(samples[readyptr++ % size] << 0); @@ -426,9 +426,9 @@ rk_i2s_dai_intr(device_t dev, struct snd_dbuf *play_buf, struct snd_dbuf *rec_bu uint8_t *samples; uint32_t count, size, freeptr, recorded; count = sndbuf_getfree(rec_buf); - size = sndbuf_getsize(rec_buf); + size = rec_buf->bufsize; freeptr = sndbuf_getfreeptr(rec_buf); - samples = (uint8_t*)sndbuf_getbuf(rec_buf); + samples = rec_buf->buf; recorded = 0; if (level > count / 4) level = count / 4; diff --git a/sys/arm64/spe/arm_spe.h b/sys/arm64/spe/arm_spe.h new file mode 100644 index 000000000000..5dba20673a77 --- /dev/null +++ b/sys/arm64/spe/arm_spe.h @@ -0,0 +1,77 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _ARM64_ARM_SPE_H_ +#define _ARM64_ARM_SPE_H_ + +/* kqueue events */ +#define ARM_SPE_KQ_BUF 138 +#define ARM_SPE_KQ_SHUTDOWN 139 +#define ARM_SPE_KQ_SIGNAL 140 + +/* spe_backend_read() u64 data encoding */ +#define KQ_BUF_POS_SHIFT 0 +#define KQ_BUF_POS (1 << KQ_BUF_POS_SHIFT) +#define KQ_PARTREC_SHIFT 1 +#define KQ_PARTREC (1 << KQ_PARTREC_SHIFT) +#define KQ_FINAL_BUF_SHIFT 2 +#define KQ_FINAL_BUF (1 << KQ_FINAL_BUF_SHIFT) + +enum arm_spe_ctx_field { + ARM_SPE_CTX_NONE, + ARM_SPE_CTX_PID, + ARM_SPE_CTX_CPU_ID +}; + +enum arm_spe_profiling_level { + ARM_SPE_KERNEL_AND_USER, + ARM_SPE_KERNEL_ONLY, + ARM_SPE_USER_ONLY +}; +struct arm_spe_config { + /* Minimum interval is IMP DEF up to maximum 24 bit value */ + uint32_t interval; + + /* Profile kernel (EL1), userspace (EL0) or both */ + enum arm_spe_profiling_level level; + + /* + * Configure context field in SPE records to store either the + * current PID, the CPU ID or neither + * + * In PID mode, kernel threads without a process context are + * logged as PID 0 + */ + enum arm_spe_ctx_field ctx_field; +}; + +struct arm_spe_svc_buf { + uint32_t ident; + uint8_t buf_idx : 1; +}; + +#endif /* _ARM64_ARM_SPE_H_ */ diff --git a/sys/arm64/spe/arm_spe_acpi.c b/sys/arm64/spe/arm_spe_acpi.c new file mode 100644 index 000000000000..b9f40448d940 --- /dev/null +++ b/sys/arm64/spe/arm_spe_acpi.c @@ -0,0 +1,146 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Arm 
Ltd + * Copyright (c) 2022 The FreeBSD Foundation + * + * Portions of this software were developed by Andrew Turner under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/module.h> +#include <sys/mutex.h> + +#include <contrib/dev/acpica/include/acpi.h> +#include <dev/acpica/acpivar.h> + +#include <arm64/spe/arm_spe_dev.h> + +static device_identify_t arm_spe_acpi_identify; +static device_probe_t arm_spe_acpi_probe; + +static device_method_t arm_spe_acpi_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, arm_spe_acpi_identify), + DEVMETHOD(device_probe, arm_spe_acpi_probe), + + DEVMETHOD_END, +}; + +DEFINE_CLASS_1(spe, arm_spe_acpi_driver, arm_spe_acpi_methods, + sizeof(struct arm_spe_softc), arm_spe_driver); + +DRIVER_MODULE(spe, acpi, arm_spe_acpi_driver, 0, 0); + +struct madt_data { + u_int irq; + bool found; + bool valid; +}; + +static void +madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) +{ + ACPI_MADT_GENERIC_INTERRUPT *intr; + struct madt_data *madt_data; + u_int irq; + + madt_data = (struct madt_data *)arg; + + /* Exit early if we are have decided to not attach */ + if (!madt_data->valid) + return; + + switch(entry->Type) { + case ACPI_MADT_TYPE_GENERIC_INTERRUPT: + intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry; + irq = intr->SpeInterrupt; + + if (irq == 0) { + madt_data->valid = false; + } else if (!madt_data->found) { + madt_data->found = true; + madt_data->irq = irq; + } else if (madt_data->irq != irq) { + madt_data->valid = false; + } + break; + + default: + break; + } +} + +static void +arm_spe_acpi_identify(driver_t *driver, device_t parent) +{ + struct madt_data madt_data; + ACPI_TABLE_MADT *madt; + device_t dev; + vm_paddr_t physaddr; + + physaddr = acpi_find_table(ACPI_SIG_MADT); + if (physaddr == 0) + return; + + madt = acpi_map_table(physaddr, ACPI_SIG_MADT); + if (madt == NULL) { + device_printf(parent, "spe: Unable to map the MADT\n"); + return; + } + + madt_data.irq = 0; + madt_data.found = false; + madt_data.valid = true; + + acpi_walk_subtables(madt + 1, (char *)madt + 
madt->Header.Length, + madt_handler, &madt_data); + + if (!madt_data.found || !madt_data.valid) + goto out; + + MPASS(madt_data.irq != 0); + + dev = BUS_ADD_CHILD(parent, 0, "spe", -1); + if (dev == NULL) { + device_printf(parent, "add spe child failed\n"); + goto out; + } + + BUS_SET_RESOURCE(parent, dev, SYS_RES_IRQ, 0, madt_data.irq, 1); + +out: + acpi_unmap_table(madt); +} + +static int +arm_spe_acpi_probe(device_t dev) +{ + device_set_desc(dev, "ARM Statistical Profiling Extension"); + return (BUS_PROBE_NOWILDCARD); +} diff --git a/sys/arm64/spe/arm_spe_backend.c b/sys/arm64/spe/arm_spe_backend.c new file mode 100644 index 000000000000..b4e1132f9cbc --- /dev/null +++ b/sys/arm64/spe/arm_spe_backend.c @@ -0,0 +1,586 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * Copyright (c) 2022 The FreeBSD Foundation + * + * Portions of this software were developed by Andrew Turner under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Arm Statistical Profiling Extension (SPE) backend + * + * Basic SPE operation + * + * SPE is enabled and configured on a per-core basis, with each core requiring + * separate code to enable and configure. Each core also requires a separate + * buffer passed as config where the CPU will write profiling data. When the + * profiling buffer is full, an interrupt will be taken on the same CPU. + * + * Driver Design + * + * - HWT allocates a large single buffer per core. This buffer is split in half + * to create a 2 element circular buffer (aka ping-pong buffer) where the + * kernel writes to one half while userspace is copying the other half + * - SMP calls are used to enable and configure each core, with SPE initially + * configured to write to the first half of the buffer + * - When the first half of the buffer is full, a buffer full interrupt will + * immediately switch writing to the second half. 
The kernel adds the details + * of the half that needs copying to a FIFO STAILQ and notifies userspace via + * kqueue by sending a ARM_SPE_KQ_BUF kevent with how many buffers on the + * queue need servicing + * - The kernel responds to HWT_IOC_BUFPTR_GET ioctl by sending details of the + * first item from the queue + * - The buffers pending copying will not be overwritten until an + * HWT_IOC_SVC_BUF ioctl is received from userspace confirming the data has + * been copied out + * - In the case where both halfs of the buffer are full, profiling will be + * paused until notification via HWT_IOC_SVC_BUF is received + * + * Future improvements and limitations + * + * - Using large buffer sizes should minimise pauses and loss of profiling + * data while kernel is waiting for userspace to copy out data. Since it is + * generally expected that consuming (copying) this data is faster than + * producing it, in practice this has not so far been an issue. If it does + * prove to be an issue even with large buffer sizes then additional buffering + * i.e. n element circular buffers might be required. + * + * - kqueue can only notify and queue one kevent of the same type, with + * subsequent events overwriting data in the first event. The kevent + * ARM_SPE_KQ_BUF can therefore only contain the number of buffers on the + * STAILQ, incrementing each time a new buffer is full. In this case kqueue + * serves just as a notification to userspace to wake up and query the kernel + * with the appropriate ioctl. An alternative might be custom kevents where + * the kevent identifier is encoded with something like n+cpu_id or n+tid. In + * this case data could be sent directly with kqueue via the kevent data and + * fflags elements, avoiding the extra ioctl. 
+ * + */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/hwt.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mman.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/rman.h> +#include <sys/rwlock.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/systm.h> + +#include <machine/bus.h> + +#include <arm64/spe/arm_spe_dev.h> + +#include <dev/hwt/hwt_vm.h> +#include <dev/hwt/hwt_backend.h> +#include <dev/hwt/hwt_config.h> +#include <dev/hwt/hwt_context.h> +#include <dev/hwt/hwt_cpu.h> +#include <dev/hwt/hwt_thread.h> + +MALLOC_DECLARE(M_ARM_SPE); + +extern u_int mp_maxid; +extern struct taskqueue *taskqueue_arm_spe; + +int spe_backend_disable_smp(struct hwt_context *ctx); + +static device_t spe_dev; +static struct hwt_backend_ops spe_ops; +static struct hwt_backend backend = { + .ops = &spe_ops, + .name = "spe", + .kva_req = 1, +}; + +static struct arm_spe_info *spe_info; + +static int +spe_backend_init_thread(struct hwt_context *ctx) +{ + return (ENOTSUP); +} + +static void +spe_backend_init_cpu(struct hwt_context *ctx) +{ + struct arm_spe_info *info; + struct arm_spe_softc *sc = device_get_softc(spe_dev); + char lock_name[32]; + char *tmp = "Arm SPE lock/cpu/"; + int cpu_id; + + spe_info = malloc(sizeof(struct arm_spe_info) * mp_ncpus, + M_ARM_SPE, M_WAITOK | M_ZERO); + + sc->spe_info = spe_info; + + CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { + info = &spe_info[cpu_id]; + info->sc = sc; + info->ident = cpu_id; + info->buf_info[0].info = info; + info->buf_info[0].buf_idx = 0; + info->buf_info[1].info = info; + info->buf_info[1].buf_idx = 1; + snprintf(lock_name, sizeof(lock_name), "%s%d", tmp, cpu_id); + mtx_init(&info->lock, lock_name, NULL, MTX_SPIN); + } +} + +static int +spe_backend_init(struct hwt_context *ctx) +{ + struct arm_spe_softc *sc = device_get_softc(spe_dev); + int error = 0; + + /* + * HWT currently specifies buffer size 
must be a multiple of PAGE_SIZE, + * i.e. minimum 4KB + the maximum PMBIDR.Align is 2KB + * This should never happen but it's good to sense check + */ + if (ctx->bufsize % sc->kva_align != 0) + return (EINVAL); + + /* + * Since we're splitting the buffer in half + PMBLIMITR needs to be page + * aligned, minimum buffer size needs to be 2x PAGE_SIZE + */ + if (ctx->bufsize < (2 * PAGE_SIZE)) + return (EINVAL); + + sc->ctx = ctx; + sc->kqueue_fd = ctx->kqueue_fd; + sc->hwt_td = ctx->hwt_td; + + if (ctx->mode == HWT_MODE_THREAD) + error = spe_backend_init_thread(ctx); + else + spe_backend_init_cpu(ctx); + + return (error); +} + +#ifdef ARM_SPE_DEBUG +static void hex_dump(uint8_t *buf, size_t len) +{ + size_t i; + + printf("--------------------------------------------------------------\n"); + for (i = 0; i < len; ++i) { + if (i % 8 == 0) { + printf(" "); + } + if (i % 16 == 0) { + if (i != 0) { + printf("\r\n"); + } + printf("\t"); + } + printf("%02X ", buf[i]); + } + printf("\r\n"); +} +#endif + +static int +spe_backend_deinit(struct hwt_context *ctx) +{ +#ifdef ARM_SPE_DEBUG + struct arm_spe_info *info; + int cpu_id; + + CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { + info = &spe_info[cpu_id]; + hex_dump((void *)info->kvaddr, 128); + hex_dump((void *)(info->kvaddr + (info->buf_size/2)), 128); + } +#endif + + if (ctx->state == CTX_STATE_RUNNING) { + spe_backend_disable_smp(ctx); + ctx->state = CTX_STATE_STOPPED; + } + + free(spe_info, M_ARM_SPE); + + return (0); +} + +static uint64_t +arm_spe_min_interval(struct arm_spe_softc *sc) +{ + /* IMPLEMENTATION DEFINED */ + switch (PMSIDR_Interval_VAL(sc->pmsidr)) + { + case PMSIDR_Interval_256: + return (256); + case PMSIDR_Interval_512: + return (512); + case PMSIDR_Interval_768: + return (768); + case PMSIDR_Interval_1024: + return (1024); + case PMSIDR_Interval_1536: + return (1536); + case PMSIDR_Interval_2048: + return (2048); + case PMSIDR_Interval_3072: + return (3072); + case PMSIDR_Interval_4096: + return (4096); + 
default: + return (4096); + } +} + +static inline void +arm_spe_set_interval(struct arm_spe_info *info, uint64_t interval) +{ + uint64_t min_interval = arm_spe_min_interval(info->sc); + + interval = MAX(interval, min_interval); + interval = MIN(interval, 1 << 24); /* max 24 bits */ + + dprintf("%s %lu\n", __func__, interval); + + info->pmsirr &= ~(PMSIRR_INTERVAL_MASK); + info->pmsirr |= (interval << PMSIRR_INTERVAL_SHIFT); +} + +static int +spe_backend_configure(struct hwt_context *ctx, int cpu_id, int session_id) +{ + struct arm_spe_info *info = &spe_info[cpu_id]; + struct arm_spe_config *cfg; + int err = 0; + + mtx_lock_spin(&info->lock); + info->ident = cpu_id; + /* Set defaults */ + info->pmsfcr = 0; + info->pmsevfr = 0xFFFFFFFFFFFFFFFFUL; + info->pmslatfr = 0; + info->pmsirr = + (arm_spe_min_interval(info->sc) << PMSIRR_INTERVAL_SHIFT) + | PMSIRR_RND; + info->pmsicr = 0; + info->pmscr = PMSCR_TS | PMSCR_PA | PMSCR_CX | PMSCR_E1SPE | PMSCR_E0SPE; + + if (ctx->config != NULL && + ctx->config_size == sizeof(struct arm_spe_config) && + ctx->config_version == 1) { + cfg = (struct arm_spe_config *)ctx->config; + if (cfg->interval) + arm_spe_set_interval(info, cfg->interval); + if (cfg->level == ARM_SPE_KERNEL_ONLY) + info->pmscr &= ~(PMSCR_E0SPE); /* turn off user */ + if (cfg->level == ARM_SPE_USER_ONLY) + info->pmscr &= ~(PMSCR_E1SPE); /* turn off kern */ + if (cfg->ctx_field) + info->ctx_field = cfg->ctx_field; + } else + err = (EINVAL); + mtx_unlock_spin(&info->lock); + + return (err); +} + + +static void +arm_spe_enable(void *arg __unused) +{ + struct arm_spe_info *info = &spe_info[PCPU_GET(cpuid)]; + uint64_t base, limit; + + dprintf("%s on cpu:%d\n", __func__, PCPU_GET(cpuid)); + + mtx_lock_spin(&info->lock); + + if (info->ctx_field == ARM_SPE_CTX_CPU_ID) + WRITE_SPECIALREG(CONTEXTIDR_EL1_REG, PCPU_GET(cpuid)); + + WRITE_SPECIALREG(PMSFCR_EL1_REG, info->pmsfcr); + WRITE_SPECIALREG(PMSEVFR_EL1_REG, info->pmsevfr); + WRITE_SPECIALREG(PMSLATFR_EL1_REG, 
info->pmslatfr); + + /* Set the sampling interval */ + WRITE_SPECIALREG(PMSIRR_EL1_REG, info->pmsirr); + isb(); + + /* Write 0 here before enabling sampling */ + WRITE_SPECIALREG(PMSICR_EL1_REG, info->pmsicr); + isb(); + + base = info->kvaddr; + limit = base + (info->buf_size/2); + /* Enable the buffer */ + limit &= PMBLIMITR_LIMIT_MASK; /* Zero lower 12 bits */ + limit |= PMBLIMITR_E; + /* Set the base and limit */ + WRITE_SPECIALREG(PMBPTR_EL1_REG, base); + WRITE_SPECIALREG(PMBLIMITR_EL1_REG, limit); + isb(); + + /* Enable sampling */ + WRITE_SPECIALREG(PMSCR_EL1_REG, info->pmscr); + isb(); + + info->enabled = true; + + mtx_unlock_spin(&info->lock); +} + +static int +spe_backend_enable_smp(struct hwt_context *ctx) +{ + struct arm_spe_info *info; + struct hwt_vm *vm; + int cpu_id; + + HWT_CTX_LOCK(ctx); + CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { + vm = hwt_cpu_get(ctx, cpu_id)->vm; + + info = &spe_info[cpu_id]; + + mtx_lock_spin(&info->lock); + info->kvaddr = vm->kvaddr; + info->buf_size = ctx->bufsize; + mtx_unlock_spin(&info->lock); + } + HWT_CTX_UNLOCK(ctx); + + cpu_id = CPU_FFS(&ctx->cpu_map) - 1; + info = &spe_info[cpu_id]; + if (info->ctx_field == ARM_SPE_CTX_PID) + arm64_pid_in_contextidr = true; + else + arm64_pid_in_contextidr = false; + + smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier, + arm_spe_enable, smp_no_rendezvous_barrier, NULL); + + return (0); +} + +void +arm_spe_disable(void *arg __unused) +{ + struct arm_spe_info *info = &spe_info[PCPU_GET(cpuid)]; + struct arm_spe_buf_info *buf = &info->buf_info[info->buf_idx]; + + if (!info->enabled) + return; + + dprintf("%s on cpu:%d\n", __func__, PCPU_GET(cpuid)); + + /* Disable profiling */ + WRITE_SPECIALREG(PMSCR_EL1_REG, 0x0); + isb(); + + /* Drain any remaining tracing data */ + psb_csync(); + dsb(nsh); + + /* Disable the profiling buffer */ + WRITE_SPECIALREG(PMBLIMITR_EL1_REG, 0); + isb(); + + /* Clear interrupt status reg */ + WRITE_SPECIALREG(PMBSR_EL1_REG, 0x0); + + /* Clear 
PID/CPU_ID from context ID reg */ + WRITE_SPECIALREG(CONTEXTIDR_EL1_REG, 0); + + mtx_lock_spin(&info->lock); + buf->pmbptr = READ_SPECIALREG(PMBPTR_EL1_REG); + info->enabled = false; + mtx_unlock_spin(&info->lock); +} + +int +spe_backend_disable_smp(struct hwt_context *ctx) +{ + struct kevent kev; + struct arm_spe_info *info; + struct arm_spe_buf_info *buf; + int cpu_id; + int ret; + + /* Disable and send out remaining data in bufs */ + smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier, + arm_spe_disable, smp_no_rendezvous_barrier, NULL); + + CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { + info = &spe_info[cpu_id]; + buf = &info->buf_info[info->buf_idx]; + arm_spe_send_buffer(buf, 0); + } + + arm64_pid_in_contextidr = false; + + /* + * Tracing on all CPUs has been disabled, and we've sent write ptr + * offsets for all bufs - let userspace know it can shutdown + */ + EV_SET(&kev, ARM_SPE_KQ_SHUTDOWN, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL); + ret = kqfd_register(ctx->kqueue_fd, &kev, ctx->hwt_td, M_WAITOK); + if (ret) + dprintf("%s kqfd_register ret:%d\n", __func__, ret); + + return (0); +} + +static void +spe_backend_stop(struct hwt_context *ctx) +{ + spe_backend_disable_smp(ctx); +} + +static void +arm_spe_reenable(void *arg __unused) +{ + struct arm_spe_info *info = &spe_info[PCPU_GET(cpuid)];; + + WRITE_SPECIALREG(PMSCR_EL1_REG, info->pmscr); + isb(); +} + +static int +spe_backend_svc_buf(struct hwt_context *ctx, void *data, size_t data_size, + int data_version) +{ + struct arm_spe_info *info; + struct arm_spe_buf_info *buf; + struct arm_spe_svc_buf *s; + int err = 0; + cpuset_t cpu_set; + + if (data_size != sizeof(struct arm_spe_svc_buf)) + return (E2BIG); + + if (data_version != 1) + return (EINVAL); + + s = (struct arm_spe_svc_buf *)data; + if (s->buf_idx > 1) + return (ENODEV); + if (s->ident >= mp_ncpus) + return (EINVAL); + + info = &spe_info[s->ident]; + mtx_lock_spin(&info->lock); + + buf = &info->buf_info[s->buf_idx]; + + if (!info->enabled) { + 
err = ENXIO; + goto end; + } + + /* Clear the flag the signals buffer needs servicing */ + buf->buf_svc = false; + + /* Re-enable profiling if we've been waiting for this notification */ + if (buf->buf_wait) { + CPU_SETOF(s->ident, &cpu_set); + + mtx_unlock_spin(&info->lock); + smp_rendezvous_cpus(cpu_set, smp_no_rendezvous_barrier, + arm_spe_reenable, smp_no_rendezvous_barrier, NULL); + mtx_lock_spin(&info->lock); + + buf->buf_wait = false; + } + +end: + mtx_unlock_spin(&info->lock); + return (err); +} + +static int +spe_backend_read(struct hwt_vm *vm, int *ident, vm_offset_t *offset, + uint64_t *data) +{ + struct arm_spe_queue *q; + struct arm_spe_softc *sc = device_get_softc(spe_dev); + int error = 0; + + mtx_lock_spin(&sc->sc_lock); + + /* Return the first pending buffer that needs servicing */ + q = STAILQ_FIRST(&sc->pending); + if (q == NULL) { + error = ENOENT; + goto error; + } + *ident = q->ident; + *offset = q->offset; + *data = (q->buf_idx << KQ_BUF_POS_SHIFT) | + (q->partial_rec << KQ_PARTREC_SHIFT) | + (q->final_buf << KQ_FINAL_BUF_SHIFT); + + STAILQ_REMOVE_HEAD(&sc->pending, next); + sc->npending--; + +error: + mtx_unlock_spin(&sc->sc_lock); + if (error) + return (error); + + free(q, M_ARM_SPE); + return (0); +} + +static struct hwt_backend_ops spe_ops = { + .hwt_backend_init = spe_backend_init, + .hwt_backend_deinit = spe_backend_deinit, + + .hwt_backend_configure = spe_backend_configure, + .hwt_backend_svc_buf = spe_backend_svc_buf, + .hwt_backend_stop = spe_backend_stop, + + .hwt_backend_enable_smp = spe_backend_enable_smp, + .hwt_backend_disable_smp = spe_backend_disable_smp, + + .hwt_backend_read = spe_backend_read, +}; + +int +spe_register(device_t dev) +{ + spe_dev = dev; + + return (hwt_backend_register(&backend)); +} diff --git a/sys/arm64/spe/arm_spe_dev.c b/sys/arm64/spe/arm_spe_dev.c new file mode 100644 index 000000000000..8a834197eeef --- /dev/null +++ b/sys/arm64/spe/arm_spe_dev.c @@ -0,0 +1,324 @@ +/*- + * SPDX-License-Identifier: 
BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * Copyright (c) 2022 The FreeBSD Foundation + * + * Portions of this software were developed by Andrew Turner under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/event.h> +#include <sys/hwt.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/rman.h> +#include <sys/smp.h> +#include <sys/systm.h> +#include <sys/taskqueue.h> + +#include <machine/bus.h> + +#include <arm64/spe/arm_spe.h> +#include <arm64/spe/arm_spe_dev.h> + +MALLOC_DEFINE(M_ARM_SPE, "armspe", "Arm SPE tracing"); + +/* + * taskqueue(9) used for sleepable routines called from interrupt handlers + */ +TASKQUEUE_FAST_DEFINE_THREAD(arm_spe); + +void arm_spe_send_buffer(void *, int); +static void arm_spe_error(void *, int); +static int arm_spe_intr(void *); +device_attach_t arm_spe_attach; + +static device_method_t arm_spe_methods[] = { + /* Device interface */ + DEVMETHOD(device_attach, arm_spe_attach), + + DEVMETHOD_END, +}; + +DEFINE_CLASS_0(spe, arm_spe_driver, arm_spe_methods, + sizeof(struct arm_spe_softc)); + +#define ARM_SPE_KVA_MAX_ALIGN UL(2048) + +int +arm_spe_attach(device_t dev) +{ + struct arm_spe_softc *sc; + int error, rid; + + sc = device_get_softc(dev); + sc->dev = dev; + + sc->pmbidr = READ_SPECIALREG(PMBIDR_EL1_REG); + sc->pmsidr = READ_SPECIALREG(PMSIDR_EL1_REG); + device_printf(dev, "PMBIDR_EL1: %#lx\n", sc->pmbidr); + device_printf(dev, "PMSIDR_EL1: %#lx\n", sc->pmsidr); + if ((sc->pmbidr & PMBIDR_P) != 0) { + device_printf(dev, "Profiling Buffer is owned by a higher Exception level\n"); + return (EPERM); + } + + sc->kva_align = 1 << ((sc->pmbidr & PMBIDR_Align_MASK) >> PMBIDR_Align_SHIFT); + if (sc->kva_align > ARM_SPE_KVA_MAX_ALIGN) { + device_printf(dev, "Invalid PMBIDR.Align value of %d\n", sc->kva_align); + return (EINVAL); + } + + rid = 0; + sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, + RF_ACTIVE); + if (sc->sc_irq_res == NULL) { + device_printf(dev, "Unable to allocate interrupt\n"); + return (ENXIO); + } + error = bus_setup_intr(dev, 
sc->sc_irq_res, + INTR_TYPE_MISC | INTR_MPSAFE, arm_spe_intr, NULL, sc, + &sc->sc_irq_cookie); + if (error != 0) { + device_printf(dev, "Unable to set up interrupt\n"); + return (error); + } + + mtx_init(&sc->sc_lock, "Arm SPE lock", NULL, MTX_SPIN); + + STAILQ_INIT(&sc->pending); + sc->npending = 0; + + spe_register(dev); + + return (0); +} + +/* Interrupt handler runs on the same core that triggered the exception */ +static int +arm_spe_intr(void *arg) +{ + int cpu_id = PCPU_GET(cpuid); + struct arm_spe_softc *sc = arg; + uint64_t pmbsr; + uint64_t base, limit; + uint8_t ec; + struct arm_spe_info *info = &sc->spe_info[cpu_id]; + uint8_t i = info->buf_idx; + struct arm_spe_buf_info *buf = &info->buf_info[i]; + struct arm_spe_buf_info *prev_buf = &info->buf_info[!i]; + device_t dev = sc->dev; + + /* Make sure the profiling data is visible to the CPU */ + psb_csync(); + dsb(nsh); + + /* Make sure any HW update of PMBPTR_EL1 is visible to the CPU */ + isb(); + + pmbsr = READ_SPECIALREG(PMBSR_EL1_REG); + + if (!(pmbsr & PMBSR_S)) + return (FILTER_STRAY); + + /* Event Class */ + ec = PMBSR_EC_VAL(pmbsr); + switch (ec) + { + case PMBSR_EC_OTHER_BUF_MGMT: /* Other buffer management event */ + break; + case PMBSR_EC_GRAN_PROT_CHK: /* Granule Protection Check fault */ + device_printf(dev, "PMBSR_EC_GRAN_PROT_CHK\n"); + break; + case PMBSR_EC_STAGE1_DA: /* Stage 1 Data Abort */ + device_printf(dev, "PMBSR_EC_STAGE1_DA\n"); + break; + case PMBSR_EC_STAGE2_DA: /* Stage 2 Data Abort */ + device_printf(dev, "PMBSR_EC_STAGE2_DA\n"); + break; + default: + /* Unknown EC */ + device_printf(dev, "unknown PMBSR_EC: %#x\n", ec); + arm_spe_disable(NULL); + TASK_INIT(&sc->task, 0, (task_fn_t *)arm_spe_error, sc->ctx); + taskqueue_enqueue(taskqueue_arm_spe, &sc->task); + return (FILTER_HANDLED); + } + + switch (ec) { + case PMBSR_EC_OTHER_BUF_MGMT: + /* Buffer Status Code = buffer filled */ + if ((pmbsr & PMBSR_MSS_BSC_MASK) == PMBSR_MSS_BSC_BUFFER_FILLED) { + dprintf("%s SPE buffer full 
event (cpu:%d)\n", + __func__, cpu_id); + break; + } + case PMBSR_EC_GRAN_PROT_CHK: + case PMBSR_EC_STAGE1_DA: + case PMBSR_EC_STAGE2_DA: + /* + * If we have one of these, we've messed up the + * programming somehow (e.g. passed invalid memory to + * SPE) and can't recover + */ + arm_spe_disable(NULL); + TASK_INIT(&sc->task, 0, (task_fn_t *)arm_spe_error, sc->ctx); + taskqueue_enqueue(taskqueue_arm_spe, &sc->task); + /* PMBPTR_EL1 is fault address if PMBSR_DL is 1 */ + device_printf(dev, "CPU:%d PMBSR_EL1:%#lx\n", cpu_id, pmbsr); + device_printf(dev, "PMBPTR_EL1:%#lx PMBLIMITR_EL1:%#lx\n", + READ_SPECIALREG(PMBPTR_EL1_REG), + READ_SPECIALREG(PMBLIMITR_EL1_REG)); + return (FILTER_HANDLED); + } + + mtx_lock_spin(&info->lock); + + /* + * Data Loss bit - pmbptr might not be pointing to the end of the last + * complete record + */ + if ((pmbsr & PMBSR_DL) == PMBSR_DL) + buf->partial_rec = 1; + buf->pmbptr = READ_SPECIALREG(PMBPTR_EL1_REG); + buf->buf_svc = true; + + /* Setup regs ready to start writing to the other half of the buffer */ + info->buf_idx = !info->buf_idx; + base = buf_start_addr(info->buf_idx, info); + limit = base + (info->buf_size/2); + limit &= PMBLIMITR_LIMIT_MASK; + limit |= PMBLIMITR_E; + WRITE_SPECIALREG(PMBPTR_EL1_REG, base); + WRITE_SPECIALREG(PMBLIMITR_EL1_REG, limit); + isb(); + + /* + * Notify userspace via kqueue that buffer is full and needs copying + * out - since kqueue can sleep, don't do this in the interrupt handler, + * add to a taskqueue to be scheduled later instead + */ + TASK_INIT(&info->task[i], 0, (task_fn_t *)arm_spe_send_buffer, buf); + taskqueue_enqueue(taskqueue_arm_spe, &info->task[i]); + + /* + * It's possible userspace hasn't yet notified us they've copied out the + * other half of the buffer + * + * This might be because: + * a) Kernel hasn't scheduled the task via taskqueue to notify + * userspace to copy out the data + * b) Userspace is still copying the buffer or hasn't notified us + * back via the HWT_IOC_SVC_BUF 
ioctl + * + * Either way we need to avoid overwriting uncopied data in the + * buffer, so disable profiling until we receive that SVC_BUF + * ioctl + * + * Using a larger buffer size should help to minimise these events and + * loss of profiling data while profiling is disabled + */ + if (prev_buf->buf_svc) { + device_printf(sc->dev, "cpu%d: buffer full interrupt, but other" + " half of buffer has not been copied out - consider" + " increasing buffer size to minimise loss of profiling data\n", + cpu_id); + WRITE_SPECIALREG(PMSCR_EL1_REG, 0x0); + prev_buf->buf_wait = true; + } + + mtx_unlock_spin(&info->lock); + + /* Clear Profiling Buffer Status Register */ + WRITE_SPECIALREG(PMBSR_EL1_REG, 0); + + isb(); + + return (FILTER_HANDLED); +} + +/* note: Scheduled and run via taskqueue, so can run on any CPU at any time */ +void +arm_spe_send_buffer(void *arg, int pending __unused) +{ + struct arm_spe_buf_info *buf = (struct arm_spe_buf_info *)arg; + struct arm_spe_info *info = buf->info; + struct arm_spe_queue *queue; + struct kevent kev; + int ret; + + queue = malloc(sizeof(struct arm_spe_queue), M_ARM_SPE, + M_WAITOK | M_ZERO); + + mtx_lock_spin(&info->lock); + + /* Add to queue for userspace to pickup */ + queue->ident = info->ident; + queue->offset = buf->pmbptr - buf_start_addr(buf->buf_idx, info); + queue->buf_idx = buf->buf_idx; + queue->final_buf = !info->enabled; + queue->partial_rec = buf->partial_rec; + mtx_unlock_spin(&info->lock); + + mtx_lock_spin(&info->sc->sc_lock); + STAILQ_INSERT_TAIL(&info->sc->pending, queue, next); + info->sc->npending++; + EV_SET(&kev, ARM_SPE_KQ_BUF, EVFILT_USER, 0, NOTE_TRIGGER, + info->sc->npending, NULL); + mtx_unlock_spin(&info->sc->sc_lock); + + /* Notify userspace */ + ret = kqfd_register(info->sc->kqueue_fd, &kev, info->sc->hwt_td, + M_WAITOK); + if (ret) { + dprintf("%s kqfd_register ret:%d\n", __func__, ret); + arm_spe_error(info->sc->ctx, 0); + } +} + +static void +arm_spe_error(void *arg, int pending __unused) +{ + 
struct hwt_context *ctx = arg; + struct kevent kev; + int ret; + + smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier, + arm_spe_disable, smp_no_rendezvous_barrier, NULL); + + EV_SET(&kev, ARM_SPE_KQ_SHUTDOWN, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL); + ret = kqfd_register(ctx->kqueue_fd, &kev, ctx->hwt_td, M_WAITOK); + if (ret) + dprintf("%s kqfd_register ret:%d\n", __func__, ret); +} + +MODULE_DEPEND(spe, hwt, 1, 1, 1); +MODULE_VERSION(spe, 1); diff --git a/sys/arm64/spe/arm_spe_dev.h b/sys/arm64/spe/arm_spe_dev.h new file mode 100644 index 000000000000..df88d98ef1c0 --- /dev/null +++ b/sys/arm64/spe/arm_spe_dev.h @@ -0,0 +1,162 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * Copyright (c) 2022 The FreeBSD Foundation + * + * Portions of this software were developed by Andrew Turner under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _ARM64_ARM_SPE_DEV_H_ +#define _ARM64_ARM_SPE_DEV_H_ + +#include <sys/mutex.h> +#include <sys/taskqueue.h> + +#include <vm/vm.h> + +#include <arm64/spe/arm_spe.h> + +#include <dev/hwt/hwt_context.h> + +#define ARM_SPE_DEBUG +#undef ARM_SPE_DEBUG + +#ifdef ARM_SPE_DEBUG +#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define dprintf(fmt, ...) +#endif + +DECLARE_CLASS(arm_spe_driver); + +struct cdev; +struct resource; + +extern bool arm64_pid_in_contextidr; + +int spe_register(device_t dev); +void arm_spe_disable(void *arg __unused); +int spe_backend_disable_smp(struct hwt_context *ctx); +void arm_spe_send_buffer(void *arg, int pending __unused); + +/* + * PSB CSYNC is a Profiling Synchronization Barrier encoded in the hint space + * so it is a NOP on earlier architectures. 
+ */ +#define psb_csync() __asm __volatile("hint #17" ::: "memory"); + +struct arm_spe_softc { + device_t dev; + + struct resource *sc_irq_res; + void *sc_irq_cookie; + struct cdev *sc_cdev; + struct mtx sc_lock; + struct task task; + + int64_t sc_pmsidr; + int kqueue_fd; + struct thread *hwt_td; + struct arm_spe_info *spe_info; + struct hwt_context *ctx; + STAILQ_HEAD(, arm_spe_queue) pending; + uint64_t npending; + + uint64_t pmbidr; + uint64_t pmsidr; + + uint16_t kva_align; +}; + +struct arm_spe_buf_info { + struct arm_spe_info *info; + uint64_t pmbptr; + uint8_t buf_idx : 1; + bool buf_svc : 1; + bool buf_wait : 1; + bool partial_rec : 1; +}; + +struct arm_spe_info { + int ident; /* tid or cpu_id */ + struct mtx lock; + struct arm_spe_softc *sc; + struct task task[2]; + bool enabled : 1; + + /* buffer is split in half as a ping-pong buffer */ + vm_object_t bufobj; + vm_offset_t kvaddr; + size_t buf_size; + uint8_t buf_idx : 1; /* 0 = first half of buf, 1 = 2nd half */ + struct arm_spe_buf_info buf_info[2]; + + /* config */ + enum arm_spe_profiling_level level; + enum arm_spe_ctx_field ctx_field; + /* filters */ + uint64_t pmsfcr; + uint64_t pmsevfr; + uint64_t pmslatfr; + /* interval */ + uint64_t pmsirr; + uint64_t pmsicr; + /* control */ + uint64_t pmscr; +}; + +struct arm_spe_queue { + int ident; + u_int buf_idx : 1; + bool partial_rec : 1; + bool final_buf : 1; + vm_offset_t offset; + STAILQ_ENTRY(arm_spe_queue) next; +}; + +static inline vm_offset_t buf_start_addr(u_int buf_idx, struct arm_spe_info *info) +{ + vm_offset_t addr; + if (buf_idx == 0) + addr = info->kvaddr; + if (buf_idx == 1) + addr = info->kvaddr + (info->buf_size/2); + + return (addr); +} + +static inline vm_offset_t buf_end_addr(u_int buf_idx, struct arm_spe_info *info) +{ + vm_offset_t addr; + if (buf_idx == 0) + addr = info->kvaddr + (info->buf_size/2); + if (buf_idx == 1) + addr = info->kvaddr + info->buf_size; + + return (addr); +} + +#endif /* _ARM64_ARM_SPE_DEV_H_ */ diff --git 
a/sys/arm64/spe/arm_spe_fdt.c b/sys/arm64/spe/arm_spe_fdt.c new file mode 100644 index 000000000000..d16f1dee2ac8 --- /dev/null +++ b/sys/arm64/spe/arm_spe_fdt.c @@ -0,0 +1,75 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * Copyright (c) 2022 The FreeBSD Foundation + * + * Portions of this software were developed by Andrew Turner under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/module.h> +#include <sys/mutex.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> +#include <dev/ofw/openfirm.h> + +#include <arm64/spe/arm_spe_dev.h> + +static device_probe_t arm_spe_fdt_probe; + +static struct ofw_compat_data compat_data[] = { + {"arm,statistical-profiling-extension-v1", true}, + {NULL, false}, +}; + +static device_method_t arm_spe_fdt_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, arm_spe_fdt_probe), + + DEVMETHOD_END, +}; + +DEFINE_CLASS_1(spe, arm_spe_fdt_driver, arm_spe_fdt_methods, + sizeof(struct arm_spe_softc), arm_spe_driver); + +DRIVER_MODULE(spe, simplebus, arm_spe_fdt_driver, 0, 0); + +static int +arm_spe_fdt_probe(device_t dev) +{ + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (!ofw_bus_search_compatible(dev, compat_data)->ocd_data) + return (ENXIO); + + device_set_desc(dev, "ARM Statistical Profiling Extension"); + return (BUS_PROBE_DEFAULT); +} diff --git a/sys/arm64/vmm/io/vgic_v3.c b/sys/arm64/vmm/io/vgic_v3.c index 67afb3374815..023406c64182 100644 --- a/sys/arm64/vmm/io/vgic_v3.c +++ b/sys/arm64/vmm/io/vgic_v3.c @@ -47,7 +47,6 @@ #include <dev/ofw/openfirm.h> -#include <machine/armreg.h> #include <machine/atomic.h> #include <machine/bus.h> #include <machine/cpufunc.h> diff --git a/sys/arm64/vmm/io/vtimer.c b/sys/arm64/vmm/io/vtimer.c index da0f0d96c431..7c7fbb49e691 100644 --- a/sys/arm64/vmm/io/vtimer.c +++ b/sys/arm64/vmm/io/vtimer.c @@ -44,7 +44,6 @@ #include <machine/bus.h> #include <machine/machdep.h> #include <machine/vmm.h> -#include <machine/armreg.h> #include <arm64/vmm/arm64.h> diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c index bf52dc0fe916..31d2fb3f516b 100644 --- a/sys/arm64/vmm/vmm.c +++ b/sys/arm64/vmm/vmm.c @@ -33,7 +33,6 @@ #include <sys/linker.h> #include <sys/lock.h> #include <sys/malloc.h> -#include <sys/module.h> #include <sys/mutex.h> 
#include <sys/pcpu.h> #include <sys/proc.h> @@ -51,7 +50,6 @@ #include <vm/vm_extern.h> #include <vm/vm_param.h> -#include <machine/armreg.h> #include <machine/cpu.h> #include <machine/fpu.h> #include <machine/machdep.h> @@ -126,7 +124,7 @@ struct vm { volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ struct vm_mem mem; /* (i) guest memory */ - char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ + char name[VM_MAX_NAMELEN + 1]; /* (o) virtual machine name */ struct vcpu **vcpu; /* (i) guest vcpus */ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; /* (o) guest MMIO regions */ @@ -139,8 +137,6 @@ struct vm { struct sx vcpus_init_lock; /* (o) */ }; -static bool vmm_initialized = false; - static int vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu); @@ -209,10 +205,6 @@ static const struct vmm_regs vmm_arch_regs_masks = { /* Host registers masked by vmm_arch_regs_masks. */ static struct vmm_regs vmm_arch_regs; -u_int vm_maxcpu; -SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, - &vm_maxcpu, 0, "Maximum number of vCPUs"); - static void vcpu_notify_event_locked(struct vcpu *vcpu); /* global statistics */ @@ -232,12 +224,6 @@ VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception"); VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception"); VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); -/* - * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this - * is a safe value for now. 
- */ -#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) - static int vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks) { @@ -324,20 +310,14 @@ vmm_unsupported_quirk(void) return (0); } -static int -vmm_init(void) +int +vmm_modinit(void) { int error; - vm_maxcpu = mp_ncpus; - TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); - - if (vm_maxcpu > VM_MAXCPU) { - printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); - vm_maxcpu = VM_MAXCPU; - } - if (vm_maxcpu == 0) - vm_maxcpu = 1; + error = vmm_unsupported_quirk(); + if (error != 0) + return (error); error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks); if (error != 0) @@ -346,61 +326,12 @@ vmm_init(void) return (vmmops_modinit(0)); } -static int -vmm_handler(module_t mod, int what, void *arg) +int +vmm_modcleanup(void) { - int error; - - switch (what) { - case MOD_LOAD: - error = vmm_unsupported_quirk(); - if (error != 0) - break; - error = vmmdev_init(); - if (error != 0) - break; - error = vmm_init(); - if (error == 0) - vmm_initialized = true; - else - (void)vmmdev_cleanup(); - break; - case MOD_UNLOAD: - error = vmmdev_cleanup(); - if (error == 0 && vmm_initialized) { - error = vmmops_modcleanup(); - if (error) { - /* - * Something bad happened - prevent new - * VMs from being created - */ - vmm_initialized = false; - } - } - break; - default: - error = 0; - break; - } - return (error); + return (vmmops_modcleanup()); } -static moduledata_t vmm_kmod = { - "vmm", - vmm_handler, - NULL -}; - -/* - * vmm initialization has the following dependencies: - * - * - HYP initialization requires smp_rendezvous() and therefore must happen - * after SMP is fully functional (after SI_SUB_SMP). - * - vmm device initialization requires an initialized devfs. 
- */ -DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY); -MODULE_VERSION(vmm, 1); - static void vm_init(struct vm *vm, bool create) { @@ -442,10 +373,6 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) return (NULL); - /* Some interrupt controllers may have a CPU limit */ - if (vcpuid >= vgic_max_cpu_count(vm->cookie)) - return (NULL); - vcpu = (struct vcpu *) atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); if (__predict_true(vcpu != NULL)) @@ -454,6 +381,12 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) sx_xlock(&vm->vcpus_init_lock); vcpu = vm->vcpu[vcpuid]; if (vcpu == NULL && !vm->dying) { + /* Some interrupt controllers may have a CPU limit */ + if (vcpuid >= vgic_max_cpu_count(vm->cookie)) { + sx_xunlock(&vm->vcpus_init_lock); + return (NULL); + } + vcpu = vcpu_alloc(vm, vcpuid); vcpu_init(vcpu); @@ -486,16 +419,6 @@ vm_create(const char *name, struct vm **retvm) struct vm *vm; int error; - /* - * If vmm.ko could not be successfully initialized then don't attempt - * to create the virtual machine. 
- */ - if (!vmm_initialized) - return (ENXIO); - - if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) - return (EINVAL); - vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); error = vm_mem_init(&vm->mem, 0, 1ul << 39); if (error != 0) { @@ -1279,8 +1202,7 @@ vcpu_get_state(struct vcpu *vcpu, int *hostcpu) int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) { - - if (reg >= VM_REG_LAST) + if (reg < 0 || reg >= VM_REG_LAST) return (EINVAL); return (vmmops_getreg(vcpu->cookie, reg, retval)); @@ -1291,7 +1213,7 @@ vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) { int error; - if (reg >= VM_REG_LAST) + if (reg < 0 || reg >= VM_REG_LAST) return (EINVAL); error = vmmops_setreg(vcpu->cookie, reg, val); if (error || reg != VM_REG_GUEST_PC) diff --git a/sys/arm64/vmm/vmm_arm64.c b/sys/arm64/vmm/vmm_arm64.c index 618f4afaf8ee..aa1361049f49 100644 --- a/sys/arm64/vmm/vmm_arm64.c +++ b/sys/arm64/vmm/vmm_arm64.c @@ -47,7 +47,6 @@ #include <vm/vm_page.h> #include <vm/vm_param.h> -#include <machine/armreg.h> #include <machine/vm.h> #include <machine/cpufunc.h> #include <machine/cpu.h> @@ -1365,7 +1364,7 @@ vmmops_setcap(void *vcpui, int num, int val) break; if (val != 0) hypctx->mdcr_el2 |= MDCR_EL2_TDE; - else + else if ((hypctx->setcaps & (1ul << VM_CAP_SS_EXIT)) == 0) hypctx->mdcr_el2 &= ~MDCR_EL2_TDE; break; case VM_CAP_SS_EXIT: @@ -1374,20 +1373,20 @@ vmmops_setcap(void *vcpui, int num, int val) if (val != 0) { hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS); - hypctx->debug_mdscr |= hypctx->mdscr_el1 & - (MDSCR_SS | MDSCR_KDE); + hypctx->debug_mdscr |= (hypctx->mdscr_el1 & MDSCR_SS); hypctx->tf.tf_spsr |= PSR_SS; - hypctx->mdscr_el1 |= MDSCR_SS | MDSCR_KDE; + hypctx->mdscr_el1 |= MDSCR_SS; hypctx->mdcr_el2 |= MDCR_EL2_TDE; } else { hypctx->tf.tf_spsr &= ~PSR_SS; hypctx->tf.tf_spsr |= hypctx->debug_spsr; hypctx->debug_spsr &= ~PSR_SS; - hypctx->mdscr_el1 &= ~(MDSCR_SS | MDSCR_KDE); + hypctx->mdscr_el1 &= ~MDSCR_SS; hypctx->mdscr_el1 |= 
hypctx->debug_mdscr; - hypctx->debug_mdscr &= ~(MDSCR_SS | MDSCR_KDE); - hypctx->mdcr_el2 &= ~MDCR_EL2_TDE; + hypctx->debug_mdscr &= ~MDSCR_SS; + if ((hypctx->setcaps & (1ul << VM_CAP_BRK_EXIT)) == 0) + hypctx->mdcr_el2 &= ~MDCR_EL2_TDE; } break; case VM_CAP_MASK_HWINTR: diff --git a/sys/arm64/vmm/vmm_dev_machdep.c b/sys/arm64/vmm/vmm_dev_machdep.c index 926a74fa528b..29d14e1ba952 100644 --- a/sys/arm64/vmm/vmm_dev_machdep.c +++ b/sys/arm64/vmm/vmm_dev_machdep.c @@ -68,19 +68,13 @@ int vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, int fflag, struct thread *td) { - struct vm_run *vmrun; - struct vm_vgic_version *vgv; - struct vm_vgic_descr *vgic; - struct vm_irq *vi; - struct vm_exception *vmexc; - struct vm_gla2gpa *gg; - struct vm_msi *vmsi; int error; error = 0; switch (cmd) { case VM_RUN: { struct vm_exit *vme; + struct vm_run *vmrun; vmrun = (struct vm_run *)data; vme = vm_exitinfo(vcpu); @@ -94,41 +88,62 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, break; break; } - case VM_INJECT_EXCEPTION: + case VM_INJECT_EXCEPTION: { + struct vm_exception *vmexc; + vmexc = (struct vm_exception *)data; error = vm_inject_exception(vcpu, vmexc->esr, vmexc->far); break; - case VM_GLA2GPA_NOFAULT: + } + case VM_GLA2GPA_NOFAULT: { + struct vm_gla2gpa *gg; + gg = (struct vm_gla2gpa *)data; error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, gg->prot, &gg->gpa, &gg->fault); KASSERT(error == 0 || error == EFAULT, ("%s: vm_gla2gpa unknown error %d", __func__, error)); break; - case VM_GET_VGIC_VERSION: + } + case VM_GET_VGIC_VERSION: { + struct vm_vgic_version *vgv; + vgv = (struct vm_vgic_version *)data; /* TODO: Query the vgic driver for this */ vgv->version = 3; vgv->flags = 0; error = 0; break; - case VM_ATTACH_VGIC: + } + case VM_ATTACH_VGIC: { + struct vm_vgic_descr *vgic; + vgic = (struct vm_vgic_descr *)data; error = vm_attach_vgic(vm, vgic); break; - case VM_RAISE_MSI: + } + case VM_RAISE_MSI: { + 
struct vm_msi *vmsi; + vmsi = (struct vm_msi *)data; error = vm_raise_msi(vm, vmsi->msg, vmsi->addr, vmsi->bus, vmsi->slot, vmsi->func); break; - case VM_ASSERT_IRQ: + } + case VM_ASSERT_IRQ: { + struct vm_irq *vi; + vi = (struct vm_irq *)data; error = vm_assert_irq(vm, vi->irq); break; - case VM_DEASSERT_IRQ: + } + case VM_DEASSERT_IRQ: { + struct vm_irq *vi; + vi = (struct vm_irq *)data; error = vm_deassert_irq(vm, vi->irq); break; + } default: error = ENOTTY; break; diff --git a/sys/arm64/vmm/vmm_hyp.c b/sys/arm64/vmm/vmm_hyp.c index b8c6d2ab7a9a..0ad7930e9a87 100644 --- a/sys/arm64/vmm/vmm_hyp.c +++ b/sys/arm64/vmm/vmm_hyp.c @@ -32,7 +32,6 @@ #include <sys/types.h> #include <sys/proc.h> -#include <machine/armreg.h> #include "arm64.h" #include "hyp.h" diff --git a/sys/arm64/vmm/vmm_reset.c b/sys/arm64/vmm/vmm_reset.c index 1240c3ed16ec..0e4910ea87b4 100644 --- a/sys/arm64/vmm/vmm_reset.c +++ b/sys/arm64/vmm/vmm_reset.c @@ -31,7 +31,6 @@ #include <sys/kernel.h> #include <sys/lock.h> -#include <machine/armreg.h> #include <machine/cpu.h> #include <machine/hypervisor.h> |
