aboutsummaryrefslogtreecommitdiff
path: root/sys/arm64/arm64/machdep.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/arm64/arm64/machdep.c')
-rw-r--r--sys/arm64/arm64/machdep.c330
1 files changed, 250 insertions, 80 deletions
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
index 58136981c1a4..517f4e7c2e23 100644
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -26,14 +26,13 @@
*/
#include "opt_acpi.h"
+#include "opt_kstack_pages.h"
#include "opt_platform.h"
#include "opt_ddb.h"
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/asan.h>
#include <sys/buf.h>
#include <sys/bus.h>
#include <sys/cons.h>
@@ -48,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/linker.h>
+#include <sys/msan.h>
#include <sys/msgbuf.h>
#include <sys/pcpu.h>
#include <sys/physmem.h>
@@ -78,6 +78,7 @@ __FBSDID("$FreeBSD$");
#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/debug_monitor.h>
+#include <machine/hypervisor.h>
#include <machine/kdb.h>
#include <machine/machdep.h>
#include <machine/metadata.h>
@@ -100,6 +101,14 @@ __FBSDID("$FreeBSD$");
#include <dev/ofw/openfirm.h>
#endif
+#include <dev/smbios/smbios.h>
+
+_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
+_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
+ "pcb_fpusaved changed offset");
+_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
+ "pcb_fpustate changed offset");
+
enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;
/*
@@ -122,12 +131,22 @@ static struct trapframe proc0_tf;
int early_boot = 1;
int cold = 1;
static int boot_el;
+static uint64_t hcr_el2;
struct kva_md_info kmi;
int64_t dczva_line_size; /* The size of cache line the dc zva zeroes */
int has_pan;
+#if defined(SOCDEV_PA)
+/*
+ * This is the virtual address used to access SOCDEV_PA. As it's set before
+ * .bss is cleared we need to ensure it's preserved. To do this use
+ * __read_mostly as it's only ever set once but read in the putc functions.
+ */
+uintptr_t socdev_va __read_mostly;
+#endif
+
/*
* Physical address of the EFI System Table. Stashed from the metadata hints
* passed into the kernel and used by the EFI code to call runtime services.
@@ -189,7 +208,19 @@ bool
has_hyp(void)
{
- return (boot_el == 2);
+ /*
+ * XXX The E2H check is wrong, but it's close enough for now. Needs to
+ * be re-evaluated once we're running regularly in EL2.
+ */
+ return (boot_el == CURRENTEL_EL_EL2 && (hcr_el2 & HCR_E2H) == 0);
+}
+
+bool
+in_vhe(void)
+{
+ /* If we are currently in EL2 then must be in VHE */
+ return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) ==
+ CURRENTEL_EL_EL2);
}
static void
@@ -300,7 +331,7 @@ cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{
pcpu->pc_acpi_id = 0xffffffff;
- pcpu->pc_mpidr = 0xffffffff;
+ pcpu->pc_mpidr = UINT64_MAX;
}
void
@@ -346,11 +377,14 @@ makectx(struct trapframe *tf, struct pcb *pcb)
{
int i;
- for (i = 0; i < nitems(pcb->pcb_x); i++)
- pcb->pcb_x[i] = tf->tf_x[i];
+ /* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
+ for (i = 0; i < nitems(pcb->pcb_x); i++) {
+ if (i == PCB_LR)
+ pcb->pcb_x[i] = tf->tf_elr;
+ else
+ pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
+ }
- /* NB: pcb_lr is the PC, see PC_REGS() in db_machdep.h */
- pcb->pcb_lr = tf->tf_elr;
pcb->pcb_sp = tf->tf_sp;
}
@@ -370,6 +404,7 @@ init_proc0(vm_offset_t kstack)
#endif
thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
thread0.td_kstack_pages * PAGE_SIZE) - 1;
+ thread0.td_pcb->pcb_flags = 0;
thread0.td_pcb->pcb_fpflags = 0;
thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
@@ -389,12 +424,12 @@ init_proc0(vm_offset_t kstack)
* read-only, e.g. to patch kernel code.
*/
bool
-arm64_get_writable_addr(vm_offset_t addr, vm_offset_t *out)
+arm64_get_writable_addr(void *addr, void **out)
{
vm_paddr_t pa;
/* Check if the page is writable */
- if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
+ if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) {
*out = addr;
return (true);
}
@@ -402,16 +437,17 @@ arm64_get_writable_addr(vm_offset_t addr, vm_offset_t *out)
/*
* Find the physical address of the given page.
*/
- if (!pmap_klookup(addr, &pa)) {
+ if (!pmap_klookup((vm_offset_t)addr, &pa)) {
return (false);
}
/*
* If it is within the DMAP region and is writable use that.
*/
- if (PHYS_IN_DMAP(pa)) {
- addr = PHYS_TO_DMAP(pa);
- if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
+ if (PHYS_IN_DMAP_RANGE(pa)) {
+ addr = (void *)PHYS_TO_DMAP(pa);
+ if (PAR_SUCCESS(arm64_address_translate_s1e1w(
+ (vm_offset_t)addr))) {
*out = addr;
return (true);
}
@@ -420,18 +456,10 @@ arm64_get_writable_addr(vm_offset_t addr, vm_offset_t *out)
return (false);
}
-typedef struct {
- uint32_t type;
- uint64_t phys_start;
- uint64_t virt_start;
- uint64_t num_pages;
- uint64_t attr;
-} EFI_MEMORY_DESCRIPTOR;
-
-typedef void (*efi_map_entry_cb)(struct efi_md *);
+typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);
static void
-foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb)
+foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
{
struct efi_md *map, *p;
size_t efisz;
@@ -442,7 +470,7 @@ foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb)
* Boot Services API.
*/
efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
- map = (struct efi_md *)((uint8_t *)efihdr + efisz);
+ map = (struct efi_md *)((uint8_t *)efihdr + efisz);
if (efihdr->descriptor_size == 0)
return;
@@ -450,40 +478,29 @@ foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb)
for (i = 0, p = map; i < ndesc; i++,
p = efi_next_descriptor(p, efihdr->descriptor_size)) {
- cb(p);
- }
-}
-
-static void
-exclude_efi_map_entry(struct efi_md *p)
-{
-
- switch (p->md_type) {
- case EFI_MD_TYPE_CODE:
- case EFI_MD_TYPE_DATA:
- case EFI_MD_TYPE_BS_CODE:
- case EFI_MD_TYPE_BS_DATA:
- case EFI_MD_TYPE_FREE:
- /*
- * We're allowed to use any entry with these types.
- */
- break;
- default:
- physmem_exclude_region(p->md_phys, p->md_pages * EFI_PAGE_SIZE,
- EXFLAG_NOALLOC);
+ cb(p, argp);
}
}
+/*
+ * Handle the EFI memory map list.
+ *
+ * We will make two passes at this, the first (exclude == false) to populate
+ * physmem with valid physical memory ranges from recognized map entry types.
+ * In the second pass we will exclude memory ranges from physmem which must not
+ * be used for general allocations, either because they are used by runtime
+ * firmware or otherwise reserved.
+ *
+ * Adding the runtime-reserved memory ranges to physmem and excluding them
+ * later ensures that they are included in the DMAP, but excluded from
+ * phys_avail[].
+ *
+ * Entry types not explicitly listed here are ignored and not mapped.
+ */
static void
-exclude_efi_map_entries(struct efi_map_header *efihdr)
-{
-
- foreach_efi_map_entry(efihdr, exclude_efi_map_entry);
-}
-
-static void
-add_efi_map_entry(struct efi_md *p)
+handle_efi_map_entry(struct efi_md *p, void *argp)
{
+ bool exclude = *(bool *)argp;
switch (p->md_type) {
case EFI_MD_TYPE_RECLAIM:
@@ -495,7 +512,7 @@ add_efi_map_entry(struct efi_md *p)
/*
* Some UEFI implementations put the system table in the
* runtime code section. Include it in the DMAP, but will
- * be excluded from phys_avail later.
+ * be excluded from phys_avail.
*/
case EFI_MD_TYPE_RT_DATA:
/*
@@ -503,6 +520,12 @@ add_efi_map_entry(struct efi_md *p)
* region is created to stop it from being added
* to phys_avail.
*/
+ if (exclude) {
+ physmem_exclude_region(p->md_phys,
+ p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
+ break;
+ }
+ /* FALLTHROUGH */
case EFI_MD_TYPE_CODE:
case EFI_MD_TYPE_DATA:
case EFI_MD_TYPE_BS_CODE:
@@ -511,8 +534,12 @@ add_efi_map_entry(struct efi_md *p)
/*
* We're allowed to use any entry with these types.
*/
- physmem_hardware_region(p->md_phys,
- p->md_pages * EFI_PAGE_SIZE);
+ if (!exclude)
+ physmem_hardware_region(p->md_phys,
+ p->md_pages * EFI_PAGE_SIZE);
+ break;
+ default:
+ /* Other types shall not be handled by physmem. */
break;
}
}
@@ -520,12 +547,19 @@ add_efi_map_entry(struct efi_md *p)
static void
add_efi_map_entries(struct efi_map_header *efihdr)
{
+ bool exclude = false;
+ foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
+}
- foreach_efi_map_entry(efihdr, add_efi_map_entry);
+static void
+exclude_efi_map_entries(struct efi_map_header *efihdr)
+{
+ bool exclude = true;
+ foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
}
static void
-print_efi_map_entry(struct efi_md *p)
+print_efi_map_entry(struct efi_md *p, void *argp __unused)
{
const char *type;
static const char *types[] = {
@@ -585,7 +619,116 @@ print_efi_map_entries(struct efi_map_header *efihdr)
printf("%23s %12s %12s %8s %4s\n",
"Type", "Physical", "Virtual", "#Pages", "Attr");
- foreach_efi_map_entry(efihdr, print_efi_map_entry);
+ foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
+}
+
+/*
+ * Map the passed in VA in EFI space to a void * using the efi memory table to
+ * find the PA and return it in the DMAP, if it exists. We're used between the
+ * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
+ * tables We assume that either the entry you are mapping fits within its page,
+ * or if it spills to the next page, that's contiguous in PA and in the DMAP.
+ * All observed tables obey the first part of this precondition.
+ */
+struct early_map_data
+{
+ vm_offset_t va;
+ vm_offset_t pa;
+};
+
+static void
+efi_early_map_entry(struct efi_md *p, void *argp)
+{
+ struct early_map_data *emdp = argp;
+ vm_offset_t s, e;
+
+ if (emdp->pa != 0)
+ return;
+ if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
+ return;
+ s = p->md_virt;
+ e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
+ if (emdp->va < s || emdp->va >= e)
+ return;
+ emdp->pa = p->md_phys + (emdp->va - p->md_virt);
+}
+
+static void *
+efi_early_map(vm_offset_t va)
+{
+ struct early_map_data emd = { .va = va };
+
+ foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
+ if (emd.pa == 0)
+ return NULL;
+ return (void *)PHYS_TO_DMAP(emd.pa);
+}
+
+
+/*
+ * When booted via kboot, the prior kernel will pass in reserved memory areas in
+ * an EFI config table. We need to find that table and walk through it excluding
+ * the memory ranges in it. btw, this is called too early for the printf to do
+ * anything since msgbufp isn't initialized, let alone a console...
+ */
+static void
+exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
+{
+ struct efi_systbl *systbl;
+ struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;
+
+ systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
+ if (systbl == NULL) {
+ printf("can't map systbl\n");
+ return;
+ }
+ if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
+ printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
+ return;
+ }
+
+ /*
+ * We don't yet have the pmap system booted enough to create a pmap for
+ * the efi firmware's preferred address space from the GetMemoryMap()
+ * table. The st_cfgtbl is a VA in this space, so we need to do the
+ * mapping ourselves to a kernel VA with efi_early_map. We assume that
+ * the cfgtbl entries don't span a page. Other pointers are PAs, as
+ * noted below.
+ */
+ if (systbl->st_cfgtbl == 0) /* Failsafe st_entries should == 0 in this case */
+ return;
+ for (int i = 0; i < systbl->st_entries; i++) {
+ struct efi_cfgtbl *cfgtbl;
+ struct linux_efi_memreserve *mr;
+
+ cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
+ if (cfgtbl == NULL)
+ panic("Can't map the config table entry %d\n", i);
+ if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
+ continue;
+
+ /*
+ * cfgtbl pointers are either VA or PA, depending on the GUID of
+ * the table. memreserve GUID pointers are PA and not converted
+ * after a SetVirtualAddressMap(). The list's mr_next pointer
+ * is also a PA.
+ */
+ mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
+ (vm_offset_t)cfgtbl->ct_data);
+ while (true) {
+ for (int j = 0; j < mr->mr_count; j++) {
+ struct linux_efi_memreserve_entry *mre;
+
+ mre = &mr->mr_entry[j];
+ physmem_exclude_region(mre->mre_base, mre->mre_size,
+ EXFLAG_NODUMP | EXFLAG_NOALLOC);
+ }
+ if (mr->mr_next == 0)
+ break;
+ mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
+ };
+ }
+
}
#ifdef FDT
@@ -653,6 +796,8 @@ bus_probe(void)
break;
}
order = strchr(order, ',');
+ if (order != NULL)
+ order++; /* Skip comma */
}
freeenv(env);
@@ -760,6 +905,7 @@ initarm(struct arm64_bootparams *abp)
TSRAW(&thread0, TS_ENTER, __func__, NULL);
boot_el = abp->boot_el;
+ hcr_el2 = abp->hcr_el2;
/* Parse loader or FDT boot parametes. Determine last used address. */
lastaddr = parse_boot_param(abp);
@@ -770,10 +916,30 @@ initarm(struct arm64_bootparams *abp)
kmdp = preload_search_by_type("elf64 kernel");
identify_cpu(0);
+ identify_hypervisor_smbios();
+
update_special_regs(0);
+ /* Set the pcpu data, this is needed by pmap_bootstrap */
+ pcpup = &pcpu0;
+ pcpu_init(pcpup, 0, sizeof(struct pcpu));
+
+ /*
+ * Set the pcpu pointer with a backup in tpidr_el1 to be
+ * loaded when entering the kernel from userland.
+ */
+ __asm __volatile(
+ "mov x18, %0 \n"
+ "msr tpidr_el1, %0" :: "r"(pcpup));
+
+ /* locore.S sets sp_el0 to &thread0 so no need to set it here. */
+ PCPU_SET(curthread, &thread0);
+ PCPU_SET(midr, get_midr());
+
link_elf_ireloc(kmdp);
+#ifdef FDT
try_load_dtb(kmdp);
+#endif
efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);
@@ -802,22 +968,6 @@ initarm(struct arm64_bootparams *abp)
physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
EXFLAG_NOALLOC);
- /* Set the pcpu data, this is needed by pmap_bootstrap */
- pcpup = &pcpu0;
- pcpu_init(pcpup, 0, sizeof(struct pcpu));
-
- /*
- * Set the pcpu pointer with a backup in tpidr_el1 to be
- * loaded when entering the kernel from userland.
- */
- __asm __volatile(
- "mov x18, %0 \n"
- "msr tpidr_el1, %0" :: "r"(pcpup));
-
- /* locore.S sets sp_el0 to &thread0 so no need to set it here. */
- PCPU_SET(curthread, &thread0);
- PCPU_SET(midr, get_midr());
-
/* Do basic tuning, hz etc */
init_param1();
@@ -825,11 +975,25 @@ initarm(struct arm64_bootparams *abp)
pan_setup();
/* Bootstrap enough of pmap to enter the kernel proper */
- pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt,
- KERNBASE - abp->kern_delta, lastaddr - KERNBASE);
- /* Exclude entries neexed in teh DMAP region, but not phys_avail */
+ pmap_bootstrap(lastaddr - KERNBASE);
+ /* Exclude entries needed in the DMAP region, but not phys_avail */
if (efihdr != NULL)
exclude_efi_map_entries(efihdr);
+ /* Do the same for reserve entries in the EFI MEMRESERVE table */
+ if (efi_systbl_phys != 0)
+ exclude_efi_memreserve(efi_systbl_phys);
+
+ /*
+ * We carefully bootstrap the sanitizer map after we've excluded
+ * absolutely everything else that could impact phys_avail. There's not
+ * always enough room for the initial shadow map after the kernel, so
+ * we'll end up searching for segments that we can safely use. Those
+ * segments also get excluded from phys_avail.
+ */
+#if defined(KASAN) || defined(KMSAN)
+ pmap_bootstrap_san();
+#endif
+
physmem_init_kernel_globals();
devmap_bootstrap(0, NULL);
@@ -873,6 +1037,8 @@ initarm(struct arm64_bootparams *abp)
pan_enable();
kcsan_cpu_init(0);
+ kasan_init();
+ kmsan_init();
env = kern_getenv("kernelname");
if (env != NULL)
@@ -902,6 +1068,10 @@ initarm(struct arm64_bootparams *abp)
early_boot = 0;
+ if (bootverbose && kstack_pages != KSTACK_PAGES)
+ printf("kern.kstack_pages = %d ignored for thread0\n",
+ kstack_pages);
+
TSEXIT();
}