diff options
Diffstat (limited to 'sys/arm64/arm64/machdep.c')
-rw-r--r-- | sys/arm64/arm64/machdep.c | 330 |
1 file changed, 250 insertions, 80 deletions
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c index 58136981c1a4..517f4e7c2e23 100644 --- a/sys/arm64/arm64/machdep.c +++ b/sys/arm64/arm64/machdep.c @@ -26,14 +26,13 @@ */ #include "opt_acpi.h" +#include "opt_kstack_pages.h" #include "opt_platform.h" #include "opt_ddb.h" -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/systm.h> +#include <sys/asan.h> #include <sys/buf.h> #include <sys/bus.h> #include <sys/cons.h> @@ -48,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include <sys/ktr.h> #include <sys/limits.h> #include <sys/linker.h> +#include <sys/msan.h> #include <sys/msgbuf.h> #include <sys/pcpu.h> #include <sys/physmem.h> @@ -78,6 +78,7 @@ __FBSDID("$FreeBSD$"); #include <machine/armreg.h> #include <machine/cpu.h> #include <machine/debug_monitor.h> +#include <machine/hypervisor.h> #include <machine/kdb.h> #include <machine/machdep.h> #include <machine/metadata.h> @@ -100,6 +101,14 @@ __FBSDID("$FreeBSD$"); #include <dev/ofw/openfirm.h> #endif +#include <dev/smbios/smbios.h> + +_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size"); +_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136, + "pcb_fpusaved changed offset"); +_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192, + "pcb_fpustate changed offset"); + enum arm64_bus arm64_bus_method = ARM64_BUS_NONE; /* @@ -122,12 +131,22 @@ static struct trapframe proc0_tf; int early_boot = 1; int cold = 1; static int boot_el; +static uint64_t hcr_el2; struct kva_md_info kmi; int64_t dczva_line_size; /* The size of cache line the dc zva zeroes */ int has_pan; +#if defined(SOCDEV_PA) +/* + * This is the virtual address used to access SOCDEV_PA. As it's set before + * .bss is cleared we need to ensure it's preserved. To do this use + * __read_mostly as it's only ever set once but read in the putc functions. + */ +uintptr_t socdev_va __read_mostly; +#endif + /* * Physical address of the EFI System Table. 
Stashed from the metadata hints * passed into the kernel and used by the EFI code to call runtime services. @@ -189,7 +208,19 @@ bool has_hyp(void) { - return (boot_el == 2); + /* + * XXX The E2H check is wrong, but it's close enough for now. Needs to + * be re-evaluated once we're running regularly in EL2. + */ + return (boot_el == CURRENTEL_EL_EL2 && (hcr_el2 & HCR_E2H) == 0); +} + +bool +in_vhe(void) +{ + /* If we are currently in EL2 then must be in VHE */ + return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) == + CURRENTEL_EL_EL2); } static void @@ -300,7 +331,7 @@ cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; - pcpu->pc_mpidr = 0xffffffff; + pcpu->pc_mpidr = UINT64_MAX; } void @@ -346,11 +377,14 @@ makectx(struct trapframe *tf, struct pcb *pcb) { int i; - for (i = 0; i < nitems(pcb->pcb_x); i++) - pcb->pcb_x[i] = tf->tf_x[i]; + /* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */ + for (i = 0; i < nitems(pcb->pcb_x); i++) { + if (i == PCB_LR) + pcb->pcb_x[i] = tf->tf_elr; + else + pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START]; + } - /* NB: pcb_lr is the PC, see PC_REGS() in db_machdep.h */ - pcb->pcb_lr = tf->tf_elr; pcb->pcb_sp = tf->tf_sp; } @@ -370,6 +404,7 @@ init_proc0(vm_offset_t kstack) #endif thread0.td_pcb = (struct pcb *)(thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE) - 1; + thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_fpflags = 0; thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate; thread0.td_pcb->pcb_vfpcpu = UINT_MAX; @@ -389,12 +424,12 @@ init_proc0(vm_offset_t kstack) * read-only, e.g. to patch kernel code. 
*/ bool -arm64_get_writable_addr(vm_offset_t addr, vm_offset_t *out) +arm64_get_writable_addr(void *addr, void **out) { vm_paddr_t pa; /* Check if the page is writable */ - if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) { + if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) { *out = addr; return (true); } @@ -402,16 +437,17 @@ arm64_get_writable_addr(vm_offset_t addr, vm_offset_t *out) /* * Find the physical address of the given page. */ - if (!pmap_klookup(addr, &pa)) { + if (!pmap_klookup((vm_offset_t)addr, &pa)) { return (false); } /* * If it is within the DMAP region and is writable use that. */ - if (PHYS_IN_DMAP(pa)) { - addr = PHYS_TO_DMAP(pa); - if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) { + if (PHYS_IN_DMAP_RANGE(pa)) { + addr = (void *)PHYS_TO_DMAP(pa); + if (PAR_SUCCESS(arm64_address_translate_s1e1w( + (vm_offset_t)addr))) { *out = addr; return (true); } @@ -420,18 +456,10 @@ arm64_get_writable_addr(vm_offset_t addr, vm_offset_t *out) return (false); } -typedef struct { - uint32_t type; - uint64_t phys_start; - uint64_t virt_start; - uint64_t num_pages; - uint64_t attr; -} EFI_MEMORY_DESCRIPTOR; - -typedef void (*efi_map_entry_cb)(struct efi_md *); +typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp); static void -foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb) +foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp) { struct efi_md *map, *p; size_t efisz; @@ -442,7 +470,7 @@ foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb) * Boot Services API. 
*/ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; - map = (struct efi_md *)((uint8_t *)efihdr + efisz); + map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; @@ -450,40 +478,29 @@ foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb) for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { - cb(p); - } -} - -static void -exclude_efi_map_entry(struct efi_md *p) -{ - - switch (p->md_type) { - case EFI_MD_TYPE_CODE: - case EFI_MD_TYPE_DATA: - case EFI_MD_TYPE_BS_CODE: - case EFI_MD_TYPE_BS_DATA: - case EFI_MD_TYPE_FREE: - /* - * We're allowed to use any entry with these types. - */ - break; - default: - physmem_exclude_region(p->md_phys, p->md_pages * EFI_PAGE_SIZE, - EXFLAG_NOALLOC); + cb(p, argp); } } +/* + * Handle the EFI memory map list. + * + * We will make two passes at this, the first (exclude == false) to populate + * physmem with valid physical memory ranges from recognized map entry types. + * In the second pass we will exclude memory ranges from physmem which must not + * be used for general allocations, either because they are used by runtime + * firmware or otherwise reserved. + * + * Adding the runtime-reserved memory ranges to physmem and excluding them + * later ensures that they are included in the DMAP, but excluded from + * phys_avail[]. + * + * Entry types not explicitly listed here are ignored and not mapped. + */ static void -exclude_efi_map_entries(struct efi_map_header *efihdr) -{ - - foreach_efi_map_entry(efihdr, exclude_efi_map_entry); -} - -static void -add_efi_map_entry(struct efi_md *p) +handle_efi_map_entry(struct efi_md *p, void *argp) { + bool exclude = *(bool *)argp; switch (p->md_type) { case EFI_MD_TYPE_RECLAIM: @@ -495,7 +512,7 @@ add_efi_map_entry(struct efi_md *p) /* * Some UEFI implementations put the system table in the * runtime code section. Include it in the DMAP, but will - * be excluded from phys_avail later. 
+ * be excluded from phys_avail. */ case EFI_MD_TYPE_RT_DATA: /* @@ -503,6 +520,12 @@ add_efi_map_entry(struct efi_md *p) * region is created to stop it from being added * to phys_avail. */ + if (exclude) { + physmem_exclude_region(p->md_phys, + p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC); + break; + } + /* FALLTHROUGH */ case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: @@ -511,8 +534,12 @@ add_efi_map_entry(struct efi_md *p) /* * We're allowed to use any entry with these types. */ - physmem_hardware_region(p->md_phys, - p->md_pages * EFI_PAGE_SIZE); + if (!exclude) + physmem_hardware_region(p->md_phys, + p->md_pages * EFI_PAGE_SIZE); + break; + default: + /* Other types shall not be handled by physmem. */ break; } } @@ -520,12 +547,19 @@ add_efi_map_entry(struct efi_md *p) static void add_efi_map_entries(struct efi_map_header *efihdr) { + bool exclude = false; + foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude); +} - foreach_efi_map_entry(efihdr, add_efi_map_entry); +static void +exclude_efi_map_entries(struct efi_map_header *efihdr) +{ + bool exclude = true; + foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude); } static void -print_efi_map_entry(struct efi_md *p) +print_efi_map_entry(struct efi_md *p, void *argp __unused) { const char *type; static const char *types[] = { @@ -585,7 +619,116 @@ print_efi_map_entries(struct efi_map_header *efihdr) printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); - foreach_efi_map_entry(efihdr, print_efi_map_entry); + foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL); +} + +/* + * Map the passed in VA in EFI space to a void * using the efi memory table to + * find the PA and return it in the DMAP, if it exists. 
We're used between the + * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG + * tables We assume that either the entry you are mapping fits within its page, + * or if it spills to the next page, that's contiguous in PA and in the DMAP. + * All observed tables obey the first part of this precondition. + */ +struct early_map_data +{ + vm_offset_t va; + vm_offset_t pa; +}; + +static void +efi_early_map_entry(struct efi_md *p, void *argp) +{ + struct early_map_data *emdp = argp; + vm_offset_t s, e; + + if (emdp->pa != 0) + return; + if ((p->md_attr & EFI_MD_ATTR_RT) == 0) + return; + s = p->md_virt; + e = p->md_virt + p->md_pages * EFI_PAGE_SIZE; + if (emdp->va < s || emdp->va >= e) + return; + emdp->pa = p->md_phys + (emdp->va - p->md_virt); +} + +static void * +efi_early_map(vm_offset_t va) +{ + struct early_map_data emd = { .va = va }; + + foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd); + if (emd.pa == 0) + return NULL; + return (void *)PHYS_TO_DMAP(emd.pa); +} + + +/* + * When booted via kboot, the prior kernel will pass in reserved memory areas in + * a EFI config table. We need to find that table and walk through it excluding + * the memory ranges in it. btw, this is called too early for the printf to do + * anything since msgbufp isn't initialized, let alone a console... + */ +static void +exclude_efi_memreserve(vm_offset_t efi_systbl_phys) +{ + struct efi_systbl *systbl; + struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE; + + systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys); + if (systbl == NULL) { + printf("can't map systbl\n"); + return; + } + if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) { + printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig); + return; + } + + /* + * We don't yet have the pmap system booted enough to create a pmap for + * the efi firmware's preferred address space from the GetMemoryMap() + * table. 
The st_cfgtbl is a VA in this space, so we need to do the + * mapping ourselves to a kernel VA with efi_early_map. We assume that + * the cfgtbl entries don't span a page. Other pointers are PAs, as + * noted below. + */ + if (systbl->st_cfgtbl == 0) /* Failsafe st_entries should == 0 in this case */ + return; + for (int i = 0; i < systbl->st_entries; i++) { + struct efi_cfgtbl *cfgtbl; + struct linux_efi_memreserve *mr; + + cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl)); + if (cfgtbl == NULL) + panic("Can't map the config table entry %d\n", i); + if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0) + continue; + + /* + * cfgtbl points are either VA or PA, depending on the GUID of + * the table. memreserve GUID pointers are PA and not converted + * after a SetVirtualAddressMap(). The list's mr_next pointer + * is also a PA. + */ + mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP( + (vm_offset_t)cfgtbl->ct_data); + while (true) { + for (int j = 0; j < mr->mr_count; j++) { + struct linux_efi_memreserve_entry *mre; + + mre = &mr->mr_entry[j]; + physmem_exclude_region(mre->mre_base, mre->mre_size, + EXFLAG_NODUMP | EXFLAG_NOALLOC); + } + if (mr->mr_next == 0) + break; + mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next); + }; + } + } #ifdef FDT @@ -653,6 +796,8 @@ bus_probe(void) break; } order = strchr(order, ','); + if (order != NULL) + order++; /* Skip comma */ } freeenv(env); @@ -760,6 +905,7 @@ initarm(struct arm64_bootparams *abp) TSRAW(&thread0, TS_ENTER, __func__, NULL); boot_el = abp->boot_el; + hcr_el2 = abp->hcr_el2; /* Parse loader or FDT boot parametes. Determine last used address. 
*/ lastaddr = parse_boot_param(abp); @@ -770,10 +916,30 @@ initarm(struct arm64_bootparams *abp) kmdp = preload_search_by_type("elf64 kernel"); identify_cpu(0); + identify_hypervisor_smbios(); + update_special_regs(0); + /* Set the pcpu data, this is needed by pmap_bootstrap */ + pcpup = &pcpu0; + pcpu_init(pcpup, 0, sizeof(struct pcpu)); + + /* + * Set the pcpu pointer with a backup in tpidr_el1 to be + * loaded when entering the kernel from userland. + */ + __asm __volatile( + "mov x18, %0 \n" + "msr tpidr_el1, %0" :: "r"(pcpup)); + + /* locore.S sets sp_el0 to &thread0 so no need to set it here. */ + PCPU_SET(curthread, &thread0); + PCPU_SET(midr, get_midr()); + link_elf_ireloc(kmdp); +#ifdef FDT try_load_dtb(kmdp); +#endif efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t); @@ -802,22 +968,6 @@ initarm(struct arm64_bootparams *abp) physmem_exclude_region(efifb->fb_addr, efifb->fb_size, EXFLAG_NOALLOC); - /* Set the pcpu data, this is needed by pmap_bootstrap */ - pcpup = &pcpu0; - pcpu_init(pcpup, 0, sizeof(struct pcpu)); - - /* - * Set the pcpu pointer with a backup in tpidr_el1 to be - * loaded when entering the kernel from userland. - */ - __asm __volatile( - "mov x18, %0 \n" - "msr tpidr_el1, %0" :: "r"(pcpup)); - - /* locore.S sets sp_el0 to &thread0 so no need to set it here. 
*/ - PCPU_SET(curthread, &thread0); - PCPU_SET(midr, get_midr()); - /* Do basic tuning, hz etc */ init_param1(); @@ -825,11 +975,25 @@ initarm(struct arm64_bootparams *abp) pan_setup(); /* Bootstrap enough of pmap to enter the kernel proper */ - pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt, - KERNBASE - abp->kern_delta, lastaddr - KERNBASE); - /* Exclude entries neexed in teh DMAP region, but not phys_avail */ + pmap_bootstrap(lastaddr - KERNBASE); + /* Exclude entries needed in the DMAP region, but not phys_avail */ if (efihdr != NULL) exclude_efi_map_entries(efihdr); + /* Do the same for reserve entries in the EFI MEMRESERVE table */ + if (efi_systbl_phys != 0) + exclude_efi_memreserve(efi_systbl_phys); + + /* + * We carefully bootstrap the sanitizer map after we've excluded + * absolutely everything else that could impact phys_avail. There's not + * always enough room for the initial shadow map after the kernel, so + * we'll end up searching for segments that we can safely use. Those + * segments also get excluded from phys_avail. + */ +#if defined(KASAN) || defined(KMSAN) + pmap_bootstrap_san(); +#endif + physmem_init_kernel_globals(); devmap_bootstrap(0, NULL); @@ -873,6 +1037,8 @@ initarm(struct arm64_bootparams *abp) pan_enable(); kcsan_cpu_init(0); + kasan_init(); + kmsan_init(); env = kern_getenv("kernelname"); if (env != NULL) @@ -902,6 +1068,10 @@ initarm(struct arm64_bootparams *abp) early_boot = 0; + if (bootverbose && kstack_pages != KSTACK_PAGES) + printf("kern.kstack_pages = %d ignored for thread0\n", + kstack_pages); + TSEXIT(); } |