aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--stand/common/bootstrap.h3
-rw-r--r--stand/common/load_elf.c15
-rw-r--r--stand/efi/loader/arch/amd64/elf64_freebsd.c142
-rw-r--r--stand/efi/loader/bootinfo.c6
-rw-r--r--stand/efi/loader/copy.c232
-rw-r--r--stand/efi/loader/loader_efi.h10
6 files changed, 348 insertions, 60 deletions
diff --git a/stand/common/bootstrap.h b/stand/common/bootstrap.h
index b7e1f8553f47..eb4e50203133 100644
--- a/stand/common/bootstrap.h
+++ b/stand/common/bootstrap.h
@@ -228,6 +228,9 @@ struct preloaded_file
size_t f_size; /* file size */
struct kernel_module *f_modules; /* list of modules if any */
struct preloaded_file *f_next; /* next file */
+#ifdef __amd64__
+ bool f_kernphys_relocatable;
+#endif
};
struct file_format
diff --git a/stand/common/load_elf.c b/stand/common/load_elf.c
index f1a9ff8e0c22..9ae91036dbb4 100644
--- a/stand/common/load_elf.c
+++ b/stand/common/load_elf.c
@@ -207,6 +207,18 @@ static int elf_section_header_convert(const Elf_Ehdr *ehdr, Elf_Shdr *shdr)
#undef CONVERT_SWITCH
#undef CONVERT_FIELD
+
+#ifdef __amd64__
+/*
+ * Report whether the kernel image being loaded exports a "kernphys"
+ * data object that is exactly 8 bytes in size.  The caller records the
+ * answer in f_kernphys_relocatable.
+ */
+static bool
+is_kernphys_relocatable(elf_file_t ef)
+{
+	Elf_Sym kernphys;
+
+	if (__elfN(lookup_symbol)(ef, "kernphys", &kernphys, STT_OBJECT) != 0)
+		return (false);
+	return (kernphys.st_size == 8);
+}
+#endif
+
static int
__elfN(load_elf_header)(char *filename, elf_file_t ef)
{
@@ -434,6 +446,9 @@ __elfN(loadfile_raw)(char *filename, uint64_t dest,
/* Load OK, return module pointer */
*result = (struct preloaded_file *)fp;
err = 0;
+#ifdef __amd64__
+ fp->f_kernphys_relocatable = is_kernphys_relocatable(&ef);
+#endif
goto out;
ioerr:
diff --git a/stand/efi/loader/arch/amd64/elf64_freebsd.c b/stand/efi/loader/arch/amd64/elf64_freebsd.c
index a950ca55e843..d0c8ef96eeea 100644
--- a/stand/efi/loader/arch/amd64/elf64_freebsd.c
+++ b/stand/efi/loader/arch/amd64/elf64_freebsd.c
@@ -82,7 +82,11 @@ struct file_format *file_formats[] = {
static pml4_entry_t *PT4;
static pdp_entry_t *PT3;
+static pdp_entry_t *PT3_l, *PT3_u;
static pd_entry_t *PT2;
+static pd_entry_t *PT2_l0, *PT2_l1, *PT2_l2, *PT2_l3, *PT2_u0, *PT2_u1;
+
+extern EFI_PHYSICAL_ADDRESS staging;
static void (*trampoline)(uint64_t stack, void *copy_finish, uint64_t kernend,
uint64_t modulep, pml4_entry_t *pagetable, uint64_t entry);
@@ -105,6 +109,12 @@ elf64_exec(struct preloaded_file *fp)
ACPI_TABLE_RSDP *rsdp;
char buf[24];
int revision;
+ bool copy_auto;
+
+ copy_auto = copy_staging == COPY_STAGING_AUTO;
+ if (copy_auto)
+ copy_staging = fp->f_kernphys_relocatable ?
+ COPY_STAGING_DISABLE : COPY_STAGING_ENABLE;
/*
* Report the RSDP to the kernel. While this can be found with
@@ -151,57 +161,133 @@ elf64_exec(struct preloaded_file *fp)
}
if ((md = file_findmetadata(fp, MODINFOMD_ELFHDR)) == NULL)
- return(EFTYPE);
+ return (EFTYPE);
ehdr = (Elf_Ehdr *)&(md->md_data);
- trampcode = (vm_offset_t)0x0000000040000000;
+	/*
+	 * AllocateMaxAddress treats the value as an upper bound: keep the
+	 * trampoline below 1G when the staging copy is enabled, below 4G
+	 * otherwise.
+	 */
+	trampcode = copy_staging == COPY_STAGING_ENABLE ?
+	    (vm_offset_t)0x0000000040000000 /* 1G */ :
+	    (vm_offset_t)0x0000000100000000 /* 4G */;
err = BS->AllocatePages(AllocateMaxAddress, EfiLoaderData, 1,
(EFI_PHYSICAL_ADDRESS *)&trampcode);
+ if (EFI_ERROR(err)) {
+ printf("Unable to allocate trampoline\n");
+ if (copy_auto)
+ copy_staging = COPY_STAGING_AUTO;
+ return (ENOMEM);
+ }
bzero((void *)trampcode, EFI_PAGE_SIZE);
trampstack = trampcode + EFI_PAGE_SIZE - 8;
bcopy((void *)&amd64_tramp, (void *)trampcode, amd64_tramp_size);
trampoline = (void *)trampcode;
- PT4 = (pml4_entry_t *)0x0000000040000000;
- err = BS->AllocatePages(AllocateMaxAddress, EfiLoaderData, 3,
- (EFI_PHYSICAL_ADDRESS *)&PT4);
- bzero(PT4, 3 * EFI_PAGE_SIZE);
+ if (copy_staging == COPY_STAGING_ENABLE) {
+ PT4 = (pml4_entry_t *)0x0000000040000000;
+ err = BS->AllocatePages(AllocateMaxAddress, EfiLoaderData, 3,
+ (EFI_PHYSICAL_ADDRESS *)&PT4);
+ if (EFI_ERROR(err)) {
+ printf("Unable to allocate trampoline page table\n");
+ BS->FreePages(trampcode, 1);
+ if (copy_auto)
+ copy_staging = COPY_STAGING_AUTO;
+ return (ENOMEM);
+ }
+ bzero(PT4, 3 * EFI_PAGE_SIZE);
+ PT3 = &PT4[512];
+ PT2 = &PT3[512];
+
+ /*
+ * This is kinda brutal, but every single 1GB VM
+ * memory segment points to the same first 1GB of
+ * physical memory. But it is more than adequate.
+ */
+ for (i = 0; i < NPTEPG; i++) {
+ /*
+ * Each slot of the L4 pages points to the
+ * same L3 page.
+ */
+ PT4[i] = (pml4_entry_t)PT3;
+ PT4[i] |= PG_V | PG_RW;
+
+ /*
+ * Each slot of the L3 pages points to the
+ * same L2 page.
+ */
+ PT3[i] = (pdp_entry_t)PT2;
+ PT3[i] |= PG_V | PG_RW;
+
+ /*
+ * The L2 page slots are mapped with 2MB pages for 1GB.
+ */
+ PT2[i] = (pd_entry_t)i * (2 * 1024 * 1024);
+ PT2[i] |= PG_V | PG_RW | PG_PS;
+ }
+ } else {
+ PT4 = (pml4_entry_t *)0x0000000100000000; /* 4G */
+ err = BS->AllocatePages(AllocateMaxAddress, EfiLoaderData, 9,
+ (EFI_PHYSICAL_ADDRESS *)&PT4);
+		if (EFI_ERROR(err)) {
+			printf("Unable to allocate trampoline page table\n");
+			/*
+			 * Only the one-page trampoline is outstanding here;
+			 * the nine page-table pages were never obtained, so
+			 * release exactly what was allocated.
+			 */
+			BS->FreePages(trampcode, 1);
+			/* Undo the temporary resolution of AUTO done on entry. */
+			if (copy_auto)
+				copy_staging = COPY_STAGING_AUTO;
+			return (ENOMEM);
+		}
- PT3 = &PT4[512];
- PT2 = &PT3[512];
+ bzero(PT4, 9 * EFI_PAGE_SIZE);
+
+ PT3_l = &PT4[NPML4EPG * 1];
+ PT3_u = &PT4[NPML4EPG * 2];
+ PT2_l0 = &PT4[NPML4EPG * 3];
+ PT2_l1 = &PT4[NPML4EPG * 4];
+ PT2_l2 = &PT4[NPML4EPG * 5];
+ PT2_l3 = &PT4[NPML4EPG * 6];
+ PT2_u0 = &PT4[NPML4EPG * 7];
+ PT2_u1 = &PT4[NPML4EPG * 8];
+
+ /* 1:1 mapping of lower 4G */
+ PT4[0] = (pml4_entry_t)PT3_l | PG_V | PG_RW;
+ PT3_l[0] = (pdp_entry_t)PT2_l0 | PG_V | PG_RW;
+ PT3_l[1] = (pdp_entry_t)PT2_l1 | PG_V | PG_RW;
+ PT3_l[2] = (pdp_entry_t)PT2_l2 | PG_V | PG_RW;
+ PT3_l[3] = (pdp_entry_t)PT2_l3 | PG_V | PG_RW;
+ for (i = 0; i < 4 * NPDEPG; i++) {
+ PT2_l0[i] = ((pd_entry_t)i << PDRSHIFT) | PG_V |
+ PG_RW | PG_PS;
+ }
- /*
- * This is kinda brutal, but every single 1GB VM memory segment points
- * to the same first 1GB of physical memory. But it is more than
- * adequate.
- */
- for (i = 0; i < 512; i++) {
- /* Each slot of the L4 pages points to the same L3 page. */
- PT4[i] = (pml4_entry_t)PT3;
- PT4[i] |= PG_V | PG_RW;
-
- /* Each slot of the L3 pages points to the same L2 page. */
- PT3[i] = (pdp_entry_t)PT2;
- PT3[i] |= PG_V | PG_RW;
-
- /* The L2 page slots are mapped with 2MB pages for 1GB. */
- PT2[i] = i * (2 * 1024 * 1024);
- PT2[i] |= PG_V | PG_RW | PG_PS;
+ /* mapping of kernel 2G below top */
+ PT4[NPML4EPG - 1] = (pml4_entry_t)PT3_u | PG_V | PG_RW;
+ PT3_u[NPDPEPG - 2] = (pdp_entry_t)PT2_u0 | PG_V | PG_RW;
+ PT3_u[NPDPEPG - 1] = (pdp_entry_t)PT2_u1 | PG_V | PG_RW;
+ /* compat mapping of phys @0 */
+ PT2_u0[0] = PG_PS | PG_V | PG_RW;
+ /* this maps past staging area */
+ for (i = 1; i < 2 * NPDEPG; i++) {
+ PT2_u0[i] = ((pd_entry_t)staging +
+ ((pd_entry_t)i - 1) * NBPDR) |
+ PG_V | PG_RW | PG_PS;
+ }
}
+	/* Report the staging mode and where the trampoline and PT4 landed. */
+	printf("staging %#lx (%scopying) tramp %p PT4 %p\n",
+	    staging, copy_staging == COPY_STAGING_ENABLE ? "" : "not ",
+	    trampoline, PT4);
printf("Start @ 0x%lx ...\n", ehdr->e_entry);
efi_time_fini();
err = bi_load(fp->f_args, &modulep, &kernend, true);
if (err != 0) {
efi_time_init();
- return(err);
+ if (copy_auto)
+ copy_staging = COPY_STAGING_AUTO;
+ return (err);
}
dev_cleanup();
- trampoline(trampstack, efi_copy_finish, kernend, modulep, PT4,
- ehdr->e_entry);
+ trampoline(trampstack, copy_staging == COPY_STAGING_ENABLE ?
+ efi_copy_finish : efi_copy_finish_nop, kernend, modulep,
+ PT4, ehdr->e_entry);
panic("exec returned");
}
diff --git a/stand/efi/loader/bootinfo.c b/stand/efi/loader/bootinfo.c
index 9924901d29e6..f4501f18f14c 100644
--- a/stand/efi/loader/bootinfo.c
+++ b/stand/efi/loader/bootinfo.c
@@ -65,6 +65,8 @@ int bi_load(char *args, vm_offset_t *modulep, vm_offset_t *kernendp,
extern EFI_SYSTEM_TABLE *ST;
+int boot_services_gone;
+
static int
bi_getboothowto(char *kargs)
{
@@ -396,8 +398,10 @@ bi_load_efi_data(struct preloaded_file *kfp, bool exit_bs)
if (!exit_bs)
break;
status = BS->ExitBootServices(IH, efi_mapkey);
- if (!EFI_ERROR(status))
+ if (!EFI_ERROR(status)) {
+ boot_services_gone = 1;
break;
+ }
}
if (retry == 0) {
diff --git a/stand/efi/loader/copy.c b/stand/efi/loader/copy.c
index e723b61e3bca..b8ed4c8e027e 100644
--- a/stand/efi/loader/copy.c
+++ b/stand/efi/loader/copy.c
@@ -39,6 +39,11 @@ __FBSDID("$FreeBSD$");
#include "loader_efi.h"
+#define M(x) ((x) * 1024 * 1024)
+#define G(x) (1UL * (x) * 1024 * 1024 * 1024)
+
+extern int boot_services_gone;
+
#if defined(__i386__) || defined(__amd64__)
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
@@ -175,24 +180,142 @@ out:
#ifndef EFI_STAGING_SIZE
#if defined(__arm__)
-#define EFI_STAGING_SIZE 32
+#define EFI_STAGING_SIZE M(32)
+#else
+#define EFI_STAGING_SIZE M(64)
+#endif
+#endif
+
+#if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \
+ defined(__riscv)
+#define EFI_STAGING_2M_ALIGN 1
#else
-#define EFI_STAGING_SIZE 64
+#define EFI_STAGING_2M_ALIGN 0
#endif
+
+#if defined(__amd64__)
+#define EFI_STAGING_SLOP M(8)
+#else
+#define EFI_STAGING_SLOP 0
#endif
+static u_long staging_slop = EFI_STAGING_SLOP;
+
EFI_PHYSICAL_ADDRESS staging, staging_end, staging_base;
int stage_offset_set = 0;
ssize_t stage_offset;
+/*
+ * Hand the whole staging allocation, [staging_base, staging_end), back
+ * to the firmware and forget any previously established stage offset.
+ */
+static void
+efi_copy_free(void)
+{
+	unsigned long nr_pages;
+
+	nr_pages = (staging_end - staging_base) / EFI_PAGE_SIZE;
+	BS->FreePages(staging_base, nr_pages);
+	stage_offset = 0;
+	stage_offset_set = 0;
+}
+
+#ifdef __amd64__
+int copy_staging = COPY_STAGING_ENABLE;
+
+/*
+ * "copy_staging" loader command.
+ *
+ * With no argument, print the current mode.  With one argument
+ * ("enable", "disable" or "auto"), select that mode; if the mode
+ * actually changes, the loaded kernel is unloaded and the staging area
+ * is released and allocated afresh.  Returns CMD_ERROR for an unknown
+ * mode name or for too many arguments, CMD_OK otherwise.
+ */
+static int
+command_copy_staging(int argc, char *argv[])
+{
+	static const char *const mode[3] = {
+		[COPY_STAGING_ENABLE] = "enable",
+		[COPY_STAGING_DISABLE] = "disable",
+		[COPY_STAGING_AUTO] = "auto",
+	};
+	int i, requested;
+
+	if (argc > 2)
+		return (CMD_ERROR);
+	if (argc < 2) {
+		printf("copy staging: %s\n", mode[copy_staging]);
+		return (CMD_OK);
+	}
+
+	/* Translate the mode name into its COPY_STAGING_* value. */
+	requested = -1;
+	for (i = 0; i < 3; i++) {
+		if (strcmp(argv[1], mode[i]) == 0) {
+			requested = i;
+			break;
+		}
+	}
+	if (requested == -1) {
+		printf("usage: copy_staging enable|disable|auto\n");
+		return (CMD_ERROR);
+	}
+	if (requested == copy_staging)
+		return (CMD_OK);
+
+	/* The staging address limit depends on the mode: start over. */
+	copy_staging = requested;
+	printf("changed copy_staging, unloading kernel\n");
+	unload();
+	efi_copy_free();
+	efi_copy_init();
+	return (CMD_OK);
+}
+COMMAND_SET(copy_staging, "copy_staging", "copy staging", command_copy_staging);
+#endif
+
+/*
+ * "staging_slop" loader command.
+ *
+ * With no argument, print the current slop.  With one numeric argument
+ * (any base understood by strtoul() with base 0), set the slop; if the
+ * value actually changes, the loaded kernel is unloaded and the staging
+ * area is released and allocated afresh.  Returns CMD_ERROR for a
+ * malformed number or for too many arguments, CMD_OK otherwise.
+ */
+static int
+command_staging_slop(int argc, char *argv[])
+{
+	char *endp;
+	u_long new;
+
+	if (argc > 2)
+		return (CMD_ERROR);
+	if (argc < 2) {
+		printf("staging slop %#lx\n", staging_slop);
+		return (CMD_OK);
+	}
+
+	new = strtoul(argv[1], &endp, 0);
+	/* Reject an empty argument as well as trailing garbage. */
+	if (endp == argv[1] || *endp != '\0') {
+		printf("invalid slop value\n");
+		return (CMD_ERROR);
+	}
+	if (new == staging_slop)
+		return (CMD_OK);
+
+	/*
+	 * Store the value before the staging area is re-created;
+	 * efi_check_space() reads staging_slop when sizing it.
+	 */
+	staging_slop = new;
+	printf("changed slop, unloading kernel\n");
+	unload();
+	efi_copy_free();
+	efi_copy_init();
+	return (CMD_OK);
+}
+COMMAND_SET(staging_slop, "staging_slop", "set staging slop",
+ command_staging_slop);
+
+#if defined(__i386__) || defined(__amd64__)
+/*
+ * Upper bound for the physical address of the staging area.  It must
+ * reside in the first 1GB or 4GB of physical memory: see elf64_exec()
+ * in stand/efi/loader/arch/amd64/elf64_freebsd.c.
+ */
+static EFI_PHYSICAL_ADDRESS
+get_staging_max(void)
+{
+#if defined(__i386__)
+	return (G(1));
+#elif defined(__amd64__)
+	if (copy_staging == COPY_STAGING_ENABLE)
+		return (G(1));
+	return (G(4));
+#endif
+}
+#define EFI_ALLOC_METHOD AllocateMaxAddress
+#else
+#define EFI_ALLOC_METHOD AllocateAnyPages
+#endif
+
int
efi_copy_init(void)
{
EFI_STATUS status;
-
unsigned long nr_pages;
- nr_pages = EFI_SIZE_TO_PAGES((EFI_STAGING_SIZE) * 1024 * 1024);
+ nr_pages = EFI_SIZE_TO_PAGES((EFI_STAGING_SIZE));
#if defined(__i386__) || defined(__amd64__)
/*
@@ -203,18 +326,10 @@ efi_copy_init(void)
if (running_on_hyperv())
efi_verify_staging_size(&nr_pages);
- /*
- * The staging area must reside in the the first 1GB physical
- * memory: see elf64_exec() in
- * boot/efi/loader/arch/amd64/elf64_freebsd.c.
- */
- staging = 1024*1024*1024;
- status = BS->AllocatePages(AllocateMaxAddress, EfiLoaderData,
- nr_pages, &staging);
-#else
- status = BS->AllocatePages(AllocateAnyPages, EfiLoaderData,
- nr_pages, &staging);
+ staging = get_staging_max();
#endif
+ status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderData,
+ nr_pages, &staging);
if (EFI_ERROR(status)) {
printf("failed to allocate staging area: %lu\n",
EFI_ERROR_CODE(status));
@@ -223,7 +338,7 @@ efi_copy_init(void)
staging_base = staging;
staging_end = staging + nr_pages * EFI_PAGE_SIZE;
-#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
+#if EFI_STAGING_2M_ALIGN
/*
* Round the kernel load address to a 2MiB value. This is needed
* because the kernel builds a page table based on where it has
@@ -231,7 +346,7 @@ efi_copy_init(void)
* either a 1MiB or 2MiB page for this we need to make sure it
* is correctly aligned for both cases.
*/
- staging = roundup2(staging, 2 * 1024 * 1024);
+ staging = roundup2(staging, M(2));
#endif
return (0);
@@ -240,20 +355,42 @@ efi_copy_init(void)
static bool
efi_check_space(vm_offset_t end)
{
- EFI_PHYSICAL_ADDRESS addr;
+ EFI_PHYSICAL_ADDRESS addr, new_base, new_staging;
EFI_STATUS status;
unsigned long nr_pages;
+ end = roundup2(end, EFI_PAGE_SIZE);
+
/* There is already enough space */
- if (end <= staging_end)
+ if (end + staging_slop <= staging_end)
return (true);
- end = roundup2(end, EFI_PAGE_SIZE);
- nr_pages = EFI_SIZE_TO_PAGES(end - staging_end);
+ if (boot_services_gone) {
+ if (end <= staging_end)
+ return (true);
+ panic("efi_check_space: cannot expand staging area "
+ "after boot services were exited\n");
+ }
+
+ /*
+ * Add slop at the end:
+ * 1. amd64 kernel expects to do some very early allocations
+ * by carving out memory after kernend. Slop guarantees
+ * that it does not overwrite anything useful.
+ * 2. It seems that initial calculation of the staging size
+ * could be somewhat smaller than actually copying in after
+ * boot services are exited. Slop avoids calling
+ * BS->AllocatePages() when it cannot work.
+ */
+ end += staging_slop;
+ nr_pages = EFI_SIZE_TO_PAGES(end - staging_end);
#if defined(__i386__) || defined(__amd64__)
- /* X86 needs all memory to be allocated under the 1G boundary */
- if (end > 1024*1024*1024)
+ /*
+ * i386 needs all memory to be allocated under the 1G boundary.
+ * amd64 needs all memory to be allocated under the 1G or 4G boundary.
+ */
+ if (end > get_staging_max())
goto before_staging;
#endif
@@ -268,14 +405,12 @@ efi_check_space(vm_offset_t end)
before_staging:
/* Try allocating space before the previous allocation */
- if (staging < nr_pages * EFI_PAGE_SIZE) {
- printf("Not enough space before allocation\n");
- return (false);
- }
+ if (staging < nr_pages * EFI_PAGE_SIZE)
+ goto expand;
addr = staging - nr_pages * EFI_PAGE_SIZE;
-#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
+#if EFI_STAGING_2M_ALIGN
/* See efi_copy_init for why this is needed */
- addr = rounddown2(addr, 2 * 1024 * 1024);
+ addr = rounddown2(addr, M(2));
#endif
nr_pages = EFI_SIZE_TO_PAGES(staging_base - addr);
status = BS->AllocatePages(AllocateAddress, EfiLoaderData, nr_pages,
@@ -288,11 +423,42 @@ before_staging:
staging_base = addr;
memmove((void *)(uintptr_t)staging_base,
(void *)(uintptr_t)staging, staging_end - staging);
- stage_offset -= (staging - staging_base);
+ stage_offset -= staging - staging_base;
staging = staging_base;
return (true);
}
+expand:
+ nr_pages = EFI_SIZE_TO_PAGES(end - (vm_offset_t)staging);
+#if EFI_STAGING_2M_ALIGN
+ nr_pages += M(2) / EFI_PAGE_SIZE;
+#endif
+#if defined(__i386__) || defined(__amd64__)
+ new_base = get_staging_max();
+#endif
+ status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderData,
+ nr_pages, &new_base);
+ if (!EFI_ERROR(status)) {
+#if EFI_STAGING_2M_ALIGN
+ new_staging = roundup2(new_base, M(2));
+#else
+ new_staging = new_base;
+#endif
+ /*
+ * Move the old allocation and update the state so
+ * translation still works.
+ */
+ memcpy((void *)(uintptr_t)new_staging,
+ (void *)(uintptr_t)staging, staging_end - staging);
+ BS->FreePages(staging_base, (staging_end - staging_base) /
+ EFI_PAGE_SIZE);
+ stage_offset -= staging - new_staging;
+ staging = new_staging;
+ staging_end = new_base + nr_pages * EFI_PAGE_SIZE;
+ staging_base = new_base;
+ return (true);
+ }
+
printf("efi_check_space: Unable to expand staging area\n");
return (false);
}
@@ -335,7 +501,6 @@ efi_copyout(const vm_offset_t src, void *dest, const size_t len)
return (len);
}
-
ssize_t
efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
{
@@ -364,3 +529,8 @@ efi_copy_finish(void)
while (src < last)
*dst++ = *src++;
}
+
+/*
+ * Do-nothing counterpart of efi_copy_finish(); elf64_exec() hands it to
+ * the trampoline when copy_staging is not COPY_STAGING_ENABLE.
+ */
+void
+efi_copy_finish_nop(void)
+{
+}
diff --git a/stand/efi/loader/loader_efi.h b/stand/efi/loader/loader_efi.h
index 4d077514e423..8254d16b1592 100644
--- a/stand/efi/loader/loader_efi.h
+++ b/stand/efi/loader/loader_efi.h
@@ -34,6 +34,15 @@
#include <stand.h>
#include <readin.h>
+#ifdef __amd64__
+/* Values of copy_staging (amd64 only). */
+enum {
+ /* Trampoline and staging limited to 1G; efi_copy_finish() is used. */
+ COPY_STAGING_ENABLE,
+ /* Limits raised to 4G; efi_copy_finish_nop() is used instead. */
+ COPY_STAGING_DISABLE,
+ /* elf64_exec() picks one of the above from f_kernphys_relocatable. */
+ COPY_STAGING_AUTO,
+};
+/* Current mode; set by the "copy_staging" loader command. */
+extern int copy_staging;
+#endif
+
int efi_autoload(void);
int efi_copy_init(void);
@@ -44,5 +53,6 @@ ssize_t efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len);
void * efi_translate(vm_offset_t ptr);
void efi_copy_finish(void);
+void efi_copy_finish_nop(void);
#endif /* _LOADER_EFI_COPY_H_ */