aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
author: Mark Johnston <markj@FreeBSD.org> 2020-03-24 18:43:23 +0000
committer: Mark Johnston <markj@FreeBSD.org> 2020-03-24 18:43:23 +0000
commit: 8db2e8fd16c4353ab1ba4f9d37e0939cfeff1ff8 (patch)
tree: 2d51ab774539044b4083ee7d30ffe25f67cee006 /sys
parent: d681bc9e649a6077bd91be39174e40bc498e6dd7 (diff)
download: src-8db2e8fd16c4353ab1ba4f9d37e0939cfeff1ff8.tar.gz
          src-8db2e8fd16c4353ab1ba4f9d37e0939cfeff1ff8.zip
Remove the secondary_stacks array in arm64 and riscv kernels.

Instead, dynamically allocate a page for the boot stack of each AP when
starting them up, like we do on x86. This shrinks the bss by
MAXCPU*KSTACK_PAGES pages, which corresponds to 4MB on arm64 and 256KB
on riscv.

Duplicate the logic used on x86 to free the bootstacks, by using a
sysinit to wait for each AP to switch to a thread before freeing its
stack.

While here, mark some static MD variables as such.

Reviewed by: kib
MFC after: 1 month
Sponsored by: Juniper Networks, Klara Inc.
Differential Revision: https://reviews.freebsd.org/D24158
Notes
Notes: svn path=/head/; revision=359280
Diffstat (limited to 'sys')
-rw-r--r-- sys/arm64/arm64/locore.S | 9
-rw-r--r-- sys/arm64/arm64/mp_machdep.c | 68
-rw-r--r-- sys/riscv/riscv/locore.S | 10
-rw-r--r-- sys/riscv/riscv/mp_machdep.c | 54
4 files changed, 107 insertions, 34 deletions
diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S
index c8b7ded1bf29..0268ab6ef00c 100644
--- a/sys/arm64/arm64/locore.S
+++ b/sys/arm64/arm64/locore.S
@@ -214,11 +214,10 @@ ENTRY(mpentry)
br x15
mp_virtdone:
- ldr x4, =secondary_stacks
- mov x5, #(PAGE_SIZE * KSTACK_PAGES)
- mul x5, x0, x5
- add sp, x4, x5
-
+ /* Start using the AP boot stack */
+ ldr x4, =bootstack
+ ldr x4, [x4]
+ mov sp, x4
b init_secondary
END(mpentry)
#endif
diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c
index e16a1f416c93..a9250e3e2cd7 100644
--- a/sys/arm64/arm64/mp_machdep.c
+++ b/sys/arm64/arm64/mp_machdep.c
@@ -123,7 +123,6 @@ static void ipi_preempt(void *);
static void ipi_rendezvous(void *);
static void ipi_stop(void *);
-struct mtx ap_boot_mtx;
struct pcb stoppcbs[MAXCPU];
/*
@@ -136,10 +135,18 @@ static int cpu0 = -1;
void mpentry(unsigned long cpuid);
void init_secondary(uint64_t);
-uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16);
+/* Synchronize AP startup. */
+static struct mtx ap_boot_mtx;
+
+/* Stacks for AP initialization, discarded once idle threads are started. */
+void *bootstack;
+static void *bootstacks[MAXCPU];
+
+/* Count of started APs, used to synchronize access to bootstack. */
+static volatile int aps_started;
/* Set to 1 once we're ready to let the APs out of the pen. */
-volatile int aps_ready = 0;
+static volatile int aps_ready;
/* Temporary variables for init_secondary() */
void *dpcpu[MAXCPU - 1];
@@ -205,14 +212,14 @@ init_secondary(uint64_t cpu)
"mov x18, %0 \n"
"msr tpidr_el1, %0" :: "r"(pcpup));
- /* Spin until the BSP releases the APs */
- while (!aps_ready)
+ /* Signal the BSP and spin until it has released all APs. */
+ atomic_add_int(&aps_started, 1);
+ while (!atomic_load_int(&aps_ready))
__asm __volatile("wfe");
/* Initialize curthread */
KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
pcpup->pc_curthread = pcpup->pc_idlethread;
- pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb;
/* Initialize curpmap to match TTBR0's current setting. */
pmap0 = vmspace_pmap(&vmspace0);
@@ -250,6 +257,11 @@ init_secondary(uint64_t cpu)
kcsan_cpu_init(cpu);
+ /*
+ * Assert that smp_after_idle_runnable condition is reasonable.
+ */
+ MPASS(PCPU_GET(curpcb) == NULL);
+
/* Enter the scheduler */
sched_throw(NULL);
@@ -257,6 +269,24 @@ init_secondary(uint64_t cpu)
/* NOTREACHED */
}
+static void
+smp_after_idle_runnable(void *arg __unused)
+{
+ struct pcpu *pc;
+ int cpu;
+
+ for (cpu = 1; cpu < mp_ncpus; cpu++) {
+ if (bootstacks[cpu] != NULL) {
+ pc = pcpu_find(cpu);
+ while (atomic_load_ptr(&pc->pc_curpcb) == NULL)
+ cpu_spinwait();
+ kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE);
+ }
+ }
+}
+SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY,
+ smp_after_idle_runnable, NULL);
+
/*
* Send IPI thru interrupt controller.
*/
@@ -391,7 +421,7 @@ start_cpu(u_int id, uint64_t target_cpu)
struct pcpu *pcpup;
vm_paddr_t pa;
u_int cpuid;
- int err;
+ int err, naps;
/* Check we are able to start this cpu */
if (id > mp_maxid)
@@ -405,7 +435,7 @@ start_cpu(u_int id, uint64_t target_cpu)
/*
* Rotate the CPU IDs to put the boot CPU as CPU 0. We keep the other
- * CPUs ordered as the are likely grouped into clusters so it can be
+ * CPUs ordered as they are likely grouped into clusters so it can be
* useful to keep that property, e.g. for the GICv3 driver to send
* an IPI to all CPUs in the cluster.
*/
@@ -420,29 +450,41 @@ start_cpu(u_int id, uint64_t target_cpu)
dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
dpcpu_init(dpcpu[cpuid - 1], cpuid);
+ bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO);
+
+ naps = atomic_load_int(&aps_started);
+ bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE;
+
printf("Starting CPU %u (%lx)\n", cpuid, target_cpu);
pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry);
-
err = psci_cpu_on(target_cpu, pa, cpuid);
if (err != PSCI_RETVAL_SUCCESS) {
/*
* Panic here if INVARIANTS are enabled and PSCI failed to
- * start the requested CPU. If psci_cpu_on returns PSCI_MISSING
+ * start the requested CPU. psci_cpu_on() returns PSCI_MISSING
* to indicate we are unable to use it to start the given CPU.
*/
KASSERT(err == PSCI_MISSING ||
(mp_quirks & MP_QUIRK_CPULIST) == MP_QUIRK_CPULIST,
- ("Failed to start CPU %u (%lx)\n", id, target_cpu));
+ ("Failed to start CPU %u (%lx), error %d\n",
+ id, target_cpu, err));
pcpu_destroy(pcpup);
kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE);
dpcpu[cpuid - 1] = NULL;
+ kmem_free((vm_offset_t)bootstacks[cpuid], PAGE_SIZE);
+ bootstacks[cpuid] = NULL;
mp_ncpus--;
/* Notify the user that the CPU failed to start */
- printf("Failed to start CPU %u (%lx)\n", id, target_cpu);
- } else
+ printf("Failed to start CPU %u (%lx), error %d\n",
+ id, target_cpu, err);
+ } else {
+ /* Wait for the AP to switch to its boot stack. */
+ while (atomic_load_int(&aps_started) < naps + 1)
+ cpu_spinwait();
CPU_SET(cpuid, &all_cpus);
+ }
return (true);
}
diff --git a/sys/riscv/riscv/locore.S b/sys/riscv/riscv/locore.S
index 8d3b89897c3e..a794851bcb6b 100644
--- a/sys/riscv/riscv/locore.S
+++ b/sys/riscv/riscv/locore.S
@@ -301,14 +301,8 @@ ENTRY(mpentry)
beqz t1, 1b
/* Setup stack pointer */
- lla t0, secondary_stacks
- li t1, (PAGE_SIZE * KSTACK_PAGES)
- mulw t2, t1, a0
- add t0, t0, t2
- add t0, t0, t1
- sub t0, t0, s9
- li t1, KERNBASE
- add sp, t0, t1
+ lla t0, bootstack
+ ld sp, 0(t0)
/* Setup supervisor trap vector */
lla t0, mpva
diff --git a/sys/riscv/riscv/mp_machdep.c b/sys/riscv/riscv/mp_machdep.c
index 322121510b3e..113165192735 100644
--- a/sys/riscv/riscv/mp_machdep.c
+++ b/sys/riscv/riscv/mp_machdep.c
@@ -87,7 +87,6 @@ static device_attach_t riscv64_cpu_attach;
static int ipi_handler(void *);
-struct mtx ap_boot_mtx;
struct pcb stoppcbs[MAXCPU];
extern uint32_t boot_hart;
@@ -98,13 +97,19 @@ static uint32_t cpu_reg[MAXCPU][2];
#endif
static device_t cpu_list[MAXCPU];
-void mpentry(unsigned long cpuid);
void init_secondary(uint64_t);
-uint8_t secondary_stacks[MAXCPU][PAGE_SIZE * KSTACK_PAGES] __aligned(16);
+static struct mtx ap_boot_mtx;
+
+/* Stacks for AP initialization, discarded once idle threads are started. */
+void *bootstack;
+static void *bootstacks[MAXCPU];
+
+/* Count of started APs, used to synchronize access to bootstack. */
+static volatile int aps_started;
/* Set to 1 once we're ready to let the APs out of the pen. */
-volatile int aps_ready = 0;
+static volatile int aps_ready;
/* Temporary variables for init_secondary() */
void *dpcpu[MAXCPU - 1];
@@ -233,14 +238,14 @@ init_secondary(uint64_t hart)
csr_set(sie, SIE_SSIE);
csr_set(sip, SIE_SSIE);
- /* Spin until the BSP releases the APs */
- while (!aps_ready)
+ /* Signal the BSP and spin until it has released all APs. */
+ atomic_add_int(&aps_started, 1);
+ while (!atomic_load_int(&aps_ready))
__asm __volatile("wfi");
/* Initialize curthread */
KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
pcpup->pc_curthread = pcpup->pc_idlethread;
- pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb;
/*
* Identify current CPU. This is necessary to setup
@@ -274,6 +279,11 @@ init_secondary(uint64_t hart)
mtx_unlock_spin(&ap_boot_mtx);
+ /*
+ * Assert that smp_after_idle_runnable condition is reasonable.
+ */
+ MPASS(PCPU_GET(curpcb) == NULL);
+
/* Enter the scheduler */
sched_throw(NULL);
@@ -281,6 +291,24 @@ init_secondary(uint64_t hart)
/* NOTREACHED */
}
+static void
+smp_after_idle_runnable(void *arg __unused)
+{
+ struct pcpu *pc;
+ int cpu;
+
+ for (cpu = 1; cpu < mp_ncpus; cpu++) {
+ if (bootstacks[cpu] != NULL) {
+ pc = pcpu_find(cpu);
+ while (atomic_load_ptr(&pc->pc_curpcb) == NULL)
+ cpu_spinwait();
+ kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE);
+ }
+ }
+}
+SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY,
+ smp_after_idle_runnable, NULL);
+
static int
ipi_handler(void *arg)
{
@@ -373,6 +401,7 @@ cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
struct pcpu *pcpup;
uint64_t hart;
u_int cpuid;
+ int naps;
/* Check if this hart supports MMU. */
if (OF_getproplen(node, "mmu-type") < 0)
@@ -419,8 +448,17 @@ cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
dpcpu_init(dpcpu[cpuid - 1], cpuid);
+ bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO);
+
+ naps = atomic_load_int(&aps_started);
+ bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE;
+
printf("Starting CPU %u (hart %lx)\n", cpuid, hart);
- __riscv_boot_ap[hart] = 1;
+ atomic_store_32(&__riscv_boot_ap[hart], 1);
+
+ /* Wait for the AP to switch to its boot stack. */
+ while (atomic_load_int(&aps_started) < naps + 1)
+ cpu_spinwait();
CPU_SET(cpuid, &all_cpus);
CPU_SET(hart, &all_harts);