Diffstat (limited to 'sys/arm64/arm64')
-rw-r--r--  sys/arm64/arm64/autoconf.c | 92
-rw-r--r--  sys/arm64/arm64/bus_machdep.c | 230
-rw-r--r--  sys/arm64/arm64/bus_space_asm.S | 399
-rw-r--r--  sys/arm64/arm64/busdma_bounce.c | 1357
-rw-r--r--  sys/arm64/arm64/busdma_machdep.c | 285
-rw-r--r--  sys/arm64/arm64/bzero.S | 206
-rw-r--r--  sys/arm64/arm64/clock.c | 39
-rw-r--r--  sys/arm64/arm64/copyinout.S | 226
-rw-r--r--  sys/arm64/arm64/cpu_errata.c | 192
-rw-r--r--  sys/arm64/arm64/cpufunc_asm.S | 182
-rw-r--r--  sys/arm64/arm64/db_disasm.c | 70
-rw-r--r--  sys/arm64/arm64/db_interface.c | 194
-rw-r--r--  sys/arm64/arm64/db_trace.c | 133
-rw-r--r--  sys/arm64/arm64/debug_monitor.c | 565
-rw-r--r--  sys/arm64/arm64/disassem.c | 545
-rw-r--r--  sys/arm64/arm64/dump_machdep.c | 73
-rw-r--r--  sys/arm64/arm64/efirt_machdep.c | 280
-rw-r--r--  sys/arm64/arm64/elf32_machdep.c | 261
-rw-r--r--  sys/arm64/arm64/elf_machdep.c | 284
-rw-r--r--  sys/arm64/arm64/exception.S | 255
-rw-r--r--  sys/arm64/arm64/freebsd32_machdep.c | 438
-rw-r--r--  sys/arm64/arm64/genassym.c | 79
-rw-r--r--  sys/arm64/arm64/gic_v3.c | 1271
-rw-r--r--  sys/arm64/arm64/gic_v3_acpi.c | 389
-rw-r--r--  sys/arm64/arm64/gic_v3_fdt.c | 331
-rw-r--r--  sys/arm64/arm64/gic_v3_reg.h | 434
-rw-r--r--  sys/arm64/arm64/gic_v3_var.h | 145
-rw-r--r--  sys/arm64/arm64/gicv3_its.c | 1960
-rw-r--r--  sys/arm64/arm64/identcpu.c | 1667
-rw-r--r--  sys/arm64/arm64/in_cksum.c | 241
-rw-r--r--  sys/arm64/arm64/locore.S | 859
-rw-r--r--  sys/arm64/arm64/machdep.c | 1375
-rw-r--r--  sys/arm64/arm64/machdep_boot.c | 232
-rw-r--r--  sys/arm64/arm64/mem.c | 138
-rw-r--r--  sys/arm64/arm64/memcpy.S | 219
-rw-r--r--  sys/arm64/arm64/memmove.S | 150
-rw-r--r--  sys/arm64/arm64/minidump_machdep.c | 448
-rw-r--r--  sys/arm64/arm64/mp_machdep.c | 896
-rw-r--r--  sys/arm64/arm64/nexus.c | 549
-rw-r--r--  sys/arm64/arm64/ofw_machdep.c | 58
-rw-r--r--  sys/arm64/arm64/pmap.c | 6710
-rw-r--r--  sys/arm64/arm64/stack_machdep.c | 93
-rw-r--r--  sys/arm64/arm64/support.S | 290
-rw-r--r--  sys/arm64/arm64/swtch.S | 292
-rw-r--r--  sys/arm64/arm64/sys_machdep.c | 45
-rw-r--r--  sys/arm64/arm64/trap.c | 567
-rw-r--r--  sys/arm64/arm64/uio_machdep.c | 134
-rw-r--r--  sys/arm64/arm64/uma_machdep.c | 77
-rw-r--r--  sys/arm64/arm64/undefined.c | 177
-rw-r--r--  sys/arm64/arm64/unwind.c | 53
-rw-r--r--  sys/arm64/arm64/vfp.c | 380
-rw-r--r--  sys/arm64/arm64/vm_machdep.c | 300
52 files changed, 26865 insertions, 0 deletions
diff --git a/sys/arm64/arm64/autoconf.c b/sys/arm64/arm64/autoconf.c
new file mode 100644
index 000000000000..9788c789cfc4
--- /dev/null
+++ b/sys/arm64/arm64/autoconf.c
@@ -0,0 +1,92 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Set up the system to run on the current machine.
+ *
+ * Configure() is called at boot time and initializes the vba
+ * device tables and the memory controller monitoring. Available
+ * devices are determined (from possibilities mentioned in ioconf.c),
+ * and the drivers are initialized.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/cons.h>
+#include <sys/kernel.h>
+
+#include <machine/intr.h>
+
+static void configure_first(void *);
+static void configure(void *);
+static void configure_final(void *);
+
+SYSINIT(configure1, SI_SUB_CONFIGURE, SI_ORDER_FIRST, configure_first, NULL);
+/* SI_ORDER_SECOND is hookable */
+SYSINIT(configure2, SI_SUB_CONFIGURE, SI_ORDER_THIRD, configure, NULL);
+/* SI_ORDER_MIDDLE is hookable */
+SYSINIT(configure3, SI_SUB_CONFIGURE, SI_ORDER_ANY, configure_final, NULL);
+
+/*
+ * Determine i/o configuration for a machine.
+ */
+static void
+configure_first(void *dummy)
+{
+
+ /* nexus0 is the top of the device tree */
+ device_add_child(root_bus, "nexus", 0);
+}
+
+static void
+configure(void *dummy)
+{
+
+ /* initialize new bus architecture */
+ root_bus_configure();
+}
+
+static void
+configure_final(void *dummy)
+{
+
+ /* Enable interrupt reception on this CPU */
+ intr_enable();
+ cninit_finish();
+
+ if (bootverbose)
+ printf("Device configuration finished.\n");
+
+ cold = 0;
+}
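
For illustration only, a minimal sketch (not part of this commit) of what the "hookable" SI_ORDER_SECOND slot noted in the comments above allows: a SYSINIT that runs after nexus0 has been added but before root_bus_configure() probes the tree. The name example_preconfigure and the printf are hypothetical.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

static void
example_preconfigure(void *dummy)
{

	/* nexus0 exists here; the device tree has not been probed yet. */
	printf("example: about to probe the device tree\n");
}
SYSINIT(example_preconf, SI_SUB_CONFIGURE, SI_ORDER_SECOND,
    example_preconfigure, NULL);
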
diff --git a/sys/arm64/arm64/bus_machdep.c b/sys/arm64/arm64/bus_machdep.c
new file mode 100644
index 000000000000..1fabb91c575f
--- /dev/null
+++ b/sys/arm64/arm64/bus_machdep.c
@@ -0,0 +1,230 @@
+/*-
+ * Copyright (c) 2014 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#define KCSAN_RUNTIME
+
+#include "opt_platform.h"
+
+#include <sys/param.h>
+__FBSDID("$FreeBSD$");
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+
+uint8_t generic_bs_r_1(void *, bus_space_handle_t, bus_size_t);
+uint16_t generic_bs_r_2(void *, bus_space_handle_t, bus_size_t);
+uint32_t generic_bs_r_4(void *, bus_space_handle_t, bus_size_t);
+uint64_t generic_bs_r_8(void *, bus_space_handle_t, bus_size_t);
+
+void generic_bs_rm_1(void *, bus_space_handle_t, bus_size_t, uint8_t *,
+ bus_size_t);
+void generic_bs_rm_2(void *, bus_space_handle_t, bus_size_t, uint16_t *,
+ bus_size_t);
+void generic_bs_rm_4(void *, bus_space_handle_t, bus_size_t, uint32_t *,
+ bus_size_t);
+void generic_bs_rm_8(void *, bus_space_handle_t, bus_size_t, uint64_t *,
+ bus_size_t);
+
+void generic_bs_rr_1(void *, bus_space_handle_t, bus_size_t, uint8_t *,
+ bus_size_t);
+void generic_bs_rr_2(void *, bus_space_handle_t, bus_size_t, uint16_t *,
+ bus_size_t);
+void generic_bs_rr_4(void *, bus_space_handle_t, bus_size_t, uint32_t *,
+ bus_size_t);
+void generic_bs_rr_8(void *, bus_space_handle_t, bus_size_t, uint64_t *,
+ bus_size_t);
+
+void generic_bs_w_1(void *, bus_space_handle_t, bus_size_t, uint8_t);
+void generic_bs_w_2(void *, bus_space_handle_t, bus_size_t, uint16_t);
+void generic_bs_w_4(void *, bus_space_handle_t, bus_size_t, uint32_t);
+void generic_bs_w_8(void *, bus_space_handle_t, bus_size_t, uint64_t);
+
+void generic_bs_wm_1(void *, bus_space_handle_t, bus_size_t, const uint8_t *,
+ bus_size_t);
+void generic_bs_wm_2(void *, bus_space_handle_t, bus_size_t, const uint16_t *,
+ bus_size_t);
+void generic_bs_wm_4(void *, bus_space_handle_t, bus_size_t, const uint32_t *,
+ bus_size_t);
+void generic_bs_wm_8(void *, bus_space_handle_t, bus_size_t, const uint64_t *,
+ bus_size_t);
+
+void generic_bs_wr_1(void *, bus_space_handle_t, bus_size_t, const uint8_t *,
+ bus_size_t);
+void generic_bs_wr_2(void *, bus_space_handle_t, bus_size_t, const uint16_t *,
+ bus_size_t);
+void generic_bs_wr_4(void *, bus_space_handle_t, bus_size_t, const uint32_t *,
+ bus_size_t);
+void generic_bs_wr_8(void *, bus_space_handle_t, bus_size_t, const uint64_t *,
+ bus_size_t);
+
+static int
+generic_bs_map(void *t, bus_addr_t bpa, bus_size_t size, int flags,
+ bus_space_handle_t *bshp)
+{
+ void *va;
+
+ va = pmap_mapdev(bpa, size);
+ if (va == NULL)
+ return (ENOMEM);
+ *bshp = (bus_space_handle_t)va;
+ return (0);
+}
+
+static void
+generic_bs_unmap(void *t, bus_space_handle_t bsh, bus_size_t size)
+{
+
+ pmap_unmapdev(bsh, size);
+}
+
+static void
+generic_bs_barrier(void *t, bus_space_handle_t bsh, bus_size_t offset,
+ bus_size_t size, int flags)
+{
+}
+
+static int
+generic_bs_subregion(void *t, bus_space_handle_t bsh, bus_size_t offset,
+ bus_size_t size, bus_space_handle_t *nbshp)
+{
+
+ *nbshp = bsh + offset;
+ return (0);
+}
+
+struct bus_space memmap_bus = {
+ /* cookie */
+ .bs_cookie = NULL,
+
+ /* mapping/unmapping */
+ .bs_map = generic_bs_map,
+ .bs_unmap = generic_bs_unmap,
+ .bs_subregion = generic_bs_subregion,
+
+ /* allocation/deallocation */
+ .bs_alloc = NULL,
+ .bs_free = NULL,
+
+ /* barrier */
+ .bs_barrier = generic_bs_barrier,
+
+ /* read single */
+ .bs_r_1 = generic_bs_r_1,
+ .bs_r_2 = generic_bs_r_2,
+ .bs_r_4 = generic_bs_r_4,
+ .bs_r_8 = generic_bs_r_8,
+
+ /* read multiple */
+ .bs_rm_1 = generic_bs_rm_1,
+ .bs_rm_2 = generic_bs_rm_2,
+ .bs_rm_4 = generic_bs_rm_4,
+ .bs_rm_8 = generic_bs_rm_8,
+
+ /* read region */
+ .bs_rr_1 = generic_bs_rr_1,
+ .bs_rr_2 = generic_bs_rr_2,
+ .bs_rr_4 = generic_bs_rr_4,
+ .bs_rr_8 = generic_bs_rr_8,
+
+ /* write single */
+ .bs_w_1 = generic_bs_w_1,
+ .bs_w_2 = generic_bs_w_2,
+ .bs_w_4 = generic_bs_w_4,
+ .bs_w_8 = generic_bs_w_8,
+
+ /* write multiple */
+ .bs_wm_1 = generic_bs_wm_1,
+ .bs_wm_2 = generic_bs_wm_2,
+ .bs_wm_4 = generic_bs_wm_4,
+ .bs_wm_8 = generic_bs_wm_8,
+
+ /* write region */
+ .bs_wr_1 = generic_bs_wr_1,
+ .bs_wr_2 = generic_bs_wr_2,
+ .bs_wr_4 = generic_bs_wr_4,
+ .bs_wr_8 = generic_bs_wr_8,
+
+ /* set multiple */
+ .bs_sm_1 = NULL,
+ .bs_sm_2 = NULL,
+ .bs_sm_4 = NULL,
+ .bs_sm_8 = NULL,
+
+ /* set region */
+ .bs_sr_1 = NULL,
+ .bs_sr_2 = NULL,
+ .bs_sr_4 = NULL,
+ .bs_sr_8 = NULL,
+
+ /* copy */
+ .bs_c_1 = NULL,
+ .bs_c_2 = NULL,
+ .bs_c_4 = NULL,
+ .bs_c_8 = NULL,
+
+ /* read single stream */
+ .bs_r_1_s = NULL,
+ .bs_r_2_s = NULL,
+ .bs_r_4_s = NULL,
+ .bs_r_8_s = NULL,
+
+ /* read multiple stream */
+ .bs_rm_1_s = generic_bs_rm_1,
+ .bs_rm_2_s = generic_bs_rm_2,
+ .bs_rm_4_s = generic_bs_rm_4,
+ .bs_rm_8_s = generic_bs_rm_8,
+
+ /* read region stream */
+ .bs_rr_1_s = NULL,
+ .bs_rr_2_s = NULL,
+ .bs_rr_4_s = NULL,
+ .bs_rr_8_s = NULL,
+
+ /* write single stream */
+ .bs_w_1_s = NULL,
+ .bs_w_2_s = NULL,
+ .bs_w_4_s = NULL,
+ .bs_w_8_s = NULL,
+
+ /* write multiple stream */
+ .bs_wm_1_s = generic_bs_wm_1,
+ .bs_wm_2_s = generic_bs_wm_2,
+ .bs_wm_4_s = generic_bs_wm_4,
+ .bs_wm_8_s = generic_bs_wm_8,
+
+ /* write region stream */
+ .bs_wr_1_s = NULL,
+ .bs_wr_2_s = NULL,
+ .bs_wr_4_s = NULL,
+ .bs_wr_8_s = NULL,
+};
+
+#ifdef FDT
+bus_space_tag_t fdtbus_bs_tag = &memmap_bus;
+#endif
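
For context, a hedged sketch (not part of this commit) of how a driver would exercise the memmap_bus tag defined above. The physical address 0x09000000, the window size, and the register offset are made-up values for illustration.

#include <sys/param.h>
#include <machine/bus.h>

extern struct bus_space memmap_bus;

static int
example_read_reg(uint32_t *valp)
{
	bus_space_tag_t bst = &memmap_bus;
	bus_space_handle_t bsh;
	int error;

	/* bus_space_map() lands in generic_bs_map() -> pmap_mapdev(). */
	error = bus_space_map(bst, 0x09000000, 0x1000, 0, &bsh);
	if (error != 0)
		return (error);
	*valp = bus_space_read_4(bst, bsh, 0x04);	/* generic_bs_r_4 */
	bus_space_unmap(bst, bsh, 0x1000);		/* generic_bs_unmap() */
	return (0);
}
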
diff --git a/sys/arm64/arm64/bus_space_asm.S b/sys/arm64/arm64/bus_space_asm.S
new file mode 100644
index 000000000000..d919bd5c61b1
--- /dev/null
+++ b/sys/arm64/arm64/bus_space_asm.S
@@ -0,0 +1,399 @@
+/*-
+ * Copyright (c) 2014 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <machine/asm.h>
+
+__FBSDID("$FreeBSD$");
+
+ENTRY(generic_bs_r_1)
+ ldrb w0, [x1, x2]
+ ret
+END(generic_bs_r_1)
+
+ENTRY(generic_bs_r_2)
+ ldrh w0, [x1, x2]
+ ret
+END(generic_bs_r_2)
+
+ENTRY(generic_bs_r_4)
+ ldr w0, [x1, x2]
+ ret
+END(generic_bs_r_4)
+
+ENTRY(generic_bs_r_8)
+ ldr x0, [x1, x2]
+ ret
+END(generic_bs_r_8)
+
+ENTRY(generic_bs_rm_1)
+ /* If there is anything to read. */
+ cbz x4, 2f
+
+ /* Calculate the device address. */
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Read the data. */
+1: ldrb w1, [x0]
+ strb w1, [x3], #1
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_rm_1)
+
+ENTRY(generic_bs_rm_2)
+ /* If there is anything to read. */
+ cbz x4, 2f
+
+ /* Calculate the device address. */
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Read the data. */
+1: ldrh w1, [x0]
+ strh w1, [x3], #2
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_rm_2)
+
+ENTRY(generic_bs_rm_4)
+ /* If there is anything to read. */
+ cbz x4, 2f
+
+ /* Calculate the device address. */
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Read the data. */
+1: ldr w1, [x0]
+ str w1, [x3], #4
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_rm_4)
+
+ENTRY(generic_bs_rm_8)
+ /* If there is anything to read. */
+ cbz x4, 2f
+
+ /* Calculate the device address. */
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Read the data. */
+1: ldr x1, [x0]
+ str x1, [x3], #8
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_rm_8)
+
+ENTRY(generic_bs_rr_1)
+	/* If there is anything to read. */
+ cbz x4, 2f
+
+ /* Calculate the device address. */
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Read the data. */
+1: ldrb w1, [x0], #1
+ strb w1, [x3], #1
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_rr_1)
+
+ENTRY(generic_bs_rr_2)
+	/* If there is anything to read. */
+ cbz x4, 2f
+
+ /* Calculate the device address. */
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Read the data. */
+1: ldrh w1, [x0], #2
+ strh w1, [x3], #2
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_rr_2)
+
+ENTRY(generic_bs_rr_4)
+	/* If there is anything to read. */
+ cbz x4, 2f
+
+ /* Calculate the device address. */
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Read the data. */
+1: ldr w1, [x0], #4
+ str w1, [x3], #4
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_rr_4)
+
+ENTRY(generic_bs_rr_8)
+	/* If there is anything to read. */
+ cbz x4, 2f
+
+ /* Calculate the device address. */
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Read the data. */
+1: ldr x1, [x0], #8
+ str x1, [x3], #8
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_rr_8)
+
+
+ENTRY(generic_bs_w_1)
+ strb w3, [x1, x2]
+ ret
+END(generic_bs_w_1)
+
+ENTRY(generic_bs_w_2)
+ strh w3, [x1, x2]
+ ret
+END(generic_bs_w_2)
+
+ENTRY(generic_bs_w_4)
+ str w3, [x1, x2]
+ ret
+END(generic_bs_w_4)
+
+ENTRY(generic_bs_w_8)
+ str x3, [x1, x2]
+ ret
+END(generic_bs_w_8)
+
+ENTRY(generic_bs_wm_1)
+ /* If there is anything to write. */
+ cbz x4, 2f
+
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Write the data */
+1: ldrb w1, [x3], #1
+ strb w1, [x0]
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_wm_1)
+
+ENTRY(generic_bs_wm_2)
+ /* If there is anything to write. */
+ cbz x4, 2f
+
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Write the data */
+1: ldrh w1, [x3], #2
+ strh w1, [x0]
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_wm_2)
+
+ENTRY(generic_bs_wm_4)
+ /* If there is anything to write. */
+ cbz x4, 2f
+
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Write the data */
+1: ldr w1, [x3], #4
+ str w1, [x0]
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_wm_4)
+
+ENTRY(generic_bs_wm_8)
+ /* If there is anything to write. */
+ cbz x4, 2f
+
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Write the data */
+1: ldr x1, [x3], #8
+ str x1, [x0]
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_wm_8)
+
+ENTRY(generic_bs_wr_1)
+	/* If there is anything to write. */
+ cbz x4, 2f
+
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Write the data */
+1: ldrb w1, [x3], #1
+ strb w1, [x0], #1
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_wr_1)
+
+ENTRY(generic_bs_wr_2)
+	/* If there is anything to write. */
+ cbz x4, 2f
+
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Write the data */
+1: ldrh w1, [x3], #2
+ strh w1, [x0], #2
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_wr_2)
+
+ENTRY(generic_bs_wr_4)
+	/* If there is anything to write. */
+ cbz x4, 2f
+
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Write the data */
+1: ldr w1, [x3], #4
+ str w1, [x0], #4
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_wr_4)
+
+ENTRY(generic_bs_wr_8)
+	/* If there is anything to write. */
+ cbz x4, 2f
+
+ add x0, x1, x2
+ /*
+ * x0 = The device address.
+ * x3 = The kernel address.
+ * x4 = Count
+ */
+
+ /* Write the data */
+1: ldr x1, [x3], #8
+ str x1, [x0], #8
+ subs x4, x4, #1
+ b.ne 1b
+
+2: ret
+END(generic_bs_wr_8)
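
A brief hedged sketch (not part of this commit) of the distinction the routines above encode: the "multiple" variants (generic_bs_rm_*/wm_*) re-access one device register such as a FIFO, while the "region" variants (generic_bs_rr_*/wr_*) walk consecutive registers. The offsets 0x00 and 0x100 are illustrative assumptions.

#include <sys/param.h>
#include <machine/bus.h>

static void
example_fifo_vs_region(bus_space_tag_t bst, bus_space_handle_t bsh)
{
	uint32_t data[16];

	/* generic_bs_rm_4: read the FIFO register at offset 0x00, 16 times. */
	bus_space_read_multi_4(bst, bsh, 0x00, data, nitems(data));

	/* generic_bs_rr_4: copy 16 consecutive registers starting at 0x100. */
	bus_space_read_region_4(bst, bsh, 0x100, data, nitems(data));
}
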
diff --git a/sys/arm64/arm64/busdma_bounce.c b/sys/arm64/arm64/busdma_bounce.c
new file mode 100644
index 000000000000..9d737d5c9021
--- /dev/null
+++ b/sys/arm64/arm64/busdma_bounce.c
@@ -0,0 +1,1357 @@
+/*-
+ * Copyright (c) 1997, 1998 Justin T. Gibbs.
+ * Copyright (c) 2015-2016 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by Andrew Turner
+ * under sponsorship of the FreeBSD Foundation.
+ *
+ * Portions of this software were developed by Semihalf
+ * under sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification, immediately at the beginning of the file.
+ * 2. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/proc.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
+#include <sys/uio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/md_var.h>
+#include <arm64/include/bus_dma_impl.h>
+
+#define MAX_BPAGES 4096
+
+enum {
+ BF_COULD_BOUNCE = 0x01,
+ BF_MIN_ALLOC_COMP = 0x02,
+ BF_KMEM_ALLOC = 0x04,
+ BF_COHERENT = 0x10,
+};
+
+struct bounce_zone;
+
+struct bus_dma_tag {
+ struct bus_dma_tag_common common;
+ int map_count;
+ int bounce_flags;
+ bus_dma_segment_t *segments;
+ struct bounce_zone *bounce_zone;
+};
+
+struct bounce_page {
+ vm_offset_t vaddr; /* kva of bounce buffer */
+ bus_addr_t busaddr; /* Physical address */
+ vm_offset_t datavaddr; /* kva of client data */
+ vm_page_t datapage; /* physical page of client data */
+ vm_offset_t dataoffs; /* page offset of client data */
+ bus_size_t datacount; /* client data count */
+ STAILQ_ENTRY(bounce_page) links;
+};
+
+int busdma_swi_pending;
+
+struct bounce_zone {
+ STAILQ_ENTRY(bounce_zone) links;
+ STAILQ_HEAD(bp_list, bounce_page) bounce_page_list;
+ int total_bpages;
+ int free_bpages;
+ int reserved_bpages;
+ int active_bpages;
+ int total_bounced;
+ int total_deferred;
+ int map_count;
+ bus_size_t alignment;
+ bus_addr_t lowaddr;
+ char zoneid[8];
+ char lowaddrid[20];
+ struct sysctl_ctx_list sysctl_tree;
+ struct sysctl_oid *sysctl_tree_top;
+};
+
+static struct mtx bounce_lock;
+static int total_bpages;
+static int busdma_zonecount;
+static STAILQ_HEAD(, bounce_zone) bounce_zone_list;
+
+static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "Busdma parameters");
+SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0,
+ "Total bounce pages");
+
+struct sync_list {
+ vm_offset_t vaddr; /* kva of client data */
+ bus_addr_t paddr; /* physical address */
+ vm_page_t pages; /* starting page of client data */
+ bus_size_t datacount; /* client data count */
+};
+
+struct bus_dmamap {
+ struct bp_list bpages;
+ int pagesneeded;
+ int pagesreserved;
+ bus_dma_tag_t dmat;
+ struct memdesc mem;
+ bus_dmamap_callback_t *callback;
+ void *callback_arg;
+ STAILQ_ENTRY(bus_dmamap) links;
+ u_int flags;
+#define DMAMAP_COULD_BOUNCE (1 << 0)
+#define DMAMAP_FROM_DMAMEM (1 << 1)
+ int sync_count;
+ struct sync_list slist[];
+};
+
+static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
+static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist;
+
+static void init_bounce_pages(void *dummy);
+static int alloc_bounce_zone(bus_dma_tag_t dmat);
+static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages);
+static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
+ int commit);
+static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map,
+ vm_offset_t vaddr, bus_addr_t addr, bus_size_t size);
+static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
+int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr);
+static bool _bus_dmamap_pagesneeded(bus_dma_tag_t dmat, vm_paddr_t buf,
+ bus_size_t buflen, int *pagesneeded);
+static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
+ pmap_t pmap, void *buf, bus_size_t buflen, int flags);
+static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
+ vm_paddr_t buf, bus_size_t buflen, int flags);
+static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
+ int flags);
+
+/*
+ * Allocate a device specific dma_tag.
+ */
+static int
+bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
+ bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
+ bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
+ int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
+ void *lockfuncarg, bus_dma_tag_t *dmat)
+{
+ bus_dma_tag_t newtag;
+ int error;
+
+ *dmat = NULL;
+ error = common_bus_dma_tag_create(parent != NULL ? &parent->common :
+ NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg,
+ maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
+ sizeof (struct bus_dma_tag), (void **)&newtag);
+ if (error != 0)
+ return (error);
+
+ newtag->common.impl = &bus_dma_bounce_impl;
+ newtag->map_count = 0;
+ newtag->segments = NULL;
+
+ if ((flags & BUS_DMA_COHERENT) != 0)
+ newtag->bounce_flags |= BF_COHERENT;
+
+ if (parent != NULL) {
+ if ((newtag->common.filter != NULL ||
+ (parent->bounce_flags & BF_COULD_BOUNCE) != 0))
+ newtag->bounce_flags |= BF_COULD_BOUNCE;
+
+ /* Copy some flags from the parent */
+ newtag->bounce_flags |= parent->bounce_flags & BF_COHERENT;
+ }
+
+ if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) ||
+ newtag->common.alignment > 1)
+ newtag->bounce_flags |= BF_COULD_BOUNCE;
+
+ if (((newtag->bounce_flags & BF_COULD_BOUNCE) != 0) &&
+ (flags & BUS_DMA_ALLOCNOW) != 0) {
+ struct bounce_zone *bz;
+
+ /* Must bounce */
+ if ((error = alloc_bounce_zone(newtag)) != 0) {
+ free(newtag, M_DEVBUF);
+ return (error);
+ }
+ bz = newtag->bounce_zone;
+
+ if (ptoa(bz->total_bpages) < maxsize) {
+ int pages;
+
+ pages = atop(round_page(maxsize)) - bz->total_bpages;
+
+ /* Add pages to our bounce pool */
+ if (alloc_bounce_pages(newtag, pages) < pages)
+ error = ENOMEM;
+ }
+ /* Performed initial allocation */
+ newtag->bounce_flags |= BF_MIN_ALLOC_COMP;
+ } else
+ error = 0;
+
+ if (error != 0)
+ free(newtag, M_DEVBUF);
+ else
+ *dmat = newtag;
+ CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
+ __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
+ error);
+ return (error);
+}
+
+static int
+bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat)
+{
+ bus_dma_tag_t dmat_copy, parent;
+ int error;
+
+ error = 0;
+ dmat_copy = dmat;
+
+ if (dmat != NULL) {
+ if (dmat->map_count != 0) {
+ error = EBUSY;
+ goto out;
+ }
+ while (dmat != NULL) {
+ parent = (bus_dma_tag_t)dmat->common.parent;
+ atomic_subtract_int(&dmat->common.ref_count, 1);
+ if (dmat->common.ref_count == 0) {
+ if (dmat->segments != NULL)
+ free(dmat->segments, M_DEVBUF);
+ free(dmat, M_DEVBUF);
+ /*
+ * Last reference count, so
+ * release our reference
+ * count on our parent.
+ */
+ dmat = parent;
+ } else
+ dmat = NULL;
+ }
+ }
+out:
+ CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
+ return (error);
+}
+
+static bool
+bounce_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
+{
+
+ if ((dmat->bounce_flags & BF_COULD_BOUNCE) == 0)
+ return (true);
+ return (!_bus_dmamap_pagesneeded(dmat, buf, buflen, NULL));
+}
+
+static bus_dmamap_t
+alloc_dmamap(bus_dma_tag_t dmat, int flags)
+{
+ u_long mapsize;
+ bus_dmamap_t map;
+
+ mapsize = sizeof(*map);
+ mapsize += sizeof(struct sync_list) * dmat->common.nsegments;
+ map = malloc(mapsize, M_DEVBUF, flags | M_ZERO);
+ if (map == NULL)
+ return (NULL);
+
+ /* Initialize the new map */
+ STAILQ_INIT(&map->bpages);
+
+ return (map);
+}
+
+/*
+ * Allocate a handle for mapping from kva/uva/physical
+ * address space into bus device space.
+ */
+static int
+bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
+{
+ struct bounce_zone *bz;
+ int error, maxpages, pages;
+
+ error = 0;
+
+ if (dmat->segments == NULL) {
+ dmat->segments = (bus_dma_segment_t *)malloc(
+ sizeof(bus_dma_segment_t) * dmat->common.nsegments,
+ M_DEVBUF, M_NOWAIT);
+ if (dmat->segments == NULL) {
+ CTR3(KTR_BUSDMA, "%s: tag %p error %d",
+ __func__, dmat, ENOMEM);
+ return (ENOMEM);
+ }
+ }
+
+ *mapp = alloc_dmamap(dmat, M_NOWAIT);
+ if (*mapp == NULL) {
+ CTR3(KTR_BUSDMA, "%s: tag %p error %d",
+ __func__, dmat, ENOMEM);
+ return (ENOMEM);
+ }
+
+ /*
+ * Bouncing might be required if the driver asks for an active
+ * exclusion region, a data alignment that is stricter than 1, and/or
+ * an active address boundary.
+ */
+ if (dmat->bounce_flags & BF_COULD_BOUNCE) {
+ /* Must bounce */
+ if (dmat->bounce_zone == NULL) {
+ if ((error = alloc_bounce_zone(dmat)) != 0) {
+ free(*mapp, M_DEVBUF);
+ return (error);
+ }
+ }
+ bz = dmat->bounce_zone;
+
+ (*mapp)->flags = DMAMAP_COULD_BOUNCE;
+
+ /*
+ * Attempt to add pages to our pool on a per-instance
+ * basis up to a sane limit.
+ */
+ if (dmat->common.alignment > 1)
+ maxpages = MAX_BPAGES;
+ else
+ maxpages = MIN(MAX_BPAGES, Maxmem -
+ atop(dmat->common.lowaddr));
+ if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP) == 0 ||
+ (bz->map_count > 0 && bz->total_bpages < maxpages)) {
+ pages = MAX(atop(dmat->common.maxsize), 1);
+ pages = MIN(maxpages - bz->total_bpages, pages);
+ pages = MAX(pages, 1);
+ if (alloc_bounce_pages(dmat, pages) < pages)
+ error = ENOMEM;
+ if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP)
+ == 0) {
+ if (error == 0) {
+ dmat->bounce_flags |=
+ BF_MIN_ALLOC_COMP;
+ }
+ } else
+ error = 0;
+ }
+ bz->map_count++;
+ }
+ if (error == 0)
+ dmat->map_count++;
+ else
+ free(*mapp, M_DEVBUF);
+ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
+ __func__, dmat, dmat->common.flags, error);
+ return (error);
+}
+
+/*
+ * Destroy a handle for mapping from kva/uva/physical
+ * address space into bus device space.
+ */
+static int
+bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
+{
+
+ /* Check we are destroying the correct map type */
+ if ((map->flags & DMAMAP_FROM_DMAMEM) != 0)
+ panic("bounce_bus_dmamap_destroy: Invalid map freed\n");
+
+ if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) {
+ CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY);
+ return (EBUSY);
+ }
+ if (dmat->bounce_zone) {
+ KASSERT((map->flags & DMAMAP_COULD_BOUNCE) != 0,
+ ("%s: Bounce zone when cannot bounce", __func__));
+ dmat->bounce_zone->map_count--;
+ }
+ free(map, M_DEVBUF);
+ dmat->map_count--;
+ CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
+ return (0);
+}
+
+/*
+ * Allocate a piece of memory that can be efficiently mapped into
+ * bus device space based on the constraints listed in the dma tag.
+ * A dmamap for use with dmamap_load is also allocated.
+ */
+static int
+bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
+ bus_dmamap_t *mapp)
+{
+ /*
+ * XXX ARM64TODO:
+ * This bus_dma implementation requires an IO-Coherent architecture.
+ * If IO-Coherency is not guaranteed, the BUS_DMA_COHERENT flag has
+ * to be implemented using non-cacheable memory.
+ */
+
+ vm_memattr_t attr;
+ int mflags;
+
+ if (flags & BUS_DMA_NOWAIT)
+ mflags = M_NOWAIT;
+ else
+ mflags = M_WAITOK;
+
+ if (dmat->segments == NULL) {
+ dmat->segments = (bus_dma_segment_t *)malloc(
+ sizeof(bus_dma_segment_t) * dmat->common.nsegments,
+ M_DEVBUF, mflags);
+ if (dmat->segments == NULL) {
+ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
+ __func__, dmat, dmat->common.flags, ENOMEM);
+ return (ENOMEM);
+ }
+ }
+ if (flags & BUS_DMA_ZERO)
+ mflags |= M_ZERO;
+ if (flags & BUS_DMA_NOCACHE)
+ attr = VM_MEMATTR_UNCACHEABLE;
+ else if ((flags & BUS_DMA_COHERENT) != 0 &&
+ (dmat->bounce_flags & BF_COHERENT) == 0)
+ /*
+ * If we have a non-coherent tag, and are trying to allocate
+ * a coherent block of memory it needs to be uncached.
+ */
+ attr = VM_MEMATTR_UNCACHEABLE;
+ else
+ attr = VM_MEMATTR_DEFAULT;
+
+ /*
+ * Create the map, but don't set the could bounce flag as
+ * this allocation should never bounce.
+ */
+ *mapp = alloc_dmamap(dmat, mflags);
+ if (*mapp == NULL) {
+ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
+ __func__, dmat, dmat->common.flags, ENOMEM);
+ return (ENOMEM);
+ }
+ (*mapp)->flags = DMAMAP_FROM_DMAMEM;
+
+ /*
+ * Allocate the buffer from the malloc(9) allocator if...
+ * - It's small enough to fit into a single power of two sized bucket.
+ * - The alignment is less than or equal to the maximum size
+ * - The low address requirement is fulfilled.
+ * else allocate non-contiguous pages if...
+ * - The page count that could get allocated doesn't exceed
+ * nsegments, even when the maximum segment size is less
+ * than PAGE_SIZE.
+ * - The alignment constraint isn't larger than a page boundary.
+ * - There are no boundary-crossing constraints.
+ * else allocate a block of contiguous pages because one or more of the
+ * constraints is something that only the contig allocator can fulfill.
+ *
+ * NOTE: The (dmat->common.alignment <= dmat->maxsize) check
+ * below is just a quick hack. The exact alignment guarantees
+ * of malloc(9) need to be nailed down, and the code below
+ * should be rewritten to take that into account.
+ *
+ * In the meantime warn the user if malloc gets it wrong.
+ */
+ if ((dmat->common.maxsize <= PAGE_SIZE) &&
+ (dmat->common.alignment <= dmat->common.maxsize) &&
+ dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) &&
+ attr == VM_MEMATTR_DEFAULT) {
+ *vaddr = malloc(dmat->common.maxsize, M_DEVBUF, mflags);
+ } else if (dmat->common.nsegments >=
+ howmany(dmat->common.maxsize, MIN(dmat->common.maxsegsz, PAGE_SIZE)) &&
+ dmat->common.alignment <= PAGE_SIZE &&
+ (dmat->common.boundary % PAGE_SIZE) == 0) {
+ /* Page-based multi-segment allocations allowed */
+ *vaddr = (void *)kmem_alloc_attr(dmat->common.maxsize, mflags,
+ 0ul, dmat->common.lowaddr, attr);
+ dmat->bounce_flags |= BF_KMEM_ALLOC;
+ } else {
+ *vaddr = (void *)kmem_alloc_contig(dmat->common.maxsize, mflags,
+ 0ul, dmat->common.lowaddr, dmat->common.alignment != 0 ?
+ dmat->common.alignment : 1ul, dmat->common.boundary, attr);
+ dmat->bounce_flags |= BF_KMEM_ALLOC;
+ }
+ if (*vaddr == NULL) {
+ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
+ __func__, dmat, dmat->common.flags, ENOMEM);
+ free(*mapp, M_DEVBUF);
+ return (ENOMEM);
+ } else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) {
+ printf("bus_dmamem_alloc failed to align memory properly.\n");
+ }
+ dmat->map_count++;
+ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
+ __func__, dmat, dmat->common.flags, 0);
+ return (0);
+}
+
+/*
+ * Free a piece of memory and its associated dmamap that were allocated
+ * via bus_dmamem_alloc. Make the same choice for free/contigfree.
+ */
+static void
+bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
+{
+
+ /*
+ * Check the map came from bounce_bus_dmamem_alloc, so the map
+ * should be NULL and the BF_KMEM_ALLOC flag cleared if malloc()
+ * was used and set if kmem_alloc_contig() was used.
+ */
+ if ((map->flags & DMAMAP_FROM_DMAMEM) == 0)
+ panic("bus_dmamem_free: Invalid map freed\n");
+ if ((dmat->bounce_flags & BF_KMEM_ALLOC) == 0)
+ free(vaddr, M_DEVBUF);
+ else
+ kmem_free((vm_offset_t)vaddr, dmat->common.maxsize);
+ free(map, M_DEVBUF);
+ dmat->map_count--;
+ CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat,
+ dmat->bounce_flags);
+}
+
+static bool
+_bus_dmamap_pagesneeded(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen,
+ int *pagesneeded)
+{
+ bus_addr_t curaddr;
+ bus_size_t sgsize;
+ int count;
+
+ /*
+ * Count the number of bounce pages needed in order to
+ * complete this transfer
+ */
+ count = 0;
+ curaddr = buf;
+ while (buflen != 0) {
+ sgsize = MIN(buflen, dmat->common.maxsegsz);
+ if (bus_dma_run_filter(&dmat->common, curaddr)) {
+ sgsize = MIN(sgsize,
+ PAGE_SIZE - (curaddr & PAGE_MASK));
+ if (pagesneeded == NULL)
+ return (true);
+ count++;
+ }
+ curaddr += sgsize;
+ buflen -= sgsize;
+ }
+
+ if (pagesneeded != NULL)
+ *pagesneeded = count;
+ return (count != 0);
+}
+
+static void
+_bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
+ bus_size_t buflen, int flags)
+{
+
+ if ((map->flags & DMAMAP_COULD_BOUNCE) != 0 && map->pagesneeded == 0) {
+ _bus_dmamap_pagesneeded(dmat, buf, buflen, &map->pagesneeded);
+ CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
+ }
+}
+
+static void
+_bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap,
+ void *buf, bus_size_t buflen, int flags)
+{
+ vm_offset_t vaddr;
+ vm_offset_t vendaddr;
+ bus_addr_t paddr;
+ bus_size_t sg_len;
+
+ if ((map->flags & DMAMAP_COULD_BOUNCE) != 0 && map->pagesneeded == 0) {
+ CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, "
+ "alignment= %d", dmat->common.lowaddr,
+ ptoa((vm_paddr_t)Maxmem),
+ dmat->common.boundary, dmat->common.alignment);
+ CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map,
+ map->pagesneeded);
+ /*
+ * Count the number of bounce pages
+ * needed in order to complete this transfer
+ */
+ vaddr = (vm_offset_t)buf;
+ vendaddr = (vm_offset_t)buf + buflen;
+
+ while (vaddr < vendaddr) {
+ sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK);
+ if (pmap == kernel_pmap)
+ paddr = pmap_kextract(vaddr);
+ else
+ paddr = pmap_extract(pmap, vaddr);
+ if (bus_dma_run_filter(&dmat->common, paddr) != 0) {
+ sg_len = roundup2(sg_len,
+ dmat->common.alignment);
+ map->pagesneeded++;
+ }
+ vaddr += sg_len;
+ }
+ CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
+ }
+}
+
+static int
+_bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags)
+{
+
+ /* Reserve Necessary Bounce Pages */
+ mtx_lock(&bounce_lock);
+ if (flags & BUS_DMA_NOWAIT) {
+ if (reserve_bounce_pages(dmat, map, 0) != 0) {
+ mtx_unlock(&bounce_lock);
+ return (ENOMEM);
+ }
+ } else {
+ if (reserve_bounce_pages(dmat, map, 1) != 0) {
+ /* Queue us for resources */
+ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links);
+ mtx_unlock(&bounce_lock);
+ return (EINPROGRESS);
+ }
+ }
+ mtx_unlock(&bounce_lock);
+
+ return (0);
+}
+
+/*
+ * Add a single contiguous physical range to the segment list.
+ */
+static bus_size_t
+_bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr,
+ bus_size_t sgsize, bus_dma_segment_t *segs, int *segp)
+{
+ bus_addr_t baddr, bmask;
+ int seg;
+
+ /*
+ * Make sure we don't cross any boundaries.
+ */
+ bmask = ~(dmat->common.boundary - 1);
+ if (dmat->common.boundary > 0) {
+ baddr = (curaddr + dmat->common.boundary) & bmask;
+ if (sgsize > (baddr - curaddr))
+ sgsize = (baddr - curaddr);
+ }
+
+ /*
+ * Insert chunk into a segment, coalescing with
+ * previous segment if possible.
+ */
+ seg = *segp;
+ if (seg == -1) {
+ seg = 0;
+ segs[seg].ds_addr = curaddr;
+ segs[seg].ds_len = sgsize;
+ } else {
+ if (curaddr == segs[seg].ds_addr + segs[seg].ds_len &&
+ (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz &&
+ (dmat->common.boundary == 0 ||
+ (segs[seg].ds_addr & bmask) == (curaddr & bmask)))
+ segs[seg].ds_len += sgsize;
+ else {
+ if (++seg >= dmat->common.nsegments)
+ return (0);
+ segs[seg].ds_addr = curaddr;
+ segs[seg].ds_len = sgsize;
+ }
+ }
+ *segp = seg;
+ return (sgsize);
+}
+
+/*
+ * Utility function to load a physical buffer. segp contains
+ * the starting segment on entrance, and the ending segment on exit.
+ */
+static int
+bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
+ vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
+ int *segp)
+{
+ struct sync_list *sl;
+ bus_size_t sgsize;
+ bus_addr_t curaddr, sl_end;
+ int error;
+
+ if (segs == NULL)
+ segs = dmat->segments;
+
+ if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) {
+ _bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
+ if (map->pagesneeded != 0) {
+ error = _bus_dmamap_reserve_pages(dmat, map, flags);
+ if (error)
+ return (error);
+ }
+ }
+
+ sl = map->slist + map->sync_count - 1;
+ sl_end = 0;
+
+ while (buflen > 0) {
+ curaddr = buf;
+ sgsize = MIN(buflen, dmat->common.maxsegsz);
+ if (((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) &&
+ map->pagesneeded != 0 &&
+ bus_dma_run_filter(&dmat->common, curaddr)) {
+ sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
+ curaddr = add_bounce_page(dmat, map, 0, curaddr,
+ sgsize);
+ } else if ((dmat->bounce_flags & BF_COHERENT) == 0) {
+ if (map->sync_count > 0)
+ sl_end = sl->paddr + sl->datacount;
+
+ if (map->sync_count == 0 || curaddr != sl_end) {
+ if (++map->sync_count > dmat->common.nsegments)
+ break;
+ sl++;
+ sl->vaddr = 0;
+ sl->paddr = curaddr;
+ sl->datacount = sgsize;
+ sl->pages = PHYS_TO_VM_PAGE(curaddr);
+ KASSERT(sl->pages != NULL,
+ ("%s: page at PA:0x%08lx is not in "
+ "vm_page_array", __func__, curaddr));
+ } else
+ sl->datacount += sgsize;
+ }
+ sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
+ segp);
+ if (sgsize == 0)
+ break;
+ buf += sgsize;
+ buflen -= sgsize;
+ }
+
+ /*
+ * Did we fit?
+ */
+ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
+}
+
+/*
+ * Utility function to load a linear buffer. segp contains
+ * the starting segment on entrance, and the ending segment on exit.
+ */
+static int
+bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
+ bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
+ int *segp)
+{
+ struct sync_list *sl;
+ bus_size_t sgsize, max_sgsize;
+ bus_addr_t curaddr, sl_pend;
+ vm_offset_t kvaddr, vaddr, sl_vend;
+ int error;
+
+ if (segs == NULL)
+ segs = dmat->segments;
+
+ if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) {
+ _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags);
+ if (map->pagesneeded != 0) {
+ error = _bus_dmamap_reserve_pages(dmat, map, flags);
+ if (error)
+ return (error);
+ }
+ }
+
+ sl = map->slist + map->sync_count - 1;
+ vaddr = (vm_offset_t)buf;
+ sl_pend = 0;
+ sl_vend = 0;
+
+ while (buflen > 0) {
+ /*
+ * Get the physical address for this segment.
+ */
+ if (pmap == kernel_pmap) {
+ curaddr = pmap_kextract(vaddr);
+ kvaddr = vaddr;
+ } else {
+ curaddr = pmap_extract(pmap, vaddr);
+ kvaddr = 0;
+ }
+
+ /*
+ * Compute the segment size, and adjust counts.
+ */
+ max_sgsize = MIN(buflen, dmat->common.maxsegsz);
+ sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
+ if (((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) &&
+ map->pagesneeded != 0 &&
+ bus_dma_run_filter(&dmat->common, curaddr)) {
+ sgsize = roundup2(sgsize, dmat->common.alignment);
+ sgsize = MIN(sgsize, max_sgsize);
+ curaddr = add_bounce_page(dmat, map, kvaddr, curaddr,
+ sgsize);
+ } else if ((dmat->bounce_flags & BF_COHERENT) == 0) {
+ sgsize = MIN(sgsize, max_sgsize);
+ if (map->sync_count > 0) {
+ sl_pend = sl->paddr + sl->datacount;
+ sl_vend = sl->vaddr + sl->datacount;
+ }
+
+ if (map->sync_count == 0 ||
+ (kvaddr != 0 && kvaddr != sl_vend) ||
+ (curaddr != sl_pend)) {
+ if (++map->sync_count > dmat->common.nsegments)
+ goto cleanup;
+ sl++;
+ sl->vaddr = kvaddr;
+ sl->paddr = curaddr;
+ if (kvaddr != 0) {
+ sl->pages = NULL;
+ } else {
+ sl->pages = PHYS_TO_VM_PAGE(curaddr);
+ KASSERT(sl->pages != NULL,
+ ("%s: page at PA:0x%08lx is not "
+ "in vm_page_array", __func__,
+ curaddr));
+ }
+ sl->datacount = sgsize;
+ } else
+ sl->datacount += sgsize;
+ } else {
+ sgsize = MIN(sgsize, max_sgsize);
+ }
+ sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
+ segp);
+ if (sgsize == 0)
+ break;
+ vaddr += sgsize;
+ buflen -= sgsize;
+ }
+
+cleanup:
+ /*
+ * Did we fit?
+ */
+ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
+}
+
+static void
+bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map,
+ struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
+{
+
+ if ((map->flags & DMAMAP_COULD_BOUNCE) == 0)
+ return;
+ map->mem = *mem;
+ map->dmat = dmat;
+ map->callback = callback;
+ map->callback_arg = callback_arg;
+}
+
+static bus_dma_segment_t *
+bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
+ bus_dma_segment_t *segs, int nsegs, int error)
+{
+
+ if (segs == NULL)
+ segs = dmat->segments;
+ return (segs);
+}
+
+/*
+ * Release the mapping held by map.
+ */
+static void
+bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
+{
+ struct bounce_page *bpage;
+
+ while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
+ STAILQ_REMOVE_HEAD(&map->bpages, links);
+ free_bounce_page(dmat, bpage);
+ }
+
+ map->sync_count = 0;
+}
+
+static void
+dma_preread_safe(vm_offset_t va, vm_size_t size)
+{
+ /*
+ * Write back any partial cachelines immediately before and
+ * after the DMA region.
+ */
+ if (va & (dcache_line_size - 1))
+ cpu_dcache_wb_range(va, 1);
+ if ((va + size) & (dcache_line_size - 1))
+ cpu_dcache_wb_range(va + size, 1);
+
+ cpu_dcache_inv_range(va, size);
+}
+
+static void
+dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op)
+{
+ uint32_t len, offset;
+ vm_page_t m;
+ vm_paddr_t pa;
+ vm_offset_t va, tempva;
+ bus_size_t size;
+
+ offset = sl->paddr & PAGE_MASK;
+ m = sl->pages;
+ size = sl->datacount;
+ pa = sl->paddr;
+
+ for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) {
+ tempva = 0;
+ if (sl->vaddr == 0) {
+ len = min(PAGE_SIZE - offset, size);
+ tempva = pmap_quick_enter_page(m);
+ va = tempva | offset;
+ KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset),
+ ("unexpected vm_page_t phys: 0x%16lx != 0x%16lx",
+ VM_PAGE_TO_PHYS(m) | offset, pa));
+ } else {
+ len = sl->datacount;
+ va = sl->vaddr;
+ }
+
+ switch (op) {
+ case BUS_DMASYNC_PREWRITE:
+ case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD:
+ cpu_dcache_wb_range(va, len);
+ break;
+ case BUS_DMASYNC_PREREAD:
+ /*
+ * An mbuf may start in the middle of a cacheline. There
+ * will be no cpu writes to the beginning of that line
+ * (which contains the mbuf header) while dma is in
+ * progress. Handle that case by doing a writeback of
+ * just the first cacheline before invalidating the
+ * overall buffer. Any mbuf in a chain may have this
+ * misalignment. Buffers which are not mbufs bounce if
+ * they are not aligned to a cacheline.
+ */
+ dma_preread_safe(va, len);
+ break;
+ case BUS_DMASYNC_POSTREAD:
+ case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
+ cpu_dcache_inv_range(va, len);
+ break;
+ default:
+ panic("unsupported combination of sync operations: "
+ "0x%08x\n", op);
+ }
+
+ if (tempva != 0)
+ pmap_quick_remove_page(tempva);
+ }
+}
+
+static void
+bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
+ bus_dmasync_op_t op)
+{
+ struct bounce_page *bpage;
+ struct sync_list *sl, *end;
+ vm_offset_t datavaddr, tempvaddr;
+
+ if (op == BUS_DMASYNC_POSTWRITE)
+ return;
+
+ if ((op & BUS_DMASYNC_POSTREAD) != 0) {
+ /*
+ * Wait for any DMA operations to complete before the bcopy.
+ */
+ dsb(sy);
+ }
+
+ if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
+ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
+ "performing bounce", __func__, dmat, dmat->common.flags,
+ op);
+
+ if ((op & BUS_DMASYNC_PREWRITE) != 0) {
+ while (bpage != NULL) {
+ tempvaddr = 0;
+ datavaddr = bpage->datavaddr;
+ if (datavaddr == 0) {
+ tempvaddr = pmap_quick_enter_page(
+ bpage->datapage);
+ datavaddr = tempvaddr | bpage->dataoffs;
+ }
+
+ bcopy((void *)datavaddr,
+ (void *)bpage->vaddr, bpage->datacount);
+ if (tempvaddr != 0)
+ pmap_quick_remove_page(tempvaddr);
+ if ((dmat->bounce_flags & BF_COHERENT) == 0)
+ cpu_dcache_wb_range(bpage->vaddr,
+ bpage->datacount);
+ bpage = STAILQ_NEXT(bpage, links);
+ }
+ dmat->bounce_zone->total_bounced++;
+ } else if ((op & BUS_DMASYNC_PREREAD) != 0) {
+ while (bpage != NULL) {
+ if ((dmat->bounce_flags & BF_COHERENT) == 0)
+ cpu_dcache_wbinv_range(bpage->vaddr,
+ bpage->datacount);
+ bpage = STAILQ_NEXT(bpage, links);
+ }
+ }
+
+ if ((op & BUS_DMASYNC_POSTREAD) != 0) {
+ while (bpage != NULL) {
+ if ((dmat->bounce_flags & BF_COHERENT) == 0)
+ cpu_dcache_inv_range(bpage->vaddr,
+ bpage->datacount);
+ tempvaddr = 0;
+ datavaddr = bpage->datavaddr;
+ if (datavaddr == 0) {
+ tempvaddr = pmap_quick_enter_page(
+ bpage->datapage);
+ datavaddr = tempvaddr | bpage->dataoffs;
+ }
+
+ bcopy((void *)bpage->vaddr,
+ (void *)datavaddr, bpage->datacount);
+
+ if (tempvaddr != 0)
+ pmap_quick_remove_page(tempvaddr);
+ bpage = STAILQ_NEXT(bpage, links);
+ }
+ dmat->bounce_zone->total_bounced++;
+ }
+ }
+
+ /*
+ * Cache maintenance for normal (non-COHERENT non-bounce) buffers.
+ */
+ if (map->sync_count != 0) {
+ sl = &map->slist[0];
+ end = &map->slist[map->sync_count];
+ CTR3(KTR_BUSDMA, "%s: tag %p op 0x%x "
+ "performing sync", __func__, dmat, op);
+
+ for ( ; sl != end; ++sl)
+ dma_dcache_sync(sl, op);
+ }
+
+ if ((op & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) != 0) {
+ /*
+ * Wait for the bcopy to complete before any DMA operations.
+ */
+ dsb(sy);
+ }
+}
+
+static void
+init_bounce_pages(void *dummy __unused)
+{
+
+ total_bpages = 0;
+ STAILQ_INIT(&bounce_zone_list);
+ STAILQ_INIT(&bounce_map_waitinglist);
+ STAILQ_INIT(&bounce_map_callbacklist);
+ mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF);
+}
+SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL);
+
+static struct sysctl_ctx_list *
+busdma_sysctl_tree(struct bounce_zone *bz)
+{
+
+ return (&bz->sysctl_tree);
+}
+
+static struct sysctl_oid *
+busdma_sysctl_tree_top(struct bounce_zone *bz)
+{
+
+ return (bz->sysctl_tree_top);
+}
+
+static int
+alloc_bounce_zone(bus_dma_tag_t dmat)
+{
+ struct bounce_zone *bz;
+
+ /* Check to see if we already have a suitable zone */
+ STAILQ_FOREACH(bz, &bounce_zone_list, links) {
+ if ((dmat->common.alignment <= bz->alignment) &&
+ (dmat->common.lowaddr >= bz->lowaddr)) {
+ dmat->bounce_zone = bz;
+ return (0);
+ }
+ }
+
+ if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF,
+ M_NOWAIT | M_ZERO)) == NULL)
+ return (ENOMEM);
+
+ STAILQ_INIT(&bz->bounce_page_list);
+ bz->free_bpages = 0;
+ bz->reserved_bpages = 0;
+ bz->active_bpages = 0;
+ bz->lowaddr = dmat->common.lowaddr;
+ bz->alignment = MAX(dmat->common.alignment, PAGE_SIZE);
+ bz->map_count = 0;
+ snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount);
+ busdma_zonecount++;
+ snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr);
+ STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links);
+ dmat->bounce_zone = bz;
+
+ sysctl_ctx_init(&bz->sysctl_tree);
+ bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree,
+ SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid,
+ CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+ if (bz->sysctl_tree_top == NULL) {
+ sysctl_ctx_free(&bz->sysctl_tree);
+ return (0); /* XXX error code? */
+ }
+
+ SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
+ SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
+ "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0,
+ "Total bounce pages");
+ SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
+ SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
+ "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0,
+ "Free bounce pages");
+ SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
+ SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
+ "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0,
+ "Reserved bounce pages");
+ SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
+ SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
+ "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0,
+ "Active bounce pages");
+ SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
+ SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
+ "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0,
+ "Total bounce requests");
+ SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
+ SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
+ "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0,
+ "Total bounce requests that were deferred");
+ SYSCTL_ADD_STRING(busdma_sysctl_tree(bz),
+ SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
+ "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, "");
+ SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz),
+ SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
+ "alignment", CTLFLAG_RD, &bz->alignment, "");
+
+ return (0);
+}
+
+static int
+alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages)
+{
+ struct bounce_zone *bz;
+ int count;
+
+ bz = dmat->bounce_zone;
+ count = 0;
+ while (numpages > 0) {
+ struct bounce_page *bpage;
+
+ bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+
+ if (bpage == NULL)
+ break;
+ bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF,
+ M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0);
+ if (bpage->vaddr == 0) {
+ free(bpage, M_DEVBUF);
+ break;
+ }
+ bpage->busaddr = pmap_kextract(bpage->vaddr);
+ mtx_lock(&bounce_lock);
+ STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links);
+ total_bpages++;
+ bz->total_bpages++;
+ bz->free_bpages++;
+ mtx_unlock(&bounce_lock);
+ count++;
+ numpages--;
+ }
+ return (count);
+}
+
+static int
+reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit)
+{
+ struct bounce_zone *bz;
+ int pages;
+
+ mtx_assert(&bounce_lock, MA_OWNED);
+ bz = dmat->bounce_zone;
+ pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved);
+ if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages))
+ return (map->pagesneeded - (map->pagesreserved + pages));
+ bz->free_bpages -= pages;
+ bz->reserved_bpages += pages;
+ map->pagesreserved += pages;
+ pages = map->pagesneeded - map->pagesreserved;
+
+ return (pages);
+}
+
+static bus_addr_t
+add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
+ bus_addr_t addr, bus_size_t size)
+{
+ struct bounce_zone *bz;
+ struct bounce_page *bpage;
+
+ KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag"));
+ KASSERT((map->flags & DMAMAP_COULD_BOUNCE) != 0,
+ ("add_bounce_page: bad map %p", map));
+
+ bz = dmat->bounce_zone;
+ if (map->pagesneeded == 0)
+ panic("add_bounce_page: map doesn't need any pages");
+ map->pagesneeded--;
+
+ if (map->pagesreserved == 0)
+ panic("add_bounce_page: map doesn't need any pages");
+ map->pagesreserved--;
+
+ mtx_lock(&bounce_lock);
+ bpage = STAILQ_FIRST(&bz->bounce_page_list);
+ if (bpage == NULL)
+ panic("add_bounce_page: free page list is empty");
+
+ STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links);
+ bz->reserved_bpages--;
+ bz->active_bpages++;
+ mtx_unlock(&bounce_lock);
+
+ if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) {
+ /* Page offset needs to be preserved. */
+ bpage->vaddr |= addr & PAGE_MASK;
+ bpage->busaddr |= addr & PAGE_MASK;
+ }
+ bpage->datavaddr = vaddr;
+ bpage->datapage = PHYS_TO_VM_PAGE(addr);
+ bpage->dataoffs = addr & PAGE_MASK;
+ bpage->datacount = size;
+ STAILQ_INSERT_TAIL(&(map->bpages), bpage, links);
+ return (bpage->busaddr);
+}
+
+static void
+free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage)
+{
+ struct bus_dmamap *map;
+ struct bounce_zone *bz;
+
+ bz = dmat->bounce_zone;
+ bpage->datavaddr = 0;
+ bpage->datacount = 0;
+ if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) {
+ /*
+ * Reset the bounce page to start at offset 0. Other uses
+ * of this bounce page may need to store a full page of
+ * data and/or assume it starts on a page boundary.
+ */
+ bpage->vaddr &= ~PAGE_MASK;
+ bpage->busaddr &= ~PAGE_MASK;
+ }
+
+ mtx_lock(&bounce_lock);
+ STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links);
+ bz->free_bpages++;
+ bz->active_bpages--;
+ if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) {
+ if (reserve_bounce_pages(map->dmat, map, 1) == 0) {
+ STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links);
+ STAILQ_INSERT_TAIL(&bounce_map_callbacklist,
+ map, links);
+ busdma_swi_pending = 1;
+ bz->total_deferred++;
+ swi_sched(vm_ih, 0);
+ }
+ }
+ mtx_unlock(&bounce_lock);
+}
+
+void
+busdma_swi(void)
+{
+ bus_dma_tag_t dmat;
+ struct bus_dmamap *map;
+
+ mtx_lock(&bounce_lock);
+ while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) {
+ STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links);
+ mtx_unlock(&bounce_lock);
+ dmat = map->dmat;
+ (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_LOCK);
+ bus_dmamap_load_mem(map->dmat, map, &map->mem,
+ map->callback, map->callback_arg, BUS_DMA_WAITOK);
+ (dmat->common.lockfunc)(dmat->common.lockfuncarg,
+ BUS_DMA_UNLOCK);
+ mtx_lock(&bounce_lock);
+ }
+ mtx_unlock(&bounce_lock);
+}
+
+struct bus_dma_impl bus_dma_bounce_impl = {
+ .tag_create = bounce_bus_dma_tag_create,
+ .tag_destroy = bounce_bus_dma_tag_destroy,
+ .id_mapped = bounce_bus_dma_id_mapped,
+ .map_create = bounce_bus_dmamap_create,
+ .map_destroy = bounce_bus_dmamap_destroy,
+ .mem_alloc = bounce_bus_dmamem_alloc,
+ .mem_free = bounce_bus_dmamem_free,
+ .load_phys = bounce_bus_dmamap_load_phys,
+ .load_buffer = bounce_bus_dmamap_load_buffer,
+ .load_ma = bus_dmamap_load_ma_triv,
+ .map_waitok = bounce_bus_dmamap_waitok,
+ .map_complete = bounce_bus_dmamap_complete,
+ .map_unload = bounce_bus_dmamap_unload,
+ .map_sync = bounce_bus_dmamap_sync
+};
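
The deferral machinery above (the wait list and busdma_swi()) is only visible to drivers that load maps with BUS_DMA_WAITOK: when bounce pages run out the load returns EINPROGRESS and the callback fires later, once free_bounce_page() reschedules the map. A minimal driver-side sketch against the standard bus_dma(9) interface follows; the softc layout and the names foo_softc, foo_dma_cb and foo_load are illustrative assumptions, not part of this commit.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/errno.h>
#include <machine/bus.h>

struct foo_softc {
	bus_dma_tag_t	 foo_dtag;
	bus_dmamap_t	 foo_dmap;
	bus_addr_t	 foo_paddr;	/* filled in by the callback */
};

/* Runs either synchronously from the load or later from busdma_swi(). */
static void
foo_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct foo_softc *sc;

	sc = arg;
	if (error != 0)
		return;
	sc->foo_paddr = segs[0].ds_addr;	/* a single segment was requested */
}

static int
foo_load(struct foo_softc *sc, void *buf, bus_size_t len)
{
	int error;

	/*
	 * BUS_DMA_WAITOK lets the load be deferred: EINPROGRESS means the
	 * callback will run once free_bounce_page() releases enough pages.
	 */
	error = bus_dmamap_load(sc->foo_dtag, sc->foo_dmap, buf, len,
	    foo_dma_cb, sc, BUS_DMA_WAITOK);
	if (error == EINPROGRESS)
		error = 0;
	return (error);
}

The point of the EINPROGRESS check is that a deferred load is not an error; the callback simply runs later from busdma_swi(), with the driver lock taken through dmat->common.lockfunc.
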
diff --git a/sys/arm64/arm64/busdma_machdep.c b/sys/arm64/arm64/busdma_machdep.c
new file mode 100644
index 000000000000..1a5ac67a2a4f
--- /dev/null
+++ b/sys/arm64/arm64/busdma_machdep.c
@@ -0,0 +1,285 @@
+/*-
+ * Copyright (c) 1997, 1998 Justin T. Gibbs.
+ * Copyright (c) 2013, 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Portions of this software were developed by Semihalf
+ * under sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification, immediately at the beginning of the file.
+ * 2. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/uio.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+#include <arm64/include/bus_dma_impl.h>
+
+/*
+ * Convenience function for manipulating driver locks from busdma (during
+ * busdma_swi, for example). Drivers that don't provide their own locks
+ * should specify &Giant to dmat->lockfuncarg. Drivers that use their own
+ * non-mutex locking scheme don't have to use this at all.
+ */
+void
+busdma_lock_mutex(void *arg, bus_dma_lock_op_t op)
+{
+ struct mtx *dmtx;
+
+ dmtx = (struct mtx *)arg;
+ switch (op) {
+ case BUS_DMA_LOCK:
+ mtx_lock(dmtx);
+ break;
+ case BUS_DMA_UNLOCK:
+ mtx_unlock(dmtx);
+ break;
+ default:
+ panic("Unknown operation 0x%x for busdma_lock_mutex!", op);
+ }
+}
+
+/*
+ * dflt_lock should never get called. It gets put into the dma tag when
+ * lockfunc == NULL, which is only valid if the maps that are associated
+ * with the tag are never meant to be deferred.
+ * XXX Should have a way to identify which driver is responsible here.
+ */
+void
+bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op)
+{
+
+ panic("driver error: busdma dflt_lock called");
+}
+
+/*
+ * Return true if a match is made.
+ *
+ * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'.
+ *
+ * If paddr is within the bounds of the dma tag then call the filter callback
+ * to check for a match; if there is no filter callback, assume a match.
+ */
+int
+bus_dma_run_filter(struct bus_dma_tag_common *tc, bus_addr_t paddr)
+{
+ int retval;
+
+ retval = 0;
+ do {
+ if (((paddr > tc->lowaddr && paddr <= tc->highaddr) ||
+ ((paddr & (tc->alignment - 1)) != 0)) &&
+ (tc->filter == NULL ||
+ (*tc->filter)(tc->filterarg, paddr) != 0))
+ retval = 1;
+
+ tc = tc->parent;
+ } while (retval == 0 && tc != NULL);
+ return (retval);
+}
+
+int
+common_bus_dma_tag_create(struct bus_dma_tag_common *parent,
+ bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr,
+ bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg,
+ bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags,
+ bus_dma_lock_t *lockfunc, void *lockfuncarg, size_t sz, void **dmat)
+{
+ void *newtag;
+ struct bus_dma_tag_common *common;
+
+ KASSERT(sz >= sizeof(struct bus_dma_tag_common), ("sz"));
+ /* Return a NULL tag on failure */
+ *dmat = NULL;
+ /* Basic sanity checking */
+ if (boundary != 0 && boundary < maxsegsz)
+ maxsegsz = boundary;
+ if (maxsegsz == 0)
+ return (EINVAL);
+
+ newtag = malloc(sz, M_DEVBUF, M_ZERO | M_NOWAIT);
+ if (newtag == NULL) {
+ CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
+ __func__, newtag, 0, ENOMEM);
+ return (ENOMEM);
+ }
+
+ common = newtag;
+ common->impl = &bus_dma_bounce_impl;
+ common->parent = parent;
+ common->alignment = alignment;
+ common->boundary = boundary;
+ common->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1);
+ common->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1);
+ common->filter = filter;
+ common->filterarg = filterarg;
+ common->maxsize = maxsize;
+ common->nsegments = nsegments;
+ common->maxsegsz = maxsegsz;
+ common->flags = flags;
+ common->ref_count = 1; /* Count ourself */
+ if (lockfunc != NULL) {
+ common->lockfunc = lockfunc;
+ common->lockfuncarg = lockfuncarg;
+ } else {
+ common->lockfunc = bus_dma_dflt_lock;
+ common->lockfuncarg = NULL;
+ }
+
+ /* Take into account any restrictions imposed by our parent tag */
+ if (parent != NULL) {
+ common->impl = parent->impl;
+ common->lowaddr = MIN(parent->lowaddr, common->lowaddr);
+ common->highaddr = MAX(parent->highaddr, common->highaddr);
+ if (common->boundary == 0)
+ common->boundary = parent->boundary;
+ else if (parent->boundary != 0) {
+ common->boundary = MIN(parent->boundary,
+ common->boundary);
+ }
+ if (common->filter == NULL) {
+ /*
+ * Short circuit looking at our parent directly
+ * since we have encapsulated all of its information
+ */
+ common->filter = parent->filter;
+ common->filterarg = parent->filterarg;
+ common->parent = parent->parent;
+ }
+ atomic_add_int(&parent->ref_count, 1);
+ }
+ *dmat = common;
+ return (0);
+}
+
+/*
+ * Allocate a device specific dma_tag.
+ */
+int
+bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
+ bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
+ bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
+ int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
+ void *lockfuncarg, bus_dma_tag_t *dmat)
+{
+ struct bus_dma_tag_common *tc;
+ int error;
+
+ if (parent == NULL) {
+ error = bus_dma_bounce_impl.tag_create(parent, alignment,
+ boundary, lowaddr, highaddr, filter, filterarg, maxsize,
+ nsegments, maxsegsz, flags, lockfunc, lockfuncarg, dmat);
+ } else {
+ tc = (struct bus_dma_tag_common *)parent;
+ error = tc->impl->tag_create(parent, alignment,
+ boundary, lowaddr, highaddr, filter, filterarg, maxsize,
+ nsegments, maxsegsz, flags, lockfunc, lockfuncarg, dmat);
+ }
+ return (error);
+}
+
+void
+bus_dma_template_init(bus_dma_tag_template_t *t, bus_dma_tag_t parent)
+{
+
+ if (t == NULL)
+ return;
+
+ t->parent = parent;
+ t->alignment = 1;
+ t->boundary = 0;
+ t->lowaddr = t->highaddr = BUS_SPACE_MAXADDR;
+ t->maxsize = t->maxsegsize = BUS_SPACE_MAXSIZE;
+ t->nsegments = BUS_SPACE_UNRESTRICTED;
+ t->lockfunc = NULL;
+ t->lockfuncarg = NULL;
+ t->flags = 0;
+}
+
+int
+bus_dma_template_tag(bus_dma_tag_template_t *t, bus_dma_tag_t *dmat)
+{
+
+ if (t == NULL || dmat == NULL)
+ return (EINVAL);
+
+ return (bus_dma_tag_create(t->parent, t->alignment, t->boundary,
+ t->lowaddr, t->highaddr, NULL, NULL, t->maxsize,
+ t->nsegments, t->maxsegsize, t->flags, t->lockfunc, t->lockfuncarg,
+ dmat));
+}
+
+void
+bus_dma_template_clone(bus_dma_tag_template_t *t, bus_dma_tag_t dmat)
+{
+ struct bus_dma_tag_common *common;
+
+ if (t == NULL || dmat == NULL)
+ return;
+
+ common = (struct bus_dma_tag_common *)dmat;
+
+ t->parent = (bus_dma_tag_t)common->parent;
+ t->alignment = common->alignment;
+ t->boundary = common->boundary;
+ t->lowaddr = common->lowaddr;
+ t->highaddr = common->highaddr;
+ t->maxsize = common->maxsize;
+ t->nsegments = common->nsegments;
+ t->maxsegsize = common->maxsegsz;
+ t->flags = common->flags;
+ t->lockfunc = common->lockfunc;
+ t->lockfuncarg = common->lockfuncarg;
+}
+
+int
+bus_dma_tag_destroy(bus_dma_tag_t dmat)
+{
+ struct bus_dma_tag_common *tc;
+
+ tc = (struct bus_dma_tag_common *)dmat;
+ return (tc->impl->tag_destroy(dmat));
+}
+
+int
+bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain)
+{
+
+ return (0);
+}
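
As a usage note, the template helpers above let a driver spell out only the constraints it cares about instead of the full fourteen-argument bus_dma_tag_create() call. The sketch below shows both styles under a few assumptions: the chosen limits (one segment of at most PAGE_SIZE, 32-bit addressing) are arbitrary examples, example_create_tags is an invented name, and passing busdma_lock_mutex with &Giant follows the convention described in the comment at the top of this file.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <machine/bus.h>

static int
example_create_tags(bus_dma_tag_t parent, bus_dma_tag_t *plain,
    bus_dma_tag_t *templated)
{
	bus_dma_tag_template_t t;
	int error;

	/* Long form: every restriction is passed explicitly. */
	error = bus_dma_tag_create(parent,
	    sizeof(uint64_t), 0,		/* alignment, boundary */
	    BUS_SPACE_MAXADDR_32BIT,		/* lowaddr: bounce above 4GB */
	    BUS_SPACE_MAXADDR,			/* highaddr */
	    NULL, NULL,				/* filter, filterarg */
	    PAGE_SIZE, 1, PAGE_SIZE,		/* maxsize, nsegments, maxsegsz */
	    0, busdma_lock_mutex, &Giant, plain);
	if (error != 0)
		return (error);

	/* Template form: start from the defaults, override a few fields. */
	bus_dma_template_init(&t, parent);
	t.maxsize = PAGE_SIZE;
	t.nsegments = 1;
	t.maxsegsize = PAGE_SIZE;
	return (bus_dma_template_tag(&t, templated));
}
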
diff --git a/sys/arm64/arm64/bzero.S b/sys/arm64/arm64/bzero.S
new file mode 100644
index 000000000000..6c7f1fef1494
--- /dev/null
+++ b/sys/arm64/arm64/bzero.S
@@ -0,0 +1,206 @@
+/*-
+ * Copyright (C) 2016 Cavium Inc.
+ * All rights reserved.
+ *
+ * Developed by Semihalf.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+
+#include "assym.inc"
+
+ /*
+ * void bzero(void *p, size_t size)
+ *
+ * x0 - p
+ * x1 - size
+ */
+ENTRY(bzero)
+ cbz x1, ending
+
+ /*
+	 * x5 is the number of cache lines to zero.  It is calculated later
+	 * and becomes non-zero only if the buffer is long enough to be
+	 * zeroed by cache lines (and if doing so is allowed).
+	 * It must be cleared before handling buffers smaller than 16 bytes;
+	 * otherwise that path would leave x5 uncalculated, holding a stale
+	 * value.
+	 * "normal" handles buffers <= 16 bytes and also aligns larger
+	 * buffers to a cache line.  A non-zero x5 after "normal" completes
+	 * means it was used for that alignment, zeroing by cache lines
+	 * follows, and x5 is the number of cache lines to loop through.
+ */
+ mov x5, xzr
+
+ /* No use of cache assisted zero for buffers with size <= 16 */
+ cmp x1, #0x10
+ b.le normal
+
+ /*
+ * Load size of line that will be cleaned by dc zva call.
+ * 0 means that the instruction is not allowed
+ */
+ ldr x7, =dczva_line_size
+ ldr x7, [x7]
+ cbz x7, normal
+
+ /*
+ * Buffer must be larger than cache line for using cache zeroing
+ * (and cache line aligned but this is checked after jump)
+ */
+ cmp x1, x7
+ b.lt normal
+
+ /*
+	 * Calculate the number of bytes to the cache-aligned address (x4)
+	 * and the number of full cache lines (x5). x6 is the final address
+	 * to zero.
+ */
+ sub x2, x7, #0x01
+ mov x3, -1
+ eor x3, x3, x2
+ add x4, x0, x2
+ and x4, x4, x3
+ subs x4, x4, x0
+ b.eq normal
+
+ /* Calculate number of "lines" in buffer */
+ sub x5, x1, x4
+ rbit x2, x7
+ clz x2, x2
+ lsr x5, x5, x2
+
+ /*
+	 * If the number of cache lines is 0, we cannot zero by cache
+	 * lines, so take the normal path.
+ */
+ cbz x5, normal
+ /* x6 is final address to zero */
+ add x6, x0, x1
+
+ /*
+ * We are here because x5 is non-0 so normal will be used to
+ * align buffer before cache zeroing. x4 holds number of bytes
+ * needed for alignment.
+ */
+ mov x1, x4
+
+ /* When jumping here: x0 holds pointer, x1 holds size */
+normal:
+ /*
+ * Get buffer offset into 16 byte aligned address; 0 means pointer
+ * is aligned.
+ */
+ ands x2, x0, #0x0f
+ b.eq aligned_to_16
+	/* Calculate how many one-byte stores reach an 8-byte aligned address. */
+ ands x2, x2, #0x07
+ mov x3, #0x08
+ sub x2, x3, x2
+ /* x2 is number of bytes missing for alignment, x1 is buffer size */
+ cmp x1, x2
+ csel x2, x1, x2, le
+ sub x1, x1, x2
+
+ /*
+	 * The byte-by-byte loop zeroes at least enough bytes to align the
+	 * pointer and at most "size" bytes.
+ */
+align:
+ strb wzr, [x0], #0x01
+ subs x2, x2, #0x01
+ b.ne align
+
+ /* Now pointer is aligned to 8 bytes */
+ cmp x1, #0x10
+ b.lt lead_out
+ /*
+	 * Check whether another 8-byte store is needed to reach a 16-byte
+	 * aligned address and, if so, do it
+ */
+ tbz x0, #0x03, aligned_to_16
+ str xzr, [x0], #0x08
+ sub x1, x1, #0x08
+
+	/* When jumping here: x0 is a 16-byte aligned address, x1 is the size */
+aligned_to_16:
+	/* If size is less than 16 bytes, use lead_out to zero what remains */
+ cmp x1, #0x10
+ b.lt lead_out
+
+ lsr x2, x1, #0x04
+zero_by_16:
+ stp xzr, xzr, [x0], #0x10
+ subs x2, x2, #0x01
+ b.ne zero_by_16
+
+ /*
+ * Lead out requires addresses to be aligned to 8 bytes. It is used to
+	 * zero buffers with sizes < 16 and whatever cannot be zeroed by the
+	 * zero_by_16 loop.
+ */
+ ands x1, x1, #0x0f
+ b.eq lead_out_end
+lead_out:
+ tbz x1, #0x03, lead_out_dword
+ str xzr, [x0], #0x08
+lead_out_dword:
+ tbz x1, #0x02, lead_out_word
+ str wzr, [x0], #0x04
+lead_out_word:
+ tbz x1, #0x01, lead_out_byte
+ strh wzr, [x0], #0x02
+lead_out_byte:
+ tbz x1, #0x00, lead_out_end
+ strb wzr, [x0], #0x01
+
+lead_out_end:
+ /*
+ * If x5 is non-zero, this means that normal has been used as
+	 * a lead-in to align the buffer address to the cache line size
+ */
+ cbz x5, ending
+
+ /*
+ * Here x5 holds number of lines to zero; x6 is final address of
+ * buffer. x0 is cache line aligned pointer. x7 is cache line size
+ * in bytes
+ */
+cache_line_zero:
+ dc zva, x0
+ add x0, x0, x7
+ subs x5, x5, #0x01
+ b.ne cache_line_zero
+
+ /* Need to zero remaining bytes? */
+ subs x1, x6, x0
+ b.ne normal
+
+ending:
+ ret
+
+END(bzero)
+
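
For readers who prefer C, here is a rough sketch of the control flow the assembly above implements: store bytes until the pointer is line aligned, zero whole lines with dc zva when the buffer is large enough and the instruction is permitted, then mop up the tail with plain stores. The zva_line variable stands in for dczva_line_size and the function names are assumptions of the sketch; it does not reproduce the exact 8/16-byte staging of the assembly.

#include <stddef.h>
#include <stdint.h>

/* Stands in for dczva_line_size; zero means dc zva must not be used. */
extern size_t zva_line;

static inline void
dc_zva(void *p)
{
	__asm __volatile("dc zva, %0" :: "r" (p) : "memory");
}

void
bzero_sketch(void *p, size_t size)
{
	uint8_t *d = p;

	if (zva_line != 0 && size > zva_line) {
		/* Byte stores until the pointer is cache line aligned. */
		while (((uintptr_t)d & (zva_line - 1)) != 0) {
			*d++ = 0;
			size--;
		}
		/* Zero whole cache lines with dc zva. */
		while (size >= zva_line) {
			dc_zva(d);
			d += zva_line;
			size -= zva_line;
		}
	}
	/* Tail, and buffers too small for the cache-line path. */
	while (size > 0) {
		*d++ = 0;
		size--;
	}
}
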
diff --git a/sys/arm64/arm64/clock.c b/sys/arm64/arm64/clock.c
new file mode 100644
index 000000000000..ef68ea4d7e7b
--- /dev/null
+++ b/sys/arm64/arm64/clock.c
@@ -0,0 +1,39 @@
+/*-
+ * Copyright (c) 2014 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/systm.h>
+
+void
+cpu_initclocks(void)
+{
+
+ cpu_initclocks_bsp();
+}
diff --git a/sys/arm64/arm64/copyinout.S b/sys/arm64/arm64/copyinout.S
new file mode 100644
index 000000000000..5c523d11ed00
--- /dev/null
+++ b/sys/arm64/arm64/copyinout.S
@@ -0,0 +1,226 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/errno.h>
+
+#include <machine/vmparam.h>
+
+#include "assym.inc"
+
+/*
+ * Fault handler for the copy{in,out} functions below.
+ */
+ENTRY(copyio_fault)
+ SET_FAULT_HANDLER(xzr, x1) /* Clear the handler */
+ EXIT_USER_ACCESS_CHECK(w0, x1)
+copyio_fault_nopcb:
+ mov x0, #EFAULT
+ ret
+END(copyio_fault)
+
+/*
+ * Copies from a kernel to user address
+ *
+ * int copyout(const void *kaddr, void *udaddr, size_t len)
+ */
+ENTRY(copyout)
+ cbz x2, 1f
+ adds x3, x1, x2
+ b.cs copyio_fault_nopcb
+ ldr x4, =VM_MAXUSER_ADDRESS
+ cmp x3, x4
+ b.hi copyio_fault_nopcb
+
+ b copycommon
+
+1: mov x0, xzr /* return 0 */
+ ret
+
+END(copyout)
+
+/*
+ * Copies from a user to kernel address
+ *
+ * int copyin(const void *uaddr, void *kdaddr, size_t len)
+ */
+ENTRY(copyin)
+ cbz x2, 1f
+ adds x3, x0, x2
+ b.cs copyio_fault_nopcb
+ ldr x4, =VM_MAXUSER_ADDRESS
+ cmp x3, x4
+ b.hi copyio_fault_nopcb
+
+ b copycommon
+
+1: mov x0, xzr /* return 0 */
+ ret
+
+END(copyin)
+
+/*
+ * Copies a string from a user to kernel address
+ *
+ * int copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *done)
+ */
+ENTRY(copyinstr)
+ mov x5, xzr /* count = 0 */
+	mov	w4, #1		/* Non-zero: failure unless the NUL is copied */
+ cbz x2, 3f /* If len == 0 then skip loop */
+
+ adr x6, copyio_fault /* Get the handler address */
+ SET_FAULT_HANDLER(x6, x7) /* Set the handler */
+ ENTER_USER_ACCESS(w6, x7)
+
+ ldr x7, =VM_MAXUSER_ADDRESS
+1: cmp x0, x7
+ b.cs copyio_fault
+ ldtrb w4, [x0] /* Load from uaddr */
+ add x0, x0, #1 /* Next char */
+ strb w4, [x1], #1 /* Store in kaddr */
+ add x5, x5, #1 /* count++ */
+ cbz w4, 2f /* Break when NUL-terminated */
+ sub x2, x2, #1 /* len-- */
+ cbnz x2, 1b
+
+2: EXIT_USER_ACCESS(w6)
+ SET_FAULT_HANDLER(xzr, x7) /* Clear the handler */
+
+
+3: cbz x3, 4f /* Check if done != NULL */
+ str x5, [x3] /* done = count */
+
+4: mov w1, #ENAMETOOLONG /* Load ENAMETOOLONG to return if failed */
+ cmp w4, #0 /* Check if we saved the NUL-terminator */
+ csel w0, wzr, w1, eq /* If so return success, else failure */
+ ret
+END(copyinstr)
+
+/*
+ * Local helper
+ *
+ * x0 - src pointer
+ * x1 - dst pointer
+ * x2 - size
+ * lr - the return address, so jump here instead of calling
+ *
+ * This function is optimized to minimize concurrent memory accesses. In
+ * present form it is suited for cores with a single memory prefetching
+ * unit.
+ * ARM64TODO:
+ * Consider using separate functions for each ARM64 core. Adding memory
+ * access interleaving might increase a total throughput on A57 or A72.
+ */
+ .text
+ .align 4
+ .local copycommon
+ .type copycommon,@function
+
+copycommon:
+ adr x6, copyio_fault /* Get the handler address */
+ SET_FAULT_HANDLER(x6, x7) /* Set the handler */
+ ENTER_USER_ACCESS(w6, x7)
+
+ /* Check alignment */
+ orr x3, x0, x1
+ ands x3, x3, 0x07
+ b.eq aligned
+
+	/* Unaligned buffers are copied byte by byte */
+byte_by_byte:
+ ldrb w3, [x0], #0x01
+ strb w3, [x1], #0x01
+ subs x2, x2, #0x01
+ b.ne byte_by_byte
+ b ending
+
+aligned:
+ cmp x2, #0x10
+ b.lt lead_out
+ cmp x2, #0x40
+ b.lt by_dwords_start
+
+ /* Block copy */
+ lsr x15, x2, #0x06
+by_blocks:
+ ldp x3, x4, [x0], #0x10
+ ldp x5, x6, [x0], #0x10
+ ldp x7, x8, [x0], #0x10
+ ldp x9, x10, [x0], #0x10
+ stp x3, x4, [x1], #0x10
+ stp x5, x6, [x1], #0x10
+ stp x7, x8, [x1], #0x10
+ stp x9, x10, [x1], #0x10
+
+ subs x15, x15, #0x01
+ b.ne by_blocks
+
+ and x2, x2, #0x3f
+
+by_dwords_start:
+ lsr x15, x2, #0x04
+ cbz x15, lead_out
+by_dwords:
+ ldp x3, x4, [x0], #0x10
+ stp x3, x4, [x1], #0x10
+ subs x15, x15, #0x01
+ b.ne by_dwords
+
+ /* Less than 16 bytes to copy */
+lead_out:
+ tbz x2, #0x03, last_word
+ ldr x3, [x0], #0x08
+ str x3, [x1], #0x08
+
+last_word:
+ tbz x2, #0x02, last_hword
+ ldr w3, [x0], #0x04
+ str w3, [x1], #0x04
+
+last_hword:
+ tbz x2, #0x01, last_byte
+ ldrh w3, [x0], #0x02
+ strh w3, [x1], #0x02
+
+last_byte:
+ tbz x2, #0x00, ending
+ ldrb w3, [x0]
+ strb w3, [x1]
+
+ending:
+ EXIT_USER_ACCESS_CHECK(w6, x7)
+ SET_FAULT_HANDLER(xzr, x7) /* Clear the handler */
+
+ mov x0, xzr /* return 0 */
+ ret
+ .size copycommon, . - copycommon
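
These are the routines a syscall or ioctl handler calls to move data across the user/kernel boundary; they return 0 on success, EFAULT on a bad user address, and copyinstr() may additionally return ENAMETOOLONG. A small hedged example of typical callers follows; the structure layout and the names example_req and example_handler are invented for illustration.

#include <sys/param.h>
#include <sys/systm.h>		/* copyin(), copyout(), copyinstr() */
#include <sys/errno.h>

struct example_req {
	const char	*er_upath;	/* user pointer to a NUL-terminated path */
	int		 er_value;
};

static int
example_handler(void *uptr)
{
	struct example_req req;
	char path[64];
	size_t done;
	int error;

	/* Copy the fixed-size request in; EFAULT on a bad user pointer. */
	error = copyin(uptr, &req, sizeof(req));
	if (error != 0)
		return (error);

	/* Copy the user string; ENAMETOOLONG if it does not fit in path[]. */
	error = copyinstr(req.er_upath, path, sizeof(path), &done);
	if (error != 0)
		return (error);

	/* Hand a result back to userspace. */
	req.er_value = (int)done;
	return (copyout(&req, uptr, sizeof(req)));
}

Note that the kernel-side buffers are always sized by the kernel (sizeof(req), sizeof(path)); only the source pointers come from userspace.
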
diff --git a/sys/arm64/arm64/cpu_errata.c b/sys/arm64/arm64/cpu_errata.c
new file mode 100644
index 000000000000..9879e645b827
--- /dev/null
+++ b/sys/arm64/arm64/cpu_errata.c
@@ -0,0 +1,192 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2018 Andrew Turner
+ * All rights reserved.
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
+ * ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_platform.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+
+#include <machine/cpu.h>
+
+#include <dev/psci/smccc.h>
+
+typedef void (cpu_quirk_install)(void);
+struct cpu_quirks {
+ cpu_quirk_install *quirk_install;
+ u_int midr_mask;
+ u_int midr_value;
+};
+
+static enum {
+ SSBD_FORCE_ON,
+ SSBD_FORCE_OFF,
+ SSBD_KERNEL,
+} ssbd_method = SSBD_KERNEL;
+
+static cpu_quirk_install install_psci_bp_hardening;
+static cpu_quirk_install install_ssbd_workaround;
+static cpu_quirk_install install_thunderx_bcast_tlbi_workaround;
+
+static struct cpu_quirks cpu_quirks[] = {
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A57,0,0),
+ .quirk_install = install_psci_bp_hardening,
+ },
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A72,0,0),
+ .quirk_install = install_psci_bp_hardening,
+ },
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A73,0,0),
+ .quirk_install = install_psci_bp_hardening,
+ },
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A75,0,0),
+ .quirk_install = install_psci_bp_hardening,
+ },
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value =
+ CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX2, 0,0),
+ .quirk_install = install_psci_bp_hardening,
+ },
+ {
+ .midr_mask = 0,
+ .midr_value = 0,
+ .quirk_install = install_ssbd_workaround,
+ },
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value =
+ CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX, 0, 0),
+ .quirk_install = install_thunderx_bcast_tlbi_workaround,
+ },
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value =
+ CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX_81XX, 0, 0),
+ .quirk_install = install_thunderx_bcast_tlbi_workaround,
+ },
+};
+
+static void
+install_psci_bp_hardening(void)
+{
+
+ if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_1) != SMCCC_RET_SUCCESS)
+ return;
+
+ PCPU_SET(bp_harden, smccc_arch_workaround_1);
+}
+
+static void
+install_ssbd_workaround(void)
+{
+ char *env;
+
+ if (PCPU_GET(cpuid) == 0) {
+ env = kern_getenv("kern.cfg.ssbd");
+ if (env != NULL) {
+ if (strcmp(env, "force-on") == 0) {
+ ssbd_method = SSBD_FORCE_ON;
+ } else if (strcmp(env, "force-off") == 0) {
+ ssbd_method = SSBD_FORCE_OFF;
+ }
+ }
+ }
+
+ /* Enable the workaround on this CPU if it's enabled in the firmware */
+ if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_2) != SMCCC_RET_SUCCESS)
+ return;
+
+ switch(ssbd_method) {
+ case SSBD_FORCE_ON:
+ smccc_arch_workaround_2(1);
+ break;
+ case SSBD_FORCE_OFF:
+ smccc_arch_workaround_2(0);
+ break;
+ case SSBD_KERNEL:
+ default:
+ PCPU_SET(ssbd, smccc_arch_workaround_2);
+ break;
+ }
+}
+
+/*
+ * Workaround Cavium erratum 27456.
+ *
+ * Invalidate the local icache when changing address spaces.
+ */
+static void
+install_thunderx_bcast_tlbi_workaround(void)
+{
+ u_int midr;
+
+ midr = get_midr();
+ if (CPU_PART(midr) == CPU_PART_THUNDERX_81XX)
+ PCPU_SET(bcast_tlbi_workaround, 1);
+ else if (CPU_PART(midr) == CPU_PART_THUNDERX) {
+ if (CPU_VAR(midr) == 0) {
+ /* ThunderX 1.x */
+ PCPU_SET(bcast_tlbi_workaround, 1);
+ } else if (CPU_VAR(midr) == 1 && CPU_REV(midr) <= 1) {
+ /* ThunderX 2.0 - 2.1 */
+ PCPU_SET(bcast_tlbi_workaround, 1);
+ }
+ }
+}
+
+void
+install_cpu_errata(void)
+{
+ u_int midr;
+ size_t i;
+
+ midr = get_midr();
+
+ for (i = 0; i < nitems(cpu_quirks); i++) {
+ if ((midr & cpu_quirks[i].midr_mask) ==
+ cpu_quirks[i].midr_value) {
+ cpu_quirks[i].quirk_install();
+ }
+ }
+}
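
Quirk selection is nothing more than a masked comparison of the MIDR against each table entry, as install_cpu_errata() shows. The sketch below restates that predicate as a standalone helper using only macros that already appear in this file; midr_matches() and is_cortex_a72() are illustrative names, not functions in this tree.

#include <sys/param.h>
#include <machine/cpu.h>	/* CPU_IMPL_MASK, CPU_PART_MASK, CPU_ID_RAW, ... */

/* The same test install_cpu_errata() applies to every cpu_quirks entry. */
static int
midr_matches(u_int midr, u_int mask, u_int value)
{

	return ((midr & mask) == value);
}

/* Example: match any variant/revision of a Cortex-A72. */
static int
is_cortex_a72(u_int midr)
{

	return (midr_matches(midr, CPU_IMPL_MASK | CPU_PART_MASK,
	    CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A72, 0, 0)));
}
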
diff --git a/sys/arm64/arm64/cpufunc_asm.S b/sys/arm64/arm64/cpufunc_asm.S
new file mode 100644
index 000000000000..2f28c4f68271
--- /dev/null
+++ b/sys/arm64/arm64/cpufunc_asm.S
@@ -0,0 +1,182 @@
+/*-
+ * Copyright (c) 2014 Robin Randhawa
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by Andrew Turner
+ * under sponsorship from the FreeBSD Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/errno.h>
+#include <machine/asm.h>
+#include <machine/param.h>
+
+#include "assym.inc"
+
+__FBSDID("$FreeBSD$");
+
+/*
+ * FIXME:
+ * Need big.LITTLE awareness at some point.
+ * Using arm64_p[id]cache_line_size may not be the best option.
+ * Need better SMP awareness.
+ */
+ .text
+ .align 2
+
+.Lpage_mask:
+ .word PAGE_MASK
+
+/*
+ * Macro to handle the cache. This takes the start address in x0, length
+ * in x1. It will corrupt x0, x1, x2, x3, and x4.
+ */
+.macro cache_handle_range dcop = 0, ic = 0, icop = 0
+.if \ic == 0
+ ldr x3, =dcache_line_size /* Load the D cache line size */
+.else
+ ldr x3, =idcache_line_size /* Load the I & D cache line size */
+.endif
+ ldr x3, [x3]
+ sub x4, x3, #1 /* Get the address mask */
+ and x2, x0, x4 /* Get the low bits of the address */
+ add x1, x1, x2 /* Add these to the size */
+	bic	x0, x0, x4		/* Clear the low bits of the address */
+.if \ic != 0
+ mov x2, x0 /* Save the address */
+ mov x4, x1 /* Save the size */
+.endif
+1:
+ dc \dcop, x0
+ add x0, x0, x3 /* Move to the next line */
+ subs x1, x1, x3 /* Reduce the size */
+ b.hi 1b /* Check if we are done */
+ dsb ish
+.if \ic != 0
+2:
+ ic \icop, x2
+ add x2, x2, x3 /* Move to the next line */
+ subs x4, x4, x3 /* Reduce the size */
+ b.hi 2b /* Check if we are done */
+ dsb ish
+ isb
+.endif
+.endm
+
+ENTRY(arm64_nullop)
+ ret
+END(arm64_nullop)
+
+/*
+ * Generic functions to read/modify/write the internal coprocessor registers
+ */
+
+ENTRY(arm64_tlb_flushID)
+ dsb ishst
+#ifdef SMP
+ tlbi vmalle1is
+#else
+ tlbi vmalle1
+#endif
+ dsb ish
+ isb
+ ret
+END(arm64_tlb_flushID)
+
+/*
+ * void arm64_dcache_wb_range(vm_offset_t, vm_size_t)
+ */
+ENTRY(arm64_dcache_wb_range)
+ cache_handle_range dcop = cvac
+ ret
+END(arm64_dcache_wb_range)
+
+/*
+ * void arm64_dcache_wbinv_range(vm_offset_t, vm_size_t)
+ */
+ENTRY(arm64_dcache_wbinv_range)
+ cache_handle_range dcop = civac
+ ret
+END(arm64_dcache_wbinv_range)
+
+/*
+ * void arm64_dcache_inv_range(vm_offset_t, vm_size_t)
+ *
+ * Note, we must not invalidate everything. If the range is too big we
+ * must use wb-inv of the entire cache.
+ */
+ENTRY(arm64_dcache_inv_range)
+ cache_handle_range dcop = ivac
+ ret
+END(arm64_dcache_inv_range)
+
+/*
+ * void arm64_dic_idc_icache_sync_range(vm_offset_t, vm_size_t)
+ * When the CTR_EL0.IDC bit is set cleaning to PoU becomes a dsb.
+ * When the CTR_EL0.DIC bit is set icache invalidation becomes an isb.
+ */
+ENTRY(arm64_dic_idc_icache_sync_range)
+ dsb ishst
+ isb
+ ret
+END(arm64_dic_idc_icache_sync_range)
+
+/*
+ * void arm64_aliasing_icache_sync_range(vm_offset_t, vm_size_t)
+ */
+ENTRY(arm64_aliasing_icache_sync_range)
+ /*
+ * XXX Temporary solution - I-cache flush should be range based for
+ * PIPT cache or IALLUIS for VIVT or VIPT caches
+ */
+/* cache_handle_range dcop = cvau, ic = 1, icop = ivau */
+ cache_handle_range dcop = cvau
+ ic ialluis
+ dsb ish
+ isb
+ ret
+END(arm64_aliasing_icache_sync_range)
+
+/*
+ * int arm64_icache_sync_range_checked(vm_offset_t, vm_size_t)
+ */
+ENTRY(arm64_icache_sync_range_checked)
+ adr x5, cache_maint_fault
+ SET_FAULT_HANDLER(x5, x6)
+ /* XXX: See comment in arm64_icache_sync_range */
+ cache_handle_range dcop = cvau
+ ic ialluis
+ dsb ish
+ isb
+ SET_FAULT_HANDLER(xzr, x6)
+ mov x0, #0
+ ret
+END(arm64_icache_sync_range_checked)
+
+ENTRY(cache_maint_fault)
+ SET_FAULT_HANDLER(xzr, x1)
+ mov x0, #EFAULT
+ ret
+END(cache_maint_fault)
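
The cache_handle_range macro rounds an arbitrary (address, length) pair out to whole cache lines before looping over the dc operation. A C rendering of that arithmetic for the write-back case is sketched below; dcache_line stands in for the dcache_line_size variable the macro loads, and the function names are assumptions of the sketch.

#include <stddef.h>
#include <stdint.h>

extern size_t dcache_line;	/* stands in for dcache_line_size */

static inline void
dc_cvac(uintptr_t va)
{
	__asm __volatile("dc cvac, %0" :: "r" (va) : "memory");
}

/* Rough equivalent of "cache_handle_range dcop = cvac" (D-cache clean). */
void
dcache_wb_range_sketch(uintptr_t va, size_t len)
{
	uintptr_t mask = dcache_line - 1;

	len += va & mask;	/* grow the length by the dropped low bits */
	va &= ~mask;		/* round the start down to a line boundary */
	while (len > 0) {
		dc_cvac(va);	/* clean one line */
		va += dcache_line;
		len = (len > dcache_line) ? len - dcache_line : 0;
	}
	__asm __volatile("dsb ish" ::: "memory");	/* wait for completion */
}
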
diff --git a/sys/arm64/arm64/db_disasm.c b/sys/arm64/arm64/db_disasm.c
new file mode 100644
index 000000000000..73efca0bdee9
--- /dev/null
+++ b/sys/arm64/arm64/db_disasm.c
@@ -0,0 +1,70 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+#include <ddb/ddb.h>
+#include <ddb/db_access.h>
+#include <ddb/db_sym.h>
+
+#include <machine/disassem.h>
+
+static u_int db_disasm_read_word(vm_offset_t);
+static void db_disasm_printaddr(vm_offset_t);
+
+/* Glue code to interface db_disasm to the generic ARM disassembler */
+static const struct disasm_interface db_disasm_interface = {
+ .di_readword = db_disasm_read_word,
+ .di_printaddr = db_disasm_printaddr,
+ .di_printf = db_printf,
+};
+
+static u_int
+db_disasm_read_word(vm_offset_t address)
+{
+
+ return (db_get_value(address, INSN_SIZE, 0));
+}
+
+static void
+db_disasm_printaddr(vm_offset_t address)
+{
+
+ db_printsym((db_addr_t)address, DB_STGY_ANY);
+}
+
+vm_offset_t
+db_disasm(vm_offset_t loc, bool altfmt)
+{
+
+ return (disasm(&db_disasm_interface, loc, altfmt));
+}
+
+/* End of db_disasm.c */
diff --git a/sys/arm64/arm64/db_interface.c b/sys/arm64/arm64/db_interface.c
new file mode 100644
index 000000000000..5138bf3f1cab
--- /dev/null
+++ b/sys/arm64/arm64/db_interface.c
@@ -0,0 +1,194 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+
+#ifdef KDB
+#include <sys/kdb.h>
+#endif
+
+#include <ddb/ddb.h>
+#include <ddb/db_variables.h>
+
+#include <machine/cpu.h>
+#include <machine/pcb.h>
+#include <machine/stack.h>
+#include <machine/vmparam.h>
+
+static int
+db_frame(struct db_variable *vp, db_expr_t *valuep, int op)
+{
+ long *reg;
+
+ if (kdb_frame == NULL)
+ return (0);
+
+ reg = (long *)((uintptr_t)kdb_frame + (db_expr_t)vp->valuep);
+ if (op == DB_VAR_GET)
+ *valuep = *reg;
+ else
+ *reg = *valuep;
+ return (1);
+}
+
+#define DB_OFFSET(x) (db_expr_t *)offsetof(struct trapframe, x)
+struct db_variable db_regs[] = {
+ { "spsr", DB_OFFSET(tf_spsr), db_frame },
+ { "x0", DB_OFFSET(tf_x[0]), db_frame },
+ { "x1", DB_OFFSET(tf_x[1]), db_frame },
+ { "x2", DB_OFFSET(tf_x[2]), db_frame },
+ { "x3", DB_OFFSET(tf_x[3]), db_frame },
+ { "x4", DB_OFFSET(tf_x[4]), db_frame },
+ { "x5", DB_OFFSET(tf_x[5]), db_frame },
+ { "x6", DB_OFFSET(tf_x[6]), db_frame },
+ { "x7", DB_OFFSET(tf_x[7]), db_frame },
+ { "x8", DB_OFFSET(tf_x[8]), db_frame },
+ { "x9", DB_OFFSET(tf_x[9]), db_frame },
+ { "x10", DB_OFFSET(tf_x[10]), db_frame },
+ { "x11", DB_OFFSET(tf_x[11]), db_frame },
+ { "x12", DB_OFFSET(tf_x[12]), db_frame },
+ { "x13", DB_OFFSET(tf_x[13]), db_frame },
+ { "x14", DB_OFFSET(tf_x[14]), db_frame },
+ { "x15", DB_OFFSET(tf_x[15]), db_frame },
+ { "x16", DB_OFFSET(tf_x[16]), db_frame },
+ { "x17", DB_OFFSET(tf_x[17]), db_frame },
+ { "x18", DB_OFFSET(tf_x[18]), db_frame },
+ { "x19", DB_OFFSET(tf_x[19]), db_frame },
+ { "x20", DB_OFFSET(tf_x[20]), db_frame },
+ { "x21", DB_OFFSET(tf_x[21]), db_frame },
+ { "x22", DB_OFFSET(tf_x[22]), db_frame },
+ { "x23", DB_OFFSET(tf_x[23]), db_frame },
+ { "x24", DB_OFFSET(tf_x[24]), db_frame },
+ { "x25", DB_OFFSET(tf_x[25]), db_frame },
+ { "x26", DB_OFFSET(tf_x[26]), db_frame },
+ { "x27", DB_OFFSET(tf_x[27]), db_frame },
+ { "x28", DB_OFFSET(tf_x[28]), db_frame },
+ { "x29", DB_OFFSET(tf_x[29]), db_frame },
+ { "lr", DB_OFFSET(tf_lr), db_frame },
+ { "elr", DB_OFFSET(tf_elr), db_frame },
+ { "sp", DB_OFFSET(tf_sp), db_frame },
+};
+
+struct db_variable *db_eregs = db_regs + nitems(db_regs);
+
+void
+db_show_mdpcpu(struct pcpu *pc)
+{
+}
+
+/*
+ * Read bytes from kernel address space for debugger.
+ */
+int
+db_read_bytes(vm_offset_t addr, size_t size, char *data)
+{
+ jmp_buf jb;
+ void *prev_jb;
+ const char *src;
+ int ret;
+ uint64_t tmp64;
+ uint32_t tmp32;
+ uint16_t tmp16;
+
+ prev_jb = kdb_jmpbuf(jb);
+ ret = setjmp(jb);
+
+ if (ret == 0) {
+ src = (const char *)addr;
+ if (size == 8 && (addr & 7) == 0) {
+			tmp64 = *((const uint64_t *)src);
+ src = (const char *)&tmp64;
+ } else if (size == 4 && (addr & 3) == 0) {
+			tmp32 = *((const uint32_t *)src);
+ src = (const char *)&tmp32;
+ } else if (size == 2 && (addr & 1) == 0) {
+			tmp16 = *((const uint16_t *)src);
+ src = (const char *)&tmp16;
+ }
+ while (size-- > 0)
+ *data++ = *src++;
+ }
+ (void)kdb_jmpbuf(prev_jb);
+
+ return (ret);
+}
+
+/*
+ * Write bytes to kernel address space for debugger.
+ */
+int
+db_write_bytes(vm_offset_t addr, size_t size, char *data)
+{
+ jmp_buf jb;
+ void *prev_jb;
+ char *dst;
+ int ret;
+ uint64_t tmp64;
+ uint32_t tmp32;
+ uint16_t tmp16;
+
+ prev_jb = kdb_jmpbuf(jb);
+ ret = setjmp(jb);
+ if (ret == 0) {
+ if (size == 8 && (addr & 7) == 0) {
+ dst = (char *)&tmp64;
+ while (size-- > 0)
+ *dst++ = *data++;
+ *((uint64_t *)addr) = tmp64;
+ } else if (size == 4 && (addr & 3) == 0) {
+ dst = (char *)&tmp32;
+ while (size-- > 0)
+ *dst++ = *data++;
+ *((uint32_t *)addr) = tmp32;
+ } else if (size == 2 && (addr & 1) == 0) {
+ dst = (char *)&tmp16;
+ while (size-- > 0)
+ *dst++ = *data++;
+			*((uint16_t *)addr) = tmp16;
+ } else {
+ dst = (char *)addr;
+ while (size-- > 0)
+ *dst++ = *data++;
+ }
+ dsb(ish);
+
+ /* Clean D-cache and invalidate I-cache */
+ cpu_dcache_wb_range(addr, (vm_size_t)size);
+ cpu_icache_sync_range(addr, (vm_size_t)size);
+ }
+ (void)kdb_jmpbuf(prev_jb);
+
+ return (ret);
+}
diff --git a/sys/arm64/arm64/db_trace.c b/sys/arm64/arm64/db_trace.c
new file mode 100644
index 000000000000..f892935cd13a
--- /dev/null
+++ b/sys/arm64/arm64/db_trace.c
@@ -0,0 +1,133 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_ddb.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/kdb.h>
+
+#include <machine/pcb.h>
+#include <ddb/ddb.h>
+#include <ddb/db_sym.h>
+
+#include <machine/armreg.h>
+#include <machine/debug_monitor.h>
+#include <machine/stack.h>
+
+void
+db_md_list_watchpoints()
+{
+
+ dbg_show_watchpoint();
+}
+
+int
+db_md_clr_watchpoint(db_expr_t addr, db_expr_t size)
+{
+
+ return (dbg_remove_watchpoint(NULL, addr, size));
+}
+
+int
+db_md_set_watchpoint(db_expr_t addr, db_expr_t size)
+{
+
+ return (dbg_setup_watchpoint(NULL, addr, size, HW_BREAKPOINT_RW));
+}
+
+static void
+db_stack_trace_cmd(struct unwind_state *frame)
+{
+ c_db_sym_t sym;
+ const char *name;
+ db_expr_t value;
+ db_expr_t offset;
+
+ while (1) {
+ uint64_t pc = frame->pc;
+ int ret;
+
+ ret = unwind_frame(frame);
+ if (ret < 0)
+ break;
+
+ sym = db_search_symbol(pc, DB_STGY_ANY, &offset);
+ if (sym == C_DB_SYM_NULL) {
+ value = 0;
+ name = "(null)";
+ } else
+ db_symbol_values(sym, &name, &value);
+
+ db_printf("%s() at ", name);
+ db_printsym(frame->pc, DB_STGY_PROC);
+ db_printf("\n");
+
+ db_printf("\t pc = 0x%016lx lr = 0x%016lx\n", pc,
+ frame->pc);
+ db_printf("\t sp = 0x%016lx fp = 0x%016lx\n", frame->sp,
+ frame->fp);
+ /* TODO: Show some more registers */
+ db_printf("\n");
+ }
+}
+
+int
+db_trace_thread(struct thread *thr, int count)
+{
+ struct unwind_state frame;
+ struct pcb *ctx;
+
+ if (thr != curthread) {
+ ctx = kdb_thr_ctx(thr);
+
+ frame.sp = (uint64_t)ctx->pcb_sp;
+ frame.fp = (uint64_t)ctx->pcb_x[29];
+ frame.pc = (uint64_t)ctx->pcb_x[30];
+ db_stack_trace_cmd(&frame);
+ } else
+ db_trace_self();
+ return (0);
+}
+
+void
+db_trace_self(void)
+{
+ struct unwind_state frame;
+ uint64_t sp;
+
+ __asm __volatile("mov %0, sp" : "=&r" (sp));
+
+ frame.sp = sp;
+ frame.fp = (uint64_t)__builtin_frame_address(0);
+ frame.pc = (uint64_t)db_trace_self;
+ db_stack_trace_cmd(&frame);
+}
diff --git a/sys/arm64/arm64/debug_monitor.c b/sys/arm64/arm64/debug_monitor.c
new file mode 100644
index 000000000000..dcb3645cf5d4
--- /dev/null
+++ b/sys/arm64/arm64/debug_monitor.c
@@ -0,0 +1,565 @@
+/*-
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_ddb.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/kdb.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/sysent.h>
+
+#include <machine/armreg.h>
+#include <machine/cpu.h>
+#include <machine/debug_monitor.h>
+#include <machine/kdb.h>
+
+#ifdef DDB
+#include <ddb/ddb.h>
+#include <ddb/db_sym.h>
+#endif
+
+enum dbg_t {
+ DBG_TYPE_BREAKPOINT = 0,
+ DBG_TYPE_WATCHPOINT = 1,
+};
+
+static int dbg_watchpoint_num;
+static int dbg_breakpoint_num;
+static struct debug_monitor_state kernel_monitor = {
+ .dbg_flags = DBGMON_KERNEL
+};
+
+/* Called from the exception handlers */
+void dbg_monitor_enter(struct thread *);
+void dbg_monitor_exit(struct thread *, struct trapframe *);
+
+/* Watchpoints/breakpoints control register bitfields */
+#define DBG_WATCH_CTRL_LEN_1 (0x1 << 5)
+#define DBG_WATCH_CTRL_LEN_2 (0x3 << 5)
+#define DBG_WATCH_CTRL_LEN_4 (0xf << 5)
+#define DBG_WATCH_CTRL_LEN_8 (0xff << 5)
+#define DBG_WATCH_CTRL_LEN_MASK(x) ((x) & (0xff << 5))
+#define DBG_WATCH_CTRL_EXEC (0x0 << 3)
+#define DBG_WATCH_CTRL_LOAD (0x1 << 3)
+#define DBG_WATCH_CTRL_STORE (0x2 << 3)
+#define DBG_WATCH_CTRL_ACCESS_MASK(x) ((x) & (0x3 << 3))
+
+/* Common for breakpoint and watchpoint */
+#define DBG_WB_CTRL_EL1 (0x1 << 1)
+#define DBG_WB_CTRL_EL0 (0x2 << 1)
+#define DBG_WB_CTRL_ELX_MASK(x) ((x) & (0x3 << 1))
+#define DBG_WB_CTRL_E (0x1 << 0)
+
+#define DBG_REG_BASE_BVR 0
+#define DBG_REG_BASE_BCR (DBG_REG_BASE_BVR + 16)
+#define DBG_REG_BASE_WVR (DBG_REG_BASE_BCR + 16)
+#define DBG_REG_BASE_WCR (DBG_REG_BASE_WVR + 16)
+
+/* Watchpoint/breakpoint helpers */
+#define DBG_WB_WVR "wvr"
+#define DBG_WB_WCR "wcr"
+#define DBG_WB_BVR "bvr"
+#define DBG_WB_BCR "bcr"
+
+#define DBG_WB_READ(reg, num, val) do { \
+ __asm __volatile("mrs %0, dbg" reg #num "_el1" : "=r" (val)); \
+} while (0)
+
+#define DBG_WB_WRITE(reg, num, val) do { \
+ __asm __volatile("msr dbg" reg #num "_el1, %0" :: "r" (val)); \
+} while (0)
+
+#define READ_WB_REG_CASE(reg, num, offset, val) \
+ case (num + offset): \
+ DBG_WB_READ(reg, num, val); \
+ break
+
+#define WRITE_WB_REG_CASE(reg, num, offset, val) \
+ case (num + offset): \
+ DBG_WB_WRITE(reg, num, val); \
+ break
+
+#define SWITCH_CASES_READ_WB_REG(reg, offset, val) \
+ READ_WB_REG_CASE(reg, 0, offset, val); \
+ READ_WB_REG_CASE(reg, 1, offset, val); \
+ READ_WB_REG_CASE(reg, 2, offset, val); \
+ READ_WB_REG_CASE(reg, 3, offset, val); \
+ READ_WB_REG_CASE(reg, 4, offset, val); \
+ READ_WB_REG_CASE(reg, 5, offset, val); \
+ READ_WB_REG_CASE(reg, 6, offset, val); \
+ READ_WB_REG_CASE(reg, 7, offset, val); \
+ READ_WB_REG_CASE(reg, 8, offset, val); \
+ READ_WB_REG_CASE(reg, 9, offset, val); \
+ READ_WB_REG_CASE(reg, 10, offset, val); \
+ READ_WB_REG_CASE(reg, 11, offset, val); \
+ READ_WB_REG_CASE(reg, 12, offset, val); \
+ READ_WB_REG_CASE(reg, 13, offset, val); \
+ READ_WB_REG_CASE(reg, 14, offset, val); \
+ READ_WB_REG_CASE(reg, 15, offset, val)
+
+#define SWITCH_CASES_WRITE_WB_REG(reg, offset, val) \
+ WRITE_WB_REG_CASE(reg, 0, offset, val); \
+ WRITE_WB_REG_CASE(reg, 1, offset, val); \
+ WRITE_WB_REG_CASE(reg, 2, offset, val); \
+ WRITE_WB_REG_CASE(reg, 3, offset, val); \
+ WRITE_WB_REG_CASE(reg, 4, offset, val); \
+ WRITE_WB_REG_CASE(reg, 5, offset, val); \
+ WRITE_WB_REG_CASE(reg, 6, offset, val); \
+ WRITE_WB_REG_CASE(reg, 7, offset, val); \
+ WRITE_WB_REG_CASE(reg, 8, offset, val); \
+ WRITE_WB_REG_CASE(reg, 9, offset, val); \
+ WRITE_WB_REG_CASE(reg, 10, offset, val); \
+ WRITE_WB_REG_CASE(reg, 11, offset, val); \
+ WRITE_WB_REG_CASE(reg, 12, offset, val); \
+ WRITE_WB_REG_CASE(reg, 13, offset, val); \
+ WRITE_WB_REG_CASE(reg, 14, offset, val); \
+ WRITE_WB_REG_CASE(reg, 15, offset, val)
+
+#ifdef DDB
+static uint64_t
+dbg_wb_read_reg(int reg, int n)
+{
+ uint64_t val = 0;
+
+ switch (reg + n) {
+ SWITCH_CASES_READ_WB_REG(DBG_WB_WVR, DBG_REG_BASE_WVR, val);
+ SWITCH_CASES_READ_WB_REG(DBG_WB_WCR, DBG_REG_BASE_WCR, val);
+ SWITCH_CASES_READ_WB_REG(DBG_WB_BVR, DBG_REG_BASE_BVR, val);
+ SWITCH_CASES_READ_WB_REG(DBG_WB_BCR, DBG_REG_BASE_BCR, val);
+ default:
+ printf("trying to read from wrong debug register %d\n", n);
+ }
+
+ return val;
+}
+#endif /* DDB */
+
+static void
+dbg_wb_write_reg(int reg, int n, uint64_t val)
+{
+ switch (reg + n) {
+ SWITCH_CASES_WRITE_WB_REG(DBG_WB_WVR, DBG_REG_BASE_WVR, val);
+ SWITCH_CASES_WRITE_WB_REG(DBG_WB_WCR, DBG_REG_BASE_WCR, val);
+ SWITCH_CASES_WRITE_WB_REG(DBG_WB_BVR, DBG_REG_BASE_BVR, val);
+ SWITCH_CASES_WRITE_WB_REG(DBG_WB_BCR, DBG_REG_BASE_BCR, val);
+ default:
+ printf("trying to write to wrong debug register %d\n", n);
+ return;
+ }
+ isb();
+}
+
+#ifdef DDB
+void
+kdb_cpu_set_singlestep(void)
+{
+
+ kdb_frame->tf_spsr |= DBG_SPSR_SS;
+ WRITE_SPECIALREG(mdscr_el1, READ_SPECIALREG(mdscr_el1) |
+ DBG_MDSCR_SS | DBG_MDSCR_KDE);
+
+ /*
+	 * Disable breakpoints and watchpoints; otherwise stepping over a
+	 * watched instruction triggers a breakpoint exception instead of
+	 * the single-step exception and locks the CPU on that instruction
+	 * forever.
+ */
+ if ((kernel_monitor.dbg_flags & DBGMON_ENABLED) != 0) {
+ WRITE_SPECIALREG(mdscr_el1,
+ READ_SPECIALREG(mdscr_el1) & ~DBG_MDSCR_MDE);
+ }
+}
+
+void
+kdb_cpu_clear_singlestep(void)
+{
+
+ WRITE_SPECIALREG(mdscr_el1, READ_SPECIALREG(mdscr_el1) &
+ ~(DBG_MDSCR_SS | DBG_MDSCR_KDE));
+
+ /* Restore breakpoints and watchpoints */
+ if ((kernel_monitor.dbg_flags & DBGMON_ENABLED) != 0) {
+ WRITE_SPECIALREG(mdscr_el1,
+ READ_SPECIALREG(mdscr_el1) | DBG_MDSCR_MDE);
+
+ if ((kernel_monitor.dbg_flags & DBGMON_KERNEL) != 0) {
+ WRITE_SPECIALREG(mdscr_el1,
+ READ_SPECIALREG(mdscr_el1) | DBG_MDSCR_KDE);
+ }
+ }
+}
+
+static const char *
+dbg_watchtype_str(uint32_t type)
+{
+ switch (type) {
+ case DBG_WATCH_CTRL_EXEC:
+ return ("execute");
+ case DBG_WATCH_CTRL_STORE:
+ return ("write");
+ case DBG_WATCH_CTRL_LOAD:
+ return ("read");
+ case DBG_WATCH_CTRL_LOAD | DBG_WATCH_CTRL_STORE:
+ return ("read/write");
+ default:
+ return ("invalid");
+ }
+}
+
+static int
+dbg_watchtype_len(uint32_t len)
+{
+ switch (len) {
+ case DBG_WATCH_CTRL_LEN_1:
+ return (1);
+ case DBG_WATCH_CTRL_LEN_2:
+ return (2);
+ case DBG_WATCH_CTRL_LEN_4:
+ return (4);
+ case DBG_WATCH_CTRL_LEN_8:
+ return (8);
+ default:
+ return (0);
+ }
+}
+
+void
+dbg_show_watchpoint(void)
+{
+ uint32_t wcr, len, type;
+ uint64_t addr;
+ int i;
+
+ db_printf("\nhardware watchpoints:\n");
+ db_printf(" watch status type len address symbol\n");
+ db_printf(" ----- -------- ---------- --- ------------------ ------------------\n");
+ for (i = 0; i < dbg_watchpoint_num; i++) {
+ wcr = dbg_wb_read_reg(DBG_REG_BASE_WCR, i);
+ if ((wcr & DBG_WB_CTRL_E) != 0) {
+ type = DBG_WATCH_CTRL_ACCESS_MASK(wcr);
+ len = DBG_WATCH_CTRL_LEN_MASK(wcr);
+ addr = dbg_wb_read_reg(DBG_REG_BASE_WVR, i);
+ db_printf(" %-5d %-8s %10s %3d 0x%16lx ",
+ i, "enabled", dbg_watchtype_str(type),
+ dbg_watchtype_len(len), addr);
+ db_printsym((db_addr_t)addr, DB_STGY_ANY);
+ db_printf("\n");
+ } else {
+ db_printf(" %-5d disabled\n", i);
+ }
+ }
+}
+#endif /* DDB */
+
+static int
+dbg_find_free_slot(struct debug_monitor_state *monitor, enum dbg_t type)
+{
+ uint64_t *reg;
+ u_int max, i;
+
+ switch(type) {
+ case DBG_TYPE_BREAKPOINT:
+ max = dbg_breakpoint_num;
+ reg = monitor->dbg_bcr;
+ break;
+ case DBG_TYPE_WATCHPOINT:
+ max = dbg_watchpoint_num;
+ reg = monitor->dbg_wcr;
+ break;
+ default:
+ printf("Unsupported debug type\n");
+		return (-1);
+ }
+
+ for (i = 0; i < max; i++) {
+ if ((reg[i] & DBG_WB_CTRL_E) == 0)
+ return (i);
+ }
+
+ return (-1);
+}
+
+static int
+dbg_find_slot(struct debug_monitor_state *monitor, enum dbg_t type,
+ vm_offset_t addr)
+{
+ uint64_t *reg_addr, *reg_ctrl;
+ u_int max, i;
+
+ switch(type) {
+ case DBG_TYPE_BREAKPOINT:
+ max = dbg_breakpoint_num;
+ reg_addr = monitor->dbg_bvr;
+ reg_ctrl = monitor->dbg_bcr;
+ break;
+ case DBG_TYPE_WATCHPOINT:
+ max = dbg_watchpoint_num;
+ reg_addr = monitor->dbg_wvr;
+ reg_ctrl = monitor->dbg_wcr;
+ break;
+ default:
+ printf("Unsupported debug type\n");
+		return (-1);
+ }
+
+ for (i = 0; i < max; i++) {
+ if (reg_addr[i] == addr &&
+ (reg_ctrl[i] & DBG_WB_CTRL_E) != 0)
+ return (i);
+ }
+
+ return (-1);
+}
+
+int
+dbg_setup_watchpoint(struct debug_monitor_state *monitor, vm_offset_t addr,
+ vm_size_t size, enum dbg_access_t access)
+{
+ uint64_t wcr_size, wcr_priv, wcr_access;
+ u_int i;
+
+ if (monitor == NULL)
+ monitor = &kernel_monitor;
+
+ i = dbg_find_free_slot(monitor, DBG_TYPE_WATCHPOINT);
+ if (i == -1) {
+		printf("Cannot find a free watchpoint slot, at most %d"
+		    " watchpoints are supported\n", dbg_watchpoint_num);
+ return (i);
+ }
+
+ switch(size) {
+ case 1:
+ wcr_size = DBG_WATCH_CTRL_LEN_1;
+ break;
+ case 2:
+ wcr_size = DBG_WATCH_CTRL_LEN_2;
+ break;
+ case 4:
+ wcr_size = DBG_WATCH_CTRL_LEN_4;
+ break;
+ case 8:
+ wcr_size = DBG_WATCH_CTRL_LEN_8;
+ break;
+ default:
+ printf("Unsupported address size for watchpoint\n");
+ return (-1);
+ }
+
+ if ((monitor->dbg_flags & DBGMON_KERNEL) == 0)
+ wcr_priv = DBG_WB_CTRL_EL0;
+ else
+ wcr_priv = DBG_WB_CTRL_EL1;
+
+ switch(access) {
+ case HW_BREAKPOINT_X:
+ wcr_access = DBG_WATCH_CTRL_EXEC;
+ break;
+ case HW_BREAKPOINT_R:
+ wcr_access = DBG_WATCH_CTRL_LOAD;
+ break;
+ case HW_BREAKPOINT_W:
+ wcr_access = DBG_WATCH_CTRL_STORE;
+ break;
+ case HW_BREAKPOINT_RW:
+ wcr_access = DBG_WATCH_CTRL_LOAD | DBG_WATCH_CTRL_STORE;
+ break;
+ default:
+		printf("Unsupported access type for watchpoint\n");
+ return (-1);
+ }
+
+ monitor->dbg_wvr[i] = addr;
+ monitor->dbg_wcr[i] = wcr_size | wcr_access | wcr_priv | DBG_WB_CTRL_E;
+ monitor->dbg_enable_count++;
+ monitor->dbg_flags |= DBGMON_ENABLED;
+
+ dbg_register_sync(monitor);
+ return (0);
+}
+
+int
+dbg_remove_watchpoint(struct debug_monitor_state *monitor, vm_offset_t addr,
+ vm_size_t size)
+{
+	int i;
+
+ if (monitor == NULL)
+ monitor = &kernel_monitor;
+
+ i = dbg_find_slot(monitor, DBG_TYPE_WATCHPOINT, addr);
+ if (i == -1) {
+		printf("Cannot find watchpoint for address 0x%lx\n", addr);
+ return (i);
+ }
+
+ monitor->dbg_wvr[i] = 0;
+ monitor->dbg_wcr[i] = 0;
+ monitor->dbg_enable_count--;
+ if (monitor->dbg_enable_count == 0)
+ monitor->dbg_flags &= ~DBGMON_ENABLED;
+
+ dbg_register_sync(monitor);
+ return (0);
+}
+
+void
+dbg_register_sync(struct debug_monitor_state *monitor)
+{
+ uint64_t mdscr;
+ int i;
+
+ if (monitor == NULL)
+ monitor = &kernel_monitor;
+
+ mdscr = READ_SPECIALREG(mdscr_el1);
+ if ((monitor->dbg_flags & DBGMON_ENABLED) == 0) {
+ mdscr &= ~(DBG_MDSCR_MDE | DBG_MDSCR_KDE);
+ } else {
+ for (i = 0; i < dbg_breakpoint_num; i++) {
+ dbg_wb_write_reg(DBG_REG_BASE_BCR, i,
+ monitor->dbg_bcr[i]);
+ dbg_wb_write_reg(DBG_REG_BASE_BVR, i,
+ monitor->dbg_bvr[i]);
+ }
+
+ for (i = 0; i < dbg_watchpoint_num; i++) {
+ dbg_wb_write_reg(DBG_REG_BASE_WCR, i,
+ monitor->dbg_wcr[i]);
+ dbg_wb_write_reg(DBG_REG_BASE_WVR, i,
+ monitor->dbg_wvr[i]);
+ }
+ mdscr |= DBG_MDSCR_MDE;
+ if ((monitor->dbg_flags & DBGMON_KERNEL) == DBGMON_KERNEL)
+ mdscr |= DBG_MDSCR_KDE;
+ }
+ WRITE_SPECIALREG(mdscr_el1, mdscr);
+ isb();
+}
+
+void
+dbg_monitor_init(void)
+{
+ u_int i;
+
+	/* Find out how many breakpoints and watchpoints we can use */
+ dbg_watchpoint_num = ((READ_SPECIALREG(id_aa64dfr0_el1) >> 20) & 0xf) + 1;
+ dbg_breakpoint_num = ((READ_SPECIALREG(id_aa64dfr0_el1) >> 12) & 0xf) + 1;
+
+ if (bootverbose && PCPU_GET(cpuid) == 0) {
+ printf("%d watchpoints and %d breakpoints supported\n",
+ dbg_watchpoint_num, dbg_breakpoint_num);
+ }
+
+ /*
+	 * We have a limited number of {watch,break}points, each consisting of
+	 * two registers:
+	 * - the wcr/bcr register configures the corresponding
+	 *   {watch,break}point behaviour
+	 * - the wvr/bvr register holds the address we are watching for
+ *
+ * Reset all breakpoints and watchpoints.
+ */
+ for (i = 0; i < dbg_watchpoint_num; i++) {
+ dbg_wb_write_reg(DBG_REG_BASE_WCR, i, 0);
+ dbg_wb_write_reg(DBG_REG_BASE_WVR, i, 0);
+ }
+
+ for (i = 0; i < dbg_breakpoint_num; i++) {
+ dbg_wb_write_reg(DBG_REG_BASE_BCR, i, 0);
+ dbg_wb_write_reg(DBG_REG_BASE_BVR, i, 0);
+ }
+
+ dbg_enable();
+}
+
+void
+dbg_monitor_enter(struct thread *thread)
+{
+ int i;
+
+ if ((kernel_monitor.dbg_flags & DBGMON_ENABLED) != 0) {
+ /* Install the kernel version of the registers */
+ dbg_register_sync(&kernel_monitor);
+ } else if ((thread->td_pcb->pcb_dbg_regs.dbg_flags & DBGMON_ENABLED) != 0) {
+ /* Disable the user breakpoints until we return to userspace */
+ for (i = 0; i < dbg_watchpoint_num; i++) {
+ dbg_wb_write_reg(DBG_REG_BASE_WCR, i, 0);
+ dbg_wb_write_reg(DBG_REG_BASE_WVR, i, 0);
+ }
+
+ for (i = 0; i < dbg_breakpoint_num; ++i) {
+ dbg_wb_write_reg(DBG_REG_BASE_BCR, i, 0);
+ dbg_wb_write_reg(DBG_REG_BASE_BVR, i, 0);
+ }
+ WRITE_SPECIALREG(mdscr_el1,
+ READ_SPECIALREG(mdscr_el1) &
+ ~(DBG_MDSCR_MDE | DBG_MDSCR_KDE));
+ isb();
+ }
+}
+
+void
+dbg_monitor_exit(struct thread *thread, struct trapframe *frame)
+{
+ int i;
+
+ /*
+ * PSR_D is an aarch64-only flag. On aarch32, it switches
+ * the processor to big-endian, so avoid setting it for
+	 * 32-bit binaries.
+ */
+ if (!(SV_PROC_FLAG(thread->td_proc, SV_ILP32)))
+ frame->tf_spsr |= PSR_D;
+ if ((thread->td_pcb->pcb_dbg_regs.dbg_flags & DBGMON_ENABLED) != 0) {
+ /* Install the kernel version of the registers */
+ dbg_register_sync(&thread->td_pcb->pcb_dbg_regs);
+ frame->tf_spsr &= ~PSR_D;
+ } else if ((kernel_monitor.dbg_flags & DBGMON_ENABLED) != 0) {
+ /* Disable the user breakpoints until we return to userspace */
+ for (i = 0; i < dbg_watchpoint_num; i++) {
+ dbg_wb_write_reg(DBG_REG_BASE_WCR, i, 0);
+ dbg_wb_write_reg(DBG_REG_BASE_WVR, i, 0);
+ }
+
+ for (i = 0; i < dbg_breakpoint_num; ++i) {
+ dbg_wb_write_reg(DBG_REG_BASE_BCR, i, 0);
+ dbg_wb_write_reg(DBG_REG_BASE_BVR, i, 0);
+ }
+ WRITE_SPECIALREG(mdscr_el1,
+ READ_SPECIALREG(mdscr_el1) &
+ ~(DBG_MDSCR_MDE | DBG_MDSCR_KDE));
+ isb();
+ }
+}
diff --git a/sys/arm64/arm64/disassem.c b/sys/arm64/arm64/disassem.c
new file mode 100644
index 000000000000..ce0bf7660b02
--- /dev/null
+++ b/sys/arm64/arm64/disassem.c
@@ -0,0 +1,545 @@
+/*-
+ * Copyright (c) 2016 Cavium
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+
+#include <sys/systm.h>
+#include <machine/disassem.h>
+#include <machine/armreg.h>
+#include <ddb/ddb.h>
+
+#define ARM64_MAX_TOKEN_LEN 8
+#define ARM64_MAX_TOKEN_CNT 10
+
+#define ARM_INSN_SIZE_OFFSET 30
+#define ARM_INSN_SIZE_MASK 0x3
+
+/* Special options for instruction printing */
+#define OP_SIGN_EXT (1UL << 0) /* Sign-extend immediate value */
+#define OP_LITERAL (1UL << 1) /* Use literal (memory offset) */
+#define OP_MULT_4 (1UL << 2) /* Multiply immediate by 4 */
+#define OP_SF32 (1UL << 3) /* Force 32-bit access */
+#define OP_SF_INV (1UL << 6) /* SF is inverted (1 means 32 bit access) */
+
+static const char *w_reg[] = {
+ "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
+ "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
+ "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
+ "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wSP",
+};
+
+static const char *x_reg[] = {
+ "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+ "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+ "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+ "x24", "x25", "x26", "x27", "x28", "x29", "LR", "SP",
+};
+
+static const char *shift_2[] = {
+ "LSL", "LSR", "ASR", "RSV"
+};
+
+/*
+ * Structure representing single token (operand) inside instruction.
+ * name - name of operand
+ * pos - position within the instruction (in bits)
+ * len - operand length (in bits)
+ */
+struct arm64_insn_token {
+ char name[ARM64_MAX_TOKEN_LEN];
+ int pos;
+ int len;
+};
+
+/*
+ * Define generic types for instruction printing.
+ */
+enum arm64_format_type {
+ TYPE_01, /* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
+ OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */
+ TYPE_02, /* OP <RT>, [<RN>, #<imm>]{!}] SF32/64
+ OP <RT>, [<RN>], #<imm>{!} SF32/64
+ OP <RT>, <RN>, <RM> {, EXTEND AMOUNT } */
+ TYPE_03, /* OP <RT>, #imm SF32/64 */
+};
+
+/*
+ * Structure representing single parsed instruction format.
+ * name - opcode name
+ * format - opcode format in a human-readable way
+ * type - syntax type for printing
+ * special_ops - special options passed to a printer (if any)
+ * mask - bitmask for instruction matching
+ * pattern - pattern to look for
+ * tokens - array of tokens (operands) inside instruction
+ */
+struct arm64_insn {
+ char* name;
+ char* format;
+ enum arm64_format_type type;
+ uint64_t special_ops;
+ uint32_t mask;
+ uint32_t pattern;
+ struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT];
+};
+
+/*
+ * Specify instruction opcode format in a human-readable way. Use notation
+ * obtained from ARM Architecture Reference Manual for ARMv8-A.
+ *
+ * Format string description:
+ * Each group must be separated by "|". Group made of 0/1 is used to
+ * generate mask and pattern for instruction matching. Groups containing
+ * an operand token (in format NAME(length_bits)) are used to retrieve any
+ * operand data from the instruction. Names here must be meaningful
+ * and match the one described in the Manual.
+ *
+ * Token description:
+ * SF - "0" represents 32-bit access, "1" represents 64-bit access
+ * SHIFT - type of shift (instruction dependent)
+ * IMM - immediate value
+ * Rx - register number
+ * OPTION - command specific options
+ * SCALE - scaling of immediate value
+ */
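+/*
+ * Worked example (illustrative, not from the original sources): for the
+ * "mov" entry below, "SF(1)|001000100000000000000|RN(5)|RD(5)", the 21
+ * literal bits occupy bits 30..10, so the generated mask is 0x7ffffc00
+ * and the pattern is 0x11000000, while SF is recorded at bit 31 (len 1),
+ * RN at bit 5 (len 5) and RD at bit 0 (len 5).  An opcode matches an
+ * entry when (insn & mask) == pattern.
+ */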
+static struct arm64_insn arm64_i[] = {
+ { "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
+ TYPE_01, 0 },
+ { "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)",
+ TYPE_01, 0 },
+ { "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)",
+ TYPE_01, 0 },
+ { "ldr", "1|SF(1)|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)",
+ TYPE_02, OP_SIGN_EXT }, /* ldr immediate post/pre index */
+ { "ldr", "1|SF(1)|11100101|IMM(12)|RN(5)|RT(5)",
+ TYPE_02, 0 }, /* ldr immediate unsigned */
+ { "ldr", "1|SF(1)|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
+ TYPE_02, 0 }, /* ldr register */
+ { "ldr", "0|SF(1)|011000|IMM(19)|RT(5)",
+ TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldr literal */
+ { "ldrb", "00|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)",
+ TYPE_02, OP_SIGN_EXT | OP_SF32 }, /* ldrb immediate post/pre index */
+ { "ldrb", "00|11100101|IMM(12)|RN(5)|RT(5)",
+ TYPE_02, OP_SF32 }, /* ldrb immediate unsigned */
+ { "ldrb", "00|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
+ TYPE_02, OP_SF32 }, /* ldrb register */
+ { "ldrh", "01|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", TYPE_02,
+ OP_SIGN_EXT | OP_SF32 }, /* ldrh immediate post/pre index */
+ { "ldrh", "01|11100101|IMM(12)|RN(5)|RT(5)",
+ TYPE_02, OP_SF32 }, /* ldrh immediate unsigned */
+ { "ldrh", "01|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
+ TYPE_02, OP_SF32 }, /* ldrh register */
+ { "ldrsb", "001110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)",
+ TYPE_02, OP_SIGN_EXT | OP_SF_INV }, /* ldrsb immediate post/pre index */
+ { "ldrsb", "001110011|SF(1)|IMM(12)|RN(5)|RT(5)",\
+ TYPE_02, OP_SF_INV}, /* ldrsb immediate unsigned */
+ { "ldrsb", "001110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
+ TYPE_02, OP_SF_INV }, /* ldrsb register */
+ { "ldrsh", "011110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)",
+ TYPE_02, OP_SIGN_EXT | OP_SF_INV }, /* ldrsh immediate post/pre index */
+ { "ldrsh", "011110011|SF(1)|IMM(12)|RN(5)|RT(5)",
+ TYPE_02, OP_SF_INV}, /* ldrsh immediate unsigned */
+ { "ldrsh", "011110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
+ TYPE_02, OP_SF_INV }, /* ldrsh register */
+ { "ldrsw", "10111000100|IMM(9)|OPTION(2)|RN(5)|RT(5)",
+ TYPE_02, OP_SIGN_EXT }, /* ldrsw immediate post/pre index */
+ { "ldrsw", "1011100110|IMM(12)|RN(5)|RT(5)",
+ TYPE_02, 0 }, /* ldrsw immediate unsigned */
+ { "ldrsw", "10111000101|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
+ TYPE_02, 0 }, /* ldrsw register */
+ { "ldrsw", "10011000|IMM(19)|RT(5)",
+	    TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldrsw literal */
+ { NULL, NULL }
+};
+
+static void
+arm64_disasm_generate_masks(struct arm64_insn *tab)
+{
+ uint32_t mask, val;
+ int a, i;
+ int len, ret;
+ int token = 0;
+ char *format;
+ int error;
+
+ while (tab->name != NULL) {
+ mask = 0;
+ val = 0;
+ format = tab->format;
+ token = 0;
+ error = 0;
+
+ /*
+ * For each entry analyze format strings from the
+ * left (i.e. from the MSB).
+ */
+ a = (INSN_SIZE * NBBY) - 1;
+ while (*format != '\0' && (a >= 0)) {
+ switch(*format) {
+ case '0':
+ /* Bit is 0, add to mask and pattern */
+ mask |= (1 << a);
+ a--;
+ format++;
+ break;
+ case '1':
+ /* Bit is 1, add to mask and pattern */
+ mask |= (1 << a);
+ val |= (1 << a);
+ a--;
+ format++;
+ break;
+ case '|':
+ /* skip */
+ format++;
+ break;
+ default:
+ /* Token found, copy the name */
+ memset(tab->tokens[token].name, 0,
+ sizeof(tab->tokens[token].name));
+ i = 0;
+ while (*format != '(') {
+ tab->tokens[token].name[i] = *format;
+ i++;
+ format++;
+ if (i >= ARM64_MAX_TOKEN_LEN) {
+ printf("ERROR: token too long in op %s\n",
+ tab->name);
+ error = 1;
+ break;
+ }
+ }
+ if (error != 0)
+ break;
+
+ /* Read the length value */
+ ret = sscanf(format, "(%d)", &len);
+ if (ret == 1) {
+ if (token >= ARM64_MAX_TOKEN_CNT) {
+					printf("ERROR: too many tokens in op %s\n",
+ tab->name);
+ error = 1;
+ break;
+ }
+
+ a -= len;
+ tab->tokens[token].pos = a + 1;
+ tab->tokens[token].len = len;
+ token++;
+ }
+
+ /* Skip to the end of the token */
+ while (*format != 0 && *format != '|')
+ format++;
+ }
+ }
+
+ /* Write mask and pattern to the instruction array */
+ tab->mask = mask;
+ tab->pattern = val;
+
+ /*
+		 * At this point the format string must be fully parsed and
+		 * "a" should be -1.  If it is not, the format string declared
+		 * the wrong number of bits.  Mark the entry as invalid and
+		 * prevent it from being matched.
+ */
+ if (*format != 0 || (a != -1) || (error != 0)) {
+ tab->mask = 0;
+ tab->pattern = 0xffffffff;
+ printf("ERROR: skipping instruction op %s\n",
+ tab->name);
+ }
+
+ tab++;
+ }
+}
+
+static int
+arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode,
+ const char *token, int *val)
+{
+ int i;
+
+ for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
+ if (strcmp(insn->tokens[i].name, token) == 0) {
+ *val = (opcode >> insn->tokens[i].pos &
+ ((1 << insn->tokens[i].len) - 1));
+ return (0);
+ }
+ }
+
+ return (EINVAL);
+}
+
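+/*
+ * Example (illustrative): for a 9-bit IMM token msk is 0x1ff, so a raw
+ * field value of 0x1f0 has its top bit set and becomes
+ * 0x1f0 | ~0x1ff = -16, i.e. the immediate is treated as a signed
+ * two's-complement offset.
+ */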
+static int
+arm64_disasm_read_token_sign_ext(struct arm64_insn *insn, u_int opcode,
+ const char *token, int *val)
+{
+ int i;
+ int msk;
+
+ for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
+ if (strcmp(insn->tokens[i].name, token) == 0) {
+ msk = (1 << insn->tokens[i].len) - 1;
+ *val = ((opcode >> insn->tokens[i].pos) & msk);
+
+ /* If last bit is 1, sign-extend the value */
+ if (*val & (1 << (insn->tokens[i].len - 1)))
+ *val |= ~msk;
+
+ return (0);
+ }
+ }
+
+ return (EINVAL);
+}
+
+static const char *
+arm64_reg(int b64, int num)
+{
+
+ if (b64 != 0)
+ return (x_reg[num]);
+
+ return (w_reg[num]);
+}
+
+vm_offset_t
+disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt)
+{
+ struct arm64_insn *i_ptr = arm64_i;
+ uint32_t insn;
+ int matchp;
+ int ret;
+ int shift, rm, rt, rd, rn, imm, sf, idx, option, scale, amount;
+ int sign_ext;
+ int rm_absent;
+ /* Indicate if immediate should be outside or inside brackets */
+ int inside;
+ /* Print exclamation mark if pre-incremented */
+ int pre;
+
+ /* Initialize defaults, all are 0 except SF indicating 64bit access */
+ shift = rd = rm = rn = imm = idx = option = amount = scale = 0;
+ sign_ext = 0;
+ sf = 1;
+
+ matchp = 0;
+ insn = di->di_readword(loc);
+ while (i_ptr->name) {
+ /* If mask is 0 then the parser was not initialized yet */
+ if ((i_ptr->mask != 0) &&
+ ((insn & i_ptr->mask) == i_ptr->pattern)) {
+ matchp = 1;
+ break;
+ }
+ i_ptr++;
+ }
+ if (matchp == 0)
+ goto undefined;
+
+ /* Global options */
+ if (i_ptr->special_ops & OP_SF32)
+ sf = 0;
+
+ /* Global optional tokens */
+ arm64_disasm_read_token(i_ptr, insn, "SF", &sf);
+ if (i_ptr->special_ops & OP_SF_INV)
+ sf = 1 - sf;
+ if (arm64_disasm_read_token(i_ptr, insn, "SIGN", &sign_ext) == 0)
+ sign_ext = 1 - sign_ext;
+ if (i_ptr->special_ops & OP_SIGN_EXT)
+ sign_ext = 1;
+ if (sign_ext != 0)
+ arm64_disasm_read_token_sign_ext(i_ptr, insn, "IMM", &imm);
+ else
+ arm64_disasm_read_token(i_ptr, insn, "IMM", &imm);
+ if (i_ptr->special_ops & OP_MULT_4)
+ imm <<= 2;
+
+ /* Print opcode by type */
+ switch (i_ptr->type) {
+ case TYPE_01:
+ /* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
+ OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */
+
+ /* Mandatory tokens */
+ ret = arm64_disasm_read_token(i_ptr, insn, "RD", &rd);
+ ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
+ if (ret != 0) {
+ printf("ERROR: Missing mandatory token for op %s type %d\n",
+ i_ptr->name, i_ptr->type);
+ goto undefined;
+ }
+
+ /* Optional tokens */
+ arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift);
+ rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
+
+ di->di_printf("%s\t%s, %s", i_ptr->name, arm64_reg(sf, rd),
+ arm64_reg(sf, rn));
+
+ /* If RM is present use it, otherwise use immediate notation */
+ if (rm_absent == 0) {
+ di->di_printf(", %s", arm64_reg(sf, rm));
+ if (imm != 0)
+ di->di_printf(", %s #%d", shift_2[shift], imm);
+ } else {
+ if (imm != 0 || shift != 0)
+ di->di_printf(", #0x%x", imm);
+ if (shift != 0)
+ di->di_printf(" LSL #12");
+ }
+ break;
+ case TYPE_02:
+ /* OP <RT>, [<RN>, #<imm>]{!}] SF32/64
+ OP <RT>, [<RN>], #<imm>{!} SF32/64
+ OP <RT>, <RN>, <RM> {, EXTEND AMOUNT } */
+
+ /* Mandatory tokens */
+ ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
+ ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
+ if (ret != 0) {
+ printf("ERROR: Missing mandatory token for op %s type %d\n",
+ i_ptr->name, i_ptr->type);
+ goto undefined;
+ }
+
+ /* Optional tokens */
+ arm64_disasm_read_token(i_ptr, insn, "OPTION", &option);
+ arm64_disasm_read_token(i_ptr, insn, "SCALE", &scale);
+ rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
+
+ if (rm_absent) {
+ /*
+ * In unsigned operation, shift immediate value
+ * and reset options to default.
+ */
+ if (sign_ext == 0) {
+ imm = imm << ((insn >> ARM_INSN_SIZE_OFFSET) &
+ ARM_INSN_SIZE_MASK);
+ option = 0;
+ }
+ switch (option) {
+ case 0x0:
+ pre = 0;
+ inside = 1;
+ break;
+ case 0x1:
+ pre = 0;
+ inside = 0;
+ break;
+ case 0x2:
+ default:
+ pre = 1;
+ inside = 1;
+ break;
+ }
+
+ di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt));
+ if (inside != 0) {
+ di->di_printf("[%s", arm64_reg(1, rn));
+ if (imm != 0)
+ di->di_printf(", #%d", imm);
+ di->di_printf("]");
+ } else {
+ di->di_printf("[%s]", arm64_reg(1, rn));
+ if (imm != 0)
+ di->di_printf(", #%d", imm);
+ }
+ if (pre != 0)
+ di->di_printf("!");
+ } else {
+ /* Last bit of option field determines 32/64 bit offset */
+ di->di_printf("%s\t%s, [%s, %s", i_ptr->name,
+ arm64_reg(sf, rt), arm64_reg(1, rn),
+ arm64_reg(option & 1, rm));
+
+ /* Calculate amount, it's op(31:30) */
+ amount = (insn >> ARM_INSN_SIZE_OFFSET) &
+ ARM_INSN_SIZE_MASK;
+
+ switch (option) {
+ case 0x2:
+ di->di_printf(", uxtw #%d", amount);
+ break;
+ case 0x3:
+ if (scale != 0)
+ di->di_printf(", lsl #%d", amount);
+ break;
+ case 0x6:
+ di->di_printf(", sxtw #%d", amount);
+ break;
+ case 0x7:
+				di->di_printf(", sxtx #%d", amount);
+ break;
+ default:
+ di->di_printf(", RSVD");
+ break;
+ }
+ di->di_printf("]");
+ }
+
+ break;
+
+ case TYPE_03:
+ /* OP <RT>, #imm SF32/64 */
+
+ /* Mandatory tokens */
+ ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
+ if (ret != 0) {
+ printf("ERROR: Missing mandatory token for op %s type %d\n",
+ i_ptr->name, i_ptr->type);
+ goto undefined;
+ }
+
+ di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt));
+ if (i_ptr->special_ops & OP_LITERAL)
+ di->di_printf("0x%lx", loc + imm);
+ else
+ di->di_printf("#%d", imm);
+
+ break;
+ default:
+ goto undefined;
+ }
+
+ di->di_printf("\n");
+	return (loc + INSN_SIZE);
+
+undefined:
+ di->di_printf("undefined\t%08x\n", insn);
+	return (loc + INSN_SIZE);
+}
+
+/* Parse format strings at the very beginning */
+SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES,
+ SI_ORDER_FIRST, arm64_disasm_generate_masks, arm64_i);
diff --git a/sys/arm64/arm64/dump_machdep.c b/sys/arm64/arm64/dump_machdep.c
new file mode 100644
index 000000000000..d92777fea051
--- /dev/null
+++ b/sys/arm64/arm64/dump_machdep.c
@@ -0,0 +1,73 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/kerneldump.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+
+#include <machine/dump.h>
+
+int do_minidump = 1;
+TUNABLE_INT("debug.minidump", &do_minidump);
+SYSCTL_INT(_debug, OID_AUTO, minidump, CTLFLAG_RW, &do_minidump, 0,
+ "Enable mini crash dumps");
+
+void
+dumpsys_wbinv_all(void)
+{
+
+ printf("dumpsys_wbinv_all\n");
+}
+
+void
+dumpsys_map_chunk(vm_paddr_t pa, size_t chunk __unused, void **va)
+{
+
+ printf("dumpsys_map_chunk\n");
+ while(1);
+}
+
+/*
+ * Add a header to be used by libkvm to get the va to pa delta
+ */
+int
+dumpsys_write_aux_headers(struct dumperinfo *di)
+{
+
+	printf("dumpsys_write_aux_headers\n");
+ return (0);
+}
diff --git a/sys/arm64/arm64/efirt_machdep.c b/sys/arm64/arm64/efirt_machdep.c
new file mode 100644
index 000000000000..cd4e5d7bae00
--- /dev/null
+++ b/sys/arm64/arm64/efirt_machdep.c
@@ -0,0 +1,280 @@
+/*-
+ * Copyright (c) 2004 Marcel Moolenaar
+ * Copyright (c) 2001 Doug Rabson
+ * Copyright (c) 2016 The FreeBSD Foundation
+ * Copyright (c) 2017 Andrew Turner
+ * All rights reserved.
+ *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
+ * ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/efi.h>
+#include <sys/kernel.h>
+#include <sys/linker.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/systm.h>
+#include <sys/vmmeter.h>
+
+#include <machine/metadata.h>
+#include <machine/pcb.h>
+#include <machine/pte.h>
+#include <machine/vfp.h>
+#include <machine/vmparam.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+static vm_object_t obj_1t1_pt;
+static vm_pindex_t efi_1t1_idx;
+static pd_entry_t *efi_l0;
+static uint64_t efi_ttbr0;
+
+void
+efi_destroy_1t1_map(void)
+{
+ vm_page_t m;
+
+ if (obj_1t1_pt != NULL) {
+ VM_OBJECT_RLOCK(obj_1t1_pt);
+ TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq)
+ m->ref_count = VPRC_OBJREF;
+ vm_wire_sub(obj_1t1_pt->resident_page_count);
+ VM_OBJECT_RUNLOCK(obj_1t1_pt);
+ vm_object_deallocate(obj_1t1_pt);
+ }
+
+ obj_1t1_pt = NULL;
+ efi_1t1_idx = 0;
+ efi_l0 = NULL;
+ efi_ttbr0 = 0;
+}
+
+static vm_page_t
+efi_1t1_page(void)
+{
+
+ return (vm_page_grab(obj_1t1_pt, efi_1t1_idx++, VM_ALLOC_NOBUSY |
+ VM_ALLOC_WIRED | VM_ALLOC_ZERO));
+}
+
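+/*
+ * Illustrative note: with the 4 KB translation granule used here, a
+ * 48-bit VA is split into L0 index = bits 47:39, L1 = 38:30, L2 = 29:21
+ * and L3 = 20:12, so e.g. va 0x40001000 walks L0 slot 0, L1 slot 1,
+ * L2 slot 0 and L3 slot 1 before the final 4 KB page is mapped.
+ */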
+static pt_entry_t *
+efi_1t1_l3(vm_offset_t va)
+{
+ pd_entry_t *l0, *l1, *l2;
+ pt_entry_t *l3;
+ vm_pindex_t l0_idx, l1_idx, l2_idx;
+ vm_page_t m;
+ vm_paddr_t mphys;
+
+ l0_idx = pmap_l0_index(va);
+ l0 = &efi_l0[l0_idx];
+ if (*l0 == 0) {
+ m = efi_1t1_page();
+ mphys = VM_PAGE_TO_PHYS(m);
+ *l0 = mphys | L0_TABLE;
+ } else {
+ mphys = *l0 & ~ATTR_MASK;
+ }
+
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(mphys);
+ l1_idx = pmap_l1_index(va);
+ l1 += l1_idx;
+ if (*l1 == 0) {
+ m = efi_1t1_page();
+ mphys = VM_PAGE_TO_PHYS(m);
+ *l1 = mphys | L1_TABLE;
+ } else {
+ mphys = *l1 & ~ATTR_MASK;
+ }
+
+ l2 = (pd_entry_t *)PHYS_TO_DMAP(mphys);
+ l2_idx = pmap_l2_index(va);
+ l2 += l2_idx;
+ if (*l2 == 0) {
+ m = efi_1t1_page();
+ mphys = VM_PAGE_TO_PHYS(m);
+ *l2 = mphys | L2_TABLE;
+ } else {
+ mphys = *l2 & ~ATTR_MASK;
+ }
+
+ l3 = (pt_entry_t *)PHYS_TO_DMAP(mphys);
+ l3 += pmap_l3_index(va);
+ KASSERT(*l3 == 0, ("%s: Already mapped: va %#jx *pt %#jx", __func__,
+ va, *l3));
+
+ return (l3);
+}
+
+/*
+ * Map a physical address from EFI runtime space into KVA space. Returns 0 to
+ * indicate a failed mapping so that the caller may handle error.
+ */
+vm_offset_t
+efi_phys_to_kva(vm_paddr_t paddr)
+{
+
+ if (!PHYS_IN_DMAP(paddr))
+ return (0);
+ return (PHYS_TO_DMAP(paddr));
+}
+
+/*
+ * Create the 1:1 virtual to physical map for EFI
+ */
+bool
+efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz)
+{
+ struct efi_md *p;
+ pt_entry_t *l3, l3_attr;
+ vm_offset_t va;
+ vm_page_t efi_l0_page;
+ uint64_t idx;
+ int i, mode;
+
+ obj_1t1_pt = vm_pager_allocate(OBJT_PHYS, NULL, L0_ENTRIES +
+ L0_ENTRIES * Ln_ENTRIES + L0_ENTRIES * Ln_ENTRIES * Ln_ENTRIES +
+ L0_ENTRIES * Ln_ENTRIES * Ln_ENTRIES * Ln_ENTRIES,
+ VM_PROT_ALL, 0, NULL);
+ VM_OBJECT_WLOCK(obj_1t1_pt);
+ efi_l0_page = efi_1t1_page();
+ VM_OBJECT_WUNLOCK(obj_1t1_pt);
+ efi_l0 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(efi_l0_page));
+ efi_ttbr0 = ASID_TO_OPERAND(ASID_RESERVED_FOR_EFI) |
+ VM_PAGE_TO_PHYS(efi_l0_page);
+
+ for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p,
+ descsz)) {
+ if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
+ continue;
+ if (p->md_virt != NULL && (uint64_t)p->md_virt != p->md_phys) {
+ if (bootverbose)
+ printf("EFI Runtime entry %d is mapped\n", i);
+ goto fail;
+ }
+ if ((p->md_phys & EFI_PAGE_MASK) != 0) {
+ if (bootverbose)
+ printf("EFI Runtime entry %d is not aligned\n",
+ i);
+ goto fail;
+ }
+ if (p->md_phys + p->md_pages * EFI_PAGE_SIZE < p->md_phys ||
+ p->md_phys + p->md_pages * EFI_PAGE_SIZE >=
+ VM_MAXUSER_ADDRESS) {
+			printf("EFI Runtime entry %d is not mappable for RT: "
+ "base %#016jx %#jx pages\n",
+ i, (uintmax_t)p->md_phys,
+ (uintmax_t)p->md_pages);
+ goto fail;
+ }
+ if ((p->md_attr & EFI_MD_ATTR_WB) != 0)
+ mode = VM_MEMATTR_WRITE_BACK;
+ else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
+ mode = VM_MEMATTR_WRITE_THROUGH;
+ else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
+ mode = VM_MEMATTR_WRITE_COMBINING;
+ else
+ mode = VM_MEMATTR_DEVICE;
+
+ printf("MAP %lx mode %x pages %lu\n", p->md_phys, mode, p->md_pages);
+
+ l3_attr = ATTR_DEFAULT | ATTR_S1_IDX(mode) |
+ ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_nG | L3_PAGE;
+ if (mode == VM_MEMATTR_DEVICE || p->md_attr & EFI_MD_ATTR_XP)
+ l3_attr |= ATTR_S1_XN;
+
+ VM_OBJECT_WLOCK(obj_1t1_pt);
+ for (va = p->md_phys, idx = 0; idx < p->md_pages; idx++,
+ va += PAGE_SIZE) {
+ l3 = efi_1t1_l3(va);
+ *l3 = va | l3_attr;
+ }
+ VM_OBJECT_WUNLOCK(obj_1t1_pt);
+ }
+
+ return (true);
+fail:
+ efi_destroy_1t1_map();
+ return (false);
+}
+
+int
+efi_arch_enter(void)
+{
+
+ CRITICAL_ASSERT(curthread);
+
+ /*
+ * Temporarily switch to EFI's page table. However, we leave curpmap
+ * unchanged in order to prevent its ASID from being reclaimed before
+ * we switch back to its page table in efi_arch_leave().
+ */
+ set_ttbr0(efi_ttbr0);
+ if (PCPU_GET(bcast_tlbi_workaround) != 0)
+ invalidate_local_icache();
+
+ return (0);
+}
+
+void
+efi_arch_leave(void)
+{
+
+ /*
+ * Restore the pcpu pointer. Some UEFI implementations trash it and
+ * we don't store it before calling into them. To fix this we need
+ * to restore it after returning to the kernel context. As reading
+ * curpmap will access x18 we need to restore it before loading
+ * the pmap pointer.
+ */
+ __asm __volatile(
+ "mrs x18, tpidr_el1 \n"
+ );
+ set_ttbr0(pmap_to_ttbr0(PCPU_GET(curpmap)));
+ if (PCPU_GET(bcast_tlbi_workaround) != 0)
+ invalidate_local_icache();
+}
+
+int
+efi_rt_arch_call(struct efirt_callinfo *ec)
+{
+
+ panic("not implemented");
+}
diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c
new file mode 100644
index 000000000000..f99523cb6362
--- /dev/null
+++ b/sys/arm64/arm64/elf32_machdep.c
@@ -0,0 +1,261 @@
+/*-
+ * Copyright (c) 2014, 2015 The FreeBSD Foundation.
+ * Copyright (c) 2014, 2017 Andrew Turner.
+ * Copyright (c) 2018 Olivier Houchard
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * sponsorship from the FreeBSD Foundation.
+ *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#define __ELF_WORD_SIZE 32
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/exec.h>
+#include <sys/imgact.h>
+#include <sys/linker.h>
+#include <sys/proc.h>
+#include <sys/sysent.h>
+#include <sys/imgact_elf.h>
+#include <sys/syscall.h>
+#include <sys/signalvar.h>
+#include <sys/vnode.h>
+
+#include <machine/elf.h>
+
+#include <compat/freebsd32/freebsd32_util.h>
+
+#define FREEBSD32_MINUSER 0x00001000
+#define FREEBSD32_MAXUSER ((1ul << 32) - PAGE_SIZE)
+#define FREEBSD32_SHAREDPAGE (FREEBSD32_MAXUSER - PAGE_SIZE)
+#define FREEBSD32_USRSTACK FREEBSD32_SHAREDPAGE
+
+extern const char *freebsd32_syscallnames[];
+
+extern char aarch32_sigcode[];
+extern int sz_aarch32_sigcode;
+
+static int freebsd32_fetch_syscall_args(struct thread *td);
+static void freebsd32_setregs(struct thread *td, struct image_params *imgp,
+    uintptr_t stack);
+static void freebsd32_set_syscall_retval(struct thread *, int);
+
+static boolean_t elf32_arm_abi_supported(struct image_params *, int32_t *,
+ uint32_t *);
+
+extern void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
+
+static struct sysentvec elf32_freebsd_sysvec = {
+ .sv_size = SYS_MAXSYSCALL,
+ .sv_table = freebsd32_sysent,
+ .sv_errsize = 0,
+ .sv_errtbl = NULL,
+ .sv_transtrap = NULL,
+ .sv_fixup = elf32_freebsd_fixup,
+ .sv_sendsig = freebsd32_sendsig,
+ .sv_sigcode = aarch32_sigcode,
+ .sv_szsigcode = &sz_aarch32_sigcode,
+ .sv_name = "FreeBSD ELF32",
+ .sv_coredump = elf32_coredump,
+ .sv_imgact_try = NULL,
+ .sv_minsigstksz = MINSIGSTKSZ,
+ .sv_minuser = FREEBSD32_MINUSER,
+ .sv_maxuser = FREEBSD32_MAXUSER,
+ .sv_usrstack = FREEBSD32_USRSTACK,
+ .sv_psstrings = FREEBSD32_PS_STRINGS,
+ .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE,
+ .sv_copyout_auxargs = elf32_freebsd_copyout_auxargs,
+ .sv_copyout_strings = freebsd32_copyout_strings,
+ .sv_setregs = freebsd32_setregs,
+ .sv_fixlimit = NULL, // XXX
+ .sv_maxssiz = NULL,
+ .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP | SV_TIMEKEEP,
+ .sv_set_syscall_retval = freebsd32_set_syscall_retval,
+ .sv_fetch_syscall_args = freebsd32_fetch_syscall_args,
+ .sv_syscallnames = freebsd32_syscallnames,
+ .sv_shared_page_base = FREEBSD32_SHAREDPAGE,
+ .sv_shared_page_len = PAGE_SIZE,
+ .sv_schedtail = NULL,
+ .sv_thread_detach = NULL,
+ .sv_trap = NULL,
+};
+INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec);
+
+static Elf32_Brandinfo freebsd32_brand_info = {
+ .brand = ELFOSABI_FREEBSD,
+ .machine = EM_ARM,
+ .compat_3_brand = "FreeBSD",
+ .emul_path = NULL,
+ .interp_path = "/libexec/ld-elf.so.1",
+ .sysvec = &elf32_freebsd_sysvec,
+ .interp_newpath = "/libexec/ld-elf32.so.1",
+ .brand_note = &elf32_freebsd_brandnote,
+ .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
+ .header_supported= elf32_arm_abi_supported,
+};
+
+SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST,
+ (sysinit_cfunc_t)elf32_insert_brand_entry, &freebsd32_brand_info);
+
+static boolean_t
+elf32_arm_abi_supported(struct image_params *imgp, int32_t *osrel __unused,
+ uint32_t *fctl0 __unused)
+{
+ const Elf32_Ehdr *hdr;
+
+ /* Check if we support AArch32 */
+ if (ID_AA64PFR0_EL0_VAL(READ_SPECIALREG(id_aa64pfr0_el1)) !=
+ ID_AA64PFR0_EL0_64_32)
+ return (FALSE);
+
+#define EF_ARM_EABI_VERSION(x) (((x) & EF_ARM_EABIMASK) >> 24)
+#define EF_ARM_EABI_FREEBSD_MIN 4
+ hdr = (const Elf32_Ehdr *)imgp->image_header;
+ if (EF_ARM_EABI_VERSION(hdr->e_flags) < EF_ARM_EABI_FREEBSD_MIN) {
+ if (bootverbose)
+			uprintf("Attempting to execute non-EABI binary "
+			    "(rev %d) image %s\n",
+ EF_ARM_EABI_VERSION(hdr->e_flags),
+ imgp->args->fname);
+ return (FALSE);
+ }
+
+ return (TRUE);
+}
+
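+/*
+ * Illustrative example: for a direct 6-argument AArch32 syscall the
+ * syscall number is taken from r7, arguments 0-3 arrive in r0-r3
+ * (tf_x[0..3]) and the remaining two are copied in from the user stack
+ * pointed to by r13 (SP_usr), each as a 32-bit value.
+ */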
+static int
+freebsd32_fetch_syscall_args(struct thread *td)
+{
+ struct proc *p;
+ register_t *ap;
+ struct syscall_args *sa;
+ int error, i, nap;
+ unsigned int args[4];
+
+ nap = 4;
+ p = td->td_proc;
+ ap = td->td_frame->tf_x;
+ sa = &td->td_sa;
+
+ /* r7 is the syscall id */
+ sa->code = td->td_frame->tf_x[7];
+
+ if (sa->code == SYS_syscall) {
+ sa->code = *ap++;
+ nap--;
+ } else if (sa->code == SYS___syscall) {
+ sa->code = ap[1];
+ nap -= 2;
+ ap += 2;
+ }
+
+ if (sa->code >= p->p_sysent->sv_size)
+ sa->callp = &p->p_sysent->sv_table[0];
+ else
+ sa->callp = &p->p_sysent->sv_table[sa->code];
+
+ sa->narg = sa->callp->sy_narg;
+ for (i = 0; i < nap; i++)
+ sa->args[i] = ap[i];
+ if (sa->narg > nap) {
+ if ((sa->narg - nap) > nitems(args))
+			panic("Too many system call arguments");
+ error = copyin((void *)td->td_frame->tf_x[13], args,
+		    (sa->narg - nap) * sizeof(int));
+		if (error != 0)
+			return (error);
+ for (i = 0; i < (sa->narg - nap); i++)
+ sa->args[i + nap] = args[i];
+ }
+
+ td->td_retval[0] = 0;
+ td->td_retval[1] = 0;
+
+ return (0);
+}
+
+static void
+freebsd32_set_syscall_retval(struct thread *td, int error)
+{
+ struct trapframe *frame;
+
+ frame = td->td_frame;
+ switch (error) {
+ case 0:
+ frame->tf_x[0] = td->td_retval[0];
+ frame->tf_x[1] = td->td_retval[1];
+ frame->tf_spsr &= ~PSR_C;
+ break;
+ case ERESTART:
+ /*
+ * Reconstruct the pc to point at the swi.
+ */
+ if ((frame->tf_spsr & PSR_T) != 0)
+			frame->tf_elr -= 2;	/* THUMB_INSN_SIZE */
+ else
+			frame->tf_elr -= 4;	/* INSN_SIZE */
+ break;
+ case EJUSTRETURN:
+ /* nothing to do */
+ break;
+ default:
+ frame->tf_x[0] = error;
+ frame->tf_spsr |= PSR_C;
+ break;
+ }
+}
+
+static void
+freebsd32_setregs(struct thread *td, struct image_params *imgp,
+ uintptr_t stack)
+{
+ struct trapframe *tf = td->td_frame;
+
+ memset(tf, 0, sizeof(struct trapframe));
+
+ /*
+ * We need to set x0 for init as it doesn't call
+ * cpu_set_syscall_retval to copy the value. We also
+ * need to set td_retval for the cases where we do.
+ */
+ tf->tf_x[0] = stack;
+ /* SP_usr is mapped to x13 */
+ tf->tf_x[13] = stack;
+ /* LR_usr is mapped to x14 */
+ tf->tf_x[14] = imgp->entry_addr;
+ tf->tf_elr = imgp->entry_addr;
+ tf->tf_spsr = PSR_M_32;
+}
+
+void
+elf32_dump_thread(struct thread *td, void *dst, size_t *off)
+{
+ /* XXX: VFP */
+}
diff --git a/sys/arm64/arm64/elf_machdep.c b/sys/arm64/arm64/elf_machdep.c
new file mode 100644
index 000000000000..392cdfaee246
--- /dev/null
+++ b/sys/arm64/arm64/elf_machdep.c
@@ -0,0 +1,284 @@
+/*-
+ * Copyright (c) 2014, 2015 The FreeBSD Foundation.
+ * Copyright (c) 2014 Andrew Turner.
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * sponsorship from the FreeBSD Foundation.
+ *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/exec.h>
+#include <sys/imgact.h>
+#include <sys/linker.h>
+#include <sys/proc.h>
+#include <sys/sysent.h>
+#include <sys/imgact_elf.h>
+#include <sys/syscall.h>
+#include <sys/signalvar.h>
+#include <sys/vnode.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+
+#include <machine/elf.h>
+#include <machine/md_var.h>
+
+#include "linker_if.h"
+
+u_long elf_hwcap;
+
+static struct sysentvec elf64_freebsd_sysvec = {
+ .sv_size = SYS_MAXSYSCALL,
+ .sv_table = sysent,
+ .sv_errsize = 0,
+ .sv_errtbl = NULL,
+ .sv_transtrap = NULL,
+ .sv_fixup = __elfN(freebsd_fixup),
+ .sv_sendsig = sendsig,
+ .sv_sigcode = sigcode,
+ .sv_szsigcode = &szsigcode,
+ .sv_name = "FreeBSD ELF64",
+ .sv_coredump = __elfN(coredump),
+ .sv_imgact_try = NULL,
+ .sv_minsigstksz = MINSIGSTKSZ,
+ .sv_minuser = VM_MIN_ADDRESS,
+ .sv_maxuser = VM_MAXUSER_ADDRESS,
+ .sv_usrstack = USRSTACK,
+ .sv_psstrings = PS_STRINGS,
+ .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE,
+ .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs),
+ .sv_copyout_strings = exec_copyout_strings,
+ .sv_setregs = exec_setregs,
+ .sv_fixlimit = NULL,
+ .sv_maxssiz = NULL,
+ .sv_flags = SV_SHP | SV_TIMEKEEP | SV_ABI_FREEBSD | SV_LP64 |
+ SV_ASLR,
+ .sv_set_syscall_retval = cpu_set_syscall_retval,
+ .sv_fetch_syscall_args = cpu_fetch_syscall_args,
+ .sv_syscallnames = syscallnames,
+ .sv_shared_page_base = SHAREDPAGE,
+ .sv_shared_page_len = PAGE_SIZE,
+ .sv_schedtail = NULL,
+ .sv_thread_detach = NULL,
+ .sv_trap = NULL,
+ .sv_hwcap = &elf_hwcap,
+};
+INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec);
+
+static Elf64_Brandinfo freebsd_brand_info = {
+ .brand = ELFOSABI_FREEBSD,
+ .machine = EM_AARCH64,
+ .compat_3_brand = "FreeBSD",
+ .emul_path = NULL,
+ .interp_path = "/libexec/ld-elf.so.1",
+ .sysvec = &elf64_freebsd_sysvec,
+ .interp_newpath = NULL,
+ .brand_note = &elf64_freebsd_brandnote,
+ .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
+};
+
+SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST,
+ (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info);
+
+void
+elf64_dump_thread(struct thread *td __unused, void *dst __unused,
+ size_t *off __unused)
+{
+
+}
+
+bool
+elf_is_ifunc_reloc(Elf_Size r_info __unused)
+{
+
+ return (ELF_R_TYPE(r_info) == R_AARCH64_IRELATIVE);
+}
+
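+/*
+ * Worked example (illustrative): an R_AARCH64_CALL26 relocation with a
+ * resolved displacement of +0x1000 from the instruction is processed
+ * with msb = 27, lsb = 2, so the bounds check accepts displacements in
+ * [-2^27, 2^27) and 0x1000 >> 2 = 0x400 is OR-ed into the low 26 bits
+ * of the branch instruction.
+ */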
+static int
+reloc_instr_imm(Elf32_Addr *where, Elf_Addr val, u_int msb, u_int lsb)
+{
+
+ /* Check bounds: upper bits must be all ones or all zeros. */
+ if ((uint64_t)((int64_t)val >> (msb + 1)) + 1 > 1)
+ return (-1);
+ val >>= lsb;
+ val &= (1 << (msb - lsb + 1)) - 1;
+ *where |= (Elf32_Addr)val;
+ return (0);
+}
+
+/*
+ * Process a relocation. Support for some static relocations is required
+ * in order for the -zifunc-noplt optimization to work.
+ */
+static int
+elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
+ int type, int flags, elf_lookup_fn lookup)
+{
+#define ARM64_ELF_RELOC_LOCAL (1 << 0)
+#define ARM64_ELF_RELOC_LATE_IFUNC (1 << 1)
+ Elf_Addr *where, addr, addend, val;
+ Elf_Word rtype, symidx;
+ const Elf_Rel *rel;
+ const Elf_Rela *rela;
+ int error;
+
+ switch (type) {
+ case ELF_RELOC_REL:
+ rel = (const Elf_Rel *)data;
+ where = (Elf_Addr *) (relocbase + rel->r_offset);
+ addend = *where;
+ rtype = ELF_R_TYPE(rel->r_info);
+ symidx = ELF_R_SYM(rel->r_info);
+ break;
+ case ELF_RELOC_RELA:
+ rela = (const Elf_Rela *)data;
+ where = (Elf_Addr *) (relocbase + rela->r_offset);
+ addend = rela->r_addend;
+ rtype = ELF_R_TYPE(rela->r_info);
+ symidx = ELF_R_SYM(rela->r_info);
+ break;
+ default:
+ panic("unknown reloc type %d\n", type);
+ }
+
+ if ((flags & ARM64_ELF_RELOC_LATE_IFUNC) != 0) {
+ KASSERT(type == ELF_RELOC_RELA,
+ ("Only RELA ifunc relocations are supported"));
+ if (rtype != R_AARCH64_IRELATIVE)
+ return (0);
+ }
+
+ if ((flags & ARM64_ELF_RELOC_LOCAL) != 0) {
+ if (rtype == R_AARCH64_RELATIVE)
+ *where = elf_relocaddr(lf, relocbase + addend);
+ return (0);
+ }
+
+ error = 0;
+ switch (rtype) {
+ case R_AARCH64_NONE:
+ case R_AARCH64_RELATIVE:
+ break;
+ case R_AARCH64_TSTBR14:
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
+ return (-1);
+ error = reloc_instr_imm((Elf32_Addr *)where,
+ addr + addend - (Elf_Addr)where, 15, 2);
+ break;
+ case R_AARCH64_CONDBR19:
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
+ return (-1);
+ error = reloc_instr_imm((Elf32_Addr *)where,
+ addr + addend - (Elf_Addr)where, 20, 2);
+ break;
+ case R_AARCH64_JUMP26:
+ case R_AARCH64_CALL26:
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
+ return (-1);
+ error = reloc_instr_imm((Elf32_Addr *)where,
+ addr + addend - (Elf_Addr)where, 27, 2);
+ break;
+ case R_AARCH64_ABS64:
+ case R_AARCH64_GLOB_DAT:
+ case R_AARCH64_JUMP_SLOT:
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
+ return (-1);
+ *where = addr + addend;
+ break;
+ case R_AARCH64_IRELATIVE:
+ addr = relocbase + addend;
+ val = ((Elf64_Addr (*)(void))addr)();
+ if (*where != val)
+ *where = val;
+ break;
+ default:
+ printf("kldload: unexpected relocation type %d\n", rtype);
+ return (-1);
+ }
+ return (error);
+}
+
+int
+elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data,
+ int type, elf_lookup_fn lookup)
+{
+
+ return (elf_reloc_internal(lf, relocbase, data, type,
+ ARM64_ELF_RELOC_LOCAL, lookup));
+}
+
+/* Process one elf relocation with addend. */
+int
+elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type,
+ elf_lookup_fn lookup)
+{
+
+ return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup));
+}
+
+int
+elf_reloc_late(linker_file_t lf, Elf_Addr relocbase, const void *data,
+ int type, elf_lookup_fn lookup)
+{
+
+ return (elf_reloc_internal(lf, relocbase, data, type,
+ ARM64_ELF_RELOC_LATE_IFUNC, lookup));
+}
+
+int
+elf_cpu_load_file(linker_file_t lf)
+{
+
+ if (lf->id != 1)
+ cpu_icache_sync_range((vm_offset_t)lf->address, lf->size);
+ return (0);
+}
+
+int
+elf_cpu_unload_file(linker_file_t lf __unused)
+{
+
+ return (0);
+}
+
+int
+elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused)
+{
+
+ return (0);
+}
diff --git a/sys/arm64/arm64/exception.S b/sys/arm64/arm64/exception.S
new file mode 100644
index 000000000000..123f73b49734
--- /dev/null
+++ b/sys/arm64/arm64/exception.S
@@ -0,0 +1,255 @@
+/*-
+ * Copyright (c) 2014 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <machine/asm.h>
+#include <machine/armreg.h>
+__FBSDID("$FreeBSD$");
+
+#include "assym.inc"
+
+ .text
+
+.macro save_registers el
+.if \el == 1
+ mov x18, sp
+ sub sp, sp, #128
+.endif
+ sub sp, sp, #(TF_SIZE + 16)
+ stp x29, x30, [sp, #(TF_SIZE)]
+ stp x28, x29, [sp, #(TF_X + 28 * 8)]
+ stp x26, x27, [sp, #(TF_X + 26 * 8)]
+ stp x24, x25, [sp, #(TF_X + 24 * 8)]
+ stp x22, x23, [sp, #(TF_X + 22 * 8)]
+ stp x20, x21, [sp, #(TF_X + 20 * 8)]
+ stp x18, x19, [sp, #(TF_X + 18 * 8)]
+ stp x16, x17, [sp, #(TF_X + 16 * 8)]
+ stp x14, x15, [sp, #(TF_X + 14 * 8)]
+ stp x12, x13, [sp, #(TF_X + 12 * 8)]
+ stp x10, x11, [sp, #(TF_X + 10 * 8)]
+ stp x8, x9, [sp, #(TF_X + 8 * 8)]
+ stp x6, x7, [sp, #(TF_X + 6 * 8)]
+ stp x4, x5, [sp, #(TF_X + 4 * 8)]
+ stp x2, x3, [sp, #(TF_X + 2 * 8)]
+ stp x0, x1, [sp, #(TF_X + 0 * 8)]
+ mrs x10, elr_el1
+ mrs x11, spsr_el1
+ mrs x12, esr_el1
+.if \el == 0
+ mrs x18, sp_el0
+.endif
+ str x10, [sp, #(TF_ELR)]
+ stp w11, w12, [sp, #(TF_SPSR)]
+ stp x18, lr, [sp, #(TF_SP)]
+ mrs x18, tpidr_el1
+ add x29, sp, #(TF_SIZE)
+.if \el == 0
+ /* Apply the SSBD (CVE-2018-3639) workaround if needed */
+ ldr x1, [x18, #PC_SSBD]
+ cbz x1, 1f
+ mov w0, #1
+ blr x1
+1:
+
+ ldr x0, [x18, #(PC_CURTHREAD)]
+ bl dbg_monitor_enter
+.endif
+ msr daifclr, #8 /* Enable the debug exception */
+.endm
+
+.macro restore_registers el
+.if \el == 1
+ /*
+ * Disable interrupts and debug exceptions, x18 may change in the
+ * interrupt exception handler. For EL0 exceptions, do_ast already
+ * did this.
+ */
+ msr daifset, #10
+.endif
+.if \el == 0
+ ldr x0, [x18, #PC_CURTHREAD]
+ mov x1, sp
+ bl dbg_monitor_exit
+
+ /* Remove the SSBD (CVE-2018-3639) workaround if needed */
+ ldr x1, [x18, #PC_SSBD]
+ cbz x1, 1f
+ mov w0, #0
+ blr x1
+1:
+.endif
+ ldp x18, lr, [sp, #(TF_SP)]
+ ldp x10, x11, [sp, #(TF_ELR)]
+.if \el == 0
+ msr sp_el0, x18
+.endif
+ msr spsr_el1, x11
+ msr elr_el1, x10
+ ldp x0, x1, [sp, #(TF_X + 0 * 8)]
+ ldp x2, x3, [sp, #(TF_X + 2 * 8)]
+ ldp x4, x5, [sp, #(TF_X + 4 * 8)]
+ ldp x6, x7, [sp, #(TF_X + 6 * 8)]
+ ldp x8, x9, [sp, #(TF_X + 8 * 8)]
+ ldp x10, x11, [sp, #(TF_X + 10 * 8)]
+ ldp x12, x13, [sp, #(TF_X + 12 * 8)]
+ ldp x14, x15, [sp, #(TF_X + 14 * 8)]
+ ldp x16, x17, [sp, #(TF_X + 16 * 8)]
+.if \el == 0
+ /*
+ * We only restore the callee saved registers when returning to
+ * userland as they may have been updated by a system call or signal.
+ */
+ ldp x18, x19, [sp, #(TF_X + 18 * 8)]
+ ldp x20, x21, [sp, #(TF_X + 20 * 8)]
+ ldp x22, x23, [sp, #(TF_X + 22 * 8)]
+ ldp x24, x25, [sp, #(TF_X + 24 * 8)]
+ ldp x26, x27, [sp, #(TF_X + 26 * 8)]
+ ldp x28, x29, [sp, #(TF_X + 28 * 8)]
+.else
+ ldr x29, [sp, #(TF_X + 29 * 8)]
+.endif
+.if \el == 0
+ add sp, sp, #(TF_SIZE + 16)
+.else
+ mov sp, x18
+ mrs x18, tpidr_el1
+.endif
+.endm
+
+.macro do_ast
+ mrs x19, daif
+ /* Make sure the IRQs are enabled before calling ast() */
+ bic x19, x19, #PSR_I
+1:
+ /* Disable interrupts */
+ msr daifset, #10
+
+ /* Read the current thread flags */
+ ldr x1, [x18, #PC_CURTHREAD] /* Load curthread */
+ ldr x2, [x1, #TD_FLAGS]
+
+ /* Check if we have either bits set */
+ mov x3, #((TDF_ASTPENDING|TDF_NEEDRESCHED) >> 8)
+ lsl x3, x3, #8
+ and x2, x2, x3
+ cbz x2, 2f
+
+ /* Restore interrupts */
+ msr daif, x19
+
+ /* handle the ast */
+ mov x0, sp
+ bl _C_LABEL(ast)
+
+ /* Re-check for new ast scheduled */
+ b 1b
+2:
+.endm
+
+ENTRY(handle_el1h_sync)
+ save_registers 1
+ ldr x0, [x18, #PC_CURTHREAD]
+ mov x1, sp
+ bl do_el1h_sync
+ restore_registers 1
+ ERET
+END(handle_el1h_sync)
+
+ENTRY(handle_el1h_irq)
+ save_registers 1
+ mov x0, sp
+ bl intr_irq_handler
+ restore_registers 1
+ ERET
+END(handle_el1h_irq)
+
+ENTRY(handle_el0_sync)
+ save_registers 0
+ ldr x0, [x18, #PC_CURTHREAD]
+ mov x1, sp
+ str x1, [x0, #TD_FRAME]
+ bl do_el0_sync
+ do_ast
+ restore_registers 0
+ ERET
+END(handle_el0_sync)
+
+ENTRY(handle_el0_irq)
+ save_registers 0
+ mov x0, sp
+ bl intr_irq_handler
+ do_ast
+ restore_registers 0
+ ERET
+END(handle_el0_irq)
+
+ENTRY(handle_serror)
+ save_registers 0
+ mov x0, sp
+1: bl do_serror
+ b 1b
+END(handle_serror)
+
+ENTRY(handle_empty_exception)
+ save_registers 0
+ mov x0, sp
+1: bl unhandled_exception
+ b 1b
+END(handle_empty_exception)
+
+.macro vempty
+ .align 7
+ b handle_empty_exception
+.endm
+
+.macro vector name
+ .align 7
+ b handle_\name
+.endm
+
+ .align 11
+ .globl exception_vectors
+exception_vectors:
+ vempty /* Synchronous EL1t */
+ vempty /* IRQ EL1t */
+ vempty /* FIQ EL1t */
+ vempty /* Error EL1t */
+
+ vector el1h_sync /* Synchronous EL1h */
+ vector el1h_irq /* IRQ EL1h */
+ vempty /* FIQ EL1h */
+ vector serror /* Error EL1h */
+
+ vector el0_sync /* Synchronous 64-bit EL0 */
+ vector el0_irq /* IRQ 64-bit EL0 */
+ vempty /* FIQ 64-bit EL0 */
+ vector serror /* Error 64-bit EL0 */
+
+ vector el0_sync /* Synchronous 32-bit EL0 */
+ vector el0_irq /* IRQ 32-bit EL0 */
+ vempty /* FIQ 32-bit EL0 */
+ vector serror /* Error 32-bit EL0 */
+
diff --git a/sys/arm64/arm64/freebsd32_machdep.c b/sys/arm64/arm64/freebsd32_machdep.c
new file mode 100644
index 000000000000..b1e070feb4f6
--- /dev/null
+++ b/sys/arm64/arm64/freebsd32_machdep.c
@@ -0,0 +1,438 @@
+/*-
+ * Copyright (c) 2018 Olivier Houchard
+ * Copyright (c) 2017 Nuxi, https://nuxi.nl/
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/syscallsubr.h>
+#include <sys/ktr.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <machine/armreg.h>
+#ifdef VFP
+#include <machine/vfp.h>
+#endif
+#include <compat/freebsd32/freebsd32_proto.h>
+#include <compat/freebsd32/freebsd32_signal.h>
+
+extern void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
+
+/*
+ * The first two fields of a ucontext_t are the signal mask and the machine
+ * context. The next field is uc_link; we want to avoid destroying the link
+ * when copying out contexts.
+ */
+#define UC32_COPY_SIZE offsetof(ucontext32_t, uc_link)
+
+#ifdef VFP
+static void get_fpcontext32(struct thread *td, mcontext32_vfp_t *);
+#endif
+
+/*
+ * Stubs for machine dependent 32-bits system calls.
+ */
+
+int
+freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap)
+{
+ int error;
+
+#define ARM_SYNC_ICACHE 0
+#define ARM_DRAIN_WRITEBUF 1
+#define ARM_SET_TP 2
+#define ARM_GET_TP 3
+#define ARM_GET_VFPSTATE 4
+
+ switch(uap->op) {
+ case ARM_SET_TP:
+ WRITE_SPECIALREG(tpidr_el0, uap->parms);
+ WRITE_SPECIALREG(tpidrro_el0, uap->parms);
+		return (0);
+ case ARM_SYNC_ICACHE:
+ {
+ struct {
+ uint32_t addr;
+ uint32_t size;
+ } args;
+
+ if ((error = copyin(uap->parms, &args, sizeof(args))) != 0)
+ return (error);
+ if ((uint64_t)args.addr + (uint64_t)args.size > 0xffffffff)
+ return (EINVAL);
+ cpu_icache_sync_range_checked(args.addr, args.size);
+		return (0);
+ }
+ case ARM_GET_VFPSTATE:
+ {
+ mcontext32_vfp_t mcontext_vfp;
+
+ struct {
+ uint32_t mc_vfp_size;
+ uint32_t mc_vfp;
+ } args;
+ if ((error = copyin(uap->parms, &args, sizeof(args))) != 0)
+ return (error);
+ if (args.mc_vfp_size != sizeof(mcontext_vfp))
+ return (EINVAL);
+#ifdef VFP
+ get_fpcontext32(td, &mcontext_vfp);
+#else
+ bzero(&mcontext_vfp, sizeof(mcontext_vfp));
+#endif
+ error = copyout(&mcontext_vfp,
+ (void *)(uintptr_t)args.mc_vfp,
+ sizeof(mcontext_vfp));
+			return (error);
+ }
+ }
+
+ return (EINVAL);
+}
+
+#ifdef VFP
+static void
+get_fpcontext32(struct thread *td, mcontext32_vfp_t *mcp)
+{
+ struct pcb *curpcb;
+ int i;
+
+ critical_enter();
+ curpcb = curthread->td_pcb;
+
+ if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
+ /*
+ * If we have just been running VFP instructions we will
+ * need to save the state to memcpy it below.
+ */
+ vfp_save_state(td, curpcb);
+
+ KASSERT(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate,
+ ("Called get_fpcontext while the kernel is using the VFP"));
+ KASSERT((curpcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0,
+ ("Non-userspace FPU flags set in get_fpcontext"));
+ for (i = 0; i < 32; i++)
+ mcp->mcv_reg[i] = (uint64_t)curpcb->pcb_fpustate.vfp_regs[i];
+ mcp->mcv_fpscr = VFP_FPSCR_FROM_SRCR(curpcb->pcb_fpustate.vfp_fpcr,
+ curpcb->pcb_fpustate.vfp_fpsr);
+ }
+ critical_exit();
+}
+
+static void
+set_fpcontext32(struct thread *td, mcontext32_vfp_t *mcp)
+{
+ struct pcb *pcb;
+ int i;
+
+ critical_enter();
+ pcb = td->td_pcb;
+ if (td == curthread)
+ vfp_discard(td);
+ for (i = 0; i < 32; i++)
+ pcb->pcb_fpustate.vfp_regs[i] = mcp->mcv_reg[i];
+ pcb->pcb_fpustate.vfp_fpsr = VFP_FPSR_FROM_FPSCR(mcp->mcv_fpscr);
+	pcb->pcb_fpustate.vfp_fpcr = VFP_FPCR_FROM_FPSCR(mcp->mcv_fpscr);
+ critical_exit();
+}
+#endif
+
+static void
+get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags)
+{
+ struct pcb *pcb;
+ struct trapframe *tf;
+ int i;
+
+ pcb = td->td_pcb;
+ tf = td->td_frame;
+
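+	/*
+	 * In AArch32 state the general purpose registers r0-r14 are visible
+	 * as x0-x14 in the 64-bit trapframe, while the 32-bit pc and CPSR
+	 * live in ELR and SPSR, so gregset[15] and gregset[16] are filled
+	 * from tf_elr and tf_spsr.
+	 */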
+ if ((flags & GET_MC_CLEAR_RET) != 0) {
+ mcp->mc_gregset[0] = 0;
+ mcp->mc_gregset[16] = tf->tf_spsr & ~PSR_C;
+ } else {
+ mcp->mc_gregset[0] = tf->tf_x[0];
+ mcp->mc_gregset[16] = tf->tf_spsr;
+ }
+ for (i = 1; i < 15; i++)
+ mcp->mc_gregset[i] = tf->tf_x[i];
+ mcp->mc_gregset[15] = tf->tf_elr;
+
+ mcp->mc_vfp_size = 0;
+ mcp->mc_vfp_ptr = 0;
+
+ memset(mcp->mc_spare, 0, sizeof(mcp->mc_spare));
+}
+
+static int
+set_mcontext32(struct thread *td, mcontext32_t *mcp)
+{
+ struct trapframe *tf;
+ mcontext32_vfp_t mc_vfp;
+ int i;
+
+ tf = td->td_frame;
+
+ for (i = 0; i < 15; i++)
+ tf->tf_x[i] = mcp->mc_gregset[i];
+ tf->tf_elr = mcp->mc_gregset[15];
+ tf->tf_spsr = mcp->mc_gregset[16];
+#ifdef VFP
+ if (mcp->mc_vfp_size == sizeof(mc_vfp) && mcp->mc_vfp_ptr != 0) {
+ if (copyin((void *)(uintptr_t)mcp->mc_vfp_ptr, &mc_vfp,
+ sizeof(mc_vfp)) != 0)
+ return (EFAULT);
+ set_fpcontext32(td, &mc_vfp);
+ }
+#endif
+
+ return (0);
+}
+
+/* As with UC32_COPY_SIZE above, stop at uc_link so the link is not clobbered. */
+#define UC_COPY_SIZE offsetof(ucontext32_t, uc_link)
+
+int
+freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap)
+{
+ ucontext32_t uc;
+ int ret;
+
+ if (uap->ucp == NULL)
+ ret = EINVAL;
+ else {
+ memset(&uc, 0, sizeof(uc));
+ get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET);
+ PROC_LOCK(td->td_proc);
+ uc.uc_sigmask = td->td_sigmask;
+ PROC_UNLOCK(td->td_proc);
+ ret = copyout(&uc, uap->ucp, UC_COPY_SIZE);
+ }
+ return (ret);
+}
+
+int
+freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap)
+{
+ ucontext32_t uc;
+ int ret;
+
+ if (uap->ucp == NULL)
+ ret = EINVAL;
+ else {
+ ret = copyin(uap->ucp, &uc, UC_COPY_SIZE);
+ if (ret == 0) {
+ ret = set_mcontext32(td, &uc.uc_mcontext);
+ if (ret == 0)
+ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask,
+ NULL, 0);
+ }
+ }
+ return (ret);
+}
+
+int
+freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap)
+{
+ ucontext32_t uc;
+ int error;
+
+ if (uap == NULL)
+ return (EFAULT);
+ if (copyin(uap->sigcntxp, &uc, sizeof(uc)))
+ return (EFAULT);
+ error = set_mcontext32(td, &uc.uc_mcontext);
+ if (error != 0)
+ return (0);
+
+ /* Restore signal mask. */
+ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
+
+ return (EJUSTRETURN);
+}
+
+int
+freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap)
+{
+ ucontext32_t uc;
+ int ret;
+
+ if (uap->oucp == NULL || uap->ucp == NULL)
+ ret = EINVAL;
+ else {
+ bzero(&uc, sizeof(uc));
+ get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET);
+ PROC_LOCK(td->td_proc);
+ uc.uc_sigmask = td->td_sigmask;
+ PROC_UNLOCK(td->td_proc);
+ ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE);
+ if (ret == 0) {
+ ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE);
+ if (ret == 0) {
+ ret = set_mcontext32(td, &uc.uc_mcontext);
+ kern_sigprocmask(td, SIG_SETMASK,
+ &uc.uc_sigmask, NULL, 0);
+ }
+ }
+ }
+ return (ret);
+}
+
+void
+freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
+{
+ struct thread *td;
+ struct proc *p;
+ struct trapframe *tf;
+ struct sigframe32 *fp, frame;
+ struct sigacts *psp;
+ struct siginfo32 siginfo;
+ struct sysentvec *sysent;
+ int onstack;
+ int sig;
+ int code;
+
+ siginfo_to_siginfo32(&ksi->ksi_info, &siginfo);
+ td = curthread;
+ p = td->td_proc;
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ sig = ksi->ksi_signo;
+ code = ksi->ksi_code;
+ psp = p->p_sigacts;
+ mtx_assert(&psp->ps_mtx, MA_OWNED);
+ tf = td->td_frame;
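+	/*
+	 * For a process running in AArch32 state, the user stack pointer
+	 * (r13) and link register (r14) appear as x13 and x14 in the
+	 * 64-bit trapframe.
+	 */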
+ onstack = sigonstack(tf->tf_x[13]);
+
+ CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
+ catcher, sig);
+
+ /* Allocate and validate space for the signal handler context. */
+ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) &&
+ SIGISMEMBER(psp->ps_sigonstack, sig)) {
+ fp = (struct sigframe32 *)((uintptr_t)td->td_sigstk.ss_sp +
+ td->td_sigstk.ss_size);
+#if defined(COMPAT_43)
+ td->td_sigstk.ss_flags |= SS_ONSTACK;
+#endif
+ } else
+ fp = (struct sigframe32 *)td->td_frame->tf_x[13];
+
+ /* make room on the stack */
+ fp--;
+
+	/* Align the stack to an 8-byte boundary, as the 32-bit EABI requires. */
+	fp = (struct sigframe32 *)((unsigned long)(fp) & ~(8 - 1));
+ /* Populate the siginfo frame. */
+ get_mcontext32(td, &frame.sf_uc.uc_mcontext, 0);
+#ifdef VFP
+ get_fpcontext32(td, &frame.sf_vfp);
+ frame.sf_uc.uc_mcontext.mc_vfp_size = sizeof(fp->sf_vfp);
+ frame.sf_uc.uc_mcontext.mc_vfp_ptr = (uint32_t)(uintptr_t)&fp->sf_vfp;
+#else
+ frame.sf_uc.uc_mcontext.mc_vfp_size = 0;
+ frame.sf_uc.uc_mcontext.mc_vfp_ptr = (uint32_t)NULL;
+#endif
+ frame.sf_si = siginfo;
+ frame.sf_uc.uc_sigmask = *mask;
+	frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
+ ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE;
+ frame.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp;
+ frame.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size;
+
+ mtx_unlock(&psp->ps_mtx);
+ PROC_UNLOCK(td->td_proc);
+
+ /* Copy the sigframe out to the user's stack. */
+ if (copyout(&frame, fp, sizeof(*fp)) != 0) {
+ /* Process has trashed its stack. Kill it. */
+ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp);
+ PROC_LOCK(p);
+ sigexit(td, SIGILL);
+ }
+
+ /*
+ * Build context to run handler in. We invoke the handler
+ * directly, only returning via the trampoline. Note the
+ * trampoline version numbers are coordinated with machine-
+ * dependent code in libc.
+ */
+
+ tf->tf_x[0] = sig;
+ tf->tf_x[1] = (register_t)&fp->sf_si;
+ tf->tf_x[2] = (register_t)&fp->sf_uc;
+
+ /* the trampoline uses r5 as the uc address */
+ tf->tf_x[5] = (register_t)&fp->sf_uc;
+ tf->tf_elr = (register_t)catcher;
+ tf->tf_x[13] = (register_t)fp;
+ sysent = p->p_sysent;
+ if (sysent->sv_sigcode_base != 0)
+ tf->tf_x[14] = (register_t)sysent->sv_sigcode_base;
+ else
+ tf->tf_x[14] = (register_t)(sysent->sv_psstrings -
+ *(sysent->sv_szsigcode));
+ /* Set the mode to enter in the signal handler */
+ if ((register_t)catcher & 1)
+ tf->tf_spsr |= PSR_T;
+ else
+ tf->tf_spsr &= ~PSR_T;
+
+ CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_x[14],
+ tf->tf_x[13]);
+
+ PROC_LOCK(p);
+ mtx_lock(&psp->ps_mtx);
+}
+
+#ifdef COMPAT_43
+/*
+ * COMPAT_FREEBSD32 assumes we have this system call when COMPAT_43 is defined.
+ * FreeBSD/arm provides a similar getpagesize() syscall.
+ */
+#define ARM32_PAGE_SIZE 4096
+int
+ofreebsd32_getpagesize(struct thread *td,
+ struct ofreebsd32_getpagesize_args *uap)
+{
+
+ td->td_retval[0] = ARM32_PAGE_SIZE;
+ return (0);
+}
+
+/*
+ * Mirror the osigreturn definition in kern_sig.c for !i386 platforms, matching
+ * what FreeBSD/arm wires up for this syscall.
+ */
+int
+ofreebsd32_sigreturn(struct thread *td, struct ofreebsd32_sigreturn_args *uap)
+{
+
+ return (nosys(td, (struct nosys_args *)uap));
+}
+#endif
diff --git a/sys/arm64/arm64/genassym.c b/sys/arm64/arm64/genassym.c
new file mode 100644
index 000000000000..3f664d898916
--- /dev/null
+++ b/sys/arm64/arm64/genassym.c
@@ -0,0 +1,79 @@
+/*-
+ * Copyright (c) 2004 Olivier Houchard
+ * Copyright (c) 2014 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+#include <sys/assym.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+
+#include <machine/frame.h>
+#include <machine/machdep.h>
+#include <machine/pcb.h>
+
+/* Sizeof arm64_bootparams, rounded to keep stack alignment */
+ASSYM(BOOTPARAMS_SIZE, roundup2(sizeof(struct arm64_bootparams),
+ STACKALIGNBYTES + 1));
+ASSYM(BP_MODULEP, offsetof(struct arm64_bootparams, modulep));
+ASSYM(BP_KERN_L1PT, offsetof(struct arm64_bootparams, kern_l1pt));
+ASSYM(BP_KERN_DELTA, offsetof(struct arm64_bootparams, kern_delta));
+ASSYM(BP_KERN_STACK, offsetof(struct arm64_bootparams, kern_stack));
+ASSYM(BP_KERN_L0PT, offsetof(struct arm64_bootparams, kern_l0pt));
+ASSYM(BP_BOOT_EL, offsetof(struct arm64_bootparams, boot_el));
+
+ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
+ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
+
+ASSYM(PCPU_SIZE, sizeof(struct pcpu));
+ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
+ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
+ASSYM(PC_SSBD, offsetof(struct pcpu, pc_ssbd));
+
+/* Size of pcb, rounded to keep stack alignment */
+ASSYM(PCB_SIZE, roundup2(sizeof(struct pcb), STACKALIGNBYTES + 1));
+ASSYM(PCB_SINGLE_STEP_SHIFT, PCB_SINGLE_STEP_SHIFT);
+ASSYM(PCB_REGS, offsetof(struct pcb, pcb_x));
+ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp));
+ASSYM(PCB_TPIDRRO, offsetof(struct pcb, pcb_tpidrro_el0));
+ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
+ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
+
+ASSYM(SF_UC, offsetof(struct sigframe, sf_uc));
+
+ASSYM(TD_PROC, offsetof(struct thread, td_proc));
+ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
+ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
+ASSYM(TD_FRAME, offsetof(struct thread, td_frame));
+ASSYM(TD_LOCK, offsetof(struct thread, td_lock));
+
+ASSYM(TF_SIZE, sizeof(struct trapframe));
+ASSYM(TF_SP, offsetof(struct trapframe, tf_sp));
+ASSYM(TF_ELR, offsetof(struct trapframe, tf_elr));
+ASSYM(TF_SPSR, offsetof(struct trapframe, tf_spsr));
+ASSYM(TF_X, offsetof(struct trapframe, tf_x));
diff --git a/sys/arm64/arm64/gic_v3.c b/sys/arm64/arm64/gic_v3.c
new file mode 100644
index 000000000000..a83ef576e30e
--- /dev/null
+++ b/sys/arm64/arm64/gic_v3.c
@@ -0,0 +1,1271 @@
+/*-
+ * Copyright (c) 2015-2016 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_acpi.h"
+#include "opt_platform.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bitstring.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/cpuset.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+#include <machine/cpu.h>
+#include <machine/intr.h>
+
+#ifdef FDT
+#include <dev/fdt/fdt_intr.h>
+#include <dev/ofw/ofw_bus_subr.h>
+#endif
+
+#ifdef DEV_ACPI
+#include <contrib/dev/acpica/include/acpi.h>
+#include <dev/acpica/acpivar.h>
+#endif
+
+#include "pic_if.h"
+
+#include <arm/arm/gic_common.h>
+#include "gic_v3_reg.h"
+#include "gic_v3_var.h"
+
+static bus_get_domain_t gic_v3_get_domain;
+static bus_read_ivar_t gic_v3_read_ivar;
+
+static pic_disable_intr_t gic_v3_disable_intr;
+static pic_enable_intr_t gic_v3_enable_intr;
+static pic_map_intr_t gic_v3_map_intr;
+static pic_setup_intr_t gic_v3_setup_intr;
+static pic_teardown_intr_t gic_v3_teardown_intr;
+static pic_post_filter_t gic_v3_post_filter;
+static pic_post_ithread_t gic_v3_post_ithread;
+static pic_pre_ithread_t gic_v3_pre_ithread;
+static pic_bind_intr_t gic_v3_bind_intr;
+#ifdef SMP
+static pic_init_secondary_t gic_v3_init_secondary;
+static pic_ipi_send_t gic_v3_ipi_send;
+static pic_ipi_setup_t gic_v3_ipi_setup;
+#endif
+
+static u_int gic_irq_cpu;
+#ifdef SMP
+static u_int sgi_to_ipi[GIC_LAST_SGI - GIC_FIRST_SGI + 1];
+static u_int sgi_first_unused = GIC_FIRST_SGI;
+#endif
+
+static device_method_t gic_v3_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_detach, gic_v3_detach),
+
+ /* Bus interface */
+ DEVMETHOD(bus_get_domain, gic_v3_get_domain),
+ DEVMETHOD(bus_read_ivar, gic_v3_read_ivar),
+
+ /* Interrupt controller interface */
+ DEVMETHOD(pic_disable_intr, gic_v3_disable_intr),
+ DEVMETHOD(pic_enable_intr, gic_v3_enable_intr),
+ DEVMETHOD(pic_map_intr, gic_v3_map_intr),
+ DEVMETHOD(pic_setup_intr, gic_v3_setup_intr),
+ DEVMETHOD(pic_teardown_intr, gic_v3_teardown_intr),
+ DEVMETHOD(pic_post_filter, gic_v3_post_filter),
+ DEVMETHOD(pic_post_ithread, gic_v3_post_ithread),
+ DEVMETHOD(pic_pre_ithread, gic_v3_pre_ithread),
+#ifdef SMP
+ DEVMETHOD(pic_bind_intr, gic_v3_bind_intr),
+ DEVMETHOD(pic_init_secondary, gic_v3_init_secondary),
+ DEVMETHOD(pic_ipi_send, gic_v3_ipi_send),
+ DEVMETHOD(pic_ipi_setup, gic_v3_ipi_setup),
+#endif
+
+ /* End */
+ DEVMETHOD_END
+};
+
+DEFINE_CLASS_0(gic, gic_v3_driver, gic_v3_methods,
+ sizeof(struct gic_v3_softc));
+
+/*
+ * Driver-specific definitions.
+ */
+MALLOC_DEFINE(M_GIC_V3, "GICv3", GIC_V3_DEVSTR);
+
+/*
+ * Helper functions and definitions.
+ */
+/* Destination registers, either Distributor or Re-Distributor */
+enum gic_v3_xdist {
+ DIST = 0,
+ REDIST,
+};
+
+struct gic_v3_irqsrc {
+ struct intr_irqsrc gi_isrc;
+ uint32_t gi_irq;
+ enum intr_polarity gi_pol;
+ enum intr_trigger gi_trig;
+};
+
+/* Helper routines starting with gic_v3_ */
+static int gic_v3_dist_init(struct gic_v3_softc *);
+static int gic_v3_redist_alloc(struct gic_v3_softc *);
+static int gic_v3_redist_find(struct gic_v3_softc *);
+static int gic_v3_redist_init(struct gic_v3_softc *);
+static int gic_v3_cpu_init(struct gic_v3_softc *);
+static void gic_v3_wait_for_rwp(struct gic_v3_softc *, enum gic_v3_xdist);
+
+/* A sequence of init functions for primary (boot) CPU */
+typedef int (*gic_v3_initseq_t) (struct gic_v3_softc *);
+/* Primary CPU initialization sequence */
+static gic_v3_initseq_t gic_v3_primary_init[] = {
+ gic_v3_dist_init,
+ gic_v3_redist_alloc,
+ gic_v3_redist_init,
+ gic_v3_cpu_init,
+ NULL
+};
+
+#ifdef SMP
+/* Secondary CPU initialization sequence */
+static gic_v3_initseq_t gic_v3_secondary_init[] = {
+ gic_v3_redist_init,
+ gic_v3_cpu_init,
+ NULL
+};
+#endif
+
+uint32_t
+gic_r_read_4(device_t dev, bus_size_t offset)
+{
+ struct gic_v3_softc *sc;
+ struct resource *rdist;
+
+ sc = device_get_softc(dev);
+ rdist = &sc->gic_redists.pcpu[PCPU_GET(cpuid)]->res;
+ return (bus_read_4(rdist, offset));
+}
+
+uint64_t
+gic_r_read_8(device_t dev, bus_size_t offset)
+{
+ struct gic_v3_softc *sc;
+ struct resource *rdist;
+
+ sc = device_get_softc(dev);
+ rdist = &sc->gic_redists.pcpu[PCPU_GET(cpuid)]->res;
+ return (bus_read_8(rdist, offset));
+}
+
+void
+gic_r_write_4(device_t dev, bus_size_t offset, uint32_t val)
+{
+ struct gic_v3_softc *sc;
+ struct resource *rdist;
+
+ sc = device_get_softc(dev);
+ rdist = &sc->gic_redists.pcpu[PCPU_GET(cpuid)]->res;
+ bus_write_4(rdist, offset, val);
+}
+
+void
+gic_r_write_8(device_t dev, bus_size_t offset, uint64_t val)
+{
+ struct gic_v3_softc *sc;
+ struct resource *rdist;
+
+ sc = device_get_softc(dev);
+ rdist = &sc->gic_redists.pcpu[PCPU_GET(cpuid)]->res;
+ bus_write_8(rdist, offset, val);
+}
+
+/*
+ * Device interface.
+ */
+int
+gic_v3_attach(device_t dev)
+{
+ struct gic_v3_softc *sc;
+ gic_v3_initseq_t *init_func;
+ uint32_t typer;
+ int rid;
+ int err;
+ size_t i;
+ u_int irq;
+ const char *name;
+
+ sc = device_get_softc(dev);
+ sc->gic_registered = FALSE;
+ sc->dev = dev;
+ err = 0;
+
+ /* Initialize mutex */
+ mtx_init(&sc->gic_mtx, "GICv3 lock", NULL, MTX_SPIN);
+
+ /*
+ * Allocate array of struct resource.
+ * One entry for Distributor and all remaining for Re-Distributor.
+ */
+ sc->gic_res = malloc(
+ sizeof(*sc->gic_res) * (sc->gic_redists.nregions + 1),
+ M_GIC_V3, M_WAITOK);
+
+ /* Now allocate corresponding resources */
+ for (i = 0, rid = 0; i < (sc->gic_redists.nregions + 1); i++, rid++) {
+ sc->gic_res[rid] = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
+ &rid, RF_ACTIVE);
+ if (sc->gic_res[rid] == NULL)
+ return (ENXIO);
+ }
+
+ /*
+ * Distributor interface
+ */
+ sc->gic_dist = sc->gic_res[0];
+
+ /*
+	 * Re-Distributor interface
+ */
+ /* Allocate space under region descriptions */
+ sc->gic_redists.regions = malloc(
+ sizeof(*sc->gic_redists.regions) * sc->gic_redists.nregions,
+ M_GIC_V3, M_WAITOK);
+
+ /* Fill-up bus_space information for each region. */
+ for (i = 0, rid = 1; i < sc->gic_redists.nregions; i++, rid++)
+ sc->gic_redists.regions[i] = sc->gic_res[rid];
+
+ /* Get the number of supported SPI interrupts */
+ typer = gic_d_read(sc, 4, GICD_TYPER);
+ sc->gic_nirqs = GICD_TYPER_I_NUM(typer);
+ if (sc->gic_nirqs > GIC_I_NUM_MAX)
+ sc->gic_nirqs = GIC_I_NUM_MAX;
+
+ sc->gic_irqs = malloc(sizeof(*sc->gic_irqs) * sc->gic_nirqs,
+ M_GIC_V3, M_WAITOK | M_ZERO);
+ name = device_get_nameunit(dev);
+ for (irq = 0; irq < sc->gic_nirqs; irq++) {
+ struct intr_irqsrc *isrc;
+
+ sc->gic_irqs[irq].gi_irq = irq;
+ sc->gic_irqs[irq].gi_pol = INTR_POLARITY_CONFORM;
+ sc->gic_irqs[irq].gi_trig = INTR_TRIGGER_CONFORM;
+
+ isrc = &sc->gic_irqs[irq].gi_isrc;
+ if (irq <= GIC_LAST_SGI) {
+ err = intr_isrc_register(isrc, sc->dev,
+ INTR_ISRCF_IPI, "%s,i%u", name, irq - GIC_FIRST_SGI);
+ } else if (irq <= GIC_LAST_PPI) {
+ err = intr_isrc_register(isrc, sc->dev,
+ INTR_ISRCF_PPI, "%s,p%u", name, irq - GIC_FIRST_PPI);
+ } else {
+ err = intr_isrc_register(isrc, sc->dev, 0,
+ "%s,s%u", name, irq - GIC_FIRST_SPI);
+ }
+ if (err != 0) {
+ /* XXX call intr_isrc_deregister() */
+			free(sc->gic_irqs, M_GIC_V3);
+ return (err);
+ }
+ }
+
+ /*
+ * Read the Peripheral ID2 register. This is an implementation
+ * defined register, but seems to be implemented in all GICv3
+ * parts and Linux expects it to be there.
+ */
+ sc->gic_pidr2 = gic_d_read(sc, 4, GICD_PIDR2);
+
+ /* Get the number of supported interrupt identifier bits */
+ sc->gic_idbits = GICD_TYPER_IDBITS(typer);
+
+ if (bootverbose) {
+ device_printf(dev, "SPIs: %u, IDs: %u\n",
+ sc->gic_nirqs, (1 << sc->gic_idbits) - 1);
+ }
+
+	/* Run the initialization sequence on the boot CPU */
+ for (init_func = gic_v3_primary_init; *init_func != NULL; init_func++) {
+ err = (*init_func)(sc);
+ if (err != 0)
+ return (err);
+ }
+
+ return (0);
+}
+
+int
+gic_v3_detach(device_t dev)
+{
+ struct gic_v3_softc *sc;
+ size_t i;
+ int rid;
+
+ sc = device_get_softc(dev);
+
+ if (device_is_attached(dev)) {
+ /*
+ * XXX: We should probably deregister PIC
+ */
+ if (sc->gic_registered)
+ panic("Trying to detach registered PIC");
+ }
+ for (rid = 0; rid < (sc->gic_redists.nregions + 1); rid++)
+ bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->gic_res[rid]);
+
+ for (i = 0; i <= mp_maxid; i++)
+ free(sc->gic_redists.pcpu[i], M_GIC_V3);
+
+ free(sc->gic_res, M_GIC_V3);
+ free(sc->gic_redists.regions, M_GIC_V3);
+
+ return (0);
+}
+
+static int
+gic_v3_get_domain(device_t dev, device_t child, int *domain)
+{
+ struct gic_v3_devinfo *di;
+
+ di = device_get_ivars(child);
+ if (di->gic_domain < 0)
+ return (ENOENT);
+
+ *domain = di->gic_domain;
+ return (0);
+}
+
+static int
+gic_v3_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
+{
+ struct gic_v3_softc *sc;
+
+ sc = device_get_softc(dev);
+
+ switch (which) {
+ case GICV3_IVAR_NIRQS:
+ *result = (NIRQ - sc->gic_nirqs) / sc->gic_nchildren;
+ return (0);
+ case GICV3_IVAR_REDIST:
+ *result = (uintptr_t)sc->gic_redists.pcpu[PCPU_GET(cpuid)];
+ return (0);
+ case GIC_IVAR_HW_REV:
+ KASSERT(
+ GICR_PIDR2_ARCH(sc->gic_pidr2) == GICR_PIDR2_ARCH_GICv3 ||
+ GICR_PIDR2_ARCH(sc->gic_pidr2) == GICR_PIDR2_ARCH_GICv4,
+ ("gic_v3_read_ivar: Invalid GIC architecture: %d (%.08X)",
+ GICR_PIDR2_ARCH(sc->gic_pidr2), sc->gic_pidr2));
+ *result = GICR_PIDR2_ARCH(sc->gic_pidr2);
+ return (0);
+ case GIC_IVAR_BUS:
+ KASSERT(sc->gic_bus != GIC_BUS_UNKNOWN,
+ ("gic_v3_read_ivar: Unknown bus type"));
+ KASSERT(sc->gic_bus <= GIC_BUS_MAX,
+ ("gic_v3_read_ivar: Invalid bus type %u", sc->gic_bus));
+ *result = sc->gic_bus;
+ return (0);
+ }
+
+ return (ENOENT);
+}
+
+int
+arm_gic_v3_intr(void *arg)
+{
+ struct gic_v3_softc *sc = arg;
+ struct gic_v3_irqsrc *gi;
+ struct intr_pic *pic;
+ uint64_t active_irq;
+ struct trapframe *tf;
+
+ pic = sc->gic_pic;
+
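+	/*
+	 * Reading ICC_IAR1_EL1 acknowledges the highest priority pending
+	 * Group 1 interrupt and returns its ID. LPIs are handed to the
+	 * child ITS driver, special IDs (at or above gic_nirqs) end the
+	 * loop, and SGIs/PPIs/SPIs are dispatched here with the EOI write
+	 * ordered according to the trigger type.
+	 */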
+ while (1) {
+ if (CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1) {
+ /*
+ * Hardware: Cavium ThunderX
+ * Chip revision: Pass 1.0 (early version)
+ * Pass 1.1 (production)
+ * ERRATUM: 22978, 23154
+ */
+ __asm __volatile(
+ "nop;nop;nop;nop;nop;nop;nop;nop; \n"
+ "mrs %0, ICC_IAR1_EL1 \n"
+ "nop;nop;nop;nop; \n"
+ "dsb sy \n"
+ : "=&r" (active_irq));
+ } else {
+ active_irq = gic_icc_read(IAR1);
+ }
+
+ if (active_irq >= GIC_FIRST_LPI) {
+ intr_child_irq_handler(pic, active_irq);
+ continue;
+ }
+
+ if (__predict_false(active_irq >= sc->gic_nirqs))
+ return (FILTER_HANDLED);
+
+ tf = curthread->td_intr_frame;
+ gi = &sc->gic_irqs[active_irq];
+ if (active_irq <= GIC_LAST_SGI) {
+ /* Call EOI for all IPI before dispatch. */
+ gic_icc_write(EOIR1, (uint64_t)active_irq);
+#ifdef SMP
+ intr_ipi_dispatch(sgi_to_ipi[gi->gi_irq], tf);
+#else
+ device_printf(sc->dev, "SGI %ju on UP system detected\n",
+ (uintmax_t)(active_irq - GIC_FIRST_SGI));
+#endif
+ } else if (active_irq >= GIC_FIRST_PPI &&
+ active_irq <= GIC_LAST_SPI) {
+ if (gi->gi_trig == INTR_TRIGGER_EDGE)
+ gic_icc_write(EOIR1, gi->gi_irq);
+
+ if (intr_isrc_dispatch(&gi->gi_isrc, tf) != 0) {
+ if (gi->gi_trig != INTR_TRIGGER_EDGE)
+ gic_icc_write(EOIR1, gi->gi_irq);
+ gic_v3_disable_intr(sc->dev, &gi->gi_isrc);
+ device_printf(sc->dev,
+ "Stray irq %lu disabled\n", active_irq);
+ }
+ }
+ }
+}
+
+#ifdef FDT
+static int
+gic_map_fdt(device_t dev, u_int ncells, pcell_t *cells, u_int *irqp,
+ enum intr_polarity *polp, enum intr_trigger *trigp)
+{
+ u_int irq;
+
+ if (ncells < 3)
+ return (EINVAL);
+
+ /*
+ * The 1st cell is the interrupt type:
+ * 0 = SPI
+ * 1 = PPI
+ * The 2nd cell contains the interrupt number:
+ * [0 - 987] for SPI
+ * [0 - 15] for PPI
+ * The 3rd cell is the flags, encoded as follows:
+ * bits[3:0] trigger type and level flags
+ * 1 = edge triggered
+ * 2 = edge triggered (PPI only)
+ * 4 = level-sensitive
+ * 8 = level-sensitive (PPI only)
+ */
+ switch (cells[0]) {
+ case 0:
+ irq = GIC_FIRST_SPI + cells[1];
+ /* SPI irq is checked later. */
+ break;
+ case 1:
+ irq = GIC_FIRST_PPI + cells[1];
+ if (irq > GIC_LAST_PPI) {
+ device_printf(dev, "unsupported PPI interrupt "
+ "number %u\n", cells[1]);
+ return (EINVAL);
+ }
+ break;
+ default:
+ device_printf(dev, "unsupported interrupt type "
+ "configuration %u\n", cells[0]);
+ return (EINVAL);
+ }
+
+ switch (cells[2] & FDT_INTR_MASK) {
+ case FDT_INTR_EDGE_RISING:
+ *trigp = INTR_TRIGGER_EDGE;
+ *polp = INTR_POLARITY_HIGH;
+ break;
+ case FDT_INTR_EDGE_FALLING:
+ *trigp = INTR_TRIGGER_EDGE;
+ *polp = INTR_POLARITY_LOW;
+ break;
+ case FDT_INTR_LEVEL_HIGH:
+ *trigp = INTR_TRIGGER_LEVEL;
+ *polp = INTR_POLARITY_HIGH;
+ break;
+ case FDT_INTR_LEVEL_LOW:
+ *trigp = INTR_TRIGGER_LEVEL;
+ *polp = INTR_POLARITY_LOW;
+ break;
+ default:
+ device_printf(dev, "unsupported trigger/polarity "
+ "configuration 0x%02x\n", cells[2]);
+ return (EINVAL);
+ }
+
+ /* Check the interrupt is valid */
+ if (irq >= GIC_FIRST_SPI && *polp != INTR_POLARITY_HIGH)
+ return (EINVAL);
+
+ *irqp = irq;
+ return (0);
+}
+#endif
+
+static int
+gic_map_msi(device_t dev, struct intr_map_data_msi *msi_data, u_int *irqp,
+ enum intr_polarity *polp, enum intr_trigger *trigp)
+{
+ struct gic_v3_irqsrc *gi;
+
+ /* SPI-mapped MSI */
+ gi = (struct gic_v3_irqsrc *)msi_data->isrc;
+ if (gi == NULL)
+ return (ENXIO);
+
+ *irqp = gi->gi_irq;
+
+ /* MSI/MSI-X interrupts are always edge triggered with high polarity */
+ *polp = INTR_POLARITY_HIGH;
+ *trigp = INTR_TRIGGER_EDGE;
+
+ return (0);
+}
+
+static int
+do_gic_v3_map_intr(device_t dev, struct intr_map_data *data, u_int *irqp,
+ enum intr_polarity *polp, enum intr_trigger *trigp)
+{
+ struct gic_v3_softc *sc;
+ enum intr_polarity pol;
+ enum intr_trigger trig;
+ struct intr_map_data_msi *dam;
+#ifdef FDT
+ struct intr_map_data_fdt *daf;
+#endif
+#ifdef DEV_ACPI
+ struct intr_map_data_acpi *daa;
+#endif
+ u_int irq;
+
+ sc = device_get_softc(dev);
+
+ switch (data->type) {
+#ifdef FDT
+ case INTR_MAP_DATA_FDT:
+ daf = (struct intr_map_data_fdt *)data;
+ if (gic_map_fdt(dev, daf->ncells, daf->cells, &irq, &pol,
+ &trig) != 0)
+ return (EINVAL);
+ break;
+#endif
+#ifdef DEV_ACPI
+ case INTR_MAP_DATA_ACPI:
+ daa = (struct intr_map_data_acpi *)data;
+ irq = daa->irq;
+ pol = daa->pol;
+ trig = daa->trig;
+ break;
+#endif
+ case INTR_MAP_DATA_MSI:
+ /* SPI-mapped MSI */
+ dam = (struct intr_map_data_msi *)data;
+ if (gic_map_msi(dev, dam, &irq, &pol, &trig) != 0)
+ return (EINVAL);
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ if (irq >= sc->gic_nirqs)
+ return (EINVAL);
+ switch (pol) {
+ case INTR_POLARITY_CONFORM:
+ case INTR_POLARITY_LOW:
+ case INTR_POLARITY_HIGH:
+ break;
+ default:
+ return (EINVAL);
+ }
+ switch (trig) {
+ case INTR_TRIGGER_CONFORM:
+ case INTR_TRIGGER_EDGE:
+ case INTR_TRIGGER_LEVEL:
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ *irqp = irq;
+ if (polp != NULL)
+ *polp = pol;
+ if (trigp != NULL)
+ *trigp = trig;
+ return (0);
+}
+
+static int
+gic_v3_map_intr(device_t dev, struct intr_map_data *data,
+ struct intr_irqsrc **isrcp)
+{
+ struct gic_v3_softc *sc;
+ int error;
+ u_int irq;
+
+ error = do_gic_v3_map_intr(dev, data, &irq, NULL, NULL);
+ if (error == 0) {
+ sc = device_get_softc(dev);
+ *isrcp = GIC_INTR_ISRC(sc, irq);
+ }
+ return (error);
+}
+
+static int
+gic_v3_setup_intr(device_t dev, struct intr_irqsrc *isrc,
+ struct resource *res, struct intr_map_data *data)
+{
+ struct gic_v3_softc *sc = device_get_softc(dev);
+ struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc;
+ enum intr_trigger trig;
+ enum intr_polarity pol;
+ uint32_t reg;
+ u_int irq;
+ int error;
+
+ if (data == NULL)
+ return (ENOTSUP);
+
+ error = do_gic_v3_map_intr(dev, data, &irq, &pol, &trig);
+ if (error != 0)
+ return (error);
+
+ if (gi->gi_irq != irq || pol == INTR_POLARITY_CONFORM ||
+ trig == INTR_TRIGGER_CONFORM)
+ return (EINVAL);
+
+ /* Compare config if this is not first setup. */
+ if (isrc->isrc_handlers != 0) {
+ if (pol != gi->gi_pol || trig != gi->gi_trig)
+ return (EINVAL);
+ else
+ return (0);
+ }
+
+ gi->gi_pol = pol;
+ gi->gi_trig = trig;
+
+ /*
+ * XXX - In case that per CPU interrupt is going to be enabled in time
+ * when SMP is already started, we need some IPI call which
+ * enables it on others CPUs. Further, it's more complicated as
+	 *      enables it on other CPUs. Further, it's more complicated as
+ * per CPU basis only. Thus, it should be solved here somehow.
+ */
+ if (isrc->isrc_flags & INTR_ISRCF_PPI)
+ CPU_SET(PCPU_GET(cpuid), &isrc->isrc_cpu);
+
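+	/*
+	 * GICD_ICFGR/GICR_ICFGR hold two configuration bits per interrupt
+	 * (16 interrupts per 32-bit register); setting bit 1 of the pair
+	 * selects edge-triggered, clearing it selects level-sensitive.
+	 * SGIs have a fixed configuration, so only PPIs and SPIs are
+	 * reprogrammed below.
+	 */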
+ if (irq >= GIC_FIRST_PPI && irq <= GIC_LAST_SPI) {
+ mtx_lock_spin(&sc->gic_mtx);
+
+ /* Set the trigger and polarity */
+ if (irq <= GIC_LAST_PPI)
+ reg = gic_r_read(sc, 4,
+ GICR_SGI_BASE_SIZE + GICD_ICFGR(irq));
+ else
+ reg = gic_d_read(sc, 4, GICD_ICFGR(irq));
+ if (trig == INTR_TRIGGER_LEVEL)
+ reg &= ~(2 << ((irq % 16) * 2));
+ else
+ reg |= 2 << ((irq % 16) * 2);
+
+ if (irq <= GIC_LAST_PPI) {
+ gic_r_write(sc, 4,
+ GICR_SGI_BASE_SIZE + GICD_ICFGR(irq), reg);
+ gic_v3_wait_for_rwp(sc, REDIST);
+ } else {
+ gic_d_write(sc, 4, GICD_ICFGR(irq), reg);
+ gic_v3_wait_for_rwp(sc, DIST);
+ }
+
+ mtx_unlock_spin(&sc->gic_mtx);
+
+ gic_v3_bind_intr(dev, isrc);
+ }
+
+ return (0);
+}
+
+static int
+gic_v3_teardown_intr(device_t dev, struct intr_irqsrc *isrc,
+ struct resource *res, struct intr_map_data *data)
+{
+ struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc;
+
+ if (isrc->isrc_handlers == 0) {
+ gi->gi_pol = INTR_POLARITY_CONFORM;
+ gi->gi_trig = INTR_TRIGGER_CONFORM;
+ }
+
+ return (0);
+}
+
+static void
+gic_v3_disable_intr(device_t dev, struct intr_irqsrc *isrc)
+{
+ struct gic_v3_softc *sc;
+ struct gic_v3_irqsrc *gi;
+ u_int irq;
+
+ sc = device_get_softc(dev);
+ gi = (struct gic_v3_irqsrc *)isrc;
+ irq = gi->gi_irq;
+
+ if (irq <= GIC_LAST_PPI) {
+ /* SGIs and PPIs in corresponding Re-Distributor */
+ gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICD_ICENABLER(irq),
+ GICD_I_MASK(irq));
+ gic_v3_wait_for_rwp(sc, REDIST);
+ } else if (irq >= GIC_FIRST_SPI && irq <= GIC_LAST_SPI) {
+ /* SPIs in distributor */
+ gic_d_write(sc, 4, GICD_ICENABLER(irq), GICD_I_MASK(irq));
+ gic_v3_wait_for_rwp(sc, DIST);
+ } else
+ panic("%s: Unsupported IRQ %u", __func__, irq);
+}
+
+static void
+gic_v3_enable_intr(device_t dev, struct intr_irqsrc *isrc)
+{
+ struct gic_v3_softc *sc;
+ struct gic_v3_irqsrc *gi;
+ u_int irq;
+
+ sc = device_get_softc(dev);
+ gi = (struct gic_v3_irqsrc *)isrc;
+ irq = gi->gi_irq;
+
+ if (irq <= GIC_LAST_PPI) {
+ /* SGIs and PPIs in corresponding Re-Distributor */
+ gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICD_ISENABLER(irq),
+ GICD_I_MASK(irq));
+ gic_v3_wait_for_rwp(sc, REDIST);
+ } else if (irq >= GIC_FIRST_SPI && irq <= GIC_LAST_SPI) {
+ /* SPIs in distributor */
+ gic_d_write(sc, 4, GICD_ISENABLER(irq), GICD_I_MASK(irq));
+ gic_v3_wait_for_rwp(sc, DIST);
+ } else
+ panic("%s: Unsupported IRQ %u", __func__, irq);
+}
+
+static void
+gic_v3_pre_ithread(device_t dev, struct intr_irqsrc *isrc)
+{
+ struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc;
+
+ gic_v3_disable_intr(dev, isrc);
+ gic_icc_write(EOIR1, gi->gi_irq);
+}
+
+static void
+gic_v3_post_ithread(device_t dev, struct intr_irqsrc *isrc)
+{
+
+ gic_v3_enable_intr(dev, isrc);
+}
+
+static void
+gic_v3_post_filter(device_t dev, struct intr_irqsrc *isrc)
+{
+ struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc;
+
+ if (gi->gi_trig == INTR_TRIGGER_EDGE)
+ return;
+
+ gic_icc_write(EOIR1, gi->gi_irq);
+}
+
+static int
+gic_v3_bind_intr(device_t dev, struct intr_irqsrc *isrc)
+{
+ struct gic_v3_softc *sc;
+ struct gic_v3_irqsrc *gi;
+ int cpu;
+
+ gi = (struct gic_v3_irqsrc *)isrc;
+ if (gi->gi_irq <= GIC_LAST_PPI)
+ return (EINVAL);
+
+ KASSERT(gi->gi_irq >= GIC_FIRST_SPI && gi->gi_irq <= GIC_LAST_SPI,
+ ("%s: Attempting to bind an invalid IRQ", __func__));
+
+ sc = device_get_softc(dev);
+
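+	/*
+	 * GICv3 routes each SPI to a single PE via GICD_IROUTER, so either
+	 * pick the next CPU round-robin when no explicit binding was
+	 * requested, or use the first CPU in the requested set.
+	 */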
+ if (CPU_EMPTY(&isrc->isrc_cpu)) {
+ gic_irq_cpu = intr_irq_next_cpu(gic_irq_cpu, &all_cpus);
+ CPU_SETOF(gic_irq_cpu, &isrc->isrc_cpu);
+ gic_d_write(sc, 4, GICD_IROUTER(gi->gi_irq),
+ CPU_AFFINITY(gic_irq_cpu));
+ } else {
+ /*
+ * We can only bind to a single CPU so select
+ * the first CPU found.
+ */
+ cpu = CPU_FFS(&isrc->isrc_cpu) - 1;
+ gic_d_write(sc, 4, GICD_IROUTER(gi->gi_irq), CPU_AFFINITY(cpu));
+ }
+
+ return (0);
+}
+
+#ifdef SMP
+static void
+gic_v3_init_secondary(device_t dev)
+{
+ device_t child;
+ struct gic_v3_softc *sc;
+ gic_v3_initseq_t *init_func;
+ struct intr_irqsrc *isrc;
+ u_int cpu, irq;
+ int err, i;
+
+ sc = device_get_softc(dev);
+ cpu = PCPU_GET(cpuid);
+
+	/* Run the initialization sequence on this secondary CPU */
+ for (init_func = gic_v3_secondary_init; *init_func != NULL;
+ init_func++) {
+ err = (*init_func)(sc);
+ if (err != 0) {
+ device_printf(dev,
+ "Could not initialize GIC for CPU%u\n", cpu);
+ return;
+ }
+ }
+
+ /* Unmask attached SGI interrupts. */
+ for (irq = GIC_FIRST_SGI; irq <= GIC_LAST_SGI; irq++) {
+ isrc = GIC_INTR_ISRC(sc, irq);
+ if (intr_isrc_init_on_cpu(isrc, cpu))
+ gic_v3_enable_intr(dev, isrc);
+ }
+
+ /* Unmask attached PPI interrupts. */
+ for (irq = GIC_FIRST_PPI; irq <= GIC_LAST_PPI; irq++) {
+ isrc = GIC_INTR_ISRC(sc, irq);
+ if (intr_isrc_init_on_cpu(isrc, cpu))
+ gic_v3_enable_intr(dev, isrc);
+ }
+
+ for (i = 0; i < sc->gic_nchildren; i++) {
+ child = sc->gic_children[i];
+ PIC_INIT_SECONDARY(child);
+ }
+}
+
+static void
+gic_v3_ipi_send(device_t dev, struct intr_irqsrc *isrc, cpuset_t cpus,
+ u_int ipi)
+{
+ struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc;
+ uint64_t aff, val, irq;
+ int i;
+
+#define GIC_AFF_MASK (CPU_AFF3_MASK | CPU_AFF2_MASK | CPU_AFF1_MASK)
+#define GIC_AFFINITY(i) (CPU_AFFINITY(i) & GIC_AFF_MASK)
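+	/*
+	 * ICC_SGI1R_EL1 takes an Aff3.Aff2.Aff1 cluster plus a 16-bit
+	 * target list of Aff0 values within that cluster, so IPIs are
+	 * batched per affinity group: accumulate Aff0 bits while the upper
+	 * affinity stays the same and issue the write whenever it changes.
+	 */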
+ aff = GIC_AFFINITY(0);
+ irq = gi->gi_irq;
+ val = 0;
+
+ /* Iterate through all CPUs in set */
+ for (i = 0; i <= mp_maxid; i++) {
+ /* Move to the next affinity group */
+ if (aff != GIC_AFFINITY(i)) {
+ /* Send the IPI */
+ if (val != 0) {
+ gic_icc_write(SGI1R, val);
+ val = 0;
+ }
+ aff = GIC_AFFINITY(i);
+ }
+
+ /* Send the IPI to this cpu */
+ if (CPU_ISSET(i, &cpus)) {
+#define ICC_SGI1R_AFFINITY(aff) \
+ (((uint64_t)CPU_AFF3(aff) << ICC_SGI1R_EL1_AFF3_SHIFT) | \
+ ((uint64_t)CPU_AFF2(aff) << ICC_SGI1R_EL1_AFF2_SHIFT) | \
+ ((uint64_t)CPU_AFF1(aff) << ICC_SGI1R_EL1_AFF1_SHIFT))
+			/* Set the affinity for the first CPU at this level */
+ if (val == 0)
+ val = ICC_SGI1R_AFFINITY(aff) |
+ irq << ICC_SGI1R_EL1_SGIID_SHIFT;
+			/* Set the bit to send the IPI to the CPU */
+ val |= 1 << CPU_AFF0(CPU_AFFINITY(i));
+ }
+ }
+
+ /* Send the IPI to the last cpu affinity group */
+ if (val != 0)
+ gic_icc_write(SGI1R, val);
+#undef GIC_AFF_MASK
+#undef GIC_AFFINITY
+}
+
+static int
+gic_v3_ipi_setup(device_t dev, u_int ipi, struct intr_irqsrc **isrcp)
+{
+ struct intr_irqsrc *isrc;
+ struct gic_v3_softc *sc = device_get_softc(dev);
+
+ if (sgi_first_unused > GIC_LAST_SGI)
+ return (ENOSPC);
+
+ isrc = GIC_INTR_ISRC(sc, sgi_first_unused);
+ sgi_to_ipi[sgi_first_unused++] = ipi;
+
+ CPU_SET(PCPU_GET(cpuid), &isrc->isrc_cpu);
+
+ *isrcp = isrc;
+ return (0);
+}
+#endif /* SMP */
+
+/*
+ * Helper routines
+ */
+static void
+gic_v3_wait_for_rwp(struct gic_v3_softc *sc, enum gic_v3_xdist xdist)
+{
+ struct resource *res;
+ u_int cpuid;
+ size_t us_left = 1000000;
+
+ cpuid = PCPU_GET(cpuid);
+
+ switch (xdist) {
+ case DIST:
+ res = sc->gic_dist;
+ break;
+ case REDIST:
+ res = &sc->gic_redists.pcpu[cpuid]->res;
+ break;
+ default:
+ KASSERT(0, ("%s: Attempt to wait for unknown RWP", __func__));
+ return;
+ }
+
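+	/*
+	 * The RWP bit stays set while the effects of earlier register
+	 * writes are still propagating through the Distributor or
+	 * Re-Distributor, so poll until the hardware reports completion.
+	 */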
+ while ((bus_read_4(res, GICD_CTLR) & GICD_CTLR_RWP) != 0) {
+ DELAY(1);
+ if (us_left-- == 0)
+ panic("GICD Register write pending for too long");
+ }
+}
+
+/* CPU interface. */
+static __inline void
+gic_v3_cpu_priority(uint64_t mask)
+{
+
+	/* Set priority mask */
+ gic_icc_write(PMR, mask & ICC_PMR_EL1_PRIO_MASK);
+}
+
+static int
+gic_v3_cpu_enable_sre(struct gic_v3_softc *sc)
+{
+ uint64_t sre;
+ u_int cpuid;
+
+ cpuid = PCPU_GET(cpuid);
+ /*
+ * Set the SRE bit to enable access to GIC CPU interface
+ * via system registers.
+ */
+ sre = READ_SPECIALREG(icc_sre_el1);
+ sre |= ICC_SRE_EL1_SRE;
+ WRITE_SPECIALREG(icc_sre_el1, sre);
+ isb();
+ /*
+ * Now ensure that the bit is set.
+ */
+ sre = READ_SPECIALREG(icc_sre_el1);
+ if ((sre & ICC_SRE_EL1_SRE) == 0) {
+		/* The write did not stick; SRE access is disabled at a higher EL. */
+ device_printf(sc->dev, "ERROR: CPU%u cannot enable CPU interface "
+ "via system registers\n", cpuid);
+ return (ENXIO);
+ } else if (bootverbose) {
+ device_printf(sc->dev,
+ "CPU%u enabled CPU interface via system registers\n",
+ cpuid);
+ }
+
+ return (0);
+}
+
+static int
+gic_v3_cpu_init(struct gic_v3_softc *sc)
+{
+ int err;
+
+ /* Enable access to CPU interface via system registers */
+ err = gic_v3_cpu_enable_sre(sc);
+ if (err != 0)
+ return (err);
+ /* Priority mask to minimum - accept all interrupts */
+ gic_v3_cpu_priority(GIC_PRIORITY_MIN);
+ /* Disable EOI mode */
+ gic_icc_clear(CTLR, ICC_CTLR_EL1_EOIMODE);
+	/* Enable Group 1 (Non-secure) interrupts */
+ gic_icc_set(IGRPEN1, ICC_IGRPEN0_EL1_EN);
+
+ return (0);
+}
+
+/* Distributor */
+static int
+gic_v3_dist_init(struct gic_v3_softc *sc)
+{
+ uint64_t aff;
+ u_int i;
+
+ /*
+ * 1. Disable the Distributor
+ */
+ gic_d_write(sc, 4, GICD_CTLR, 0);
+ gic_v3_wait_for_rwp(sc, DIST);
+
+ /*
+ * 2. Configure the Distributor
+ */
+ /* Set all SPIs to be Group 1 Non-secure */
+ for (i = GIC_FIRST_SPI; i < sc->gic_nirqs; i += GICD_I_PER_IGROUPRn)
+ gic_d_write(sc, 4, GICD_IGROUPR(i), 0xFFFFFFFF);
+
+	/* Set all global interrupts to be level-sensitive. */
+ for (i = GIC_FIRST_SPI; i < sc->gic_nirqs; i += GICD_I_PER_ICFGRn)
+ gic_d_write(sc, 4, GICD_ICFGR(i), 0x00000000);
+
+ /* Set priority to all shared interrupts */
+ for (i = GIC_FIRST_SPI;
+ i < sc->gic_nirqs; i += GICD_I_PER_IPRIORITYn) {
+ /* Set highest priority */
+ gic_d_write(sc, 4, GICD_IPRIORITYR(i), GIC_PRIORITY_MAX);
+ }
+
+ /*
+ * Disable all interrupts. Leave PPI and SGIs as they are enabled in
+ * Re-Distributor registers.
+ */
+ for (i = GIC_FIRST_SPI; i < sc->gic_nirqs; i += GICD_I_PER_ISENABLERn)
+ gic_d_write(sc, 4, GICD_ICENABLER(i), 0xFFFFFFFF);
+
+ gic_v3_wait_for_rwp(sc, DIST);
+
+ /*
+ * 3. Enable Distributor
+ */
+ /* Enable Distributor with ARE, Group 1 */
+ gic_d_write(sc, 4, GICD_CTLR, GICD_CTLR_ARE_NS | GICD_CTLR_G1A |
+ GICD_CTLR_G1);
+
+ /*
+ * 4. Route all interrupts to boot CPU.
+ */
+ aff = CPU_AFFINITY(0);
+ for (i = GIC_FIRST_SPI; i < sc->gic_nirqs; i++)
+ gic_d_write(sc, 4, GICD_IROUTER(i), aff);
+
+ return (0);
+}
+
+/* Re-Distributor */
+static int
+gic_v3_redist_alloc(struct gic_v3_softc *sc)
+{
+ u_int cpuid;
+
+ /* Allocate struct resource for all CPU's Re-Distributor registers */
+ for (cpuid = 0; cpuid <= mp_maxid; cpuid++)
+ if (CPU_ISSET(cpuid, &all_cpus) != 0)
+ sc->gic_redists.pcpu[cpuid] =
+ malloc(sizeof(*sc->gic_redists.pcpu[0]),
+ M_GIC_V3, M_WAITOK);
+ else
+ sc->gic_redists.pcpu[cpuid] = NULL;
+ return (0);
+}
+
+static int
+gic_v3_redist_find(struct gic_v3_softc *sc)
+{
+ struct resource r_res;
+ bus_space_handle_t r_bsh;
+ uint64_t aff;
+ uint64_t typer;
+ uint32_t pidr2;
+ u_int cpuid;
+ size_t i;
+
+ cpuid = PCPU_GET(cpuid);
+
+ aff = CPU_AFFINITY(cpuid);
+ /* Affinity in format for comparison with typer */
+ aff = (CPU_AFF3(aff) << 24) | (CPU_AFF2(aff) << 16) |
+ (CPU_AFF1(aff) << 8) | CPU_AFF0(aff);
+
+ if (bootverbose) {
+ device_printf(sc->dev,
+ "Start searching for Re-Distributor\n");
+ }
+ /* Iterate through Re-Distributor regions */
+ for (i = 0; i < sc->gic_redists.nregions; i++) {
+ /* Take a copy of the region's resource */
+ r_res = *sc->gic_redists.regions[i];
+ r_bsh = rman_get_bushandle(&r_res);
+
+ pidr2 = bus_read_4(&r_res, GICR_PIDR2);
+ switch (GICR_PIDR2_ARCH(pidr2)) {
+ case GICR_PIDR2_ARCH_GICv3: /* fall through */
+ case GICR_PIDR2_ARCH_GICv4:
+ break;
+ default:
+ device_printf(sc->dev,
+ "No Re-Distributor found for CPU%u\n", cpuid);
+ return (ENODEV);
+ }
+
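+		/*
+		 * Each Re-Distributor in a region occupies an RD_base and an
+		 * SGI_base frame, plus two further frames when virtual LPIs
+		 * are supported. GICR_TYPER carries the owning CPU's affinity
+		 * and its Last bit marks the final Re-Distributor in the
+		 * region.
+		 */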
+ do {
+ typer = bus_read_8(&r_res, GICR_TYPER);
+ if ((typer >> GICR_TYPER_AFF_SHIFT) == aff) {
+ KASSERT(sc->gic_redists.pcpu[cpuid] != NULL,
+ ("Invalid pointer to per-CPU redistributor"));
+ /* Copy res contents to its final destination */
+ sc->gic_redists.pcpu[cpuid]->res = r_res;
+ sc->gic_redists.pcpu[cpuid]->lpi_enabled = false;
+ if (bootverbose) {
+ device_printf(sc->dev,
+ "CPU%u Re-Distributor has been found\n",
+ cpuid);
+ }
+ return (0);
+ }
+
+ r_bsh += (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE);
+ if ((typer & GICR_TYPER_VLPIS) != 0) {
+ r_bsh +=
+ (GICR_VLPI_BASE_SIZE + GICR_RESERVED_SIZE);
+ }
+
+ rman_set_bushandle(&r_res, r_bsh);
+ } while ((typer & GICR_TYPER_LAST) == 0);
+ }
+
+ device_printf(sc->dev, "No Re-Distributor found for CPU%u\n", cpuid);
+ return (ENXIO);
+}
+
+static int
+gic_v3_redist_wake(struct gic_v3_softc *sc)
+{
+ uint32_t waker;
+ size_t us_left = 1000000;
+
+ waker = gic_r_read(sc, 4, GICR_WAKER);
+ /* Wake up Re-Distributor for this CPU */
+ waker &= ~GICR_WAKER_PS;
+ gic_r_write(sc, 4, GICR_WAKER, waker);
+ /*
+ * When clearing ProcessorSleep bit it is required to wait for
+ * ChildrenAsleep to become zero following the processor power-on.
+ */
+ while ((gic_r_read(sc, 4, GICR_WAKER) & GICR_WAKER_CA) != 0) {
+ DELAY(1);
+ if (us_left-- == 0) {
+ panic("Could not wake Re-Distributor for CPU%u",
+ PCPU_GET(cpuid));
+ }
+ }
+
+ if (bootverbose) {
+ device_printf(sc->dev, "CPU%u Re-Distributor woke up\n",
+ PCPU_GET(cpuid));
+ }
+
+ return (0);
+}
+
+static int
+gic_v3_redist_init(struct gic_v3_softc *sc)
+{
+ int err;
+ size_t i;
+
+ err = gic_v3_redist_find(sc);
+ if (err != 0)
+ return (err);
+
+ err = gic_v3_redist_wake(sc);
+ if (err != 0)
+ return (err);
+
+ /* Configure SGIs and PPIs to be Group1 Non-secure */
+ gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICR_IGROUPR0,
+ 0xFFFFFFFF);
+
+	/* Disable PPIs */
+ gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICR_ICENABLER0,
+ GICR_I_ENABLER_PPI_MASK);
+ /* Enable SGIs */
+ gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICR_ISENABLER0,
+ GICR_I_ENABLER_SGI_MASK);
+
+ /* Set priority for SGIs and PPIs */
+ for (i = 0; i <= GIC_LAST_PPI; i += GICR_I_PER_IPRIORITYn) {
+ gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICD_IPRIORITYR(i),
+ GIC_PRIORITY_MAX);
+ }
+
+ gic_v3_wait_for_rwp(sc, REDIST);
+
+ return (0);
+}
diff --git a/sys/arm64/arm64/gic_v3_acpi.c b/sys/arm64/arm64/gic_v3_acpi.c
new file mode 100644
index 000000000000..b54ecfb014e5
--- /dev/null
+++ b/sys/arm64/arm64/gic_v3_acpi.c
@@ -0,0 +1,389 @@
+/*-
+ * Copyright (c) 2016 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_acpi.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+
+#include <machine/intr.h>
+#include <machine/resource.h>
+
+#include <contrib/dev/acpica/include/acpi.h>
+#include <dev/acpica/acpivar.h>
+
+#include "gic_v3_reg.h"
+#include "gic_v3_var.h"
+
+struct gic_v3_acpi_devinfo {
+ struct gic_v3_devinfo di_gic_dinfo;
+ struct resource_list di_rl;
+};
+
+static device_identify_t gic_v3_acpi_identify;
+static device_probe_t gic_v3_acpi_probe;
+static device_attach_t gic_v3_acpi_attach;
+static bus_alloc_resource_t gic_v3_acpi_bus_alloc_res;
+
+static void gic_v3_acpi_bus_attach(device_t);
+
+static device_method_t gic_v3_acpi_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_identify, gic_v3_acpi_identify),
+ DEVMETHOD(device_probe, gic_v3_acpi_probe),
+ DEVMETHOD(device_attach, gic_v3_acpi_attach),
+
+ /* Bus interface */
+ DEVMETHOD(bus_alloc_resource, gic_v3_acpi_bus_alloc_res),
+ DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
+
+ /* End */
+ DEVMETHOD_END
+};
+
+DEFINE_CLASS_1(gic, gic_v3_acpi_driver, gic_v3_acpi_methods,
+ sizeof(struct gic_v3_softc), gic_v3_driver);
+
+static devclass_t gic_v3_acpi_devclass;
+
+EARLY_DRIVER_MODULE(gic_v3, acpi, gic_v3_acpi_driver, gic_v3_acpi_devclass,
+ 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE);
+
+struct madt_table_data {
+ device_t parent;
+ device_t dev;
+ ACPI_MADT_GENERIC_DISTRIBUTOR *dist;
+ int count;
+};
+
+static void
+madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
+{
+ struct madt_table_data *madt_data;
+
+ madt_data = (struct madt_table_data *)arg;
+
+ switch(entry->Type) {
+ case ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR:
+ if (madt_data->dist != NULL) {
+ if (bootverbose)
+ device_printf(madt_data->parent,
+				    "gic: Already have a distributor table\n");
+ break;
+ }
+ madt_data->dist = (ACPI_MADT_GENERIC_DISTRIBUTOR *)entry;
+ break;
+
+ case ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR:
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+rdist_map(ACPI_SUBTABLE_HEADER *entry, void *arg)
+{
+ ACPI_MADT_GENERIC_REDISTRIBUTOR *redist;
+ struct madt_table_data *madt_data;
+
+ madt_data = (struct madt_table_data *)arg;
+
+ switch(entry->Type) {
+ case ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR:
+ redist = (ACPI_MADT_GENERIC_REDISTRIBUTOR *)entry;
+
+ madt_data->count++;
+ BUS_SET_RESOURCE(madt_data->parent, madt_data->dev,
+ SYS_RES_MEMORY, madt_data->count, redist->BaseAddress,
+ redist->Length);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+gic_v3_acpi_identify(driver_t *driver, device_t parent)
+{
+ struct madt_table_data madt_data;
+ ACPI_TABLE_MADT *madt;
+ vm_paddr_t physaddr;
+ device_t dev;
+
+ physaddr = acpi_find_table(ACPI_SIG_MADT);
+ if (physaddr == 0)
+ return;
+
+ madt = acpi_map_table(physaddr, ACPI_SIG_MADT);
+ if (madt == NULL) {
+ device_printf(parent, "gic: Unable to map the MADT\n");
+ return;
+ }
+
+ madt_data.parent = parent;
+ madt_data.dist = NULL;
+ madt_data.count = 0;
+
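+	/*
+	 * Walk the MADT twice: first to locate the distributor entry, then,
+	 * once the gic child has been added, to register each
+	 * re-distributor as a memory resource (rid 0 is the distributor,
+	 * rids 1..n the re-distributor regions).
+	 */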
+ acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
+ madt_handler, &madt_data);
+ if (madt_data.dist == NULL) {
+ device_printf(parent,
+ "No gic interrupt or distributor table\n");
+ goto out;
+ }
+ /* This is for the wrong GIC version */
+ if (madt_data.dist->Version != ACPI_MADT_GIC_VERSION_V3)
+ goto out;
+
+ dev = BUS_ADD_CHILD(parent, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE,
+ "gic", -1);
+ if (dev == NULL) {
+ device_printf(parent, "add gic child failed\n");
+ goto out;
+ }
+
+ /* Add the MADT data */
+ BUS_SET_RESOURCE(parent, dev, SYS_RES_MEMORY, 0,
+ madt_data.dist->BaseAddress, 128 * 1024);
+
+ madt_data.dev = dev;
+ acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
+ rdist_map, &madt_data);
+
+ acpi_set_private(dev, (void *)(uintptr_t)madt_data.dist->Version);
+
+out:
+ acpi_unmap_table(madt);
+}
+
+static int
+gic_v3_acpi_probe(device_t dev)
+{
+
+ switch((uintptr_t)acpi_get_private(dev)) {
+ case ACPI_MADT_GIC_VERSION_V3:
+ break;
+ default:
+ return (ENXIO);
+ }
+
+ device_set_desc(dev, GIC_V3_DEVSTR);
+ return (BUS_PROBE_NOWILDCARD);
+}
+
+static void
+madt_count_redistrib(ACPI_SUBTABLE_HEADER *entry, void *arg)
+{
+ struct gic_v3_softc *sc = arg;
+
+ if (entry->Type == ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR)
+ sc->gic_redists.nregions++;
+}
+
+static int
+gic_v3_acpi_count_regions(device_t dev)
+{
+ struct gic_v3_softc *sc;
+ ACPI_TABLE_MADT *madt;
+ vm_paddr_t physaddr;
+
+ sc = device_get_softc(dev);
+
+ physaddr = acpi_find_table(ACPI_SIG_MADT);
+ if (physaddr == 0)
+ return (ENXIO);
+
+ madt = acpi_map_table(physaddr, ACPI_SIG_MADT);
+ if (madt == NULL) {
+ device_printf(dev, "Unable to map the MADT\n");
+ return (ENXIO);
+ }
+
+ acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
+ madt_count_redistrib, sc);
+ acpi_unmap_table(madt);
+
+ return (sc->gic_redists.nregions > 0 ? 0 : ENXIO);
+}
+
+static int
+gic_v3_acpi_attach(device_t dev)
+{
+ struct gic_v3_softc *sc;
+ int err;
+
+ sc = device_get_softc(dev);
+ sc->dev = dev;
+ sc->gic_bus = GIC_BUS_ACPI;
+
+ err = gic_v3_acpi_count_regions(dev);
+ if (err != 0)
+ goto error;
+
+ err = gic_v3_attach(dev);
+ if (err != 0)
+ goto error;
+
+ sc->gic_pic = intr_pic_register(dev, ACPI_INTR_XREF);
+ if (sc->gic_pic == NULL) {
+ device_printf(dev, "could not register PIC\n");
+ err = ENXIO;
+ goto error;
+ }
+
+ if (intr_pic_claim_root(dev, ACPI_INTR_XREF, arm_gic_v3_intr, sc,
+ GIC_LAST_SGI - GIC_FIRST_SGI + 1) != 0) {
+ err = ENXIO;
+ goto error;
+ }
+
+ /*
+ * Try to register the ITS driver to this GIC. The GIC will act as
+ * a bus in that case. Failure here will not affect the main GIC
+ * functionality.
+ */
+ gic_v3_acpi_bus_attach(dev);
+
+	if (device_get_children(dev, &sc->gic_children, &sc->gic_nchildren) != 0)
+ sc->gic_nchildren = 0;
+
+ return (0);
+
+error:
+ if (bootverbose) {
+ device_printf(dev,
+ "Failed to attach. Error %d\n", err);
+ }
+ /* Failure so free resources */
+ gic_v3_detach(dev);
+
+ return (err);
+}
+
+static void
+gic_v3_add_children(ACPI_SUBTABLE_HEADER *entry, void *arg)
+{
+ ACPI_MADT_GENERIC_TRANSLATOR *gict;
+ struct gic_v3_acpi_devinfo *di;
+ struct gic_v3_softc *sc;
+ device_t child, dev;
+ u_int xref;
+ int err, pxm;
+
+ if (entry->Type == ACPI_MADT_TYPE_GENERIC_TRANSLATOR) {
+ /* We have an ITS, add it as a child */
+ gict = (ACPI_MADT_GENERIC_TRANSLATOR *)entry;
+ dev = arg;
+ sc = device_get_softc(dev);
+
+ child = device_add_child(dev, "its", -1);
+ if (child == NULL)
+ return;
+
+ di = malloc(sizeof(*di), M_GIC_V3, M_WAITOK | M_ZERO);
+ resource_list_init(&di->di_rl);
+ resource_list_add(&di->di_rl, SYS_RES_MEMORY, 0,
+ gict->BaseAddress, gict->BaseAddress + 128 * 1024 - 1,
+ 128 * 1024);
+ err = acpi_iort_its_lookup(gict->TranslationId, &xref, &pxm);
+ if (err == 0) {
+ di->di_gic_dinfo.gic_domain = pxm;
+ di->di_gic_dinfo.msi_xref = xref;
+ } else {
+ di->di_gic_dinfo.gic_domain = -1;
+ di->di_gic_dinfo.msi_xref = ACPI_MSI_XREF;
+ }
+ sc->gic_nchildren++;
+ device_set_ivars(child, di);
+ }
+}
+
+static void
+gic_v3_acpi_bus_attach(device_t dev)
+{
+ ACPI_TABLE_MADT *madt;
+ vm_paddr_t physaddr;
+
+ physaddr = acpi_find_table(ACPI_SIG_MADT);
+ if (physaddr == 0)
+ return;
+
+ madt = acpi_map_table(physaddr, ACPI_SIG_MADT);
+ if (madt == NULL) {
+ device_printf(dev, "Unable to map the MADT to add children\n");
+ return;
+ }
+
+ acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
+ gic_v3_add_children, dev);
+
+ acpi_unmap_table(madt);
+
+ bus_generic_attach(dev);
+}
+
+static struct resource *
+gic_v3_acpi_bus_alloc_res(device_t bus, device_t child, int type, int *rid,
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
+{
+ struct gic_v3_acpi_devinfo *di;
+ struct resource_list_entry *rle;
+
+ /* We only allocate memory */
+ if (type != SYS_RES_MEMORY)
+ return (NULL);
+
+ if (RMAN_IS_DEFAULT_RANGE(start, end)) {
+ if ((di = device_get_ivars(child)) == NULL)
+ return (NULL);
+
+ /* Find defaults for this rid */
+ rle = resource_list_find(&di->di_rl, type, *rid);
+ if (rle == NULL)
+ return (NULL);
+
+ start = rle->start;
+ end = rle->end;
+ count = rle->count;
+ }
+
+ return (bus_generic_alloc_resource(bus, child, type, rid, start, end,
+ count, flags));
+}
diff --git a/sys/arm64/arm64/gic_v3_fdt.c b/sys/arm64/arm64/gic_v3_fdt.c
new file mode 100644
index 000000000000..c8a9615a8a5f
--- /dev/null
+++ b/sys/arm64/arm64/gic_v3_fdt.c
@@ -0,0 +1,331 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bitstring.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+
+#include <machine/intr.h>
+#include <machine/resource.h>
+
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#include <arm/arm/gic_common.h>
+#include "gic_v3_reg.h"
+#include "gic_v3_var.h"
+
+/*
+ * FDT glue.
+ */
+static int gic_v3_fdt_probe(device_t);
+static int gic_v3_fdt_attach(device_t);
+static int gic_v3_fdt_print_child(device_t, device_t);
+
+static struct resource *gic_v3_ofw_bus_alloc_res(device_t, device_t, int, int *,
+ rman_res_t, rman_res_t, rman_res_t, u_int);
+static const struct ofw_bus_devinfo *gic_v3_ofw_get_devinfo(device_t, device_t);
+
+static device_method_t gic_v3_fdt_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, gic_v3_fdt_probe),
+ DEVMETHOD(device_attach, gic_v3_fdt_attach),
+
+ /* Bus interface */
+ DEVMETHOD(bus_print_child, gic_v3_fdt_print_child),
+ DEVMETHOD(bus_alloc_resource, gic_v3_ofw_bus_alloc_res),
+ DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
+
+ /* ofw_bus interface */
+ DEVMETHOD(ofw_bus_get_devinfo, gic_v3_ofw_get_devinfo),
+ DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat),
+ DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model),
+ DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name),
+ DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node),
+ DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type),
+
+ /* End */
+ DEVMETHOD_END
+};
+
+DEFINE_CLASS_1(gic, gic_v3_fdt_driver, gic_v3_fdt_methods,
+ sizeof(struct gic_v3_softc), gic_v3_driver);
+
+static devclass_t gic_v3_fdt_devclass;
+
+EARLY_DRIVER_MODULE(gic_v3, simplebus, gic_v3_fdt_driver, gic_v3_fdt_devclass,
+ 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE);
+EARLY_DRIVER_MODULE(gic_v3, ofwbus, gic_v3_fdt_driver, gic_v3_fdt_devclass,
+ 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE);
+
+/*
+ * Helper functions declarations.
+ */
+static int gic_v3_ofw_bus_attach(device_t);
+
+/*
+ * Device interface.
+ */
+static int
+gic_v3_fdt_probe(device_t dev)
+{
+
+ if (!ofw_bus_status_okay(dev))
+ return (ENXIO);
+
+ if (!ofw_bus_is_compatible(dev, "arm,gic-v3"))
+ return (ENXIO);
+
+ device_set_desc(dev, GIC_V3_DEVSTR);
+ return (BUS_PROBE_DEFAULT);
+}
+
+static int
+gic_v3_fdt_attach(device_t dev)
+{
+ struct gic_v3_softc *sc;
+ pcell_t redist_regions;
+ intptr_t xref;
+ int err;
+
+ sc = device_get_softc(dev);
+ sc->dev = dev;
+ sc->gic_bus = GIC_BUS_FDT;
+
+ /*
+	 * Recover the number of Re-Distributor regions.
+ */
+ if (OF_getencprop(ofw_bus_get_node(dev), "#redistributor-regions",
+ &redist_regions, sizeof(redist_regions)) <= 0)
+ sc->gic_redists.nregions = 1;
+ else
+ sc->gic_redists.nregions = redist_regions;
+
+ err = gic_v3_attach(dev);
+ if (err != 0)
+ goto error;
+
+ xref = OF_xref_from_node(ofw_bus_get_node(dev));
+ sc->gic_pic = intr_pic_register(dev, xref);
+ if (sc->gic_pic == NULL) {
+ device_printf(dev, "could not register PIC\n");
+ err = ENXIO;
+ goto error;
+ }
+
+ /* Register xref */
+ OF_device_register_xref(xref, dev);
+
+ if (intr_pic_claim_root(dev, xref, arm_gic_v3_intr, sc,
+ GIC_LAST_SGI - GIC_FIRST_SGI + 1) != 0) {
+ err = ENXIO;
+ goto error;
+ }
+
+ /*
+	 * Try to register the ITS with this GIC.
+	 * The GIC will act as a bus in that case.
+	 * Failure here will not affect the main GIC functionality.
+ */
+ if (gic_v3_ofw_bus_attach(dev) != 0) {
+ if (bootverbose) {
+ device_printf(dev,
+ "Failed to attach ITS to this GIC\n");
+ }
+ }
+
+ if (device_get_children(dev, &sc->gic_children, &sc->gic_nchildren) != 0)
+ sc->gic_nchildren = 0;
+
+ return (err);
+
+error:
+ if (bootverbose) {
+ device_printf(dev,
+ "Failed to attach. Error %d\n", err);
+ }
+ /* Failure so free resources */
+ gic_v3_detach(dev);
+
+ return (err);
+}
+
+/* OFW bus interface */
+struct gic_v3_ofw_devinfo {
+ struct gic_v3_devinfo di_gic_dinfo;
+ struct ofw_bus_devinfo di_dinfo;
+ struct resource_list di_rl;
+};
+
+static int
+gic_v3_fdt_print_child(device_t bus, device_t child)
+{
+ struct gic_v3_ofw_devinfo *di = device_get_ivars(child);
+ struct resource_list *rl = &di->di_rl;
+ int retval = 0;
+
+ retval += bus_print_child_header(bus, child);
+ retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
+ retval += bus_print_child_footer(bus, child);
+
+ return (retval);
+}
+
+static const struct ofw_bus_devinfo *
+gic_v3_ofw_get_devinfo(device_t bus __unused, device_t child)
+{
+ struct gic_v3_ofw_devinfo *di;
+
+ di = device_get_ivars(child);
+ return (&di->di_dinfo);
+}
+
+static struct resource *
+gic_v3_ofw_bus_alloc_res(device_t bus, device_t child, int type, int *rid,
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
+{
+ struct gic_v3_ofw_devinfo *di;
+ struct resource_list_entry *rle;
+ int ranges_len;
+
+ if (RMAN_IS_DEFAULT_RANGE(start, end)) {
+ if ((di = device_get_ivars(child)) == NULL)
+ return (NULL);
+ if (type != SYS_RES_MEMORY)
+ return (NULL);
+
+ /* Find defaults for this rid */
+ rle = resource_list_find(&di->di_rl, type, *rid);
+ if (rle == NULL)
+ return (NULL);
+
+ start = rle->start;
+ end = rle->end;
+ count = rle->count;
+ }
+ /*
+	 * XXX: Ranges are not remapped!
+	 * An absolute address is expected.
+ */
+ if (ofw_bus_has_prop(bus, "ranges")) {
+ ranges_len = OF_getproplen(ofw_bus_get_node(bus), "ranges");
+ if (ranges_len != 0) {
+ if (bootverbose) {
+ device_printf(child,
+ "Ranges remap not supported\n");
+ }
+ return (NULL);
+ }
+ }
+ return (bus_generic_alloc_resource(bus, child, type, rid, start, end,
+ count, flags));
+}
+
+/* Helper functions */
+
+/*
+ * Bus capability support for GICv3.
+ * Collects and configures device information and finally
+ * adds the ITS device as a child of the GICv3 in the Newbus hierarchy.
+ */
+static int
+gic_v3_ofw_bus_attach(device_t dev)
+{
+ struct gic_v3_ofw_devinfo *di;
+ struct gic_v3_softc *sc;
+ device_t child;
+ phandle_t parent, node;
+ pcell_t addr_cells, size_cells;
+
+ sc = device_get_softc(dev);
+ parent = ofw_bus_get_node(dev);
+ if (parent > 0) {
+ addr_cells = 2;
+ OF_getencprop(parent, "#address-cells", &addr_cells,
+ sizeof(addr_cells));
+ size_cells = 2;
+ OF_getencprop(parent, "#size-cells", &size_cells,
+ sizeof(size_cells));
+ /* Iterate through all GIC subordinates */
+ for (node = OF_child(parent); node > 0; node = OF_peer(node)) {
+ /* Allocate and populate devinfo. */
+ di = malloc(sizeof(*di), M_GIC_V3, M_WAITOK | M_ZERO);
+
+ /* Read the numa node, or -1 if there is none */
+ if (OF_getencprop(node, "numa-node-id",
+ &di->di_gic_dinfo.gic_domain,
+ sizeof(di->di_gic_dinfo.gic_domain)) <= 0) {
+ di->di_gic_dinfo.gic_domain = -1;
+ }
+
+ if (ofw_bus_gen_setup_devinfo(&di->di_dinfo, node)) {
+ if (bootverbose) {
+ device_printf(dev,
+ "Could not set up devinfo for ITS\n");
+ }
+ free(di, M_GIC_V3);
+ continue;
+ }
+
+ /* Initialize and populate resource list. */
+ resource_list_init(&di->di_rl);
+ ofw_bus_reg_to_rl(dev, node, addr_cells, size_cells,
+ &di->di_rl);
+
+ /* Should not have any interrupts, so don't add any */
+
+ /* Add newbus device for this FDT node */
+ child = device_add_child(dev, NULL, -1);
+ if (!child) {
+ if (bootverbose) {
+ device_printf(dev,
+ "Could not add child: %s\n",
+ di->di_dinfo.obd_name);
+ }
+ resource_list_free(&di->di_rl);
+ ofw_bus_gen_destroy_devinfo(&di->di_dinfo);
+ free(di, M_GIC_V3);
+ continue;
+ }
+
+ sc->gic_nchildren++;
+ device_set_ivars(child, di);
+ }
+ }
+
+ return (bus_generic_attach(dev));
+}
diff --git a/sys/arm64/arm64/gic_v3_reg.h b/sys/arm64/arm64/gic_v3_reg.h
new file mode 100644
index 000000000000..34082b1bde0a
--- /dev/null
+++ b/sys/arm64/arm64/gic_v3_reg.h
@@ -0,0 +1,434 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _GIC_V3_REG_H_
+#define _GIC_V3_REG_H_
+
+/*
+ * Maximum number of interrupts
+ * supported by GIC (including SGIs, PPIs and SPIs)
+ */
+#define GIC_I_NUM_MAX (1020)
+/*
+ * Priority MAX/MIN values
+ */
+#define GIC_PRIORITY_MAX (0x00UL)
+/* Upper value is determined by LPI max priority */
+#define GIC_PRIORITY_MIN (0xFCUL)
+
+/* Numbers for shared peripheral interrupts */
+#define GIC_LAST_SPI (1019)
+/* Numbers for locality-specific peripheral interrupts (LPIs) */
+#define GIC_FIRST_LPI (8192)
+
+/*
+ * Registers (v2/v3)
+ */
+/* GICD_CTLR */
+#define GICD_CTLR_G1 (1 << 0)
+#define GICD_CTLR_G1A (1 << 1)
+#define GICD_CTLR_ARE_NS (1 << 4)
+#define GICD_CTLR_RWP (1 << 31)
+/* GICD_TYPER */
+#define GICD_TYPER_IDBITS(n) ((((n) >> 19) & 0x1F) + 1)
+
+/*
+ * Registers (v3)
+ */
+#define GICD_IROUTER(n) (0x6000 + ((n) * 8))
+
+#define GICD_PIDR4 0xFFD0
+#define GICD_PIDR5 0xFFD4
+#define GICD_PIDR6 0xFFD8
+#define GICD_PIDR7 0xFFDC
+#define GICD_PIDR0 0xFFE0
+#define GICD_PIDR1 0xFFE4
+#define GICD_PIDR2 0xFFE8
+
+#define GICR_PIDR2_ARCH_SHIFT 4
+#define GICR_PIDR2_ARCH_MASK 0xF0
+#define GICR_PIDR2_ARCH(x) \
+ (((x) & GICR_PIDR2_ARCH_MASK) >> GICR_PIDR2_ARCH_SHIFT)
+#define GICR_PIDR2_ARCH_GICv3 0x3
+#define GICR_PIDR2_ARCH_GICv4 0x4
+
+#define GICD_PIDR3 0xFFEC
+
+/* Redistributor registers */
+#define GICR_CTLR GICD_CTLR
+#define GICR_CTLR_LPI_ENABLE (1 << 0)
+
+#define GICR_PIDR2 GICD_PIDR2
+
+#define GICR_TYPER (0x0008)
+#define GICR_TYPER_PLPIS (1 << 0)
+#define GICR_TYPER_VLPIS (1 << 1)
+#define GICR_TYPER_LAST (1 << 4)
+#define GICR_TYPER_CPUNUM_SHIFT (8)
+#define GICR_TYPER_CPUNUM_MASK (0xFFFUL << GICR_TYPER_CPUNUM_SHIFT)
+#define GICR_TYPER_CPUNUM(x) \
+ (((x) & GICR_TYPER_CPUNUM_MASK) >> GICR_TYPER_CPUNUM_SHIFT)
+#define GICR_TYPER_AFF_SHIFT (32)
+
+#define GICR_WAKER (0x0014)
+#define GICR_WAKER_PS (1 << 1) /* Processor sleep */
+#define GICR_WAKER_CA (1 << 2) /* Children asleep */
+
+#define GICR_PROPBASER (0x0070)
+#define GICR_PROPBASER_IDBITS_MASK 0x1FUL
+/*
+ * Cacheability
+ * 0x0 - Device-nGnRnE
+ * 0x1 - Normal Inner Non-cacheable
+ * 0x2 - Normal Inner Read-allocate, Write-through
+ * 0x3 - Normal Inner Read-allocate, Write-back
+ * 0x4 - Normal Inner Write-allocate, Write-through
+ * 0x5 - Normal Inner Write-allocate, Write-back
+ * 0x6 - Normal Inner Read-allocate, Write-allocate, Write-through
+ * 0x7 - Normal Inner Read-allocate, Write-allocate, Write-back
+ */
+#define GICR_PROPBASER_CACHE_SHIFT 7
+#define GICR_PROPBASER_CACHE_DnGnRnE 0x0UL
+#define GICR_PROPBASER_CACHE_NIN 0x1UL
+#define GICR_PROPBASER_CACHE_NIRAWT 0x2UL
+#define GICR_PROPBASER_CACHE_NIRAWB 0x3UL
+#define GICR_PROPBASER_CACHE_NIWAWT 0x4UL
+#define GICR_PROPBASER_CACHE_NIWAWB 0x5UL
+#define GICR_PROPBASER_CACHE_NIRAWAWT 0x6UL
+#define GICR_PROPBASER_CACHE_NIRAWAWB 0x7UL
+#define GICR_PROPBASER_CACHE_MASK \
+ (0x7UL << GICR_PROPBASER_CACHE_SHIFT)
+
+/*
+ * Shareability
+ * 0x0 - Non-shareable
+ * 0x1 - Inner-shareable
+ * 0x2 - Outer-shareable
+ * 0x3 - Reserved. Treated as 0x0
+ */
+#define GICR_PROPBASER_SHARE_SHIFT 10
+#define GICR_PROPBASER_SHARE_NS 0x0UL
+#define GICR_PROPBASER_SHARE_IS 0x1UL
+#define GICR_PROPBASER_SHARE_OS 0x2UL
+#define GICR_PROPBASER_SHARE_RES 0x3UL
+#define GICR_PROPBASER_SHARE_MASK \
+ (0x3UL << GICR_PROPBASER_SHARE_SHIFT)
+
+#define GICR_PENDBASER (0x0078)
+/*
+ * Cacheability
+ * 0x0 - Device-nGnRnE
+ * 0x1 - Normal Inner Non-cacheable
+ * 0x2 - Normal Inner Read-allocate, Write-through
+ * 0x3 - Normal Inner Read-allocate, Write-back
+ * 0x4 - Normal Inner Write-allocate, Write-through
+ * 0x5 - Normal Inner Write-allocate, Write-back
+ * 0x6 - Normal Inner Read-allocate, Write-allocate, Write-through
+ * 0x7 - Normal Inner Read-allocate, Write-allocate, Write-back
+ */
+#define GICR_PENDBASER_CACHE_SHIFT 7
+#define GICR_PENDBASER_CACHE_DnGnRnE 0x0UL
+#define GICR_PENDBASER_CACHE_NIN 0x1UL
+#define GICR_PENDBASER_CACHE_NIRAWT 0x2UL
+#define GICR_PENDBASER_CACHE_NIRAWB 0x3UL
+#define GICR_PENDBASER_CACHE_NIWAWT 0x4UL
+#define GICR_PENDBASER_CACHE_NIWAWB 0x5UL
+#define GICR_PENDBASER_CACHE_NIRAWAWT 0x6UL
+#define GICR_PENDBASER_CACHE_NIRAWAWB 0x7UL
+#define GICR_PENDBASER_CACHE_MASK \
+ (0x7UL << GICR_PENDBASER_CACHE_SHIFT)
+
+/*
+ * Shareability
+ * 0x0 - Non-shareable
+ * 0x1 - Inner-shareable
+ * 0x2 - Outer-shareable
+ * 0x3 - Reserved. Treated as 0x0
+ */
+#define GICR_PENDBASER_SHARE_SHIFT 10
+#define GICR_PENDBASER_SHARE_NS 0x0UL
+#define GICR_PENDBASER_SHARE_IS 0x1UL
+#define GICR_PENDBASER_SHARE_OS 0x2UL
+#define GICR_PENDBASER_SHARE_RES 0x3UL
+#define GICR_PENDBASER_SHARE_MASK \
+ (0x3UL << GICR_PENDBASER_SHARE_SHIFT)
+
+/* Re-distributor registers for SGIs and PPIs */
+#define GICR_RD_BASE_SIZE PAGE_SIZE_64K
+#define GICR_SGI_BASE_SIZE PAGE_SIZE_64K
+#define GICR_VLPI_BASE_SIZE PAGE_SIZE_64K
+#define GICR_RESERVED_SIZE PAGE_SIZE_64K
+
+#define GICR_IGROUPR0 (0x0080)
+#define GICR_ISENABLER0 (0x0100)
+#define GICR_ICENABLER0 (0x0180)
+#define GICR_I_ENABLER_SGI_MASK (0x0000FFFF)
+#define GICR_I_ENABLER_PPI_MASK (0xFFFF0000)
+
+#define GICR_I_PER_IPRIORITYn (GICD_I_PER_IPRIORITYn)
+
+/* ITS registers */
+#define GITS_PIDR2 GICR_PIDR2
+#define GITS_PIDR2_ARCH_MASK GICR_PIDR2_ARCH_MASK
+#define GITS_PIDR2_ARCH_GICv3 GICR_PIDR2_ARCH_GICv3
+#define GITS_PIDR2_ARCH_GICv4 GICR_PIDR2_ARCH_GICv4
+
+#define GITS_CTLR (0x0000)
+#define GITS_CTLR_EN (1 << 0)
+
+#define GITS_IIDR (0x0004)
+#define GITS_IIDR_PRODUCT_SHIFT 24
+#define GITS_IIDR_PRODUCT_MASK (0xff << GITS_IIDR_PRODUCT_SHIFT)
+#define GITS_IIDR_VARIANT_SHIFT 16
+#define GITS_IIDR_VARIANT_MASK (0xf << GITS_IIDR_VARIANT_SHIFT)
+#define GITS_IIDR_REVISION_SHIFT 12
+#define GITS_IIDR_REVISION_MASK (0xf << GITS_IIDR_REVISION_SHIFT)
+#define GITS_IIDR_IMPLEMENTOR_SHIFT 0
+#define GITS_IIDR_IMPLEMENTOR_MASK (0xfff << GITS_IIDR_IMPLEMENTOR_SHIFT)
+
+#define GITS_IIDR_RAW(impl, prod, var, rev) \
+ ((prod) << GITS_IIDR_PRODUCT_SHIFT | \
+ (var) << GITS_IIDR_VARIANT_SHIFT | \
+ (rev) << GITS_IIDR_REVISION_SHIFT | \
+ (impl) << GITS_IIDR_IMPLEMENTOR_SHIFT)
+
+#define GITS_IIDR_IMPL_ARM (0x43B)
+#define GITS_IIDR_PROD_GIC500 (0x0)
+#define GITS_IIDR_IMPL_CAVIUM (0x34c)
+#define GITS_IIDR_PROD_THUNDER (0xa1)
+#define GITS_IIDR_VAR_THUNDER_1 (0x0)
+
+#define GITS_CBASER (0x0080)
+#define GITS_CBASER_VALID (1UL << 63)
+/*
+ * Cacheability
+ * 0x0 - Device-nGnRnE
+ * 0x1 - Normal Inner Non-cacheable
+ * 0x2 - Normal Inner Read-allocate, Write-through
+ * 0x3 - Normal Inner Read-allocate, Write-back
+ * 0x4 - Normal Inner Write-allocate, Write-through
+ * 0x5 - Normal Inner Write-allocate, Write-back
+ * 0x6 - Normal Inner Read-allocate, Write-allocate, Write-through
+ * 0x7 - Normal Inner Read-allocate, Write-allocate, Write-back
+ */
+#define GITS_CBASER_CACHE_SHIFT 59
+#define GITS_CBASER_CACHE_DnGnRnE 0x0UL
+#define GITS_CBASER_CACHE_NIN 0x1UL
+#define GITS_CBASER_CACHE_NIRAWT 0x2UL
+#define GITS_CBASER_CACHE_NIRAWB 0x3UL
+#define GITS_CBASER_CACHE_NIWAWT 0x4UL
+#define GITS_CBASER_CACHE_NIWAWB 0x5UL
+#define GITS_CBASER_CACHE_NIRAWAWT 0x6UL
+#define GITS_CBASER_CACHE_NIRAWAWB 0x7UL
+#define GITS_CBASER_CACHE_MASK (0x7UL << GITS_CBASER_CACHE_SHIFT)
+/*
+ * Shareability
+ * 0x0 - Non-shareable
+ * 0x1 - Inner-shareable
+ * 0x2 - Outer-shareable
+ * 0x3 - Reserved. Treated as 0x0
+ */
+#define GITS_CBASER_SHARE_SHIFT 10
+#define GITS_CBASER_SHARE_NS 0x0UL
+#define GITS_CBASER_SHARE_IS 0x1UL
+#define GITS_CBASER_SHARE_OS 0x2UL
+#define GITS_CBASER_SHARE_RES 0x3UL
+#define GITS_CBASER_SHARE_MASK \
+ (0x3UL << GITS_CBASER_SHARE_SHIFT)
+
+#define GITS_CBASER_PA_SHIFT 12
+#define GITS_CBASER_PA_MASK (0xFFFFFFFFFUL << GITS_CBASER_PA_SHIFT)
+
+#define GITS_CWRITER (0x0088)
+#define GITS_CREADR (0x0090)
+
+#define GITS_BASER_BASE (0x0100)
+#define GITS_BASER(x) (GITS_BASER_BASE + (x) * 8)
+
+#define GITS_BASER_VALID (1UL << 63)
+
+#define GITS_BASER_TYPE_SHIFT 56
+#define GITS_BASER_TYPE(x) \
+ (((x) & GITS_BASER_TYPE_MASK) >> GITS_BASER_TYPE_SHIFT)
+#define GITS_BASER_TYPE_UNIMPL 0x0UL /* Unimplemented */
+#define GITS_BASER_TYPE_DEV 0x1UL /* Devices */
+#define GITS_BASER_TYPE_VP 0x2UL /* Virtual Processors */
+#define GITS_BASER_TYPE_PP 0x3UL /* Physical Processors */
+#define GITS_BASER_TYPE_IC 0x4UL /* Interrupt Collections */
+#define GITS_BASER_TYPE_RES5 0x5UL /* Reserved */
+#define GITS_BASER_TYPE_RES6 0x6UL /* Reserved */
+#define GITS_BASER_TYPE_RES7 0x7UL /* Reserved */
+#define GITS_BASER_TYPE_MASK (0x7UL << GITS_BASER_TYPE_SHIFT)
+/*
+ * Cacheability
+ * 0x0 - Non-cacheable, non-bufferable
+ * 0x1 - Non-cacheable
+ * 0x2 - Read-allocate, Write-through
+ * 0x3 - Read-allocate, Write-back
+ * 0x4 - Write-allocate, Write-through
+ * 0x5 - Write-allocate, Write-back
+ * 0x6 - Read-allocate, Write-allocate, Write-through
+ * 0x7 - Read-allocate, Write-allocate, Write-back
+ */
+#define GITS_BASER_CACHE_SHIFT 59
+#define GITS_BASER_CACHE_NCNB 0x0UL
+#define GITS_BASER_CACHE_NC 0x1UL
+#define GITS_BASER_CACHE_RAWT 0x2UL
+#define GITS_BASER_CACHE_RAWB 0x3UL
+#define GITS_BASER_CACHE_WAWT 0x4UL
+#define GITS_BASER_CACHE_WAWB 0x5UL
+#define GITS_BASER_CACHE_RAWAWT 0x6UL
+#define GITS_BASER_CACHE_RAWAWB 0x7UL
+#define GITS_BASER_CACHE_MASK (0x7UL << GITS_BASER_CACHE_SHIFT)
+
+#define GITS_BASER_ESIZE_SHIFT 48
+#define GITS_BASER_ESIZE_MASK (0x1FUL << GITS_BASER_ESIZE_SHIFT)
+#define GITS_BASER_ESIZE(x) \
+ ((((x) & GITS_BASER_ESIZE_MASK) >> GITS_BASER_ESIZE_SHIFT) + 1)
+
+#define GITS_BASER_PA_SHIFT 12
+#define GITS_BASER_PA_MASK (0xFFFFFFFFFUL << GITS_BASER_PA_SHIFT)
+
+/*
+ * Shareability
+ * 0x0 - Non-shareable
+ * 0x1 - Inner-shareable
+ * 0x2 - Outer-shareable
+ * 0x3 - Reserved. Treated as 0x0
+ */
+#define GITS_BASER_SHARE_SHIFT 10
+#define GITS_BASER_SHARE_NS 0x0UL
+#define GITS_BASER_SHARE_IS 0x1UL
+#define GITS_BASER_SHARE_OS 0x2UL
+#define GITS_BASER_SHARE_RES 0x3UL
+#define GITS_BASER_SHARE_MASK (0x3UL << GITS_BASER_SHARE_SHIFT)
+
+#define GITS_BASER_PSZ_SHIFT 8
+#define GITS_BASER_PSZ_4K 0x0UL
+#define GITS_BASER_PSZ_16K 0x1UL
+#define GITS_BASER_PSZ_64K 0x2UL
+#define GITS_BASER_PSZ_MASK (0x3UL << GITS_BASER_PSZ_SHIFT)
+
+#define GITS_BASER_SIZE_MASK 0xFFUL
+
+#define GITS_BASER_NUM 8
+
+#define GITS_TYPER (0x0008)
+#define GITS_TYPER_PTA (1UL << 19)
+#define GITS_TYPER_DEVB_SHIFT 13
+#define GITS_TYPER_DEVB_MASK (0x1FUL << GITS_TYPER_DEVB_SHIFT)
+/* Number of device identifiers implemented */
+#define GITS_TYPER_DEVB(x) \
+ ((((x) & GITS_TYPER_DEVB_MASK) >> GITS_TYPER_DEVB_SHIFT) + 1)
+#define GITS_TYPER_ITTES_SHIFT 4
+#define GITS_TYPER_ITTES_MASK (0xFUL << GITS_TYPER_ITTES_SHIFT)
+/* Number of bytes per ITT Entry */
+#define GITS_TYPER_ITTES(x) \
+ ((((x) & GITS_TYPER_ITTES_MASK) >> GITS_TYPER_ITTES_SHIFT) + 1)
+
+#define GITS_TRANSLATER (0x10040)
+/*
+ * LPI related
+ */
+#define LPI_CONF_PRIO_MASK (0xFC)
+#define LPI_CONF_GROUP1 (1 << 1)
+#define LPI_CONF_ENABLE (1 << 0)
+
+/*
+ * GIC 500 ITS tracking facility
+ */
+#define GITS_TRKCTLR 0xC000
+#define GITS_TRKR 0xC004
+#define GITS_TRKDIDR 0xC008
+#define GITS_TRKPIDR 0xC00C
+#define GITS_TRKVIDR 0xC010
+#define GITS_TRKTGTR 0xC014
+#define GITS_TRKICR 0xC018
+#define GITS_TRKLCR 0xC018
+
+/*
+ * CPU interface
+ */
+
+/*
+ * Registers list (ICC_xyz_EL1):
+ *
+ * PMR - Priority Mask Register
+ *		* only interrupts with priority higher than the value
+ *		  in this mask (i.e. a numerically lower value) are
+ *		  signalled to the CPU.
+ *		  (0xff - lowest possible priority, 0x00 - highest priority)
+ *
+ * CTLR - Control Register
+ *		* controls the behavior of the CPU interface and reports
+ *		  the implemented features.
+ *
+ * IGRPEN1 - Interrupt Group 1 Enable Register
+ *
+ * IAR1 - Interrupt Acknowledge Register Group 1
+ *		* contains the INTID of the highest-priority pending
+ *		  Group 1 interrupt.
+ *
+ * EOIR1 - End of Interrupt Register Group 1
+ *		* a write informs the CPU interface that processing of a
+ *		  Group 1 interrupt has completed.
+ */
+
+#define gic_icc_write(reg, val) \
+do { \
+ WRITE_SPECIALREG(icc_ ##reg ##_el1, val); \
+ isb(); \
+} while (0)
+
+#define gic_icc_read(reg) \
+({ \
+ uint64_t val; \
+ \
+ val = READ_SPECIALREG(icc_ ##reg ##_el1); \
+ (val); \
+})
+
+#define gic_icc_set(reg, mask) \
+do { \
+ uint64_t val; \
+ val = gic_icc_read(reg); \
+ val |= (mask); \
+ gic_icc_write(reg, val); \
+} while (0)
+
+#define gic_icc_clear(reg, mask) \
+do { \
+ uint64_t val; \
+ val = gic_icc_read(reg); \
+ val &= ~(mask); \
+ gic_icc_write(reg, val); \
+} while (0)
+
+#endif /* _GIC_V3_REG_H_ */
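A rough usage sketch, not part of this change: the gic_icc_* macros above splice the register token into an ICC_<reg>_EL1 system-register access, so callers pass the bare register name, e.g. gic_icc_write(EOIR1, ...) as used later in gicv3_its.c. The values written below are placeholders.

	uint64_t iar;

	/* Unmask all priorities and enable Group 1 delivery on this CPU. */
	gic_icc_write(PMR, 0xFF);	/* 0xff = lowest priority, nothing masked */
	gic_icc_set(IGRPEN1, 1);	/* bit 0 enables Group 1 interrupts */

	/* Acknowledge, handle, then complete one interrupt. */
	iar = gic_icc_read(IAR1);	/* INTID of the pending interrupt */
	/* ... dispatch the handler for this INTID ... */
	gic_icc_write(EOIR1, iar);	/* signal end of interrupt */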
diff --git a/sys/arm64/arm64/gic_v3_var.h b/sys/arm64/arm64/gic_v3_var.h
new file mode 100644
index 000000000000..f855e425d66d
--- /dev/null
+++ b/sys/arm64/arm64/gic_v3_var.h
@@ -0,0 +1,145 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _GIC_V3_VAR_H_
+#define _GIC_V3_VAR_H_
+
+#include <arm/arm/gic_common.h>
+
+#define GIC_V3_DEVSTR "ARM Generic Interrupt Controller v3.0"
+
+DECLARE_CLASS(gic_v3_driver);
+
+struct gic_v3_irqsrc;
+
+struct redist_pcpu {
+ struct resource res; /* mem resource for redist */
+ vm_offset_t pend_base;
+ bool lpi_enabled; /* redist LPI configured? */
+};
+
+struct gic_redists {
+ /*
+ * Re-Distributor region description.
+	 * We will have a few of these, depending
+	 * on the #redistributor-regions property in the FDT.
+ */
+ struct resource ** regions;
+ /* Number of Re-Distributor regions */
+ u_int nregions;
+ /* Per-CPU Re-Distributor data */
+ struct redist_pcpu *pcpu[MAXCPU];
+};
+
+struct gic_v3_softc {
+ device_t dev;
+ struct resource ** gic_res;
+ struct mtx gic_mtx;
+ /* Distributor */
+ struct resource * gic_dist;
+ /* Re-Distributors */
+ struct gic_redists gic_redists;
+
+ uint32_t gic_pidr2;
+ u_int gic_bus;
+
+ u_int gic_nirqs;
+ u_int gic_idbits;
+
+ boolean_t gic_registered;
+
+ int gic_nchildren;
+ device_t *gic_children;
+ struct intr_pic *gic_pic;
+ struct gic_v3_irqsrc *gic_irqs;
+};
+
+struct gic_v3_devinfo {
+ int gic_domain;
+ int msi_xref;
+};
+
+#define GIC_INTR_ISRC(sc, irq) (&sc->gic_irqs[irq].gi_isrc)
+
+MALLOC_DECLARE(M_GIC_V3);
+
+/* ivars */
+#define GICV3_IVAR_NIRQS 1000
+/* 1001 was GICV3_IVAR_REDIST_VADDR */
+#define GICV3_IVAR_REDIST 1002
+
+__BUS_ACCESSOR(gicv3, nirqs, GICV3, NIRQS, u_int);
+__BUS_ACCESSOR(gicv3, redist, GICV3, REDIST, void *);
+
+/* Device methods */
+int gic_v3_attach(device_t dev);
+int gic_v3_detach(device_t dev);
+int arm_gic_v3_intr(void *);
+
+uint32_t gic_r_read_4(device_t, bus_size_t);
+uint64_t gic_r_read_8(device_t, bus_size_t);
+void gic_r_write_4(device_t, bus_size_t, uint32_t var);
+void gic_r_write_8(device_t, bus_size_t, uint64_t var);
+
+/*
+ * GIC Distributor accessors.
+ * Note that only the GIC softc can be passed.
+ */
+#define gic_d_read(sc, len, reg) \
+({ \
+ bus_read_##len(sc->gic_dist, reg); \
+})
+
+#define gic_d_write(sc, len, reg, val) \
+({ \
+ bus_write_##len(sc->gic_dist, reg, val);\
+})
+
+/* GIC Re-Distributor accessors (per-CPU) */
+#define gic_r_read(sc, len, reg) \
+({ \
+ u_int cpu = PCPU_GET(cpuid); \
+ \
+ bus_read_##len( \
+ &sc->gic_redists.pcpu[cpu]->res, \
+ reg); \
+})
+
+#define gic_r_write(sc, len, reg, val) \
+({ \
+ u_int cpu = PCPU_GET(cpuid); \
+ \
+ bus_write_##len( \
+ &sc->gic_redists.pcpu[cpu]->res, \
+ reg, val); \
+})
+
+#endif /* _GIC_V3_VAR_H_ */
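The accessor macros above are meant for the GICv3 driver itself, since they dereference the softc directly; child devices such as the ITS instead go through the device_t-based gic_r_read_4()/gic_r_write_4() helpers on their parent. A minimal sketch of the softc-based accessors, illustrative only and assuming GICD_TYPER comes from <arm/arm/gic_common.h>:

	struct gic_v3_softc *sc = device_get_softc(dev);
	uint32_t gicd_typer;
	uint64_t gicr_typer;

	/* Distributor access: 4- or 8-byte ops on the gic_dist resource. */
	gicd_typer = gic_d_read(sc, 4, GICD_TYPER);

	/* Per-CPU Re-Distributor access; the CPU is resolved via PCPU_GET(cpuid). */
	gicr_typer = gic_r_read(sc, 8, GICR_TYPER);
	gic_r_write(sc, 4, GICR_CTLR,
	    gic_r_read(sc, 4, GICR_CTLR) | GICR_CTLR_LPI_ENABLE);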
diff --git a/sys/arm64/arm64/gicv3_its.c b/sys/arm64/arm64/gicv3_its.c
new file mode 100644
index 000000000000..bfb069c195a5
--- /dev/null
+++ b/sys/arm64/arm64/gicv3_its.c
@@ -0,0 +1,1960 @@
+/*-
+ * Copyright (c) 2015-2016 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_acpi.h"
+#include "opt_platform.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/cpuset.h>
+#include <sys/endian.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/rman.h>
+#include <sys/sbuf.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/vmem.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+#include <machine/intr.h>
+
+#include <arm/arm/gic_common.h>
+#include <arm64/arm64/gic_v3_reg.h>
+#include <arm64/arm64/gic_v3_var.h>
+
+#ifdef FDT
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+#endif
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include "pcib_if.h"
+#include "pic_if.h"
+#include "msi_if.h"
+
+MALLOC_DEFINE(M_GICV3_ITS, "GICv3 ITS",
+ "ARM GICv3 Interrupt Translation Service");
+
+#define LPI_NIRQS (64 * 1024)
+
+/* The size and alignment of the command circular buffer */
+#define ITS_CMDQ_SIZE (64 * 1024) /* Must be a multiple of 4K */
+#define ITS_CMDQ_ALIGN (64 * 1024)
+
+#define LPI_CONFTAB_SIZE LPI_NIRQS
+#define LPI_CONFTAB_ALIGN (64 * 1024)
+#define LPI_CONFTAB_MAX_ADDR ((1ul << 48) - 1) /* We need a 47 bit PA */
+
+/* 1 bit per SPI, PPI, and SGI (8k), and 1 bit per LPI (LPI_CONFTAB_SIZE) */
+#define LPI_PENDTAB_SIZE ((LPI_NIRQS + GIC_FIRST_LPI) / 8)
+#define LPI_PENDTAB_ALIGN (64 * 1024)
+#define LPI_PENDTAB_MAX_ADDR ((1ul << 48) - 1) /* We need a 47 bit PA */
+
+#define LPI_INT_TRANS_TAB_ALIGN 256
+#define LPI_INT_TRANS_TAB_MAX_ADDR ((1ul << 48) - 1)
+
+/* ITS commands encoding */
+#define ITS_CMD_MOVI (0x01)
+#define ITS_CMD_SYNC (0x05)
+#define ITS_CMD_MAPD (0x08)
+#define ITS_CMD_MAPC (0x09)
+#define ITS_CMD_MAPTI (0x0a)
+#define ITS_CMD_MAPI (0x0b)
+#define ITS_CMD_INV (0x0c)
+#define ITS_CMD_INVALL (0x0d)
+/* Command */
+#define CMD_COMMAND_MASK (0xFFUL)
+/* PCI device ID */
+#define CMD_DEVID_SHIFT (32)
+#define CMD_DEVID_MASK (0xFFFFFFFFUL << CMD_DEVID_SHIFT)
+/* Size of IRQ ID bitfield */
+#define CMD_SIZE_MASK (0xFFUL)
+/* Virtual LPI ID */
+#define CMD_ID_MASK (0xFFFFFFFFUL)
+/* Physical LPI ID */
+#define CMD_PID_SHIFT (32)
+#define CMD_PID_MASK (0xFFFFFFFFUL << CMD_PID_SHIFT)
+/* Collection */
+#define CMD_COL_MASK (0xFFFFUL)
+/* Target (CPU or Re-Distributor) */
+#define CMD_TARGET_SHIFT (16)
+#define CMD_TARGET_MASK (0xFFFFFFFFUL << CMD_TARGET_SHIFT)
+/* Interrupt Translation Table address */
+#define CMD_ITT_MASK (0xFFFFFFFFFF00UL)
+/* Valid command bit */
+#define CMD_VALID_SHIFT (63)
+#define CMD_VALID_MASK (1UL << CMD_VALID_SHIFT)
+
+#define ITS_TARGET_NONE 0xFBADBEEF
+
+/* LPI chunk owned by ITS device */
+struct lpi_chunk {
+ u_int lpi_base;
+ u_int lpi_free; /* First free LPI in set */
+ u_int lpi_num; /* Total number of LPIs in chunk */
+	u_int		lpi_busy;	/* Number of busy LPIs in chunk */
+};
+
+/* ITS device */
+struct its_dev {
+ TAILQ_ENTRY(its_dev) entry;
+ /* PCI device */
+ device_t pci_dev;
+ /* Device ID (i.e. PCI device ID) */
+ uint32_t devid;
+ /* List of assigned LPIs */
+ struct lpi_chunk lpis;
+ /* Virtual address of ITT */
+ vm_offset_t itt;
+ size_t itt_size;
+};
+
+/*
+ * ITS command descriptor.
+ * Idea for command description passing taken from Linux.
+ */
+struct its_cmd_desc {
+ uint8_t cmd_type;
+
+ union {
+ struct {
+ struct its_dev *its_dev;
+ struct its_col *col;
+ uint32_t id;
+ } cmd_desc_movi;
+
+ struct {
+ struct its_col *col;
+ } cmd_desc_sync;
+
+ struct {
+ struct its_col *col;
+ uint8_t valid;
+ } cmd_desc_mapc;
+
+ struct {
+ struct its_dev *its_dev;
+ struct its_col *col;
+ uint32_t pid;
+ uint32_t id;
+ } cmd_desc_mapvi;
+
+ struct {
+ struct its_dev *its_dev;
+ struct its_col *col;
+ uint32_t pid;
+ } cmd_desc_mapi;
+
+ struct {
+ struct its_dev *its_dev;
+ uint8_t valid;
+ } cmd_desc_mapd;
+
+ struct {
+ struct its_dev *its_dev;
+ struct its_col *col;
+ uint32_t pid;
+ } cmd_desc_inv;
+
+ struct {
+ struct its_col *col;
+ } cmd_desc_invall;
+ };
+};
+
+/* ITS command. Each command is 32 bytes long */
+struct its_cmd {
+ uint64_t cmd_dword[4]; /* ITS command double word */
+};
+
+/* An ITS private table */
+struct its_ptable {
+ vm_offset_t ptab_vaddr;
+ unsigned long ptab_size;
+};
+
+/* ITS collection description. */
+struct its_col {
+ uint64_t col_target; /* Target Re-Distributor */
+ uint64_t col_id; /* Collection ID */
+};
+
+struct gicv3_its_irqsrc {
+ struct intr_irqsrc gi_isrc;
+ u_int gi_id;
+ u_int gi_lpi;
+ struct its_dev *gi_its_dev;
+ TAILQ_ENTRY(gicv3_its_irqsrc) gi_link;
+};
+
+struct gicv3_its_softc {
+ device_t dev;
+ struct intr_pic *sc_pic;
+ struct resource *sc_its_res;
+
+ cpuset_t sc_cpus;
+ u_int gic_irq_cpu;
+
+ struct its_ptable sc_its_ptab[GITS_BASER_NUM];
+ struct its_col *sc_its_cols[MAXCPU]; /* Per-CPU collections */
+
+ /*
+ * TODO: We should get these from the parent as we only want a
+ * single copy of each across the interrupt controller.
+ */
+ uint8_t *sc_conf_base;
+ vm_offset_t sc_pend_base[MAXCPU];
+
+ /* Command handling */
+ struct mtx sc_its_cmd_lock;
+ struct its_cmd *sc_its_cmd_base; /* Command circular buffer address */
+ size_t sc_its_cmd_next_idx;
+
+ vmem_t *sc_irq_alloc;
+ struct gicv3_its_irqsrc **sc_irqs;
+ u_int sc_irq_base;
+ u_int sc_irq_length;
+ u_int sc_irq_count;
+
+ struct mtx sc_its_dev_lock;
+ TAILQ_HEAD(its_dev_list, its_dev) sc_its_dev_list;
+ TAILQ_HEAD(free_irqs, gicv3_its_irqsrc) sc_free_irqs;
+
+#define ITS_FLAGS_CMDQ_FLUSH 0x00000001
+#define ITS_FLAGS_LPI_CONF_FLUSH 0x00000002
+#define ITS_FLAGS_ERRATA_CAVIUM_22375 0x00000004
+ u_int sc_its_flags;
+ bool trace_enable;
+};
+
+static void *conf_base;
+
+typedef void (its_quirk_func_t)(device_t);
+static its_quirk_func_t its_quirk_cavium_22375;
+
+static const struct {
+ const char *desc;
+ uint32_t iidr;
+ uint32_t iidr_mask;
+ its_quirk_func_t *func;
+} its_quirks[] = {
+ {
+ /* Cavium ThunderX Pass 1.x */
+ .desc = "Cavium ThunderX errata: 22375, 24313",
+ .iidr = GITS_IIDR_RAW(GITS_IIDR_IMPL_CAVIUM,
+ GITS_IIDR_PROD_THUNDER, GITS_IIDR_VAR_THUNDER_1, 0),
+ .iidr_mask = ~GITS_IIDR_REVISION_MASK,
+ .func = its_quirk_cavium_22375,
+ },
+};
+
+#define gic_its_read_4(sc, reg) \
+ bus_read_4((sc)->sc_its_res, (reg))
+#define gic_its_read_8(sc, reg) \
+ bus_read_8((sc)->sc_its_res, (reg))
+
+#define gic_its_write_4(sc, reg, val) \
+ bus_write_4((sc)->sc_its_res, (reg), (val))
+#define gic_its_write_8(sc, reg, val) \
+ bus_write_8((sc)->sc_its_res, (reg), (val))
+
+static device_attach_t gicv3_its_attach;
+static device_detach_t gicv3_its_detach;
+
+static pic_disable_intr_t gicv3_its_disable_intr;
+static pic_enable_intr_t gicv3_its_enable_intr;
+static pic_map_intr_t gicv3_its_map_intr;
+static pic_setup_intr_t gicv3_its_setup_intr;
+static pic_post_filter_t gicv3_its_post_filter;
+static pic_post_ithread_t gicv3_its_post_ithread;
+static pic_pre_ithread_t gicv3_its_pre_ithread;
+static pic_bind_intr_t gicv3_its_bind_intr;
+#ifdef SMP
+static pic_init_secondary_t gicv3_its_init_secondary;
+#endif
+static msi_alloc_msi_t gicv3_its_alloc_msi;
+static msi_release_msi_t gicv3_its_release_msi;
+static msi_alloc_msix_t gicv3_its_alloc_msix;
+static msi_release_msix_t gicv3_its_release_msix;
+static msi_map_msi_t gicv3_its_map_msi;
+
+static void its_cmd_movi(device_t, struct gicv3_its_irqsrc *);
+static void its_cmd_mapc(device_t, struct its_col *, uint8_t);
+static void its_cmd_mapti(device_t, struct gicv3_its_irqsrc *);
+static void its_cmd_mapd(device_t, struct its_dev *, uint8_t);
+static void its_cmd_inv(device_t, struct its_dev *, struct gicv3_its_irqsrc *);
+static void its_cmd_invall(device_t, struct its_col *);
+
+static device_method_t gicv3_its_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_detach, gicv3_its_detach),
+
+ /* Interrupt controller interface */
+ DEVMETHOD(pic_disable_intr, gicv3_its_disable_intr),
+ DEVMETHOD(pic_enable_intr, gicv3_its_enable_intr),
+ DEVMETHOD(pic_map_intr, gicv3_its_map_intr),
+ DEVMETHOD(pic_setup_intr, gicv3_its_setup_intr),
+ DEVMETHOD(pic_post_filter, gicv3_its_post_filter),
+ DEVMETHOD(pic_post_ithread, gicv3_its_post_ithread),
+ DEVMETHOD(pic_pre_ithread, gicv3_its_pre_ithread),
+#ifdef SMP
+ DEVMETHOD(pic_bind_intr, gicv3_its_bind_intr),
+ DEVMETHOD(pic_init_secondary, gicv3_its_init_secondary),
+#endif
+
+ /* MSI/MSI-X */
+ DEVMETHOD(msi_alloc_msi, gicv3_its_alloc_msi),
+ DEVMETHOD(msi_release_msi, gicv3_its_release_msi),
+ DEVMETHOD(msi_alloc_msix, gicv3_its_alloc_msix),
+ DEVMETHOD(msi_release_msix, gicv3_its_release_msix),
+ DEVMETHOD(msi_map_msi, gicv3_its_map_msi),
+
+ /* End */
+ DEVMETHOD_END
+};
+
+static DEFINE_CLASS_0(gic, gicv3_its_driver, gicv3_its_methods,
+ sizeof(struct gicv3_its_softc));
+
+static void
+gicv3_its_cmdq_init(struct gicv3_its_softc *sc)
+{
+ vm_paddr_t cmd_paddr;
+ uint64_t reg, tmp;
+
+ /* Set up the command circular buffer */
+ sc->sc_its_cmd_base = contigmalloc(ITS_CMDQ_SIZE, M_GICV3_ITS,
+ M_WAITOK | M_ZERO, 0, (1ul << 48) - 1, ITS_CMDQ_ALIGN, 0);
+ sc->sc_its_cmd_next_idx = 0;
+
+ cmd_paddr = vtophys(sc->sc_its_cmd_base);
+
+ /* Set the base of the command buffer */
+ reg = GITS_CBASER_VALID |
+ (GITS_CBASER_CACHE_NIWAWB << GITS_CBASER_CACHE_SHIFT) |
+ cmd_paddr | (GITS_CBASER_SHARE_IS << GITS_CBASER_SHARE_SHIFT) |
+ (ITS_CMDQ_SIZE / 4096 - 1);
+ gic_its_write_8(sc, GITS_CBASER, reg);
+
+ /* Read back to check for fixed value fields */
+ tmp = gic_its_read_8(sc, GITS_CBASER);
+
+ if ((tmp & GITS_CBASER_SHARE_MASK) !=
+ (GITS_CBASER_SHARE_IS << GITS_CBASER_SHARE_SHIFT)) {
+ /* Check if the hardware reported non-shareable */
+ if ((tmp & GITS_CBASER_SHARE_MASK) ==
+ (GITS_CBASER_SHARE_NS << GITS_CBASER_SHARE_SHIFT)) {
+ /* If so remove the cache attribute */
+ reg &= ~GITS_CBASER_CACHE_MASK;
+ reg &= ~GITS_CBASER_SHARE_MASK;
+ /* Set to Non-cacheable, Non-shareable */
+ reg |= GITS_CBASER_CACHE_NIN << GITS_CBASER_CACHE_SHIFT;
+ reg |= GITS_CBASER_SHARE_NS << GITS_CBASER_SHARE_SHIFT;
+
+ gic_its_write_8(sc, GITS_CBASER, reg);
+ }
+
+ /* The command queue has to be flushed after each command */
+ sc->sc_its_flags |= ITS_FLAGS_CMDQ_FLUSH;
+ }
+
+ /* Get the next command from the start of the buffer */
+ gic_its_write_8(sc, GITS_CWRITER, 0x0);
+}
+
+static int
+gicv3_its_table_init(device_t dev, struct gicv3_its_softc *sc)
+{
+ vm_offset_t table;
+ vm_paddr_t paddr;
+ uint64_t cache, reg, share, tmp, type;
+ size_t esize, its_tbl_size, nidents, nitspages, npages;
+ int i, page_size;
+ int devbits;
+
+ if ((sc->sc_its_flags & ITS_FLAGS_ERRATA_CAVIUM_22375) != 0) {
+ /*
+		 * GITS_TYPER[17:13] on ThunderX reports that device IDs
+		 * are 21 bits long. The entry size of the ITS device
+		 * table can be read from GITS_BASERn[52:48]; on ThunderX
+		 * it is 8 bytes. Finally, the page size used by the ITS
+		 * to access this table will be set to 64KB.
+		 *
+		 * This gives 0x200000 entries of 0x8 bytes each, covered
+		 * by 256 pages of 64KB each. The number of pages (minus 1)
+		 * should then be written to GITS_BASERn[7:0]. In that case
+		 * the value would be 0xFF, but the maximum value the
+		 * ThunderX hardware accepts is 0xFD.
+		 *
+		 * Arbitrarily set the number of device ID bits to 20 in
+		 * order to limit the ITS device table to 0x100000 entries
+		 * (8MB, i.e. 128 64KB pages, giving a GITS_BASERn[7:0]
+		 * value of 0x7F, which the hardware accepts).
+ */
+ devbits = 20;
+ cache = 0;
+ } else {
+ devbits = GITS_TYPER_DEVB(gic_its_read_8(sc, GITS_TYPER));
+ cache = GITS_BASER_CACHE_WAWB;
+ }
+ share = GITS_BASER_SHARE_IS;
+ page_size = PAGE_SIZE_64K;
+
+ for (i = 0; i < GITS_BASER_NUM; i++) {
+ reg = gic_its_read_8(sc, GITS_BASER(i));
+ /* The type of table */
+ type = GITS_BASER_TYPE(reg);
+ /* The table entry size */
+ esize = GITS_BASER_ESIZE(reg);
+
+ switch(type) {
+ case GITS_BASER_TYPE_DEV:
+ nidents = (1 << devbits);
+ its_tbl_size = esize * nidents;
+ its_tbl_size = roundup2(its_tbl_size, PAGE_SIZE_64K);
+ break;
+ case GITS_BASER_TYPE_VP:
+ case GITS_BASER_TYPE_PP: /* Undocumented? */
+ case GITS_BASER_TYPE_IC:
+ its_tbl_size = page_size;
+ break;
+ default:
+ continue;
+ }
+ npages = howmany(its_tbl_size, PAGE_SIZE);
+
+ /* Allocate the table */
+ table = (vm_offset_t)contigmalloc(npages * PAGE_SIZE,
+ M_GICV3_ITS, M_WAITOK | M_ZERO, 0, (1ul << 48) - 1,
+ PAGE_SIZE_64K, 0);
+
+ sc->sc_its_ptab[i].ptab_vaddr = table;
+ sc->sc_its_ptab[i].ptab_size = npages * PAGE_SIZE;
+
+ paddr = vtophys(table);
+
+ while (1) {
+ nitspages = howmany(its_tbl_size, page_size);
+
+ /* Clear the fields we will be setting */
+ reg &= ~(GITS_BASER_VALID |
+ GITS_BASER_CACHE_MASK | GITS_BASER_TYPE_MASK |
+ GITS_BASER_ESIZE_MASK | GITS_BASER_PA_MASK |
+ GITS_BASER_SHARE_MASK | GITS_BASER_PSZ_MASK |
+ GITS_BASER_SIZE_MASK);
+ /* Set the new values */
+ reg |= GITS_BASER_VALID |
+ (cache << GITS_BASER_CACHE_SHIFT) |
+ (type << GITS_BASER_TYPE_SHIFT) |
+ ((esize - 1) << GITS_BASER_ESIZE_SHIFT) |
+ paddr | (share << GITS_BASER_SHARE_SHIFT) |
+ (nitspages - 1);
+
+ switch (page_size) {
+ case PAGE_SIZE: /* 4KB */
+ reg |=
+ GITS_BASER_PSZ_4K << GITS_BASER_PSZ_SHIFT;
+ break;
+ case PAGE_SIZE_16K: /* 16KB */
+ reg |=
+ GITS_BASER_PSZ_16K << GITS_BASER_PSZ_SHIFT;
+ break;
+ case PAGE_SIZE_64K: /* 64KB */
+ reg |=
+ GITS_BASER_PSZ_64K << GITS_BASER_PSZ_SHIFT;
+ break;
+ }
+
+ gic_its_write_8(sc, GITS_BASER(i), reg);
+
+ /* Read back to check */
+ tmp = gic_its_read_8(sc, GITS_BASER(i));
+
+ /* Do the shareability masks line up? */
+ if ((tmp & GITS_BASER_SHARE_MASK) !=
+ (reg & GITS_BASER_SHARE_MASK)) {
+ share = (tmp & GITS_BASER_SHARE_MASK) >>
+ GITS_BASER_SHARE_SHIFT;
+ continue;
+ }
+
+ if ((tmp & GITS_BASER_PSZ_MASK) !=
+ (reg & GITS_BASER_PSZ_MASK)) {
+ switch (page_size) {
+ case PAGE_SIZE_16K:
+ page_size = PAGE_SIZE;
+ continue;
+ case PAGE_SIZE_64K:
+ page_size = PAGE_SIZE_16K;
+ continue;
+ }
+ }
+
+ if (tmp != reg) {
+ device_printf(dev, "GITS_BASER%d: "
+ "unable to be updated: %lx != %lx\n",
+ i, reg, tmp);
+ return (ENXIO);
+ }
+
+ /* We should have made all needed changes */
+ break;
+ }
+ }
+
+ return (0);
+}
+
+static void
+gicv3_its_conftable_init(struct gicv3_its_softc *sc)
+{
+ void *conf_table;
+
+ conf_table = atomic_load_ptr(&conf_base);
+ if (conf_table == NULL) {
+ conf_table = contigmalloc(LPI_CONFTAB_SIZE,
+ M_GICV3_ITS, M_WAITOK, 0, LPI_CONFTAB_MAX_ADDR,
+ LPI_CONFTAB_ALIGN, 0);
+
+ if (atomic_cmpset_ptr((uintptr_t *)&conf_base,
+ (uintptr_t)NULL, (uintptr_t)conf_table) == 0) {
+ contigfree(conf_table, LPI_CONFTAB_SIZE, M_GICV3_ITS);
+ conf_table = atomic_load_ptr(&conf_base);
+ }
+ }
+ sc->sc_conf_base = conf_table;
+
+ /* Set the default configuration */
+ memset(sc->sc_conf_base, GIC_PRIORITY_MAX | LPI_CONF_GROUP1,
+ LPI_CONFTAB_SIZE);
+
+ /* Flush the table to memory */
+ cpu_dcache_wb_range((vm_offset_t)sc->sc_conf_base, LPI_CONFTAB_SIZE);
+}
+
+static void
+gicv3_its_pendtables_init(struct gicv3_its_softc *sc)
+{
+ int i;
+
+ for (i = 0; i <= mp_maxid; i++) {
+ if (CPU_ISSET(i, &sc->sc_cpus) == 0)
+ continue;
+
+ sc->sc_pend_base[i] = (vm_offset_t)contigmalloc(
+ LPI_PENDTAB_SIZE, M_GICV3_ITS, M_WAITOK | M_ZERO,
+ 0, LPI_PENDTAB_MAX_ADDR, LPI_PENDTAB_ALIGN, 0);
+
+ /* Flush so the ITS can see the memory */
+ cpu_dcache_wb_range((vm_offset_t)sc->sc_pend_base[i],
+ LPI_PENDTAB_SIZE);
+ }
+}
+
+static void
+its_init_cpu_lpi(device_t dev, struct gicv3_its_softc *sc)
+{
+ device_t gicv3;
+ uint64_t xbaser, tmp;
+ uint32_t ctlr;
+ u_int cpuid;
+
+ gicv3 = device_get_parent(dev);
+ cpuid = PCPU_GET(cpuid);
+
+ /* Disable LPIs */
+ ctlr = gic_r_read_4(gicv3, GICR_CTLR);
+ ctlr &= ~GICR_CTLR_LPI_ENABLE;
+ gic_r_write_4(gicv3, GICR_CTLR, ctlr);
+
+	/* Make sure changes are observable by the GIC */
+ dsb(sy);
+
+ /*
+ * Set the redistributor base
+ */
+ xbaser = vtophys(sc->sc_conf_base) |
+ (GICR_PROPBASER_SHARE_IS << GICR_PROPBASER_SHARE_SHIFT) |
+ (GICR_PROPBASER_CACHE_NIWAWB << GICR_PROPBASER_CACHE_SHIFT) |
+ (flsl(LPI_CONFTAB_SIZE | GIC_FIRST_LPI) - 1);
+ gic_r_write_8(gicv3, GICR_PROPBASER, xbaser);
+
+ /* Check the cache attributes we set */
+ tmp = gic_r_read_8(gicv3, GICR_PROPBASER);
+
+ if ((tmp & GICR_PROPBASER_SHARE_MASK) !=
+ (xbaser & GICR_PROPBASER_SHARE_MASK)) {
+ if ((tmp & GICR_PROPBASER_SHARE_MASK) ==
+ (GICR_PROPBASER_SHARE_NS << GICR_PROPBASER_SHARE_SHIFT)) {
+ /* We need to mark as non-cacheable */
+ xbaser &= ~(GICR_PROPBASER_SHARE_MASK |
+ GICR_PROPBASER_CACHE_MASK);
+ /* Non-cacheable */
+ xbaser |= GICR_PROPBASER_CACHE_NIN <<
+ GICR_PROPBASER_CACHE_SHIFT;
+			/* Non-shareable */
+ xbaser |= GICR_PROPBASER_SHARE_NS <<
+ GICR_PROPBASER_SHARE_SHIFT;
+ gic_r_write_8(gicv3, GICR_PROPBASER, xbaser);
+ }
+ sc->sc_its_flags |= ITS_FLAGS_LPI_CONF_FLUSH;
+ }
+
+ /*
+ * Set the LPI pending table base
+ */
+ xbaser = vtophys(sc->sc_pend_base[cpuid]) |
+ (GICR_PENDBASER_CACHE_NIWAWB << GICR_PENDBASER_CACHE_SHIFT) |
+ (GICR_PENDBASER_SHARE_IS << GICR_PENDBASER_SHARE_SHIFT);
+
+ gic_r_write_8(gicv3, GICR_PENDBASER, xbaser);
+
+ tmp = gic_r_read_8(gicv3, GICR_PENDBASER);
+
+ if ((tmp & GICR_PENDBASER_SHARE_MASK) ==
+ (GICR_PENDBASER_SHARE_NS << GICR_PENDBASER_SHARE_SHIFT)) {
+		/* Clear the cache and shareability bits */
+ xbaser &= ~(GICR_PENDBASER_CACHE_MASK |
+ GICR_PENDBASER_SHARE_MASK);
+ /* Mark as non-shareable */
+ xbaser |= GICR_PENDBASER_SHARE_NS << GICR_PENDBASER_SHARE_SHIFT;
+ /* And non-cacheable */
+ xbaser |= GICR_PENDBASER_CACHE_NIN <<
+ GICR_PENDBASER_CACHE_SHIFT;
+ }
+
+ /* Enable LPIs */
+ ctlr = gic_r_read_4(gicv3, GICR_CTLR);
+ ctlr |= GICR_CTLR_LPI_ENABLE;
+ gic_r_write_4(gicv3, GICR_CTLR, ctlr);
+
+ /* Make sure the GIC has seen everything */
+ dsb(sy);
+}
+
+static int
+its_init_cpu(device_t dev, struct gicv3_its_softc *sc)
+{
+ device_t gicv3;
+ vm_paddr_t target;
+ u_int cpuid;
+ struct redist_pcpu *rpcpu;
+
+ gicv3 = device_get_parent(dev);
+ cpuid = PCPU_GET(cpuid);
+ if (!CPU_ISSET(cpuid, &sc->sc_cpus))
+ return (0);
+
+	/* Check that the redistributor supports physical LPIs on this CPU */
+ if ((gic_r_read_4(gicv3, GICR_TYPER) & GICR_TYPER_PLPIS) == 0)
+ return (ENXIO);
+
+ rpcpu = gicv3_get_redist(dev);
+
+ /* Do per-cpu LPI init once */
+ if (!rpcpu->lpi_enabled) {
+ its_init_cpu_lpi(dev, sc);
+ rpcpu->lpi_enabled = true;
+ }
+
+ if ((gic_its_read_8(sc, GITS_TYPER) & GITS_TYPER_PTA) != 0) {
+ /* This ITS wants the redistributor physical address */
+ target = vtophys(rman_get_virtual(&rpcpu->res));
+ } else {
+ /* This ITS wants the unique processor number */
+ target = GICR_TYPER_CPUNUM(gic_r_read_8(gicv3, GICR_TYPER)) <<
+ CMD_TARGET_SHIFT;
+ }
+
+ sc->sc_its_cols[cpuid]->col_target = target;
+ sc->sc_its_cols[cpuid]->col_id = cpuid;
+
+ its_cmd_mapc(dev, sc->sc_its_cols[cpuid], 1);
+ its_cmd_invall(dev, sc->sc_its_cols[cpuid]);
+
+ return (0);
+}
+
+static int
+gicv3_its_sysctl_trace_enable(SYSCTL_HANDLER_ARGS)
+{
+ struct gicv3_its_softc *sc;
+ int rv;
+
+ sc = arg1;
+
+ rv = sysctl_handle_bool(oidp, &sc->trace_enable, 0, req);
+ if (rv != 0 || req->newptr == NULL)
+ return (rv);
+ if (sc->trace_enable)
+ gic_its_write_8(sc, GITS_TRKCTLR, 3);
+ else
+ gic_its_write_8(sc, GITS_TRKCTLR, 0);
+
+ return (0);
+}
+
+static int
+gicv3_its_sysctl_trace_regs(SYSCTL_HANDLER_ARGS)
+{
+ struct gicv3_its_softc *sc;
+ struct sbuf *sb;
+ int err;
+
+ sc = arg1;
+ sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
+ if (sb == NULL) {
+ device_printf(sc->dev, "Could not allocate sbuf for output.\n");
+ return (ENOMEM);
+ }
+ sbuf_cat(sb, "\n");
+ sbuf_printf(sb, "GITS_TRKCTLR: 0x%08X\n",
+ gic_its_read_4(sc, GITS_TRKCTLR));
+ sbuf_printf(sb, "GITS_TRKR: 0x%08X\n",
+ gic_its_read_4(sc, GITS_TRKR));
+ sbuf_printf(sb, "GITS_TRKDIDR: 0x%08X\n",
+ gic_its_read_4(sc, GITS_TRKDIDR));
+ sbuf_printf(sb, "GITS_TRKPIDR: 0x%08X\n",
+ gic_its_read_4(sc, GITS_TRKPIDR));
+ sbuf_printf(sb, "GITS_TRKVIDR: 0x%08X\n",
+ gic_its_read_4(sc, GITS_TRKVIDR));
+ sbuf_printf(sb, "GITS_TRKTGTR: 0x%08X\n",
+ gic_its_read_4(sc, GITS_TRKTGTR));
+
+ err = sbuf_finish(sb);
+ if (err)
+ device_printf(sc->dev, "Error finishing sbuf: %d\n", err);
+ sbuf_delete(sb);
+ return(err);
+}
+
+static int
+gicv3_its_init_sysctl(struct gicv3_its_softc *sc)
+{
+ struct sysctl_oid *oid, *child;
+ struct sysctl_ctx_list *ctx_list;
+
+ ctx_list = device_get_sysctl_ctx(sc->dev);
+ child = device_get_sysctl_tree(sc->dev);
+ oid = SYSCTL_ADD_NODE(ctx_list,
+ SYSCTL_CHILDREN(child), OID_AUTO, "tracing",
+ CTLFLAG_RD| CTLFLAG_MPSAFE, NULL, "Messages tracing");
+ if (oid == NULL)
+ return (ENXIO);
+
+ /* Add registers */
+ SYSCTL_ADD_PROC(ctx_list,
+ SYSCTL_CHILDREN(oid), OID_AUTO, "enable",
+ CTLTYPE_U8 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+ gicv3_its_sysctl_trace_enable, "CU", "Enable tracing");
+ SYSCTL_ADD_PROC(ctx_list,
+ SYSCTL_CHILDREN(oid), OID_AUTO, "capture",
+ CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+ gicv3_its_sysctl_trace_regs, "", "Captured tracing registers.");
+
+ return (0);
+}
+
+static int
+gicv3_its_attach(device_t dev)
+{
+ struct gicv3_its_softc *sc;
+ uint32_t iidr;
+ int domain, err, i, rid;
+
+ sc = device_get_softc(dev);
+
+ sc->sc_irq_length = gicv3_get_nirqs(dev);
+ sc->sc_irq_base = GIC_FIRST_LPI;
+ sc->sc_irq_base += device_get_unit(dev) * sc->sc_irq_length;
+
+ rid = 0;
+ sc->sc_its_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
+ RF_ACTIVE);
+ if (sc->sc_its_res == NULL) {
+ device_printf(dev, "Could not allocate memory\n");
+ return (ENXIO);
+ }
+
+ iidr = gic_its_read_4(sc, GITS_IIDR);
+ for (i = 0; i < nitems(its_quirks); i++) {
+ if ((iidr & its_quirks[i].iidr_mask) == its_quirks[i].iidr) {
+ if (bootverbose) {
+ device_printf(dev, "Applying %s\n",
+ its_quirks[i].desc);
+ }
+ its_quirks[i].func(dev);
+ break;
+ }
+ }
+
+ /* Allocate the private tables */
+ err = gicv3_its_table_init(dev, sc);
+ if (err != 0)
+ return (err);
+
+ /* Protects access to the device list */
+ mtx_init(&sc->sc_its_dev_lock, "ITS device lock", NULL, MTX_SPIN);
+
+ /* Protects access to the ITS command circular buffer. */
+ mtx_init(&sc->sc_its_cmd_lock, "ITS cmd lock", NULL, MTX_SPIN);
+
+ CPU_ZERO(&sc->sc_cpus);
+ if (bus_get_domain(dev, &domain) == 0) {
+ if (domain < MAXMEMDOM)
+ CPU_COPY(&cpuset_domain[domain], &sc->sc_cpus);
+ } else {
+ CPU_COPY(&all_cpus, &sc->sc_cpus);
+ }
+
+ /* Allocate the command circular buffer */
+ gicv3_its_cmdq_init(sc);
+
+ /* Allocate the per-CPU collections */
+ for (int cpu = 0; cpu <= mp_maxid; cpu++)
+ if (CPU_ISSET(cpu, &sc->sc_cpus) != 0)
+ sc->sc_its_cols[cpu] = malloc(
+ sizeof(*sc->sc_its_cols[0]), M_GICV3_ITS,
+ M_WAITOK | M_ZERO);
+ else
+ sc->sc_its_cols[cpu] = NULL;
+
+ /* Enable the ITS */
+ gic_its_write_4(sc, GITS_CTLR,
+ gic_its_read_4(sc, GITS_CTLR) | GITS_CTLR_EN);
+
+ /* Create the LPI configuration table */
+ gicv3_its_conftable_init(sc);
+
+	/* And the pending tables */
+ gicv3_its_pendtables_init(sc);
+
+ /* Enable LPIs on this CPU */
+ its_init_cpu(dev, sc);
+
+ TAILQ_INIT(&sc->sc_its_dev_list);
+ TAILQ_INIT(&sc->sc_free_irqs);
+
+ /*
+ * Create the vmem object to allocate INTRNG IRQs from. We try to
+ * use all IRQs not already used by the GICv3.
+ * XXX: This assumes there are no other interrupt controllers in the
+ * system.
+ */
+ sc->sc_irq_alloc = vmem_create(device_get_nameunit(dev), 0,
+ gicv3_get_nirqs(dev), 1, 0, M_FIRSTFIT | M_WAITOK);
+
+ sc->sc_irqs = malloc(sizeof(*sc->sc_irqs) * sc->sc_irq_length,
+ M_GICV3_ITS, M_WAITOK | M_ZERO);
+
+ /* For GIC-500 install tracking sysctls. */
+ if ((iidr & (GITS_IIDR_PRODUCT_MASK | GITS_IIDR_IMPLEMENTOR_MASK)) ==
+ GITS_IIDR_RAW(GITS_IIDR_IMPL_ARM, GITS_IIDR_PROD_GIC500, 0, 0))
+ gicv3_its_init_sysctl(sc);
+
+ return (0);
+}
+
+static int
+gicv3_its_detach(device_t dev)
+{
+
+ return (ENXIO);
+}
+
+static void
+its_quirk_cavium_22375(device_t dev)
+{
+ struct gicv3_its_softc *sc;
+
+ sc = device_get_softc(dev);
+ sc->sc_its_flags |= ITS_FLAGS_ERRATA_CAVIUM_22375;
+}
+
+static void
+gicv3_its_disable_intr(device_t dev, struct intr_irqsrc *isrc)
+{
+ struct gicv3_its_softc *sc;
+ struct gicv3_its_irqsrc *girq;
+ uint8_t *conf;
+
+ sc = device_get_softc(dev);
+ girq = (struct gicv3_its_irqsrc *)isrc;
+ conf = sc->sc_conf_base;
+
+ conf[girq->gi_lpi] &= ~LPI_CONF_ENABLE;
+
+ if ((sc->sc_its_flags & ITS_FLAGS_LPI_CONF_FLUSH) != 0) {
+ /* Clean D-cache under command. */
+ cpu_dcache_wb_range((vm_offset_t)&conf[girq->gi_lpi], 1);
+ } else {
+ /* DSB inner shareable, store */
+ dsb(ishst);
+ }
+
+ its_cmd_inv(dev, girq->gi_its_dev, girq);
+}
+
+static void
+gicv3_its_enable_intr(device_t dev, struct intr_irqsrc *isrc)
+{
+ struct gicv3_its_softc *sc;
+ struct gicv3_its_irqsrc *girq;
+ uint8_t *conf;
+
+ sc = device_get_softc(dev);
+ girq = (struct gicv3_its_irqsrc *)isrc;
+ conf = sc->sc_conf_base;
+
+ conf[girq->gi_lpi] |= LPI_CONF_ENABLE;
+
+ if ((sc->sc_its_flags & ITS_FLAGS_LPI_CONF_FLUSH) != 0) {
+ /* Clean D-cache under command. */
+ cpu_dcache_wb_range((vm_offset_t)&conf[girq->gi_lpi], 1);
+ } else {
+ /* DSB inner shareable, store */
+ dsb(ishst);
+ }
+
+ its_cmd_inv(dev, girq->gi_its_dev, girq);
+}
+
+static int
+gicv3_its_intr(void *arg, uintptr_t irq)
+{
+ struct gicv3_its_softc *sc = arg;
+ struct gicv3_its_irqsrc *girq;
+ struct trapframe *tf;
+
+ irq -= sc->sc_irq_base;
+ girq = sc->sc_irqs[irq];
+ if (girq == NULL)
+ panic("gicv3_its_intr: Invalid interrupt %ld",
+ irq + sc->sc_irq_base);
+
+ tf = curthread->td_intr_frame;
+ intr_isrc_dispatch(&girq->gi_isrc, tf);
+ return (FILTER_HANDLED);
+}
+
+static void
+gicv3_its_pre_ithread(device_t dev, struct intr_irqsrc *isrc)
+{
+ struct gicv3_its_irqsrc *girq;
+ struct gicv3_its_softc *sc;
+
+ sc = device_get_softc(dev);
+ girq = (struct gicv3_its_irqsrc *)isrc;
+ gicv3_its_disable_intr(dev, isrc);
+ gic_icc_write(EOIR1, girq->gi_lpi + GIC_FIRST_LPI);
+}
+
+static void
+gicv3_its_post_ithread(device_t dev, struct intr_irqsrc *isrc)
+{
+
+ gicv3_its_enable_intr(dev, isrc);
+}
+
+static void
+gicv3_its_post_filter(device_t dev, struct intr_irqsrc *isrc)
+{
+ struct gicv3_its_irqsrc *girq;
+ struct gicv3_its_softc *sc;
+
+ sc = device_get_softc(dev);
+ girq = (struct gicv3_its_irqsrc *)isrc;
+ gic_icc_write(EOIR1, girq->gi_lpi + GIC_FIRST_LPI);
+}
+
+static int
+gicv3_its_select_cpu(device_t dev, struct intr_irqsrc *isrc)
+{
+ struct gicv3_its_softc *sc;
+
+ sc = device_get_softc(dev);
+ if (CPU_EMPTY(&isrc->isrc_cpu)) {
+ sc->gic_irq_cpu = intr_irq_next_cpu(sc->gic_irq_cpu,
+ &sc->sc_cpus);
+ CPU_SETOF(sc->gic_irq_cpu, &isrc->isrc_cpu);
+ }
+
+ return (0);
+}
+
+static int
+gicv3_its_bind_intr(device_t dev, struct intr_irqsrc *isrc)
+{
+ struct gicv3_its_irqsrc *girq;
+
+ gicv3_its_select_cpu(dev, isrc);
+
+ girq = (struct gicv3_its_irqsrc *)isrc;
+ its_cmd_movi(dev, girq);
+ return (0);
+}
+
+static int
+gicv3_its_map_intr(device_t dev, struct intr_map_data *data,
+ struct intr_irqsrc **isrcp)
+{
+
+ /*
+	 * This should never happen; this function is only called to map
+	 * interrupts found before the controller driver is ready.
+ */
+ panic("gicv3_its_map_intr: Unable to map a MSI interrupt");
+}
+
+static int
+gicv3_its_setup_intr(device_t dev, struct intr_irqsrc *isrc,
+ struct resource *res, struct intr_map_data *data)
+{
+
+ /* Bind the interrupt to a CPU */
+ gicv3_its_bind_intr(dev, isrc);
+
+ return (0);
+}
+
+#ifdef SMP
+static void
+gicv3_its_init_secondary(device_t dev)
+{
+ struct gicv3_its_softc *sc;
+
+ sc = device_get_softc(dev);
+
+ /*
+ * This is fatal as otherwise we may bind interrupts to this CPU.
+ * We need a way to tell the interrupt framework to only bind to a
+ * subset of given CPUs when it performs the shuffle.
+ */
+ if (its_init_cpu(dev, sc) != 0)
+ panic("gicv3_its_init_secondary: No usable ITS on CPU%d",
+ PCPU_GET(cpuid));
+}
+#endif
+
+static uint32_t
+its_get_devid(device_t pci_dev)
+{
+ uintptr_t id;
+
+ if (pci_get_id(pci_dev, PCI_ID_MSI, &id) != 0)
+ panic("its_get_devid: Unable to get the MSI DeviceID");
+
+ return (id);
+}
+
+static struct its_dev *
+its_device_find(device_t dev, device_t child)
+{
+ struct gicv3_its_softc *sc;
+ struct its_dev *its_dev = NULL;
+
+ sc = device_get_softc(dev);
+
+ mtx_lock_spin(&sc->sc_its_dev_lock);
+ TAILQ_FOREACH(its_dev, &sc->sc_its_dev_list, entry) {
+ if (its_dev->pci_dev == child)
+ break;
+ }
+ mtx_unlock_spin(&sc->sc_its_dev_lock);
+
+ return (its_dev);
+}
+
+static struct its_dev *
+its_device_get(device_t dev, device_t child, u_int nvecs)
+{
+ struct gicv3_its_softc *sc;
+ struct its_dev *its_dev;
+ vmem_addr_t irq_base;
+ size_t esize;
+
+ sc = device_get_softc(dev);
+
+ its_dev = its_device_find(dev, child);
+ if (its_dev != NULL)
+ return (its_dev);
+
+ its_dev = malloc(sizeof(*its_dev), M_GICV3_ITS, M_NOWAIT | M_ZERO);
+ if (its_dev == NULL)
+ return (NULL);
+
+ its_dev->pci_dev = child;
+ its_dev->devid = its_get_devid(child);
+
+ its_dev->lpis.lpi_busy = 0;
+ its_dev->lpis.lpi_num = nvecs;
+ its_dev->lpis.lpi_free = nvecs;
+
+ if (vmem_alloc(sc->sc_irq_alloc, nvecs, M_FIRSTFIT | M_NOWAIT,
+ &irq_base) != 0) {
+ free(its_dev, M_GICV3_ITS);
+ return (NULL);
+ }
+ its_dev->lpis.lpi_base = irq_base;
+
+ /* Get ITT entry size */
+ esize = GITS_TYPER_ITTES(gic_its_read_8(sc, GITS_TYPER));
+
+ /*
+ * Allocate ITT for this device.
+	 * The PA has to be 256 B aligned, with at least two entries per device.
+ */
+ its_dev->itt_size = roundup2(MAX(nvecs, 2) * esize, 256);
+ its_dev->itt = (vm_offset_t)contigmalloc(its_dev->itt_size,
+ M_GICV3_ITS, M_NOWAIT | M_ZERO, 0, LPI_INT_TRANS_TAB_MAX_ADDR,
+ LPI_INT_TRANS_TAB_ALIGN, 0);
+ if (its_dev->itt == 0) {
+ vmem_free(sc->sc_irq_alloc, its_dev->lpis.lpi_base, nvecs);
+ free(its_dev, M_GICV3_ITS);
+ return (NULL);
+ }
+
+ mtx_lock_spin(&sc->sc_its_dev_lock);
+ TAILQ_INSERT_TAIL(&sc->sc_its_dev_list, its_dev, entry);
+ mtx_unlock_spin(&sc->sc_its_dev_lock);
+
+ /* Map device to its ITT */
+ its_cmd_mapd(dev, its_dev, 1);
+
+ return (its_dev);
+}
+
+static void
+its_device_release(device_t dev, struct its_dev *its_dev)
+{
+ struct gicv3_its_softc *sc;
+
+ KASSERT(its_dev->lpis.lpi_busy == 0,
+ ("its_device_release: Trying to release an inuse ITS device"));
+
+ /* Unmap device in ITS */
+ its_cmd_mapd(dev, its_dev, 0);
+
+ sc = device_get_softc(dev);
+
+ /* Remove the device from the list of devices */
+ mtx_lock_spin(&sc->sc_its_dev_lock);
+ TAILQ_REMOVE(&sc->sc_its_dev_list, its_dev, entry);
+ mtx_unlock_spin(&sc->sc_its_dev_lock);
+
+ /* Free ITT */
+ KASSERT(its_dev->itt != 0, ("Invalid ITT in valid ITS device"));
+ contigfree((void *)its_dev->itt, its_dev->itt_size, M_GICV3_ITS);
+
+ /* Free the IRQ allocation */
+ vmem_free(sc->sc_irq_alloc, its_dev->lpis.lpi_base,
+ its_dev->lpis.lpi_num);
+
+ free(its_dev, M_GICV3_ITS);
+}
+
+static struct gicv3_its_irqsrc *
+gicv3_its_alloc_irqsrc(device_t dev, struct gicv3_its_softc *sc, u_int irq)
+{
+ struct gicv3_its_irqsrc *girq = NULL;
+
+ KASSERT(sc->sc_irqs[irq] == NULL,
+ ("%s: Interrupt %u already allocated", __func__, irq));
+ mtx_lock_spin(&sc->sc_its_dev_lock);
+ if (!TAILQ_EMPTY(&sc->sc_free_irqs)) {
+ girq = TAILQ_FIRST(&sc->sc_free_irqs);
+ TAILQ_REMOVE(&sc->sc_free_irqs, girq, gi_link);
+ }
+ mtx_unlock_spin(&sc->sc_its_dev_lock);
+ if (girq == NULL) {
+ girq = malloc(sizeof(*girq), M_GICV3_ITS,
+ M_NOWAIT | M_ZERO);
+ if (girq == NULL)
+ return (NULL);
+ girq->gi_id = -1;
+ if (intr_isrc_register(&girq->gi_isrc, dev, 0,
+ "%s,%u", device_get_nameunit(dev), irq) != 0) {
+ free(girq, M_GICV3_ITS);
+ return (NULL);
+ }
+ }
+ girq->gi_lpi = irq + sc->sc_irq_base - GIC_FIRST_LPI;
+ sc->sc_irqs[irq] = girq;
+
+ return (girq);
+}
+
+static void
+gicv3_its_release_irqsrc(struct gicv3_its_softc *sc,
+ struct gicv3_its_irqsrc *girq)
+{
+ u_int irq;
+
+ mtx_assert(&sc->sc_its_dev_lock, MA_OWNED);
+
+ irq = girq->gi_lpi + GIC_FIRST_LPI - sc->sc_irq_base;
+ sc->sc_irqs[irq] = NULL;
+
+ girq->gi_id = -1;
+ girq->gi_its_dev = NULL;
+ TAILQ_INSERT_TAIL(&sc->sc_free_irqs, girq, gi_link);
+}
+
+static int
+gicv3_its_alloc_msi(device_t dev, device_t child, int count, int maxcount,
+ device_t *pic, struct intr_irqsrc **srcs)
+{
+ struct gicv3_its_softc *sc;
+ struct gicv3_its_irqsrc *girq;
+ struct its_dev *its_dev;
+ u_int irq;
+ int i;
+
+ its_dev = its_device_get(dev, child, count);
+ if (its_dev == NULL)
+ return (ENXIO);
+
+ KASSERT(its_dev->lpis.lpi_free >= count,
+ ("gicv3_its_alloc_msi: No free LPIs"));
+ sc = device_get_softc(dev);
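+ /* Start at the first unused interrupt in the device's LPI block */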
+ irq = its_dev->lpis.lpi_base + its_dev->lpis.lpi_num -
+ its_dev->lpis.lpi_free;
+
+ /* Allocate the irqsrc for each MSI */
+ for (i = 0; i < count; i++, irq++) {
+ its_dev->lpis.lpi_free--;
+ srcs[i] = (struct intr_irqsrc *)gicv3_its_alloc_irqsrc(dev,
+ sc, irq);
+ if (srcs[i] == NULL)
+ break;
+ }
+
+ /* The allocation failed, release them */
+ if (i != count) {
+ mtx_lock_spin(&sc->sc_its_dev_lock);
+ for (i = 0; i < count; i++) {
+ girq = (struct gicv3_its_irqsrc *)srcs[i];
+ if (girq == NULL)
+ break;
+ gicv3_its_release_irqsrc(sc, girq);
+ srcs[i] = NULL;
+ }
+ mtx_unlock_spin(&sc->sc_its_dev_lock);
+ return (ENXIO);
+ }
+
+ /* Finish the allocation now we have all MSI irqsrcs */
+ for (i = 0; i < count; i++) {
+ girq = (struct gicv3_its_irqsrc *)srcs[i];
+ girq->gi_id = i;
+ girq->gi_its_dev = its_dev;
+
+ /* Map the message to the given IRQ */
+ gicv3_its_select_cpu(dev, (struct intr_irqsrc *)girq);
+ its_cmd_mapti(dev, girq);
+ }
+ its_dev->lpis.lpi_busy += count;
+ *pic = dev;
+
+ return (0);
+}
+
+static int
+gicv3_its_release_msi(device_t dev, device_t child, int count,
+ struct intr_irqsrc **isrc)
+{
+ struct gicv3_its_softc *sc;
+ struct gicv3_its_irqsrc *girq;
+ struct its_dev *its_dev;
+ int i;
+
+ its_dev = its_device_find(dev, child);
+
+ KASSERT(its_dev != NULL,
+ ("gicv3_its_release_msi: Releasing a MSI interrupt with "
+ "no ITS device"));
+ KASSERT(its_dev->lpis.lpi_busy >= count,
+ ("gicv3_its_release_msi: Releasing more interrupts than "
+ "were allocated: releasing %d, allocated %d", count,
+ its_dev->lpis.lpi_busy));
+
+ sc = device_get_softc(dev);
+ mtx_lock_spin(&sc->sc_its_dev_lock);
+ for (i = 0; i < count; i++) {
+ girq = (struct gicv3_its_irqsrc *)isrc[i];
+ gicv3_its_release_irqsrc(sc, girq);
+ }
+ mtx_unlock_spin(&sc->sc_its_dev_lock);
+ its_dev->lpis.lpi_busy -= count;
+
+ if (its_dev->lpis.lpi_busy == 0)
+ its_device_release(dev, its_dev);
+
+ return (0);
+}
+
+static int
+gicv3_its_alloc_msix(device_t dev, device_t child, device_t *pic,
+ struct intr_irqsrc **isrcp)
+{
+ struct gicv3_its_softc *sc;
+ struct gicv3_its_irqsrc *girq;
+ struct its_dev *its_dev;
+ u_int nvecs, irq;
+
+ nvecs = pci_msix_count(child);
+ its_dev = its_device_get(dev, child, nvecs);
+ if (its_dev == NULL)
+ return (ENXIO);
+
+ KASSERT(its_dev->lpis.lpi_free > 0,
+ ("gicv3_its_alloc_msix: No free LPIs"));
+ sc = device_get_softc(dev);
+ irq = its_dev->lpis.lpi_base + its_dev->lpis.lpi_num -
+ its_dev->lpis.lpi_free;
+
+ girq = gicv3_its_alloc_irqsrc(dev, sc, irq);
+ if (girq == NULL)
+ return (ENXIO);
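+ /* The EventID is the index of the next unused vector for this device */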
+ girq->gi_id = its_dev->lpis.lpi_busy;
+ girq->gi_its_dev = its_dev;
+
+ its_dev->lpis.lpi_free--;
+ its_dev->lpis.lpi_busy++;
+
+ /* Map the message to the given IRQ */
+ gicv3_its_select_cpu(dev, (struct intr_irqsrc *)girq);
+ its_cmd_mapti(dev, girq);
+
+ *pic = dev;
+ *isrcp = (struct intr_irqsrc *)girq;
+
+ return (0);
+}
+
+static int
+gicv3_its_release_msix(device_t dev, device_t child, struct intr_irqsrc *isrc)
+{
+ struct gicv3_its_softc *sc;
+ struct gicv3_its_irqsrc *girq;
+ struct its_dev *its_dev;
+
+ its_dev = its_device_find(dev, child);
+
+ KASSERT(its_dev != NULL,
+ ("gicv3_its_release_msix: Releasing a MSI-X interrupt with "
+ "no ITS device"));
+ KASSERT(its_dev->lpis.lpi_busy > 0,
+ ("gicv3_its_release_msix: Releasing more interrupts than "
+ "were allocated: allocated %d", its_dev->lpis.lpi_busy));
+
+ sc = device_get_softc(dev);
+ girq = (struct gicv3_its_irqsrc *)isrc;
+ gicv3_its_release_irqsrc(sc, girq);
+ its_dev->lpis.lpi_busy--;
+
+ if (its_dev->lpis.lpi_busy == 0)
+ its_device_release(dev, its_dev);
+
+ return (0);
+}
+
+static int
+gicv3_its_map_msi(device_t dev, device_t child, struct intr_irqsrc *isrc,
+ uint64_t *addr, uint32_t *data)
+{
+ struct gicv3_its_softc *sc;
+ struct gicv3_its_irqsrc *girq;
+
+ sc = device_get_softc(dev);
+ girq = (struct gicv3_its_irqsrc *)isrc;
+
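+ /*
+ * The MSI doorbell is the GITS_TRANSLATER register and the message
+ * data carries the per-device EventID.
+ */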
+ *addr = vtophys(rman_get_virtual(sc->sc_its_res)) + GITS_TRANSLATER;
+ *data = girq->gi_id;
+
+ return (0);
+}
+
+/*
+ * Commands handling.
+ */
+
+static __inline void
+cmd_format_command(struct its_cmd *cmd, uint8_t cmd_type)
+{
+ /* Command field: DW0 [7:0] */
+ cmd->cmd_dword[0] &= htole64(~CMD_COMMAND_MASK);
+ cmd->cmd_dword[0] |= htole64(cmd_type);
+}
+
+static __inline void
+cmd_format_devid(struct its_cmd *cmd, uint32_t devid)
+{
+ /* Device ID field: DW0 [63:32] */
+ cmd->cmd_dword[0] &= htole64(~CMD_DEVID_MASK);
+ cmd->cmd_dword[0] |= htole64((uint64_t)devid << CMD_DEVID_SHIFT);
+}
+
+static __inline void
+cmd_format_size(struct its_cmd *cmd, uint16_t size)
+{
+ /* Size field: DW1 [4:0] */
+ cmd->cmd_dword[1] &= htole64(~CMD_SIZE_MASK);
+ cmd->cmd_dword[1] |= htole64((size & CMD_SIZE_MASK));
+}
+
+static __inline void
+cmd_format_id(struct its_cmd *cmd, uint32_t id)
+{
+ /* ID field: DW1 [31:0] */
+ cmd->cmd_dword[1] &= htole64(~CMD_ID_MASK);
+ cmd->cmd_dword[1] |= htole64(id);
+}
+
+static __inline void
+cmd_format_pid(struct its_cmd *cmd, uint32_t pid)
+{
+ /* Physical ID field: DW1 [63:32] */
+ cmd->cmd_dword[1] &= htole64(~CMD_PID_MASK);
+ cmd->cmd_dword[1] |= htole64((uint64_t)pid << CMD_PID_SHIFT);
+}
+
+static __inline void
+cmd_format_col(struct its_cmd *cmd, uint16_t col_id)
+{
+ /* Collection field: DW2 [15:0] */
+ cmd->cmd_dword[2] &= htole64(~CMD_COL_MASK);
+ cmd->cmd_dword[2] |= htole64(col_id);
+}
+
+static __inline void
+cmd_format_target(struct its_cmd *cmd, uint64_t target)
+{
+ /* Target Address field: DW2 [47:16] */
+ cmd->cmd_dword[2] &= htole64(~CMD_TARGET_MASK);
+ cmd->cmd_dword[2] |= htole64(target & CMD_TARGET_MASK);
+}
+
+static __inline void
+cmd_format_itt(struct its_cmd *cmd, uint64_t itt)
+{
+ /* ITT Address field: DW2 [47:8] */
+ cmd->cmd_dword[2] &= htole64(~CMD_ITT_MASK);
+ cmd->cmd_dword[2] |= htole64(itt & CMD_ITT_MASK);
+}
+
+static __inline void
+cmd_format_valid(struct its_cmd *cmd, uint8_t valid)
+{
+ /* Valid field: DW2 [63] */
+ cmd->cmd_dword[2] &= htole64(~CMD_VALID_MASK);
+ cmd->cmd_dword[2] |= htole64((uint64_t)valid << CMD_VALID_SHIFT);
+}
+
+static inline bool
+its_cmd_queue_full(struct gicv3_its_softc *sc)
+{
+ size_t read_idx, next_write_idx;
+
+ /* Get the index of the next command */
+ next_write_idx = (sc->sc_its_cmd_next_idx + 1) %
+ (ITS_CMDQ_SIZE / sizeof(struct its_cmd));
+ /* And the index of the current command being read */
+ read_idx = gic_its_read_4(sc, GITS_CREADR) / sizeof(struct its_cmd);
+
+ /*
+ * The queue is full when the write offset points
+ * at the command before the current read offset.
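+ * One slot is intentionally left unused so a full queue can be
+ * distinguished from an empty one.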
+ */
+ return (next_write_idx == read_idx);
+}
+
+static inline void
+its_cmd_sync(struct gicv3_its_softc *sc, struct its_cmd *cmd)
+{
+
+ if ((sc->sc_its_flags & ITS_FLAGS_CMDQ_FLUSH) != 0) {
+ /* Clean D-cache under command. */
+ cpu_dcache_wb_range((vm_offset_t)cmd, sizeof(*cmd));
+ } else {
+ /* DSB inner shareable, store */
+ dsb(ishst);
+ }
+
+}
+
+static inline uint64_t
+its_cmd_cwriter_offset(struct gicv3_its_softc *sc, struct its_cmd *cmd)
+{
+ uint64_t off;
+
+ off = (cmd - sc->sc_its_cmd_base) * sizeof(*cmd);
+
+ return (off);
+}
+
+static void
+its_cmd_wait_completion(device_t dev, struct its_cmd *cmd_first,
+ struct its_cmd *cmd_last)
+{
+ struct gicv3_its_softc *sc;
+ uint64_t first, last, read;
+ size_t us_left;
+
+ sc = device_get_softc(dev);
+
+ /*
+ * XXX ARM64TODO: This is an arbitrarily long timeout, used
+ * because the time a command needs to complete is currently
+ * not known.
+ */
+ us_left = 1000000;
+
+ first = its_cmd_cwriter_offset(sc, cmd_first);
+ last = its_cmd_cwriter_offset(sc, cmd_last);
+
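+ /*
+ * Wait until GITS_CREADR moves outside the [first, last) window,
+ * allowing for the read pointer wrapping around the command queue.
+ */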
+ for (;;) {
+ read = gic_its_read_8(sc, GITS_CREADR);
+ if (first < last) {
+ if (read < first || read >= last)
+ break;
+ } else if (read < first && read >= last)
+ break;
+
+ if (us_left-- == 0) {
+ /* This means timeout */
+ device_printf(dev,
+ "Timeout while waiting for CMD completion.\n");
+ return;
+ }
+ DELAY(1);
+ }
+}
+
+static struct its_cmd *
+its_cmd_alloc_locked(device_t dev)
+{
+ struct gicv3_its_softc *sc;
+ struct its_cmd *cmd;
+ size_t us_left;
+
+ sc = device_get_softc(dev);
+
+ /*
+ * XXX ARM64TODO: This is an arbitrarily long timeout, used
+ * because the time a command needs to complete (and therefore
+ * free its queue slot) is currently not known.
+ */
+ us_left = 1000000;
+
+ mtx_assert(&sc->sc_its_cmd_lock, MA_OWNED);
+ while (its_cmd_queue_full(sc)) {
+ if (us_left-- == 0) {
+ /* Timeout while waiting for free command */
+ device_printf(dev,
+ "Timeout while waiting for free command\n");
+ return (NULL);
+ }
+ DELAY(1);
+ }
+
+ cmd = &sc->sc_its_cmd_base[sc->sc_its_cmd_next_idx];
+ sc->sc_its_cmd_next_idx++;
+ sc->sc_its_cmd_next_idx %= ITS_CMDQ_SIZE / sizeof(struct its_cmd);
+
+ return (cmd);
+}
+
+static uint64_t
+its_cmd_prepare(struct its_cmd *cmd, struct its_cmd_desc *desc)
+{
+ uint64_t target;
+ uint8_t cmd_type;
+ u_int size;
+
+ cmd_type = desc->cmd_type;
+ target = ITS_TARGET_NONE;
+
+ switch (cmd_type) {
+ case ITS_CMD_MOVI: /* Move interrupt ID to another collection */
+ target = desc->cmd_desc_movi.col->col_target;
+ cmd_format_command(cmd, ITS_CMD_MOVI);
+ cmd_format_id(cmd, desc->cmd_desc_movi.id);
+ cmd_format_col(cmd, desc->cmd_desc_movi.col->col_id);
+ cmd_format_devid(cmd, desc->cmd_desc_movi.its_dev->devid);
+ break;
+ case ITS_CMD_SYNC: /* Wait for previous commands completion */
+ target = desc->cmd_desc_sync.col->col_target;
+ cmd_format_command(cmd, ITS_CMD_SYNC);
+ cmd_format_target(cmd, target);
+ break;
+ case ITS_CMD_MAPD: /* Assign ITT to device */
+ cmd_format_command(cmd, ITS_CMD_MAPD);
+ cmd_format_itt(cmd, vtophys(desc->cmd_desc_mapd.its_dev->itt));
+ /*
+ * Size describes number of bits to encode interrupt IDs
+ * supported by the device minus one.
+ * When V (valid) bit is zero, this field should be written
+ * as zero.
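+ * For example, a device with 32 LPIs gives fls(32) = 6, so Size is
+ * written as 5 and the ITT can address up to 64 EventIDs.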
+ */
+ if (desc->cmd_desc_mapd.valid != 0) {
+ size = fls(desc->cmd_desc_mapd.its_dev->lpis.lpi_num);
+ size = MAX(1, size) - 1;
+ } else
+ size = 0;
+
+ cmd_format_size(cmd, size);
+ cmd_format_devid(cmd, desc->cmd_desc_mapd.its_dev->devid);
+ cmd_format_valid(cmd, desc->cmd_desc_mapd.valid);
+ break;
+ case ITS_CMD_MAPC: /* Map collection to Re-Distributor */
+ target = desc->cmd_desc_mapc.col->col_target;
+ cmd_format_command(cmd, ITS_CMD_MAPC);
+ cmd_format_col(cmd, desc->cmd_desc_mapc.col->col_id);
+ cmd_format_valid(cmd, desc->cmd_desc_mapc.valid);
+ cmd_format_target(cmd, target);
+ break;
+ case ITS_CMD_MAPTI:
+ target = desc->cmd_desc_mapvi.col->col_target;
+ cmd_format_command(cmd, ITS_CMD_MAPTI);
+ cmd_format_devid(cmd, desc->cmd_desc_mapvi.its_dev->devid);
+ cmd_format_id(cmd, desc->cmd_desc_mapvi.id);
+ cmd_format_pid(cmd, desc->cmd_desc_mapvi.pid);
+ cmd_format_col(cmd, desc->cmd_desc_mapvi.col->col_id);
+ break;
+ case ITS_CMD_MAPI:
+ target = desc->cmd_desc_mapi.col->col_target;
+ cmd_format_command(cmd, ITS_CMD_MAPI);
+ cmd_format_devid(cmd, desc->cmd_desc_mapi.its_dev->devid);
+ cmd_format_id(cmd, desc->cmd_desc_mapi.pid);
+ cmd_format_col(cmd, desc->cmd_desc_mapi.col->col_id);
+ break;
+ case ITS_CMD_INV:
+ target = desc->cmd_desc_inv.col->col_target;
+ cmd_format_command(cmd, ITS_CMD_INV);
+ cmd_format_devid(cmd, desc->cmd_desc_inv.its_dev->devid);
+ cmd_format_id(cmd, desc->cmd_desc_inv.pid);
+ break;
+ case ITS_CMD_INVALL:
+ cmd_format_command(cmd, ITS_CMD_INVALL);
+ cmd_format_col(cmd, desc->cmd_desc_invall.col->col_id);
+ break;
+ default:
+ panic("its_cmd_prepare: Invalid command: %x", cmd_type);
+ }
+
+ return (target);
+}
+
+static int
+its_cmd_send(device_t dev, struct its_cmd_desc *desc)
+{
+ struct gicv3_its_softc *sc;
+ struct its_cmd *cmd, *cmd_sync, *cmd_write;
+ struct its_col col_sync;
+ struct its_cmd_desc desc_sync;
+ uint64_t target, cwriter;
+
+ sc = device_get_softc(dev);
+ mtx_lock_spin(&sc->sc_its_cmd_lock);
+ cmd = its_cmd_alloc_locked(dev);
+ if (cmd == NULL) {
+ device_printf(dev, "could not allocate ITS command\n");
+ mtx_unlock_spin(&sc->sc_its_cmd_lock);
+ return (EBUSY);
+ }
+
+ target = its_cmd_prepare(cmd, desc);
+ its_cmd_sync(sc, cmd);
+
+ if (target != ITS_TARGET_NONE) {
+ cmd_sync = its_cmd_alloc_locked(dev);
+ if (cmd_sync != NULL) {
+ desc_sync.cmd_type = ITS_CMD_SYNC;
+ col_sync.col_target = target;
+ desc_sync.cmd_desc_sync.col = &col_sync;
+ its_cmd_prepare(cmd_sync, &desc_sync);
+ its_cmd_sync(sc, cmd_sync);
+ }
+ }
+
+ /* Update GITS_CWRITER */
+ cwriter = sc->sc_its_cmd_next_idx * sizeof(struct its_cmd);
+ gic_its_write_8(sc, GITS_CWRITER, cwriter);
+ cmd_write = &sc->sc_its_cmd_base[sc->sc_its_cmd_next_idx];
+ mtx_unlock_spin(&sc->sc_its_cmd_lock);
+
+ its_cmd_wait_completion(dev, cmd, cmd_write);
+
+ return (0);
+}
+
+/* Handlers to send commands */
+static void
+its_cmd_movi(device_t dev, struct gicv3_its_irqsrc *girq)
+{
+ struct gicv3_its_softc *sc;
+ struct its_cmd_desc desc;
+ struct its_col *col;
+
+ sc = device_get_softc(dev);
+ col = sc->sc_its_cols[CPU_FFS(&girq->gi_isrc.isrc_cpu) - 1];
+
+ desc.cmd_type = ITS_CMD_MOVI;
+ desc.cmd_desc_movi.its_dev = girq->gi_its_dev;
+ desc.cmd_desc_movi.col = col;
+ desc.cmd_desc_movi.id = girq->gi_id;
+
+ its_cmd_send(dev, &desc);
+}
+
+static void
+its_cmd_mapc(device_t dev, struct its_col *col, uint8_t valid)
+{
+ struct its_cmd_desc desc;
+
+ desc.cmd_type = ITS_CMD_MAPC;
+ desc.cmd_desc_mapc.col = col;
+ /*
+ * Valid bit set - map the collection.
+ * Valid bit cleared - unmap the collection.
+ */
+ desc.cmd_desc_mapc.valid = valid;
+
+ its_cmd_send(dev, &desc);
+}
+
+static void
+its_cmd_mapti(device_t dev, struct gicv3_its_irqsrc *girq)
+{
+ struct gicv3_its_softc *sc;
+ struct its_cmd_desc desc;
+ struct its_col *col;
+ u_int col_id;
+
+ sc = device_get_softc(dev);
+
+ col_id = CPU_FFS(&girq->gi_isrc.isrc_cpu) - 1;
+ col = sc->sc_its_cols[col_id];
+
+ desc.cmd_type = ITS_CMD_MAPTI;
+ desc.cmd_desc_mapvi.its_dev = girq->gi_its_dev;
+ desc.cmd_desc_mapvi.col = col;
+ /* The EventID sent to the device */
+ desc.cmd_desc_mapvi.id = girq->gi_id;
+ /* The physical interrupt presented to software */
+ desc.cmd_desc_mapvi.pid = girq->gi_lpi + GIC_FIRST_LPI;
+
+ its_cmd_send(dev, &desc);
+}
+
+static void
+its_cmd_mapd(device_t dev, struct its_dev *its_dev, uint8_t valid)
+{
+ struct its_cmd_desc desc;
+
+ desc.cmd_type = ITS_CMD_MAPD;
+ desc.cmd_desc_mapd.its_dev = its_dev;
+ desc.cmd_desc_mapd.valid = valid;
+
+ its_cmd_send(dev, &desc);
+}
+
+static void
+its_cmd_inv(device_t dev, struct its_dev *its_dev,
+ struct gicv3_its_irqsrc *girq)
+{
+ struct gicv3_its_softc *sc;
+ struct its_cmd_desc desc;
+ struct its_col *col;
+
+ sc = device_get_softc(dev);
+ col = sc->sc_its_cols[CPU_FFS(&girq->gi_isrc.isrc_cpu) - 1];
+
+ desc.cmd_type = ITS_CMD_INV;
+ /* The EventID sent to the device */
+ desc.cmd_desc_inv.pid = girq->gi_id;
+ desc.cmd_desc_inv.its_dev = its_dev;
+ desc.cmd_desc_inv.col = col;
+
+ its_cmd_send(dev, &desc);
+}
+
+static void
+its_cmd_invall(device_t dev, struct its_col *col)
+{
+ struct its_cmd_desc desc;
+
+ desc.cmd_type = ITS_CMD_INVALL;
+ desc.cmd_desc_invall.col = col;
+
+ its_cmd_send(dev, &desc);
+}
+
+#ifdef FDT
+static device_probe_t gicv3_its_fdt_probe;
+static device_attach_t gicv3_its_fdt_attach;
+
+static device_method_t gicv3_its_fdt_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, gicv3_its_fdt_probe),
+ DEVMETHOD(device_attach, gicv3_its_fdt_attach),
+
+ /* End */
+ DEVMETHOD_END
+};
+
+#define its_baseclasses its_fdt_baseclasses
+DEFINE_CLASS_1(its, gicv3_its_fdt_driver, gicv3_its_fdt_methods,
+ sizeof(struct gicv3_its_softc), gicv3_its_driver);
+#undef its_baseclasses
+static devclass_t gicv3_its_fdt_devclass;
+
+EARLY_DRIVER_MODULE(its_fdt, gic, gicv3_its_fdt_driver,
+ gicv3_its_fdt_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE);
+
+static int
+gicv3_its_fdt_probe(device_t dev)
+{
+
+ if (!ofw_bus_status_okay(dev))
+ return (ENXIO);
+
+ if (!ofw_bus_is_compatible(dev, "arm,gic-v3-its"))
+ return (ENXIO);
+
+ device_set_desc(dev, "ARM GIC Interrupt Translation Service");
+ return (BUS_PROBE_DEFAULT);
+}
+
+static int
+gicv3_its_fdt_attach(device_t dev)
+{
+ struct gicv3_its_softc *sc;
+ phandle_t xref;
+ int err;
+
+ sc = device_get_softc(dev);
+ sc->dev = dev;
+ err = gicv3_its_attach(dev);
+ if (err != 0)
+ return (err);
+
+ /* Register this device as an interrupt controller */
+ xref = OF_xref_from_node(ofw_bus_get_node(dev));
+ sc->sc_pic = intr_pic_register(dev, xref);
+ intr_pic_add_handler(device_get_parent(dev), sc->sc_pic,
+ gicv3_its_intr, sc, sc->sc_irq_base, sc->sc_irq_length);
+
+ /* Register this device to handle MSI interrupts */
+ intr_msi_register(dev, xref);
+
+ return (0);
+}
+#endif
+
+#ifdef DEV_ACPI
+static device_probe_t gicv3_its_acpi_probe;
+static device_attach_t gicv3_its_acpi_attach;
+
+static device_method_t gicv3_its_acpi_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, gicv3_its_acpi_probe),
+ DEVMETHOD(device_attach, gicv3_its_acpi_attach),
+
+ /* End */
+ DEVMETHOD_END
+};
+
+#define its_baseclasses its_acpi_baseclasses
+DEFINE_CLASS_1(its, gicv3_its_acpi_driver, gicv3_its_acpi_methods,
+ sizeof(struct gicv3_its_softc), gicv3_its_driver);
+#undef its_baseclasses
+static devclass_t gicv3_its_acpi_devclass;
+
+EARLY_DRIVER_MODULE(its_acpi, gic, gicv3_its_acpi_driver,
+ gicv3_its_acpi_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE);
+
+static int
+gicv3_its_acpi_probe(device_t dev)
+{
+
+ if (gic_get_bus(dev) != GIC_BUS_ACPI)
+ return (EINVAL);
+
+ if (gic_get_hw_rev(dev) < 3)
+ return (EINVAL);
+
+ device_set_desc(dev, "ARM GIC Interrupt Translation Service");
+ return (BUS_PROBE_DEFAULT);
+}
+
+static int
+gicv3_its_acpi_attach(device_t dev)
+{
+ struct gicv3_its_softc *sc;
+ struct gic_v3_devinfo *di;
+ int err;
+
+ sc = device_get_softc(dev);
+ sc->dev = dev;
+ err = gicv3_its_attach(dev);
+ if (err != 0)
+ return (err);
+
+ di = device_get_ivars(dev);
+ sc->sc_pic = intr_pic_register(dev, di->msi_xref);
+ intr_pic_add_handler(device_get_parent(dev), sc->sc_pic,
+ gicv3_its_intr, sc, sc->sc_irq_base, sc->sc_irq_length);
+
+ /* Register this device to handle MSI interrupts */
+ intr_msi_register(dev, di->msi_xref);
+
+ return (0);
+}
+#endif
diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c
new file mode 100644
index 000000000000..d9c5c50fe568
--- /dev/null
+++ b/sys/arm64/arm64/identcpu.c
@@ -0,0 +1,1667 @@
+/*-
+ * Copyright (c) 2014 Andrew Turner
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by Semihalf
+ * under sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/pcpu.h>
+#include <sys/sbuf.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <machine/atomic.h>
+#include <machine/cpu.h>
+#include <machine/cpufunc.h>
+#include <machine/undefined.h>
+#include <machine/elf.h>
+
+static void print_cpu_features(u_int cpu);
+static u_long parse_cpu_features_hwcap(void);
+
+char machine[] = "arm64";
+
+#ifdef SCTL_MASK32
+extern int adaptive_machine_arch;
+#endif
+
+static SYSCTL_NODE(_machdep, OID_AUTO, cache, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "Cache management tuning");
+
+static int allow_dic = 1;
+SYSCTL_INT(_machdep_cache, OID_AUTO, allow_dic, CTLFLAG_RDTUN, &allow_dic, 0,
+ "Allow optimizations based on the DIC cache bit");
+
+static int allow_idc = 1;
+SYSCTL_INT(_machdep_cache, OID_AUTO, allow_idc, CTLFLAG_RDTUN, &allow_idc, 0,
+ "Allow optimizations based on the IDC cache bit");
+
+static void check_cpu_regs(u_int cpu);
+
+/*
+ * The default implementation of I-cache sync assumes we have an
+ * aliasing cache until we know otherwise.
+ */
+void (*arm64_icache_sync_range)(vm_offset_t, vm_size_t) =
+ &arm64_aliasing_icache_sync_range;
+
+static int
+sysctl_hw_machine(SYSCTL_HANDLER_ARGS)
+{
+#ifdef SCTL_MASK32
+ static const char machine32[] = "arm";
+#endif
+ int error;
+
+#ifdef SCTL_MASK32
+ if ((req->flags & SCTL_MASK32) != 0 && adaptive_machine_arch)
+ error = SYSCTL_OUT(req, machine32, sizeof(machine32));
+ else
+#endif
+ error = SYSCTL_OUT(req, machine, sizeof(machine));
+ return (error);
+}
+
+SYSCTL_PROC(_hw, HW_MACHINE, machine, CTLTYPE_STRING | CTLFLAG_RD |
+ CTLFLAG_MPSAFE, NULL, 0, sysctl_hw_machine, "A", "Machine class");
+
+static char cpu_model[64];
+SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD,
+ cpu_model, sizeof(cpu_model), "Machine model");
+
+/*
+ * Per-CPU affinity as provided in MPIDR_EL1
+ * Indexed by CPU number in logical order selected by the system.
+ * Relevant fields can be extracted using the CPU_AFFn macros;
+ * together Aff3.Aff2.Aff1.Aff0 form a unique CPU address in the system.
+ *
+ * Fields used by us:
+ * Aff1 - Cluster number
+ * Aff0 - CPU number in Aff1 cluster
+ */
+uint64_t __cpu_affinity[MAXCPU];
+static u_int cpu_aff_levels;
+
+struct cpu_desc {
+ u_int cpu_impl;
+ u_int cpu_part_num;
+ u_int cpu_variant;
+ u_int cpu_revision;
+ const char *cpu_impl_name;
+ const char *cpu_part_name;
+
+ uint64_t mpidr;
+ uint64_t id_aa64afr0;
+ uint64_t id_aa64afr1;
+ uint64_t id_aa64dfr0;
+ uint64_t id_aa64dfr1;
+ uint64_t id_aa64isar0;
+ uint64_t id_aa64isar1;
+ uint64_t id_aa64mmfr0;
+ uint64_t id_aa64mmfr1;
+ uint64_t id_aa64mmfr2;
+ uint64_t id_aa64pfr0;
+ uint64_t id_aa64pfr1;
+ uint64_t ctr;
+};
+
+static struct cpu_desc cpu_desc[MAXCPU];
+static struct cpu_desc kern_cpu_desc;
+static struct cpu_desc user_cpu_desc;
+static u_int cpu_print_regs;
+#define PRINT_ID_AA64_AFR0 0x00000001
+#define PRINT_ID_AA64_AFR1 0x00000002
+#define PRINT_ID_AA64_DFR0 0x00000010
+#define PRINT_ID_AA64_DFR1 0x00000020
+#define PRINT_ID_AA64_ISAR0 0x00000100
+#define PRINT_ID_AA64_ISAR1 0x00000200
+#define PRINT_ID_AA64_MMFR0 0x00001000
+#define PRINT_ID_AA64_MMFR1 0x00002000
+#define PRINT_ID_AA64_MMFR2 0x00004000
+#define PRINT_ID_AA64_PFR0 0x00010000
+#define PRINT_ID_AA64_PFR1 0x00020000
+#define PRINT_CTR_EL0 0x10000000
+
+struct cpu_parts {
+ u_int part_id;
+ const char *part_name;
+};
+#define CPU_PART_NONE { 0, "Unknown Processor" }
+
+struct cpu_implementers {
+ u_int impl_id;
+ const char *impl_name;
+ /*
+ * Part number is implementation defined
+ * so each vendor will have its own set of values and names.
+ */
+ const struct cpu_parts *cpu_parts;
+};
+#define CPU_IMPLEMENTER_NONE { 0, "Unknown Implementer", cpu_parts_none }
+
+/*
+ * Per-implementer table of (PartNum, CPU Name) pairs.
+ */
+/* ARM Ltd. */
+static const struct cpu_parts cpu_parts_arm[] = {
+ { CPU_PART_FOUNDATION, "Foundation-Model" },
+ { CPU_PART_CORTEX_A35, "Cortex-A35" },
+ { CPU_PART_CORTEX_A53, "Cortex-A53" },
+ { CPU_PART_CORTEX_A55, "Cortex-A55" },
+ { CPU_PART_CORTEX_A57, "Cortex-A57" },
+ { CPU_PART_CORTEX_A65, "Cortex-A65" },
+ { CPU_PART_CORTEX_A72, "Cortex-A72" },
+ { CPU_PART_CORTEX_A73, "Cortex-A73" },
+ { CPU_PART_CORTEX_A75, "Cortex-A75" },
+ { CPU_PART_CORTEX_A76, "Cortex-A76" },
+ { CPU_PART_CORTEX_A76AE, "Cortex-A76AE" },
+ { CPU_PART_CORTEX_A77, "Cortex-A77" },
+ { CPU_PART_NEOVERSE_N1, "Neoverse-N1" },
+ CPU_PART_NONE,
+};
+
+/* Cavium */
+static const struct cpu_parts cpu_parts_cavium[] = {
+ { CPU_PART_THUNDERX, "ThunderX" },
+ { CPU_PART_THUNDERX2, "ThunderX2" },
+ CPU_PART_NONE,
+};
+
+/* APM / Ampere */
+static const struct cpu_parts cpu_parts_apm[] = {
+ { CPU_PART_EMAG8180, "eMAG 8180" },
+ CPU_PART_NONE,
+};
+
+/* Unknown */
+static const struct cpu_parts cpu_parts_none[] = {
+ CPU_PART_NONE,
+};
+
+/*
+ * Implementers table.
+ */
+const struct cpu_implementers cpu_implementers[] = {
+ { CPU_IMPL_ARM, "ARM", cpu_parts_arm },
+ { CPU_IMPL_BROADCOM, "Broadcom", cpu_parts_none },
+ { CPU_IMPL_CAVIUM, "Cavium", cpu_parts_cavium },
+ { CPU_IMPL_DEC, "DEC", cpu_parts_none },
+ { CPU_IMPL_INFINEON, "IFX", cpu_parts_none },
+ { CPU_IMPL_FREESCALE, "Freescale", cpu_parts_none },
+ { CPU_IMPL_NVIDIA, "NVIDIA", cpu_parts_none },
+ { CPU_IMPL_APM, "APM", cpu_parts_apm },
+ { CPU_IMPL_QUALCOMM, "Qualcomm", cpu_parts_none },
+ { CPU_IMPL_MARVELL, "Marvell", cpu_parts_none },
+ { CPU_IMPL_INTEL, "Intel", cpu_parts_none },
+ CPU_IMPLEMENTER_NONE,
+};
+
+#define MRS_TYPE_MASK 0xf
+#define MRS_INVALID 0
+#define MRS_EXACT 1
+#define MRS_EXACT_VAL(x) (MRS_EXACT | ((x) << 4))
+#define MRS_EXACT_FIELD(x) ((x) >> 4)
+#define MRS_LOWER 2
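+/*
+ * MRS_EXACT fields are sanitised to the fixed value encoded in the field
+ * type (zero by default) in the user-visible register; MRS_LOWER fields
+ * report the lowest value found on any CPU.
+ */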
+
+struct mrs_field_value {
+ uint64_t value;
+ const char *desc;
+};
+
+#define MRS_FIELD_VALUE(_value, _desc) \
+ { \
+ .value = (_value), \
+ .desc = (_desc), \
+ }
+
+#define MRS_FIELD_VALUE_NONE_IMPL(_reg, _field, _none, _impl) \
+ MRS_FIELD_VALUE(_reg ## _ ## _field ## _ ## _none, ""), \
+ MRS_FIELD_VALUE(_reg ## _ ## _field ## _ ## _impl, #_field)
+
+#define MRS_FIELD_VALUE_COUNT(_reg, _field, _desc) \
+ MRS_FIELD_VALUE(0ul << _reg ## _ ## _field ## _SHIFT, "1 " _desc), \
+ MRS_FIELD_VALUE(1ul << _reg ## _ ## _field ## _SHIFT, "2 " _desc "s"), \
+ MRS_FIELD_VALUE(2ul << _reg ## _ ## _field ## _SHIFT, "3 " _desc "s"), \
+ MRS_FIELD_VALUE(3ul << _reg ## _ ## _field ## _SHIFT, "4 " _desc "s"), \
+ MRS_FIELD_VALUE(4ul << _reg ## _ ## _field ## _SHIFT, "5 " _desc "s"), \
+ MRS_FIELD_VALUE(5ul << _reg ## _ ## _field ## _SHIFT, "6 " _desc "s"), \
+ MRS_FIELD_VALUE(6ul << _reg ## _ ## _field ## _SHIFT, "7 " _desc "s"), \
+ MRS_FIELD_VALUE(7ul << _reg ## _ ## _field ## _SHIFT, "8 " _desc "s"), \
+ MRS_FIELD_VALUE(8ul << _reg ## _ ## _field ## _SHIFT, "9 " _desc "s"), \
+ MRS_FIELD_VALUE(9ul << _reg ## _ ## _field ## _SHIFT, "10 "_desc "s"), \
+ MRS_FIELD_VALUE(10ul<< _reg ## _ ## _field ## _SHIFT, "11 "_desc "s"), \
+ MRS_FIELD_VALUE(11ul<< _reg ## _ ## _field ## _SHIFT, "12 "_desc "s"), \
+ MRS_FIELD_VALUE(12ul<< _reg ## _ ## _field ## _SHIFT, "13 "_desc "s"), \
+ MRS_FIELD_VALUE(13ul<< _reg ## _ ## _field ## _SHIFT, "14 "_desc "s"), \
+ MRS_FIELD_VALUE(14ul<< _reg ## _ ## _field ## _SHIFT, "15 "_desc "s"), \
+ MRS_FIELD_VALUE(15ul<< _reg ## _ ## _field ## _SHIFT, "16 "_desc "s")
+
+#define MRS_FIELD_VALUE_END { .desc = NULL }
+
+struct mrs_field {
+ const char *name;
+ struct mrs_field_value *values;
+ uint64_t mask;
+ bool sign;
+ u_int type;
+ u_int shift;
+};
+
+#define MRS_FIELD(_register, _name, _sign, _type, _values) \
+ { \
+ .name = #_name, \
+ .sign = (_sign), \
+ .type = (_type), \
+ .shift = _register ## _ ## _name ## _SHIFT, \
+ .mask = _register ## _ ## _name ## _MASK, \
+ .values = (_values), \
+ }
+
+#define MRS_FIELD_END { .type = MRS_INVALID, }
+
+/* ID_AA64AFR0_EL1 */
+static struct mrs_field id_aa64afr0_fields[] = {
+ MRS_FIELD_END,
+};
+
+
+/* ID_AA64AFR1_EL1 */
+static struct mrs_field id_aa64afr1_fields[] = {
+ MRS_FIELD_END,
+};
+
+
+/* ID_AA64DFR0_EL1 */
+static struct mrs_field_value id_aa64dfr0_pmsver[] = {
+ MRS_FIELD_VALUE(ID_AA64DFR0_PMSVer_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64DFR0_PMSVer_V1, "SPE"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64dfr0_ctx_cmps[] = {
+ MRS_FIELD_VALUE_COUNT(ID_AA64DFR0, CTX_CMPs, "CTX BKPT"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64dfr0_wrps[] = {
+ MRS_FIELD_VALUE_COUNT(ID_AA64DFR0, WRPs, "Watchpoint"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64dfr0_brps[] = {
+ MRS_FIELD_VALUE_COUNT(ID_AA64DFR0, BRPs, "Breakpoint"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64dfr0_pmuver[] = {
+ MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_3, "PMUv3"),
+ MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_3_1, "PMUv3+16 bit evtCount"),
+ MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_IMPL, "IMPL PMU"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64dfr0_tracever[] = {
+ MRS_FIELD_VALUE(ID_AA64DFR0_TraceVer_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64DFR0_TraceVer_IMPL, "Trace"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64dfr0_debugver[] = {
+ MRS_FIELD_VALUE(ID_AA64DFR0_DebugVer_8, "Debugv8"),
+ MRS_FIELD_VALUE(ID_AA64DFR0_DebugVer_8_VHE, "Debugv8_VHE"),
+ MRS_FIELD_VALUE(ID_AA64DFR0_DebugVer_8_2, "Debugv8.2"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field id_aa64dfr0_fields[] = {
+ MRS_FIELD(ID_AA64DFR0, PMSVer, false, MRS_EXACT, id_aa64dfr0_pmsver),
+ MRS_FIELD(ID_AA64DFR0, CTX_CMPs, false, MRS_EXACT,
+ id_aa64dfr0_ctx_cmps),
+ MRS_FIELD(ID_AA64DFR0, WRPs, false, MRS_EXACT, id_aa64dfr0_wrps),
+ MRS_FIELD(ID_AA64DFR0, BRPs, false, MRS_LOWER, id_aa64dfr0_brps),
+ MRS_FIELD(ID_AA64DFR0, PMUVer, false, MRS_EXACT, id_aa64dfr0_pmuver),
+ MRS_FIELD(ID_AA64DFR0, TraceVer, false, MRS_EXACT,
+ id_aa64dfr0_tracever),
+ MRS_FIELD(ID_AA64DFR0, DebugVer, false, MRS_EXACT_VAL(0x6),
+ id_aa64dfr0_debugver),
+ MRS_FIELD_END,
+};
+
+
+/* ID_AA64DFR1 */
+static struct mrs_field id_aa64dfr1_fields[] = {
+ MRS_FIELD_END,
+};
+
+
+/* ID_AA64ISAR0_EL1 */
+static struct mrs_field_value id_aa64isar0_rndr[] = {
+ MRS_FIELD_VALUE(ID_AA64ISAR0_RNDR_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64ISAR0_RNDR_IMPL, "RNG"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar0_tlb[] = {
+ MRS_FIELD_VALUE(ID_AA64ISAR0_TLB_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64ISAR0_TLB_TLBIOS, "TLBI-OS"),
+ MRS_FIELD_VALUE(ID_AA64ISAR0_TLB_TLBIOSR, "TLBI-OSR"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar0_ts[] = {
+ MRS_FIELD_VALUE(ID_AA64ISAR0_TS_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64ISAR0_TS_CondM_8_4, "CondM-8.4"),
+ MRS_FIELD_VALUE(ID_AA64ISAR0_TS_CondM_8_5, "CondM-8.5"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar0_fhm[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, FHM, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar0_dp[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, DP, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar0_sm4[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SM4, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar0_sm3[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SM3, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar0_sha3[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SHA3, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar0_rdm[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, RDM, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar0_atomic[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, Atomic, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar0_crc32[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, CRC32, NONE, BASE),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar0_sha2[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SHA2, NONE, BASE),
+ MRS_FIELD_VALUE(ID_AA64ISAR0_SHA2_512, "SHA2+SHA512"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar0_sha1[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SHA1, NONE, BASE),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar0_aes[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, AES, NONE, BASE),
+ MRS_FIELD_VALUE(ID_AA64ISAR0_AES_PMULL, "AES+PMULL"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field id_aa64isar0_fields[] = {
+ MRS_FIELD(ID_AA64ISAR0, RNDR, false, MRS_LOWER, id_aa64isar0_rndr),
+ MRS_FIELD(ID_AA64ISAR0, TLB, false, MRS_LOWER, id_aa64isar0_tlb),
+ MRS_FIELD(ID_AA64ISAR0, TS, false, MRS_LOWER, id_aa64isar0_ts),
+ MRS_FIELD(ID_AA64ISAR0, FHM, false, MRS_LOWER, id_aa64isar0_fhm),
+ MRS_FIELD(ID_AA64ISAR0, DP, false, MRS_LOWER, id_aa64isar0_dp),
+ MRS_FIELD(ID_AA64ISAR0, SM4, false, MRS_LOWER, id_aa64isar0_sm4),
+ MRS_FIELD(ID_AA64ISAR0, SM3, false, MRS_LOWER, id_aa64isar0_sm3),
+ MRS_FIELD(ID_AA64ISAR0, SHA3, false, MRS_LOWER, id_aa64isar0_sha3),
+ MRS_FIELD(ID_AA64ISAR0, RDM, false, MRS_LOWER, id_aa64isar0_rdm),
+ MRS_FIELD(ID_AA64ISAR0, Atomic, false, MRS_LOWER, id_aa64isar0_atomic),
+ MRS_FIELD(ID_AA64ISAR0, CRC32, false, MRS_LOWER, id_aa64isar0_crc32),
+ MRS_FIELD(ID_AA64ISAR0, SHA2, false, MRS_LOWER, id_aa64isar0_sha2),
+ MRS_FIELD(ID_AA64ISAR0, SHA1, false, MRS_LOWER, id_aa64isar0_sha1),
+ MRS_FIELD(ID_AA64ISAR0, AES, false, MRS_LOWER, id_aa64isar0_aes),
+ MRS_FIELD_END,
+};
+
+
+/* ID_AA64ISAR1_EL1 */
+static struct mrs_field_value id_aa64isar1_i8mm[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, I8MM, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar1_dgh[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, DGH, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar1_bf16[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, BF16, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar1_specres[] = {
+ MRS_FIELD_VALUE(ID_AA64ISAR1_SPECRES_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64ISAR1_SPECRES_IMPL, "PredInv"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar1_sb[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, SB, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar1_frintts[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, FRINTTS, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar1_gpi[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, GPI, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar1_gpa[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, GPA, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar1_lrcpc[] = {
+ MRS_FIELD_VALUE(ID_AA64ISAR1_LRCPC_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64ISAR1_LRCPC_RCPC_8_3, "RCPC-8.3"),
+ MRS_FIELD_VALUE(ID_AA64ISAR1_LRCPC_RCPC_8_4, "RCPC-8.4"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar1_fcma[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, FCMA, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar1_jscvt[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, JSCVT, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar1_api[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, API, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar1_apa[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, APA, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64isar1_dpb[] = {
+ MRS_FIELD_VALUE(ID_AA64ISAR1_DPB_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64ISAR1_DPB_DCCVAP, "DCPoP"),
+ MRS_FIELD_VALUE(ID_AA64ISAR1_DPB_DCCVADP, "DCCVADP"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field id_aa64isar1_fields[] = {
+ MRS_FIELD(ID_AA64ISAR1, I8MM, false, MRS_LOWER, id_aa64isar1_i8mm),
+ MRS_FIELD(ID_AA64ISAR1, DGH, false, MRS_LOWER, id_aa64isar1_dgh),
+ MRS_FIELD(ID_AA64ISAR1, BF16, false, MRS_LOWER, id_aa64isar1_bf16),
+ MRS_FIELD(ID_AA64ISAR1, SPECRES, false, MRS_LOWER,
+ id_aa64isar1_specres),
+ MRS_FIELD(ID_AA64ISAR1, SB, false, MRS_LOWER, id_aa64isar1_sb),
+ MRS_FIELD(ID_AA64ISAR1, FRINTTS, false, MRS_LOWER,
+ id_aa64isar1_frintts),
+ MRS_FIELD(ID_AA64ISAR1, GPI, false, MRS_EXACT, id_aa64isar1_gpi),
+ MRS_FIELD(ID_AA64ISAR1, GPA, false, MRS_EXACT, id_aa64isar1_gpa),
+ MRS_FIELD(ID_AA64ISAR1, LRCPC, false, MRS_LOWER, id_aa64isar1_lrcpc),
+ MRS_FIELD(ID_AA64ISAR1, FCMA, false, MRS_LOWER, id_aa64isar1_fcma),
+ MRS_FIELD(ID_AA64ISAR1, JSCVT, false, MRS_LOWER, id_aa64isar1_jscvt),
+ MRS_FIELD(ID_AA64ISAR1, API, false, MRS_EXACT, id_aa64isar1_api),
+ MRS_FIELD(ID_AA64ISAR1, APA, false, MRS_EXACT, id_aa64isar1_apa),
+ MRS_FIELD(ID_AA64ISAR1, DPB, false, MRS_LOWER, id_aa64isar1_dpb),
+ MRS_FIELD_END,
+};
+
+
+/* ID_AA64MMFR0_EL1 */
+static struct mrs_field_value id_aa64mmfr0_tgran4[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, TGran4, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr0_tgran64[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, TGran64, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr0_tgran16[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, TGran16, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr0_bigend_el0[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, BigEndEL0, FIXED, MIXED),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr0_snsmem[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, SNSMem, NONE, DISTINCT),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr0_bigend[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, BigEnd, FIXED, MIXED),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr0_asid_bits[] = {
+ MRS_FIELD_VALUE(ID_AA64MMFR0_ASIDBits_8, "8bit ASID"),
+ MRS_FIELD_VALUE(ID_AA64MMFR0_ASIDBits_16, "16bit ASID"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr0_parange[] = {
+ MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_4G, "4GB PA"),
+ MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_64G, "64GB PA"),
+ MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_1T, "1TB PA"),
+ MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_4T, "4TB PA"),
+ MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_16T, "16TB PA"),
+ MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_256T, "256TB PA"),
+ MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_4P, "4PB PA"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field id_aa64mmfr0_fields[] = {
+ MRS_FIELD(ID_AA64MMFR0, TGran4, false, MRS_EXACT, id_aa64mmfr0_tgran4),
+ MRS_FIELD(ID_AA64MMFR0, TGran64, false, MRS_EXACT,
+ id_aa64mmfr0_tgran64),
+ MRS_FIELD(ID_AA64MMFR0, TGran16, false, MRS_EXACT,
+ id_aa64mmfr0_tgran16),
+ MRS_FIELD(ID_AA64MMFR0, BigEndEL0, false, MRS_EXACT,
+ id_aa64mmfr0_bigend_el0),
+ MRS_FIELD(ID_AA64MMFR0, SNSMem, false, MRS_EXACT, id_aa64mmfr0_snsmem),
+ MRS_FIELD(ID_AA64MMFR0, BigEnd, false, MRS_EXACT, id_aa64mmfr0_bigend),
+ MRS_FIELD(ID_AA64MMFR0, ASIDBits, false, MRS_EXACT,
+ id_aa64mmfr0_asid_bits),
+ MRS_FIELD(ID_AA64MMFR0, PARange, false, MRS_EXACT,
+ id_aa64mmfr0_parange),
+ MRS_FIELD_END,
+};
+
+
+/* ID_AA64MMFR1_EL1 */
+static struct mrs_field_value id_aa64mmfr1_xnx[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, XNX, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr1_specsei[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, SpecSEI, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr1_pan[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, PAN, NONE, IMPL),
+ MRS_FIELD_VALUE(ID_AA64MMFR1_PAN_ATS1E1, "PAN+ATS1E1"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr1_lo[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, LO, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr1_hpds[] = {
+ MRS_FIELD_VALUE(ID_AA64MMFR1_HPDS_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64MMFR1_HPDS_HPD, "HPD"),
+ MRS_FIELD_VALUE(ID_AA64MMFR1_HPDS_TTPBHA, "HPD+TTPBHA"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr1_vh[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, VH, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr1_vmidbits[] = {
+ MRS_FIELD_VALUE(ID_AA64MMFR1_VMIDBits_8, "8bit VMID"),
+ MRS_FIELD_VALUE(ID_AA64MMFR1_VMIDBits_16, "16bit VMID"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr1_hafdbs[] = {
+ MRS_FIELD_VALUE(ID_AA64MMFR1_HAFDBS_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64MMFR1_HAFDBS_AF, "HAF"),
+ MRS_FIELD_VALUE(ID_AA64MMFR1_HAFDBS_AF_DBS, "HAF+DS"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field id_aa64mmfr1_fields[] = {
+ MRS_FIELD(ID_AA64MMFR1, XNX, false, MRS_EXACT, id_aa64mmfr1_xnx),
+ MRS_FIELD(ID_AA64MMFR1, SpecSEI, false, MRS_EXACT,
+ id_aa64mmfr1_specsei),
+ MRS_FIELD(ID_AA64MMFR1, PAN, false, MRS_EXACT, id_aa64mmfr1_pan),
+ MRS_FIELD(ID_AA64MMFR1, LO, false, MRS_EXACT, id_aa64mmfr1_lo),
+ MRS_FIELD(ID_AA64MMFR1, HPDS, false, MRS_EXACT, id_aa64mmfr1_hpds),
+ MRS_FIELD(ID_AA64MMFR1, VH, false, MRS_EXACT, id_aa64mmfr1_vh),
+ MRS_FIELD(ID_AA64MMFR1, VMIDBits, false, MRS_EXACT,
+ id_aa64mmfr1_vmidbits),
+ MRS_FIELD(ID_AA64MMFR1, HAFDBS, false, MRS_EXACT, id_aa64mmfr1_hafdbs),
+ MRS_FIELD_END,
+};
+
+
+/* ID_AA64MMFR2_EL1 */
+static struct mrs_field_value id_aa64mmfr2_nv[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, NV, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr2_ccidx[] = {
+ MRS_FIELD_VALUE(ID_AA64MMFR2_CCIDX_32, "32bit CCIDX"),
+ MRS_FIELD_VALUE(ID_AA64MMFR2_CCIDX_64, "64bit CCIDX"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr2_varange[] = {
+ MRS_FIELD_VALUE(ID_AA64MMFR2_VARange_48, "48bit VA"),
+ MRS_FIELD_VALUE(ID_AA64MMFR2_VARange_52, "52bit VA"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr2_iesb[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, IESB, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr2_lsm[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, LSM, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr2_uao[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, UAO, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64mmfr2_cnp[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, CnP, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field id_aa64mmfr2_fields[] = {
+ MRS_FIELD(ID_AA64MMFR2, NV, false, MRS_EXACT, id_aa64mmfr2_nv),
+ MRS_FIELD(ID_AA64MMFR2, CCIDX, false, MRS_EXACT, id_aa64mmfr2_ccidx),
+ MRS_FIELD(ID_AA64MMFR2, VARange, false, MRS_EXACT,
+ id_aa64mmfr2_varange),
+ MRS_FIELD(ID_AA64MMFR2, IESB, false, MRS_EXACT, id_aa64mmfr2_iesb),
+ MRS_FIELD(ID_AA64MMFR2, LSM, false, MRS_EXACT, id_aa64mmfr2_lsm),
+ MRS_FIELD(ID_AA64MMFR2, UAO, false, MRS_EXACT, id_aa64mmfr2_uao),
+ MRS_FIELD(ID_AA64MMFR2, CnP, false, MRS_EXACT, id_aa64mmfr2_cnp),
+ MRS_FIELD_END,
+};
+
+
+/* ID_AA64PFR0_EL1 */
+static struct mrs_field_value id_aa64pfr0_csv3[] = {
+ MRS_FIELD_VALUE(ID_AA64PFR0_CSV3_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64PFR0_CSV3_ISOLATED, "CSV3"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr0_csv2[] = {
+ MRS_FIELD_VALUE(ID_AA64PFR0_CSV2_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64PFR0_CSV2_ISOLATED, "CSV2"),
+ MRS_FIELD_VALUE(ID_AA64PFR0_CSV2_SCXTNUM, "SCXTNUM"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr0_dit[] = {
+ MRS_FIELD_VALUE(ID_AA64PFR0_DIT_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64PFR0_DIT_PSTATE, "PSTATE.DIT"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr0_amu[] = {
+ MRS_FIELD_VALUE(ID_AA64PFR0_AMU_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64PFR0_AMU_V1, "AMUv1"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr0_mpam[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, MPAM, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr0_sel2[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, SEL2, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr0_sve[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, SVE, NONE, IMPL),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr0_ras[] = {
+ MRS_FIELD_VALUE(ID_AA64PFR0_RAS_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64PFR0_RAS_V1, "RASv1"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr0_gic[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, GIC, CPUIF_NONE, CPUIF_EN),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr0_advsimd[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, AdvSIMD, NONE, IMPL),
+ MRS_FIELD_VALUE(ID_AA64PFR0_AdvSIMD_HP, "AdvSIMD+HP"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr0_fp[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, FP, NONE, IMPL),
+ MRS_FIELD_VALUE(ID_AA64PFR0_FP_HP, "FP+HP"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr0_el3[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, EL3, NONE, 64),
+ MRS_FIELD_VALUE(ID_AA64PFR0_EL3_64_32, "EL3 32"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr0_el2[] = {
+ MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, EL2, NONE, 64),
+ MRS_FIELD_VALUE(ID_AA64PFR0_EL2_64_32, "EL2 32"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr0_el1[] = {
+ MRS_FIELD_VALUE(ID_AA64PFR0_EL1_64, "EL1"),
+ MRS_FIELD_VALUE(ID_AA64PFR0_EL1_64_32, "EL1 32"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr0_el0[] = {
+ MRS_FIELD_VALUE(ID_AA64PFR0_EL0_64, "EL0"),
+ MRS_FIELD_VALUE(ID_AA64PFR0_EL0_64_32, "EL0 32"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field id_aa64pfr0_fields[] = {
+ MRS_FIELD(ID_AA64PFR0, CSV3, false, MRS_EXACT, id_aa64pfr0_csv3),
+ MRS_FIELD(ID_AA64PFR0, CSV2, false, MRS_EXACT, id_aa64pfr0_csv2),
+ MRS_FIELD(ID_AA64PFR0, DIT, false, MRS_EXACT, id_aa64pfr0_dit),
+ MRS_FIELD(ID_AA64PFR0, AMU, false, MRS_EXACT, id_aa64pfr0_amu),
+ MRS_FIELD(ID_AA64PFR0, MPAM, false, MRS_EXACT, id_aa64pfr0_mpam),
+ MRS_FIELD(ID_AA64PFR0, SEL2, false, MRS_EXACT, id_aa64pfr0_sel2),
+ MRS_FIELD(ID_AA64PFR0, SVE, false, MRS_EXACT, id_aa64pfr0_sve),
+ MRS_FIELD(ID_AA64PFR0, RAS, false, MRS_EXACT, id_aa64pfr0_ras),
+ MRS_FIELD(ID_AA64PFR0, GIC, false, MRS_EXACT, id_aa64pfr0_gic),
+ MRS_FIELD(ID_AA64PFR0, AdvSIMD, true, MRS_LOWER, id_aa64pfr0_advsimd),
+ MRS_FIELD(ID_AA64PFR0, FP, true, MRS_LOWER, id_aa64pfr0_fp),
+ MRS_FIELD(ID_AA64PFR0, EL3, false, MRS_EXACT, id_aa64pfr0_el3),
+ MRS_FIELD(ID_AA64PFR0, EL2, false, MRS_EXACT, id_aa64pfr0_el2),
+ MRS_FIELD(ID_AA64PFR0, EL1, false, MRS_LOWER, id_aa64pfr0_el1),
+ MRS_FIELD(ID_AA64PFR0, EL0, false, MRS_LOWER, id_aa64pfr0_el0),
+ MRS_FIELD_END,
+};
+
+
+/* ID_AA64PFR1_EL1 */
+static struct mrs_field_value id_aa64pfr1_bt[] = {
+ MRS_FIELD_VALUE(ID_AA64PFR1_BT_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64PFR1_BT_IMPL, "BTI"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr1_ssbs[] = {
+ MRS_FIELD_VALUE(ID_AA64PFR1_SSBS_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64PFR1_SSBS_PSTATE, "PSTATE.SSBS"),
+ MRS_FIELD_VALUE(ID_AA64PFR1_SSBS_PSTATE_MSR, "PSTATE.SSBS MSR"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field_value id_aa64pfr1_mte[] = {
+ MRS_FIELD_VALUE(ID_AA64PFR1_MTE_NONE, ""),
+ MRS_FIELD_VALUE(ID_AA64PFR1_MTE_IMPL_EL0, "MTE EL0"),
+ MRS_FIELD_VALUE(ID_AA64PFR1_MTE_IMPL, "MTE"),
+ MRS_FIELD_VALUE_END,
+};
+
+static struct mrs_field id_aa64pfr1_fields[] = {
+ MRS_FIELD(ID_AA64PFR1, BT, false, MRS_EXACT, id_aa64pfr1_bt),
+ MRS_FIELD(ID_AA64PFR1, SSBS, false, MRS_EXACT, id_aa64pfr1_ssbs),
+ MRS_FIELD(ID_AA64PFR1, MTE, false, MRS_EXACT, id_aa64pfr1_mte),
+ MRS_FIELD_END,
+};
+
+struct mrs_user_reg {
+ u_int reg;
+ u_int CRm;
+ u_int Op2;
+ size_t offset;
+ struct mrs_field *fields;
+};
+
+static struct mrs_user_reg user_regs[] = {
+ { /* id_aa64isar0_el1 */
+ .reg = ID_AA64ISAR0_EL1,
+ .CRm = 6,
+ .Op2 = 0,
+ .offset = __offsetof(struct cpu_desc, id_aa64isar0),
+ .fields = id_aa64isar0_fields,
+ },
+ { /* id_aa64isar1_el1 */
+ .reg = ID_AA64ISAR1_EL1,
+ .CRm = 6,
+ .Op2 = 1,
+ .offset = __offsetof(struct cpu_desc, id_aa64isar1),
+ .fields = id_aa64isar1_fields,
+ },
+ { /* id_aa64pfr0_el1 */
+ .reg = ID_AA64PFR0_EL1,
+ .CRm = 4,
+ .Op2 = 0,
+ .offset = __offsetof(struct cpu_desc, id_aa64pfr0),
+ .fields = id_aa64pfr0_fields,
+ },
+ { /* id_aa64pfr1_el1 */
+ .reg = ID_AA64PFR1_EL1,
+ .CRm = 4,
+ .Op2 = 1,
+ .offset = __offsetof(struct cpu_desc, id_aa64pfr1),
+ .fields = id_aa64pfr1_fields,
+ },
+ { /* id_aa64dfr0_el1 */
+ .reg = ID_AA64DFR0_EL1,
+ .CRm = 5,
+ .Op2 = 0,
+ .offset = __offsetof(struct cpu_desc, id_aa64dfr0),
+ .fields = id_aa64dfr0_fields,
+ },
+ { /* id_aa64mmfr0_el1 */
+ .reg = ID_AA64MMFR0_EL1,
+ .CRm = 7,
+ .Op2 = 0,
+ .offset = __offsetof(struct cpu_desc, id_aa64mmfr0),
+ .fields = id_aa64mmfr0_fields,
+ },
+};
+
+#define CPU_DESC_FIELD(desc, idx) \
+ *(uint64_t *)((char *)&(desc) + user_regs[(idx)].offset)
+
+static int
+user_mrs_handler(vm_offset_t va, uint32_t insn, struct trapframe *frame,
+ uint32_t esr)
+{
+ uint64_t value;
+ int CRm, Op2, i, reg;
+
+ if ((insn & MRS_MASK) != MRS_VALUE)
+ return (0);
+
+ /*
+ * We only emulate Op0 == 3, Op1 == 0, CRn == 0, CRm == {0, 4-7}.
+ * These are in the EL1 CPU identification space.
+ * CRm == 0 holds MIDR_EL1, MPIDR_EL1, and REVID_EL1.
+ * CRm == {4-7} holds the ID_AA64 registers.
+ *
+ * For full details see the ARMv8 ARM (ARM DDI 0487C.a)
+ * Table D9-2 System instruction encodings for non-Debug System
+ * register accesses.
+ */
+ if (mrs_Op0(insn) != 3 || mrs_Op1(insn) != 0 || mrs_CRn(insn) != 0)
+ return (0);
+
+ CRm = mrs_CRm(insn);
+ if (CRm > 7 || (CRm < 4 && CRm != 0))
+ return (0);
+
+ Op2 = mrs_Op2(insn);
+ value = 0;
+
+ for (i = 0; i < nitems(user_regs); i++) {
+ if (user_regs[i].CRm == CRm && user_regs[i].Op2 == Op2) {
+ value = CPU_DESC_FIELD(user_cpu_desc, i);
+ break;
+ }
+ }
+
+ if (CRm == 0) {
+ switch (Op2) {
+ case 0:
+ value = READ_SPECIALREG(midr_el1);
+ break;
+ case 5:
+ value = READ_SPECIALREG(mpidr_el1);
+ break;
+ case 6:
+ value = READ_SPECIALREG(revidr_el1);
+ break;
+ default:
+ return (0);
+ }
+ }
+
+ /*
+ * We will handle this instruction. Move to the next one so we
+ * don't trap here again.
+ */
+ frame->tf_elr += INSN_SIZE;
+
+ reg = MRS_REGISTER(insn);
+ /* If reg is 31 then write to xzr, i.e. do nothing */
+ if (reg == 31)
+ return (1);
+
+ if (reg < nitems(frame->tf_x))
+ frame->tf_x[reg] = value;
+ else if (reg == 30)
+ frame->tf_lr = value;
+
+ return (1);
+}
+
+bool
+extract_user_id_field(u_int reg, u_int field_shift, uint8_t *val)
+{
+ uint64_t value;
+ int i;
+
+ for (i = 0; i < nitems(user_regs); i++) {
+ if (user_regs[i].reg == reg) {
+ value = CPU_DESC_FIELD(user_cpu_desc, i);
+ *val = value >> field_shift;
+ return (true);
+ }
+ }
+
+ return (false);
+}
+
+bool
+get_kernel_reg(u_int reg, uint64_t *val)
+{
+ int i;
+
+ for (i = 0; i < nitems(user_regs); i++) {
+ if (user_regs[i].reg == reg) {
+ *val = CPU_DESC_FIELD(kern_cpu_desc, i);
+ return (true);
+ }
+ }
+
+ return (false);
+}
+
+static uint64_t
+update_lower_register(uint64_t val, uint64_t new_val, u_int shift,
+ int width, bool sign)
+{
+ uint64_t mask;
+ uint64_t new_field, old_field;
+ bool update;
+
+ KASSERT(width > 0 && width < 64, ("%s: Invalid width %d", __func__,
+ width));
+
+ mask = (1ul << width) - 1;
+ new_field = (new_val >> shift) & mask;
+ old_field = (val >> shift) & mask;
+
+ update = false;
+ if (sign) {
+ /*
+ * The field is signed. Toggle the upper bit so the comparison
+ * works on unsigned values as this makes positive numbers,
+ * i.e. those with a 0 bit, larger than negative numbers,
+ * i.e. those with a 1 bit, in an unsigned comparison.
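+ * For example, with a 4 bit field a new value of 0xf (-1) becomes 0x7
+ * and an old value of 0x1 becomes 0x9, so the more restrictive -1 wins.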
+ */
+ if ((new_field ^ (1ul << (width - 1))) <
+ (old_field ^ (1ul << (width - 1))))
+ update = true;
+ } else {
+ if (new_field < old_field)
+ update = true;
+ }
+
+ if (update) {
+ val &= ~(mask << shift);
+ val |= new_field << shift;
+ }
+
+ return (val);
+}
+
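+/*
+ * Merge this CPU's ID registers into the system-wide kernel and user views.
+ * The kernel view always keeps the lowest (most restrictive) value seen on
+ * any CPU; the user view does the same for MRS_LOWER fields and pins
+ * MRS_EXACT fields to their sanitised value.
+ */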
+void
+update_special_regs(u_int cpu)
+{
+ struct mrs_field *fields;
+ uint64_t user_reg, kern_reg, value;
+ int i, j;
+
+ if (cpu == 0) {
+ /* Create a user visible cpu description with safe values */
+ memset(&user_cpu_desc, 0, sizeof(user_cpu_desc));
+ /* Safe values for these registers */
+ user_cpu_desc.id_aa64pfr0 = ID_AA64PFR0_AdvSIMD_NONE |
+ ID_AA64PFR0_FP_NONE | ID_AA64PFR0_EL1_64 |
+ ID_AA64PFR0_EL0_64;
+ user_cpu_desc.id_aa64dfr0 = ID_AA64DFR0_DebugVer_8;
+ }
+
+ for (i = 0; i < nitems(user_regs); i++) {
+ value = CPU_DESC_FIELD(cpu_desc[cpu], i);
+ if (cpu == 0) {
+ kern_reg = value;
+ user_reg = value;
+ } else {
+ kern_reg = CPU_DESC_FIELD(kern_cpu_desc, i);
+ user_reg = CPU_DESC_FIELD(user_cpu_desc, i);
+ }
+
+ fields = user_regs[i].fields;
+ for (j = 0; fields[j].type != 0; j++) {
+ switch (fields[j].type & MRS_TYPE_MASK) {
+ case MRS_EXACT:
+ user_reg &= ~(0xful << fields[j].shift);
+ user_reg |=
+ (uint64_t)MRS_EXACT_FIELD(fields[j].type) <<
+ fields[j].shift;
+ break;
+ case MRS_LOWER:
+ user_reg = update_lower_register(user_reg,
+ value, fields[j].shift, 4, fields[j].sign);
+ break;
+ default:
+ panic("Invalid field type: %d", fields[j].type);
+ }
+ kern_reg = update_lower_register(kern_reg, value,
+ fields[j].shift, 4, fields[j].sign);
+ }
+
+ CPU_DESC_FIELD(kern_cpu_desc, i) = kern_reg;
+ CPU_DESC_FIELD(user_cpu_desc, i) = user_reg;
+ }
+}
+
+/* HWCAP */
+extern u_long elf_hwcap;
+bool __read_frequently lse_supported = false;
+
+bool __read_frequently icache_aliasing = false;
+bool __read_frequently icache_vmid = false;
+
+int64_t dcache_line_size; /* The minimum D cache line size */
+int64_t icache_line_size; /* The minimum I cache line size */
+int64_t idcache_line_size; /* The minimum cache line size */
+
+static void
+identify_cpu_sysinit(void *dummy __unused)
+{
+ int cpu;
+ bool dic, idc;
+
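+ /* Assume DIC and IDC are usable; clear them if any CPU lacks them. */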
+ dic = (allow_dic != 0);
+ idc = (allow_idc != 0);
+
+ CPU_FOREACH(cpu) {
+ check_cpu_regs(cpu);
+ if (cpu != 0)
+ update_special_regs(cpu);
+
+ if (CTR_DIC_VAL(cpu_desc[cpu].ctr) == 0)
+ dic = false;
+ if (CTR_IDC_VAL(cpu_desc[cpu].ctr) == 0)
+ idc = false;
+ }
+
+ /* Exposed to userspace as AT_HWCAP */
+ elf_hwcap = parse_cpu_features_hwcap();
+
+ if (dic && idc) {
+ arm64_icache_sync_range = &arm64_dic_idc_icache_sync_range;
+ if (bootverbose)
+ printf("Enabling DIC & IDC ICache sync\n");
+ }
+
+ if ((elf_hwcap & HWCAP_ATOMICS) != 0) {
+ lse_supported = true;
+ if (bootverbose)
+ printf("Enabling LSE atomics in the kernel\n");
+ }
+#ifdef LSE_ATOMICS
+ if (!lse_supported)
+ panic("CPU does not support LSE atomic instructions");
+#endif
+
+ install_undef_handler(true, user_mrs_handler);
+}
+SYSINIT(identify_cpu, SI_SUB_CPU, SI_ORDER_ANY, identify_cpu_sysinit, NULL);
+
+static void
+cpu_features_sysinit(void *dummy __unused)
+{
+ u_int cpu;
+
+ CPU_FOREACH(cpu)
+ print_cpu_features(cpu);
+}
+SYSINIT(cpu_features, SI_SUB_SMP, SI_ORDER_ANY, cpu_features_sysinit, NULL);
+
+static u_long
+parse_cpu_features_hwcap(void)
+{
+ u_long hwcap = 0;
+
+ if (ID_AA64ISAR0_DP_VAL(user_cpu_desc.id_aa64isar0) ==
+ ID_AA64ISAR0_DP_IMPL)
+ hwcap |= HWCAP_ASIMDDP;
+
+ if (ID_AA64ISAR0_SM4_VAL(user_cpu_desc.id_aa64isar0) ==
+ ID_AA64ISAR0_SM4_IMPL)
+ hwcap |= HWCAP_SM4;
+
+ if (ID_AA64ISAR0_SM3_VAL(user_cpu_desc.id_aa64isar0) ==
+ ID_AA64ISAR0_SM3_IMPL)
+ hwcap |= HWCAP_SM3;
+
+ if (ID_AA64ISAR0_RDM_VAL(user_cpu_desc.id_aa64isar0) ==
+ ID_AA64ISAR0_RDM_IMPL)
+ hwcap |= HWCAP_ASIMDRDM;
+
+ if (ID_AA64ISAR0_Atomic_VAL(user_cpu_desc.id_aa64isar0) ==
+ ID_AA64ISAR0_Atomic_IMPL)
+ hwcap |= HWCAP_ATOMICS;
+
+ if (ID_AA64ISAR0_CRC32_VAL(user_cpu_desc.id_aa64isar0) ==
+ ID_AA64ISAR0_CRC32_BASE)
+ hwcap |= HWCAP_CRC32;
+
+ switch (ID_AA64ISAR0_SHA2_VAL(user_cpu_desc.id_aa64isar0)) {
+ case ID_AA64ISAR0_SHA2_BASE:
+ hwcap |= HWCAP_SHA2;
+ break;
+ case ID_AA64ISAR0_SHA2_512:
+ hwcap |= HWCAP_SHA2 | HWCAP_SHA512;
+ break;
+ default:
+ break;
+ }
+
+ if (ID_AA64ISAR0_SHA1_VAL(user_cpu_desc.id_aa64isar0))
+ hwcap |= HWCAP_SHA1;
+
+ switch (ID_AA64ISAR0_AES_VAL(user_cpu_desc.id_aa64isar0)) {
+ case ID_AA64ISAR0_AES_BASE:
+ hwcap |= HWCAP_AES;
+ break;
+ case ID_AA64ISAR0_AES_PMULL:
+ hwcap |= HWCAP_PMULL | HWCAP_AES;
+ break;
+ default:
+ break;
+ }
+
+ if (ID_AA64ISAR1_LRCPC_VAL(user_cpu_desc.id_aa64isar1) ==
+ ID_AA64ISAR1_LRCPC_RCPC_8_3)
+ hwcap |= HWCAP_LRCPC;
+
+ if (ID_AA64ISAR1_FCMA_VAL(user_cpu_desc.id_aa64isar1) ==
+ ID_AA64ISAR1_FCMA_IMPL)
+ hwcap |= HWCAP_FCMA;
+
+ if (ID_AA64ISAR1_JSCVT_VAL(user_cpu_desc.id_aa64isar1) ==
+ ID_AA64ISAR1_JSCVT_IMPL)
+ hwcap |= HWCAP_JSCVT;
+
+ if (ID_AA64ISAR1_DPB_VAL(user_cpu_desc.id_aa64isar1) ==
+ ID_AA64ISAR1_DPB_DCCVAP)
+ hwcap |= HWCAP_DCPOP;
+
+ if (ID_AA64PFR0_SVE_VAL(user_cpu_desc.id_aa64pfr0) ==
+ ID_AA64PFR0_SVE_IMPL)
+ hwcap |= HWCAP_SVE;
+
+ switch (ID_AA64PFR0_AdvSIMD_VAL(user_cpu_desc.id_aa64pfr0)) {
+ case ID_AA64PFR0_AdvSIMD_IMPL:
+ hwcap |= HWCAP_ASIMD;
+ break;
+ case ID_AA64PFR0_AdvSIMD_HP:
+ hwcap |= HWCAP_ASIMD | HWCAP_ASIMDDP;
+ break;
+ default:
+ break;
+ }
+
+ switch (ID_AA64PFR0_FP_VAL(user_cpu_desc.id_aa64pfr0)) {
+ case ID_AA64PFR0_FP_IMPL:
+ hwcap |= HWCAP_FP;
+ break;
+ case ID_AA64PFR0_FP_HP:
+ hwcap |= HWCAP_FP | HWCAP_FPHP;
+ break;
+ default:
+ break;
+ }
+
+ return (hwcap);
+}
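The bits assembled here reach userspace as the AT_HWCAP auxiliary vector entry (the elf_hwcap assignment above). As an illustration of the consumer side, a userland program could query it roughly as in the sketch below; this assumes elf_aux_info(3) and the AT_HWCAP/HWCAP_* definitions are reachable through <sys/auxv.h>, with fallback defines carrying the architectural bit positions.

/*
 * Hypothetical userspace consumer of AT_HWCAP (not part of the kernel
 * sources above).  Assumes elf_aux_info(3) and the AT_HWCAP/HWCAP_*
 * definitions are visible via <sys/auxv.h>; the fallback defines are
 * the architectural bit positions.
 */
#include <sys/auxv.h>
#include <stdio.h>

#ifndef HWCAP_ATOMICS
#define	HWCAP_ATOMICS	0x00000100	/* assumed value (LSE atomics) */
#endif
#ifndef HWCAP_CRC32
#define	HWCAP_CRC32	0x00000080	/* assumed value */
#endif

int
main(void)
{
	unsigned long hwcap;

	/* elf_aux_info(3) copies out the AT_HWCAP value set by the kernel. */
	if (elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap)) != 0) {
		fprintf(stderr, "AT_HWCAP not available\n");
		return (1);
	}
	printf("AT_HWCAP     = %#lx\n", hwcap);
	printf("LSE atomics  = %s\n", (hwcap & HWCAP_ATOMICS) ? "yes" : "no");
	printf("CRC32        = %s\n", (hwcap & HWCAP_CRC32) ? "yes" : "no");
	return (0);
}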
+
+static void
+print_ctr_fields(struct sbuf *sb, uint64_t reg, void *arg)
+{
+
+ sbuf_printf(sb, "%u byte D-cacheline,", CTR_DLINE_SIZE(reg));
+ sbuf_printf(sb, "%u byte I-cacheline,", CTR_ILINE_SIZE(reg));
+ reg &= ~(CTR_DLINE_MASK | CTR_ILINE_MASK);
+
+	switch (CTR_L1IP_VAL(reg)) {
+ case CTR_L1IP_VPIPT:
+ sbuf_printf(sb, "VPIPT");
+ break;
+ case CTR_L1IP_AIVIVT:
+ sbuf_printf(sb, "AIVIVT");
+ break;
+ case CTR_L1IP_VIPT:
+ sbuf_printf(sb, "VIPT");
+ break;
+ case CTR_L1IP_PIPT:
+ sbuf_printf(sb, "PIPT");
+ break;
+ }
+ sbuf_printf(sb, " ICache,");
+ reg &= ~CTR_L1IP_MASK;
+
+ sbuf_printf(sb, "%d byte ERG,", CTR_ERG_SIZE(reg));
+ sbuf_printf(sb, "%d byte CWG", CTR_CWG_SIZE(reg));
+ reg &= ~(CTR_ERG_MASK | CTR_CWG_MASK);
+
+ if (CTR_IDC_VAL(reg) != 0)
+ sbuf_printf(sb, ",IDC");
+ if (CTR_DIC_VAL(reg) != 0)
+ sbuf_printf(sb, ",DIC");
+ reg &= ~(CTR_IDC_MASK | CTR_DIC_MASK);
+ reg &= ~CTR_RES1;
+
+ if (reg != 0)
+ sbuf_printf(sb, ",%lx", reg);
+}
+
+static void
+print_register(struct sbuf *sb, const char *reg_name, uint64_t reg,
+ void (*print_fields)(struct sbuf *, uint64_t, void *), void *arg)
+{
+
+ sbuf_printf(sb, "%29s = <", reg_name);
+
+ print_fields(sb, reg, arg);
+
+ sbuf_finish(sb);
+ printf("%s>\n", sbuf_data(sb));
+ sbuf_clear(sb);
+}
+
+static void
+print_id_fields(struct sbuf *sb, uint64_t reg, void *arg)
+{
+ struct mrs_field *fields = arg;
+ struct mrs_field_value *fv;
+ int field, i, j, printed;
+
+#define SEP_STR ((printed++) == 0) ? "" : ","
+ printed = 0;
+ for (i = 0; fields[i].type != 0; i++) {
+ fv = fields[i].values;
+
+ /* TODO: Handle with an unknown message */
+ if (fv == NULL)
+ continue;
+
+ field = (reg & fields[i].mask) >> fields[i].shift;
+ for (j = 0; fv[j].desc != NULL; j++) {
+ if ((fv[j].value >> fields[i].shift) != field)
+ continue;
+
+ if (fv[j].desc[0] != '\0')
+ sbuf_printf(sb, "%s%s", SEP_STR, fv[j].desc);
+ break;
+ }
+ if (fv[j].desc == NULL)
+ sbuf_printf(sb, "%sUnknown %s(%x)", SEP_STR,
+ fields[i].name, field);
+
+ reg &= ~(0xful << fields[i].shift);
+ }
+
+ if (reg != 0)
+ sbuf_printf(sb, "%s%#lx", SEP_STR, reg);
+#undef SEP_STR
+}
+
+static void
+print_id_register(struct sbuf *sb, const char *reg_name, uint64_t reg,
+ struct mrs_field *fields)
+{
+
+ print_register(sb, reg_name, reg, print_id_fields, fields);
+}
+
+static void
+print_cpu_features(u_int cpu)
+{
+ struct sbuf *sb;
+
+ sb = sbuf_new_auto();
+ sbuf_printf(sb, "CPU%3d: %s %s r%dp%d", cpu,
+ cpu_desc[cpu].cpu_impl_name, cpu_desc[cpu].cpu_part_name,
+ cpu_desc[cpu].cpu_variant, cpu_desc[cpu].cpu_revision);
+
+ sbuf_cat(sb, " affinity:");
+	switch (cpu_aff_levels) {
+ default:
+ case 4:
+ sbuf_printf(sb, " %2d", CPU_AFF3(cpu_desc[cpu].mpidr));
+ /* FALLTHROUGH */
+ case 3:
+ sbuf_printf(sb, " %2d", CPU_AFF2(cpu_desc[cpu].mpidr));
+ /* FALLTHROUGH */
+ case 2:
+ sbuf_printf(sb, " %2d", CPU_AFF1(cpu_desc[cpu].mpidr));
+ /* FALLTHROUGH */
+ case 1:
+ case 0: /* On UP this will be zero */
+ sbuf_printf(sb, " %2d", CPU_AFF0(cpu_desc[cpu].mpidr));
+ break;
+ }
+ sbuf_finish(sb);
+ printf("%s\n", sbuf_data(sb));
+ sbuf_clear(sb);
+
+ /*
+	 * There is a hardware erratum where, if one CPU is performing a TLB
+	 * invalidation while another is performing a store-exclusive, the
+	 * store-exclusive may return the wrong status. A workaround seems
+	 * to be to use an IPI to invalidate on each CPU; however, given the
+	 * limited number of affected units (pass 1.1 is the evaluation
+	 * hardware revision) and the lack of information from Cavium,
+	 * this has not been implemented.
+ *
+ * At the time of writing this the only information is from:
+ * https://lkml.org/lkml/2016/8/4/722
+ */
+ /*
+ * XXX: CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1 on its own also
+ * triggers on pass 2.0+.
+ */
+ if (cpu == 0 && CPU_VAR(PCPU_GET(midr)) == 0 &&
+ CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1)
+ printf("WARNING: ThunderX Pass 1.1 detected.\nThis has known "
+ "hardware bugs that may cause the incorrect operation of "
+ "atomic operations.\n");
+
+ /* Cache Type Register */
+ if (cpu == 0 || (cpu_print_regs & PRINT_CTR_EL0) != 0) {
+ print_register(sb, "Cache Type",
+ cpu_desc[cpu].ctr, print_ctr_fields, NULL);
+ }
+
+ /* AArch64 Instruction Set Attribute Register 0 */
+ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_ISAR0) != 0)
+ print_id_register(sb, "Instruction Set Attributes 0",
+ cpu_desc[cpu].id_aa64isar0, id_aa64isar0_fields);
+
+ /* AArch64 Instruction Set Attribute Register 1 */
+ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_ISAR1) != 0)
+ print_id_register(sb, "Instruction Set Attributes 1",
+ cpu_desc[cpu].id_aa64isar1, id_aa64isar1_fields);
+
+ /* AArch64 Processor Feature Register 0 */
+ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_PFR0) != 0)
+ print_id_register(sb, "Processor Features 0",
+ cpu_desc[cpu].id_aa64pfr0, id_aa64pfr0_fields);
+
+ /* AArch64 Processor Feature Register 1 */
+ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_PFR1) != 0)
+ print_id_register(sb, "Processor Features 1",
+ cpu_desc[cpu].id_aa64pfr1, id_aa64pfr1_fields);
+
+ /* AArch64 Memory Model Feature Register 0 */
+ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_MMFR0) != 0)
+ print_id_register(sb, "Memory Model Features 0",
+ cpu_desc[cpu].id_aa64mmfr0, id_aa64mmfr0_fields);
+
+ /* AArch64 Memory Model Feature Register 1 */
+ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_MMFR1) != 0)
+ print_id_register(sb, "Memory Model Features 1",
+ cpu_desc[cpu].id_aa64mmfr1, id_aa64mmfr1_fields);
+
+ /* AArch64 Memory Model Feature Register 2 */
+ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_MMFR2) != 0)
+ print_id_register(sb, "Memory Model Features 2",
+ cpu_desc[cpu].id_aa64mmfr2, id_aa64mmfr2_fields);
+
+ /* AArch64 Debug Feature Register 0 */
+ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_DFR0) != 0)
+ print_id_register(sb, "Debug Features 0",
+ cpu_desc[cpu].id_aa64dfr0, id_aa64dfr0_fields);
+
+	/* AArch64 Debug Feature Register 1 */
+ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_DFR1) != 0)
+ print_id_register(sb, "Debug Features 1",
+ cpu_desc[cpu].id_aa64dfr1, id_aa64dfr1_fields);
+
+ /* AArch64 Auxiliary Feature Register 0 */
+ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_AFR0) != 0)
+ print_id_register(sb, "Auxiliary Features 0",
+ cpu_desc[cpu].id_aa64afr0, id_aa64afr0_fields);
+
+ /* AArch64 Auxiliary Feature Register 1 */
+ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_AFR1) != 0)
+ print_id_register(sb, "Auxiliary Features 1",
+ cpu_desc[cpu].id_aa64afr1, id_aa64afr1_fields);
+
+ sbuf_delete(sb);
+ sb = NULL;
+#undef SEP_STR
+}
+
+void
+identify_cache(uint64_t ctr)
+{
+
+ /* Identify the L1 cache type */
+ switch (CTR_L1IP_VAL(ctr)) {
+ case CTR_L1IP_PIPT:
+ break;
+ case CTR_L1IP_VPIPT:
+ icache_vmid = true;
+ break;
+ default:
+ case CTR_L1IP_VIPT:
+ icache_aliasing = true;
+ break;
+ }
+
+ if (dcache_line_size == 0) {
+ KASSERT(icache_line_size == 0, ("%s: i-cacheline size set: %ld",
+ __func__, icache_line_size));
+
+ /* Get the D cache line size */
+ dcache_line_size = CTR_DLINE_SIZE(ctr);
+ /* And the same for the I cache */
+ icache_line_size = CTR_ILINE_SIZE(ctr);
+
+ idcache_line_size = MIN(dcache_line_size, icache_line_size);
+ }
+
+ if (dcache_line_size != CTR_DLINE_SIZE(ctr)) {
+ printf("WARNING: D-cacheline size mismatch %ld != %d\n",
+ dcache_line_size, CTR_DLINE_SIZE(ctr));
+ }
+
+ if (icache_line_size != CTR_ILINE_SIZE(ctr)) {
+ printf("WARNING: I-cacheline size mismatch %ld != %d\n",
+ icache_line_size, CTR_ILINE_SIZE(ctr));
+ }
+}
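For reference, the CTR_DLINE_SIZE/CTR_ILINE_SIZE macros used above decode log2 word counts: IminLine sits in CTR_EL0 bits [3:0] and DminLine in bits [19:16], each giving the line size in 4-byte words. A standalone sketch of the same decoding, without the kernel macros (the sample register value is only illustrative):

/*
 * Standalone sketch of the CTR_EL0 line-size decoding used above.
 * IminLine (bits [3:0]) and DminLine (bits [19:16]) each hold log2 of
 * the line size in 4-byte words.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned
ctr_dline_bytes(uint64_t ctr)
{
	return (4u << ((ctr >> 16) & 0xf));	/* DminLine -> bytes */
}

static unsigned
ctr_iline_bytes(uint64_t ctr)
{
	return (4u << (ctr & 0xf));		/* IminLine -> bytes */
}

int
main(void)
{
	uint64_t ctr = 0x8444c004;	/* illustrative CTR_EL0 value */

	printf("D-cache line: %u bytes\n", ctr_dline_bytes(ctr));
	printf("I-cache line: %u bytes\n", ctr_iline_bytes(ctr));
	return (0);
}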
+
+void
+identify_cpu(u_int cpu)
+{
+ u_int midr;
+ u_int impl_id;
+ u_int part_id;
+ size_t i;
+ const struct cpu_parts *cpu_partsp = NULL;
+
+ midr = get_midr();
+
+ impl_id = CPU_IMPL(midr);
+ for (i = 0; i < nitems(cpu_implementers); i++) {
+ if (impl_id == cpu_implementers[i].impl_id ||
+ cpu_implementers[i].impl_id == 0) {
+ cpu_desc[cpu].cpu_impl = impl_id;
+ cpu_desc[cpu].cpu_impl_name =
+ cpu_implementers[i].impl_name;
+ cpu_partsp = cpu_implementers[i].cpu_parts;
+ break;
+ }
+ }
+
+ part_id = CPU_PART(midr);
+ for (i = 0; &cpu_partsp[i] != NULL; i++) {
+ if (part_id == cpu_partsp[i].part_id ||
+ cpu_partsp[i].part_id == 0) {
+ cpu_desc[cpu].cpu_part_num = part_id;
+ cpu_desc[cpu].cpu_part_name = cpu_partsp[i].part_name;
+ break;
+ }
+ }
+
+ cpu_desc[cpu].cpu_revision = CPU_REV(midr);
+ cpu_desc[cpu].cpu_variant = CPU_VAR(midr);
+
+ snprintf(cpu_model, sizeof(cpu_model), "%s %s r%dp%d",
+ cpu_desc[cpu].cpu_impl_name, cpu_desc[cpu].cpu_part_name,
+ cpu_desc[cpu].cpu_variant, cpu_desc[cpu].cpu_revision);
+
+ /* Save affinity for current CPU */
+ cpu_desc[cpu].mpidr = get_mpidr();
+ CPU_AFFINITY(cpu) = cpu_desc[cpu].mpidr & CPU_AFF_MASK;
+
+ cpu_desc[cpu].ctr = READ_SPECIALREG(ctr_el0);
+ cpu_desc[cpu].id_aa64dfr0 = READ_SPECIALREG(id_aa64dfr0_el1);
+ cpu_desc[cpu].id_aa64dfr1 = READ_SPECIALREG(id_aa64dfr1_el1);
+ cpu_desc[cpu].id_aa64isar0 = READ_SPECIALREG(id_aa64isar0_el1);
+ cpu_desc[cpu].id_aa64isar1 = READ_SPECIALREG(id_aa64isar1_el1);
+ cpu_desc[cpu].id_aa64mmfr0 = READ_SPECIALREG(id_aa64mmfr0_el1);
+ cpu_desc[cpu].id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
+ cpu_desc[cpu].id_aa64mmfr2 = READ_SPECIALREG(id_aa64mmfr2_el1);
+ cpu_desc[cpu].id_aa64pfr0 = READ_SPECIALREG(id_aa64pfr0_el1);
+ cpu_desc[cpu].id_aa64pfr1 = READ_SPECIALREG(id_aa64pfr1_el1);
+}
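identify_cpu() keys everything off MIDR_EL1, whose layout is Implementer [31:24], Variant [23:20], Architecture [19:16], PartNum [15:4], Revision [3:0]. A minimal sketch of that decoding outside the kernel (the sample MIDR value is only illustrative):

/*
 * Sketch of the MIDR_EL1 decoding identify_cpu() relies on via the
 * CPU_IMPL/CPU_PART/CPU_VAR/CPU_REV macros.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t midr = 0x410fd083;	/* illustrative: Arm Ltd. Cortex-A72 r0p3 */

	printf("implementer 0x%02x part 0x%03x r%up%u\n",
	    (midr >> 24) & 0xff,	/* 0x41 == 'A' (Arm Ltd.) */
	    (midr >> 4) & 0xfff,	/* 0xd08 == Cortex-A72 */
	    (midr >> 20) & 0xf,		/* variant */
	    midr & 0xf);		/* revision */
	return (0);
}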
+
+static void
+check_cpu_regs(u_int cpu)
+{
+
+ switch (cpu_aff_levels) {
+ case 0:
+ if (CPU_AFF0(cpu_desc[cpu].mpidr) !=
+ CPU_AFF0(cpu_desc[0].mpidr))
+ cpu_aff_levels = 1;
+ /* FALLTHROUGH */
+ case 1:
+ if (CPU_AFF1(cpu_desc[cpu].mpidr) !=
+ CPU_AFF1(cpu_desc[0].mpidr))
+ cpu_aff_levels = 2;
+ /* FALLTHROUGH */
+ case 2:
+ if (CPU_AFF2(cpu_desc[cpu].mpidr) !=
+ CPU_AFF2(cpu_desc[0].mpidr))
+ cpu_aff_levels = 3;
+ /* FALLTHROUGH */
+ case 3:
+ if (CPU_AFF3(cpu_desc[cpu].mpidr) !=
+ CPU_AFF3(cpu_desc[0].mpidr))
+ cpu_aff_levels = 4;
+ break;
+ }
+
+ if (cpu_desc[cpu].id_aa64afr0 != cpu_desc[0].id_aa64afr0)
+ cpu_print_regs |= PRINT_ID_AA64_AFR0;
+ if (cpu_desc[cpu].id_aa64afr1 != cpu_desc[0].id_aa64afr1)
+ cpu_print_regs |= PRINT_ID_AA64_AFR1;
+
+ if (cpu_desc[cpu].id_aa64dfr0 != cpu_desc[0].id_aa64dfr0)
+ cpu_print_regs |= PRINT_ID_AA64_DFR0;
+ if (cpu_desc[cpu].id_aa64dfr1 != cpu_desc[0].id_aa64dfr1)
+ cpu_print_regs |= PRINT_ID_AA64_DFR1;
+
+ if (cpu_desc[cpu].id_aa64isar0 != cpu_desc[0].id_aa64isar0)
+ cpu_print_regs |= PRINT_ID_AA64_ISAR0;
+ if (cpu_desc[cpu].id_aa64isar1 != cpu_desc[0].id_aa64isar1)
+ cpu_print_regs |= PRINT_ID_AA64_ISAR1;
+
+ if (cpu_desc[cpu].id_aa64mmfr0 != cpu_desc[0].id_aa64mmfr0)
+ cpu_print_regs |= PRINT_ID_AA64_MMFR0;
+ if (cpu_desc[cpu].id_aa64mmfr1 != cpu_desc[0].id_aa64mmfr1)
+ cpu_print_regs |= PRINT_ID_AA64_MMFR1;
+ if (cpu_desc[cpu].id_aa64mmfr2 != cpu_desc[0].id_aa64mmfr2)
+ cpu_print_regs |= PRINT_ID_AA64_MMFR2;
+
+ if (cpu_desc[cpu].id_aa64pfr0 != cpu_desc[0].id_aa64pfr0)
+ cpu_print_regs |= PRINT_ID_AA64_PFR0;
+ if (cpu_desc[cpu].id_aa64pfr1 != cpu_desc[0].id_aa64pfr1)
+ cpu_print_regs |= PRINT_ID_AA64_PFR1;
+
+ if (cpu_desc[cpu].ctr != cpu_desc[0].ctr) {
+ /*
+ * If the cache type register is different we may
+ * have a different l1 cache type.
+ */
+ identify_cache(cpu_desc[cpu].ctr);
+ cpu_print_regs |= PRINT_CTR_EL0;
+ }
+}
diff --git a/sys/arm64/arm64/in_cksum.c b/sys/arm64/arm64/in_cksum.c
new file mode 100644
index 000000000000..ae02e91d9203
--- /dev/null
+++ b/sys/arm64/arm64/in_cksum.c
@@ -0,0 +1,241 @@
+/* $NetBSD: in_cksum.c,v 1.7 1997/09/02 13:18:15 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 1988, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1996
+ * Matt Thomas <matt@3am-software.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <machine/in_cksum.h>
+
+/*
+ * Checksum routine for Internet Protocol family headers
+ * (Portable Alpha version).
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ */
+
+#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x)
+#define REDUCE32 \
+ { \
+ q_util.q = sum; \
+ sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
+ }
+#define REDUCE16 \
+ { \
+ q_util.q = sum; \
+ l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
+ sum = l_util.s[0] + l_util.s[1]; \
+ ADDCARRY(sum); \
+ }
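REDUCE32 and REDUCE16 fold the wide accumulator by summing its 16-bit lanes, relying on ones-complement addition being associative with end-around carry. A small sketch of the equivalent fold written with shifts instead of the unions (fold64 is a hypothetical helper, not part of this file):

/*
 * Sketch of the 64-bit to 16-bit ones-complement fold that REDUCE32 and
 * REDUCE16 perform, written with shifts instead of the q_util/l_util
 * unions.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t
fold64(uint64_t sum)
{
	/* Fold 64 -> 32 -> 16 bits, re-adding the carries each time. */
	sum = (sum & 0xffffffff) + (sum >> 32);
	sum = (sum & 0xffffffff) + (sum >> 32);
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)sum);
}

int
main(void)
{
	/* 0x0001ffff folds to 0xffff + 0x0001 = 0x10000 -> 0x0001. */
	printf("%#x\n", fold64(0x0001ffffULL));
	return (0);
}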
+
+static const u_int32_t in_masks[] = {
+ /*0 bytes*/ /*1 byte*/ /*2 bytes*/ /*3 bytes*/
+ 0x00000000, 0x000000FF, 0x0000FFFF, 0x00FFFFFF, /* offset 0 */
+ 0x00000000, 0x0000FF00, 0x00FFFF00, 0xFFFFFF00, /* offset 1 */
+ 0x00000000, 0x00FF0000, 0xFFFF0000, 0xFFFF0000, /* offset 2 */
+ 0x00000000, 0xFF000000, 0xFF000000, 0xFF000000, /* offset 3 */
+};
+
+union l_util {
+ u_int16_t s[2];
+ u_int32_t l;
+};
+union q_util {
+ u_int16_t s[4];
+ u_int32_t l[2];
+ u_int64_t q;
+};
+
+static u_int64_t
+in_cksumdata(const void *buf, int len)
+{
+ const u_int32_t *lw = (const u_int32_t *) buf;
+ u_int64_t sum = 0;
+ u_int64_t prefilled;
+ int offset;
+ union q_util q_util;
+
+ if ((3 & (long) lw) == 0 && len == 20) {
+ sum = (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4];
+ REDUCE32;
+ return sum;
+ }
+
+ if ((offset = 3 & (long) lw) != 0) {
+ const u_int32_t *masks = in_masks + (offset << 2);
+ lw = (u_int32_t *) (((long) lw) - offset);
+ sum = *lw++ & masks[len >= 3 ? 3 : len];
+ len -= 4 - offset;
+ if (len <= 0) {
+ REDUCE32;
+ return sum;
+ }
+ }
+#if 0
+ /*
+ * Force to cache line boundary.
+ */
+ offset = 32 - (0x1f & (long) lw);
+ if (offset < 32 && len > offset) {
+ len -= offset;
+ if (4 & offset) {
+ sum += (u_int64_t) lw[0];
+ lw += 1;
+ }
+ if (8 & offset) {
+ sum += (u_int64_t) lw[0] + lw[1];
+ lw += 2;
+ }
+ if (16 & offset) {
+ sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
+ lw += 4;
+ }
+ }
+#endif
+ /*
+	 * Access the prefill word to start the load of the next cache
+	 * line, then add the current cache line and save the prefill
+	 * result for the next loop iteration.
+ */
+ prefilled = lw[0];
+ while ((len -= 32) >= 4) {
+ u_int64_t prefilling = lw[8];
+ sum += prefilled + lw[1] + lw[2] + lw[3]
+ + lw[4] + lw[5] + lw[6] + lw[7];
+ lw += 8;
+ prefilled = prefilling;
+ }
+ if (len >= 0) {
+ sum += prefilled + lw[1] + lw[2] + lw[3]
+ + lw[4] + lw[5] + lw[6] + lw[7];
+ lw += 8;
+ } else {
+ len += 32;
+ }
+ while ((len -= 16) >= 0) {
+ sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
+ lw += 4;
+ }
+ len += 16;
+ while ((len -= 4) >= 0) {
+ sum += (u_int64_t) *lw++;
+ }
+ len += 4;
+ if (len > 0)
+ sum += (u_int64_t) (in_masks[len] & *lw);
+ REDUCE32;
+ return sum;
+}
+
+u_short
+in_addword(u_short a, u_short b)
+{
+ u_int64_t sum = a + b;
+
+ ADDCARRY(sum);
+ return (sum);
+}
+
+u_short
+in_pseudo(u_int32_t a, u_int32_t b, u_int32_t c)
+{
+ u_int64_t sum;
+ union q_util q_util;
+ union l_util l_util;
+
+ sum = (u_int64_t) a + b + c;
+ REDUCE16;
+ return (sum);
+}
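in_addword() and in_pseudo() are the building blocks used when the pseudo-header has to be folded into a checksum, for example for checksum offload. The sketch below reproduces that arithmetic standalone; the addresses, protocol and length are invented, and fold64() stands in for the REDUCE16 step above:

/*
 * Standalone illustration of the pseudo-header partial sum that a call
 * like in_pseudo(src, dst, htons(IPPROTO_TCP + len)) produces.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t
fold64(uint64_t sum)
{
	sum = (sum & 0xffffffff) + (sum >> 32);
	sum = (sum & 0xffffffff) + (sum >> 32);
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)sum);
}

int
main(void)
{
	uint32_t src = inet_addr("192.0.2.1");	/* network byte order */
	uint32_t dst = inet_addr("192.0.2.2");
	uint16_t len = 20;			/* TCP header only */
	uint64_t sum;

	sum = (uint64_t)src + dst + htons(IPPROTO_TCP + len);
	printf("pseudo-header partial sum: 0x%04x\n", (unsigned)fold64(sum));
	return (0);
}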
+
+u_short
+in_cksum_skip(struct mbuf *m, int len, int skip)
+{
+ u_int64_t sum = 0;
+ int mlen = 0;
+ int clen = 0;
+ caddr_t addr;
+ union q_util q_util;
+ union l_util l_util;
+
+ len -= skip;
+ for (; skip && m; m = m->m_next) {
+ if (m->m_len > skip) {
+ mlen = m->m_len - skip;
+ addr = mtod(m, caddr_t) + skip;
+ goto skip_start;
+ } else {
+ skip -= m->m_len;
+ }
+ }
+
+ for (; m && len; m = m->m_next) {
+ if (m->m_len == 0)
+ continue;
+ mlen = m->m_len;
+ addr = mtod(m, caddr_t);
+skip_start:
+ if (len < mlen)
+ mlen = len;
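+		/*
+		 * in_cksumdata() sums the data according to its byte
+		 * position in memory.  If that parity (addr) differs from
+		 * the parity of this data in the logical stream (clen),
+		 * byte-swap the partial sum; the << 8 does that once the
+		 * carries are folded back in by REDUCE16.
+		 */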
+ if ((clen ^ (long) addr) & 1)
+ sum += in_cksumdata(addr, mlen) << 8;
+ else
+ sum += in_cksumdata(addr, mlen);
+
+ clen += mlen;
+ len -= mlen;
+ }
+ REDUCE16;
+ return (~sum & 0xffff);
+}
+
+u_int in_cksum_hdr(const struct ip *ip)
+{
+ u_int64_t sum = in_cksumdata(ip, sizeof(struct ip));
+ union q_util q_util;
+ union l_util l_util;
+ REDUCE16;
+ return (~sum & 0xffff);
+}
diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S
new file mode 100644
index 000000000000..b9147df32815
--- /dev/null
+++ b/sys/arm64/arm64/locore.S
@@ -0,0 +1,859 @@
+/*-
+ * Copyright (c) 2012-2014 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "assym.inc"
+#include "opt_kstack_pages.h"
+#include <sys/syscall.h>
+#include <machine/asm.h>
+#include <machine/armreg.h>
+#include <machine/hypervisor.h>
+#include <machine/param.h>
+#include <machine/pte.h>
+#include <machine/vm.h>
+#include <machine/vmparam.h>
+
+#define VIRT_BITS 48
+#define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT)
+
+ .globl kernbase
+ .set kernbase, KERNBASE
+
+
+/* U-Boot booti related constants. */
+#if defined(LINUX_BOOT_ABI)
+#define FDT_MAGIC 0xEDFE0DD0 /* FDT blob Magic */
+
+#ifndef UBOOT_IMAGE_OFFSET
+#define UBOOT_IMAGE_OFFSET 0 /* Image offset from start of */
+#endif /* 2 MiB page */
+
+#ifndef UBOOT_IMAGE_SIZE /* Total size of image */
+#define UBOOT_IMAGE_SIZE _end - _start
+#endif
+
+#ifndef UBOOT_IMAGE_FLAGS
+#define UBOOT_IMAGE_FLAGS 0 /* LE kernel, unspecified */
+#endif /* page size */
+#endif /* defined(LINUX_BOOT_ABI) */
+
+/*
+ * We assume:
+ * MMU on with an identity map, or off
+ * D-Cache: off
+ * I-Cache: on or off
+ * We are loaded at a 2MiB aligned address
+ */
+
+ .text
+ .globl _start
+_start:
+#if defined(LINUX_BOOT_ABI)
+ /* U-boot image header */
+ b 1f /* code 0 */
+ .long 0 /* code 1 */
+ .quad UBOOT_IMAGE_OFFSET /* Image offset in 2 MiB page, LE */
+ .quad UBOOT_IMAGE_SIZE /* Image size, LE */
+ .quad UBOOT_IMAGE_FLAGS /* Flags for kernel. LE */
+ .quad 0 /* Reserved */
+ .quad 0 /* Reserved */
+ .quad 0 /* Reserved */
+ .long 0x644d5241 /* Magic "ARM\x64", LE */
+ .long 0 /* Reserved for PE COFF offset*/
+1:
+#endif /* defined(LINUX_BOOT_ABI) */
+
+ /* Drop to EL1 */
+ bl drop_to_el1
+
+ /*
+ * Disable the MMU. We may have entered the kernel with it on and
+ * will need to update the tables later. If this has been set up
+ * with anything other than a VA == PA map then this will fail,
+ * but in this case the code to find where we are running from
+ * would have also failed.
+ */
+ dsb sy
+ mrs x2, sctlr_el1
+ bic x2, x2, SCTLR_M
+ msr sctlr_el1, x2
+ isb
+
+ /* Set the context id */
+ msr contextidr_el1, xzr
+
+ /* Get the virt -> phys offset */
+ bl get_virt_delta
+
+ /*
+ * At this point:
+ * x29 = PA - VA
+ * x28 = Our physical load address
+ */
+
+ /* Create the page tables */
+ bl create_pagetables
+
+ /*
+ * At this point:
+ * x27 = TTBR0 table
+ * x26 = Kernel L1 table
+ * x24 = TTBR1 table
+ */
+
+ /* Enable the mmu */
+ bl start_mmu
+
+ /* Load the new ttbr0 pagetable */
+ adr x27, pagetable_l0_ttbr0
+
+ /* Jump to the virtual address space */
+ ldr x15, .Lvirtdone
+ br x15
+
+virtdone:
+ /* Set up the stack */
+ adr x25, initstack_end
+ mov sp, x25
+ sub sp, sp, #PCB_SIZE
+
+ /* Zero the BSS */
+ ldr x15, .Lbss
+ ldr x14, .Lend
+1:
+ str xzr, [x15], #8
+ cmp x15, x14
+ b.lo 1b
+
+ /* Backup the module pointer */
+ mov x1, x0
+
+ /* Make the page table base a virtual address */
+ sub x26, x26, x29
+ sub x24, x24, x29
+
+ sub sp, sp, #BOOTPARAMS_SIZE
+ mov x0, sp
+
+	/* Negate the delta so it is VA -> PA */
+ neg x29, x29
+
+ str x1, [x0, #BP_MODULEP]
+ str x26, [x0, #BP_KERN_L1PT]
+ str x29, [x0, #BP_KERN_DELTA]
+ adr x25, initstack
+ str x25, [x0, #BP_KERN_STACK]
+ str x24, [x0, #BP_KERN_L0PT]
+ str x23, [x0, #BP_BOOT_EL]
+ str x27, [x0, 40] /* kern_ttbr0 */
+
+ /* trace back starts here */
+ mov fp, #0
+ /* Branch to C code */
+ bl initarm
+ bl mi_startup
+
+ /* We should not get here */
+ brk 0
+
+ .align 3
+.Lvirtdone:
+ .quad virtdone
+.Lbss:
+ .quad __bss_start
+.Lend:
+ .quad _end
+
+#ifdef SMP
+/*
+ * mpentry(unsigned long)
+ *
+ * Called by a core when it is being brought online.
+ * The data in x0 is passed straight to init_secondary.
+ */
+ENTRY(mpentry)
+ /* Disable interrupts */
+ msr daifset, #2
+
+ /* Drop to EL1 */
+ bl drop_to_el1
+
+ /* Set the context id */
+ msr contextidr_el1, xzr
+
+ /* Load the kernel page table */
+ adr x24, pagetable_l0_ttbr1
+ /* Load the identity page table */
+	adr	x27, pagetable_l0_ttbr0_bootstrap
+
+ /* Enable the mmu */
+ bl start_mmu
+
+ /* Load the new ttbr0 pagetable */
+ adr x27, pagetable_l0_ttbr0
+
+ /* Jump to the virtual address space */
+ ldr x15, =mp_virtdone
+ br x15
+
+mp_virtdone:
+ /* Start using the AP boot stack */
+ ldr x4, =bootstack
+ ldr x4, [x4]
+ mov sp, x4
+
+ /* Load the kernel ttbr0 pagetable */
+ msr ttbr0_el1, x27
+ isb
+
+ /* Invalidate the TLB */
+ tlbi vmalle1
+ dsb sy
+ isb
+
+ b init_secondary
+END(mpentry)
+#endif
+
+/*
+ * If we are started in EL2, configure the required hypervisor
+ * registers and drop to EL1.
+ */
+drop_to_el1:
+ mrs x23, CurrentEL
+ lsr x23, x23, #2
+ cmp x23, #0x2
+ b.eq 1f
+ ret
+1:
+ /* Configure the Hypervisor */
+ mov x2, #(HCR_RW)
+ msr hcr_el2, x2
+
+ /* Load the Virtualization Process ID Register */
+ mrs x2, midr_el1
+ msr vpidr_el2, x2
+
+ /* Load the Virtualization Multiprocess ID Register */
+ mrs x2, mpidr_el1
+ msr vmpidr_el2, x2
+
+ /* Set the bits that need to be 1 in sctlr_el1 */
+ ldr x2, .Lsctlr_res1
+ msr sctlr_el1, x2
+
+ /* Don't trap to EL2 for exceptions */
+ mov x2, #CPTR_RES1
+ msr cptr_el2, x2
+
+ /* Don't trap to EL2 for CP15 traps */
+ msr hstr_el2, xzr
+
+ /* Enable access to the physical timers at EL1 */
+ mrs x2, cnthctl_el2
+ orr x2, x2, #(CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN)
+ msr cnthctl_el2, x2
+
+ /* Set the counter offset to a known value */
+ msr cntvoff_el2, xzr
+
+ /* Hypervisor trap functions */
+ adr x2, hyp_vectors
+ msr vbar_el2, x2
+
+ mov x2, #(PSR_F | PSR_I | PSR_A | PSR_D | PSR_M_EL1h)
+ msr spsr_el2, x2
+
+ /* Configure GICv3 CPU interface */
+ mrs x2, id_aa64pfr0_el1
+ /* Extract GIC bits from the register */
+ ubfx x2, x2, #ID_AA64PFR0_GIC_SHIFT, #ID_AA64PFR0_GIC_BITS
+ /* GIC[3:0] == 0001 - GIC CPU interface via special regs. supported */
+ cmp x2, #(ID_AA64PFR0_GIC_CPUIF_EN >> ID_AA64PFR0_GIC_SHIFT)
+ b.ne 2f
+
+ mrs x2, icc_sre_el2
+ orr x2, x2, #ICC_SRE_EL2_EN /* Enable access from insecure EL1 */
+ orr x2, x2, #ICC_SRE_EL2_SRE /* Enable system registers */
+ msr icc_sre_el2, x2
+2:
+
+ /* Set the address to return to our return address */
+ msr elr_el2, x30
+ isb
+
+ eret
+
+ .align 3
+.Lsctlr_res1:
+ .quad SCTLR_RES1
+
+#define VECT_EMPTY \
+ .align 7; \
+ 1: b 1b
+
+ .align 11
+hyp_vectors:
+ VECT_EMPTY /* Synchronous EL2t */
+ VECT_EMPTY /* IRQ EL2t */
+ VECT_EMPTY /* FIQ EL2t */
+ VECT_EMPTY /* Error EL2t */
+
+ VECT_EMPTY /* Synchronous EL2h */
+ VECT_EMPTY /* IRQ EL2h */
+ VECT_EMPTY /* FIQ EL2h */
+ VECT_EMPTY /* Error EL2h */
+
+ VECT_EMPTY /* Synchronous 64-bit EL1 */
+ VECT_EMPTY /* IRQ 64-bit EL1 */
+ VECT_EMPTY /* FIQ 64-bit EL1 */
+ VECT_EMPTY /* Error 64-bit EL1 */
+
+ VECT_EMPTY /* Synchronous 32-bit EL1 */
+ VECT_EMPTY /* IRQ 32-bit EL1 */
+ VECT_EMPTY /* FIQ 32-bit EL1 */
+ VECT_EMPTY /* Error 32-bit EL1 */
+
+/*
+ * Get the delta between the physical address we were loaded to and the
+ * virtual address we expect to run from. This is used when building the
+ * initial page table.
+ */
+get_virt_delta:
+ /* Load the physical address of virt_map */
+ adr x29, virt_map
+ /* Load the virtual address of virt_map stored in virt_map */
+ ldr x28, [x29]
+ /* Find PA - VA as PA' = VA' - VA + PA = VA' + (PA - VA) = VA' + x29 */
+ sub x29, x29, x28
+ /* Find the load address for the kernel */
+ mov x28, #(KERNBASE)
+ add x28, x28, x29
+ ret
+
+ .align 3
+virt_map:
+ .quad virt_map
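In C terms, get_virt_delta computes delta = PA(virt_map) - VA(virt_map), where the physical address comes from the PC-relative adr and the virtual one from the .quad above; adding that delta converts any linked VA to its load-time PA until the MMU is enabled. A host-side sketch with invented addresses:

/*
 * Host-side sketch of the delta computed by get_virt_delta.  The
 * addresses are invented; in the real code the physical address comes
 * from the PC-relative adr and the virtual one from the .quad above.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t virt_map_va = 0xffff000000400123;	/* linked VA */
	uint64_t virt_map_pa = 0x0000000080400123;	/* load-time PA */
	uint64_t kernbase_va = 0xffff000000000000;	/* example KERNBASE */

	/* x29 in the assembly: PA - VA (wraps harmlessly on uint64_t). */
	uint64_t delta = virt_map_pa - virt_map_va;

	printf("kernel load PA = %#" PRIx64 "\n", kernbase_va + delta);
	return (0);
}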
+
+/*
+ * This builds the page tables containing the identity map, and the kernel
+ * virtual map.
+ *
+ * It relies on:
+ * We were loaded to an address that is on a 2MiB boundary
+ *  None of the memory in use may cross a 1GiB boundary
+ * x28 contains the physical address we were loaded from
+ *
+ * TODO: This is out of date.
+ * There are at least 5 pages before that address for the page tables
+ * The pages used are:
+ * - The Kernel L2 table
+ * - The Kernel L1 table
+ * - The Kernel L0 table (TTBR1)
+ * - The identity (PA = VA) L1 table
+ * - The identity (PA = VA) L0 table (TTBR0)
+ * - The DMAP L1 tables
+ */
+create_pagetables:
+ /* Save the Link register */
+ mov x5, x30
+
+ /* Clean the page table */
+ adr x6, pagetable
+ mov x26, x6
+ adr x27, pagetable_end
+1:
+ stp xzr, xzr, [x6], #16
+ stp xzr, xzr, [x6], #16
+ stp xzr, xzr, [x6], #16
+ stp xzr, xzr, [x6], #16
+ cmp x6, x27
+ b.lo 1b
+
+ /*
+ * Build the TTBR1 maps.
+ */
+
+ /* Find the size of the kernel */
+ mov x6, #(KERNBASE)
+
+#if defined(LINUX_BOOT_ABI)
+ /* X19 is used as 'map FDT data' flag */
+ mov x19, xzr
+
+ /* No modules or FDT pointer ? */
+ cbz x0, booti_no_fdt
+
+ /* Test if modulep points to modules descriptor or to FDT */
+ ldr w8, [x0]
+ ldr w7, =FDT_MAGIC
+ cmp w7, w8
+ b.eq booti_fdt
+#endif
+
+ /* Booted with modules pointer */
+ /* Find modulep - begin */
+ sub x8, x0, x6
+ /* Add two 2MiB pages for the module data and round up */
+ ldr x7, =(3 * L2_SIZE - 1)
+ add x8, x8, x7
+ b common
+
+#if defined(LINUX_BOOT_ABI)
+booti_fdt:
+ /* Booted by U-Boot booti with FDT data */
+ /* Set 'map FDT data' flag */
+ mov x19, #1
+
+booti_no_fdt:
+	/* Booted by U-Boot booti without FDT data */
+ /* Find the end - begin */
+ ldr x7, .Lend
+ sub x8, x7, x6
+
+ /*
+ * Add one 2MiB page for copy of FDT data (maximum FDT size),
+ * one for metadata and round up
+ */
+ ldr x7, =(3 * L2_SIZE - 1)
+ add x8, x8, x7
+#endif
+
+common:
+ /* Get the number of l2 pages to allocate, rounded down */
+ lsr x10, x8, #(L2_SHIFT)
+
+ /* Create the kernel space L2 table */
+ mov x6, x26
+ mov x7, #VM_MEMATTR_WRITE_BACK
+ mov x8, #(KERNBASE & L2_BLOCK_MASK)
+ mov x9, x28
+ bl build_l2_block_pagetable
+
+ /* Move to the l1 table */
+ add x26, x26, #PAGE_SIZE
+
+ /* Link the l1 -> l2 table */
+ mov x9, x6
+ mov x6, x26
+ bl link_l1_pagetable
+
+ /* Move to the l0 table */
+ add x24, x26, #PAGE_SIZE
+
+ /* Link the l0 -> l1 table */
+ mov x9, x6
+ mov x6, x24
+ mov x10, #1
+ bl link_l0_pagetable
+
+ /* Link the DMAP tables */
+ ldr x8, =DMAP_MIN_ADDRESS
+ adr x9, pagetable_dmap;
+ mov x10, #DMAP_TABLES
+ bl link_l0_pagetable
+
+ /*
+ * Build the TTBR0 maps. As TTBR0 maps, they must specify ATTR_S1_nG.
+ * They are only needed early on, so the VA = PA map is uncached.
+ */
+ add x27, x24, #PAGE_SIZE
+
+ mov x6, x27 /* The initial page table */
+#if defined(SOCDEV_PA) && defined(SOCDEV_VA)
+ /* Create a table for the UART */
+ mov x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_DEVICE))
+ mov x8, #(SOCDEV_VA) /* VA start */
+ mov x9, #(SOCDEV_PA) /* PA start */
+ mov x10, #1
+ bl build_l1_block_pagetable
+#endif
+
+#if defined(LINUX_BOOT_ABI)
+ /* Map FDT data ? */
+ cbz x19, 1f
+
+ /* Create the identity mapping for FDT data (2 MiB max) */
+ mov x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_UNCACHEABLE))
+ mov x9, x0
+ mov x8, x0 /* VA start (== PA start) */
+ mov x10, #1
+ bl build_l1_block_pagetable
+
+1:
+#endif
+
+ /* Create the VA = PA map */
+ mov x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_UNCACHEABLE))
+ mov x9, x27
+ mov x8, x9 /* VA start (== PA start) */
+ mov x10, #1
+ bl build_l1_block_pagetable
+
+ /* Move to the l0 table */
+ add x27, x27, #PAGE_SIZE
+
+ /* Link the l0 -> l1 table */
+ mov x9, x6
+ mov x6, x27
+ mov x10, #1
+ bl link_l0_pagetable
+
+ /* Restore the Link register */
+ mov x30, x5
+ ret
+
+/*
+ * Builds an L0 -> L1 table descriptor
+ *
+ * This is a link for a 512GiB block of memory with up to 1GiB regions mapped
+ * within it by build_l1_block_pagetable.
+ *
+ * x6 = L0 table
+ * x8 = Virtual Address
+ * x9 = L1 PA (trashed)
+ * x10 = Entry count
+ * x11, x12 and x13 are trashed
+ */
+link_l0_pagetable:
+ /*
+ * Link an L0 -> L1 table entry.
+ */
+ /* Find the table index */
+ lsr x11, x8, #L0_SHIFT
+ and x11, x11, #L0_ADDR_MASK
+
+ /* Build the L0 block entry */
+ mov x12, #L0_TABLE
+
+ /* Only use the output address bits */
+ lsr x9, x9, #PAGE_SHIFT
+1: orr x13, x12, x9, lsl #PAGE_SHIFT
+
+ /* Store the entry */
+ str x13, [x6, x11, lsl #3]
+
+ sub x10, x10, #1
+ add x11, x11, #1
+ add x9, x9, #1
+ cbnz x10, 1b
+
+ ret
+
+/*
+ * Builds an L1 -> L2 table descriptor
+ *
+ * This is a link for a 1GiB block of memory with up to 2MiB regions mapped
+ * within it by build_l2_block_pagetable.
+ *
+ * x6 = L1 table
+ * x8 = Virtual Address
+ * x9 = L2 PA (trashed)
+ * x11, x12 and x13 are trashed
+ */
+link_l1_pagetable:
+ /*
+ * Link an L1 -> L2 table entry.
+ */
+ /* Find the table index */
+ lsr x11, x8, #L1_SHIFT
+ and x11, x11, #Ln_ADDR_MASK
+
+ /* Build the L1 block entry */
+ mov x12, #L1_TABLE
+
+ /* Only use the output address bits */
+ lsr x9, x9, #PAGE_SHIFT
+ orr x13, x12, x9, lsl #PAGE_SHIFT
+
+ /* Store the entry */
+ str x13, [x6, x11, lsl #3]
+
+ ret
+
+/*
+ * Builds 'count' 1 GiB page table entries
+ * x6 = L1 table
+ * x7 = Variable lower block attributes
+ * x8 = VA start
+ * x9 = PA start (trashed)
+ * x10 = Entry count
+ * x11, x12 and x13 are trashed
+ */
+build_l1_block_pagetable:
+ /*
+ * Build the L1 table entry.
+ */
+ /* Find the table index */
+ lsr x11, x8, #L1_SHIFT
+ and x11, x11, #Ln_ADDR_MASK
+
+ /* Build the L1 block entry */
+ orr x12, x7, #L1_BLOCK
+ orr x12, x12, #(ATTR_AF)
+#ifdef SMP
+ orr x12, x12, ATTR_SH(ATTR_SH_IS)
+#endif
+
+ /* Only use the output address bits */
+ lsr x9, x9, #L1_SHIFT
+
+ /* Set the physical address for this virtual address */
+1: orr x13, x12, x9, lsl #L1_SHIFT
+
+ /* Store the entry */
+ str x13, [x6, x11, lsl #3]
+
+ sub x10, x10, #1
+ add x11, x11, #1
+ add x9, x9, #1
+ cbnz x10, 1b
+
+ ret
+
+/*
+ * Builds 'count' 2 MiB page table entries
+ * x6 = L2 table
+ * x7 = Type (0 = Device, 1 = Normal)
+ * x8 = VA start
+ * x9 = PA start (trashed)
+ * x10 = Entry count
+ * x11, x12 and x13 are trashed
+ */
+build_l2_block_pagetable:
+ /*
+ * Build the L2 table entry.
+ */
+ /* Find the table index */
+ lsr x11, x8, #L2_SHIFT
+ and x11, x11, #Ln_ADDR_MASK
+
+ /* Build the L2 block entry */
+ lsl x12, x7, #2
+ orr x12, x12, #L2_BLOCK
+ orr x12, x12, #(ATTR_AF)
+ orr x12, x12, #(ATTR_S1_UXN)
+#ifdef SMP
+ orr x12, x12, ATTR_SH(ATTR_SH_IS)
+#endif
+
+ /* Only use the output address bits */
+ lsr x9, x9, #L2_SHIFT
+
+ /* Set the physical address for this virtual address */
+1: orr x13, x12, x9, lsl #L2_SHIFT
+
+ /* Store the entry */
+ str x13, [x6, x11, lsl #3]
+
+ sub x10, x10, #1
+ add x11, x11, #1
+ add x9, x9, #1
+ cbnz x10, 1b
+
+ ret
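All three build routines follow the same recipe for the 4 KiB granule: index = (VA >> level shift) & 0x1ff, descriptor = block-aligned PA | attributes | block type. A C sketch of the L2 (2 MiB) case follows; the constants are the architectural values and the attribute handling is simplified relative to the assembly above:

/*
 * C sketch of the L2 block entry built by build_l2_block_pagetable for
 * a 4 KiB granule.  Only the access flag is shown on the attribute side.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define	L2_SHIFT	21			/* 2 MiB blocks */
#define	Ln_ADDR_MASK	0x1ff			/* 512 entries per table */
#define	L2_BLOCK	0x1			/* descriptor type: block */
#define	ATTR_AF		(1ULL << 10)		/* access flag */

static void
map_l2_block(uint64_t *l2_table, uint64_t va, uint64_t pa, uint64_t attrs)
{
	uint64_t idx = (va >> L2_SHIFT) & Ln_ADDR_MASK;
	uint64_t desc = (pa & ~((1ULL << L2_SHIFT) - 1)) | attrs |
	    ATTR_AF | L2_BLOCK;

	l2_table[idx] = desc;
}

int
main(void)
{
	static uint64_t l2[512];
	uint64_t va = 0xffff000000000000ULL;	/* example KERNBASE */

	map_l2_block(l2, va, 0x80000000ULL, 0);
	printf("l2[%" PRIu64 "] = %#" PRIx64 "\n",
	    (va >> L2_SHIFT) & Ln_ADDR_MASK,
	    l2[(va >> L2_SHIFT) & Ln_ADDR_MASK]);
	return (0);
}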
+
+start_mmu:
+ dsb sy
+
+ /* Load the exception vectors */
+ ldr x2, =exception_vectors
+ msr vbar_el1, x2
+
+ /* Load ttbr0 and ttbr1 */
+ msr ttbr0_el1, x27
+ msr ttbr1_el1, x24
+ isb
+
+ /* Clear the Monitor Debug System control register */
+ msr mdscr_el1, xzr
+
+ /* Invalidate the TLB */
+ tlbi vmalle1is
+ dsb ish
+ isb
+
+ ldr x2, mair
+ msr mair_el1, x2
+
+ /*
+ * Setup TCR according to the PARange and ASIDBits fields
+ * from ID_AA64MMFR0_EL1 and the HAFDBS field from the
+ * ID_AA64MMFR1_EL1. More precisely, set TCR_EL1.AS
+ * to 1 only if the ASIDBits field equals 0b0010.
+ */
+ ldr x2, tcr
+ mrs x3, id_aa64mmfr0_el1
+
+ /* Copy the bottom 3 bits from id_aa64mmfr0_el1 into TCR.IPS */
+ bfi x2, x3, #(TCR_IPS_SHIFT), #(TCR_IPS_WIDTH)
+ and x3, x3, #(ID_AA64MMFR0_ASIDBits_MASK)
+
+ /* Check if the HW supports 16 bit ASIDS */
+ cmp x3, #(ID_AA64MMFR0_ASIDBits_16)
+ /* If so x3 == 1, else x3 == 0 */
+ cset x3, eq
+ /* Set TCR.AS with x3 */
+ bfi x2, x3, #(TCR_ASID_SHIFT), #(TCR_ASID_WIDTH)
+
+ /*
+ * Check if the HW supports access flag and dirty state updates,
+ * and set TCR_EL1.HA and TCR_EL1.HD accordingly.
+ */
+ mrs x3, id_aa64mmfr1_el1
+ and x3, x3, #(ID_AA64MMFR1_HAFDBS_MASK)
+ cmp x3, #1
+ b.ne 1f
+ orr x2, x2, #(TCR_HA)
+ b 2f
+1:
+ cmp x3, #2
+ b.ne 2f
+ orr x2, x2, #(TCR_HA | TCR_HD)
+2:
+ msr tcr_el1, x2
+
+ /*
+ * Setup SCTLR.
+ */
+ ldr x2, sctlr_set
+ ldr x3, sctlr_clear
+ mrs x1, sctlr_el1
+ bic x1, x1, x3 /* Clear the required bits */
+ orr x1, x1, x2 /* Set the required bits */
+ msr sctlr_el1, x1
+ isb
+
+ ret
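The TCR fixups above are plain bit-field edits: the low bits of PARange from ID_AA64MMFR0_EL1 are copied into TCR_EL1.IPS (bits [34:32]), and TCR_EL1.AS (bit 36) is set when ASIDBits reports 16-bit ASIDs. The same logic in C, as a sketch with the shifts written out (tcr_fixup is a hypothetical helper):

/*
 * C rendering of the TCR_EL1 fixups done in start_mmu (a sketch; the
 * shift and width values are the architectural ones).
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define	TCR_IPS_SHIFT	32	/* intermediate physical address size */
#define	TCR_IPS_WIDTH	3
#define	TCR_AS_SHIFT	36	/* 0 = 8-bit ASIDs, 1 = 16-bit ASIDs */

static uint64_t
tcr_fixup(uint64_t tcr, uint64_t mmfr0)
{
	uint64_t parange = mmfr0 & 0x7;		/* bottom 3 bits of PARange */
	uint64_t asidbits = (mmfr0 >> 4) & 0xf;	/* ASIDBits, bits [7:4] */

	/* bfi x2, x3, #TCR_IPS_SHIFT, #TCR_IPS_WIDTH */
	tcr &= ~(((1ULL << TCR_IPS_WIDTH) - 1) << TCR_IPS_SHIFT);
	tcr |= parange << TCR_IPS_SHIFT;

	/* Set TCR.AS only when ASIDBits == 0b0010 (16-bit ASIDs). */
	tcr &= ~(1ULL << TCR_AS_SHIFT);
	if (asidbits == 0x2)
		tcr |= 1ULL << TCR_AS_SHIFT;
	return (tcr);
}

int
main(void)
{
	/* Illustrative MMFR0: PARange = 0b0101 (48-bit PA), 16-bit ASIDs. */
	printf("tcr = %#" PRIx64 "\n", tcr_fixup(0, 0x25));
	return (0);
}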
+
+ .align 3
+mair:
+ .quad MAIR_ATTR(MAIR_DEVICE_nGnRnE, VM_MEMATTR_DEVICE) | \
+ MAIR_ATTR(MAIR_NORMAL_NC, VM_MEMATTR_UNCACHEABLE) | \
+ MAIR_ATTR(MAIR_NORMAL_WB, VM_MEMATTR_WRITE_BACK) | \
+ MAIR_ATTR(MAIR_NORMAL_WT, VM_MEMATTR_WRITE_THROUGH)
+tcr:
+ .quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_TG1_4K | \
+ TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
+sctlr_set:
+ /* Bits to set */
+ .quad (SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_UCI | SCTLR_SPAN | \
+ SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \
+ SCTLR_I | SCTLR_SED | SCTLR_SA0 | SCTLR_SA | SCTLR_C | \
+ SCTLR_M | SCTLR_CP15BEN)
+sctlr_clear:
+ /* Bits to clear */
+ .quad (SCTLR_EE | SCTLR_EOE | SCTLR_IESB | SCTLR_WXN | SCTLR_UMA | \
+ SCTLR_ITD | SCTLR_A)
+
+ .globl abort
+abort:
+ b abort
+
+ //.section .init_pagetable
+ .align 12 /* 4KiB aligned */
+ /*
+ * 6 initial tables (in the following order):
+ * L2 for kernel (High addresses)
+ * L1 for kernel
+ * L0 for kernel
+ * L1 bootstrap for user (Low addresses)
+ * L0 bootstrap for user
+ * L0 for user
+ */
+pagetable:
+ .space PAGE_SIZE
+pagetable_l1_ttbr1:
+ .space PAGE_SIZE
+pagetable_l0_ttbr1:
+ .space PAGE_SIZE
+pagetable_l1_ttbr0_bootstrap:
+ .space PAGE_SIZE
+pagetable_l0_ttbr0_bootstrap:
+ .space PAGE_SIZE
+pagetable_l0_ttbr0:
+ .space PAGE_SIZE
+
+ .globl pagetable_dmap
+pagetable_dmap:
+ .space PAGE_SIZE * DMAP_TABLES
+pagetable_end:
+
+el2_pagetable:
+ .space PAGE_SIZE
+
+ .globl init_pt_va
+init_pt_va:
+ .quad pagetable /* XXX: Keep page tables VA */
+
+ .align 4
+initstack:
+ .space (PAGE_SIZE * KSTACK_PAGES)
+initstack_end:
+
+
+ENTRY(sigcode)
+ mov x0, sp
+ add x0, x0, #SF_UC
+
+1:
+ mov x8, #SYS_sigreturn
+ svc 0
+
+ /* sigreturn failed, exit */
+ mov x8, #SYS_exit
+ svc 0
+
+ b 1b
+END(sigcode)
+ /* This may be copied to the stack, keep it 16-byte aligned */
+ .align 3
+esigcode:
+
+ .data
+ .align 3
+ .global szsigcode
+szsigcode:
+ .quad esigcode - sigcode
+
+ENTRY(aarch32_sigcode)
+ .word 0xe1a0000d // mov r0, sp
+ .word 0xe2800040 // add r0, r0, #SIGF_UC
+ .word 0xe59f700c // ldr r7, [pc, #12]
+ .word 0xef000000 // swi #0
+ .word 0xe59f7008 // ldr r7, [pc, #8]
+ .word 0xef000000 // swi #0
+ .word 0xeafffffa // b . - 16
+END(aarch32_sigcode)
+ .word SYS_sigreturn
+ .word SYS_exit
+ .align 3
+aarch32_esigcode:
+ .data
+ .global sz_aarch32_sigcode
+sz_aarch32_sigcode:
+ .quad aarch32_esigcode - aarch32_sigcode
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
new file mode 100644
index 000000000000..cb8d33ff57d5
--- /dev/null
+++ b/sys/arm64/arm64/machdep.c
@@ -0,0 +1,1375 @@
+/*-
+ * Copyright (c) 2014 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include "opt_acpi.h"
+#include "opt_platform.h"
+#include "opt_ddb.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/bus.h>
+#include <sys/cons.h>
+#include <sys/cpu.h>
+#include <sys/csan.h>
+#include <sys/devmap.h>
+#include <sys/efi.h>
+#include <sys/exec.h>
+#include <sys/imgact.h>
+#include <sys/kdb.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/linker.h>
+#include <sys/msgbuf.h>
+#include <sys/pcpu.h>
+#include <sys/physmem.h>
+#include <sys/proc.h>
+#include <sys/ptrace.h>
+#include <sys/reboot.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/signalvar.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/ucontext.h>
+#include <sys/vdso.h>
+#include <sys/vmmeter.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_phys.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_pager.h>
+
+#include <machine/armreg.h>
+#include <machine/cpu.h>
+#include <machine/debug_monitor.h>
+#include <machine/kdb.h>
+#include <machine/machdep.h>
+#include <machine/metadata.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/reg.h>
+#include <machine/undefined.h>
+#include <machine/vmparam.h>
+
+#ifdef VFP
+#include <machine/vfp.h>
+#endif
+
+#ifdef DEV_ACPI
+#include <contrib/dev/acpica/include/acpi.h>
+#include <machine/acpica_machdep.h>
+#endif
+
+#ifdef FDT
+#include <dev/fdt/fdt_common.h>
+#include <dev/ofw/openfirm.h>
+#endif
+
+static void get_fpcontext(struct thread *td, mcontext_t *mcp);
+static void set_fpcontext(struct thread *td, mcontext_t *mcp);
+
+enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;
+
+struct pcpu __pcpu[MAXCPU];
+
+static struct trapframe proc0_tf;
+
+int early_boot = 1;
+int cold = 1;
+static int boot_el;
+
+struct kva_md_info kmi;
+
+int64_t dczva_line_size; /* The size of cache line the dc zva zeroes */
+int has_pan;
+
+/*
+ * Physical address of the EFI System Table. Stashed from the metadata hints
+ * passed into the kernel and used by the EFI code to call runtime services.
+ */
+vm_paddr_t efi_systbl_phys;
+static struct efi_map_header *efihdr;
+
+/* pagezero_* implementations are provided in support.S */
+void pagezero_simple(void *);
+void pagezero_cache(void *);
+
+/* pagezero_simple is default pagezero */
+void (*pagezero)(void *p) = pagezero_simple;
+
+int (*apei_nmi)(void);
+
+static void
+pan_setup(void)
+{
+	uint64_t id_aa64mmfr1;
+
+	id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
+	if (ID_AA64MMFR1_PAN_VAL(id_aa64mmfr1) != ID_AA64MMFR1_PAN_NONE)
+ has_pan = 1;
+}
+
+void
+pan_enable(void)
+{
+
+ /*
+ * The LLVM integrated assembler doesn't understand the PAN
+ * PSTATE field. Because of this we need to manually create
+ * the instruction in an asm block. This is equivalent to:
+ * msr pan, #1
+ *
+ * This sets the PAN bit, stopping the kernel from accessing
+ * memory when userspace can also access it unless the kernel
+ * uses the userspace load/store instructions.
+ */
+ if (has_pan) {
+ WRITE_SPECIALREG(sctlr_el1,
+ READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
+ __asm __volatile(".inst 0xd500409f | (0x1 << 8)");
+ }
+}
+
+bool
+has_hyp(void)
+{
+
+ return (boot_el == 2);
+}
+
+static void
+cpu_startup(void *dummy)
+{
+ vm_paddr_t size;
+ int i;
+
+ printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
+ ptoa((uintmax_t)realmem) / 1024 / 1024);
+
+ if (bootverbose) {
+ printf("Physical memory chunk(s):\n");
+ for (i = 0; phys_avail[i + 1] != 0; i += 2) {
+ size = phys_avail[i + 1] - phys_avail[i];
+ printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
+ (uintmax_t)phys_avail[i],
+ (uintmax_t)phys_avail[i + 1] - 1,
+ (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
+ }
+ }
+
+ printf("avail memory = %ju (%ju MB)\n",
+ ptoa((uintmax_t)vm_free_count()),
+ ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);
+
+ undef_init();
+ install_cpu_errata();
+
+ vm_ksubmap_init(&kmi);
+ bufinit();
+ vm_pager_bufferinit();
+}
+
+SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
+
+static void
+late_ifunc_resolve(void *dummy __unused)
+{
+ link_elf_late_ireloc();
+}
+SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);
+
+int
+cpu_idle_wakeup(int cpu)
+{
+
+ return (0);
+}
+
+int
+fill_regs(struct thread *td, struct reg *regs)
+{
+ struct trapframe *frame;
+
+ frame = td->td_frame;
+ regs->sp = frame->tf_sp;
+ regs->lr = frame->tf_lr;
+ regs->elr = frame->tf_elr;
+ regs->spsr = frame->tf_spsr;
+
+ memcpy(regs->x, frame->tf_x, sizeof(regs->x));
+
+#ifdef COMPAT_FREEBSD32
+ /*
+	 * We may be called here for a 32-bit process if we're using a
+	 * 64-bit debugger. If so, put PC and SPSR where it expects them.
+ */
+ if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
+ regs->x[15] = frame->tf_elr;
+ regs->x[16] = frame->tf_spsr;
+ }
+#endif
+ return (0);
+}
+
+int
+set_regs(struct thread *td, struct reg *regs)
+{
+ struct trapframe *frame;
+
+ frame = td->td_frame;
+ frame->tf_sp = regs->sp;
+ frame->tf_lr = regs->lr;
+ frame->tf_elr = regs->elr;
+ frame->tf_spsr &= ~PSR_FLAGS;
+ frame->tf_spsr |= regs->spsr & PSR_FLAGS;
+
+ memcpy(frame->tf_x, regs->x, sizeof(frame->tf_x));
+
+#ifdef COMPAT_FREEBSD32
+ if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
+ /*
+		 * We may be called for a 32-bit process if we're using
+		 * a 64-bit debugger. If so, get PC and SPSR from where
+		 * it put them.
+ */
+ frame->tf_elr = regs->x[15];
+ frame->tf_spsr = regs->x[16] & PSR_FLAGS;
+ }
+#endif
+ return (0);
+}
+
+int
+fill_fpregs(struct thread *td, struct fpreg *regs)
+{
+#ifdef VFP
+ struct pcb *pcb;
+
+ pcb = td->td_pcb;
+ if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
+ /*
+ * If we have just been running VFP instructions we will
+ * need to save the state to memcpy it below.
+ */
+ if (td == curthread)
+ vfp_save_state(td, pcb);
+
+ KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
+ ("Called fill_fpregs while the kernel is using the VFP"));
+ memcpy(regs->fp_q, pcb->pcb_fpustate.vfp_regs,
+ sizeof(regs->fp_q));
+ regs->fp_cr = pcb->pcb_fpustate.vfp_fpcr;
+ regs->fp_sr = pcb->pcb_fpustate.vfp_fpsr;
+ } else
+#endif
+ memset(regs, 0, sizeof(*regs));
+ return (0);
+}
+
+int
+set_fpregs(struct thread *td, struct fpreg *regs)
+{
+#ifdef VFP
+ struct pcb *pcb;
+
+ pcb = td->td_pcb;
+ KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
+ ("Called set_fpregs while the kernel is using the VFP"));
+ memcpy(pcb->pcb_fpustate.vfp_regs, regs->fp_q, sizeof(regs->fp_q));
+ pcb->pcb_fpustate.vfp_fpcr = regs->fp_cr;
+ pcb->pcb_fpustate.vfp_fpsr = regs->fp_sr;
+#endif
+ return (0);
+}
+
+int
+fill_dbregs(struct thread *td, struct dbreg *regs)
+{
+ struct debug_monitor_state *monitor;
+ int count, i;
+ uint8_t debug_ver, nbkpts;
+
+ memset(regs, 0, sizeof(*regs));
+
+ extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_DebugVer_SHIFT,
+ &debug_ver);
+ extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_BRPs_SHIFT,
+ &nbkpts);
+
+ /*
+ * The BRPs field contains the number of breakpoints - 1. Armv8-A
+ * allows the hardware to provide 2-16 breakpoints so this won't
+	 * allows the hardware to provide 2-16 breakpoints, so this won't
+	 * overflow an 8-bit value.
+ count = nbkpts + 1;
+
+ regs->db_info = debug_ver;
+ regs->db_info <<= 8;
+ regs->db_info |= count;
+
+ monitor = &td->td_pcb->pcb_dbg_regs;
+ if ((monitor->dbg_flags & DBGMON_ENABLED) != 0) {
+ for (i = 0; i < count; i++) {
+ regs->db_regs[i].dbr_addr = monitor->dbg_bvr[i];
+ regs->db_regs[i].dbr_ctrl = monitor->dbg_bcr[i];
+ }
+ }
+
+ return (0);
+}
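db_info packs the debug architecture version into bits [15:8] and the breakpoint count into bits [7:0]. A debugger reading these registers via ptrace(PT_GETDBREGS) could unpack the word as in the sketch below (the example values are invented):

/*
 * Sketch of unpacking db_info on the debugger side: bits [15:8] hold
 * the debug architecture version, bits [7:0] the breakpoint count.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t db_info = (0x6 << 8) | 6;	/* Armv8 debug, 6 breakpoints */

	printf("debug ver %u, %u breakpoints\n",
	    (unsigned)((db_info >> 8) & 0xff), (unsigned)(db_info & 0xff));
	return (0);
}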
+
+int
+set_dbregs(struct thread *td, struct dbreg *regs)
+{
+ struct debug_monitor_state *monitor;
+ int count;
+ int i;
+
+ monitor = &td->td_pcb->pcb_dbg_regs;
+ count = 0;
+ monitor->dbg_enable_count = 0;
+ for (i = 0; i < DBG_BRP_MAX; i++) {
+ /* TODO: Check these values */
+ monitor->dbg_bvr[i] = regs->db_regs[i].dbr_addr;
+ monitor->dbg_bcr[i] = regs->db_regs[i].dbr_ctrl;
+ if ((monitor->dbg_bcr[i] & 1) != 0)
+ monitor->dbg_enable_count++;
+ }
+ if (monitor->dbg_enable_count > 0)
+ monitor->dbg_flags |= DBGMON_ENABLED;
+
+ return (0);
+}
+
+#ifdef COMPAT_FREEBSD32
+int
+fill_regs32(struct thread *td, struct reg32 *regs)
+{
+ int i;
+ struct trapframe *tf;
+
+ tf = td->td_frame;
+ for (i = 0; i < 13; i++)
+ regs->r[i] = tf->tf_x[i];
+ /* For arm32, SP is r13 and LR is r14 */
+ regs->r_sp = tf->tf_x[13];
+ regs->r_lr = tf->tf_x[14];
+ regs->r_pc = tf->tf_elr;
+ regs->r_cpsr = tf->tf_spsr;
+
+ return (0);
+}
+
+int
+set_regs32(struct thread *td, struct reg32 *regs)
+{
+ int i;
+ struct trapframe *tf;
+
+ tf = td->td_frame;
+ for (i = 0; i < 13; i++)
+ tf->tf_x[i] = regs->r[i];
+	/* For arm32, SP is r13 and LR is r14 */
+ tf->tf_x[13] = regs->r_sp;
+ tf->tf_x[14] = regs->r_lr;
+ tf->tf_elr = regs->r_pc;
+ tf->tf_spsr = regs->r_cpsr;
+
+ return (0);
+}
+
+int
+fill_fpregs32(struct thread *td, struct fpreg32 *regs)
+{
+
+ printf("ARM64TODO: fill_fpregs32");
+ return (EDOOFUS);
+}
+
+int
+set_fpregs32(struct thread *td, struct fpreg32 *regs)
+{
+
+ printf("ARM64TODO: set_fpregs32");
+ return (EDOOFUS);
+}
+
+int
+fill_dbregs32(struct thread *td, struct dbreg32 *regs)
+{
+
+ printf("ARM64TODO: fill_dbregs32");
+ return (EDOOFUS);
+}
+
+int
+set_dbregs32(struct thread *td, struct dbreg32 *regs)
+{
+
+ printf("ARM64TODO: set_dbregs32");
+ return (EDOOFUS);
+}
+#endif
+
+int
+ptrace_set_pc(struct thread *td, u_long addr)
+{
+
+ td->td_frame->tf_elr = addr;
+ return (0);
+}
+
+int
+ptrace_single_step(struct thread *td)
+{
+
+ td->td_frame->tf_spsr |= PSR_SS;
+ td->td_pcb->pcb_flags |= PCB_SINGLE_STEP;
+ return (0);
+}
+
+int
+ptrace_clear_single_step(struct thread *td)
+{
+
+ td->td_frame->tf_spsr &= ~PSR_SS;
+ td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP;
+ return (0);
+}
+
+void
+exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
+{
+ struct trapframe *tf = td->td_frame;
+
+ memset(tf, 0, sizeof(struct trapframe));
+
+ tf->tf_x[0] = stack;
+ tf->tf_sp = STACKALIGN(stack);
+ tf->tf_lr = imgp->entry_addr;
+ tf->tf_elr = imgp->entry_addr;
+}
+
+/* Sanity check these are the same size; they will be memcpy'd to and fro */
+CTASSERT(sizeof(((struct trapframe *)0)->tf_x) ==
+ sizeof((struct gpregs *)0)->gp_x);
+CTASSERT(sizeof(((struct trapframe *)0)->tf_x) ==
+ sizeof((struct reg *)0)->x);
+
+int
+get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret)
+{
+ struct trapframe *tf = td->td_frame;
+
+ if (clear_ret & GET_MC_CLEAR_RET) {
+ mcp->mc_gpregs.gp_x[0] = 0;
+ mcp->mc_gpregs.gp_spsr = tf->tf_spsr & ~PSR_C;
+ } else {
+ mcp->mc_gpregs.gp_x[0] = tf->tf_x[0];
+ mcp->mc_gpregs.gp_spsr = tf->tf_spsr;
+ }
+
+ memcpy(&mcp->mc_gpregs.gp_x[1], &tf->tf_x[1],
+ sizeof(mcp->mc_gpregs.gp_x[1]) * (nitems(mcp->mc_gpregs.gp_x) - 1));
+
+ mcp->mc_gpregs.gp_sp = tf->tf_sp;
+ mcp->mc_gpregs.gp_lr = tf->tf_lr;
+ mcp->mc_gpregs.gp_elr = tf->tf_elr;
+ get_fpcontext(td, mcp);
+
+ return (0);
+}
+
+int
+set_mcontext(struct thread *td, mcontext_t *mcp)
+{
+ struct trapframe *tf = td->td_frame;
+ uint32_t spsr;
+
+ spsr = mcp->mc_gpregs.gp_spsr;
+ if ((spsr & PSR_M_MASK) != PSR_M_EL0t ||
+ (spsr & PSR_AARCH32) != 0 ||
+ (spsr & PSR_DAIF) != (td->td_frame->tf_spsr & PSR_DAIF))
+ return (EINVAL);
+
+ memcpy(tf->tf_x, mcp->mc_gpregs.gp_x, sizeof(tf->tf_x));
+
+ tf->tf_sp = mcp->mc_gpregs.gp_sp;
+ tf->tf_lr = mcp->mc_gpregs.gp_lr;
+ tf->tf_elr = mcp->mc_gpregs.gp_elr;
+ tf->tf_spsr = mcp->mc_gpregs.gp_spsr;
+ set_fpcontext(td, mcp);
+
+ return (0);
+}
+
+static void
+get_fpcontext(struct thread *td, mcontext_t *mcp)
+{
+#ifdef VFP
+ struct pcb *curpcb;
+
+ critical_enter();
+
+ curpcb = curthread->td_pcb;
+
+ if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
+ /*
+ * If we have just been running VFP instructions we will
+ * need to save the state to memcpy it below.
+ */
+ vfp_save_state(td, curpcb);
+
+ KASSERT(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate,
+ ("Called get_fpcontext while the kernel is using the VFP"));
+ KASSERT((curpcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0,
+ ("Non-userspace FPU flags set in get_fpcontext"));
+ memcpy(mcp->mc_fpregs.fp_q, curpcb->pcb_fpustate.vfp_regs,
+ sizeof(mcp->mc_fpregs));
+ mcp->mc_fpregs.fp_cr = curpcb->pcb_fpustate.vfp_fpcr;
+ mcp->mc_fpregs.fp_sr = curpcb->pcb_fpustate.vfp_fpsr;
+ mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags;
+ mcp->mc_flags |= _MC_FP_VALID;
+ }
+
+ critical_exit();
+#endif
+}
+
+static void
+set_fpcontext(struct thread *td, mcontext_t *mcp)
+{
+#ifdef VFP
+ struct pcb *curpcb;
+
+ critical_enter();
+
+ if ((mcp->mc_flags & _MC_FP_VALID) != 0) {
+ curpcb = curthread->td_pcb;
+
+ /*
+ * Discard any vfp state for the current thread, we
+ * are about to override it.
+ */
+ vfp_discard(td);
+
+ KASSERT(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate,
+ ("Called set_fpcontext while the kernel is using the VFP"));
+ memcpy(curpcb->pcb_fpustate.vfp_regs, mcp->mc_fpregs.fp_q,
+ sizeof(mcp->mc_fpregs));
+ curpcb->pcb_fpustate.vfp_fpcr = mcp->mc_fpregs.fp_cr;
+ curpcb->pcb_fpustate.vfp_fpsr = mcp->mc_fpregs.fp_sr;
+ curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags & PCB_FP_USERMASK;
+ }
+
+ critical_exit();
+#endif
+}
+
+void
+cpu_idle(int busy)
+{
+
+ spinlock_enter();
+ if (!busy)
+ cpu_idleclock();
+ if (!sched_runnable())
+ __asm __volatile(
+ "dsb sy \n"
+ "wfi \n");
+ if (!busy)
+ cpu_activeclock();
+ spinlock_exit();
+}
+
+void
+cpu_halt(void)
+{
+
+	/* We should have shut down by now; if not, enter a low-power sleep */
+ intr_disable();
+ while (1) {
+ __asm __volatile("wfi");
+ }
+}
+
+/*
+ * Flush the D-cache for non-DMA I/O so that the I-cache can
+ * be made coherent later.
+ */
+void
+cpu_flush_dcache(void *ptr, size_t len)
+{
+
+ /* ARM64TODO TBD */
+}
+
+/* Get current clock frequency for the given CPU ID. */
+int
+cpu_est_clockrate(int cpu_id, uint64_t *rate)
+{
+ struct pcpu *pc;
+
+ pc = pcpu_find(cpu_id);
+ if (pc == NULL || rate == NULL)
+ return (EINVAL);
+
+ if (pc->pc_clock == 0)
+ return (EOPNOTSUPP);
+
+ *rate = pc->pc_clock;
+ return (0);
+}
+
+void
+cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
+{
+
+ pcpu->pc_acpi_id = 0xffffffff;
+}
+
+void
+spinlock_enter(void)
+{
+ struct thread *td;
+ register_t daif;
+
+ td = curthread;
+ if (td->td_md.md_spinlock_count == 0) {
+ daif = intr_disable();
+ td->td_md.md_spinlock_count = 1;
+ td->td_md.md_saved_daif = daif;
+ critical_enter();
+ } else
+ td->td_md.md_spinlock_count++;
+}
+
+void
+spinlock_exit(void)
+{
+ struct thread *td;
+ register_t daif;
+
+ td = curthread;
+ daif = td->td_md.md_saved_daif;
+ td->td_md.md_spinlock_count--;
+ if (td->td_md.md_spinlock_count == 0) {
+ critical_exit();
+ intr_restore(daif);
+ }
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct sigreturn_args {
+ ucontext_t *ucp;
+};
+#endif
+
+int
+sys_sigreturn(struct thread *td, struct sigreturn_args *uap)
+{
+ ucontext_t uc;
+ int error;
+
+ if (copyin(uap->sigcntxp, &uc, sizeof(uc)))
+ return (EFAULT);
+
+ error = set_mcontext(td, &uc.uc_mcontext);
+ if (error != 0)
+ return (error);
+
+ /* Restore signal mask. */
+ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
+
+ return (EJUSTRETURN);
+}
+
+/*
+ * Construct a PCB from a trapframe. This is called from kdb_trap() where
+ * we want to start a backtrace from the function that caused us to enter
+ * the debugger. We have the context in the trapframe, but base the trace
+ * on the PCB. The PCB doesn't have to be perfect, as long as it contains
+ * enough for a backtrace.
+ */
+void
+makectx(struct trapframe *tf, struct pcb *pcb)
+{
+ int i;
+
+ for (i = 0; i < PCB_LR; i++)
+ pcb->pcb_x[i] = tf->tf_x[i];
+
+ pcb->pcb_x[PCB_LR] = tf->tf_lr;
+ pcb->pcb_pc = tf->tf_elr;
+ pcb->pcb_sp = tf->tf_sp;
+}
+
+void
+sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
+{
+ struct thread *td;
+ struct proc *p;
+ struct trapframe *tf;
+ struct sigframe *fp, frame;
+ struct sigacts *psp;
+ struct sysentvec *sysent;
+ int onstack, sig;
+
+ td = curthread;
+ p = td->td_proc;
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+
+ sig = ksi->ksi_signo;
+ psp = p->p_sigacts;
+ mtx_assert(&psp->ps_mtx, MA_OWNED);
+
+ tf = td->td_frame;
+ onstack = sigonstack(tf->tf_sp);
+
+ CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
+ catcher, sig);
+
+ /* Allocate and validate space for the signal handler context. */
+ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack &&
+ SIGISMEMBER(psp->ps_sigonstack, sig)) {
+ fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
+ td->td_sigstk.ss_size);
+#if defined(COMPAT_43)
+ td->td_sigstk.ss_flags |= SS_ONSTACK;
+#endif
+ } else {
+ fp = (struct sigframe *)td->td_frame->tf_sp;
+ }
+
+ /* Make room, keeping the stack aligned */
+ fp--;
+ fp = (struct sigframe *)STACKALIGN(fp);
+
+ /* Fill in the frame to copy out */
+ bzero(&frame, sizeof(frame));
+ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0);
+ frame.sf_si = ksi->ksi_info;
+ frame.sf_uc.uc_sigmask = *mask;
+ frame.sf_uc.uc_stack = td->td_sigstk;
+ frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ?
+ (onstack ? SS_ONSTACK : 0) : SS_DISABLE;
+ mtx_unlock(&psp->ps_mtx);
+ PROC_UNLOCK(td->td_proc);
+
+ /* Copy the sigframe out to the user's stack. */
+ if (copyout(&frame, fp, sizeof(*fp)) != 0) {
+ /* Process has trashed its stack. Kill it. */
+ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp);
+ PROC_LOCK(p);
+ sigexit(td, SIGILL);
+ }
+
+ tf->tf_x[0] = sig;
+ tf->tf_x[1] = (register_t)&fp->sf_si;
+ tf->tf_x[2] = (register_t)&fp->sf_uc;
+
+ tf->tf_elr = (register_t)catcher;
+ tf->tf_sp = (register_t)fp;
+ sysent = p->p_sysent;
+ if (sysent->sv_sigcode_base != 0)
+ tf->tf_lr = (register_t)sysent->sv_sigcode_base;
+ else
+ tf->tf_lr = (register_t)(sysent->sv_psstrings -
+ *(sysent->sv_szsigcode));
+
+ CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr,
+ tf->tf_sp);
+
+ PROC_LOCK(p);
+ mtx_lock(&psp->ps_mtx);
+}
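
The register setup above (x0 = signal number, x1 = &fp->sf_si, x2 = &fp->sf_uc, elr = catcher) is what a userland SA_SIGINFO handler ultimately receives. A minimal, illustrative userland program showing the receiving side (not part of this commit):

#include <signal.h>
#include <stdio.h>
#include <string.h>

static void
handler(int sig, siginfo_t *si, void *ucp)
{
	/* sig, si and ucp arrive via x0, x1 and x2 as set up by sendsig() */
	printf("caught signal %d, si_code %d\n", sig, si->si_code);
	(void)ucp;
}

int
main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGUSR1, &sa, NULL);
	raise(SIGUSR1);
	return (0);
}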
+
+static void
+init_proc0(vm_offset_t kstack)
+{
+ struct pcpu *pcpup = &__pcpu[0];
+
+ proc_linkup0(&proc0, &thread0);
+ thread0.td_kstack = kstack;
+ thread0.td_kstack_pages = KSTACK_PAGES;
+ thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
+ thread0.td_kstack_pages * PAGE_SIZE) - 1;
+ thread0.td_pcb->pcb_fpflags = 0;
+ thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
+ thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
+ thread0.td_frame = &proc0_tf;
+ pcpup->pc_curpcb = thread0.td_pcb;
+}
+
+typedef struct {
+ uint32_t type;
+ uint64_t phys_start;
+ uint64_t virt_start;
+ uint64_t num_pages;
+ uint64_t attr;
+} EFI_MEMORY_DESCRIPTOR;
+
+typedef void (*efi_map_entry_cb)(struct efi_md *);
+
+static void
+foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb)
+{
+ struct efi_md *map, *p;
+ size_t efisz;
+ int ndesc, i;
+
+ /*
+ * Memory map data provided by UEFI via the GetMemoryMap
+ * Boot Services API.
+ */
+ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
+ map = (struct efi_md *)((uint8_t *)efihdr + efisz);
+
+ if (efihdr->descriptor_size == 0)
+ return;
+ ndesc = efihdr->memory_size / efihdr->descriptor_size;
+
+ for (i = 0, p = map; i < ndesc; i++,
+ p = efi_next_descriptor(p, efihdr->descriptor_size)) {
+ cb(p);
+ }
+}
+
+static void
+exclude_efi_map_entry(struct efi_md *p)
+{
+
+ switch (p->md_type) {
+ case EFI_MD_TYPE_CODE:
+ case EFI_MD_TYPE_DATA:
+ case EFI_MD_TYPE_BS_CODE:
+ case EFI_MD_TYPE_BS_DATA:
+ case EFI_MD_TYPE_FREE:
+ /*
+ * We're allowed to use any entry with these types.
+ */
+ break;
+ default:
+ physmem_exclude_region(p->md_phys, p->md_pages * PAGE_SIZE,
+ EXFLAG_NOALLOC);
+ }
+}
+
+static void
+exclude_efi_map_entries(struct efi_map_header *efihdr)
+{
+
+ foreach_efi_map_entry(efihdr, exclude_efi_map_entry);
+}
+
+static void
+add_efi_map_entry(struct efi_md *p)
+{
+
+ switch (p->md_type) {
+ case EFI_MD_TYPE_RT_DATA:
+ /*
+ * Runtime data will be excluded after the DMAP
+ * region is created to stop it from being added
+ * to phys_avail.
+ */
+ case EFI_MD_TYPE_CODE:
+ case EFI_MD_TYPE_DATA:
+ case EFI_MD_TYPE_BS_CODE:
+ case EFI_MD_TYPE_BS_DATA:
+ case EFI_MD_TYPE_FREE:
+ /*
+ * We're allowed to use any entry with these types.
+ */
+ physmem_hardware_region(p->md_phys,
+ p->md_pages * PAGE_SIZE);
+ break;
+ }
+}
+
+static void
+add_efi_map_entries(struct efi_map_header *efihdr)
+{
+
+ foreach_efi_map_entry(efihdr, add_efi_map_entry);
+}
+
+static void
+print_efi_map_entry(struct efi_md *p)
+{
+ const char *type;
+ static const char *types[] = {
+ "Reserved",
+ "LoaderCode",
+ "LoaderData",
+ "BootServicesCode",
+ "BootServicesData",
+ "RuntimeServicesCode",
+ "RuntimeServicesData",
+ "ConventionalMemory",
+ "UnusableMemory",
+ "ACPIReclaimMemory",
+ "ACPIMemoryNVS",
+ "MemoryMappedIO",
+ "MemoryMappedIOPortSpace",
+ "PalCode",
+ "PersistentMemory"
+ };
+
+ if (p->md_type < nitems(types))
+ type = types[p->md_type];
+ else
+ type = "<INVALID>";
+ printf("%23s %012lx %12p %08lx ", type, p->md_phys,
+ p->md_virt, p->md_pages);
+ if (p->md_attr & EFI_MD_ATTR_UC)
+ printf("UC ");
+ if (p->md_attr & EFI_MD_ATTR_WC)
+ printf("WC ");
+ if (p->md_attr & EFI_MD_ATTR_WT)
+ printf("WT ");
+ if (p->md_attr & EFI_MD_ATTR_WB)
+ printf("WB ");
+ if (p->md_attr & EFI_MD_ATTR_UCE)
+ printf("UCE ");
+ if (p->md_attr & EFI_MD_ATTR_WP)
+ printf("WP ");
+ if (p->md_attr & EFI_MD_ATTR_RP)
+ printf("RP ");
+ if (p->md_attr & EFI_MD_ATTR_XP)
+ printf("XP ");
+ if (p->md_attr & EFI_MD_ATTR_NV)
+ printf("NV ");
+ if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
+ printf("MORE_RELIABLE ");
+ if (p->md_attr & EFI_MD_ATTR_RO)
+ printf("RO ");
+ if (p->md_attr & EFI_MD_ATTR_RT)
+ printf("RUNTIME");
+ printf("\n");
+}
+
+static void
+print_efi_map_entries(struct efi_map_header *efihdr)
+{
+
+ printf("%23s %12s %12s %8s %4s\n",
+ "Type", "Physical", "Virtual", "#Pages", "Attr");
+ foreach_efi_map_entry(efihdr, print_efi_map_entry);
+}
+
+#ifdef FDT
+static void
+try_load_dtb(caddr_t kmdp)
+{
+ vm_offset_t dtbp;
+
+ dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
+#if defined(FDT_DTB_STATIC)
+ /*
+ * In case the device tree blob was not retrieved (from metadata) try
+ * to use the statically embedded one.
+ */
+ if (dtbp == 0)
+ dtbp = (vm_offset_t)&fdt_static_dtb;
+#endif
+
+ if (dtbp == (vm_offset_t)NULL) {
+ printf("ERROR loading DTB\n");
+ return;
+ }
+
+ if (OF_install(OFW_FDT, 0) == FALSE)
+ panic("Cannot install FDT");
+
+ if (OF_init((void *)dtbp) != 0)
+ panic("OF_init failed with the found device tree");
+
+ parse_fdt_bootargs();
+}
+#endif
+
+static bool
+bus_probe(void)
+{
+ bool has_acpi, has_fdt;
+ char *order, *env;
+
+ has_acpi = has_fdt = false;
+
+#ifdef FDT
+ has_fdt = (OF_peer(0) != 0);
+#endif
+#ifdef DEV_ACPI
+ has_acpi = (acpi_find_table(ACPI_SIG_SPCR) != 0);
+#endif
+
+ env = kern_getenv("kern.cfg.order");
+ if (env != NULL) {
+ order = env;
+ while (order != NULL) {
+ if (has_acpi &&
+ strncmp(order, "acpi", 4) == 0 &&
+ (order[4] == ',' || order[4] == '\0')) {
+ arm64_bus_method = ARM64_BUS_ACPI;
+ break;
+ }
+ if (has_fdt &&
+ strncmp(order, "fdt", 3) == 0 &&
+ (order[3] == ',' || order[3] == '\0')) {
+ arm64_bus_method = ARM64_BUS_FDT;
+ break;
+ }
+ order = strchr(order, ',');
+ }
+ freeenv(env);
+
+ /* If we set the bus method it is valid */
+ if (arm64_bus_method != ARM64_BUS_NONE)
+ return (true);
+ }
+ /* If no order or an invalid order was set use the default */
+ if (arm64_bus_method == ARM64_BUS_NONE) {
+ if (has_fdt)
+ arm64_bus_method = ARM64_BUS_FDT;
+ else if (has_acpi)
+ arm64_bus_method = ARM64_BUS_ACPI;
+ }
+
+ /*
+ * If no option was set the default is valid, otherwise we are
+ * setting one to get cninit() working, then calling panic to tell
+ * the user about the invalid bus setup.
+ */
+ return (env == NULL);
+}
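
For reference, bus_probe() consumes the kern.cfg.order kernel environment variable as a comma-separated preference list of "acpi" and "fdt" tokens; an illustrative loader tunable (e.g. in loader.conf) that selects ACPI when an SPCR table is present and otherwise falls back to FDT would be:

kern.cfg.order="acpi,fdt"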
+
+static void
+cache_setup(void)
+{
+ int dczva_line_shift;
+ uint32_t dczid_el0;
+
+ identify_cache(READ_SPECIALREG(ctr_el0));
+
+ dczid_el0 = READ_SPECIALREG(dczid_el0);
+
+ /* Check if dc zva is not prohibited */
+ if (dczid_el0 & DCZID_DZP)
+ dczva_line_size = 0;
+ else {
+ /* The block size is 2^BS 4-byte words, i.e. 4 << BS bytes */
+ dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
+ dczva_line_size = sizeof(int) << dczva_line_shift;
+
+ /* Change pagezero function */
+ pagezero = pagezero_cache;
+ }
+}
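
DCZID_EL0.BS encodes log2 of the DC ZVA block size in 4-byte words, which is why the code above shifts sizeof(int); a small sketch of that relationship (hypothetical helper, not part of this commit):

#include <stdint.h>

/* DC ZVA block size in bytes from the DCZID_EL0.BS field. */
static inline uint64_t
dczva_bytes(uint64_t bs)
{
	/* e.g. BS == 4 gives 4 << 4 == 64 bytes, a common value */
	return ((uint64_t)sizeof(int) << bs);
}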
+
+int
+memory_mapping_mode(vm_paddr_t pa)
+{
+ struct efi_md *map, *p;
+ size_t efisz;
+ int ndesc, i;
+
+ if (efihdr == NULL)
+ return (VM_MEMATTR_WRITE_BACK);
+
+ /*
+ * Memory map data provided by UEFI via the GetMemoryMap
+ * Boot Services API.
+ */
+ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
+ map = (struct efi_md *)((uint8_t *)efihdr + efisz);
+
+ if (efihdr->descriptor_size == 0)
+ return (VM_MEMATTR_WRITE_BACK);
+ ndesc = efihdr->memory_size / efihdr->descriptor_size;
+
+ for (i = 0, p = map; i < ndesc; i++,
+ p = efi_next_descriptor(p, efihdr->descriptor_size)) {
+ if (pa < p->md_phys ||
+ pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
+ continue;
+ if (p->md_type == EFI_MD_TYPE_IOMEM ||
+ p->md_type == EFI_MD_TYPE_IOPORT)
+ return (VM_MEMATTR_DEVICE);
+ else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
+ p->md_type == EFI_MD_TYPE_RECLAIM)
+ return (VM_MEMATTR_WRITE_BACK);
+ else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
+ return (VM_MEMATTR_WRITE_THROUGH);
+ else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
+ return (VM_MEMATTR_WRITE_COMBINING);
+ break;
+ }
+
+ return (VM_MEMATTR_DEVICE);
+}
+
+void
+initarm(struct arm64_bootparams *abp)
+{
+ struct efi_fb *efifb;
+ struct pcpu *pcpup;
+ char *env;
+#ifdef FDT
+ struct mem_region mem_regions[FDT_MEM_REGIONS];
+ int mem_regions_sz;
+#endif
+ vm_offset_t lastaddr;
+ caddr_t kmdp;
+ bool valid;
+
+ boot_el = abp->boot_el;
+
+ /* Parse loader or FDT boot parameters. Determine last used address. */
+ lastaddr = parse_boot_param(abp);
+
+ /* Find the kernel address */
+ kmdp = preload_search_by_type("elf kernel");
+ if (kmdp == NULL)
+ kmdp = preload_search_by_type("elf64 kernel");
+
+ identify_cpu(0);
+ update_special_regs(0);
+
+ link_elf_ireloc(kmdp);
+ try_load_dtb(kmdp);
+
+ efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);
+
+ /* Load the physical memory ranges */
+ efihdr = (struct efi_map_header *)preload_search_info(kmdp,
+ MODINFO_METADATA | MODINFOMD_EFI_MAP);
+ if (efihdr != NULL)
+ add_efi_map_entries(efihdr);
+#ifdef FDT
+ else {
+ /* Grab physical memory regions information from device tree. */
+ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
+ NULL) != 0)
+ panic("Cannot get physical memory regions");
+ physmem_hardware_regions(mem_regions, mem_regions_sz);
+ }
+ if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
+ physmem_exclude_regions(mem_regions, mem_regions_sz,
+ EXFLAG_NODUMP | EXFLAG_NOALLOC);
+#endif
+
+ /* Exclude the EFI framebuffer from our view of physical memory. */
+ efifb = (struct efi_fb *)preload_search_info(kmdp,
+ MODINFO_METADATA | MODINFOMD_EFI_FB);
+ if (efifb != NULL)
+ physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
+ EXFLAG_NOALLOC);
+
+ /* Set up the pcpu data; this is needed by pmap_bootstrap */
+ pcpup = &__pcpu[0];
+ pcpu_init(pcpup, 0, sizeof(struct pcpu));
+
+ /*
+ * Set the pcpu pointer with a backup in tpidr_el1 to be
+ * loaded when entering the kernel from userland.
+ */
+ __asm __volatile(
+ "mov x18, %0 \n"
+ "msr tpidr_el1, %0" :: "r"(pcpup));
+
+ PCPU_SET(curthread, &thread0);
+ PCPU_SET(midr, get_midr());
+
+ /* Do basic tuning, hz etc */
+ init_param1();
+
+ cache_setup();
+ pan_setup();
+
+ /* Bootstrap enough of pmap to enter the kernel proper */
+ pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt,
+ KERNBASE - abp->kern_delta, lastaddr - KERNBASE);
+ /* Exclude entries needed in the DMAP region, but not in phys_avail */
+ if (efihdr != NULL)
+ exclude_efi_map_entries(efihdr);
+ physmem_init_kernel_globals();
+
+ devmap_bootstrap(0, NULL);
+
+ valid = bus_probe();
+
+ cninit();
+ set_ttbr0(abp->kern_ttbr0);
+ cpu_tlb_flushID();
+
+ if (!valid)
+ panic("Invalid bus configuration: %s",
+ kern_getenv("kern.cfg.order"));
+
+ init_proc0(abp->kern_stack);
+ msgbufinit(msgbufp, msgbufsize);
+ mutex_init();
+ init_param2(physmem);
+
+ dbg_init();
+ kdb_init();
+ pan_enable();
+
+ kcsan_cpu_init(0);
+
+ env = kern_getenv("kernelname");
+ if (env != NULL)
+ strlcpy(kernelname, env, sizeof(kernelname));
+
+ if (boothowto & RB_VERBOSE) {
+ print_efi_map_entries(efihdr);
+ physmem_print_tables();
+ }
+
+ early_boot = 0;
+}
+
+void
+dbg_init(void)
+{
+
+ /* Clear OS lock */
+ WRITE_SPECIALREG(oslar_el1, 0);
+
+ /* This permits DDB to use debug registers for watchpoints. */
+ dbg_monitor_init();
+
+ /* TODO: Eventually will need to initialize debug registers here. */
+}
+
+#ifdef DDB
+#include <ddb/ddb.h>
+
+DB_SHOW_COMMAND(specialregs, db_show_spregs)
+{
+#define PRINT_REG(reg) \
+ db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))
+
+ PRINT_REG(actlr_el1);
+ PRINT_REG(afsr0_el1);
+ PRINT_REG(afsr1_el1);
+ PRINT_REG(aidr_el1);
+ PRINT_REG(amair_el1);
+ PRINT_REG(ccsidr_el1);
+ PRINT_REG(clidr_el1);
+ PRINT_REG(contextidr_el1);
+ PRINT_REG(cpacr_el1);
+ PRINT_REG(csselr_el1);
+ PRINT_REG(ctr_el0);
+ PRINT_REG(currentel);
+ PRINT_REG(daif);
+ PRINT_REG(dczid_el0);
+ PRINT_REG(elr_el1);
+ PRINT_REG(esr_el1);
+ PRINT_REG(far_el1);
+#if 0
+ /* ARM64TODO: Enable VFP before reading floating-point registers */
+ PRINT_REG(fpcr);
+ PRINT_REG(fpsr);
+#endif
+ PRINT_REG(id_aa64afr0_el1);
+ PRINT_REG(id_aa64afr1_el1);
+ PRINT_REG(id_aa64dfr0_el1);
+ PRINT_REG(id_aa64dfr1_el1);
+ PRINT_REG(id_aa64isar0_el1);
+ PRINT_REG(id_aa64isar1_el1);
+ PRINT_REG(id_aa64pfr0_el1);
+ PRINT_REG(id_aa64pfr1_el1);
+ PRINT_REG(id_afr0_el1);
+ PRINT_REG(id_dfr0_el1);
+ PRINT_REG(id_isar0_el1);
+ PRINT_REG(id_isar1_el1);
+ PRINT_REG(id_isar2_el1);
+ PRINT_REG(id_isar3_el1);
+ PRINT_REG(id_isar4_el1);
+ PRINT_REG(id_isar5_el1);
+ PRINT_REG(id_mmfr0_el1);
+ PRINT_REG(id_mmfr1_el1);
+ PRINT_REG(id_mmfr2_el1);
+ PRINT_REG(id_mmfr3_el1);
+#if 0
+ /* Missing from llvm */
+ PRINT_REG(id_mmfr4_el1);
+#endif
+ PRINT_REG(id_pfr0_el1);
+ PRINT_REG(id_pfr1_el1);
+ PRINT_REG(isr_el1);
+ PRINT_REG(mair_el1);
+ PRINT_REG(midr_el1);
+ PRINT_REG(mpidr_el1);
+ PRINT_REG(mvfr0_el1);
+ PRINT_REG(mvfr1_el1);
+ PRINT_REG(mvfr2_el1);
+ PRINT_REG(revidr_el1);
+ PRINT_REG(sctlr_el1);
+ PRINT_REG(sp_el0);
+ PRINT_REG(spsel);
+ PRINT_REG(spsr_el1);
+ PRINT_REG(tcr_el1);
+ PRINT_REG(tpidr_el0);
+ PRINT_REG(tpidr_el1);
+ PRINT_REG(tpidrro_el0);
+ PRINT_REG(ttbr0_el1);
+ PRINT_REG(ttbr1_el1);
+ PRINT_REG(vbar_el1);
+#undef PRINT_REG
+}
+
+DB_SHOW_COMMAND(vtop, db_show_vtop)
+{
+ uint64_t phys;
+
+ if (have_addr) {
+ phys = arm64_address_translate_s1e1r(addr);
+ db_printf("EL1 physical address reg (read): 0x%016lx\n", phys);
+ phys = arm64_address_translate_s1e1w(addr);
+ db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
+ phys = arm64_address_translate_s1e0r(addr);
+ db_printf("EL0 physical address reg (read): 0x%016lx\n", phys);
+ phys = arm64_address_translate_s1e0w(addr);
+ db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
+ } else
+ db_printf("show vtop <virt_addr>\n");
+}
+#endif
diff --git a/sys/arm64/arm64/machdep_boot.c b/sys/arm64/arm64/machdep_boot.c
new file mode 100644
index 000000000000..9ab4edf616e2
--- /dev/null
+++ b/sys/arm64/arm64/machdep_boot.c
@@ -0,0 +1,232 @@
+/*-
+ * Copyright (c) 2004 Olivier Houchard
+ * Copyright (c) 1994-1998 Mark Brinicombe.
+ * Copyright (c) 1994 Brini.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_platform.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/ctype.h>
+#include <sys/linker.h>
+#include <sys/reboot.h>
+#include <sys/sysctl.h>
+#ifdef FDT
+#include <sys/boot.h>
+#endif
+
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/metadata.h>
+#include <machine/vmparam.h>
+
+#ifdef FDT
+#include <contrib/libfdt/libfdt.h>
+#include <dev/fdt/fdt_common.h>
+#endif
+
+extern int *end;
+static char *loader_envp;
+static char static_kenv[4096];
+
+#ifdef FDT
+#define CMDLINE_GUARD "FreeBSD:"
+#define LBABI_MAX_COMMAND_LINE 512
+static char linux_command_line[LBABI_MAX_COMMAND_LINE + 1];
+#endif
+
+/*
+ * Fake up a boot descriptor table
+ */
+#define PRELOAD_PUSH_VALUE(type, value) do { \
+ *(type *)(preload_ptr + size) = (value); \
+ size += sizeof(type); \
+} while (0)
+
+#define PRELOAD_PUSH_STRING(str) do { \
+ uint32_t ssize; \
+ ssize = strlen(str) + 1; \
+ PRELOAD_PUSH_VALUE(uint32_t, ssize); \
+ strcpy((char*)(preload_ptr + size), str); \
+ size += ssize; \
+ size = roundup(size, sizeof(u_long)); \
+} while (0)
+
+/* Build a minimal set of metadata. */
+static vm_offset_t
+fake_preload_metadata(void *dtb_ptr, size_t dtb_size)
+{
+ vm_offset_t lastaddr;
+ static char fake_preload[256];
+ caddr_t preload_ptr;
+ size_t size;
+
+ lastaddr = (vm_offset_t)&end;
+ preload_ptr = (caddr_t)&fake_preload[0];
+ size = 0;
+
+ PRELOAD_PUSH_VALUE(uint32_t, MODINFO_NAME);
+ PRELOAD_PUSH_STRING("kernel");
+
+ PRELOAD_PUSH_VALUE(uint32_t, MODINFO_TYPE);
+ PRELOAD_PUSH_STRING("elf kernel");
+
+ PRELOAD_PUSH_VALUE(uint32_t, MODINFO_ADDR);
+ PRELOAD_PUSH_VALUE(uint32_t, sizeof(vm_offset_t));
+ PRELOAD_PUSH_VALUE(uint64_t, VM_MIN_KERNEL_ADDRESS);
+
+ PRELOAD_PUSH_VALUE(uint32_t, MODINFO_SIZE);
+ PRELOAD_PUSH_VALUE(uint32_t, sizeof(size_t));
+ PRELOAD_PUSH_VALUE(uint64_t, (size_t)(&end - VM_MIN_KERNEL_ADDRESS));
+
+ if (dtb_ptr != NULL) {
+ /* Copy DTB to KVA space and insert it into module chain. */
+ lastaddr = roundup(lastaddr, sizeof(int));
+ PRELOAD_PUSH_VALUE(uint32_t, MODINFO_METADATA | MODINFOMD_DTBP);
+ PRELOAD_PUSH_VALUE(uint32_t, sizeof(uint64_t));
+ PRELOAD_PUSH_VALUE(uint64_t, (uint64_t)lastaddr);
+ memmove((void *)lastaddr, dtb_ptr, dtb_size);
+ lastaddr += dtb_size;
+ lastaddr = roundup(lastaddr, sizeof(int));
+ }
+ /* End marker */
+ PRELOAD_PUSH_VALUE(uint32_t, 0);
+ PRELOAD_PUSH_VALUE(uint32_t, 0);
+
+ preload_metadata = (caddr_t)(uintptr_t)fake_preload;
+
+ init_static_kenv(NULL, 0);
+
+ return (lastaddr);
+}
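
The chain built above follows the usual preload metadata layout: a 32-bit tag, a 32-bit length, then the payload padded to u_long alignment, terminated by a pair of zero words. A minimal reader sketch under that assumption (illustrative only, name walk_fake_preload is hypothetical):

#include <stdint.h>

static void
walk_fake_preload(const char *p)
{
	uint32_t tag, len;

	for (;;) {
		tag = *(const uint32_t *)p;
		len = *(const uint32_t *)(p + sizeof(uint32_t));
		if (tag == 0 && len == 0)
			break;		/* double-zero end marker pushed above */
		/* skip the two header words plus the payload, padded to 8 bytes */
		p += 2 * sizeof(uint32_t) + ((len + 7) & ~7u);
	}
}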
+
+#ifdef FDT
+
+/* Convert the U-Boot command line into FreeBSD kenv and boot options. */
+static void
+cmdline_set_env(char *cmdline, const char *guard)
+{
+ size_t guard_len;
+
+ /* Skip leading spaces. */
+ while (isspace(*cmdline))
+ cmdline++;
+
+ /* Test and remove guard. */
+ if (guard != NULL && guard[0] != '\0') {
+ guard_len = strlen(guard);
+ if (strncasecmp(cmdline, guard, guard_len) != 0)
+ return;
+ cmdline += guard_len;
+ }
+
+ boothowto |= boot_parse_cmdline(cmdline);
+}
+
+void
+parse_fdt_bootargs(void)
+{
+
+ if (loader_envp == NULL && fdt_get_chosen_bootargs(linux_command_line,
+ LBABI_MAX_COMMAND_LINE) == 0) {
+ init_static_kenv(static_kenv, sizeof(static_kenv));
+ cmdline_set_env(linux_command_line, CMDLINE_GUARD);
+ }
+}
+
+#endif
+
+#if defined(LINUX_BOOT_ABI) && defined(FDT)
+static vm_offset_t
+linux_parse_boot_param(struct arm64_bootparams *abp)
+{
+ struct fdt_header *dtb_ptr;
+ size_t dtb_size;
+
+ if (abp->modulep == 0)
+ return (0);
+ /* Test if modulep points to a valid DTB. */
+ dtb_ptr = (struct fdt_header *)abp->modulep;
+ if (fdt_check_header(dtb_ptr) != 0)
+ return (0);
+ dtb_size = fdt_totalsize(dtb_ptr);
+ return (fake_preload_metadata(dtb_ptr, dtb_size));
+}
+
+#endif
+
+static vm_offset_t
+freebsd_parse_boot_param(struct arm64_bootparams *abp)
+{
+ vm_offset_t lastaddr = 0;
+ void *kmdp;
+#ifdef DDB
+ vm_offset_t ksym_start;
+ vm_offset_t ksym_end;
+#endif
+
+ if (abp->modulep == 0)
+ return (0);
+
+ preload_metadata = (caddr_t)(uintptr_t)(abp->modulep);
+ kmdp = preload_search_by_type("elf kernel");
+ if (kmdp == NULL)
+ return (0);
+
+ boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
+ loader_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
+ init_static_kenv(loader_envp, 0);
+ lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t);
+#ifdef DDB
+ ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
+ ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
+ db_fetch_ksymtab(ksym_start, ksym_end, 0);
+#endif
+ return (lastaddr);
+}
+
+vm_offset_t
+parse_boot_param(struct arm64_bootparams *abp)
+{
+ vm_offset_t lastaddr;
+
+#if defined(LINUX_BOOT_ABI) && defined(FDT)
+ lastaddr = linux_parse_boot_param(abp);
+ if (lastaddr != 0)
+ return (lastaddr);
+#endif
+ lastaddr = freebsd_parse_boot_param(abp);
+ if (lastaddr != 0)
+ return (lastaddr);
+
+ /* Fall back to hardcoded metadata. */
+ lastaddr = fake_preload_metadata(NULL, 0);
+
+ return (lastaddr);
+}
diff --git a/sys/arm64/arm64/mem.c b/sys/arm64/arm64/mem.c
new file mode 100644
index 000000000000..d51744c6fbe3
--- /dev/null
+++ b/sys/arm64/arm64/mem.c
@@ -0,0 +1,138 @@
+/*-
+ * Copyright (c) 2014 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/malloc.h>
+#include <sys/memrange.h>
+#include <sys/uio.h>
+
+#include <machine/memdev.h>
+#include <machine/vmparam.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_page.h>
+
+struct mem_range_softc mem_range_softc;
+
+int
+memrw(struct cdev *dev, struct uio *uio, int flags)
+{
+ struct iovec *iov;
+ struct vm_page m;
+ vm_page_t marr;
+ vm_offset_t off, v;
+ u_int cnt;
+ int error;
+
+ error = 0;
+
+ while (uio->uio_resid > 0 && error == 0) {
+ iov = uio->uio_iov;
+ if (iov->iov_len == 0) {
+ uio->uio_iov++;
+ uio->uio_iovcnt--;
+ if (uio->uio_iovcnt < 0)
+ panic("memrw");
+ continue;
+ }
+
+ v = uio->uio_offset;
+ off = v & PAGE_MASK;
+ cnt = ulmin(iov->iov_len, PAGE_SIZE - (u_int)off);
+ if (cnt == 0)
+ continue;
+
+ switch(dev2unit(dev)) {
+ case CDEV_MINOR_KMEM:
+ /* If the address is in the DMAP just copy it */
+ if (VIRT_IN_DMAP(v)) {
+ error = uiomove((void *)v, cnt, uio);
+ break;
+ }
+
+ if (!kernacc((void *)v, cnt, uio->uio_rw == UIO_READ ?
+ VM_PROT_READ : VM_PROT_WRITE)) {
+ error = EFAULT;
+ break;
+ }
+
+ /* Get the physical address to read */
+ v = pmap_extract(kernel_pmap, v);
+ if (v == 0) {
+ error = EFAULT;
+ break;
+ }
+
+ /* FALLTHROUGH */
+ case CDEV_MINOR_MEM:
+ /* If within the DMAP use this to copy from */
+ if (PHYS_IN_DMAP(v)) {
+ v = PHYS_TO_DMAP(v);
+ error = uiomove((void *)v, cnt, uio);
+ break;
+ }
+
+ /* Have uiomove_fromphys handle the data */
+ m.phys_addr = trunc_page(v);
+ marr = &m;
+ uiomove_fromphys(&marr, off, cnt, uio);
+ break;
+ }
+ }
+
+ return (error);
+}
+
+/*
+ * Allow user processes to mmap(2) some memory sections
+ * instead of going through read/write.
+ */
+/* ARGSUSED */
+int
+memmmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
+ int prot __unused, vm_memattr_t *memattr __unused)
+{
+ if (dev2unit(dev) == CDEV_MINOR_MEM) {
+ *paddr = offset;
+ return (0);
+ }
+ return (-1);
+}
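
Since memmmap() passes the /dev/mem offset straight through as a physical address, a userland consumer can mmap(2) physical memory directly. An illustrative, machine-specific example, where 0x40000000 is only a placeholder address:

#include <sys/mman.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	off_t pa = 0x40000000;		/* placeholder physical address */
	volatile uint32_t *p;
	int fd;

	fd = open("/dev/mem", O_RDONLY);
	if (fd == -1)
		return (1);
	p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, pa);
	if (p == MAP_FAILED)
		return (1);
	printf("word at %#jx: %#x\n", (uintmax_t)pa, *p);
	return (0);
}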
+
+int
+memioctl_md(struct cdev *dev __unused, u_long cmd __unused,
+ caddr_t data __unused, int flags __unused, struct thread *td __unused)
+{
+ return (ENOTTY);
+}
diff --git a/sys/arm64/arm64/memcpy.S b/sys/arm64/arm64/memcpy.S
new file mode 100644
index 000000000000..f98c2513fa58
--- /dev/null
+++ b/sys/arm64/arm64/memcpy.S
@@ -0,0 +1,219 @@
+/* Copyright (c) 2012, Linaro Limited
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the Linaro nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+
+/*
+ * Copyright (c) 2015 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
+ *
+ */
+
+#define dstin x0
+#define src x1
+#define count x2
+#define dst x3
+#define srcend x4
+#define dstend x5
+#define A_l x6
+#define A_lw w6
+#define A_h x7
+#define A_hw w7
+#define B_l x8
+#define B_lw w8
+#define B_h x9
+#define C_l x10
+#define C_h x11
+#define D_l x12
+#define D_h x13
+#define E_l src
+#define E_h count
+#define F_l srcend
+#define F_h dst
+#define tmp1 x9
+
+#define L(l) .L ## l
+
+/* Copies are split into 3 main cases: small copies of up to 16 bytes,
+   medium copies of 17..96 bytes which are fully unrolled, and large
+   copies of more than 96 bytes which align the destination and use an
+   unrolled loop processing 64 bytes per iteration.
+ Small and medium copies read all data before writing, allowing any
+ kind of overlap, and memmove tailcalls memcpy for these cases as
+ well as non-overlapping copies.
+*/
+
+ENTRY(memcpy)
+ prfm PLDL1KEEP, [src]
+ add srcend, src, count
+ add dstend, dstin, count
+ cmp count, 16
+ b.ls L(copy16)
+ cmp count, 96
+ b.hi L(copy_long)
+
+ /* Medium copies: 17..96 bytes. */
+ sub tmp1, count, 1
+ ldp A_l, A_h, [src]
+ tbnz tmp1, 6, L(copy96)
+ ldp D_l, D_h, [srcend, -16]
+ tbz tmp1, 5, 1f
+ ldp B_l, B_h, [src, 16]
+ ldp C_l, C_h, [srcend, -32]
+ stp B_l, B_h, [dstin, 16]
+ stp C_l, C_h, [dstend, -32]
+1:
+ stp A_l, A_h, [dstin]
+ stp D_l, D_h, [dstend, -16]
+ ret
+
+ .p2align 4
+ /* Small copies: 0..16 bytes. */
+L(copy16):
+ cmp count, 8
+ b.lo 1f
+ ldr A_l, [src]
+ ldr A_h, [srcend, -8]
+ str A_l, [dstin]
+ str A_h, [dstend, -8]
+ ret
+ .p2align 4
+1:
+ tbz count, 2, 1f
+ ldr A_lw, [src]
+ ldr A_hw, [srcend, -4]
+ str A_lw, [dstin]
+ str A_hw, [dstend, -4]
+ ret
+
+ /* Copy 0..3 bytes. Use a branchless sequence that copies the same
+ byte 3 times if count==1, or the 2nd byte twice if count==2. */
+1:
+ cbz count, 2f
+ lsr tmp1, count, 1
+ ldrb A_lw, [src]
+ ldrb A_hw, [srcend, -1]
+ ldrb B_lw, [src, tmp1]
+ strb A_lw, [dstin]
+ strb B_lw, [dstin, tmp1]
+ strb A_hw, [dstend, -1]
+2: ret
+
+ .p2align 4
+ /* Copy 64..96 bytes. Copy 64 bytes from the start and
+ 32 bytes from the end. */
+L(copy96):
+ ldp B_l, B_h, [src, 16]
+ ldp C_l, C_h, [src, 32]
+ ldp D_l, D_h, [src, 48]
+ ldp E_l, E_h, [srcend, -32]
+ ldp F_l, F_h, [srcend, -16]
+ stp A_l, A_h, [dstin]
+ stp B_l, B_h, [dstin, 16]
+ stp C_l, C_h, [dstin, 32]
+ stp D_l, D_h, [dstin, 48]
+ stp E_l, E_h, [dstend, -32]
+ stp F_l, F_h, [dstend, -16]
+ ret
+
+ /* Align DST to 16 byte alignment so that we don't cross cache line
+ boundaries on both loads and stores. There are at least 96 bytes
+ to copy, so copy 16 bytes unaligned and then align. The loop
+ copies 64 bytes per iteration and prefetches one iteration ahead. */
+
+ .p2align 4
+L(copy_long):
+ and tmp1, dstin, 15
+ bic dst, dstin, 15
+ ldp D_l, D_h, [src]
+ sub src, src, tmp1
+ add count, count, tmp1 /* Count is now 16 too large. */
+ ldp A_l, A_h, [src, 16]
+ stp D_l, D_h, [dstin]
+ ldp B_l, B_h, [src, 32]
+ ldp C_l, C_h, [src, 48]
+ ldp D_l, D_h, [src, 64]!
+ subs count, count, 128 + 16 /* Test and readjust count. */
+ b.ls 2f
+1:
+ stp A_l, A_h, [dst, 16]
+ ldp A_l, A_h, [src, 16]
+ stp B_l, B_h, [dst, 32]
+ ldp B_l, B_h, [src, 32]
+ stp C_l, C_h, [dst, 48]
+ ldp C_l, C_h, [src, 48]
+ stp D_l, D_h, [dst, 64]!
+ ldp D_l, D_h, [src, 64]!
+ subs count, count, 64
+ b.hi 1b
+
+ /* Write the last full set of 64 bytes. The remainder is at most 64
+ bytes, so it is safe to always copy 64 bytes from the end even if
+ there is just 1 byte left. */
+2:
+ ldp E_l, E_h, [srcend, -64]
+ stp A_l, A_h, [dst, 16]
+ ldp A_l, A_h, [srcend, -48]
+ stp B_l, B_h, [dst, 32]
+ ldp B_l, B_h, [srcend, -32]
+ stp C_l, C_h, [dst, 48]
+ ldp C_l, C_h, [srcend, -16]
+ stp D_l, D_h, [dst, 64]
+ stp E_l, E_h, [dstend, -64]
+ stp A_l, A_h, [dstend, -48]
+ stp B_l, B_h, [dstend, -32]
+ stp C_l, C_h, [dstend, -16]
+ ret
+END(memcpy)
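
The small and medium paths above read from both ends before writing, so a pair of fixed-size accesses covers any length in a range and overlap is harmless. A C-level illustration of the idea for 8..16 byte copies (a sketch of the technique, not the implementation; copy_8_to_16 is a hypothetical name):

#include <stdint.h>
#include <string.h>

/* Mirrors L(copy16): one 8-byte access from each end, overlapping when n < 16. */
static void
copy_8_to_16(unsigned char *dst, const unsigned char *src, size_t n)
{
	uint64_t head, tail;

	memcpy(&head, src, 8);		/* like: ldr A_l, [src] */
	memcpy(&tail, src + n - 8, 8);	/* like: ldr A_h, [srcend, -8] */
	memcpy(dst, &head, 8);
	memcpy(dst + n - 8, &tail, 8);
}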
diff --git a/sys/arm64/arm64/memmove.S b/sys/arm64/arm64/memmove.S
new file mode 100644
index 000000000000..4b99dccc536e
--- /dev/null
+++ b/sys/arm64/arm64/memmove.S
@@ -0,0 +1,150 @@
+/* Copyright (c) 2013, Linaro Limited
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the Linaro nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+
+/*
+ * Copyright (c) 2015 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses
+ */
+
+/* Parameters and result. */
+#define dstin x0
+#define src x1
+#define count x2
+#define srcend x3
+#define dstend x4
+#define tmp1 x5
+#define A_l x6
+#define A_h x7
+#define B_l x8
+#define B_h x9
+#define C_l x10
+#define C_h x11
+#define D_l x12
+#define D_h x13
+#define E_l count
+#define E_h tmp1
+
+/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps.
+ Larger backwards copies are also handled by memcpy. The only remaining
+ case is forward large copies. The destination is aligned, and an
+ unrolled loop processes 64 bytes per iteration.
+*/
+
+ENTRY(bcopy)
+ /* Switch the input pointers when called as bcopy */
+ mov x3, x1
+ mov x1, x0
+ mov x0, x3
+EENTRY(memmove)
+ sub tmp1, dstin, src
+ cmp count, 96
+ ccmp tmp1, count, 2, hi
+ b.hs memcpy
+
+ cbz tmp1, 3f
+ add dstend, dstin, count
+ add srcend, src, count
+
+ /* Align dstend to 16 byte alignment so that we don't cross cache line
+ boundaries on both loads and stores. There are at least 96 bytes
+ to copy, so copy 16 bytes unaligned and then align. The loop
+ copies 64 bytes per iteration and prefetches one iteration ahead. */
+
+ and tmp1, dstend, 15
+ ldp D_l, D_h, [srcend, -16]
+ sub srcend, srcend, tmp1
+ sub count, count, tmp1
+ ldp A_l, A_h, [srcend, -16]
+ stp D_l, D_h, [dstend, -16]
+ ldp B_l, B_h, [srcend, -32]
+ ldp C_l, C_h, [srcend, -48]
+ ldp D_l, D_h, [srcend, -64]!
+ sub dstend, dstend, tmp1
+ subs count, count, 128
+ b.ls 2f
+ nop
+1:
+ stp A_l, A_h, [dstend, -16]
+ ldp A_l, A_h, [srcend, -16]
+ stp B_l, B_h, [dstend, -32]
+ ldp B_l, B_h, [srcend, -32]
+ stp C_l, C_h, [dstend, -48]
+ ldp C_l, C_h, [srcend, -48]
+ stp D_l, D_h, [dstend, -64]!
+ ldp D_l, D_h, [srcend, -64]!
+ subs count, count, 64
+ b.hi 1b
+
+ /* Write the last full set of 64 bytes. The remainder is at most 64
+ bytes, so it is safe to always copy 64 bytes from the start even if
+ there is just 1 byte left. */
+2:
+ ldp E_l, E_h, [src, 48]
+ stp A_l, A_h, [dstend, -16]
+ ldp A_l, A_h, [src, 32]
+ stp B_l, B_h, [dstend, -32]
+ ldp B_l, B_h, [src, 16]
+ stp C_l, C_h, [dstend, -48]
+ ldp C_l, C_h, [src]
+ stp D_l, D_h, [dstend, -64]
+ stp E_l, E_h, [dstin, 48]
+ stp A_l, A_h, [dstin, 32]
+ stp B_l, B_h, [dstin, 16]
+ stp C_l, C_h, [dstin]
+3: ret
+EEND(memmove)
+END(bcopy)
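
The entry sequence (sub/cmp/ccmp/b.hs) tail-calls memcpy unless the copy is both large and forward-overlapping; expressed as C, the condition for taking the backward loop above is roughly the following (illustrative sketch, relying on unsigned wrap-around just as the assembly does):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* True when memmove must copy backwards itself rather than defer to memcpy. */
static bool
needs_backward_copy(uintptr_t dst, uintptr_t src, size_t n)
{
	/* count > 96 and dst lies inside [src, src + n) "from above" */
	return (n > 96 && (dst - src) < n);
}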
diff --git a/sys/arm64/arm64/minidump_machdep.c b/sys/arm64/arm64/minidump_machdep.c
new file mode 100644
index 000000000000..ba22f7dfc16f
--- /dev/null
+++ b/sys/arm64/arm64/minidump_machdep.c
@@ -0,0 +1,448 @@
+/*-
+ * Copyright (c) 2006 Peter Wemm
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_watchdog.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/cons.h>
+#include <sys/kernel.h>
+#include <sys/kerneldump.h>
+#include <sys/msgbuf.h>
+#include <sys/watchdog.h>
+#include <sys/vmmeter.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_page.h>
+#include <vm/vm_phys.h>
+#include <vm/pmap.h>
+
+#include <machine/md_var.h>
+#include <machine/pte.h>
+#include <machine/minidump.h>
+
+CTASSERT(sizeof(struct kerneldumpheader) == 512);
+
+uint64_t *vm_page_dump;
+int vm_page_dump_size;
+
+static struct kerneldumpheader kdh;
+
+/* Handle chunked writes. */
+static size_t fragsz;
+static void *dump_va;
+static size_t counter, progress, dumpsize;
+
+static uint64_t tmpbuffer[Ln_ENTRIES];
+
+CTASSERT(sizeof(*vm_page_dump) == 8);
+
+static int
+is_dumpable(vm_paddr_t pa)
+{
+ vm_page_t m;
+ int i;
+
+ if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL)
+ return ((m->flags & PG_NODUMP) == 0);
+ for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
+ if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
+ return (1);
+ }
+ return (0);
+}
+
+static int
+blk_flush(struct dumperinfo *di)
+{
+ int error;
+
+ if (fragsz == 0)
+ return (0);
+
+ error = dump_append(di, dump_va, 0, fragsz);
+ fragsz = 0;
+ return (error);
+}
+
+static struct {
+ int min_per;
+ int max_per;
+ int visited;
+} progress_track[10] = {
+ { 0, 10, 0},
+ { 10, 20, 0},
+ { 20, 30, 0},
+ { 30, 40, 0},
+ { 40, 50, 0},
+ { 50, 60, 0},
+ { 60, 70, 0},
+ { 70, 80, 0},
+ { 80, 90, 0},
+ { 90, 100, 0}
+};
+
+static void
+report_progress(size_t progress, size_t dumpsize)
+{
+ int sofar, i;
+
+ sofar = 100 - ((progress * 100) / dumpsize);
+ for (i = 0; i < nitems(progress_track); i++) {
+ if (sofar < progress_track[i].min_per ||
+ sofar > progress_track[i].max_per)
+ continue;
+ if (progress_track[i].visited)
+ return;
+ progress_track[i].visited = 1;
+ printf("..%d%%", sofar);
+ return;
+ }
+}
+
+static int
+blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
+{
+ size_t len;
+ int error, c;
+ u_int maxdumpsz;
+
+ maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
+ if (maxdumpsz == 0) /* seatbelt */
+ maxdumpsz = PAGE_SIZE;
+ error = 0;
+ if ((sz % PAGE_SIZE) != 0) {
+ printf("size not page aligned\n");
+ return (EINVAL);
+ }
+ if (ptr != NULL && pa != 0) {
+ printf("cant have both va and pa!\n");
+ return (EINVAL);
+ }
+ if ((((uintptr_t)pa) % PAGE_SIZE) != 0) {
+ printf("address not page aligned %p\n", ptr);
+ return (EINVAL);
+ }
+ if (ptr != NULL) {
+ /*
+ * If we're doing a virtual dump, flush any
+ * pre-existing pa pages.
+ */
+ error = blk_flush(di);
+ if (error)
+ return (error);
+ }
+ while (sz) {
+ len = maxdumpsz - fragsz;
+ if (len > sz)
+ len = sz;
+ counter += len;
+ progress -= len;
+ if (counter >> 22) {
+ report_progress(progress, dumpsize);
+ counter &= (1 << 22) - 1;
+ }
+
+ wdog_kern_pat(WD_LASTVAL);
+
+ if (ptr) {
+ error = dump_append(di, ptr, 0, len);
+ if (error)
+ return (error);
+ ptr += len;
+ sz -= len;
+ } else {
+ dump_va = (void *)PHYS_TO_DMAP(pa);
+ fragsz += len;
+ pa += len;
+ sz -= len;
+ error = blk_flush(di);
+ if (error)
+ return (error);
+ }
+
+ /* Check for user abort. */
+ c = cncheckc();
+ if (c == 0x03)
+ return (ECANCELED);
+ if (c != -1)
+ printf(" (CTRL-C to abort) ");
+ }
+
+ return (0);
+}
+
+int
+minidumpsys(struct dumperinfo *di)
+{
+ struct minidumphdr mdhdr;
+ pd_entry_t *l0, *l1, *l2;
+ pt_entry_t *l3;
+ vm_offset_t va;
+ vm_paddr_t pa;
+ uint64_t bits;
+ uint32_t pmapsize;
+ int bit, error, i, j, retry_count;
+
+ retry_count = 0;
+ retry:
+ retry_count++;
+ error = 0;
+ pmapsize = 0;
+ for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) {
+ pmapsize += PAGE_SIZE;
+ if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3))
+ continue;
+
+ if ((*l1 & ATTR_DESCR_MASK) == L1_BLOCK) {
+ pa = *l1 & ~ATTR_MASK;
+ for (i = 0; i < Ln_ENTRIES * Ln_ENTRIES;
+ i++, pa += PAGE_SIZE)
+ if (is_dumpable(pa))
+ dump_add_page(pa);
+ pmapsize += (Ln_ENTRIES - 1) * PAGE_SIZE;
+ va += L1_SIZE - L2_SIZE;
+ } else if ((*l2 & ATTR_DESCR_MASK) == L2_BLOCK) {
+ pa = *l2 & ~ATTR_MASK;
+ for (i = 0; i < Ln_ENTRIES; i++, pa += PAGE_SIZE) {
+ if (is_dumpable(pa))
+ dump_add_page(pa);
+ }
+ } else if ((*l2 & ATTR_DESCR_MASK) == L2_TABLE) {
+ for (i = 0; i < Ln_ENTRIES; i++) {
+ if ((l3[i] & ATTR_DESCR_MASK) != L3_PAGE)
+ continue;
+ pa = l3[i] & ~ATTR_MASK;
+ if (is_dumpable(pa))
+ dump_add_page(pa);
+ }
+ }
+ }
+
+ /* Calculate dump size. */
+ dumpsize = pmapsize;
+ dumpsize += round_page(msgbufp->msg_size);
+ dumpsize += round_page(vm_page_dump_size);
+ for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+ bits = vm_page_dump[i];
+ while (bits) {
+ bit = ffsl(bits) - 1;
+ pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) +
+ bit) * PAGE_SIZE;
+ /* Clear out undumpable pages now if needed */
+ if (is_dumpable(pa))
+ dumpsize += PAGE_SIZE;
+ else
+ dump_drop_page(pa);
+ bits &= ~(1ul << bit);
+ }
+ }
+ dumpsize += PAGE_SIZE;
+
+ progress = dumpsize;
+
+ /* Initialize mdhdr */
+ bzero(&mdhdr, sizeof(mdhdr));
+ strcpy(mdhdr.magic, MINIDUMP_MAGIC);
+ mdhdr.version = MINIDUMP_VERSION;
+ mdhdr.msgbufsize = msgbufp->msg_size;
+ mdhdr.bitmapsize = vm_page_dump_size;
+ mdhdr.pmapsize = pmapsize;
+ mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS;
+ mdhdr.dmapphys = DMAP_MIN_PHYSADDR;
+ mdhdr.dmapbase = DMAP_MIN_ADDRESS;
+ mdhdr.dmapend = DMAP_MAX_ADDRESS;
+
+ dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AARCH64_VERSION,
+ dumpsize);
+
+ error = dump_start(di, &kdh);
+ if (error != 0)
+ goto fail;
+
+ printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20,
+ ptoa((uintmax_t)physmem) / 1048576);
+
+ /* Dump my header */
+ bzero(&tmpbuffer, sizeof(tmpbuffer));
+ bcopy(&mdhdr, &tmpbuffer, sizeof(mdhdr));
+ error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
+ if (error)
+ goto fail;
+
+ /* Dump msgbuf up front */
+ error = blk_write(di, (char *)msgbufp->msg_ptr, 0,
+ round_page(msgbufp->msg_size));
+ if (error)
+ goto fail;
+
+ /* Dump bitmap */
+ error = blk_write(di, (char *)vm_page_dump, 0,
+ round_page(vm_page_dump_size));
+ if (error)
+ goto fail;
+
+ /* Dump kernel page directory pages */
+ bzero(&tmpbuffer, sizeof(tmpbuffer));
+ for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) {
+ if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) {
+ /* We always write a page, even if it is zero */
+ error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
+ if (error)
+ goto fail;
+ /* flush, in case we reuse tmpbuffer in the same block */
+ error = blk_flush(di);
+ if (error)
+ goto fail;
+ } else if ((*l1 & ATTR_DESCR_MASK) == L1_BLOCK) {
+ /*
+ * Handle a 1GB block mapping: write out 512 fake L2
+ * pages.
+ */
+ pa = (*l1 & ~ATTR_MASK) | (va & L1_OFFSET);
+
+ for (i = 0; i < Ln_ENTRIES; i++) {
+ for (j = 0; j < Ln_ENTRIES; j++) {
+ tmpbuffer[j] = pa + i * L2_SIZE +
+ j * PAGE_SIZE | ATTR_DEFAULT |
+ L3_PAGE;
+ }
+ error = blk_write(di, (char *)&tmpbuffer, 0,
+ PAGE_SIZE);
+ if (error)
+ goto fail;
+ }
+ /* flush, in case we reuse tmpbuffer in the same block */
+ error = blk_flush(di);
+ if (error)
+ goto fail;
+ bzero(&tmpbuffer, sizeof(tmpbuffer));
+ va += L1_SIZE - L2_SIZE;
+ } else if ((*l2 & ATTR_DESCR_MASK) == L2_BLOCK) {
+ pa = (*l2 & ~ATTR_MASK) | (va & L2_OFFSET);
+
+ /* Generate fake l3 entries based upon the l2 entry */
+ for (i = 0; i < Ln_ENTRIES; i++) {
+ tmpbuffer[i] = pa + (i * PAGE_SIZE) |
+ ATTR_DEFAULT | L3_PAGE;
+ }
+ error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
+ if (error)
+ goto fail;
+ /* flush, in case we reuse tmpbuffer in the same block */
+ error = blk_flush(di);
+ if (error)
+ goto fail;
+ bzero(&tmpbuffer, sizeof(tmpbuffer));
+ continue;
+ } else {
+ pa = *l2 & ~ATTR_MASK;
+
+ error = blk_write(di, NULL, pa, PAGE_SIZE);
+ if (error)
+ goto fail;
+ }
+ }
+
+ /* Dump memory chunks */
+ for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+ bits = vm_page_dump[i];
+ while (bits) {
+ bit = ffsl(bits) - 1;
+ pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) +
+ bit) * PAGE_SIZE;
+ error = blk_write(di, 0, pa, PAGE_SIZE);
+ if (error)
+ goto fail;
+ bits &= ~(1ul << bit);
+ }
+ }
+
+ error = blk_flush(di);
+ if (error)
+ goto fail;
+
+ error = dump_finish(di, &kdh);
+ if (error != 0)
+ goto fail;
+
+ printf("\nDump complete\n");
+ return (0);
+
+fail:
+ if (error < 0)
+ error = -error;
+
+ printf("\n");
+ if (error == ENOSPC) {
+ printf("Dump map grown while dumping. ");
+ if (retry_count < 5) {
+ printf("Retrying...\n");
+ goto retry;
+ }
+ printf("Dump failed.\n");
+ }
+ else if (error == ECANCELED)
+ printf("Dump aborted\n");
+ else if (error == E2BIG) {
+ printf("Dump failed. Partition too small (about %lluMB were "
+ "needed this time).\n", (long long)dumpsize >> 20);
+ } else
+ printf("** DUMP FAILED (ERROR %d) **\n", error);
+ return (error);
+}
+
+void
+dump_add_page(vm_paddr_t pa)
+{
+ int idx, bit;
+
+ pa >>= PAGE_SHIFT;
+ idx = pa >> 6; /* 2^6 = 64 */
+ bit = pa & 63;
+ atomic_set_long(&vm_page_dump[idx], 1ul << bit);
+}
+
+void
+dump_drop_page(vm_paddr_t pa)
+{
+ int idx, bit;
+
+ pa >>= PAGE_SHIFT;
+ idx = pa >> 6; /* 2^6 = 64 */
+ bit = pa & 63;
+ atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
+}
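
Each bit in vm_page_dump covers one page, 64 pages per 64-bit word; for example, with 4 KiB pages a physical address of 0x40003000 has page frame 0x40003 (262147), landing in word 262147 >> 6 == 4096 at bit 262147 & 63 == 3. A small illustrative helper showing the same arithmetic (page_dump_slot is a hypothetical name, assuming PAGE_SHIFT == 12):

#include <stdint.h>

static void
page_dump_slot(uint64_t pa, uint64_t *idx, int *bit)
{
	uint64_t pfn = pa >> 12;	/* PAGE_SHIFT */

	*idx = pfn >> 6;		/* which 64-bit bitmap word */
	*bit = pfn & 63;		/* which bit inside that word */
}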
diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c
new file mode 100644
index 000000000000..8c8ceafe18e9
--- /dev/null
+++ b/sys/arm64/arm64/mp_machdep.c
@@ -0,0 +1,896 @@
+/*-
+ * Copyright (c) 2015-2016 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include "opt_acpi.h"
+#include "opt_ddb.h"
+#include "opt_kstack_pages.h"
+#include "opt_platform.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/csan.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+
+#include <machine/machdep.h>
+#include <machine/debug_monitor.h>
+#include <machine/intr.h>
+#include <machine/smp.h>
+#ifdef VFP
+#include <machine/vfp.h>
+#endif
+
+#ifdef DEV_ACPI
+#include <contrib/dev/acpica/include/acpi.h>
+#include <dev/acpica/acpivar.h>
+#endif
+
+#ifdef FDT
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+#include <dev/ofw/ofw_cpu.h>
+#endif
+
+#include <dev/psci/psci.h>
+
+#include "pic_if.h"
+
+#define MP_QUIRK_CPULIST 0x01 /* The list of cpus may be wrong, */
+ /* don't panic if one fails to start */
+static uint32_t mp_quirks;
+
+#ifdef FDT
+static struct {
+ const char *compat;
+ uint32_t quirks;
+} fdt_quirks[] = {
+ { "arm,foundation-aarch64", MP_QUIRK_CPULIST },
+ { "arm,fvp-base", MP_QUIRK_CPULIST },
+ /* This is incorrect in some DTS files */
+ { "arm,vfp-base", MP_QUIRK_CPULIST },
+ { NULL, 0 },
+};
+#endif
+
+typedef void intr_ipi_send_t(void *, cpuset_t, u_int);
+typedef void intr_ipi_handler_t(void *);
+
+#define INTR_IPI_NAMELEN (MAXCOMLEN + 1)
+struct intr_ipi {
+ intr_ipi_handler_t * ii_handler;
+ void * ii_handler_arg;
+ intr_ipi_send_t * ii_send;
+ void * ii_send_arg;
+ char ii_name[INTR_IPI_NAMELEN];
+ u_long * ii_count;
+};
+
+static struct intr_ipi ipi_sources[INTR_IPI_COUNT];
+
+static struct intr_ipi *intr_ipi_lookup(u_int);
+static void intr_pic_ipi_setup(u_int, const char *, intr_ipi_handler_t *,
+ void *);
+
+static void ipi_ast(void *);
+static void ipi_hardclock(void *);
+static void ipi_preempt(void *);
+static void ipi_rendezvous(void *);
+static void ipi_stop(void *);
+
+struct pcb stoppcbs[MAXCPU];
+
+/*
+ * Not all systems boot from the first CPU in the device tree. To work around
+ * this we need to find which CPU we have booted from so when we later
+ * enable the secondary CPUs we skip this one.
+ */
+static int cpu0 = -1;
+
+void mpentry(unsigned long cpuid);
+void init_secondary(uint64_t);
+
+/* Synchronize AP startup. */
+static struct mtx ap_boot_mtx;
+
+/* Stacks for AP initialization, discarded once idle threads are started. */
+void *bootstack;
+static void *bootstacks[MAXCPU];
+
+/* Count of started APs, used to synchronize access to bootstack. */
+static volatile int aps_started;
+
+/* Set to 1 once we're ready to let the APs out of the pen. */
+static volatile int aps_ready;
+
+/* Temporary variables for init_secondary() */
+void *dpcpu[MAXCPU - 1];
+
+static void
+release_aps(void *dummy __unused)
+{
+ int i, started;
+
+ /* Only release CPUs if they exist */
+ if (mp_ncpus == 1)
+ return;
+
+ intr_pic_ipi_setup(IPI_AST, "ast", ipi_ast, NULL);
+ intr_pic_ipi_setup(IPI_PREEMPT, "preempt", ipi_preempt, NULL);
+ intr_pic_ipi_setup(IPI_RENDEZVOUS, "rendezvous", ipi_rendezvous, NULL);
+ intr_pic_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL);
+ intr_pic_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL);
+ intr_pic_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL);
+
+ atomic_store_rel_int(&aps_ready, 1);
+ /* Wake up the other CPUs */
+ __asm __volatile(
+ "dsb ishst \n"
+ "sev \n"
+ ::: "memory");
+
+ printf("Release APs...");
+
+ started = 0;
+ for (i = 0; i < 2000; i++) {
+ if (smp_started) {
+ printf("done\n");
+ return;
+ }
+ /*
+ * Don't time out while we are making progress. Some large
+ * systems can take a while to start all CPUs.
+ */
+ if (smp_cpus > started) {
+ i = 0;
+ started = smp_cpus;
+ }
+ DELAY(1000);
+ }
+
+ printf("APs not started\n");
+}
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
+
+void
+init_secondary(uint64_t cpu)
+{
+ struct pcpu *pcpup;
+ pmap_t pmap0;
+
+ pcpup = &__pcpu[cpu];
+ /*
+ * Set the pcpu pointer with a backup in tpidr_el1 to be
+ * loaded when entering the kernel from userland.
+ */
+ __asm __volatile(
+ "mov x18, %0 \n"
+ "msr tpidr_el1, %0" :: "r"(pcpup));
+
+ /*
+ * Identify current CPU. This is necessary to setup
+ * affinity registers and to provide support for
+ * runtime chip identification.
+ *
+ * We need this before signalling the CPU is ready to
+ * let the boot CPU use the results.
+ */
+ identify_cpu(cpu);
+
+ /* Ensure the stores in identify_cpu have completed */
+ atomic_thread_fence_acq_rel();
+
+ /* Signal the BSP and spin until it has released all APs. */
+ atomic_add_int(&aps_started, 1);
+ while (!atomic_load_int(&aps_ready))
+ __asm __volatile("wfe");
+
+ pcpup->pc_midr = get_midr();
+
+ /* Initialize curthread */
+ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
+ pcpup->pc_curthread = pcpup->pc_idlethread;
+
+ /* Initialize curpmap to match TTBR0's current setting. */
+ pmap0 = vmspace_pmap(&vmspace0);
+ KASSERT(pmap_to_ttbr0(pmap0) == READ_SPECIALREG(ttbr0_el1),
+ ("pmap0 doesn't match cpu %ld's ttbr0", cpu));
+ pcpup->pc_curpmap = pmap0;
+
+ install_cpu_errata();
+
+ intr_pic_init_secondary();
+
+ /* Start per-CPU event timers. */
+ cpu_initclocks_ap();
+
+#ifdef VFP
+ vfp_init();
+#endif
+
+ dbg_init();
+ pan_enable();
+
+ mtx_lock_spin(&ap_boot_mtx);
+ atomic_add_rel_32(&smp_cpus, 1);
+ if (smp_cpus == mp_ncpus) {
+ /* enable IPI's, tlb shootdown, freezes etc */
+ atomic_store_rel_int(&smp_started, 1);
+ }
+ mtx_unlock_spin(&ap_boot_mtx);
+
+ kcsan_cpu_init(cpu);
+
+ /*
+ * Assert that smp_after_idle_runnable condition is reasonable.
+ */
+ MPASS(PCPU_GET(curpcb) == NULL);
+
+ /* Enter the scheduler */
+ sched_throw(NULL);
+
+ panic("scheduler returned us to init_secondary");
+ /* NOTREACHED */
+}
+
+static void
+smp_after_idle_runnable(void *arg __unused)
+{
+ struct pcpu *pc;
+ int cpu;
+
+ for (cpu = 1; cpu < mp_ncpus; cpu++) {
+ if (bootstacks[cpu] != NULL) {
+ pc = pcpu_find(cpu);
+ while (atomic_load_ptr(&pc->pc_curpcb) == NULL)
+ cpu_spinwait();
+ kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE);
+ }
+ }
+}
+SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY,
+ smp_after_idle_runnable, NULL);
+
+/*
+ * Send IPI thru interrupt controller.
+ */
+static void
+pic_ipi_send(void *arg, cpuset_t cpus, u_int ipi)
+{
+
+ KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
+
+ /*
+ * Ensure that this CPU's stores will be visible to IPI
+ * recipients before starting to send the interrupts.
+ */
+ dsb(ishst);
+
+ PIC_IPI_SEND(intr_irq_root_dev, arg, cpus, ipi);
+}
+
+/*
+ * Set up an IPI handler on the interrupt controller.
+ *
+ * Not SMP coherent.
+ */
+static void
+intr_pic_ipi_setup(u_int ipi, const char *name, intr_ipi_handler_t *hand,
+ void *arg)
+{
+ struct intr_irqsrc *isrc;
+ struct intr_ipi *ii;
+ int error;
+
+ KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
+ KASSERT(hand != NULL, ("%s: ipi %u no handler", __func__, ipi));
+
+ error = PIC_IPI_SETUP(intr_irq_root_dev, ipi, &isrc);
+ if (error != 0)
+ return;
+
+ isrc->isrc_handlers++;
+
+ ii = intr_ipi_lookup(ipi);
+ KASSERT(ii->ii_count == NULL, ("%s: ipi %u reused", __func__, ipi));
+
+ ii->ii_handler = hand;
+ ii->ii_handler_arg = arg;
+ ii->ii_send = pic_ipi_send;
+ ii->ii_send_arg = isrc;
+ strlcpy(ii->ii_name, name, INTR_IPI_NAMELEN);
+ ii->ii_count = intr_ipi_setup_counters(name);
+}
+
+static void
+intr_ipi_send(cpuset_t cpus, u_int ipi)
+{
+ struct intr_ipi *ii;
+
+ ii = intr_ipi_lookup(ipi);
+ if (ii->ii_count == NULL)
+ panic("%s: not setup IPI %u", __func__, ipi);
+
+ ii->ii_send(ii->ii_send_arg, cpus, ipi);
+}
+
+static void
+ipi_ast(void *dummy __unused)
+{
+
+ CTR0(KTR_SMP, "IPI_AST");
+}
+
+static void
+ipi_hardclock(void *dummy __unused)
+{
+
+ CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
+ hardclockintr();
+}
+
+static void
+ipi_preempt(void *dummy __unused)
+{
+ CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__);
+ sched_preempt(curthread);
+}
+
+static void
+ipi_rendezvous(void *dummy __unused)
+{
+
+ CTR0(KTR_SMP, "IPI_RENDEZVOUS");
+ smp_rendezvous_action();
+}
+
+static void
+ipi_stop(void *dummy __unused)
+{
+ u_int cpu;
+
+ CTR0(KTR_SMP, "IPI_STOP");
+
+ cpu = PCPU_GET(cpuid);
+ savectx(&stoppcbs[cpu]);
+
+ /* Indicate we are stopped */
+ CPU_SET_ATOMIC(cpu, &stopped_cpus);
+
+ /* Wait for restart */
+ while (!CPU_ISSET(cpu, &started_cpus))
+ cpu_spinwait();
+
+#ifdef DDB
+ dbg_register_sync(NULL);
+#endif
+
+ CPU_CLR_ATOMIC(cpu, &started_cpus);
+ CPU_CLR_ATOMIC(cpu, &stopped_cpus);
+ CTR0(KTR_SMP, "IPI_STOP (restart)");
+}
+
+struct cpu_group *
+cpu_topo(void)
+{
+
+ return (smp_topo_none());
+}
+
+/* Determine if we are running on an MP machine */
+int
+cpu_mp_probe(void)
+{
+
+ /* ARM64TODO: Read the u bit of mpidr_el1 to determine this */
+ return (1);
+}
+
+static bool
+start_cpu(u_int id, uint64_t target_cpu)
+{
+ struct pcpu *pcpup;
+ vm_paddr_t pa;
+ u_int cpuid;
+ int err, naps;
+
+ /* Check we are able to start this cpu */
+ if (id > mp_maxid)
+ return (false);
+
+ KASSERT(id < MAXCPU, ("Too many CPUs"));
+
+ /* We are already running on cpu 0 */
+ if (id == cpu0)
+ return (true);
+
+ /*
+ * Rotate the CPU IDs to put the boot CPU as CPU 0. We keep the other
+ * CPUs ordered as they are likely grouped into clusters so it can be
+ * useful to keep that property, e.g. for the GICv3 driver to send
+ * an IPI to all CPUs in the cluster.
+ */
+ cpuid = id;
+ if (cpuid < cpu0)
+ cpuid += mp_maxid + 1;
+ cpuid -= cpu0;
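+	/*
+	 * Illustrative example: with cpu0 == 2 and mp_maxid == 3, hardware
+	 * IDs 2, 3, 0 and 1 map to logical CPU IDs 0, 1, 2 and 3.
+	 */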
+
+ pcpup = &__pcpu[cpuid];
+ pcpu_init(pcpup, cpuid, sizeof(struct pcpu));
+
+ dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
+ dpcpu_init(dpcpu[cpuid - 1], cpuid);
+
+ bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO);
+
+ naps = atomic_load_int(&aps_started);
+ bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE;
+
+ printf("Starting CPU %u (%lx)\n", cpuid, target_cpu);
+ pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry);
+ err = psci_cpu_on(target_cpu, pa, cpuid);
+ if (err != PSCI_RETVAL_SUCCESS) {
+ /*
+ * Panic here if INVARIANTS are enabled and PSCI failed to
+ * start the requested CPU. psci_cpu_on() returns PSCI_MISSING
+ * to indicate we are unable to use it to start the given CPU.
+ */
+ KASSERT(err == PSCI_MISSING ||
+ (mp_quirks & MP_QUIRK_CPULIST) == MP_QUIRK_CPULIST,
+ ("Failed to start CPU %u (%lx), error %d\n",
+ id, target_cpu, err));
+
+ pcpu_destroy(pcpup);
+ kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE);
+ dpcpu[cpuid - 1] = NULL;
+ kmem_free((vm_offset_t)bootstacks[cpuid], PAGE_SIZE);
+ bootstacks[cpuid] = NULL;
+ mp_ncpus--;
+
+ /* Notify the user that the CPU failed to start */
+ printf("Failed to start CPU %u (%lx), error %d\n",
+ id, target_cpu, err);
+ } else {
+ /* Wait for the AP to switch to its boot stack. */
+ while (atomic_load_int(&aps_started) < naps + 1)
+ cpu_spinwait();
+ CPU_SET(cpuid, &all_cpus);
+ }
+
+ return (true);
+}
+
+#ifdef DEV_ACPI
+static void
+madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
+{
+ ACPI_MADT_GENERIC_INTERRUPT *intr;
+ u_int *cpuid;
+ u_int id;
+
+	switch (entry->Type) {
+ case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
+ intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry;
+ cpuid = arg;
+ id = *cpuid;
+ start_cpu(id, intr->ArmMpidr);
+ __pcpu[id].pc_acpi_id = intr->Uid;
+ (*cpuid)++;
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+cpu_init_acpi(void)
+{
+ ACPI_TABLE_MADT *madt;
+ vm_paddr_t physaddr;
+ u_int cpuid;
+
+ physaddr = acpi_find_table(ACPI_SIG_MADT);
+ if (physaddr == 0)
+ return;
+
+ madt = acpi_map_table(physaddr, ACPI_SIG_MADT);
+ if (madt == NULL) {
+ printf("Unable to map the MADT, not starting APs\n");
+ return;
+ }
+
+ cpuid = 0;
+ acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
+ madt_handler, &cpuid);
+
+ acpi_unmap_table(madt);
+
+#if MAXMEMDOM > 1
+ acpi_pxm_set_cpu_locality();
+#endif
+}
+#endif
+
+#ifdef FDT
+static boolean_t
+cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
+{
+ uint64_t target_cpu;
+ int domain;
+
+ target_cpu = reg[0];
+ if (addr_size == 2) {
+ target_cpu <<= 32;
+ target_cpu |= reg[1];
+ }
+
+ if (!start_cpu(id, target_cpu))
+ return (FALSE);
+
+	/* Try to read the NUMA node of this CPU */
+ if (vm_ndomains == 1 ||
+ OF_getencprop(node, "numa-node-id", &domain, sizeof(domain)) <= 0)
+ domain = 0;
+ __pcpu[id].pc_domain = domain;
+ if (domain < MAXMEMDOM)
+ CPU_SET(id, &cpuset_domain[domain]);
+
+ return (TRUE);
+}
+#endif
+
+/* Initialize and fire up non-boot processors */
+void
+cpu_mp_start(void)
+{
+#ifdef FDT
+ phandle_t node;
+ int i;
+#endif
+
+ mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
+
+ CPU_SET(0, &all_cpus);
+
+	switch (arm64_bus_method) {
+#ifdef DEV_ACPI
+ case ARM64_BUS_ACPI:
+ mp_quirks = MP_QUIRK_CPULIST;
+ KASSERT(cpu0 >= 0, ("Current CPU was not found"));
+ cpu_init_acpi();
+ break;
+#endif
+#ifdef FDT
+ case ARM64_BUS_FDT:
+ node = OF_peer(0);
+ for (i = 0; fdt_quirks[i].compat != NULL; i++) {
+ if (ofw_bus_node_is_compatible(node,
+ fdt_quirks[i].compat) != 0) {
+ mp_quirks = fdt_quirks[i].quirks;
+ }
+ }
+ KASSERT(cpu0 >= 0, ("Current CPU was not found"));
+ ofw_cpu_early_foreach(cpu_init_fdt, true);
+ break;
+#endif
+ default:
+ break;
+ }
+}
+
+/* Introduce rest of cores to the world */
+void
+cpu_mp_announce(void)
+{
+}
+
+#ifdef DEV_ACPI
+static void
+cpu_count_acpi_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
+{
+ ACPI_MADT_GENERIC_INTERRUPT *intr;
+ u_int *cores = arg;
+ uint64_t mpidr_reg;
+
+	switch (entry->Type) {
+ case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
+ intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry;
+ if (cpu0 < 0) {
+ mpidr_reg = READ_SPECIALREG(mpidr_el1);
+ if ((mpidr_reg & 0xff00fffffful) == intr->ArmMpidr)
+ cpu0 = *cores;
+ }
+ (*cores)++;
+ break;
+ default:
+ break;
+ }
+}
+
+static u_int
+cpu_count_acpi(void)
+{
+ ACPI_TABLE_MADT *madt;
+ vm_paddr_t physaddr;
+ u_int cores;
+
+ physaddr = acpi_find_table(ACPI_SIG_MADT);
+ if (physaddr == 0)
+ return (0);
+
+ madt = acpi_map_table(physaddr, ACPI_SIG_MADT);
+ if (madt == NULL) {
+ printf("Unable to map the MADT, not starting APs\n");
+ return (0);
+ }
+
+ cores = 0;
+ acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
+ cpu_count_acpi_handler, &cores);
+
+ acpi_unmap_table(madt);
+
+ return (cores);
+}
+#endif
+
+#ifdef FDT
+static boolean_t
+cpu_find_cpu0_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
+{
+ uint64_t mpidr_fdt, mpidr_reg;
+
+ if (cpu0 < 0) {
+ mpidr_fdt = reg[0];
+ if (addr_size == 2) {
+ mpidr_fdt <<= 32;
+ mpidr_fdt |= reg[1];
+ }
+
+ mpidr_reg = READ_SPECIALREG(mpidr_el1);
+
+ if ((mpidr_reg & 0xff00fffffful) == mpidr_fdt)
+ cpu0 = id;
+ }
+
+ return (TRUE);
+}
+#endif
+
+void
+cpu_mp_setmaxid(void)
+{
+ int cores;
+
+ mp_ncpus = 1;
+ mp_maxid = 0;
+
+	switch (arm64_bus_method) {
+#ifdef DEV_ACPI
+ case ARM64_BUS_ACPI:
+ cores = cpu_count_acpi();
+ if (cores > 0) {
+ cores = MIN(cores, MAXCPU);
+ if (bootverbose)
+ printf("Found %d CPUs in the ACPI tables\n",
+ cores);
+ mp_ncpus = cores;
+ mp_maxid = cores - 1;
+ }
+ break;
+#endif
+#ifdef FDT
+ case ARM64_BUS_FDT:
+ cores = ofw_cpu_early_foreach(cpu_find_cpu0_fdt, false);
+ if (cores > 0) {
+ cores = MIN(cores, MAXCPU);
+ if (bootverbose)
+ printf("Found %d CPUs in the device tree\n",
+ cores);
+ mp_ncpus = cores;
+ mp_maxid = cores - 1;
+ }
+ break;
+#endif
+ default:
+ if (bootverbose)
+ printf("No CPU data, limiting to 1 core\n");
+ break;
+ }
+
+ if (TUNABLE_INT_FETCH("hw.ncpu", &cores)) {
+ if (cores > 0 && cores < mp_ncpus) {
+ mp_ncpus = cores;
+ mp_maxid = cores - 1;
+ }
+ }
+}
+
+/*
+ * Lookup IPI source.
+ */
+static struct intr_ipi *
+intr_ipi_lookup(u_int ipi)
+{
+
+ if (ipi >= INTR_IPI_COUNT)
+ panic("%s: no such IPI %u", __func__, ipi);
+
+ return (&ipi_sources[ipi]);
+}
+
+/*
+ * IPI dispatch function for the interrupt controller. It should be
+ * called directly from the interrupt controller once the associated
+ * interrupt source is known, or from anything else that has the
+ * interrupt source mapped.
+ */
+void
+intr_ipi_dispatch(u_int ipi, struct trapframe *tf)
+{
+ void *arg;
+ struct intr_ipi *ii;
+
+ ii = intr_ipi_lookup(ipi);
+ if (ii->ii_count == NULL)
+ panic("%s: not setup IPI %u", __func__, ipi);
+
+ intr_ipi_increment_count(ii->ii_count, PCPU_GET(cpuid));
+
+ /*
+	 * Supply the IPI handler with the trapframe as its argument
+	 * if no argument was registered.
+ */
+ arg = ii->ii_handler_arg != NULL ? ii->ii_handler_arg : tf;
+ ii->ii_handler(arg);
+}
+
+#ifdef notyet
+/*
+ * Map IPI into interrupt controller.
+ *
+ * Not SMP coherent.
+ */
+static int
+ipi_map(struct intr_irqsrc *isrc, u_int ipi)
+{
+ boolean_t is_percpu;
+ int error;
+
+ if (ipi >= INTR_IPI_COUNT)
+ panic("%s: no such IPI %u", __func__, ipi);
+
+ KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
+
+ isrc->isrc_type = INTR_ISRCT_NAMESPACE;
+ isrc->isrc_nspc_type = INTR_IRQ_NSPC_IPI;
+ isrc->isrc_nspc_num = ipi_next_num;
+
+ error = PIC_REGISTER(intr_irq_root_dev, isrc, &is_percpu);
+ if (error == 0) {
+ isrc->isrc_dev = intr_irq_root_dev;
+ ipi_next_num++;
+ }
+ return (error);
+}
+
+/*
+ * Set up an IPI handler on an interrupt source.
+ *
+ * Note that a platform may provide additional ways to send and receive
+ * IPIs, for example fast interrupts. In that case, this function can be
+ * called with the AISHF_NOALLOC flag set and intr_ipi_dispatch() called
+ * when appropriate.
+ *
+ * Not SMP coherent.
+ */
+int
+intr_ipi_set_handler(u_int ipi, const char *name, intr_ipi_filter_t *filter,
+ void *arg, u_int flags)
+{
+ struct intr_irqsrc *isrc;
+ int error;
+
+ if (filter == NULL)
+ return(EINVAL);
+
+ isrc = intr_ipi_lookup(ipi);
+ if (isrc->isrc_ipifilter != NULL)
+ return (EEXIST);
+
+ if ((flags & AISHF_NOALLOC) == 0) {
+ error = ipi_map(isrc, ipi);
+ if (error != 0)
+ return (error);
+ }
+
+ isrc->isrc_ipifilter = filter;
+ isrc->isrc_arg = arg;
+ isrc->isrc_handlers = 1;
+ isrc->isrc_count = intr_ipi_setup_counters(name);
+ isrc->isrc_index = 0; /* it should not be used in IPI case */
+
+ if (isrc->isrc_dev != NULL) {
+ PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
+ PIC_ENABLE_SOURCE(isrc->isrc_dev, isrc);
+ }
+ return (0);
+}
+#endif
+
+/* Sending IPI */
+void
+ipi_all_but_self(u_int ipi)
+{
+ cpuset_t cpus;
+
+ cpus = all_cpus;
+ CPU_CLR(PCPU_GET(cpuid), &cpus);
+ CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
+ intr_ipi_send(cpus, ipi);
+}
+
+void
+ipi_cpu(int cpu, u_int ipi)
+{
+ cpuset_t cpus;
+
+ CPU_ZERO(&cpus);
+ CPU_SET(cpu, &cpus);
+
+ CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, cpu, ipi);
+ intr_ipi_send(cpus, ipi);
+}
+
+void
+ipi_selected(cpuset_t cpus, u_int ipi)
+{
+
+ CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
+ intr_ipi_send(cpus, ipi);
+}
diff --git a/sys/arm64/arm64/nexus.c b/sys/arm64/arm64/nexus.c
new file mode 100644
index 000000000000..924496ec7f52
--- /dev/null
+++ b/sys/arm64/arm64/nexus.c
@@ -0,0 +1,549 @@
+/*-
+ * Copyright 1998 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This code implements a `root nexus' for Arm Architecture
+ * machines. The function of the root nexus is to serve as an
+ * attachment point for both processors and buses, and to manage
+ * resources which are common to all of them. In particular,
+ * this code implements the core resource managers for interrupt
+ * requests, DMA requests (which rightfully should be a part of the
+ * ISA code but it's easier to do it here for now), I/O port addresses,
+ * and I/O memory address space.
+ */
+
+#include "opt_acpi.h"
+#include "opt_platform.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <sys/interrupt.h>
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/pcb.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/resource.h>
+#include <machine/intr.h>
+
+#ifdef FDT
+#include <dev/ofw/ofw_bus_subr.h>
+#include <dev/ofw/openfirm.h>
+#include "ofw_bus_if.h"
+#endif
+#ifdef DEV_ACPI
+#include <contrib/dev/acpica/include/acpi.h>
+#include <dev/acpica/acpivar.h>
+#include "acpi_bus_if.h"
+#include "pcib_if.h"
+#endif
+
+extern struct bus_space memmap_bus;
+
+static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device");
+
+struct nexus_device {
+ struct resource_list nx_resources;
+};
+
+#define DEVTONX(dev) ((struct nexus_device *)device_get_ivars(dev))
+
+static struct rman mem_rman;
+static struct rman irq_rman;
+
+static int nexus_attach(device_t);
+
+#ifdef FDT
+static device_probe_t nexus_fdt_probe;
+static device_attach_t nexus_fdt_attach;
+#endif
+#ifdef DEV_ACPI
+static device_probe_t nexus_acpi_probe;
+static device_attach_t nexus_acpi_attach;
+#endif
+
+static int nexus_print_child(device_t, device_t);
+static device_t nexus_add_child(device_t, u_int, const char *, int);
+static struct resource *nexus_alloc_resource(device_t, device_t, int, int *,
+ rman_res_t, rman_res_t, rman_res_t, u_int);
+static int nexus_activate_resource(device_t, device_t, int, int,
+ struct resource *);
+static int nexus_config_intr(device_t dev, int irq, enum intr_trigger trig,
+ enum intr_polarity pol);
+static struct resource_list *nexus_get_reslist(device_t, device_t);
+static int nexus_set_resource(device_t, device_t, int, int,
+ rman_res_t, rman_res_t);
+static int nexus_deactivate_resource(device_t, device_t, int, int,
+ struct resource *);
+static int nexus_release_resource(device_t, device_t, int, int,
+ struct resource *);
+
+static int nexus_setup_intr(device_t dev, device_t child, struct resource *res,
+    int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg,
+    void **cookiep);
+static int nexus_teardown_intr(device_t, device_t, struct resource *, void *);
+static bus_space_tag_t nexus_get_bus_tag(device_t, device_t);
+#ifdef SMP
+static int nexus_bind_intr(device_t, device_t, struct resource *, int);
+#endif
+
+#ifdef FDT
+static int nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent,
+ int icells, pcell_t *intr);
+#endif
+
+static device_method_t nexus_methods[] = {
+ /* Bus interface */
+ DEVMETHOD(bus_print_child, nexus_print_child),
+ DEVMETHOD(bus_add_child, nexus_add_child),
+ DEVMETHOD(bus_alloc_resource, nexus_alloc_resource),
+ DEVMETHOD(bus_activate_resource, nexus_activate_resource),
+ DEVMETHOD(bus_config_intr, nexus_config_intr),
+ DEVMETHOD(bus_get_resource_list, nexus_get_reslist),
+ DEVMETHOD(bus_set_resource, nexus_set_resource),
+ DEVMETHOD(bus_deactivate_resource, nexus_deactivate_resource),
+ DEVMETHOD(bus_release_resource, nexus_release_resource),
+ DEVMETHOD(bus_setup_intr, nexus_setup_intr),
+ DEVMETHOD(bus_teardown_intr, nexus_teardown_intr),
+ DEVMETHOD(bus_get_bus_tag, nexus_get_bus_tag),
+#ifdef SMP
+ DEVMETHOD(bus_bind_intr, nexus_bind_intr),
+#endif
+ { 0, 0 }
+};
+
+static driver_t nexus_driver = {
+ "nexus",
+ nexus_methods,
+ 1 /* no softc */
+};
+
+static int
+nexus_attach(device_t dev)
+{
+
+ mem_rman.rm_start = 0;
+ mem_rman.rm_end = BUS_SPACE_MAXADDR;
+ mem_rman.rm_type = RMAN_ARRAY;
+ mem_rman.rm_descr = "I/O memory addresses";
+ if (rman_init(&mem_rman) ||
+ rman_manage_region(&mem_rman, 0, BUS_SPACE_MAXADDR))
+ panic("nexus_attach mem_rman");
+ irq_rman.rm_start = 0;
+ irq_rman.rm_end = ~0;
+ irq_rman.rm_type = RMAN_ARRAY;
+ irq_rman.rm_descr = "Interrupts";
+ if (rman_init(&irq_rman) || rman_manage_region(&irq_rman, 0, ~0))
+ panic("nexus_attach irq_rman");
+
+ bus_generic_probe(dev);
+ bus_generic_attach(dev);
+
+ return (0);
+}
+
+static int
+nexus_print_child(device_t bus, device_t child)
+{
+ int retval = 0;
+
+ retval += bus_print_child_header(bus, child);
+ retval += printf("\n");
+
+ return (retval);
+}
+
+static device_t
+nexus_add_child(device_t bus, u_int order, const char *name, int unit)
+{
+ device_t child;
+ struct nexus_device *ndev;
+
+ ndev = malloc(sizeof(struct nexus_device), M_NEXUSDEV, M_NOWAIT|M_ZERO);
+ if (!ndev)
+ return (0);
+ resource_list_init(&ndev->nx_resources);
+
+ child = device_add_child_ordered(bus, order, name, unit);
+
+ /* should we free this in nexus_child_detached? */
+ device_set_ivars(child, ndev);
+
+ return (child);
+}
+
+/*
+ * Allocate a resource on behalf of child. NB: child is usually going to be a
+ * child of one of our descendants, not a direct child of nexus0.
+ * (Exceptions include footbridge.)
+ */
+static struct resource *
+nexus_alloc_resource(device_t bus, device_t child, int type, int *rid,
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
+{
+ struct nexus_device *ndev = DEVTONX(child);
+ struct resource *rv;
+ struct resource_list_entry *rle;
+ struct rman *rm;
+ int needactivate = flags & RF_ACTIVE;
+
+ /*
+ * If this is an allocation of the "default" range for a given
+ * RID, and we know what the resources for this device are
+ * (ie. they aren't maintained by a child bus), then work out
+ * the start/end values.
+ */
+ if (RMAN_IS_DEFAULT_RANGE(start, end) && (count == 1)) {
+ if (device_get_parent(child) != bus || ndev == NULL)
+ return(NULL);
+ rle = resource_list_find(&ndev->nx_resources, type, *rid);
+ if (rle == NULL)
+ return(NULL);
+ start = rle->start;
+ end = rle->end;
+ count = rle->count;
+ }
+
+ switch (type) {
+ case SYS_RES_IRQ:
+ rm = &irq_rman;
+ break;
+
+ case SYS_RES_MEMORY:
+ case SYS_RES_IOPORT:
+ rm = &mem_rman;
+ break;
+
+ default:
+ return (NULL);
+ }
+
+ rv = rman_reserve_resource(rm, start, end, count, flags, child);
+ if (rv == NULL)
+ return (NULL);
+
+ rman_set_rid(rv, *rid);
+ rman_set_bushandle(rv, rman_get_start(rv));
+
+ if (needactivate) {
+ if (bus_activate_resource(child, type, *rid, rv)) {
+ rman_release_resource(rv);
+ return (NULL);
+ }
+ }
+
+ return (rv);
+}
+
+static int
+nexus_release_resource(device_t bus, device_t child, int type, int rid,
+ struct resource *res)
+{
+ int error;
+
+ if (rman_get_flags(res) & RF_ACTIVE) {
+ error = bus_deactivate_resource(child, type, rid, res);
+ if (error)
+ return (error);
+ }
+ return (rman_release_resource(res));
+}
+
+static int
+nexus_config_intr(device_t dev, int irq, enum intr_trigger trig,
+ enum intr_polarity pol)
+{
+
+ /*
+ * On arm64 (due to INTRNG), ACPI interrupt configuration is
+ * done in nexus_acpi_map_intr().
+ */
+ return (0);
+}
+
+static int
+nexus_setup_intr(device_t dev, device_t child, struct resource *res, int flags,
+ driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep)
+{
+ int error;
+
+ if ((rman_get_flags(res) & RF_SHAREABLE) == 0)
+ flags |= INTR_EXCL;
+
+ /* We depend here on rman_activate_resource() being idempotent. */
+ error = rman_activate_resource(res);
+ if (error)
+ return (error);
+
+ error = intr_setup_irq(child, res, filt, intr, arg, flags, cookiep);
+
+ return (error);
+}
+
+static int
+nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih)
+{
+
+ return (intr_teardown_irq(child, r, ih));
+}
+
+#ifdef SMP
+static int
+nexus_bind_intr(device_t dev, device_t child, struct resource *irq, int cpu)
+{
+
+ return (intr_bind_irq(child, irq, cpu));
+}
+#endif
+
+static bus_space_tag_t
+nexus_get_bus_tag(device_t bus __unused, device_t child __unused)
+{
+
+ return(&memmap_bus);
+}
+
+static int
+nexus_activate_resource(device_t bus, device_t child, int type, int rid,
+ struct resource *r)
+{
+ int err;
+ bus_addr_t paddr;
+ bus_size_t psize;
+ bus_space_handle_t vaddr;
+
+ if ((err = rman_activate_resource(r)) != 0)
+ return (err);
+
+ /*
+ * If this is a memory resource, map it into the kernel.
+ */
+ if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) {
+ paddr = (bus_addr_t)rman_get_start(r);
+ psize = (bus_size_t)rman_get_size(r);
+ err = bus_space_map(&memmap_bus, paddr, psize, 0, &vaddr);
+ if (err != 0) {
+ rman_deactivate_resource(r);
+ return (err);
+ }
+ rman_set_bustag(r, &memmap_bus);
+ rman_set_virtual(r, (void *)vaddr);
+ rman_set_bushandle(r, vaddr);
+ } else if (type == SYS_RES_IRQ) {
+ err = intr_activate_irq(child, r);
+ if (err != 0) {
+ rman_deactivate_resource(r);
+ return (err);
+ }
+ }
+ return (0);
+}
+
+static struct resource_list *
+nexus_get_reslist(device_t dev, device_t child)
+{
+ struct nexus_device *ndev = DEVTONX(child);
+
+ return (&ndev->nx_resources);
+}
+
+static int
+nexus_set_resource(device_t dev, device_t child, int type, int rid,
+ rman_res_t start, rman_res_t count)
+{
+ struct nexus_device *ndev = DEVTONX(child);
+ struct resource_list *rl = &ndev->nx_resources;
+
+ /* XXX this should return a success/failure indicator */
+ resource_list_add(rl, type, rid, start, start + count - 1, count);
+
+ return(0);
+}
+
+static int
+nexus_deactivate_resource(device_t bus, device_t child, int type, int rid,
+ struct resource *r)
+{
+ bus_size_t psize;
+ bus_space_handle_t vaddr;
+
+ if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) {
+ psize = (bus_size_t)rman_get_size(r);
+ vaddr = rman_get_bushandle(r);
+
+ if (vaddr != 0) {
+ bus_space_unmap(&memmap_bus, vaddr, psize);
+ rman_set_virtual(r, NULL);
+ rman_set_bushandle(r, 0);
+ }
+ } else if (type == SYS_RES_IRQ) {
+ intr_deactivate_irq(child, r);
+ }
+
+ return (rman_deactivate_resource(r));
+}
+
+#ifdef FDT
+static device_method_t nexus_fdt_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, nexus_fdt_probe),
+ DEVMETHOD(device_attach, nexus_fdt_attach),
+
+ /* OFW interface */
+ DEVMETHOD(ofw_bus_map_intr, nexus_ofw_map_intr),
+
+ DEVMETHOD_END,
+};
+
+#define nexus_baseclasses nexus_fdt_baseclasses
+DEFINE_CLASS_1(nexus, nexus_fdt_driver, nexus_fdt_methods, 1, nexus_driver);
+#undef nexus_baseclasses
+static devclass_t nexus_fdt_devclass;
+
+EARLY_DRIVER_MODULE(nexus_fdt, root, nexus_fdt_driver, nexus_fdt_devclass,
+ 0, 0, BUS_PASS_BUS + BUS_PASS_ORDER_FIRST);
+
+static int
+nexus_fdt_probe(device_t dev)
+{
+
+ if (arm64_bus_method != ARM64_BUS_FDT)
+ return (ENXIO);
+
+ device_quiet(dev);
+ return (BUS_PROBE_DEFAULT);
+}
+
+static int
+nexus_fdt_attach(device_t dev)
+{
+
+ nexus_add_child(dev, 10, "ofwbus", 0);
+ return (nexus_attach(dev));
+}
+
+static int
+nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells,
+ pcell_t *intr)
+{
+ u_int irq;
+ struct intr_map_data_fdt *fdt_data;
+ size_t len;
+
+ len = sizeof(*fdt_data) + icells * sizeof(pcell_t);
+ fdt_data = (struct intr_map_data_fdt *)intr_alloc_map_data(
+ INTR_MAP_DATA_FDT, len, M_WAITOK | M_ZERO);
+ fdt_data->iparent = iparent;
+ fdt_data->ncells = icells;
+ memcpy(fdt_data->cells, intr, icells * sizeof(pcell_t));
+ irq = intr_map_irq(NULL, iparent, (struct intr_map_data *)fdt_data);
+ return (irq);
+}
+#endif
+
+#ifdef DEV_ACPI
+static int nexus_acpi_map_intr(device_t dev, device_t child, u_int irq,
+    int trig, int pol);
+
+static device_method_t nexus_acpi_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, nexus_acpi_probe),
+ DEVMETHOD(device_attach, nexus_acpi_attach),
+
+ /* ACPI interface */
+ DEVMETHOD(acpi_bus_map_intr, nexus_acpi_map_intr),
+
+ DEVMETHOD_END,
+};
+
+#define nexus_baseclasses nexus_acpi_baseclasses
+DEFINE_CLASS_1(nexus, nexus_acpi_driver, nexus_acpi_methods, 1,
+ nexus_driver);
+#undef nexus_baseclasses
+static devclass_t nexus_acpi_devclass;
+
+EARLY_DRIVER_MODULE(nexus_acpi, root, nexus_acpi_driver, nexus_acpi_devclass,
+ 0, 0, BUS_PASS_BUS + BUS_PASS_ORDER_FIRST);
+
+static int
+nexus_acpi_probe(device_t dev)
+{
+
+ if (arm64_bus_method != ARM64_BUS_ACPI || acpi_identify() != 0)
+ return (ENXIO);
+
+ device_quiet(dev);
+ return (BUS_PROBE_LOW_PRIORITY);
+}
+
+static int
+nexus_acpi_attach(device_t dev)
+{
+
+ nexus_add_child(dev, 10, "acpi", 0);
+ return (nexus_attach(dev));
+}
+
+static int
+nexus_acpi_map_intr(device_t dev, device_t child, u_int irq, int trig, int pol)
+{
+ struct intr_map_data_acpi *acpi_data;
+ size_t len;
+
+ len = sizeof(*acpi_data);
+ acpi_data = (struct intr_map_data_acpi *)intr_alloc_map_data(
+ INTR_MAP_DATA_ACPI, len, M_WAITOK | M_ZERO);
+ acpi_data->irq = irq;
+ acpi_data->pol = pol;
+ acpi_data->trig = trig;
+
+ /*
+ * TODO: This will only handle a single interrupt controller.
+ * ACPI will map multiple controllers into a single virtual IRQ
+ * space. Each controller has a System Vector Base to hold the
+ * first irq it handles in this space. As such the correct way
+ * to handle interrupts with ACPI is to search through the
+ * controllers for the largest base value that is no larger than
+ * the IRQ value.
+	 * the IRQ value.
+	 */
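+	/*
+	 * Illustrative example: with one controller whose System Vector
+	 * Base is 0 and a second one based at 160, an ACPI IRQ of 170
+	 * would belong to the second controller.
+	 */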
+ irq = intr_map_irq(NULL, ACPI_INTR_XREF,
+ (struct intr_map_data *)acpi_data);
+ return (irq);
+}
+#endif
diff --git a/sys/arm64/arm64/ofw_machdep.c b/sys/arm64/arm64/ofw_machdep.c
new file mode 100644
index 000000000000..3941c1d35617
--- /dev/null
+++ b/sys/arm64/arm64/ofw_machdep.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 2015 Ian Lepore <ian@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+
+#include <machine/bus.h>
+
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/ofw_subr.h>
+
+extern struct bus_space memmap_bus;
+
+int
+OF_decode_addr(phandle_t dev, int regno, bus_space_tag_t *tag,
+ bus_space_handle_t *handle, bus_size_t *sz)
+{
+ bus_addr_t addr;
+ bus_size_t size;
+ int err;
+
+ err = ofw_reg_to_paddr(dev, regno, &addr, &size, NULL);
+ if (err != 0)
+ return (err);
+
+ *tag = &memmap_bus;
+
+ if (sz != NULL)
+ *sz = size;
+
+ return (bus_space_map(*tag, addr, size, 0, handle));
+}
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
new file mode 100644
index 000000000000..df160cc05012
--- /dev/null
+++ b/sys/arm64/arm64/pmap.c
@@ -0,0 +1,6710 @@
+/*-
+ * Copyright (c) 1991 Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 1994 John S. Dyson
+ * All rights reserved.
+ * Copyright (c) 1994 David Greenman
+ * All rights reserved.
+ * Copyright (c) 2003 Peter Wemm
+ * All rights reserved.
+ * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
+ * All rights reserved.
+ * Copyright (c) 2014 Andrew Turner
+ * All rights reserved.
+ * Copyright (c) 2014-2016 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department and William Jolitz of UUNET Technologies Inc.
+ *
+ * This software was developed by Andrew Turner under sponsorship from
+ * the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
+ */
+/*-
+ * Copyright (c) 2003 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Jake Burkholder,
+ * Safeport Network Services, and Network Associates Laboratories, the
+ * Security Research Division of Network Associates, Inc. under
+ * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
+ * CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Manages physical address maps.
+ *
+ * Since the information managed by this module is
+ * also stored by the logical address mapping module,
+ * this module may throw away valid virtual-to-physical
+ * mappings at almost any time. However, invalidations
+ * of virtual-to-physical mappings must be done as
+ * requested.
+ *
+ * In order to cope with hardware architectures which
+ * make virtual-to-physical map invalidates expensive,
+ * this module may delay invalidation or protection-reduction
+ * operations until such time as they are actually
+ * necessary. This module is given full information as
+ * to which processors are currently using which maps,
+ * and to when physical maps must be made correct.
+ */
+
+#include "opt_vm.h"
+
+#include <sys/param.h>
+#include <sys/bitstring.h>
+#include <sys/bus.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/msgbuf.h>
+#include <sys/mutex.h>
+#include <sys/physmem.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/sbuf.h>
+#include <sys/sx.h>
+#include <sys/vmem.h>
+#include <sys/vmmeter.h>
+#include <sys/sched.h>
+#include <sys/sysctl.h>
+#include <sys/_unrhdr.h>
+#include <sys/smp.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_radix.h>
+#include <vm/vm_reserv.h>
+#include <vm/uma.h>
+
+#include <machine/machdep.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+
+#define PMAP_ASSERT_STAGE1(pmap) MPASS((pmap)->pm_stage == PM_STAGE1)
+#define PMAP_ASSERT_STAGE2(pmap) MPASS((pmap)->pm_stage == PM_STAGE2)
+
+#define NL0PG (PAGE_SIZE/(sizeof (pd_entry_t)))
+#define NL1PG (PAGE_SIZE/(sizeof (pd_entry_t)))
+#define NL2PG (PAGE_SIZE/(sizeof (pd_entry_t)))
+#define NL3PG (PAGE_SIZE/(sizeof (pt_entry_t)))
+
+#define NUL0E L0_ENTRIES
+#define NUL1E (NUL0E * NL1PG)
+#define NUL2E (NUL1E * NL2PG)
+
+#if !defined(DIAGNOSTIC)
+#ifdef __GNUC_GNU_INLINE__
+#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
+#else
+#define PMAP_INLINE extern inline
+#endif
+#else
+#define PMAP_INLINE
+#endif
+
+#ifdef PV_STATS
+#define PV_STAT(x) do { x ; } while (0)
+#else
+#define PV_STAT(x) do { } while (0)
+#endif
+
+#define pmap_l2_pindex(v) ((v) >> L2_SHIFT)
+#define pa_to_pvh(pa) (&pv_table[pmap_l2_pindex(pa)])
+
+#define NPV_LIST_LOCKS MAXCPU
+
+#define PHYS_TO_PV_LIST_LOCK(pa) \
+ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
+
+#define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \
+ struct rwlock **_lockp = (lockp); \
+ struct rwlock *_new_lock; \
+ \
+ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \
+ if (_new_lock != *_lockp) { \
+ if (*_lockp != NULL) \
+ rw_wunlock(*_lockp); \
+ *_lockp = _new_lock; \
+ rw_wlock(*_lockp); \
+ } \
+} while (0)
+
+#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
+
+#define RELEASE_PV_LIST_LOCK(lockp) do { \
+ struct rwlock **_lockp = (lockp); \
+ \
+ if (*_lockp != NULL) { \
+ rw_wunlock(*_lockp); \
+ *_lockp = NULL; \
+ } \
+} while (0)
+
+#define VM_PAGE_TO_PV_LIST_LOCK(m) \
+ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
+
+/*
+ * The presence of this flag indicates that the mapping is writeable.
+ * If the ATTR_S1_AP_RO bit is also set, then the mapping is clean, otherwise
+ * it is dirty. This flag may only be set on managed mappings.
+ *
+ * The DBM bit is reserved on ARMv8.0 but it seems we can safely treat it
+ * as a software managed bit.
+ */
+#define ATTR_SW_DBM ATTR_DBM
+
+struct pmap kernel_pmap_store;
+
+/* Used for mapping ACPI memory before VM is initialized */
+#define PMAP_PREINIT_MAPPING_COUNT 32
+#define PMAP_PREINIT_MAPPING_SIZE (PMAP_PREINIT_MAPPING_COUNT * L2_SIZE)
+static vm_offset_t preinit_map_va; /* Start VA of pre-init mapping space */
+static int vm_initialized = 0; /* No need to use pre-init maps when set */
+
+/*
+ * Reserve a few L2 blocks starting from the 'preinit_map_va' pointer.
+ * Always map an entire L2 block for simplicity.
+ * VA of the i-th L2 block = preinit_map_va + i * L2_SIZE
+ */
+static struct pmap_preinit_mapping {
+ vm_paddr_t pa;
+ vm_offset_t va;
+ vm_size_t size;
+} pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT];
+
+vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
+vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
+vm_offset_t kernel_vm_end = 0;
+
+/*
+ * Data for the pv entry allocation mechanism.
+ */
+static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
+static struct mtx pv_chunks_mutex;
+static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
+static struct md_page *pv_table;
+static struct md_page pv_dummy;
+
+vm_paddr_t dmap_phys_base; /* The start of the dmap region */
+vm_paddr_t dmap_phys_max; /* The limit of the dmap region */
+vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */
+
+/* This code assumes all L1 DMAP entries will be used */
+CTASSERT((DMAP_MIN_ADDRESS & ~L0_OFFSET) == DMAP_MIN_ADDRESS);
+CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS);
+
+#define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT)
+extern pt_entry_t pagetable_dmap[];
+
+#define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1))
+static vm_paddr_t physmap[PHYSMAP_SIZE];
+static u_int physmap_idx;
+
+static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "VM/pmap parameters");
+
+/*
+ * This ASID allocator uses a bit vector ("asid_set") to remember which ASIDs
+ * that it has currently allocated to a pmap, a cursor ("asid_next") to
+ * optimize its search for a free ASID in the bit vector, and an epoch number
+ * ("asid_epoch") to indicate when it has reclaimed all previously allocated
+ * ASIDs that are not currently active on a processor.
+ *
+ * The current epoch number is always in the range [0, INT_MAX). Negative
+ * numbers and INT_MAX are reserved for special cases that are described
+ * below.
+ */
+struct asid_set {
+ int asid_bits;
+ bitstr_t *asid_set;
+ int asid_set_size;
+ int asid_next;
+ int asid_epoch;
+ struct mtx asid_set_mutex;
+};
+
+static struct asid_set asids;
+static struct asid_set vmids;
+
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, asid, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "ASID allocator");
+SYSCTL_INT(_vm_pmap_asid, OID_AUTO, bits, CTLFLAG_RD, &asids.asid_bits, 0,
+ "The number of bits in an ASID");
+SYSCTL_INT(_vm_pmap_asid, OID_AUTO, next, CTLFLAG_RD, &asids.asid_next, 0,
+ "The last allocated ASID plus one");
+SYSCTL_INT(_vm_pmap_asid, OID_AUTO, epoch, CTLFLAG_RD, &asids.asid_epoch, 0,
+ "The current epoch number");
+
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, vmid, CTLFLAG_RD, 0, "VMID allocator");
+SYSCTL_INT(_vm_pmap_vmid, OID_AUTO, bits, CTLFLAG_RD, &vmids.asid_bits, 0,
+    "The number of bits in a VMID");
+SYSCTL_INT(_vm_pmap_vmid, OID_AUTO, next, CTLFLAG_RD, &vmids.asid_next, 0,
+ "The last allocated VMID plus one");
+SYSCTL_INT(_vm_pmap_vmid, OID_AUTO, epoch, CTLFLAG_RD, &vmids.asid_epoch, 0,
+ "The current epoch number");
+
+void (*pmap_clean_stage2_tlbi)(void);
+void (*pmap_invalidate_vpipt_icache)(void);
+
+/*
+ * A pmap's cookie encodes an ASID and epoch number. Cookies for reserved
+ * ASIDs have a negative epoch number, specifically, INT_MIN. Cookies for
+ * dynamically allocated ASIDs have a non-negative epoch number.
+ *
+ * An invalid ASID is represented by -1.
+ *
+ * There are two special-case cookie values: (1) COOKIE_FROM(-1, INT_MIN),
+ * which indicates that an ASID should never be allocated to the pmap, and
+ * (2) COOKIE_FROM(-1, INT_MAX), which indicates that an ASID should be
+ * allocated when the pmap is next activated.
+ */
+#define COOKIE_FROM(asid, epoch) ((long)((u_int)(asid) | \
+ ((u_long)(epoch) << 32)))
+#define COOKIE_TO_ASID(cookie) ((int)(cookie))
+#define COOKIE_TO_EPOCH(cookie) ((int)((u_long)(cookie) >> 32))
+
+static int superpages_enabled = 1;
+SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
+ CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0,
+ "Are large page mappings enabled?");
+
+/*
+ * Internal flags for pmap_enter()'s helper functions.
+ */
+#define PMAP_ENTER_NORECLAIM 0x1000000 /* Don't reclaim PV entries. */
+#define PMAP_ENTER_NOREPLACE 0x2000000 /* Don't replace mappings. */
+
+static void free_pv_chunk(struct pv_chunk *pc);
+static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
+static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
+static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
+static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
+static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
+ vm_offset_t va);
+
+static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte);
+static bool pmap_activate_int(pmap_t pmap);
+static void pmap_alloc_asid(pmap_t pmap);
+static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
+static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va);
+static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
+ vm_offset_t va, struct rwlock **lockp);
+static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
+static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
+ vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
+static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2,
+ u_int flags, vm_page_t m, struct rwlock **lockp);
+static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva,
+ pd_entry_t l1e, struct spglist *free, struct rwlock **lockp);
+static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
+ pd_entry_t l2e, struct spglist *free, struct rwlock **lockp);
+static void pmap_reset_asid_set(pmap_t pmap);
+static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
+ vm_page_t m, struct rwlock **lockp);
+
+static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
+ struct rwlock **lockp);
+
+static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ struct spglist *free);
+static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
+static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
+
+/*
+ * These load the old table data and store the new value.
+ * They need to be atomic as the System MMU may write to the table at
+ * the same time as the CPU.
+ */
+#define pmap_clear(table) atomic_store_64(table, 0)
+#define pmap_clear_bits(table, bits) atomic_clear_64(table, bits)
+#define pmap_load(table) (*table)
+#define pmap_load_clear(table) atomic_swap_64(table, 0)
+#define pmap_load_store(table, entry) atomic_swap_64(table, entry)
+#define pmap_set_bits(table, bits) atomic_set_64(table, bits)
+#define pmap_store(table, entry) atomic_store_64(table, entry)
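+/*
+ * Note: a plain aligned 64-bit load is single-copy atomic on arm64,
+ * which is why pmap_load() can be a simple dereference while the
+ * updates above use explicit atomics.
+ */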
+
+/********************/
+/* Inline functions */
+/********************/
+
+static __inline void
+pagecopy(void *s, void *d)
+{
+
+ memcpy(d, s, PAGE_SIZE);
+}
+
+static __inline pd_entry_t *
+pmap_l0(pmap_t pmap, vm_offset_t va)
+{
+
+ return (&pmap->pm_l0[pmap_l0_index(va)]);
+}
+
+static __inline pd_entry_t *
+pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
+{
+ pd_entry_t *l1;
+
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
+ return (&l1[pmap_l1_index(va)]);
+}
+
+static __inline pd_entry_t *
+pmap_l1(pmap_t pmap, vm_offset_t va)
+{
+ pd_entry_t *l0;
+
+ l0 = pmap_l0(pmap, va);
+ if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE)
+ return (NULL);
+
+ return (pmap_l0_to_l1(l0, va));
+}
+
+static __inline pd_entry_t *
+pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
+{
+ pd_entry_t *l2;
+
+ l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
+ return (&l2[pmap_l2_index(va)]);
+}
+
+static __inline pd_entry_t *
+pmap_l2(pmap_t pmap, vm_offset_t va)
+{
+ pd_entry_t *l1;
+
+ l1 = pmap_l1(pmap, va);
+ if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE)
+ return (NULL);
+
+ return (pmap_l1_to_l2(l1, va));
+}
+
+static __inline pt_entry_t *
+pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
+{
+ pt_entry_t *l3;
+
+ l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
+ return (&l3[pmap_l3_index(va)]);
+}
+
+/*
+ * Returns the lowest valid pde for a given virtual address.
+ * The next level may or may not point to a valid page or block.
+ */
+static __inline pd_entry_t *
+pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
+{
+ pd_entry_t *l0, *l1, *l2, desc;
+
+ l0 = pmap_l0(pmap, va);
+ desc = pmap_load(l0) & ATTR_DESCR_MASK;
+ if (desc != L0_TABLE) {
+ *level = -1;
+ return (NULL);
+ }
+
+ l1 = pmap_l0_to_l1(l0, va);
+ desc = pmap_load(l1) & ATTR_DESCR_MASK;
+ if (desc != L1_TABLE) {
+ *level = 0;
+ return (l0);
+ }
+
+ l2 = pmap_l1_to_l2(l1, va);
+ desc = pmap_load(l2) & ATTR_DESCR_MASK;
+ if (desc != L2_TABLE) {
+ *level = 1;
+ return (l1);
+ }
+
+ *level = 2;
+ return (l2);
+}
+
+/*
+ * Returns the lowest valid pte block or table entry for a given virtual
+ * address. If there are no valid entries return NULL and set the level to
+ * the first invalid level.
+ */
+static __inline pt_entry_t *
+pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
+{
+ pd_entry_t *l1, *l2, desc;
+ pt_entry_t *l3;
+
+ l1 = pmap_l1(pmap, va);
+ if (l1 == NULL) {
+ *level = 0;
+ return (NULL);
+ }
+ desc = pmap_load(l1) & ATTR_DESCR_MASK;
+ if (desc == L1_BLOCK) {
+ *level = 1;
+ return (l1);
+ }
+
+ if (desc != L1_TABLE) {
+ *level = 1;
+ return (NULL);
+ }
+
+ l2 = pmap_l1_to_l2(l1, va);
+ desc = pmap_load(l2) & ATTR_DESCR_MASK;
+ if (desc == L2_BLOCK) {
+ *level = 2;
+ return (l2);
+ }
+
+ if (desc != L2_TABLE) {
+ *level = 2;
+ return (NULL);
+ }
+
+ *level = 3;
+ l3 = pmap_l2_to_l3(l2, va);
+ if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
+ return (NULL);
+
+ return (l3);
+}
+
+bool
+pmap_ps_enabled(pmap_t pmap __unused)
+{
+
+ return (superpages_enabled != 0);
+}
+
+bool
+pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
+ pd_entry_t **l2, pt_entry_t **l3)
+{
+ pd_entry_t *l0p, *l1p, *l2p;
+
+ if (pmap->pm_l0 == NULL)
+ return (false);
+
+ l0p = pmap_l0(pmap, va);
+ *l0 = l0p;
+
+ if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
+ return (false);
+
+ l1p = pmap_l0_to_l1(l0p, va);
+ *l1 = l1p;
+
+ if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
+ *l2 = NULL;
+ *l3 = NULL;
+ return (true);
+ }
+
+ if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
+ return (false);
+
+ l2p = pmap_l1_to_l2(l1p, va);
+ *l2 = l2p;
+
+ if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
+ *l3 = NULL;
+ return (true);
+ }
+
+ if ((pmap_load(l2p) & ATTR_DESCR_MASK) != L2_TABLE)
+ return (false);
+
+ *l3 = pmap_l2_to_l3(l2p, va);
+
+ return (true);
+}
+
+static __inline int
+pmap_l3_valid(pt_entry_t l3)
+{
+
+ return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
+}
+
+CTASSERT(L1_BLOCK == L2_BLOCK);
+
+static pt_entry_t
+pmap_pte_memattr(pmap_t pmap, vm_memattr_t memattr)
+{
+ pt_entry_t val;
+
+ if (pmap->pm_stage == PM_STAGE1) {
+ val = ATTR_S1_IDX(memattr);
+ if (memattr == VM_MEMATTR_DEVICE)
+ val |= ATTR_S1_XN;
+ return (val);
+ }
+
+ val = 0;
+
+ switch (memattr) {
+ case VM_MEMATTR_DEVICE:
+ return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_DEVICE_nGnRnE) |
+ ATTR_S2_XN(ATTR_S2_XN_ALL));
+ case VM_MEMATTR_UNCACHEABLE:
+ return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_NC));
+ case VM_MEMATTR_WRITE_BACK:
+ return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_WB));
+ case VM_MEMATTR_WRITE_THROUGH:
+ return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_WT));
+ default:
+ panic("%s: invalid memory attribute %x", __func__, memattr);
+ }
+}
+
+static pt_entry_t
+pmap_pte_prot(pmap_t pmap, vm_prot_t prot)
+{
+ pt_entry_t val;
+
+ val = 0;
+ if (pmap->pm_stage == PM_STAGE1) {
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ val |= ATTR_S1_XN;
+ if ((prot & VM_PROT_WRITE) == 0)
+ val |= ATTR_S1_AP(ATTR_S1_AP_RO);
+ } else {
+ if ((prot & VM_PROT_WRITE) != 0)
+ val |= ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE);
+ if ((prot & VM_PROT_READ) != 0)
+ val |= ATTR_S2_S2AP(ATTR_S2_S2AP_READ);
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ val |= ATTR_S2_XN(ATTR_S2_XN_ALL);
+ }
+
+ return (val);
+}
+
+/*
+ * Checks if the PTE is dirty.
+ */
+static inline int
+pmap_pte_dirty(pmap_t pmap, pt_entry_t pte)
+{
+
+ KASSERT((pte & ATTR_SW_MANAGED) != 0, ("pte %#lx is unmanaged", pte));
+
+ if (pmap->pm_stage == PM_STAGE1) {
+ KASSERT((pte & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) != 0,
+ ("pte %#lx is writeable and missing ATTR_SW_DBM", pte));
+
+ return ((pte & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
+ (ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_SW_DBM));
+ }
+
+ return ((pte & ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE)) ==
+ ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE));
+}
+
+static __inline void
+pmap_resident_count_inc(pmap_t pmap, int count)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ pmap->pm_stats.resident_count += count;
+}
+
+static __inline void
+pmap_resident_count_dec(pmap_t pmap, int count)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT(pmap->pm_stats.resident_count >= count,
+ ("pmap %p resident count underflow %ld %d", pmap,
+ pmap->pm_stats.resident_count, count));
+ pmap->pm_stats.resident_count -= count;
+}
+
+static pt_entry_t *
+pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
+ u_int *l2_slot)
+{
+ pt_entry_t *l2;
+ pd_entry_t *l1;
+
+ l1 = (pd_entry_t *)l1pt;
+ *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;
+
+	/* Check that locore used an L1 table mapping */
+ KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
+ ("Invalid bootstrap L1 table"));
+ /* Find the address of the L2 table */
+ l2 = (pt_entry_t *)init_pt_va;
+ *l2_slot = pmap_l2_index(va);
+
+ return (l2);
+}
+
+static vm_paddr_t
+pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
+{
+ u_int l1_slot, l2_slot;
+ pt_entry_t *l2;
+
+ l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);
+
+ return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
+}
+
+static vm_offset_t
+pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa,
+ vm_offset_t freemempos)
+{
+ pt_entry_t *l2;
+ vm_offset_t va;
+ vm_paddr_t l2_pa, pa;
+ u_int l1_slot, l2_slot, prev_l1_slot;
+ int i;
+
+ dmap_phys_base = min_pa & ~L1_OFFSET;
+ dmap_phys_max = 0;
+ dmap_max_addr = 0;
+ l2 = NULL;
+ prev_l1_slot = -1;
+
+ memset(pagetable_dmap, 0, PAGE_SIZE * DMAP_TABLES);
+
+ for (i = 0; i < (physmap_idx * 2); i += 2) {
+ pa = physmap[i] & ~L2_OFFSET;
+ va = pa - dmap_phys_base + DMAP_MIN_ADDRESS;
+
+ /* Create L2 mappings at the start of the region */
+ if ((pa & L1_OFFSET) != 0) {
+ l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);
+ if (l1_slot != prev_l1_slot) {
+ prev_l1_slot = l1_slot;
+ l2 = (pt_entry_t *)freemempos;
+ l2_pa = pmap_early_vtophys(kern_l1,
+ (vm_offset_t)l2);
+ freemempos += PAGE_SIZE;
+
+ pmap_store(&pagetable_dmap[l1_slot],
+ (l2_pa & ~Ln_TABLE_MASK) | L1_TABLE);
+
+ memset(l2, 0, PAGE_SIZE);
+ }
+ KASSERT(l2 != NULL,
+ ("pmap_bootstrap_dmap: NULL l2 map"));
+ for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1];
+ pa += L2_SIZE, va += L2_SIZE) {
+ /*
+ * We are on a boundary, stop to
+ * create a level 1 block
+ */
+ if ((pa & L1_OFFSET) == 0)
+ break;
+
+ l2_slot = pmap_l2_index(va);
+ KASSERT(l2_slot != 0, ("..."));
+ pmap_store(&l2[l2_slot],
+ (pa & ~L2_OFFSET) | ATTR_DEFAULT |
+ ATTR_S1_XN |
+ ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) |
+ L2_BLOCK);
+ }
+ KASSERT(va == (pa - dmap_phys_base + DMAP_MIN_ADDRESS),
+ ("..."));
+ }
+
+ for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1] &&
+ (physmap[i + 1] - pa) >= L1_SIZE;
+ pa += L1_SIZE, va += L1_SIZE) {
+ l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);
+ pmap_store(&pagetable_dmap[l1_slot],
+ (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_S1_XN |
+ ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | L1_BLOCK);
+ }
+
+ /* Create L2 mappings at the end of the region */
+ if (pa < physmap[i + 1]) {
+ l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);
+ if (l1_slot != prev_l1_slot) {
+ prev_l1_slot = l1_slot;
+ l2 = (pt_entry_t *)freemempos;
+ l2_pa = pmap_early_vtophys(kern_l1,
+ (vm_offset_t)l2);
+ freemempos += PAGE_SIZE;
+
+ pmap_store(&pagetable_dmap[l1_slot],
+ (l2_pa & ~Ln_TABLE_MASK) | L1_TABLE);
+
+ memset(l2, 0, PAGE_SIZE);
+ }
+ KASSERT(l2 != NULL,
+ ("pmap_bootstrap_dmap: NULL l2 map"));
+ for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1];
+ pa += L2_SIZE, va += L2_SIZE) {
+ l2_slot = pmap_l2_index(va);
+ pmap_store(&l2[l2_slot],
+ (pa & ~L2_OFFSET) | ATTR_DEFAULT |
+ ATTR_S1_XN |
+ ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) |
+ L2_BLOCK);
+ }
+ }
+
+ if (pa > dmap_phys_max) {
+ dmap_phys_max = pa;
+ dmap_max_addr = va;
+ }
+ }
+
+ cpu_tlb_flushID();
+
+ return (freemempos);
+}
+
+static vm_offset_t
+pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
+{
+ vm_offset_t l2pt;
+ vm_paddr_t pa;
+ pd_entry_t *l1;
+ u_int l1_slot;
+
+ KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));
+
+ l1 = (pd_entry_t *)l1pt;
+ l1_slot = pmap_l1_index(va);
+ l2pt = l2_start;
+
+ for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
+ KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
+
+ pa = pmap_early_vtophys(l1pt, l2pt);
+ pmap_store(&l1[l1_slot],
+ (pa & ~Ln_TABLE_MASK) | L1_TABLE);
+ l2pt += PAGE_SIZE;
+ }
+
+ /* Clean the L2 page table */
+ memset((void *)l2_start, 0, l2pt - l2_start);
+
+	return (l2pt);
+}
+
+static vm_offset_t
+pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
+{
+ vm_offset_t l3pt;
+ vm_paddr_t pa;
+ pd_entry_t *l2;
+ u_int l2_slot;
+
+ KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));
+
+ l2 = pmap_l2(kernel_pmap, va);
+ l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE);
+ l2_slot = pmap_l2_index(va);
+ l3pt = l3_start;
+
+ for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
+ KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));
+
+ pa = pmap_early_vtophys(l1pt, l3pt);
+ pmap_store(&l2[l2_slot],
+ (pa & ~Ln_TABLE_MASK) | ATTR_S1_UXN | L2_TABLE);
+ l3pt += PAGE_SIZE;
+ }
+
+	/* Clean the L3 page tables */
+ memset((void *)l3_start, 0, l3pt - l3_start);
+
+	return (l3pt);
+}
+
+/*
+ * Bootstrap the system enough to run with virtual memory.
+ */
+void
+pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
+ vm_size_t kernlen)
+{
+ vm_offset_t freemempos;
+ vm_offset_t dpcpu, msgbufpv;
+ vm_paddr_t start_pa, pa, min_pa;
+ uint64_t kern_delta;
+ int i;
+
+ /* Verify that the ASID is set through TTBR0. */
+ KASSERT((READ_SPECIALREG(tcr_el1) & TCR_A1) == 0,
+ ("pmap_bootstrap: TCR_EL1.A1 != 0"));
+
+ kern_delta = KERNBASE - kernstart;
+
+ printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
+ printf("%lx\n", l1pt);
+ printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);
+
+ /* Set this early so we can use the pagetable walking functions */
+ kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
+ PMAP_LOCK_INIT(kernel_pmap);
+ kernel_pmap->pm_l0_paddr = l0pt - kern_delta;
+ kernel_pmap->pm_cookie = COOKIE_FROM(-1, INT_MIN);
+ kernel_pmap->pm_stage = PM_STAGE1;
+ kernel_pmap->pm_asid_set = &asids;
+
+ /* Assume the address we were loaded to is a valid physical address */
+ min_pa = KERNBASE - kern_delta;
+
+ physmap_idx = physmem_avail(physmap, nitems(physmap));
+ physmap_idx /= 2;
+
+ /*
+ * Find the minimum physical address. physmap is sorted,
+ * but may contain empty ranges.
+ */
+ for (i = 0; i < physmap_idx * 2; i += 2) {
+ if (physmap[i] == physmap[i + 1])
+ continue;
+ if (physmap[i] <= min_pa)
+ min_pa = physmap[i];
+ }
+
+ freemempos = KERNBASE + kernlen;
+ freemempos = roundup2(freemempos, PAGE_SIZE);
+
+ /* Create a direct map region early so we can use it for pa -> va */
+ freemempos = pmap_bootstrap_dmap(l1pt, min_pa, freemempos);
+
+ start_pa = pa = KERNBASE - kern_delta;
+
+ /*
+ * Create the l2 tables up to VM_MAX_KERNEL_ADDRESS. We assume that the
+ * loader allocated the first and only l2 page table page used to map
+ * the kernel, preloaded files and module metadata.
+ */
+ freemempos = pmap_bootstrap_l2(l1pt, KERNBASE + L1_SIZE, freemempos);
+ /* And the l3 tables for the early devmap */
+ freemempos = pmap_bootstrap_l3(l1pt,
+ VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE), freemempos);
+
+ cpu_tlb_flushID();
+
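+/* Hand out "np" zeroed pages from the boot-time free memory pool. */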
+#define alloc_pages(var, np) \
+ (var) = freemempos; \
+	freemempos += ((np) * PAGE_SIZE);		\
+ memset((char *)(var), 0, ((np) * PAGE_SIZE));
+
+ /* Allocate dynamic per-cpu area. */
+ alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
+ dpcpu_init((void *)dpcpu, 0);
+
+ /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
+ alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
+ msgbufp = (void *)msgbufpv;
+
+ /* Reserve some VA space for early BIOS/ACPI mapping */
+ preinit_map_va = roundup2(freemempos, L2_SIZE);
+
+ virtual_avail = preinit_map_va + PMAP_PREINIT_MAPPING_SIZE;
+ virtual_avail = roundup2(virtual_avail, L1_SIZE);
+ virtual_end = VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE);
+ kernel_vm_end = virtual_avail;
+
+ pa = pmap_early_vtophys(l1pt, freemempos);
+
+ physmem_exclude_region(start_pa, pa - start_pa, EXFLAG_NOALLOC);
+
+ cpu_tlb_flushID();
+}
+
+/*
+ * Initialize a vm_page's machine-dependent fields.
+ */
+void
+pmap_page_init(vm_page_t m)
+{
+
+ TAILQ_INIT(&m->md.pv_list);
+ m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
+}
+
+static void
+pmap_init_asids(struct asid_set *set, int bits)
+{
+ int i;
+
+ set->asid_bits = bits;
+
+ /*
+ * We may be too early in the overall initialization process to use
+ * bit_alloc().
+ */
+ set->asid_set_size = 1 << set->asid_bits;
+ set->asid_set = (bitstr_t *)kmem_malloc(bitstr_size(set->asid_set_size),
+ M_WAITOK | M_ZERO);
+ for (i = 0; i < ASID_FIRST_AVAILABLE; i++)
+ bit_set(set->asid_set, i);
+ set->asid_next = ASID_FIRST_AVAILABLE;
+ mtx_init(&set->asid_set_mutex, "asid set", NULL, MTX_SPIN);
+}
+
+/*
+ * Initialize the pmap module.
+ * Called by vm_init to initialize any structures that the pmap
+ * system needs to map virtual memory.
+ */
+void
+pmap_init(void)
+{
+ vm_size_t s;
+ uint64_t mmfr1;
+ int i, pv_npg, vmid_bits;
+
+ /*
+ * Are large page mappings enabled?
+ */
+ TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
+ if (superpages_enabled) {
+ KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
+ ("pmap_init: can't assign to pagesizes[1]"));
+ pagesizes[1] = L2_SIZE;
+ }
+
+ /*
+ * Initialize the ASID allocator.
+ */
+ pmap_init_asids(&asids,
+ (READ_SPECIALREG(tcr_el1) & TCR_ASID_16) != 0 ? 16 : 8);
+
+ if (has_hyp()) {
+ mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
+ vmid_bits = 8;
+
+ if (ID_AA64MMFR1_VMIDBits_VAL(mmfr1) ==
+ ID_AA64MMFR1_VMIDBits_16)
+ vmid_bits = 16;
+ pmap_init_asids(&vmids, vmid_bits);
+ }
+
+ /*
+ * Initialize the pv chunk list mutex.
+ */
+ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
+
+ /*
+ * Initialize the pool of pv list locks.
+ */
+ for (i = 0; i < NPV_LIST_LOCKS; i++)
+ rw_init(&pv_list_locks[i], "pmap pv list");
+
+ /*
+ * Calculate the size of the pv head table for superpages.
+ */
+ pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE);
+
+ /*
+ * Allocate memory for the pv head table for superpages.
+ */
+ s = (vm_size_t)(pv_npg * sizeof(struct md_page));
+ s = round_page(s);
+ pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
+ for (i = 0; i < pv_npg; i++)
+ TAILQ_INIT(&pv_table[i].pv_list);
+ TAILQ_INIT(&pv_dummy.pv_list);
+
+ vm_initialized = 1;
+}
+
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "2MB page mapping counters");
+
+static u_long pmap_l2_demotions;
+SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD,
+ &pmap_l2_demotions, 0, "2MB page demotions");
+
+static u_long pmap_l2_mappings;
+SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, mappings, CTLFLAG_RD,
+ &pmap_l2_mappings, 0, "2MB page mappings");
+
+static u_long pmap_l2_p_failures;
+SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD,
+ &pmap_l2_p_failures, 0, "2MB page promotion failures");
+
+static u_long pmap_l2_promotions;
+SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD,
+ &pmap_l2_promotions, 0, "2MB page promotions");
+
+/*
+ * Invalidate a single TLB entry.
+ */
+static __inline void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
+ uint64_t r;
+
+ PMAP_ASSERT_STAGE1(pmap);
+
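+	/*
+	 * The "ishst" barrier ensures that earlier page table updates are
+	 * visible to the other CPUs' table walkers before the broadcast
+	 * invalidation. Kernel mappings are invalidated by VA for all ASIDs
+	 * ("vaae1is"); user mappings are invalidated by VA and ASID
+	 * ("vae1is").
+	 */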
+ dsb(ishst);
+ if (pmap == kernel_pmap) {
+ r = atop(va);
+ __asm __volatile("tlbi vaae1is, %0" : : "r" (r));
+ } else {
+ r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)) | atop(va);
+ __asm __volatile("tlbi vae1is, %0" : : "r" (r));
+ }
+ dsb(ish);
+ isb();
+}
+
+static __inline void
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ uint64_t end, r, start;
+
+ PMAP_ASSERT_STAGE1(pmap);
+
+ dsb(ishst);
+ if (pmap == kernel_pmap) {
+ start = atop(sva);
+ end = atop(eva);
+ for (r = start; r < end; r++)
+ __asm __volatile("tlbi vaae1is, %0" : : "r" (r));
+ } else {
+ start = end = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
+ start |= atop(sva);
+ end |= atop(eva);
+ for (r = start; r < end; r++)
+ __asm __volatile("tlbi vae1is, %0" : : "r" (r));
+ }
+ dsb(ish);
+ isb();
+}
+
+static __inline void
+pmap_invalidate_all(pmap_t pmap)
+{
+ uint64_t r;
+
+ PMAP_ASSERT_STAGE1(pmap);
+
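+	/*
+	 * "vmalle1is" invalidates every stage 1 EL1&0 entry on all CPUs in
+	 * the inner shareable domain, while "aside1is" invalidates only the
+	 * entries tagged with the pmap's ASID.
+	 */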
+ dsb(ishst);
+ if (pmap == kernel_pmap) {
+ __asm __volatile("tlbi vmalle1is");
+ } else {
+ r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
+ __asm __volatile("tlbi aside1is, %0" : : "r" (r));
+ }
+ dsb(ish);
+ isb();
+}
+
+/*
+ * Routine: pmap_extract
+ * Function:
+ * Extract the physical page address associated
+ * with the given map/virtual_address pair.
+ */
+vm_paddr_t
+pmap_extract(pmap_t pmap, vm_offset_t va)
+{
+ pt_entry_t *pte, tpte;
+ vm_paddr_t pa;
+ int lvl;
+
+ pa = 0;
+ PMAP_LOCK(pmap);
+ /*
+ * Find the block or page map for this virtual address. pmap_pte
+ * will return either a valid block/page entry, or NULL.
+ */
+ pte = pmap_pte(pmap, va, &lvl);
+ if (pte != NULL) {
+ tpte = pmap_load(pte);
+ pa = tpte & ~ATTR_MASK;
+ switch(lvl) {
+ case 1:
+ KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
+ ("pmap_extract: Invalid L1 pte found: %lx",
+ tpte & ATTR_DESCR_MASK));
+ pa |= (va & L1_OFFSET);
+ break;
+ case 2:
+ KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
+ ("pmap_extract: Invalid L2 pte found: %lx",
+ tpte & ATTR_DESCR_MASK));
+ pa |= (va & L2_OFFSET);
+ break;
+ case 3:
+ KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
+ ("pmap_extract: Invalid L3 pte found: %lx",
+ tpte & ATTR_DESCR_MASK));
+ pa |= (va & L3_OFFSET);
+ break;
+ }
+ }
+ PMAP_UNLOCK(pmap);
+ return (pa);
+}
+
+/*
+ * Routine: pmap_extract_and_hold
+ * Function:
+ * Atomically extract and hold the physical page
+ * with the given pmap and virtual address pair
+ * if that mapping permits the given protection.
+ */
+vm_page_t
+pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
+{
+ pt_entry_t *pte, tpte;
+ vm_offset_t off;
+ vm_page_t m;
+ int lvl;
+ bool use;
+
+ m = NULL;
+ PMAP_LOCK(pmap);
+ pte = pmap_pte(pmap, va, &lvl);
+ if (pte != NULL) {
+ tpte = pmap_load(pte);
+
+ KASSERT(lvl > 0 && lvl <= 3,
+ ("pmap_extract_and_hold: Invalid level %d", lvl));
+ CTASSERT(L1_BLOCK == L2_BLOCK);
+ KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
+ (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
+ ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
+ tpte & ATTR_DESCR_MASK));
+
+ use = false;
+ if ((prot & VM_PROT_WRITE) == 0)
+ use = true;
+ else if (pmap->pm_stage == PM_STAGE1 &&
+ (tpte & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP(ATTR_S1_AP_RW))
+ use = true;
+ else if (pmap->pm_stage == PM_STAGE2 &&
+ ((tpte & ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE)) ==
+ ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE)))
+ use = true;
+
+ if (use) {
+ switch(lvl) {
+ case 1:
+ off = va & L1_OFFSET;
+ break;
+ case 2:
+ off = va & L2_OFFSET;
+ break;
+ case 3:
+ default:
+ off = 0;
+ }
+ m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
+ }
+ }
+ PMAP_UNLOCK(pmap);
+ return (m);
+}
+
+vm_paddr_t
+pmap_kextract(vm_offset_t va)
+{
+ pt_entry_t *pte, tpte;
+
+ if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS)
+ return (DMAP_TO_PHYS(va));
+ pte = pmap_l1(kernel_pmap, va);
+ if (pte == NULL)
+ return (0);
+
+ /*
+ * A concurrent pmap_update_entry() will clear the entry's valid bit
+ * but leave the rest of the entry unchanged. Therefore, we treat a
+ * non-zero entry as being valid, and we ignore the valid bit when
+ * determining whether the entry maps a block, page, or table.
+ */
+ tpte = pmap_load(pte);
+ if (tpte == 0)
+ return (0);
+ if ((tpte & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_BLOCK)
+ return ((tpte & ~ATTR_MASK) | (va & L1_OFFSET));
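+	/*
+	 * pmap_l1_to_l2() and pmap_l2_to_l3() only read the table address
+	 * from the entry they are given, so it is safe to pass the address
+	 * of the local copy "tpte" here.
+	 */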
+ pte = pmap_l1_to_l2(&tpte, va);
+ tpte = pmap_load(pte);
+ if (tpte == 0)
+ return (0);
+ if ((tpte & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_BLOCK)
+ return ((tpte & ~ATTR_MASK) | (va & L2_OFFSET));
+ pte = pmap_l2_to_l3(&tpte, va);
+ tpte = pmap_load(pte);
+ if (tpte == 0)
+ return (0);
+ return ((tpte & ~ATTR_MASK) | (va & L3_OFFSET));
+}
+
+/***************************************************
+ * Low level mapping routines.....
+ ***************************************************/
+
+void
+pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
+{
+ pd_entry_t *pde;
+ pt_entry_t *pte, attr;
+ vm_offset_t va;
+ int lvl;
+
+ KASSERT((pa & L3_OFFSET) == 0,
+ ("pmap_kenter: Invalid physical address"));
+ KASSERT((sva & L3_OFFSET) == 0,
+ ("pmap_kenter: Invalid virtual address"));
+ KASSERT((size & PAGE_MASK) == 0,
+ ("pmap_kenter: Mapping is not page-sized"));
+
+ attr = ATTR_DEFAULT | ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_XN |
+ ATTR_S1_IDX(mode) | L3_PAGE;
+ va = sva;
+ while (size != 0) {
+ pde = pmap_pde(kernel_pmap, va, &lvl);
+ KASSERT(pde != NULL,
+ ("pmap_kenter: Invalid page entry, va: 0x%lx", va));
+ KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl));
+
+ pte = pmap_l2_to_l3(pde, va);
+ pmap_load_store(pte, (pa & ~L3_OFFSET) | attr);
+
+ va += PAGE_SIZE;
+ pa += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+ pmap_invalidate_range(kernel_pmap, sva, va);
+}
+
+void
+pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
+{
+
+ pmap_kenter(sva, size, pa, VM_MEMATTR_DEVICE);
+}
+
+/*
+ * Remove a page from the kernel pagetables.
+ */
+PMAP_INLINE void
+pmap_kremove(vm_offset_t va)
+{
+ pt_entry_t *pte;
+ int lvl;
+
+ pte = pmap_pte(kernel_pmap, va, &lvl);
+ KASSERT(pte != NULL, ("pmap_kremove: Invalid address"));
+ KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));
+
+ pmap_clear(pte);
+ pmap_invalidate_page(kernel_pmap, va);
+}
+
+void
+pmap_kremove_device(vm_offset_t sva, vm_size_t size)
+{
+ pt_entry_t *pte;
+ vm_offset_t va;
+ int lvl;
+
+ KASSERT((sva & L3_OFFSET) == 0,
+ ("pmap_kremove_device: Invalid virtual address"));
+ KASSERT((size & PAGE_MASK) == 0,
+ ("pmap_kremove_device: Mapping is not page-sized"));
+
+ va = sva;
+ while (size != 0) {
+ pte = pmap_pte(kernel_pmap, va, &lvl);
+ KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va));
+ KASSERT(lvl == 3,
+ ("Invalid device pagetable level: %d != 3", lvl));
+ pmap_clear(pte);
+
+ va += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+ pmap_invalidate_range(kernel_pmap, sva, va);
+}
+
+/*
+ * Used to map a range of physical addresses into kernel
+ * virtual address space.
+ *
+ * The value passed in '*virt' is a suggested virtual address for
+ * the mapping. Architectures which can support a direct-mapped
+ * physical to virtual region can return the appropriate address
+ * within that region, leaving '*virt' unchanged. Other
+ * architectures should map the pages starting at '*virt' and
+ * update '*virt' with the first usable address after the mapped
+ * region.
+ */
+vm_offset_t
+pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
+{
+	return (PHYS_TO_DMAP(start));
+}
+
+/*
+ * Add a list of wired pages to the kva. This routine is only used for
+ * temporary kernel mappings that do not need to have page modification
+ * or references recorded. Note that old mappings are simply written
+ * over. The pages *must* be wired.
+ * Note: SMP coherent. Uses a ranged TLB invalidation.
+ */
+void
+pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
+{
+ pd_entry_t *pde;
+ pt_entry_t *pte, pa;
+ vm_offset_t va;
+ vm_page_t m;
+ int i, lvl;
+
+ va = sva;
+ for (i = 0; i < count; i++) {
+ pde = pmap_pde(kernel_pmap, va, &lvl);
+ KASSERT(pde != NULL,
+ ("pmap_qenter: Invalid page entry, va: 0x%lx", va));
+ KASSERT(lvl == 2,
+ ("pmap_qenter: Invalid level %d", lvl));
+
+ m = ma[i];
+ pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT |
+ ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_XN |
+ ATTR_S1_IDX(m->md.pv_memattr) | L3_PAGE;
+ pte = pmap_l2_to_l3(pde, va);
+ pmap_load_store(pte, pa);
+
+ va += L3_SIZE;
+ }
+ pmap_invalidate_range(kernel_pmap, sva, va);
+}
+
+/*
+ * This routine tears out page mappings from the
+ * kernel -- it is meant only for temporary mappings.
+ */
+void
+pmap_qremove(vm_offset_t sva, int count)
+{
+ pt_entry_t *pte;
+ vm_offset_t va;
+ int lvl;
+
+ KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));
+
+ va = sva;
+ while (count-- > 0) {
+ pte = pmap_pte(kernel_pmap, va, &lvl);
+ KASSERT(lvl == 3,
+ ("Invalid device pagetable level: %d != 3", lvl));
+ if (pte != NULL) {
+ pmap_clear(pte);
+ }
+
+ va += PAGE_SIZE;
+ }
+ pmap_invalidate_range(kernel_pmap, sva, va);
+}
+
+/***************************************************
+ * Page table page management routines.....
+ ***************************************************/
+/*
+ * Schedule the specified unused page table page to be freed. Specifically,
+ * add the page to the specified list of pages that will be released to the
+ * physical memory manager after the TLB has been updated.
+ */
+static __inline void
+pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
+ boolean_t set_PG_ZERO)
+{
+
+ if (set_PG_ZERO)
+ m->flags |= PG_ZERO;
+ else
+ m->flags &= ~PG_ZERO;
+ SLIST_INSERT_HEAD(free, m, plinks.s.ss);
+}
+
+/*
+ * Decrements a page table page's reference count, which is used to record the
+ * number of valid page table entries within the page. If the reference count
+ * drops to zero, then the page table page is unmapped. Returns TRUE if the
+ * page table page was unmapped and FALSE otherwise.
+ */
+static inline boolean_t
+pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
+{
+
+ --m->ref_count;
+ if (m->ref_count == 0) {
+ _pmap_unwire_l3(pmap, va, m, free);
+ return (TRUE);
+ } else
+ return (FALSE);
+}
+
+static void
+_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
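+	/*
+	 * A page table page's pindex encodes its level: indices below NUL2E
+	 * are L3 pages, [NUL2E, NUL2E + NUL1E) are L2 pages, and
+	 * NUL2E + NUL1E and above are L1 pages.
+	 */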
+ /*
+ * unmap the page table page
+ */
+ if (m->pindex >= (NUL2E + NUL1E)) {
+ /* l1 page */
+ pd_entry_t *l0;
+
+ l0 = pmap_l0(pmap, va);
+ pmap_clear(l0);
+ } else if (m->pindex >= NUL2E) {
+ /* l2 page */
+ pd_entry_t *l1;
+
+ l1 = pmap_l1(pmap, va);
+ pmap_clear(l1);
+ } else {
+ /* l3 page */
+ pd_entry_t *l2;
+
+ l2 = pmap_l2(pmap, va);
+ pmap_clear(l2);
+ }
+ pmap_resident_count_dec(pmap, 1);
+ if (m->pindex < NUL2E) {
+ /* We just released an l3, unhold the matching l2 */
+ pd_entry_t *l1, tl1;
+ vm_page_t l2pg;
+
+ l1 = pmap_l1(pmap, va);
+ tl1 = pmap_load(l1);
+ l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
+ pmap_unwire_l3(pmap, va, l2pg, free);
+ } else if (m->pindex < (NUL2E + NUL1E)) {
+ /* We just released an l2, unhold the matching l1 */
+ pd_entry_t *l0, tl0;
+ vm_page_t l1pg;
+
+ l0 = pmap_l0(pmap, va);
+ tl0 = pmap_load(l0);
+ l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+ pmap_unwire_l3(pmap, va, l1pg, free);
+ }
+ pmap_invalidate_page(pmap, va);
+
+ /*
+ * Put page on a list so that it is released after
+ * *ALL* TLB shootdown is done
+ */
+ pmap_add_delayed_free_list(m, free, TRUE);
+}
+
+/*
+ * After removing a page table entry, this routine is used to
+ * conditionally free the page, and manage the reference count.
+ */
+static int
+pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
+ struct spglist *free)
+{
+ vm_page_t mpte;
+
+ if (va >= VM_MAXUSER_ADDRESS)
+ return (0);
+ KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
+ mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
+ return (pmap_unwire_l3(pmap, va, mpte, free));
+}
+
+/*
+ * Release a page table page reference after a failed attempt to create a
+ * mapping.
+ */
+static void
+pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
+{
+ struct spglist free;
+
+ SLIST_INIT(&free);
+ if (pmap_unwire_l3(pmap, va, mpte, &free)) {
+ /*
+ * Although "va" was never mapped, the TLB could nonetheless
+ * have intermediate entries that refer to the freed page
+ * table pages. Invalidate those entries.
+ *
+ * XXX redundant invalidation (See _pmap_unwire_l3().)
+ */
+ pmap_invalidate_page(pmap, va);
+ vm_page_free_pages_toq(&free, true);
+ }
+}
+
+void
+pmap_pinit0(pmap_t pmap)
+{
+
+ PMAP_LOCK_INIT(pmap);
+ bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
+ pmap->pm_l0_paddr = READ_SPECIALREG(ttbr0_el1);
+ pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);
+ pmap->pm_root.rt_root = 0;
+ pmap->pm_cookie = COOKIE_FROM(ASID_RESERVED_FOR_PID_0, INT_MIN);
+ pmap->pm_stage = PM_STAGE1;
+ pmap->pm_asid_set = &asids;
+
+ PCPU_SET(curpmap, pmap);
+}
+
+int
+pmap_pinit_stage(pmap_t pmap, enum pmap_stage stage)
+{
+ vm_page_t l0pt;
+
+ /*
+ * allocate the l0 page
+ */
+ while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
+ VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
+ vm_wait(NULL);
+
+ pmap->pm_l0_paddr = VM_PAGE_TO_PHYS(l0pt);
+ pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);
+
+ if ((l0pt->flags & PG_ZERO) == 0)
+ pagezero(pmap->pm_l0);
+
+ pmap->pm_root.rt_root = 0;
+ bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
+ pmap->pm_cookie = COOKIE_FROM(-1, INT_MAX);
+
+ pmap->pm_stage = stage;
+ switch (stage) {
+ case PM_STAGE1:
+ pmap->pm_asid_set = &asids;
+ break;
+ case PM_STAGE2:
+ pmap->pm_asid_set = &vmids;
+ break;
+ default:
+ panic("%s: Invalid pmap type %d", __func__, stage);
+ break;
+ }
+
+ /* XXX Temporarily disable deferred ASID allocation. */
+ pmap_alloc_asid(pmap);
+
+ return (1);
+}
+
+int
+pmap_pinit(pmap_t pmap)
+{
+
+ return (pmap_pinit_stage(pmap, PM_STAGE1));
+}
+
+/*
+ * This routine is called if the desired page table page does not exist.
+ *
+ * If page table page allocation fails, this routine may sleep before
+ * returning NULL. It sleeps only if a lock pointer was given.
+ *
+ * Note: If a page allocation fails at page table level two or three,
+ * one or two pages may be held during the wait, only to be released
+ * afterwards. It is easy to argue that this conservative approach
+ * avoids race conditions.
+ */
+static vm_page_t
+_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
+{
+ vm_page_t m, l1pg, l2pg;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+ /*
+ * Allocate a page table page.
+ */
+ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
+ if (lockp != NULL) {
+ RELEASE_PV_LIST_LOCK(lockp);
+ PMAP_UNLOCK(pmap);
+ vm_wait(NULL);
+ PMAP_LOCK(pmap);
+ }
+
+ /*
+ * Indicate the need to retry. While waiting, the page table
+ * page may have been allocated.
+ */
+ return (NULL);
+ }
+ if ((m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+
+ /*
+ * Because of AArch64's weak memory consistency model, we must have a
+ * barrier here to ensure that the stores for zeroing "m", whether by
+ * pmap_zero_page() or an earlier function, are visible before adding
+ * "m" to the page table. Otherwise, a page table walk by another
+ * processor's MMU could see the mapping to "m" and a stale, non-zero
+ * PTE within "m".
+ */
+ dmb(ishst);
+
+ /*
+ * Map the pagetable page into the process address space, if
+ * it isn't already there.
+ */
+
+ if (ptepindex >= (NUL2E + NUL1E)) {
+ pd_entry_t *l0;
+ vm_pindex_t l0index;
+
+ l0index = ptepindex - (NUL2E + NUL1E);
+ l0 = &pmap->pm_l0[l0index];
+ pmap_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE);
+ } else if (ptepindex >= NUL2E) {
+ vm_pindex_t l0index, l1index;
+ pd_entry_t *l0, *l1;
+ pd_entry_t tl0;
+
+ l1index = ptepindex - NUL2E;
+ l0index = l1index >> L0_ENTRIES_SHIFT;
+
+ l0 = &pmap->pm_l0[l0index];
+ tl0 = pmap_load(l0);
+ if (tl0 == 0) {
+ /* recurse for allocating page dir */
+ if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
+ lockp) == NULL) {
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ return (NULL);
+ }
+ } else {
+ l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+ l1pg->ref_count++;
+ }
+
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
+ l1 = &l1[ptepindex & Ln_ADDR_MASK];
+ pmap_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
+ } else {
+ vm_pindex_t l0index, l1index;
+ pd_entry_t *l0, *l1, *l2;
+ pd_entry_t tl0, tl1;
+
+ l1index = ptepindex >> Ln_ENTRIES_SHIFT;
+ l0index = l1index >> L0_ENTRIES_SHIFT;
+
+ l0 = &pmap->pm_l0[l0index];
+ tl0 = pmap_load(l0);
+ if (tl0 == 0) {
+ /* recurse for allocating page dir */
+ if (_pmap_alloc_l3(pmap, NUL2E + l1index,
+ lockp) == NULL) {
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ return (NULL);
+ }
+ tl0 = pmap_load(l0);
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
+ l1 = &l1[l1index & Ln_ADDR_MASK];
+ } else {
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
+ l1 = &l1[l1index & Ln_ADDR_MASK];
+ tl1 = pmap_load(l1);
+ if (tl1 == 0) {
+ /* recurse for allocating page dir */
+ if (_pmap_alloc_l3(pmap, NUL2E + l1index,
+ lockp) == NULL) {
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ return (NULL);
+ }
+ } else {
+ l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
+ l2pg->ref_count++;
+ }
+ }
+
+ l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
+ l2 = &l2[ptepindex & Ln_ADDR_MASK];
+ pmap_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE);
+ }
+
+ pmap_resident_count_inc(pmap, 1);
+
+ return (m);
+}
+
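+/*
+ * Return a pointer to the L2 entry for "va", allocating the containing L2
+ * page table page if necessary. Kernel addresses are expected to already
+ * have an L2 page table page and the function panics if one is missing.
+ */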
+static pd_entry_t *
+pmap_alloc_l2(pmap_t pmap, vm_offset_t va, vm_page_t *l2pgp,
+ struct rwlock **lockp)
+{
+ pd_entry_t *l1, *l2;
+ vm_page_t l2pg;
+ vm_pindex_t l2pindex;
+
+retry:
+ l1 = pmap_l1(pmap, va);
+ if (l1 != NULL && (pmap_load(l1) & ATTR_DESCR_MASK) == L1_TABLE) {
+ l2 = pmap_l1_to_l2(l1, va);
+ if (va < VM_MAXUSER_ADDRESS) {
+ /* Add a reference to the L2 page. */
+ l2pg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK);
+ l2pg->ref_count++;
+ } else
+ l2pg = NULL;
+ } else if (va < VM_MAXUSER_ADDRESS) {
+ /* Allocate a L2 page. */
+ l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT;
+ l2pg = _pmap_alloc_l3(pmap, NUL2E + l2pindex, lockp);
+ if (l2pg == NULL) {
+ if (lockp != NULL)
+ goto retry;
+ else
+ return (NULL);
+ }
+ l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg));
+ l2 = &l2[pmap_l2_index(va)];
+ } else
+ panic("pmap_alloc_l2: missing page table page for va %#lx",
+ va);
+ *l2pgp = l2pg;
+ return (l2);
+}
+
+static vm_page_t
+pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
+{
+ vm_pindex_t ptepindex;
+ pd_entry_t *pde, tpde;
+#ifdef INVARIANTS
+ pt_entry_t *pte;
+#endif
+ vm_page_t m;
+ int lvl;
+
+ /*
+ * Calculate pagetable page index
+ */
+ ptepindex = pmap_l2_pindex(va);
+retry:
+ /*
+ * Get the page directory entry
+ */
+ pde = pmap_pde(pmap, va, &lvl);
+
+ /*
+ * If the page table page is mapped, we just increment the hold count,
+ * and activate it. If we get a level 2 pde it will point to a level 3
+ * table.
+ */
+ switch (lvl) {
+ case -1:
+ break;
+ case 0:
+#ifdef INVARIANTS
+ pte = pmap_l0_to_l1(pde, va);
+ KASSERT(pmap_load(pte) == 0,
+ ("pmap_alloc_l3: TODO: l0 superpages"));
+#endif
+ break;
+ case 1:
+#ifdef INVARIANTS
+ pte = pmap_l1_to_l2(pde, va);
+ KASSERT(pmap_load(pte) == 0,
+ ("pmap_alloc_l3: TODO: l1 superpages"));
+#endif
+ break;
+ case 2:
+ tpde = pmap_load(pde);
+ if (tpde != 0) {
+ m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK);
+ m->ref_count++;
+ return (m);
+ }
+ break;
+ default:
+ panic("pmap_alloc_l3: Invalid level %d", lvl);
+ }
+
+ /*
+ * Here if the pte page isn't mapped, or if it has been deallocated.
+ */
+ m = _pmap_alloc_l3(pmap, ptepindex, lockp);
+ if (m == NULL && lockp != NULL)
+ goto retry;
+
+ return (m);
+}
+
+/***************************************************
+ * Pmap allocation/deallocation routines.
+ ***************************************************/
+
+/*
+ * Release any resources held by the given physical map.
+ * Called when a pmap initialized by pmap_pinit is being released.
+ * Should only be called if the map contains no valid mappings.
+ */
+void
+pmap_release(pmap_t pmap)
+{
+ struct asid_set *set;
+ vm_page_t m;
+ int asid;
+
+ KASSERT(pmap->pm_stats.resident_count == 0,
+ ("pmap_release: pmap resident count %ld != 0",
+ pmap->pm_stats.resident_count));
+ KASSERT(vm_radix_is_empty(&pmap->pm_root),
+ ("pmap_release: pmap has reserved page table page(s)"));
+
+ set = pmap->pm_asid_set;
+ KASSERT(set != NULL, ("%s: NULL asid set", __func__));
+
+ /*
+	 * Allow the ASID to be reused. For stage 2 pmaps the TLB entries are
+	 * not invalidated when they are removed, so we rely on a later TLB
+	 * invalidation, which happens when the VMID generation is updated.
+	 * Because of this we don't reuse VMIDs within a generation.
+ */
+ if (pmap->pm_stage == PM_STAGE1) {
+ mtx_lock_spin(&set->asid_set_mutex);
+ if (COOKIE_TO_EPOCH(pmap->pm_cookie) == set->asid_epoch) {
+ asid = COOKIE_TO_ASID(pmap->pm_cookie);
+ KASSERT(asid >= ASID_FIRST_AVAILABLE &&
+ asid < set->asid_set_size,
+ ("pmap_release: pmap cookie has out-of-range asid"));
+ bit_clear(set->asid_set, asid);
+ }
+ mtx_unlock_spin(&set->asid_set_mutex);
+ }
+
+ m = PHYS_TO_VM_PAGE(pmap->pm_l0_paddr);
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+}
+
+static int
+kvm_size(SYSCTL_HANDLER_ARGS)
+{
+ unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
+
+	return (sysctl_handle_long(oidp, &ksize, 0, req));
+}
+SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ 0, 0, kvm_size, "LU",
+ "Size of KVM");
+
+static int
+kvm_free(SYSCTL_HANDLER_ARGS)
+{
+ unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
+
+	return (sysctl_handle_long(oidp, &kfree, 0, req));
+}
+SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ 0, 0, kvm_free, "LU",
+ "Amount of KVM free");
+
+/*
+ * grow the number of kernel page table entries, if needed
+ */
+void
+pmap_growkernel(vm_offset_t addr)
+{
+ vm_paddr_t paddr;
+ vm_page_t nkpg;
+ pd_entry_t *l0, *l1, *l2;
+
+ mtx_assert(&kernel_map->system_mtx, MA_OWNED);
+
+ addr = roundup2(addr, L2_SIZE);
+ if (addr - 1 >= vm_map_max(kernel_map))
+ addr = vm_map_max(kernel_map);
+ while (kernel_vm_end < addr) {
+ l0 = pmap_l0(kernel_pmap, kernel_vm_end);
+ KASSERT(pmap_load(l0) != 0,
+ ("pmap_growkernel: No level 0 kernel entry"));
+
+ l1 = pmap_l0_to_l1(l0, kernel_vm_end);
+ if (pmap_load(l1) == 0) {
+ /* We need a new PDP entry */
+ nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
+ VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+ if (nkpg == NULL)
+ panic("pmap_growkernel: no memory to grow kernel");
+ if ((nkpg->flags & PG_ZERO) == 0)
+ pmap_zero_page(nkpg);
+ /* See the dmb() in _pmap_alloc_l3(). */
+ dmb(ishst);
+ paddr = VM_PAGE_TO_PHYS(nkpg);
+ pmap_store(l1, paddr | L1_TABLE);
+ continue; /* try again */
+ }
+ l2 = pmap_l1_to_l2(l1, kernel_vm_end);
+ if (pmap_load(l2) != 0) {
+ kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
+ if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
+ kernel_vm_end = vm_map_max(kernel_map);
+ break;
+ }
+ continue;
+ }
+
+ nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
+ VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+ VM_ALLOC_ZERO);
+ if (nkpg == NULL)
+ panic("pmap_growkernel: no memory to grow kernel");
+ if ((nkpg->flags & PG_ZERO) == 0)
+ pmap_zero_page(nkpg);
+ /* See the dmb() in _pmap_alloc_l3(). */
+ dmb(ishst);
+ paddr = VM_PAGE_TO_PHYS(nkpg);
+ pmap_store(l2, paddr | L2_TABLE);
+
+ kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
+ if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
+ kernel_vm_end = vm_map_max(kernel_map);
+ break;
+ }
+ }
+}
+
+/***************************************************
+ * page management routines.
+ ***************************************************/
+
+CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
+CTASSERT(_NPCM == 3);
+CTASSERT(_NPCPV == 168);
+
+static __inline struct pv_chunk *
+pv_to_chunk(pv_entry_t pv)
+{
+
+ return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
+}
+
+#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
+
+#define PC_FREE0 0xfffffffffffffffful
+#define PC_FREE1 0xfffffffffffffffful
+#define PC_FREE2 0x000000fffffffffful
+
+static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
+
+#if 0
+#ifdef PV_STATS
+static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
+
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
+ "Current number of pv entry chunks");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
+ "Current number of pv entry chunks allocated");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
+ "Current number of pv entry chunks frees");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
+ "Number of times tried to get a chunk page but failed.");
+
+static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
+static int pv_entry_spare;
+
+SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
+ "Current number of pv entry frees");
+SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
+ "Current number of pv entry allocs");
+SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
+ "Current number of pv entries");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
+ "Current number of spare pv entries");
+#endif
+#endif /* 0 */
+
+/*
+ * We are in a serious low memory condition. Resort to
+ * drastic measures to free some pages so we can allocate
+ * another pv entry chunk.
+ *
+ * Returns NULL if PV entries were reclaimed from the specified pmap.
+ *
+ * We do not, however, unmap 2mpages because subsequent accesses will
+ * allocate per-page pv entries until repromotion occurs, thereby
+ * exacerbating the shortage of free pv entries.
+ */
+static vm_page_t
+reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
+{
+ struct pv_chunk *pc, *pc_marker, *pc_marker_end;
+ struct pv_chunk_header pc_marker_b, pc_marker_end_b;
+ struct md_page *pvh;
+ pd_entry_t *pde;
+ pmap_t next_pmap, pmap;
+ pt_entry_t *pte, tpte;
+ pv_entry_t pv;
+ vm_offset_t va;
+ vm_page_t m, m_pc;
+ struct spglist free;
+ uint64_t inuse;
+ int bit, field, freed, lvl;
+ static int active_reclaims = 0;
+
+ PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
+ KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
+
+ pmap = NULL;
+ m_pc = NULL;
+ SLIST_INIT(&free);
+ bzero(&pc_marker_b, sizeof(pc_marker_b));
+ bzero(&pc_marker_end_b, sizeof(pc_marker_end_b));
+ pc_marker = (struct pv_chunk *)&pc_marker_b;
+ pc_marker_end = (struct pv_chunk *)&pc_marker_end_b;
+
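+	/*
+	 * The two markers bracket our traversal of the global pv_chunks
+	 * list: pc_marker records the current position so that the scan can
+	 * resume after pv_chunks_mutex is dropped, and pc_marker_end bounds
+	 * the scan so that chunks rotated to the tail are not revisited.
+	 */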
+ mtx_lock(&pv_chunks_mutex);
+ active_reclaims++;
+ TAILQ_INSERT_HEAD(&pv_chunks, pc_marker, pc_lru);
+ TAILQ_INSERT_TAIL(&pv_chunks, pc_marker_end, pc_lru);
+ while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end &&
+ SLIST_EMPTY(&free)) {
+ next_pmap = pc->pc_pmap;
+ if (next_pmap == NULL) {
+ /*
+ * The next chunk is a marker. However, it is
+ * not our marker, so active_reclaims must be
+ * > 1. Consequently, the next_chunk code
+ * will not rotate the pv_chunks list.
+ */
+ goto next_chunk;
+ }
+ mtx_unlock(&pv_chunks_mutex);
+
+ /*
+ * A pv_chunk can only be removed from the pc_lru list
+ * when both pv_chunks_mutex is owned and the
+ * corresponding pmap is locked.
+ */
+ if (pmap != next_pmap) {
+ if (pmap != NULL && pmap != locked_pmap)
+ PMAP_UNLOCK(pmap);
+ pmap = next_pmap;
+ /* Avoid deadlock and lock recursion. */
+ if (pmap > locked_pmap) {
+ RELEASE_PV_LIST_LOCK(lockp);
+ PMAP_LOCK(pmap);
+ mtx_lock(&pv_chunks_mutex);
+ continue;
+ } else if (pmap != locked_pmap) {
+ if (PMAP_TRYLOCK(pmap)) {
+ mtx_lock(&pv_chunks_mutex);
+ continue;
+ } else {
+ pmap = NULL; /* pmap is not locked */
+ mtx_lock(&pv_chunks_mutex);
+ pc = TAILQ_NEXT(pc_marker, pc_lru);
+ if (pc == NULL ||
+ pc->pc_pmap != next_pmap)
+ continue;
+ goto next_chunk;
+ }
+ }
+ }
+
+ /*
+ * Destroy every non-wired, 4 KB page mapping in the chunk.
+ */
+ freed = 0;
+ for (field = 0; field < _NPCM; field++) {
+ for (inuse = ~pc->pc_map[field] & pc_freemask[field];
+ inuse != 0; inuse &= ~(1UL << bit)) {
+ bit = ffsl(inuse) - 1;
+ pv = &pc->pc_pventry[field * 64 + bit];
+ va = pv->pv_va;
+ pde = pmap_pde(pmap, va, &lvl);
+ if (lvl != 2)
+ continue;
+ pte = pmap_l2_to_l3(pde, va);
+ tpte = pmap_load(pte);
+ if ((tpte & ATTR_SW_WIRED) != 0)
+ continue;
+ tpte = pmap_load_clear(pte);
+ m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK);
+ if (pmap_pte_dirty(pmap, tpte))
+ vm_page_dirty(m);
+ if ((tpte & ATTR_AF) != 0) {
+ pmap_invalidate_page(pmap, va);
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ }
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
+ m->md.pv_gen++;
+ if (TAILQ_EMPTY(&m->md.pv_list) &&
+ (m->flags & PG_FICTITIOUS) == 0) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ if (TAILQ_EMPTY(&pvh->pv_list)) {
+ vm_page_aflag_clear(m,
+ PGA_WRITEABLE);
+ }
+ }
+ pc->pc_map[field] |= 1UL << bit;
+ pmap_unuse_pt(pmap, va, pmap_load(pde), &free);
+ freed++;
+ }
+ }
+ if (freed == 0) {
+ mtx_lock(&pv_chunks_mutex);
+ goto next_chunk;
+ }
+ /* Every freed mapping is for a 4 KB page. */
+ pmap_resident_count_dec(pmap, freed);
+ PV_STAT(atomic_add_long(&pv_entry_frees, freed));
+ PV_STAT(atomic_add_int(&pv_entry_spare, freed));
+ PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
+ pc->pc_map[2] == PC_FREE2) {
+ PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
+ PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
+ PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
+ /* Entire chunk is free; return it. */
+ m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
+ dump_drop_page(m_pc->phys_addr);
+ mtx_lock(&pv_chunks_mutex);
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ break;
+ }
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ mtx_lock(&pv_chunks_mutex);
+ /* One freed pv entry in locked_pmap is sufficient. */
+ if (pmap == locked_pmap)
+ break;
+
+next_chunk:
+ TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
+ TAILQ_INSERT_AFTER(&pv_chunks, pc, pc_marker, pc_lru);
+ if (active_reclaims == 1 && pmap != NULL) {
+ /*
+ * Rotate the pv chunks list so that we do not
+ * scan the same pv chunks that could not be
+ * freed (because they contained a wired
+ * and/or superpage mapping) on every
+ * invocation of reclaim_pv_chunk().
+ */
+ while ((pc = TAILQ_FIRST(&pv_chunks)) != pc_marker) {
+ MPASS(pc->pc_pmap != NULL);
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
+ }
+ }
+ }
+ TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
+ TAILQ_REMOVE(&pv_chunks, pc_marker_end, pc_lru);
+ active_reclaims--;
+ mtx_unlock(&pv_chunks_mutex);
+ if (pmap != NULL && pmap != locked_pmap)
+ PMAP_UNLOCK(pmap);
+ if (m_pc == NULL && !SLIST_EMPTY(&free)) {
+ m_pc = SLIST_FIRST(&free);
+ SLIST_REMOVE_HEAD(&free, plinks.s.ss);
+ /* Recycle a freed page table page. */
+ m_pc->ref_count = 1;
+ }
+ vm_page_free_pages_toq(&free, true);
+ return (m_pc);
+}
+
+/*
+ * free the pv_entry back to the free list
+ */
+static void
+free_pv_entry(pmap_t pmap, pv_entry_t pv)
+{
+ struct pv_chunk *pc;
+ int idx, field, bit;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ PV_STAT(atomic_add_long(&pv_entry_frees, 1));
+ PV_STAT(atomic_add_int(&pv_entry_spare, 1));
+ PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
+ pc = pv_to_chunk(pv);
+ idx = pv - &pc->pc_pventry[0];
+ field = idx / 64;
+ bit = idx % 64;
+ pc->pc_map[field] |= 1ul << bit;
+ if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
+ pc->pc_map[2] != PC_FREE2) {
+ /* 98% of the time, pc is already at the head of the list. */
+ if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ }
+ return;
+ }
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ free_pv_chunk(pc);
+}
+
+static void
+free_pv_chunk(struct pv_chunk *pc)
+{
+ vm_page_t m;
+
+ mtx_lock(&pv_chunks_mutex);
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ mtx_unlock(&pv_chunks_mutex);
+ PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
+ PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
+ PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
+ /* entire chunk is free, return it */
+ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
+ dump_drop_page(m->phys_addr);
+ vm_page_unwire_noq(m);
+ vm_page_free(m);
+}
+
+/*
+ * Returns a new PV entry, allocating a new PV chunk from the system when
+ * needed. If this PV chunk allocation fails and a PV list lock pointer was
+ * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is
+ * returned.
+ *
+ * The given PV list lock may be released.
+ */
+static pv_entry_t
+get_pv_entry(pmap_t pmap, struct rwlock **lockp)
+{
+ int bit, field;
+ pv_entry_t pv;
+ struct pv_chunk *pc;
+ vm_page_t m;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
+retry:
+ pc = TAILQ_FIRST(&pmap->pm_pvchunk);
+ if (pc != NULL) {
+ for (field = 0; field < _NPCM; field++) {
+ if (pc->pc_map[field]) {
+ bit = ffsl(pc->pc_map[field]) - 1;
+ break;
+ }
+ }
+ if (field < _NPCM) {
+ pv = &pc->pc_pventry[field * 64 + bit];
+ pc->pc_map[field] &= ~(1ul << bit);
+ /* If this was the last item, move it to tail */
+ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
+ pc->pc_map[2] == 0) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
+ pc_list);
+ }
+ PV_STAT(atomic_add_long(&pv_entry_count, 1));
+ PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
+ return (pv);
+ }
+ }
+ /* No free items, allocate another chunk */
+ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED);
+ if (m == NULL) {
+ if (lockp == NULL) {
+ PV_STAT(pc_chunk_tryfail++);
+ return (NULL);
+ }
+ m = reclaim_pv_chunk(pmap, lockp);
+ if (m == NULL)
+ goto retry;
+ }
+ PV_STAT(atomic_add_int(&pc_chunk_count, 1));
+ PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
+ dump_add_page(m->phys_addr);
+ pc = (void *)PHYS_TO_DMAP(m->phys_addr);
+ pc->pc_pmap = pmap;
+ pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */
+ pc->pc_map[1] = PC_FREE1;
+ pc->pc_map[2] = PC_FREE2;
+ mtx_lock(&pv_chunks_mutex);
+ TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
+ mtx_unlock(&pv_chunks_mutex);
+ pv = &pc->pc_pventry[0];
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ PV_STAT(atomic_add_long(&pv_entry_count, 1));
+ PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
+ return (pv);
+}
+
+/*
+ * Ensure that the number of spare PV entries in the specified pmap meets or
+ * exceeds the given count, "needed".
+ *
+ * The given PV list lock may be released.
+ */
+static void
+reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
+{
+ struct pch new_tail;
+ struct pv_chunk *pc;
+ vm_page_t m;
+ int avail, free;
+ bool reclaimed;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
+
+ /*
+ * Newly allocated PV chunks must be stored in a private list until
+ * the required number of PV chunks have been allocated. Otherwise,
+ * reclaim_pv_chunk() could recycle one of these chunks. In
+ * contrast, these chunks must be added to the pmap upon allocation.
+ */
+ TAILQ_INIT(&new_tail);
+retry:
+ avail = 0;
+ TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
+ bit_count((bitstr_t *)pc->pc_map, 0,
+ sizeof(pc->pc_map) * NBBY, &free);
+ if (free == 0)
+ break;
+ avail += free;
+ if (avail >= needed)
+ break;
+ }
+ for (reclaimed = false; avail < needed; avail += _NPCPV) {
+ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED);
+ if (m == NULL) {
+ m = reclaim_pv_chunk(pmap, lockp);
+ if (m == NULL)
+ goto retry;
+ reclaimed = true;
+ }
+ PV_STAT(atomic_add_int(&pc_chunk_count, 1));
+ PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
+ dump_add_page(m->phys_addr);
+ pc = (void *)PHYS_TO_DMAP(m->phys_addr);
+ pc->pc_pmap = pmap;
+ pc->pc_map[0] = PC_FREE0;
+ pc->pc_map[1] = PC_FREE1;
+ pc->pc_map[2] = PC_FREE2;
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
+ PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
+
+ /*
+ * The reclaim might have freed a chunk from the current pmap.
+ * If that chunk contained available entries, we need to
+ * re-count the number of available entries.
+ */
+ if (reclaimed)
+ goto retry;
+ }
+ if (!TAILQ_EMPTY(&new_tail)) {
+ mtx_lock(&pv_chunks_mutex);
+ TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
+ mtx_unlock(&pv_chunks_mutex);
+ }
+}
+
+/*
+ * First find and then remove the pv entry for the specified pmap and virtual
+ * address from the specified pv list. Returns the pv entry if found and NULL
+ * otherwise. This operation can be performed on pv lists for either 4KB or
+ * 2MB page mappings.
+ */
+static __inline pv_entry_t
+pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
+{
+ pv_entry_t pv;
+
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
+ if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
+ TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
+ pvh->pv_gen++;
+ break;
+ }
+ }
+ return (pv);
+}
+
+/*
+ * After demotion from a 2MB page mapping to 512 4KB page mappings,
+ * destroy the pv entry for the 2MB page mapping and reinstantiate the pv
+ * entries for each of the 4KB page mappings.
+ */
+static void
+pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ struct pv_chunk *pc;
+ pv_entry_t pv;
+ vm_offset_t va_last;
+ vm_page_t m;
+ int bit, field;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT((va & L2_OFFSET) == 0,
+ ("pmap_pv_demote_l2: va is not 2mpage aligned"));
+ KASSERT((pa & L2_OFFSET) == 0,
+ ("pmap_pv_demote_l2: pa is not 2mpage aligned"));
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
+
+ /*
+ * Transfer the 2mpage's pv entry for this mapping to the first
+ * page's pv list. Once this transfer begins, the pv list lock
+ * must not be released until the last pv entry is reinstantiated.
+ */
+ pvh = pa_to_pvh(pa);
+ pv = pmap_pvh_remove(pvh, pmap, va);
+ KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found"));
+ m = PHYS_TO_VM_PAGE(pa);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+ m->md.pv_gen++;
+ /* Instantiate the remaining Ln_ENTRIES - 1 pv entries. */
+ PV_STAT(atomic_add_long(&pv_entry_allocs, Ln_ENTRIES - 1));
+ va_last = va + L2_SIZE - PAGE_SIZE;
+ for (;;) {
+ pc = TAILQ_FIRST(&pmap->pm_pvchunk);
+ KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 ||
+ pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare"));
+ for (field = 0; field < _NPCM; field++) {
+ while (pc->pc_map[field]) {
+ bit = ffsl(pc->pc_map[field]) - 1;
+ pc->pc_map[field] &= ~(1ul << bit);
+ pv = &pc->pc_pventry[field * 64 + bit];
+ va += PAGE_SIZE;
+ pv->pv_va = va;
+ m++;
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_pv_demote_l2: page %p is not managed", m));
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+ m->md.pv_gen++;
+ if (va == va_last)
+ goto out;
+ }
+ }
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
+ }
+out:
+ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
+ }
+ PV_STAT(atomic_add_long(&pv_entry_count, Ln_ENTRIES - 1));
+ PV_STAT(atomic_subtract_int(&pv_entry_spare, Ln_ENTRIES - 1));
+}
+
+/*
+ * First find and then destroy the pv entry for the specified pmap and virtual
+ * address. This operation can be performed on pv lists for either 4KB or 2MB
+ * page mappings.
+ */
+static void
+pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
+{
+ pv_entry_t pv;
+
+ pv = pmap_pvh_remove(pvh, pmap, va);
+ KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
+ free_pv_entry(pmap, pv);
+}
+
+/*
+ * Conditionally create the PV entry for a 4KB page mapping if the required
+ * memory can be allocated without resorting to reclamation.
+ */
+static boolean_t
+pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ struct rwlock **lockp)
+{
+ pv_entry_t pv;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ /* Pass NULL instead of the lock pointer to disable reclamation. */
+ if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
+ pv->pv_va = va;
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+ m->md.pv_gen++;
+ return (TRUE);
+ } else
+ return (FALSE);
+}
+
+/*
+ * Create the PV entry for a 2MB page mapping. Always returns true unless the
+ * flag PMAP_ENTER_NORECLAIM is specified. If that flag is specified, returns
+ * false if the PV entry cannot be allocated without resorting to reclamation.
+ */
+static bool
+pmap_pv_insert_l2(pmap_t pmap, vm_offset_t va, pd_entry_t l2e, u_int flags,
+ struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ pv_entry_t pv;
+ vm_paddr_t pa;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ /* Pass NULL instead of the lock pointer to disable reclamation. */
+ if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ?
+ NULL : lockp)) == NULL)
+ return (false);
+ pv->pv_va = va;
+ pa = l2e & ~ATTR_MASK;
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
+ pvh = pa_to_pvh(pa);
+ TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
+ pvh->pv_gen++;
+ return (true);
+}
+
+static void
+pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
+{
+ pt_entry_t newl2, oldl2;
+ vm_page_t ml3;
+ vm_paddr_t ml3pa;
+
+ KASSERT(!VIRT_IN_DMAP(va), ("removing direct mapping of %#lx", va));
+ KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap));
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+ ml3 = pmap_remove_pt_page(pmap, va);
+ if (ml3 == NULL)
+ panic("pmap_remove_kernel_l2: Missing pt page");
+
+ ml3pa = VM_PAGE_TO_PHYS(ml3);
+ newl2 = ml3pa | L2_TABLE;
+
+ /*
+ * If this page table page was unmapped by a promotion, then it
+ * contains valid mappings. Zero it to invalidate those mappings.
+ */
+ if (ml3->valid != 0)
+ pagezero((void *)PHYS_TO_DMAP(ml3pa));
+
+ /*
+ * Demote the mapping. The caller must have already invalidated the
+ * mapping (i.e., the "break" in break-before-make).
+ */
+ oldl2 = pmap_load_store(l2, newl2);
+ KASSERT(oldl2 == 0, ("%s: found existing mapping at %p: %#lx",
+ __func__, l2, oldl2));
+}
+
+/*
+ * pmap_remove_l2: Remove a level 2 (2MB) superpage mapping.
+ */
+static int
+pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva,
+ pd_entry_t l1e, struct spglist *free, struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ pt_entry_t old_l2;
+ vm_offset_t eva, va;
+ vm_page_t m, ml3;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned"));
+ old_l2 = pmap_load_clear(l2);
+ KASSERT((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK,
+ ("pmap_remove_l2: L2e %lx is not a block mapping", old_l2));
+
+ /*
+ * Since a promotion must break the 4KB page mappings before making
+ * the 2MB page mapping, a pmap_invalidate_page() suffices.
+ */
+ pmap_invalidate_page(pmap, sva);
+
+ if (old_l2 & ATTR_SW_WIRED)
+ pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE;
+ pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE);
+ if (old_l2 & ATTR_SW_MANAGED) {
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, old_l2 & ~ATTR_MASK);
+ pvh = pa_to_pvh(old_l2 & ~ATTR_MASK);
+ pmap_pvh_free(pvh, pmap, sva);
+ eva = sva + L2_SIZE;
+ for (va = sva, m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK);
+ va < eva; va += PAGE_SIZE, m++) {
+ if (pmap_pte_dirty(pmap, old_l2))
+ vm_page_dirty(m);
+ if (old_l2 & ATTR_AF)
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ if (TAILQ_EMPTY(&m->md.pv_list) &&
+ TAILQ_EMPTY(&pvh->pv_list))
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ }
+ }
+ if (pmap == kernel_pmap) {
+ pmap_remove_kernel_l2(pmap, l2, sva);
+ } else {
+ ml3 = pmap_remove_pt_page(pmap, sva);
+ if (ml3 != NULL) {
+ KASSERT(ml3->valid == VM_PAGE_BITS_ALL,
+ ("pmap_remove_l2: l3 page not promoted"));
+ pmap_resident_count_dec(pmap, 1);
+ KASSERT(ml3->ref_count == NL3PG,
+ ("pmap_remove_l2: l3 page ref count error"));
+ ml3->ref_count = 0;
+ pmap_add_delayed_free_list(ml3, free, FALSE);
+ }
+ }
+ return (pmap_unuse_pt(pmap, sva, l1e, free));
+}
+
+/*
+ * pmap_remove_l3: Remove a single 4KB page mapping from a process's pmap.
+ */
+static int
+pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
+ pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ pt_entry_t old_l3;
+ vm_page_t m;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ old_l3 = pmap_load_clear(l3);
+ pmap_invalidate_page(pmap, va);
+ if (old_l3 & ATTR_SW_WIRED)
+ pmap->pm_stats.wired_count -= 1;
+ pmap_resident_count_dec(pmap, 1);
+ if (old_l3 & ATTR_SW_MANAGED) {
+ m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
+ if (pmap_pte_dirty(pmap, old_l3))
+ vm_page_dirty(m);
+ if (old_l3 & ATTR_AF)
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
+ pmap_pvh_free(&m->md, pmap, va);
+ if (TAILQ_EMPTY(&m->md.pv_list) &&
+ (m->flags & PG_FICTITIOUS) == 0) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ if (TAILQ_EMPTY(&pvh->pv_list))
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ }
+ }
+ return (pmap_unuse_pt(pmap, va, l2e, free));
+}
+
+/*
+ * Remove the specified range of addresses from the L3 page table that is
+ * identified by the given L2 entry.
+ */
+static void
+pmap_remove_l3_range(pmap_t pmap, pd_entry_t l2e, vm_offset_t sva,
+ vm_offset_t eva, struct spglist *free, struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ struct rwlock *new_lock;
+ pt_entry_t *l3, old_l3;
+ vm_offset_t va;
+ vm_page_t l3pg, m;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT(rounddown2(sva, L2_SIZE) + L2_SIZE == roundup2(eva, L2_SIZE),
+ ("pmap_remove_l3_range: range crosses an L3 page table boundary"));
+ l3pg = sva < VM_MAXUSER_ADDRESS ? PHYS_TO_VM_PAGE(l2e & ~ATTR_MASK) :
+ NULL;
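+	/*
+	 * "va" tracks the start of a run of removed mappings whose TLB
+	 * invalidation is still pending; va == eva means that no run is
+	 * open.
+	 */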
+ va = eva;
+ for (l3 = pmap_l2_to_l3(&l2e, sva); sva != eva; l3++, sva += L3_SIZE) {
+ if (!pmap_l3_valid(pmap_load(l3))) {
+ if (va != eva) {
+ pmap_invalidate_range(pmap, va, sva);
+ va = eva;
+ }
+ continue;
+ }
+ old_l3 = pmap_load_clear(l3);
+ if ((old_l3 & ATTR_SW_WIRED) != 0)
+ pmap->pm_stats.wired_count--;
+ pmap_resident_count_dec(pmap, 1);
+ if ((old_l3 & ATTR_SW_MANAGED) != 0) {
+ m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
+ if (pmap_pte_dirty(pmap, old_l3))
+ vm_page_dirty(m);
+ if ((old_l3 & ATTR_AF) != 0)
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ new_lock = PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m));
+ if (new_lock != *lockp) {
+ if (*lockp != NULL) {
+ /*
+ * Pending TLB invalidations must be
+ * performed before the PV list lock is
+ * released. Otherwise, a concurrent
+ * pmap_remove_all() on a physical page
+ * could return while a stale TLB entry
+ * still provides access to that page.
+ */
+ if (va != eva) {
+ pmap_invalidate_range(pmap, va,
+ sva);
+ va = eva;
+ }
+ rw_wunlock(*lockp);
+ }
+ *lockp = new_lock;
+ rw_wlock(*lockp);
+ }
+ pmap_pvh_free(&m->md, pmap, sva);
+ if (TAILQ_EMPTY(&m->md.pv_list) &&
+ (m->flags & PG_FICTITIOUS) == 0) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ if (TAILQ_EMPTY(&pvh->pv_list))
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ }
+ }
+ if (va == eva)
+ va = sva;
+ if (l3pg != NULL && pmap_unwire_l3(pmap, sva, l3pg, free)) {
+ sva += L3_SIZE;
+ break;
+ }
+ }
+ if (va != eva)
+ pmap_invalidate_range(pmap, va, sva);
+}
+
+/*
+ * Remove the given range of addresses from the specified map.
+ *
+ * It is assumed that the start and end are properly
+ * rounded to the page size.
+ */
+void
+pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ struct rwlock *lock;
+ vm_offset_t va_next;
+ pd_entry_t *l0, *l1, *l2;
+ pt_entry_t l3_paddr;
+ struct spglist free;
+
+ /*
+ * Perform an unsynchronized read. This is, however, safe.
+ */
+ if (pmap->pm_stats.resident_count == 0)
+ return;
+
+ SLIST_INIT(&free);
+
+ PMAP_LOCK(pmap);
+
+ lock = NULL;
+ for (; sva < eva; sva = va_next) {
+ if (pmap->pm_stats.resident_count == 0)
+ break;
+
+ l0 = pmap_l0(pmap, sva);
+ if (pmap_load(l0) == 0) {
+ va_next = (sva + L0_SIZE) & ~L0_OFFSET;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+
+ l1 = pmap_l0_to_l1(l0, sva);
+ if (pmap_load(l1) == 0) {
+ va_next = (sva + L1_SIZE) & ~L1_OFFSET;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+
+ /*
+ * Calculate index for next page table.
+ */
+ va_next = (sva + L2_SIZE) & ~L2_OFFSET;
+ if (va_next < sva)
+ va_next = eva;
+
+ l2 = pmap_l1_to_l2(l1, sva);
+ if (l2 == NULL)
+ continue;
+
+ l3_paddr = pmap_load(l2);
+
+ if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) {
+ if (sva + L2_SIZE == va_next && eva >= va_next) {
+ pmap_remove_l2(pmap, l2, sva, pmap_load(l1),
+ &free, &lock);
+ continue;
+ } else if (pmap_demote_l2_locked(pmap, l2, sva,
+ &lock) == NULL)
+ continue;
+ l3_paddr = pmap_load(l2);
+ }
+
+ /*
+ * Weed out invalid mappings.
+ */
+ if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE)
+ continue;
+
+ /*
+ * Limit our scan to either the end of the va represented
+ * by the current page table page, or to the end of the
+ * range being removed.
+ */
+ if (va_next > eva)
+ va_next = eva;
+
+ pmap_remove_l3_range(pmap, l3_paddr, sva, va_next, &free,
+ &lock);
+ }
+ if (lock != NULL)
+ rw_wunlock(lock);
+ PMAP_UNLOCK(pmap);
+ vm_page_free_pages_toq(&free, true);
+}
+
+/*
+ * Routine: pmap_remove_all
+ * Function:
+ * Removes this physical page from
+ * all physical maps in which it resides.
+ * Reflects back modify bits to the pager.
+ *
+ * Notes:
+ * Original versions of this routine were very
+ * inefficient because they iteratively called
+ * pmap_remove (slow...)
+ */
+
+void
+pmap_remove_all(vm_page_t m)
+{
+ struct md_page *pvh;
+ pv_entry_t pv;
+ pmap_t pmap;
+ struct rwlock *lock;
+ pd_entry_t *pde, tpde;
+ pt_entry_t *pte, tpte;
+ vm_offset_t va;
+ struct spglist free;
+ int lvl, pvh_gen, md_gen;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_remove_all: page %p is not managed", m));
+ SLIST_INIT(&free);
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
+ pa_to_pvh(VM_PAGE_TO_PHYS(m));
+retry:
+ rw_wlock(lock);
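+	/* First, demote any 2MB mappings of the page into 4KB mappings. */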
+ while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
+ pmap = PV_PMAP(pv);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen) {
+ rw_wunlock(lock);
+ PMAP_UNLOCK(pmap);
+ goto retry;
+ }
+ }
+ va = pv->pv_va;
+ pte = pmap_pte(pmap, va, &lvl);
+ KASSERT(pte != NULL,
+ ("pmap_remove_all: no page table entry found"));
+ KASSERT(lvl == 2,
+ ("pmap_remove_all: invalid pte level %d", lvl));
+
+ pmap_demote_l2_locked(pmap, pte, va, &lock);
+ PMAP_UNLOCK(pmap);
+ }
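+	/* Now remove every remaining 4KB mapping of the page. */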
+ while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
+ pmap = PV_PMAP(pv);
+ PMAP_ASSERT_STAGE1(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ md_gen = m->md.pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
+ rw_wunlock(lock);
+ PMAP_UNLOCK(pmap);
+ goto retry;
+ }
+ }
+ pmap_resident_count_dec(pmap, 1);
+
+ pde = pmap_pde(pmap, pv->pv_va, &lvl);
+ KASSERT(pde != NULL,
+ ("pmap_remove_all: no page directory entry found"));
+ KASSERT(lvl == 2,
+ ("pmap_remove_all: invalid pde level %d", lvl));
+ tpde = pmap_load(pde);
+
+ pte = pmap_l2_to_l3(pde, pv->pv_va);
+ tpte = pmap_load_clear(pte);
+ if (tpte & ATTR_SW_WIRED)
+ pmap->pm_stats.wired_count--;
+ if ((tpte & ATTR_AF) != 0) {
+ pmap_invalidate_page(pmap, pv->pv_va);
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ }
+
+ /*
+ * Update the vm_page_t clean and reference bits.
+ */
+ if (pmap_pte_dirty(pmap, tpte))
+ vm_page_dirty(m);
+ pmap_unuse_pt(pmap, pv->pv_va, tpde, &free);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
+ m->md.pv_gen++;
+ free_pv_entry(pmap, pv);
+ PMAP_UNLOCK(pmap);
+ }
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ rw_wunlock(lock);
+ vm_page_free_pages_toq(&free, true);
+}
+
+/*
+ * pmap_protect_l2: apply the requested protection change to a 2MB page
+ * mapping in a pmap
+ */
+static void
+pmap_protect_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pt_entry_t mask,
+ pt_entry_t nbits)
+{
+ pd_entry_t old_l2;
+ vm_page_t m, mt;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ PMAP_ASSERT_STAGE1(pmap);
+ KASSERT((sva & L2_OFFSET) == 0,
+ ("pmap_protect_l2: sva is not 2mpage aligned"));
+ old_l2 = pmap_load(l2);
+ KASSERT((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK,
+ ("pmap_protect_l2: L2e %lx is not a block mapping", old_l2));
+
+ /*
+ * Return if the L2 entry already has the desired access restrictions
+ * in place.
+ */
+retry:
+ if ((old_l2 & mask) == nbits)
+ return;
+
+ /*
+ * When a dirty read/write superpage mapping is write protected,
+ * update the dirty field of each of the superpage's constituent 4KB
+ * pages.
+ */
+ if ((old_l2 & ATTR_SW_MANAGED) != 0 &&
+ (nbits & ATTR_S1_AP(ATTR_S1_AP_RO)) != 0 &&
+ pmap_pte_dirty(pmap, old_l2)) {
+ m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK);
+ for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
+ vm_page_dirty(mt);
+ }
+
+ if (!atomic_fcmpset_64(l2, &old_l2, (old_l2 & ~mask) | nbits))
+ goto retry;
+
+ /*
+ * Since a promotion must break the 4KB page mappings before making
+ * the 2MB page mapping, a pmap_invalidate_page() suffices.
+ */
+ pmap_invalidate_page(pmap, sva);
+}
+
+/*
+ * Set the physical protection on the
+ * specified range of this map as requested.
+ */
+void
+pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
+{
+ vm_offset_t va, va_next;
+ pd_entry_t *l0, *l1, *l2;
+ pt_entry_t *l3p, l3, mask, nbits;
+
+ PMAP_ASSERT_STAGE1(pmap);
+ KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
+ if (prot == VM_PROT_NONE) {
+ pmap_remove(pmap, sva, eva);
+ return;
+ }
+
+ mask = nbits = 0;
+ if ((prot & VM_PROT_WRITE) == 0) {
+ mask |= ATTR_S1_AP_RW_BIT | ATTR_SW_DBM;
+ nbits |= ATTR_S1_AP(ATTR_S1_AP_RO);
+ }
+ if ((prot & VM_PROT_EXECUTE) == 0) {
+ mask |= ATTR_S1_XN;
+ nbits |= ATTR_S1_XN;
+ }
+ if (mask == 0)
+ return;
+
+ PMAP_LOCK(pmap);
+ for (; sva < eva; sva = va_next) {
+ l0 = pmap_l0(pmap, sva);
+ if (pmap_load(l0) == 0) {
+ va_next = (sva + L0_SIZE) & ~L0_OFFSET;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+
+ l1 = pmap_l0_to_l1(l0, sva);
+ if (pmap_load(l1) == 0) {
+ va_next = (sva + L1_SIZE) & ~L1_OFFSET;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+
+ va_next = (sva + L2_SIZE) & ~L2_OFFSET;
+ if (va_next < sva)
+ va_next = eva;
+
+ l2 = pmap_l1_to_l2(l1, sva);
+ if (pmap_load(l2) == 0)
+ continue;
+
+ if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
+ if (sva + L2_SIZE == va_next && eva >= va_next) {
+ pmap_protect_l2(pmap, l2, sva, mask, nbits);
+ continue;
+ } else if (pmap_demote_l2(pmap, l2, sva) == NULL)
+ continue;
+ }
+ KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
+ ("pmap_protect: Invalid L2 entry after demotion"));
+
+ if (va_next > eva)
+ va_next = eva;
+
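+		/*
+		 * "va" tracks the start of a run of updated L3 entries so
+		 * that a single ranged TLB invalidation can cover the whole
+		 * run; while va == va_next no run is open.
+		 */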
+ va = va_next;
+ for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
+ sva += L3_SIZE) {
+ l3 = pmap_load(l3p);
+retry:
+ /*
+ * Go to the next L3 entry if the current one is
+ * invalid or already has the desired access
+ * restrictions in place. (The latter case occurs
+ * frequently. For example, in a "buildworld"
+ * workload, almost 1 out of 4 L3 entries already
+ * have the desired restrictions.)
+ */
+ if (!pmap_l3_valid(l3) || (l3 & mask) == nbits) {
+ if (va != va_next) {
+ pmap_invalidate_range(pmap, va, sva);
+ va = va_next;
+ }
+ continue;
+ }
+
+ /*
+ * When a dirty read/write mapping is write protected,
+ * update the page's dirty field.
+ */
+ if ((l3 & ATTR_SW_MANAGED) != 0 &&
+ (nbits & ATTR_S1_AP(ATTR_S1_AP_RO)) != 0 &&
+ pmap_pte_dirty(pmap, l3))
+ vm_page_dirty(PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK));
+
+ if (!atomic_fcmpset_64(l3p, &l3, (l3 & ~mask) | nbits))
+ goto retry;
+ if (va == va_next)
+ va = sva;
+ }
+ if (va != va_next)
+ pmap_invalidate_range(pmap, va, sva);
+ }
+ PMAP_UNLOCK(pmap);
+}
+
+/*
+ * Inserts the specified page table page into the specified pmap's collection
+ * of idle page table pages. Each of a pmap's page table pages is responsible
+ * for mapping a distinct range of virtual addresses. The pmap's collection is
+ * ordered by this virtual address range.
+ *
+ * If "promoted" is false, then the page table page "mpte" must be zero filled.
+ */
+static __inline int
+pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0;
+ return (vm_radix_insert(&pmap->pm_root, mpte));
+}
+
+/*
+ * Removes the page table page mapping the specified virtual address from the
+ * specified pmap's collection of idle page table pages, and returns it.
+ * Otherwise, returns NULL if there is no page table page corresponding to the
+ * specified virtual address.
+ */
+static __inline vm_page_t
+pmap_remove_pt_page(pmap_t pmap, vm_offset_t va)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va)));
+}
+
+/*
+ * Performs a break-before-make update of a pmap entry. This is needed when
+ * either promoting or demoting pages to ensure the TLB doesn't get into an
+ * inconsistent state.
+ */
+static void
+pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte,
+ vm_offset_t va, vm_size_t size)
+{
+ register_t intr;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+ /*
+ * Ensure we don't get switched out with the page table in an
+ * inconsistent state. We also need to ensure no interrupts fire
+ * as they may make use of an address we are about to invalidate.
+ */
+ intr = intr_disable();
+
+ /*
+ * Clear the old mapping's valid bit, but leave the rest of the entry
+ * unchanged, so that a lockless, concurrent pmap_kextract() can still
+ * lookup the physical address.
+ */
+ pmap_clear_bits(pte, ATTR_DESCR_VALID);
+ pmap_invalidate_range(pmap, va, va + size);
+
+ /* Create the new mapping */
+ pmap_store(pte, newpte);
+ dsb(ishst);
+
+ intr_restore(intr);
+}
+
+#if VM_NRESERVLEVEL > 0
+/*
+ * After promotion from 512 4KB page mappings to a single 2MB page mapping,
+ * replace the many pv entries for the 4KB page mappings by a single pv entry
+ * for the 2MB page mapping.
+ */
+static void
+pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ pv_entry_t pv;
+ vm_offset_t va_last;
+ vm_page_t m;
+
+ KASSERT((pa & L2_OFFSET) == 0,
+ ("pmap_pv_promote_l2: pa is not 2mpage aligned"));
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
+
+ /*
+ * Transfer the first page's pv entry for this mapping to the 2mpage's
+ * pv list. Aside from avoiding the cost of a call to get_pv_entry(),
+ * a transfer avoids the possibility that get_pv_entry() calls
+ * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the
+ * mappings that is being promoted.
+ */
+ m = PHYS_TO_VM_PAGE(pa);
+ va = va & ~L2_OFFSET;
+ pv = pmap_pvh_remove(&m->md, pmap, va);
+ KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found"));
+ pvh = pa_to_pvh(pa);
+ TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
+ pvh->pv_gen++;
+ /* Free the remaining NPTEPG - 1 pv entries. */
+ va_last = va + L2_SIZE - PAGE_SIZE;
+ do {
+ m++;
+ va += PAGE_SIZE;
+ pmap_pvh_free(&m->md, pmap, va);
+ } while (va < va_last);
+}
+
+/*
+ * Tries to promote the 512, contiguous 4KB page mappings that are within a
+ * single level 2 table entry to a single 2MB page mapping. For promotion
+ * to occur, two conditions must be met: (1) the 4KB page mappings must map
+ * aligned, contiguous physical memory and (2) the 4KB page mappings must have
+ * identical characteristics.
+ */
+static void
+pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
+ struct rwlock **lockp)
+{
+ pt_entry_t *firstl3, *l3, newl2, oldl3, pa;
+ vm_page_t mpte;
+ vm_offset_t sva;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ PMAP_ASSERT_STAGE1(pmap);
+
+ sva = va & ~L2_OFFSET;
+ firstl3 = pmap_l2_to_l3(l2, sva);
+ newl2 = pmap_load(firstl3);
+
+setl2:
+ if (((newl2 & (~ATTR_MASK | ATTR_AF)) & L2_OFFSET) != ATTR_AF) {
+ atomic_add_long(&pmap_l2_p_failures, 1);
+ CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return;
+ }
+
+ if ((newl2 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
+ (ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) {
+ if (!atomic_fcmpset_64(l2, &newl2, newl2 & ~ATTR_SW_DBM))
+ goto setl2;
+ newl2 &= ~ATTR_SW_DBM;
+ }
+
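+	/*
+	 * Check that the remaining L3 entries have the same attributes as
+	 * the first and map physically contiguous pages. Clean entries
+	 * that still carry ATTR_SW_DBM have it cleared first so that the
+	 * comparison is not defeated by the DBM bit alone.
+	 */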
+ pa = newl2 + L2_SIZE - PAGE_SIZE;
+ for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) {
+ oldl3 = pmap_load(l3);
+setl3:
+ if ((oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
+ (ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) {
+ if (!atomic_fcmpset_64(l3, &oldl3, oldl3 &
+ ~ATTR_SW_DBM))
+ goto setl3;
+ oldl3 &= ~ATTR_SW_DBM;
+ }
+ if (oldl3 != pa) {
+ atomic_add_long(&pmap_l2_p_failures, 1);
+ CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return;
+ }
+ pa -= PAGE_SIZE;
+ }
+
+ /*
+ * Save the page table page in its current state until the L2
+ * mapping the superpage is demoted by pmap_demote_l2() or
+ * destroyed by pmap_remove_l3().
+ */
+ mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
+ KASSERT(mpte >= vm_page_array &&
+ mpte < &vm_page_array[vm_page_array_size],
+ ("pmap_promote_l2: page table page is out of range"));
+ KASSERT(mpte->pindex == pmap_l2_pindex(va),
+ ("pmap_promote_l2: page table page's pindex is wrong"));
+ if (pmap_insert_pt_page(pmap, mpte, true)) {
+ atomic_add_long(&pmap_l2_p_failures, 1);
+ CTR2(KTR_PMAP,
+ "pmap_promote_l2: failure for va %#lx in pmap %p", va,
+ pmap);
+ return;
+ }
+
+ if ((newl2 & ATTR_SW_MANAGED) != 0)
+ pmap_pv_promote_l2(pmap, va, newl2 & ~ATTR_MASK, lockp);
+
+ newl2 &= ~ATTR_DESCR_MASK;
+ newl2 |= L2_BLOCK;
+
+ pmap_update_entry(pmap, l2, newl2, sva, L2_SIZE);
+
+ atomic_add_long(&pmap_l2_promotions, 1);
+ CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
+ pmap);
+}
+#endif /* VM_NRESERVLEVEL > 0 */
+
+/*
+ * Insert the given physical page (p) at
+ * the specified virtual address (v) in the
+ * target physical map with the protection requested.
+ *
+ * If specified, the page will be wired down, meaning
+ * that the related pte can not be reclaimed.
+ *
+ * NB: This is the only routine which MAY NOT lazy-evaluate
+ * or lose information. That is, this routine must actually
+ * insert this page into the given map NOW.
+ */
+int
+pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
+ u_int flags, int8_t psind)
+{
+ struct rwlock *lock;
+ pd_entry_t *pde;
+ pt_entry_t new_l3, orig_l3;
+ pt_entry_t *l2, *l3;
+ pv_entry_t pv;
+ vm_paddr_t opa, pa;
+ vm_page_t mpte, om;
+ boolean_t nosleep;
+ int lvl, rv;
+
+ va = trunc_page(va);
+ if ((m->oflags & VPO_UNMANAGED) == 0)
+ VM_PAGE_OBJECT_BUSY_ASSERT(m);
+ pa = VM_PAGE_TO_PHYS(m);
+ new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | L3_PAGE);
+ new_l3 |= pmap_pte_memattr(pmap, m->md.pv_memattr);
+ new_l3 |= pmap_pte_prot(pmap, prot);
+
+ if ((flags & PMAP_ENTER_WIRED) != 0)
+ new_l3 |= ATTR_SW_WIRED;
+ if (pmap->pm_stage == PM_STAGE1) {
+ if (va < VM_MAXUSER_ADDRESS)
+ new_l3 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
+ else
+ new_l3 |= ATTR_S1_UXN;
+ if (pmap != kernel_pmap)
+ new_l3 |= ATTR_S1_nG;
+ } else {
+ /*
+		 * Clear the access flag on executable mappings; it will be
+		 * set later when the page is accessed. The fault handler is
+ * required to invalidate the I-cache.
+ *
+ * TODO: Switch to the valid flag to allow hardware management
+ * of the access flag. Much of the pmap code assumes the
+ * valid flag is set and fails to destroy the old page tables
+ * correctly if it is clear.
+ */
+ if (prot & VM_PROT_EXECUTE)
+ new_l3 &= ~ATTR_AF;
+ }
+ if ((m->oflags & VPO_UNMANAGED) == 0) {
+ new_l3 |= ATTR_SW_MANAGED;
+ if ((prot & VM_PROT_WRITE) != 0) {
+ new_l3 |= ATTR_SW_DBM;
+ if ((flags & VM_PROT_WRITE) == 0) {
+ if (pmap->pm_stage == PM_STAGE1)
+ new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO);
+ else
+ new_l3 &=
+ ~ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE);
+ }
+ }
+ }
+
+ CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
+
+ lock = NULL;
+ PMAP_LOCK(pmap);
+ if (psind == 1) {
+ /* Assert the required virtual and physical alignment. */
+ KASSERT((va & L2_OFFSET) == 0, ("pmap_enter: va unaligned"));
+ KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind"));
+ rv = pmap_enter_l2(pmap, va, (new_l3 & ~L3_PAGE) | L2_BLOCK,
+ flags, m, &lock);
+ goto out;
+ }
+ mpte = NULL;
+
+ /*
+ * In the case that a page table page is not
+ * resident, we are creating it here.
+ */
+retry:
+ pde = pmap_pde(pmap, va, &lvl);
+ if (pde != NULL && lvl == 2) {
+ l3 = pmap_l2_to_l3(pde, va);
+ if (va < VM_MAXUSER_ADDRESS && mpte == NULL) {
+ mpte = PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK);
+ mpte->ref_count++;
+ }
+ goto havel3;
+ } else if (pde != NULL && lvl == 1) {
+ l2 = pmap_l1_to_l2(pde, va);
+ if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK &&
+ (l3 = pmap_demote_l2_locked(pmap, l2, va, &lock)) != NULL) {
+ l3 = &l3[pmap_l3_index(va)];
+ if (va < VM_MAXUSER_ADDRESS) {
+ mpte = PHYS_TO_VM_PAGE(
+ pmap_load(l2) & ~ATTR_MASK);
+ mpte->ref_count++;
+ }
+ goto havel3;
+ }
+ /* We need to allocate an L3 table. */
+ }
+ if (va < VM_MAXUSER_ADDRESS) {
+ nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
+
+ /*
+ * We use _pmap_alloc_l3() instead of pmap_alloc_l3() in order
+ * to handle the possibility that a superpage mapping for "va"
+ * was created while we slept.
+ */
+ mpte = _pmap_alloc_l3(pmap, pmap_l2_pindex(va),
+ nosleep ? NULL : &lock);
+ if (mpte == NULL && nosleep) {
+ CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
+ rv = KERN_RESOURCE_SHORTAGE;
+ goto out;
+ }
+ goto retry;
+ } else
+ panic("pmap_enter: missing L3 table for kernel va %#lx", va);
+
+havel3:
+ orig_l3 = pmap_load(l3);
+ opa = orig_l3 & ~ATTR_MASK;
+ pv = NULL;
+
+ /*
+ * Is the specified virtual address already mapped?
+ */
+ if (pmap_l3_valid(orig_l3)) {
+ /*
+ * Only allow adding new entries on stage 2 tables for now.
+ * This simplifies cache invalidation as we may need to call
+ * into EL2 to perform such actions.
+ */
+ PMAP_ASSERT_STAGE1(pmap);
+ /*
+ * Wiring change, just update stats. We don't worry about
+ * wiring PT pages as they remain resident as long as there
+ * are valid mappings in them. Hence, if a user page is wired,
+ * the PT page will be also.
+ */
+ if ((flags & PMAP_ENTER_WIRED) != 0 &&
+ (orig_l3 & ATTR_SW_WIRED) == 0)
+ pmap->pm_stats.wired_count++;
+ else if ((flags & PMAP_ENTER_WIRED) == 0 &&
+ (orig_l3 & ATTR_SW_WIRED) != 0)
+ pmap->pm_stats.wired_count--;
+
+ /*
+ * Remove the extra PT page reference.
+ */
+ if (mpte != NULL) {
+ mpte->ref_count--;
+ KASSERT(mpte->ref_count > 0,
+ ("pmap_enter: missing reference to page table page,"
+ " va: 0x%lx", va));
+ }
+
+ /*
+ * Has the physical page changed?
+ */
+ if (opa == pa) {
+ /*
+ * No, might be a protection or wiring change.
+ */
+ if ((orig_l3 & ATTR_SW_MANAGED) != 0 &&
+ (new_l3 & ATTR_SW_DBM) != 0)
+ vm_page_aflag_set(m, PGA_WRITEABLE);
+ goto validate;
+ }
+
+ /*
+ * The physical page has changed. Temporarily invalidate
+ * the mapping.
+ */
+ orig_l3 = pmap_load_clear(l3);
+ KASSERT((orig_l3 & ~ATTR_MASK) == opa,
+ ("pmap_enter: unexpected pa update for %#lx", va));
+ if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
+ om = PHYS_TO_VM_PAGE(opa);
+
+ /*
+ * The pmap lock is sufficient to synchronize with
+ * concurrent calls to pmap_page_test_mappings() and
+ * pmap_ts_referenced().
+ */
+ if (pmap_pte_dirty(pmap, orig_l3))
+ vm_page_dirty(om);
+ if ((orig_l3 & ATTR_AF) != 0) {
+ pmap_invalidate_page(pmap, va);
+ vm_page_aflag_set(om, PGA_REFERENCED);
+ }
+ CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
+ pv = pmap_pvh_remove(&om->md, pmap, va);
+ if ((m->oflags & VPO_UNMANAGED) != 0)
+ free_pv_entry(pmap, pv);
+ if ((om->a.flags & PGA_WRITEABLE) != 0 &&
+ TAILQ_EMPTY(&om->md.pv_list) &&
+ ((om->flags & PG_FICTITIOUS) != 0 ||
+ TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
+ vm_page_aflag_clear(om, PGA_WRITEABLE);
+ } else {
+ KASSERT((orig_l3 & ATTR_AF) != 0,
+ ("pmap_enter: unmanaged mapping lacks ATTR_AF"));
+ pmap_invalidate_page(pmap, va);
+ }
+ orig_l3 = 0;
+ } else {
+ /*
+ * Increment the counters.
+ */
+ if ((new_l3 & ATTR_SW_WIRED) != 0)
+ pmap->pm_stats.wired_count++;
+ pmap_resident_count_inc(pmap, 1);
+ }
+ /*
+ * Enter on the PV list if part of our managed memory.
+ */
+ if ((m->oflags & VPO_UNMANAGED) == 0) {
+ if (pv == NULL) {
+ pv = get_pv_entry(pmap, &lock);
+ pv->pv_va = va;
+ }
+ CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+ m->md.pv_gen++;
+ if ((new_l3 & ATTR_SW_DBM) != 0)
+ vm_page_aflag_set(m, PGA_WRITEABLE);
+ }
+
+validate:
+ if (pmap->pm_stage == PM_STAGE1) {
+ /*
+		 * Sync the icache if the mapping has execute permission and
+		 * the VM_MEMATTR_WRITE_BACK attribute is set. Do it now,
+		 * before the mapping is stored and made valid for the
+		 * hardware table walk. If done later, another CPU could
+		 * access the page before the caches are properly synced.
+		 * Don't do it for kernel memory, which is mapped with exec
+		 * permission even if the memory isn't going to hold
+		 * executable code. The only time an icache sync is needed
+		 * there is after a kernel module is loaded and its
+		 * relocation info is processed, and that is done in
+		 * elf_cpu_load_file().
+ */
+ if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap &&
+ m->md.pv_memattr == VM_MEMATTR_WRITE_BACK &&
+ (opa != pa || (orig_l3 & ATTR_S1_XN))) {
+ PMAP_ASSERT_STAGE1(pmap);
+ cpu_icache_sync_range(PHYS_TO_DMAP(pa), PAGE_SIZE);
+ }
+ } else {
+ cpu_dcache_wb_range(PHYS_TO_DMAP(pa), PAGE_SIZE);
+ }
+
+ /*
+ * Update the L3 entry
+ */
+ if (pmap_l3_valid(orig_l3)) {
+ PMAP_ASSERT_STAGE1(pmap);
+ KASSERT(opa == pa, ("pmap_enter: invalid update"));
+ if ((orig_l3 & ~ATTR_AF) != (new_l3 & ~ATTR_AF)) {
+ /* same PA, different attributes */
+ orig_l3 = pmap_load_store(l3, new_l3);
+ pmap_invalidate_page(pmap, va);
+ if ((orig_l3 & ATTR_SW_MANAGED) != 0 &&
+ pmap_pte_dirty(pmap, orig_l3))
+ vm_page_dirty(m);
+ } else {
+ /*
+ * orig_l3 == new_l3
+			 * This can happen if multiple threads simultaneously
+			 * access a not-yet-mapped page. This is bad for
+			 * performance since it can cause a full
+			 * demotion-NOP-promotion cycle.
+			 * Other possible reasons are:
+			 * - the VM and pmap memory layouts have diverged
+			 * - a TLB flush is missing somewhere and the CPU
+			 * doesn't see the actual mapping.
+ */
+ CTR4(KTR_PMAP, "%s: already mapped page - "
+ "pmap %p va 0x%#lx pte 0x%lx",
+ __func__, pmap, va, new_l3);
+ }
+ } else {
+ /* New mapping */
+ pmap_store(l3, new_l3);
+ dsb(ishst);
+ }
+
+#if VM_NRESERVLEVEL > 0
+ /*
+ * Try to promote from level 3 pages to a level 2 superpage. This
+ * currently only works on stage 1 pmaps as pmap_promote_l2 looks at
+ * stage 1 specific fields and performs a break-before-make sequence
+	 * that is incorrect for a stage 2 pmap.
+ */
+ if ((mpte == NULL || mpte->ref_count == NL3PG) &&
+ pmap_ps_enabled(pmap) && pmap->pm_stage == PM_STAGE1 &&
+ (m->flags & PG_FICTITIOUS) == 0 &&
+ vm_reserv_level_iffullpop(m) == 0) {
+ pmap_promote_l2(pmap, pde, va, &lock);
+ }
+#endif
+
+ rv = KERN_SUCCESS;
+out:
+ if (lock != NULL)
+ rw_wunlock(lock);
+ PMAP_UNLOCK(pmap);
+ return (rv);
+}
+
+/*
+ * Tries to create a read- and/or execute-only 2MB page mapping. Returns true
+ * if successful. Returns false if (1) a page table page cannot be allocated
+ * without sleeping, (2) a mapping already exists at the specified virtual
+ * address, or (3) a PV entry cannot be allocated without reclaiming another
+ * PV entry.
+ */
+static bool
+pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
+ struct rwlock **lockp)
+{
+ pd_entry_t new_l2;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ PMAP_ASSERT_STAGE1(pmap);
+
+ new_l2 = (pd_entry_t)(VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT |
+ ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S1_AP_RO) |
+ L2_BLOCK);
+ if ((m->oflags & VPO_UNMANAGED) == 0) {
+ new_l2 |= ATTR_SW_MANAGED;
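+		/*
+		 * Leave the access flag clear so that the first access to
+		 * the mapping faults and the page's reference can be
+		 * tracked.
+		 */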
+ new_l2 &= ~ATTR_AF;
+ }
+ if ((prot & VM_PROT_EXECUTE) == 0 ||
+ m->md.pv_memattr == VM_MEMATTR_DEVICE)
+ new_l2 |= ATTR_S1_XN;
+ if (va < VM_MAXUSER_ADDRESS)
+ new_l2 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
+ else
+ new_l2 |= ATTR_S1_UXN;
+ if (pmap != kernel_pmap)
+ new_l2 |= ATTR_S1_nG;
+ return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP |
+ PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) ==
+ KERN_SUCCESS);
+}
+
+/*
+ * Returns true if every page table entry in the specified page table is
+ * zero.
+ */
+static bool
+pmap_every_pte_zero(vm_paddr_t pa)
+{
+ pt_entry_t *pt_end, *pte;
+
+ KASSERT((pa & PAGE_MASK) == 0, ("pa is misaligned"));
+ pte = (pt_entry_t *)PHYS_TO_DMAP(pa);
+ for (pt_end = pte + Ln_ENTRIES; pte < pt_end; pte++) {
+ if (*pte != 0)
+ return (false);
+ }
+ return (true);
+}
+
+/*
+ * Tries to create the specified 2MB page mapping. Returns KERN_SUCCESS if
+ * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE
+ * otherwise. Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and
+ * a mapping already exists at the specified virtual address. Returns
+ * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table
+ * page allocation failed. Returns KERN_RESOURCE_SHORTAGE if
+ * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed.
+ *
+ * The parameter "m" is only used when creating a managed, writeable mapping.
+ */
+static int
+pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags,
+ vm_page_t m, struct rwlock **lockp)
+{
+ struct spglist free;
+ pd_entry_t *l2, old_l2;
+ vm_page_t l2pg, mt;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+ if ((l2 = pmap_alloc_l2(pmap, va, &l2pg, (flags &
+ PMAP_ENTER_NOSLEEP) != 0 ? NULL : lockp)) == NULL) {
+ CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p",
+ va, pmap);
+ return (KERN_RESOURCE_SHORTAGE);
+ }
+
+ /*
+ * If there are existing mappings, either abort or remove them.
+ */
+ if ((old_l2 = pmap_load(l2)) != 0) {
+ KASSERT(l2pg == NULL || l2pg->ref_count > 1,
+ ("pmap_enter_l2: l2pg's ref count is too low"));
+ if ((flags & PMAP_ENTER_NOREPLACE) != 0 && (va <
+ VM_MAXUSER_ADDRESS || (old_l2 & ATTR_DESCR_MASK) ==
+ L2_BLOCK || !pmap_every_pte_zero(old_l2 & ~ATTR_MASK))) {
+ if (l2pg != NULL)
+ l2pg->ref_count--;
+ CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return (KERN_FAILURE);
+ }
+ SLIST_INIT(&free);
+ if ((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK)
+ (void)pmap_remove_l2(pmap, l2, va,
+ pmap_load(pmap_l1(pmap, va)), &free, lockp);
+ else
+ pmap_remove_l3_range(pmap, old_l2, va, va + L2_SIZE,
+ &free, lockp);
+ if (va < VM_MAXUSER_ADDRESS) {
+ vm_page_free_pages_toq(&free, true);
+ KASSERT(pmap_load(l2) == 0,
+ ("pmap_enter_l2: non-zero L2 entry %p", l2));
+ } else {
+ KASSERT(SLIST_EMPTY(&free),
+ ("pmap_enter_l2: freed kernel page table page"));
+
+ /*
+ * Both pmap_remove_l2() and pmap_remove_l3_range()
+ * will leave the kernel page table page zero filled.
+ * Nonetheless, the TLB could have an intermediate
+ * entry for the kernel page table page.
+ */
+ mt = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
+ if (pmap_insert_pt_page(pmap, mt, false))
+ panic("pmap_enter_l2: trie insert failed");
+ pmap_clear(l2);
+ pmap_invalidate_page(pmap, va);
+ }
+ }
+
+ if ((new_l2 & ATTR_SW_MANAGED) != 0) {
+ /*
+ * Abort this mapping if its PV entry could not be created.
+ */
+ if (!pmap_pv_insert_l2(pmap, va, new_l2, flags, lockp)) {
+ if (l2pg != NULL)
+ pmap_abort_ptp(pmap, va, l2pg);
+ CTR2(KTR_PMAP,
+ "pmap_enter_l2: failure for va %#lx in pmap %p",
+ va, pmap);
+ return (KERN_RESOURCE_SHORTAGE);
+ }
+ if ((new_l2 & ATTR_SW_DBM) != 0)
+ for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
+ vm_page_aflag_set(mt, PGA_WRITEABLE);
+ }
+
+ /*
+ * Increment counters.
+ */
+ if ((new_l2 & ATTR_SW_WIRED) != 0)
+ pmap->pm_stats.wired_count += L2_SIZE / PAGE_SIZE;
+ pmap->pm_stats.resident_count += L2_SIZE / PAGE_SIZE;
+
+ /*
+ * Map the superpage.
+ */
+ pmap_store(l2, new_l2);
+ dsb(ishst);
+
+ atomic_add_long(&pmap_l2_mappings, 1);
+ CTR2(KTR_PMAP, "pmap_enter_l2: success for va %#lx in pmap %p",
+ va, pmap);
+
+ return (KERN_SUCCESS);
+}
+
+/*
+ * Maps a sequence of resident pages belonging to the same object.
+ * The sequence begins with the given page m_start. This page is
+ * mapped at the given virtual address start. Each subsequent page is
+ * mapped at a virtual address that is offset from start by the same
+ * amount as the page is offset from m_start within the object. The
+ * last page in the sequence is the page with the largest offset from
+ * m_start that can be mapped at a virtual address less than the given
+ * virtual address end. Not every virtual page between start and end
+ * is mapped; only those for which a resident page exists with the
+ * corresponding offset from m_start are mapped.
+ */
+void
+pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
+ vm_page_t m_start, vm_prot_t prot)
+{
+ struct rwlock *lock;
+ vm_offset_t va;
+ vm_page_t m, mpte;
+ vm_pindex_t diff, psize;
+
+ VM_OBJECT_ASSERT_LOCKED(m_start->object);
+
+ psize = atop(end - start);
+ mpte = NULL;
+ m = m_start;
+ lock = NULL;
+ PMAP_LOCK(pmap);
+ while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
+ va = start + ptoa(diff);
+ if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end &&
+ m->psind == 1 && pmap_ps_enabled(pmap) &&
+ pmap_enter_2mpage(pmap, va, m, prot, &lock))
+ m = &m[L2_SIZE / PAGE_SIZE - 1];
+ else
+ mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte,
+ &lock);
+ m = TAILQ_NEXT(m, listq);
+ }
+ if (lock != NULL)
+ rw_wunlock(lock);
+ PMAP_UNLOCK(pmap);
+}
+
+/*
+ * This code makes some *MAJOR* assumptions:
+ * 1. The current pmap and the target pmap exist.
+ * 2. Not wired.
+ * 3. Read access.
+ * 4. No page table pages.
+ * but is *MUCH* faster than pmap_enter...
+ */
+
+void
+pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
+{
+ struct rwlock *lock;
+
+ lock = NULL;
+ PMAP_LOCK(pmap);
+ (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
+ if (lock != NULL)
+ rw_wunlock(lock);
+ PMAP_UNLOCK(pmap);
+}
+
+static vm_page_t
+pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
+{
+ pd_entry_t *pde;
+ pt_entry_t *l2, *l3, l3_val;
+ vm_paddr_t pa;
+ int lvl;
+
+ KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
+ (m->oflags & VPO_UNMANAGED) != 0,
+ ("pmap_enter_quick_locked: managed mapping within the clean submap"));
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ PMAP_ASSERT_STAGE1(pmap);
+
+ CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
+ /*
+ * In the case that a page table page is not
+ * resident, we are creating it here.
+ */
+ if (va < VM_MAXUSER_ADDRESS) {
+ vm_pindex_t l2pindex;
+
+ /*
+ * Calculate pagetable page index
+ */
+ l2pindex = pmap_l2_pindex(va);
+ if (mpte && (mpte->pindex == l2pindex)) {
+ mpte->ref_count++;
+ } else {
+ /*
+ * Get the l2 entry
+ */
+ pde = pmap_pde(pmap, va, &lvl);
+
+ /*
+ * If the page table page is mapped, we just increment
+ * the hold count, and activate it. Otherwise, we
+ * attempt to allocate a page table page. If this
+ * attempt fails, we don't retry. Instead, we give up.
+ */
+ if (lvl == 1) {
+ l2 = pmap_l1_to_l2(pde, va);
+ if ((pmap_load(l2) & ATTR_DESCR_MASK) ==
+ L2_BLOCK)
+ return (NULL);
+ }
+ if (lvl == 2 && pmap_load(pde) != 0) {
+ mpte =
+ PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK);
+ mpte->ref_count++;
+ } else {
+ /*
+ * Pass NULL instead of the PV list lock
+ * pointer, because we don't intend to sleep.
+ */
+ mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
+ if (mpte == NULL)
+ return (mpte);
+ }
+ }
+ l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
+ l3 = &l3[pmap_l3_index(va)];
+ } else {
+ mpte = NULL;
+ pde = pmap_pde(kernel_pmap, va, &lvl);
+ KASSERT(pde != NULL,
+ ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx",
+ va));
+ KASSERT(lvl == 2,
+ ("pmap_enter_quick_locked: Invalid level %d", lvl));
+ l3 = pmap_l2_to_l3(pde, va);
+ }
+
+ /*
+ * Abort if a mapping already exists.
+ */
+ if (pmap_load(l3) != 0) {
+ if (mpte != NULL)
+ mpte->ref_count--;
+ return (NULL);
+ }
+
+ /*
+ * Enter on the PV list if part of our managed memory.
+ */
+ if ((m->oflags & VPO_UNMANAGED) == 0 &&
+ !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
+ if (mpte != NULL)
+ pmap_abort_ptp(pmap, va, mpte);
+ return (NULL);
+ }
+
+ /*
+ * Increment counters
+ */
+ pmap_resident_count_inc(pmap, 1);
+
+ pa = VM_PAGE_TO_PHYS(m);
+ l3_val = pa | ATTR_DEFAULT | ATTR_S1_IDX(m->md.pv_memattr) |
+ ATTR_S1_AP(ATTR_S1_AP_RO) | L3_PAGE;
+ if ((prot & VM_PROT_EXECUTE) == 0 ||
+ m->md.pv_memattr == VM_MEMATTR_DEVICE)
+ l3_val |= ATTR_S1_XN;
+ if (va < VM_MAXUSER_ADDRESS)
+ l3_val |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
+ else
+ l3_val |= ATTR_S1_UXN;
+ if (pmap != kernel_pmap)
+ l3_val |= ATTR_S1_nG;
+
+ /*
+ * Now validate mapping with RO protection
+ */
+ if ((m->oflags & VPO_UNMANAGED) == 0) {
+ l3_val |= ATTR_SW_MANAGED;
+ l3_val &= ~ATTR_AF;
+ }
+
+ /* Sync icache before the mapping is stored to PTE */
+ if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap &&
+ m->md.pv_memattr == VM_MEMATTR_WRITE_BACK)
+ cpu_icache_sync_range(PHYS_TO_DMAP(pa), PAGE_SIZE);
+
+ pmap_store(l3, l3_val);
+ dsb(ishst);
+
+ return (mpte);
+}
+
+/*
+ * This code maps large physical mmap regions into the
+ * processor address space. Note that some shortcuts
+ * are taken, but the code works.
+ */
+void
+pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
+ vm_pindex_t pindex, vm_size_t size)
+{
+
+ VM_OBJECT_ASSERT_WLOCKED(object);
+ KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
+ ("pmap_object_init_pt: non-device object"));
+}
+
+/*
+ * Clear the wired attribute from the mappings for the specified range of
+ * addresses in the given pmap. Every valid mapping within that range
+ * must have the wired attribute set. In contrast, invalid mappings
+ * cannot have the wired attribute set, so they are ignored.
+ *
+ * The wired attribute of the page table entry is not a hardware feature,
+ * so there is no need to invalidate any TLB entries.
+ */
+void
+pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ vm_offset_t va_next;
+ pd_entry_t *l0, *l1, *l2;
+ pt_entry_t *l3;
+
+ PMAP_LOCK(pmap);
+ for (; sva < eva; sva = va_next) {
+ l0 = pmap_l0(pmap, sva);
+ if (pmap_load(l0) == 0) {
+ va_next = (sva + L0_SIZE) & ~L0_OFFSET;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+
+ l1 = pmap_l0_to_l1(l0, sva);
+ if (pmap_load(l1) == 0) {
+ va_next = (sva + L1_SIZE) & ~L1_OFFSET;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+
+ va_next = (sva + L2_SIZE) & ~L2_OFFSET;
+ if (va_next < sva)
+ va_next = eva;
+
+ l2 = pmap_l1_to_l2(l1, sva);
+ if (pmap_load(l2) == 0)
+ continue;
+
+ if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
+ if ((pmap_load(l2) & ATTR_SW_WIRED) == 0)
+ panic("pmap_unwire: l2 %#jx is missing "
+ "ATTR_SW_WIRED", (uintmax_t)pmap_load(l2));
+
+ /*
+ * Are we unwiring the entire large page? If not,
+ * demote the mapping and fall through.
+ */
+ if (sva + L2_SIZE == va_next && eva >= va_next) {
+ pmap_clear_bits(l2, ATTR_SW_WIRED);
+ pmap->pm_stats.wired_count -= L2_SIZE /
+ PAGE_SIZE;
+ continue;
+ } else if (pmap_demote_l2(pmap, l2, sva) == NULL)
+ panic("pmap_unwire: demotion failed");
+ }
+ KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
+ ("pmap_unwire: Invalid l2 entry after demotion"));
+
+ if (va_next > eva)
+ va_next = eva;
+ for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
+ sva += L3_SIZE) {
+ if (pmap_load(l3) == 0)
+ continue;
+ if ((pmap_load(l3) & ATTR_SW_WIRED) == 0)
+ panic("pmap_unwire: l3 %#jx is missing "
+ "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3));
+
+ /*
+ * ATTR_SW_WIRED must be cleared atomically. Although
+ * the pmap lock synchronizes access to ATTR_SW_WIRED,
+ * the System MMU may write to the entry concurrently.
+ */
+ pmap_clear_bits(l3, ATTR_SW_WIRED);
+ pmap->pm_stats.wired_count--;
+ }
+ }
+ PMAP_UNLOCK(pmap);
+}
+
+/*
+ * Copy the range specified by src_addr/len
+ * from the source map to the range dst_addr/len
+ * in the destination map.
+ *
+ * This routine is only advisory and need not do anything.
+ *
+ * Because the executable mappings created by this routine are copied,
+ * it should not have to flush the instruction cache.
+ */
+void
+pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
+ vm_offset_t src_addr)
+{
+ struct rwlock *lock;
+ pd_entry_t *l0, *l1, *l2, srcptepaddr;
+ pt_entry_t *dst_pte, mask, nbits, ptetemp, *src_pte;
+ vm_offset_t addr, end_addr, va_next;
+ vm_page_t dst_l2pg, dstmpte, srcmpte;
+
+ PMAP_ASSERT_STAGE1(dst_pmap);
+ PMAP_ASSERT_STAGE1(src_pmap);
+
+ if (dst_addr != src_addr)
+ return;
+ end_addr = src_addr + len;
+ lock = NULL;
+ if (dst_pmap < src_pmap) {
+ PMAP_LOCK(dst_pmap);
+ PMAP_LOCK(src_pmap);
+ } else {
+ PMAP_LOCK(src_pmap);
+ PMAP_LOCK(dst_pmap);
+ }
+ for (addr = src_addr; addr < end_addr; addr = va_next) {
+ l0 = pmap_l0(src_pmap, addr);
+ if (pmap_load(l0) == 0) {
+ va_next = (addr + L0_SIZE) & ~L0_OFFSET;
+ if (va_next < addr)
+ va_next = end_addr;
+ continue;
+ }
+ l1 = pmap_l0_to_l1(l0, addr);
+ if (pmap_load(l1) == 0) {
+ va_next = (addr + L1_SIZE) & ~L1_OFFSET;
+ if (va_next < addr)
+ va_next = end_addr;
+ continue;
+ }
+ va_next = (addr + L2_SIZE) & ~L2_OFFSET;
+ if (va_next < addr)
+ va_next = end_addr;
+ l2 = pmap_l1_to_l2(l1, addr);
+ srcptepaddr = pmap_load(l2);
+ if (srcptepaddr == 0)
+ continue;
+ if ((srcptepaddr & ATTR_DESCR_MASK) == L2_BLOCK) {
+ if ((addr & L2_OFFSET) != 0 ||
+ addr + L2_SIZE > end_addr)
+ continue;
+ l2 = pmap_alloc_l2(dst_pmap, addr, &dst_l2pg, NULL);
+ if (l2 == NULL)
+ break;
+ if (pmap_load(l2) == 0 &&
+ ((srcptepaddr & ATTR_SW_MANAGED) == 0 ||
+ pmap_pv_insert_l2(dst_pmap, addr, srcptepaddr,
+ PMAP_ENTER_NORECLAIM, &lock))) {
+ mask = ATTR_AF | ATTR_SW_WIRED;
+ nbits = 0;
+ if ((srcptepaddr & ATTR_SW_DBM) != 0)
+ nbits |= ATTR_S1_AP_RW_BIT;
+ pmap_store(l2, (srcptepaddr & ~mask) | nbits);
+ pmap_resident_count_inc(dst_pmap, L2_SIZE /
+ PAGE_SIZE);
+ atomic_add_long(&pmap_l2_mappings, 1);
+ } else
+ pmap_abort_ptp(dst_pmap, addr, dst_l2pg);
+ continue;
+ }
+ KASSERT((srcptepaddr & ATTR_DESCR_MASK) == L2_TABLE,
+ ("pmap_copy: invalid L2 entry"));
+ srcptepaddr &= ~ATTR_MASK;
+ srcmpte = PHYS_TO_VM_PAGE(srcptepaddr);
+ KASSERT(srcmpte->ref_count > 0,
+ ("pmap_copy: source page table page is unused"));
+ if (va_next > end_addr)
+ va_next = end_addr;
+ src_pte = (pt_entry_t *)PHYS_TO_DMAP(srcptepaddr);
+ src_pte = &src_pte[pmap_l3_index(addr)];
+ dstmpte = NULL;
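+		/*
+		 * Copy the 4KB mappings one at a time, allocating a
+		 * destination page table page on first use and aborting the
+		 * copy if that allocation or a PV entry insertion fails.
+		 */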
+ for (; addr < va_next; addr += PAGE_SIZE, src_pte++) {
+ ptetemp = pmap_load(src_pte);
+
+ /*
+ * We only virtual copy managed pages.
+ */
+ if ((ptetemp & ATTR_SW_MANAGED) == 0)
+ continue;
+
+ if (dstmpte != NULL) {
+ KASSERT(dstmpte->pindex == pmap_l2_pindex(addr),
+ ("dstmpte pindex/addr mismatch"));
+ dstmpte->ref_count++;
+ } else if ((dstmpte = pmap_alloc_l3(dst_pmap, addr,
+ NULL)) == NULL)
+ goto out;
+ dst_pte = (pt_entry_t *)
+ PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte));
+ dst_pte = &dst_pte[pmap_l3_index(addr)];
+ if (pmap_load(dst_pte) == 0 &&
+ pmap_try_insert_pv_entry(dst_pmap, addr,
+ PHYS_TO_VM_PAGE(ptetemp & ~ATTR_MASK), &lock)) {
+ /*
+ * Clear the wired, modified, and accessed
+ * (referenced) bits during the copy.
+ */
+ mask = ATTR_AF | ATTR_SW_WIRED;
+ nbits = 0;
+ if ((ptetemp & ATTR_SW_DBM) != 0)
+ nbits |= ATTR_S1_AP_RW_BIT;
+ pmap_store(dst_pte, (ptetemp & ~mask) | nbits);
+ pmap_resident_count_inc(dst_pmap, 1);
+ } else {
+ pmap_abort_ptp(dst_pmap, addr, dstmpte);
+ goto out;
+ }
+ /* Have we copied all of the valid mappings? */
+ if (dstmpte->ref_count >= srcmpte->ref_count)
+ break;
+ }
+ }
+out:
+ /*
+ * XXX This barrier may not be needed because the destination pmap is
+ * not active.
+ */
+ dsb(ishst);
+
+ if (lock != NULL)
+ rw_wunlock(lock);
+ PMAP_UNLOCK(src_pmap);
+ PMAP_UNLOCK(dst_pmap);
+}
+
+/*
+ * pmap_zero_page zeros the specified hardware page by mapping
+ * the page into KVM and using bzero to clear its contents.
+ */
+void
+pmap_zero_page(vm_page_t m)
+{
+ vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
+
+ pagezero((void *)va);
+}
+
+/*
+ * pmap_zero_page_area zeros the specified hardware page by mapping
+ * the page into KVM and using bzero to clear its contents.
+ *
+ * off and size may not cover an area beyond a single hardware page.
+ */
+void
+pmap_zero_page_area(vm_page_t m, int off, int size)
+{
+ vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
+
+ if (off == 0 && size == PAGE_SIZE)
+ pagezero((void *)va);
+ else
+ bzero((char *)va + off, size);
+}
+
+/*
+ * pmap_copy_page copies the specified (machine independent)
+ * page by mapping the page into virtual memory and using
+ * bcopy to copy the page, one machine dependent page at a
+ * time.
+ */
+void
+pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
+{
+ vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
+ vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
+
+ pagecopy((void *)src, (void *)dst);
+}
+
+int unmapped_buf_allowed = 1;
+
+void
+pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
+ vm_offset_t b_offset, int xfersize)
+{
+ void *a_cp, *b_cp;
+ vm_page_t m_a, m_b;
+ vm_paddr_t p_a, p_b;
+ vm_offset_t a_pg_offset, b_pg_offset;
+ int cnt;
+
+ while (xfersize > 0) {
+ a_pg_offset = a_offset & PAGE_MASK;
+ m_a = ma[a_offset >> PAGE_SHIFT];
+ p_a = m_a->phys_addr;
+ b_pg_offset = b_offset & PAGE_MASK;
+ m_b = mb[b_offset >> PAGE_SHIFT];
+ p_b = m_b->phys_addr;
+ cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
+ cnt = min(cnt, PAGE_SIZE - b_pg_offset);
+ if (__predict_false(!PHYS_IN_DMAP(p_a))) {
+ panic("!DMAP a %lx", p_a);
+ } else {
+ a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
+ }
+ if (__predict_false(!PHYS_IN_DMAP(p_b))) {
+ panic("!DMAP b %lx", p_b);
+ } else {
+ b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
+ }
+ bcopy(a_cp, b_cp, cnt);
+ a_offset += cnt;
+ b_offset += cnt;
+ xfersize -= cnt;
+ }
+}
+
+vm_offset_t
+pmap_quick_enter_page(vm_page_t m)
+{
+
+ return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
+}
+
+void
+pmap_quick_remove_page(vm_offset_t addr)
+{
+}
+
+/*
+ * Returns true if the pmap's pv is one of the first
+ * 16 pvs linked to from this page. This count may
+ * be changed upwards or downwards in the future; it
+ * is only necessary that true be returned for a small
+ * subset of pmaps for proper page aging.
+ */
+boolean_t
+pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
+{
+ struct md_page *pvh;
+ struct rwlock *lock;
+ pv_entry_t pv;
+ int loops = 0;
+ boolean_t rv;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_page_exists_quick: page %p is not managed", m));
+ rv = FALSE;
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ rw_rlock(lock);
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
+ if (PV_PMAP(pv) == pmap) {
+ rv = TRUE;
+ break;
+ }
+ loops++;
+ if (loops >= 16)
+ break;
+ }
+ if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
+ if (PV_PMAP(pv) == pmap) {
+ rv = TRUE;
+ break;
+ }
+ loops++;
+ if (loops >= 16)
+ break;
+ }
+ }
+ rw_runlock(lock);
+ return (rv);
+}
+
+/*
+ * pmap_page_wired_mappings:
+ *
+ * Return the number of managed mappings to the given physical page
+ * that are wired.
+ */
+int
+pmap_page_wired_mappings(vm_page_t m)
+{
+ struct rwlock *lock;
+ struct md_page *pvh;
+ pmap_t pmap;
+ pt_entry_t *pte;
+ pv_entry_t pv;
+ int count, lvl, md_gen, pvh_gen;
+
+ if ((m->oflags & VPO_UNMANAGED) != 0)
+ return (0);
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ rw_rlock(lock);
+restart:
+ count = 0;
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
+ pmap = PV_PMAP(pv);
+ if (!PMAP_TRYLOCK(pmap)) {
+ md_gen = m->md.pv_gen;
+ rw_runlock(lock);
+ PMAP_LOCK(pmap);
+ rw_rlock(lock);
+ if (md_gen != m->md.pv_gen) {
+ PMAP_UNLOCK(pmap);
+ goto restart;
+ }
+ }
+ pte = pmap_pte(pmap, pv->pv_va, &lvl);
+ if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0)
+ count++;
+ PMAP_UNLOCK(pmap);
+ }
+ if ((m->flags & PG_FICTITIOUS) == 0) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
+ pmap = PV_PMAP(pv);
+ if (!PMAP_TRYLOCK(pmap)) {
+ md_gen = m->md.pv_gen;
+ pvh_gen = pvh->pv_gen;
+ rw_runlock(lock);
+ PMAP_LOCK(pmap);
+ rw_rlock(lock);
+ if (md_gen != m->md.pv_gen ||
+ pvh_gen != pvh->pv_gen) {
+ PMAP_UNLOCK(pmap);
+ goto restart;
+ }
+ }
+ pte = pmap_pte(pmap, pv->pv_va, &lvl);
+ if (pte != NULL &&
+ (pmap_load(pte) & ATTR_SW_WIRED) != 0)
+ count++;
+ PMAP_UNLOCK(pmap);
+ }
+ }
+ rw_runlock(lock);
+ return (count);
+}
+
+/*
+ * Returns true if the given page is mapped individually or as part of
+ * a 2mpage. Otherwise, returns false.
+ */
+bool
+pmap_page_is_mapped(vm_page_t m)
+{
+ struct rwlock *lock;
+ bool rv;
+
+ if ((m->oflags & VPO_UNMANAGED) != 0)
+ return (false);
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ rw_rlock(lock);
+ rv = !TAILQ_EMPTY(&m->md.pv_list) ||
+ ((m->flags & PG_FICTITIOUS) == 0 &&
+ !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
+ rw_runlock(lock);
+ return (rv);
+}
+
+/*
+ * Destroy all managed, non-wired mappings in the given user-space
+ * pmap. This pmap cannot be active on any processor besides the
+ * caller.
+ *
+ * This function cannot be applied to the kernel pmap. Moreover, it
+ * is not intended for general use. It is only to be used during
+ * process termination. Consequently, it can be implemented in ways
+ * that make it faster than pmap_remove(). First, it can more quickly
+ * destroy mappings by iterating over the pmap's collection of PV
+ * entries, rather than searching the page table. Second, it doesn't
+ * have to test and clear the page table entries atomically, because
+ * no processor is currently accessing the user address space. In
+ * particular, a page table entry's dirty bit won't change state once
+ * this function starts.
+ */
+void
+pmap_remove_pages(pmap_t pmap)
+{
+ pd_entry_t *pde;
+ pt_entry_t *pte, tpte;
+ struct spglist free;
+ vm_page_t m, ml3, mt;
+ pv_entry_t pv;
+ struct md_page *pvh;
+ struct pv_chunk *pc, *npc;
+ struct rwlock *lock;
+ int64_t bit;
+ uint64_t inuse, bitmask;
+ int allfree, field, freed, idx, lvl;
+ vm_paddr_t pa;
+
+ KASSERT(pmap == PCPU_GET(curpmap), ("non-current pmap %p", pmap));
+
+ lock = NULL;
+
+ SLIST_INIT(&free);
+ PMAP_LOCK(pmap);
+ TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
+ allfree = 1;
+ freed = 0;
+ for (field = 0; field < _NPCM; field++) {
+ inuse = ~pc->pc_map[field] & pc_freemask[field];
+ while (inuse != 0) {
+ bit = ffsl(inuse) - 1;
+ bitmask = 1UL << bit;
+ idx = field * 64 + bit;
+ pv = &pc->pc_pventry[idx];
+ inuse &= ~bitmask;
+
+ pde = pmap_pde(pmap, pv->pv_va, &lvl);
+ KASSERT(pde != NULL,
+ ("Attempting to remove an unmapped page"));
+
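+				/*
+				 * Locate the PTE for this PV entry: an L2
+				 * block entry (2MB) when lvl is 1, or an L3
+				 * page entry (4KB) when lvl is 2.
+				 */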
+ switch(lvl) {
+ case 1:
+ pte = pmap_l1_to_l2(pde, pv->pv_va);
+ tpte = pmap_load(pte);
+ KASSERT((tpte & ATTR_DESCR_MASK) ==
+ L2_BLOCK,
+ ("Attempting to remove an invalid "
+ "block: %lx", tpte));
+ break;
+ case 2:
+ pte = pmap_l2_to_l3(pde, pv->pv_va);
+ tpte = pmap_load(pte);
+ KASSERT((tpte & ATTR_DESCR_MASK) ==
+ L3_PAGE,
+ ("Attempting to remove an invalid "
+ "page: %lx", tpte));
+ break;
+ default:
+ panic(
+ "Invalid page directory level: %d",
+ lvl);
+ }
+
+/*
+ * We cannot remove wired pages from a process' mapping at this time
+ */
+ if (tpte & ATTR_SW_WIRED) {
+ allfree = 0;
+ continue;
+ }
+
+ pa = tpte & ~ATTR_MASK;
+
+ m = PHYS_TO_VM_PAGE(pa);
+ KASSERT(m->phys_addr == pa,
+ ("vm_page_t %p phys_addr mismatch %016jx %016jx",
+ m, (uintmax_t)m->phys_addr,
+ (uintmax_t)tpte));
+
+ KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
+ m < &vm_page_array[vm_page_array_size],
+ ("pmap_remove_pages: bad pte %#jx",
+ (uintmax_t)tpte));
+
+ /*
+ * Because this pmap is not active on other
+ * processors, the dirty bit cannot have
+ * changed state since we last loaded pte.
+ */
+ pmap_clear(pte);
+
+ /*
+ * Update the vm_page_t clean/reference bits.
+ */
+ if (pmap_pte_dirty(pmap, tpte)) {
+ switch (lvl) {
+ case 1:
+ for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
+ vm_page_dirty(mt);
+ break;
+ case 2:
+ vm_page_dirty(m);
+ break;
+ }
+ }
+
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
+
+ /* Mark free */
+ pc->pc_map[field] |= bitmask;
+ switch (lvl) {
+ case 1:
+ pmap_resident_count_dec(pmap,
+ L2_SIZE / PAGE_SIZE);
+ pvh = pa_to_pvh(tpte & ~ATTR_MASK);
+ TAILQ_REMOVE(&pvh->pv_list, pv,pv_next);
+ pvh->pv_gen++;
+ if (TAILQ_EMPTY(&pvh->pv_list)) {
+ for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
+ if ((mt->a.flags & PGA_WRITEABLE) != 0 &&
+ TAILQ_EMPTY(&mt->md.pv_list))
+ vm_page_aflag_clear(mt, PGA_WRITEABLE);
+ }
+ ml3 = pmap_remove_pt_page(pmap,
+ pv->pv_va);
+ if (ml3 != NULL) {
+ KASSERT(ml3->valid == VM_PAGE_BITS_ALL,
+ ("pmap_remove_pages: l3 page not promoted"));
+ pmap_resident_count_dec(pmap,1);
+ KASSERT(ml3->ref_count == NL3PG,
+ ("pmap_remove_pages: l3 page ref count error"));
+ ml3->ref_count = 0;
+ pmap_add_delayed_free_list(ml3,
+ &free, FALSE);
+ }
+ break;
+ case 2:
+ pmap_resident_count_dec(pmap, 1);
+ TAILQ_REMOVE(&m->md.pv_list, pv,
+ pv_next);
+ m->md.pv_gen++;
+ if ((m->a.flags & PGA_WRITEABLE) != 0 &&
+ TAILQ_EMPTY(&m->md.pv_list) &&
+ (m->flags & PG_FICTITIOUS) == 0) {
+ pvh = pa_to_pvh(
+ VM_PAGE_TO_PHYS(m));
+ if (TAILQ_EMPTY(&pvh->pv_list))
+ vm_page_aflag_clear(m,
+ PGA_WRITEABLE);
+ }
+ break;
+ }
+ pmap_unuse_pt(pmap, pv->pv_va, pmap_load(pde),
+ &free);
+ freed++;
+ }
+ }
+ PV_STAT(atomic_add_long(&pv_entry_frees, freed));
+ PV_STAT(atomic_add_int(&pv_entry_spare, freed));
+ PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
+ if (allfree) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ free_pv_chunk(pc);
+ }
+ }
+ if (lock != NULL)
+ rw_wunlock(lock);
+ pmap_invalidate_all(pmap);
+ PMAP_UNLOCK(pmap);
+ vm_page_free_pages_toq(&free, true);
+}
+
+/*
+ * This is used to check if a page has been accessed or modified.
+ */
+static boolean_t
+pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
+{
+ struct rwlock *lock;
+ pv_entry_t pv;
+ struct md_page *pvh;
+ pt_entry_t *pte, mask, value;
+ pmap_t pmap;
+ int lvl, md_gen, pvh_gen;
+ boolean_t rv;
+
+ rv = FALSE;
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ rw_rlock(lock);
+restart:
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
+ pmap = PV_PMAP(pv);
+ PMAP_ASSERT_STAGE1(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ md_gen = m->md.pv_gen;
+ rw_runlock(lock);
+ PMAP_LOCK(pmap);
+ rw_rlock(lock);
+ if (md_gen != m->md.pv_gen) {
+ PMAP_UNLOCK(pmap);
+ goto restart;
+ }
+ }
+ pte = pmap_pte(pmap, pv->pv_va, &lvl);
+ KASSERT(lvl == 3,
+ ("pmap_page_test_mappings: Invalid level %d", lvl));
+ mask = 0;
+ value = 0;
+ if (modified) {
+ mask |= ATTR_S1_AP_RW_BIT;
+ value |= ATTR_S1_AP(ATTR_S1_AP_RW);
+ }
+ if (accessed) {
+ mask |= ATTR_AF | ATTR_DESCR_MASK;
+ value |= ATTR_AF | L3_PAGE;
+ }
+ rv = (pmap_load(pte) & mask) == value;
+ PMAP_UNLOCK(pmap);
+ if (rv)
+ goto out;
+ }
+ if ((m->flags & PG_FICTITIOUS) == 0) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
+ pmap = PV_PMAP(pv);
+ PMAP_ASSERT_STAGE1(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ md_gen = m->md.pv_gen;
+ pvh_gen = pvh->pv_gen;
+ rw_runlock(lock);
+ PMAP_LOCK(pmap);
+ rw_rlock(lock);
+ if (md_gen != m->md.pv_gen ||
+ pvh_gen != pvh->pv_gen) {
+ PMAP_UNLOCK(pmap);
+ goto restart;
+ }
+ }
+ pte = pmap_pte(pmap, pv->pv_va, &lvl);
+ KASSERT(lvl == 2,
+ ("pmap_page_test_mappings: Invalid level %d", lvl));
+ mask = 0;
+ value = 0;
+ if (modified) {
+ mask |= ATTR_S1_AP_RW_BIT;
+ value |= ATTR_S1_AP(ATTR_S1_AP_RW);
+ }
+ if (accessed) {
+ mask |= ATTR_AF | ATTR_DESCR_MASK;
+ value |= ATTR_AF | L2_BLOCK;
+ }
+ rv = (pmap_load(pte) & mask) == value;
+ PMAP_UNLOCK(pmap);
+ if (rv)
+ goto out;
+ }
+ }
+out:
+ rw_runlock(lock);
+ return (rv);
+}
+
+/*
+ * pmap_is_modified:
+ *
+ * Return whether or not the specified physical page was modified
+ * in any physical maps.
+ */
+boolean_t
+pmap_is_modified(vm_page_t m)
+{
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_is_modified: page %p is not managed", m));
+
+ /*
+ * If the page is not busied then this check is racy.
+ */
+ if (!pmap_page_is_write_mapped(m))
+ return (FALSE);
+ return (pmap_page_test_mappings(m, FALSE, TRUE));
+}
+
+/*
+ * pmap_is_prefaultable:
+ *
+ * Return whether or not the specified virtual address is eligible
+ * for prefault.
+ */
+boolean_t
+pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
+{
+ pt_entry_t *pte;
+ boolean_t rv;
+ int lvl;
+
+ rv = FALSE;
+ PMAP_LOCK(pmap);
+ pte = pmap_pte(pmap, addr, &lvl);
+ if (pte != NULL && pmap_load(pte) != 0) {
+ rv = TRUE;
+ }
+ PMAP_UNLOCK(pmap);
+ return (rv);
+}
+
+/*
+ * pmap_is_referenced:
+ *
+ * Return whether or not the specified physical page was referenced
+ * in any physical maps.
+ */
+boolean_t
+pmap_is_referenced(vm_page_t m)
+{
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_is_referenced: page %p is not managed", m));
+ return (pmap_page_test_mappings(m, TRUE, FALSE));
+}
+
+/*
+ * Clear the write and modified bits in each of the given page's mappings.
+ */
+void
+pmap_remove_write(vm_page_t m)
+{
+ struct md_page *pvh;
+ pmap_t pmap;
+ struct rwlock *lock;
+ pv_entry_t next_pv, pv;
+ pt_entry_t oldpte, *pte;
+ vm_offset_t va;
+ int lvl, md_gen, pvh_gen;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_remove_write: page %p is not managed", m));
+ vm_page_assert_busied(m);
+
+ if (!pmap_page_is_write_mapped(m))
+ return;
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
+ pa_to_pvh(VM_PAGE_TO_PHYS(m));
+retry_pv_loop:
+ rw_wlock(lock);
+ TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
+ pmap = PV_PMAP(pv);
+ PMAP_ASSERT_STAGE1(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen) {
+ PMAP_UNLOCK(pmap);
+ rw_wunlock(lock);
+ goto retry_pv_loop;
+ }
+ }
+ va = pv->pv_va;
+ pte = pmap_pte(pmap, pv->pv_va, &lvl);
+ if ((pmap_load(pte) & ATTR_SW_DBM) != 0)
+ (void)pmap_demote_l2_locked(pmap, pte, va, &lock);
+ KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
+ ("inconsistent pv lock %p %p for page %p",
+ lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
+ PMAP_UNLOCK(pmap);
+ }
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
+ pmap = PV_PMAP(pv);
+ PMAP_ASSERT_STAGE1(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ md_gen = m->md.pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen ||
+ md_gen != m->md.pv_gen) {
+ PMAP_UNLOCK(pmap);
+ rw_wunlock(lock);
+ goto retry_pv_loop;
+ }
+ }
+ pte = pmap_pte(pmap, pv->pv_va, &lvl);
+ oldpte = pmap_load(pte);
+retry:
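+		/*
+		 * Atomically write protect the mapping: set the read-only AP
+		 * bit and clear ATTR_SW_DBM. If the old access permission
+		 * allowed writes, the page may have been dirtied, so record
+		 * that in the vm_page before invalidating the TLB entry.
+		 */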
+ if ((oldpte & ATTR_SW_DBM) != 0) {
+ if (!atomic_fcmpset_long(pte, &oldpte,
+ (oldpte | ATTR_S1_AP_RW_BIT) & ~ATTR_SW_DBM))
+ goto retry;
+ if ((oldpte & ATTR_S1_AP_RW_BIT) ==
+ ATTR_S1_AP(ATTR_S1_AP_RW))
+ vm_page_dirty(m);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ }
+ PMAP_UNLOCK(pmap);
+ }
+ rw_wunlock(lock);
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+}
+
+/*
+ * pmap_ts_referenced:
+ *
+ * Return a count of reference bits for a page, clearing those bits.
+ * It is not necessary for every reference bit to be cleared, but it
+ * is necessary that 0 only be returned when there are truly no
+ * reference bits set.
+ *
+ * As an optimization, update the page's dirty field if a modified bit is
+ * found while counting reference bits. This opportunistic update can be
+ * performed at low cost and can eliminate the need for some future calls
+ * to pmap_is_modified(). However, since this function stops after
+ * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
+ * dirty pages. Those dirty pages will only be detected by a future call
+ * to pmap_is_modified().
+ */
+int
+pmap_ts_referenced(vm_page_t m)
+{
+ struct md_page *pvh;
+ pv_entry_t pv, pvf;
+ pmap_t pmap;
+ struct rwlock *lock;
+ pd_entry_t *pde, tpde;
+ pt_entry_t *pte, tpte;
+ vm_offset_t va;
+ vm_paddr_t pa;
+ int cleared, lvl, md_gen, not_cleared, pvh_gen;
+ struct spglist free;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_ts_referenced: page %p is not managed", m));
+ SLIST_INIT(&free);
+ cleared = 0;
+ pa = VM_PAGE_TO_PHYS(m);
+ lock = PHYS_TO_PV_LIST_LOCK(pa);
+ pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa);
+ rw_wlock(lock);
+retry:
+ not_cleared = 0;
+ if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
+ goto small_mappings;
+ pv = pvf;
+ do {
+ if (pvf == NULL)
+ pvf = pv;
+ pmap = PV_PMAP(pv);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen) {
+ PMAP_UNLOCK(pmap);
+ goto retry;
+ }
+ }
+ va = pv->pv_va;
+ pde = pmap_pde(pmap, pv->pv_va, &lvl);
+ KASSERT(pde != NULL, ("pmap_ts_referenced: no l1 table found"));
+ KASSERT(lvl == 1,
+ ("pmap_ts_referenced: invalid pde level %d", lvl));
+ tpde = pmap_load(pde);
+ KASSERT((tpde & ATTR_DESCR_MASK) == L1_TABLE,
+ ("pmap_ts_referenced: found an invalid l1 table"));
+ pte = pmap_l1_to_l2(pde, pv->pv_va);
+ tpte = pmap_load(pte);
+ if (pmap_pte_dirty(pmap, tpte)) {
+ /*
+ * Although "tpte" is mapping a 2MB page, because
+ * this function is called at a 4KB page granularity,
+ * we only update the 4KB page under test.
+ */
+ vm_page_dirty(m);
+ }
+
+ if ((tpte & ATTR_AF) != 0) {
+ /*
+ * Since this reference bit is shared by 512 4KB pages,
+ * it should not be cleared every time it is tested.
+ * Apply a simple "hash" function on the physical page
+ * number, the virtual superpage number, and the pmap
+ * address to select one 4KB page out of the 512 on
+ * which testing the reference bit will result in
+ * clearing that reference bit. This function is
+ * designed to avoid the selection of the same 4KB page
+ * for every 2MB page mapping.
+ *
+ * On demotion, a mapping that hasn't been referenced
+ * is simply destroyed. To avoid the possibility of a
+ * subsequent page fault on a demoted wired mapping,
+ * always leave its reference bit set. Moreover,
+ * since the superpage is wired, the current state of
+ * its reference bit won't affect page replacement.
+ */
+ if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^
+ (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 &&
+ (tpte & ATTR_SW_WIRED) == 0) {
+ pmap_clear_bits(pte, ATTR_AF);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ cleared++;
+ } else
+ not_cleared++;
+ }
+ PMAP_UNLOCK(pmap);
+ /* Rotate the PV list if it has more than one entry. */
+ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
+ TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
+ TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
+ pvh->pv_gen++;
+ }
+ if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX)
+ goto out;
+ } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
+small_mappings:
+ if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
+ goto out;
+ pv = pvf;
+ do {
+ if (pvf == NULL)
+ pvf = pv;
+ pmap = PV_PMAP(pv);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ md_gen = m->md.pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
+ PMAP_UNLOCK(pmap);
+ goto retry;
+ }
+ }
+ pde = pmap_pde(pmap, pv->pv_va, &lvl);
+ KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found"));
+ KASSERT(lvl == 2,
+ ("pmap_ts_referenced: invalid pde level %d", lvl));
+ tpde = pmap_load(pde);
+ KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE,
+ ("pmap_ts_referenced: found an invalid l2 table"));
+ pte = pmap_l2_to_l3(pde, pv->pv_va);
+ tpte = pmap_load(pte);
+ if (pmap_pte_dirty(pmap, tpte))
+ vm_page_dirty(m);
+ if ((tpte & ATTR_AF) != 0) {
+ if ((tpte & ATTR_SW_WIRED) == 0) {
+ pmap_clear_bits(pte, ATTR_AF);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ cleared++;
+ } else
+ not_cleared++;
+ }
+ PMAP_UNLOCK(pmap);
+ /* Rotate the PV list if it has more than one entry. */
+ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+ m->md.pv_gen++;
+ }
+ } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
+ not_cleared < PMAP_TS_REFERENCED_MAX);
+out:
+ rw_wunlock(lock);
+ vm_page_free_pages_toq(&free, true);
+ return (cleared + not_cleared);
+}
+
+/*
+ * Apply the given advice to the specified range of addresses within the
+ * given pmap. Depending on the advice, clear the referenced and/or
+ * modified flags in each mapping and set the mapped page's dirty field.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+ struct rwlock *lock;
+ vm_offset_t va, va_next;
+ vm_page_t m;
+ pd_entry_t *l0, *l1, *l2, oldl2;
+ pt_entry_t *l3, oldl3;
+
+ PMAP_ASSERT_STAGE1(pmap);
+
+ if (advice != MADV_DONTNEED && advice != MADV_FREE)
+ return;
+
+ PMAP_LOCK(pmap);
+ for (; sva < eva; sva = va_next) {
+ l0 = pmap_l0(pmap, sva);
+ if (pmap_load(l0) == 0) {
+ va_next = (sva + L0_SIZE) & ~L0_OFFSET;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+ l1 = pmap_l0_to_l1(l0, sva);
+ if (pmap_load(l1) == 0) {
+ va_next = (sva + L1_SIZE) & ~L1_OFFSET;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+ va_next = (sva + L2_SIZE) & ~L2_OFFSET;
+ if (va_next < sva)
+ va_next = eva;
+ l2 = pmap_l1_to_l2(l1, sva);
+ oldl2 = pmap_load(l2);
+ if (oldl2 == 0)
+ continue;
+ if ((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK) {
+ if ((oldl2 & ATTR_SW_MANAGED) == 0)
+ continue;
+ lock = NULL;
+ if (!pmap_demote_l2_locked(pmap, l2, sva, &lock)) {
+ if (lock != NULL)
+ rw_wunlock(lock);
+
+ /*
+ * The 2MB page mapping was destroyed.
+ */
+ continue;
+ }
+
+ /*
+ * Unless the page mappings are wired, remove the
+ * mapping to a single page so that a subsequent
+ * access may repromote. Choosing the last page
+ * within the address range [sva, min(va_next, eva))
+ * generally results in more repromotions. Since the
+ * underlying page table page is fully populated, this
+ * removal never frees a page table page.
+ */
+ if ((oldl2 & ATTR_SW_WIRED) == 0) {
+ va = eva;
+ if (va > va_next)
+ va = va_next;
+ va -= PAGE_SIZE;
+ KASSERT(va >= sva,
+ ("pmap_advise: no address gap"));
+ l3 = pmap_l2_to_l3(l2, va);
+ KASSERT(pmap_load(l3) != 0,
+ ("pmap_advise: invalid PTE"));
+ pmap_remove_l3(pmap, l3, va, pmap_load(l2),
+ NULL, &lock);
+ }
+ if (lock != NULL)
+ rw_wunlock(lock);
+ }
+ KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
+ ("pmap_advise: invalid L2 entry after demotion"));
+ if (va_next > eva)
+ va_next = eva;
+ va = va_next;
+ for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
+ sva += L3_SIZE) {
+ oldl3 = pmap_load(l3);
+ if ((oldl3 & (ATTR_SW_MANAGED | ATTR_DESCR_MASK)) !=
+ (ATTR_SW_MANAGED | L3_PAGE))
+ goto maybe_invlrng;
+ else if (pmap_pte_dirty(pmap, oldl3)) {
+ if (advice == MADV_DONTNEED) {
+ /*
+ * Future calls to pmap_is_modified()
+ * can be avoided by making the page
+ * dirty now.
+ */
+ m = PHYS_TO_VM_PAGE(oldl3 & ~ATTR_MASK);
+ vm_page_dirty(m);
+ }
+ while (!atomic_fcmpset_long(l3, &oldl3,
+ (oldl3 & ~ATTR_AF) |
+ ATTR_S1_AP(ATTR_S1_AP_RO)))
+ cpu_spinwait();
+ } else if ((oldl3 & ATTR_AF) != 0)
+ pmap_clear_bits(l3, ATTR_AF);
+ else
+ goto maybe_invlrng;
+ if (va == va_next)
+ va = sva;
+ continue;
+maybe_invlrng:
+ if (va != va_next) {
+ pmap_invalidate_range(pmap, va, sva);
+ va = va_next;
+ }
+ }
+ if (va != va_next)
+ pmap_invalidate_range(pmap, va, sva);
+ }
+ PMAP_UNLOCK(pmap);
+}
+
+/*
+ * Clear the modify bits on the specified physical page.
+ */
+void
+pmap_clear_modify(vm_page_t m)
+{
+ struct md_page *pvh;
+ struct rwlock *lock;
+ pmap_t pmap;
+ pv_entry_t next_pv, pv;
+ pd_entry_t *l2, oldl2;
+ pt_entry_t *l3, oldl3;
+ vm_offset_t va;
+ int md_gen, pvh_gen;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_clear_modify: page %p is not managed", m));
+ vm_page_assert_busied(m);
+
+ if (!pmap_page_is_write_mapped(m))
+ return;
+ pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
+ pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ rw_wlock(lock);
+restart:
+ TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
+ pmap = PV_PMAP(pv);
+ PMAP_ASSERT_STAGE1(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen) {
+ PMAP_UNLOCK(pmap);
+ goto restart;
+ }
+ }
+ va = pv->pv_va;
+ l2 = pmap_l2(pmap, va);
+ oldl2 = pmap_load(l2);
+ /* If oldl2 has ATTR_SW_DBM set, then it is also dirty. */
+ if ((oldl2 & ATTR_SW_DBM) != 0 &&
+ pmap_demote_l2_locked(pmap, l2, va, &lock) &&
+ (oldl2 & ATTR_SW_WIRED) == 0) {
+ /*
+ * Write protect the mapping to a single page so that
+ * a subsequent write access may repromote.
+ */
+ va += VM_PAGE_TO_PHYS(m) - (oldl2 & ~ATTR_MASK);
+ l3 = pmap_l2_to_l3(l2, va);
+ oldl3 = pmap_load(l3);
+ while (!atomic_fcmpset_long(l3, &oldl3,
+ (oldl3 & ~ATTR_SW_DBM) | ATTR_S1_AP(ATTR_S1_AP_RO)))
+ cpu_spinwait();
+ vm_page_dirty(m);
+ pmap_invalidate_page(pmap, va);
+ }
+ PMAP_UNLOCK(pmap);
+ }
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
+ pmap = PV_PMAP(pv);
+ PMAP_ASSERT_STAGE1(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ md_gen = m->md.pv_gen;
+ pvh_gen = pvh->pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
+ PMAP_UNLOCK(pmap);
+ goto restart;
+ }
+ }
+ l2 = pmap_l2(pmap, pv->pv_va);
+ l3 = pmap_l2_to_l3(l2, pv->pv_va);
+ oldl3 = pmap_load(l3);
+ if (pmap_l3_valid(oldl3) &&
+ (oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) == ATTR_SW_DBM){
+ pmap_set_bits(l3, ATTR_S1_AP(ATTR_S1_AP_RO));
+ pmap_invalidate_page(pmap, pv->pv_va);
+ }
+ PMAP_UNLOCK(pmap);
+ }
+ rw_wunlock(lock);
+}
+
+void *
+pmap_mapbios(vm_paddr_t pa, vm_size_t size)
+{
+ struct pmap_preinit_mapping *ppim;
+ vm_offset_t va, offset;
+ pd_entry_t *pde;
+ pt_entry_t *l2;
+ int i, lvl, l2_blocks, free_l2_count, start_idx;
+
+ if (!vm_initialized) {
+ /*
+ * No L3 ptables so map entire L2 blocks where start VA is:
+ * preinit_map_va + start_idx * L2_SIZE
+ * There may be duplicate mappings (multiple VA -> same PA) but
+ * ARM64 dcache is always PIPT so that's acceptable.
+ */
+ if (size == 0)
+ return (NULL);
+
+ /* Calculate how many L2 blocks are needed for the mapping */
+ l2_blocks = (roundup2(pa + size, L2_SIZE) -
+ rounddown2(pa, L2_SIZE)) >> L2_SHIFT;
+
+ offset = pa & L2_OFFSET;
+
+ if (preinit_map_va == 0)
+ return (NULL);
+
+ /* Map 2MiB L2 blocks from reserved VA space */
+
+ free_l2_count = 0;
+ start_idx = -1;
+ /* Find enough free contiguous VA space */
+ for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
+ ppim = pmap_preinit_mapping + i;
+ if (free_l2_count > 0 && ppim->pa != 0) {
+ /* Not enough space here */
+ free_l2_count = 0;
+ start_idx = -1;
+ continue;
+ }
+
+ if (ppim->pa == 0) {
+ /* Free L2 block */
+ if (start_idx == -1)
+ start_idx = i;
+ free_l2_count++;
+ if (free_l2_count == l2_blocks)
+ break;
+ }
+ }
+ if (free_l2_count != l2_blocks)
+ panic("%s: too many preinit mappings", __func__);
+
+ va = preinit_map_va + (start_idx * L2_SIZE);
+ for (i = start_idx; i < start_idx + l2_blocks; i++) {
+ /* Mark entries as allocated */
+ ppim = pmap_preinit_mapping + i;
+ ppim->pa = pa;
+ ppim->va = va + offset;
+ ppim->size = size;
+ }
+
+ /* Map L2 blocks */
+ pa = rounddown2(pa, L2_SIZE);
+ for (i = 0; i < l2_blocks; i++) {
+ pde = pmap_pde(kernel_pmap, va, &lvl);
+ KASSERT(pde != NULL,
+ ("pmap_mapbios: Invalid page entry, va: 0x%lx",
+ va));
+ KASSERT(lvl == 1,
+ ("pmap_mapbios: Invalid level %d", lvl));
+
+ /* Insert L2_BLOCK */
+ l2 = pmap_l1_to_l2(pde, va);
+ pmap_load_store(l2,
+ pa | ATTR_DEFAULT | ATTR_S1_XN |
+ ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | L2_BLOCK);
+
+ va += L2_SIZE;
+ pa += L2_SIZE;
+ }
+ pmap_invalidate_all(kernel_pmap);
+
+ va = preinit_map_va + (start_idx * L2_SIZE);
+
+ } else {
+ /* kva_alloc may be used to map the pages */
+ offset = pa & PAGE_MASK;
+ size = round_page(offset + size);
+
+ va = kva_alloc(size);
+ if (va == 0)
+ panic("%s: Couldn't allocate KVA", __func__);
+
+ pde = pmap_pde(kernel_pmap, va, &lvl);
+ KASSERT(lvl == 2, ("pmap_mapbios: Invalid level %d", lvl));
+
+ /* L3 table is linked */
+ va = trunc_page(va);
+ pa = trunc_page(pa);
+ pmap_kenter(va, size, pa, memory_mapping_mode(pa));
+ }
+
+ return ((void *)(va + offset));
+}
+
+void
+pmap_unmapbios(vm_offset_t va, vm_size_t size)
+{
+ struct pmap_preinit_mapping *ppim;
+ vm_offset_t offset, tmpsize, va_trunc;
+ pd_entry_t *pde;
+ pt_entry_t *l2;
+ int i, lvl, l2_blocks, block;
+ bool preinit_map;
+
+ l2_blocks =
+ (roundup2(va + size, L2_SIZE) - rounddown2(va, L2_SIZE)) >> L2_SHIFT;
+ KASSERT(l2_blocks > 0, ("pmap_unmapbios: invalid size %lx", size));
+
+ /* Remove preinit mapping */
+ preinit_map = false;
+ block = 0;
+ for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
+ ppim = pmap_preinit_mapping + i;
+ if (ppim->va == va) {
+ KASSERT(ppim->size == size,
+ ("pmap_unmapbios: size mismatch"));
+ ppim->va = 0;
+ ppim->pa = 0;
+ ppim->size = 0;
+ preinit_map = true;
+ offset = block * L2_SIZE;
+ va_trunc = rounddown2(va, L2_SIZE) + offset;
+
+ /* Remove L2_BLOCK */
+ pde = pmap_pde(kernel_pmap, va_trunc, &lvl);
+ KASSERT(pde != NULL,
+ ("pmap_unmapbios: Invalid page entry, va: 0x%lx",
+ va_trunc));
+ l2 = pmap_l1_to_l2(pde, va_trunc);
+ pmap_clear(l2);
+
+ if (block == (l2_blocks - 1))
+ break;
+ block++;
+ }
+ }
+ if (preinit_map) {
+ pmap_invalidate_all(kernel_pmap);
+ return;
+ }
+
+ /* Unmap the pages reserved with kva_alloc. */
+ if (vm_initialized) {
+ offset = va & PAGE_MASK;
+ size = round_page(offset + size);
+ va = trunc_page(va);
+
+ pde = pmap_pde(kernel_pmap, va, &lvl);
+ KASSERT(pde != NULL,
+ ("pmap_unmapbios: Invalid page entry, va: 0x%lx", va));
+ KASSERT(lvl == 2, ("pmap_unmapbios: Invalid level %d", lvl));
+
+ /* Unmap and invalidate the pages */
+ for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
+ pmap_kremove(va + tmpsize);
+
+ kva_free(va, size);
+ }
+}
+
+/*
+ * Sets the memory attribute for the specified page.
+ */
+void
+pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
+{
+
+ m->md.pv_memattr = ma;
+
+ /*
+ * If "m" is a normal page, update its direct mapping. This update
+ * can be relied upon to perform any cache operations that are
+ * required for data coherence.
+ */
+ if ((m->flags & PG_FICTITIOUS) == 0 &&
+ pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE,
+ m->md.pv_memattr) != 0)
+ panic("memory attribute change on the direct map failed");
+}
+
+/*
+ * Changes the specified virtual address range's memory type to that given by
+ * the parameter "mode". The specified virtual address range must be
+ * completely contained within either the direct map or the kernel map. If
+ * the virtual address range is contained within the kernel map, then the
+ * memory type for each of the corresponding ranges of the direct map is also
+ * changed. (The corresponding ranges of the direct map are those ranges that
+ * map the same physical pages as the specified virtual address range.) These
+ * changes to the direct map are necessary because the architecture does not
+ * guarantee coherent behavior when two or more mappings of the same physical
+ * page have different memory attributes.
+ *
+ * Returns zero if the change completed successfully, and either EINVAL or
+ * ENOMEM if the change failed. Specifically, EINVAL is returned if some part
+ * of the virtual address range was not mapped, and ENOMEM is returned if
+ * there was insufficient memory available to complete the change. In the
+ * latter case, the memory type may have been changed on some part of the
+ * virtual address range or the direct map.
+ */
+int
+pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
+{
+ int error;
+
+ PMAP_LOCK(kernel_pmap);
+ error = pmap_change_attr_locked(va, size, mode);
+ PMAP_UNLOCK(kernel_pmap);
+ return (error);
+}
+
+static int
+pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
+{
+ vm_offset_t base, offset, tmpva;
+ pt_entry_t l3, *pte, *newpte;
+ int lvl;
+
+ PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
+ base = trunc_page(va);
+ offset = va & PAGE_MASK;
+ size = round_page(offset + size);
+
+ if (!VIRT_IN_DMAP(base) &&
+ !(base >= VM_MIN_KERNEL_ADDRESS && base < VM_MAX_KERNEL_ADDRESS))
+ return (EINVAL);
+
+ for (tmpva = base; tmpva < base + size; ) {
+ pte = pmap_pte(kernel_pmap, tmpva, &lvl);
+ if (pte == NULL)
+ return (EINVAL);
+
+ if ((pmap_load(pte) & ATTR_S1_IDX_MASK) == ATTR_S1_IDX(mode)) {
+ /*
+ * We already have the correct attribute,
+ * ignore this entry.
+ */
+ switch (lvl) {
+ default:
+ panic("Invalid DMAP table level: %d\n", lvl);
+ case 1:
+ tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE;
+ break;
+ case 2:
+ tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE;
+ break;
+ case 3:
+ tmpva += PAGE_SIZE;
+ break;
+ }
+ } else {
+ /*
+			 * Split the entry down to a level 3 table, then
+ * set the new attribute.
+ */
+ switch (lvl) {
+ default:
+ panic("Invalid DMAP table level: %d\n", lvl);
+ case 1:
+ newpte = pmap_demote_l1(kernel_pmap, pte,
+ tmpva & ~L1_OFFSET);
+ if (newpte == NULL)
+ return (EINVAL);
+ pte = pmap_l1_to_l2(pte, tmpva);
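+				/* FALLTHROUGH */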
+ case 2:
+ newpte = pmap_demote_l2(kernel_pmap, pte,
+ tmpva);
+ if (newpte == NULL)
+ return (EINVAL);
+ pte = pmap_l2_to_l3(pte, tmpva);
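+				/* FALLTHROUGH */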
+ case 3:
+ /* Update the entry */
+ l3 = pmap_load(pte);
+ l3 &= ~ATTR_S1_IDX_MASK;
+ l3 |= ATTR_S1_IDX(mode);
+ if (mode == VM_MEMATTR_DEVICE)
+ l3 |= ATTR_S1_XN;
+
+ pmap_update_entry(kernel_pmap, pte, l3, tmpva,
+ PAGE_SIZE);
+
+ /*
+ * If moving to a non-cacheable entry flush
+ * the cache.
+ */
+ if (mode == VM_MEMATTR_UNCACHEABLE)
+ cpu_dcache_wbinv_range(tmpva, L3_SIZE);
+
+ break;
+ }
+ tmpva += PAGE_SIZE;
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * Create an L2 table to map all addresses within an L1 mapping.
+ */
+static pt_entry_t *
+pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va)
+{
+ pt_entry_t *l2, newl2, oldl1;
+ vm_offset_t tmpl1;
+ vm_paddr_t l2phys, phys;
+ vm_page_t ml2;
+ int i;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ oldl1 = pmap_load(l1);
+ KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK,
+ ("pmap_demote_l1: Demoting a non-block entry"));
+ KASSERT((va & L1_OFFSET) == 0,
+ ("pmap_demote_l1: Invalid virtual address %#lx", va));
+ KASSERT((oldl1 & ATTR_SW_MANAGED) == 0,
+ ("pmap_demote_l1: Level 1 table shouldn't be managed"));
+
+ tmpl1 = 0;
+ if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) {
+ tmpl1 = kva_alloc(PAGE_SIZE);
+ if (tmpl1 == 0)
+ return (NULL);
+ }
+
+ if ((ml2 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
+ VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
+ CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return (NULL);
+ }
+
+ l2phys = VM_PAGE_TO_PHYS(ml2);
+ l2 = (pt_entry_t *)PHYS_TO_DMAP(l2phys);
+
+ /* Address the range points at */
+ phys = oldl1 & ~ATTR_MASK;
+	/* The attributes from the old l1 table to be copied */
+ newl2 = oldl1 & ATTR_MASK;
+
+ /* Create the new entries */
+ for (i = 0; i < Ln_ENTRIES; i++) {
+ l2[i] = newl2 | phys;
+ phys += L2_SIZE;
+ }
+ KASSERT(l2[0] == ((oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK),
+ ("Invalid l2 page (%lx != %lx)", l2[0],
+ (oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK));
+
+ if (tmpl1 != 0) {
+ pmap_kenter(tmpl1, PAGE_SIZE,
+ DMAP_TO_PHYS((vm_offset_t)l1) & ~L3_OFFSET,
+ VM_MEMATTR_WRITE_BACK);
+ l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK));
+ }
+
+ pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va, PAGE_SIZE);
+
+ if (tmpl1 != 0) {
+ pmap_kremove(tmpl1);
+ kva_free(tmpl1, PAGE_SIZE);
+ }
+
+ return (l2);
+}
+
+static void
+pmap_fill_l3(pt_entry_t *firstl3, pt_entry_t newl3)
+{
+ pt_entry_t *l3;
+
+ for (l3 = firstl3; l3 - firstl3 < Ln_ENTRIES; l3++) {
+ *l3 = newl3;
+ newl3 += L3_SIZE;
+ }
+}
+
+static void
+pmap_demote_l2_abort(pmap_t pmap, vm_offset_t va, pt_entry_t *l2,
+ struct rwlock **lockp)
+{
+ struct spglist free;
+
+ SLIST_INIT(&free);
+ (void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), &free,
+ lockp);
+ vm_page_free_pages_toq(&free, true);
+}
+
+/*
+ * Create an L3 table to map all addresses within an L2 mapping.
+ */
+static pt_entry_t *
+pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
+ struct rwlock **lockp)
+{
+ pt_entry_t *l3, newl3, oldl2;
+ vm_offset_t tmpl2;
+ vm_paddr_t l3phys;
+ vm_page_t ml3;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ PMAP_ASSERT_STAGE1(pmap);
+ l3 = NULL;
+ oldl2 = pmap_load(l2);
+ KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK,
+ ("pmap_demote_l2: Demoting a non-block entry"));
+ va &= ~L2_OFFSET;
+
+ tmpl2 = 0;
+ if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) {
+ tmpl2 = kva_alloc(PAGE_SIZE);
+ if (tmpl2 == 0)
+ return (NULL);
+ }
+
+ /*
+ * Invalidate the 2MB page mapping and return "failure" if the
+ * mapping was never accessed.
+ */
+ if ((oldl2 & ATTR_AF) == 0) {
+ KASSERT((oldl2 & ATTR_SW_WIRED) == 0,
+ ("pmap_demote_l2: a wired mapping is missing ATTR_AF"));
+ pmap_demote_l2_abort(pmap, va, l2, lockp);
+ CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx in pmap %p",
+ va, pmap);
+ goto fail;
+ }
+
+ if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) {
+ KASSERT((oldl2 & ATTR_SW_WIRED) == 0,
+ ("pmap_demote_l2: page table page for a wired mapping"
+ " is missing"));
+
+ /*
+ * If the page table page is missing and the mapping
+ * is for a kernel address, the mapping must belong to
+ * the direct map. Page table pages are preallocated
+ * for every other part of the kernel address space,
+ * so the direct map region is the only part of the
+ * kernel address space that must be handled here.
+ */
+ KASSERT(va < VM_MAXUSER_ADDRESS || VIRT_IN_DMAP(va),
+ ("pmap_demote_l2: No saved mpte for va %#lx", va));
+
+ /*
+ * If the 2MB page mapping belongs to the direct map
+ * region of the kernel's address space, then the page
+ * allocation request specifies the highest possible
+ * priority (VM_ALLOC_INTERRUPT). Otherwise, the
+ * priority is normal.
+ */
+ ml3 = vm_page_alloc(NULL, pmap_l2_pindex(va),
+ (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
+ VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
+
+ /*
+ * If the allocation of the new page table page fails,
+ * invalidate the 2MB page mapping and return "failure".
+ */
+ if (ml3 == NULL) {
+ pmap_demote_l2_abort(pmap, va, l2, lockp);
+ CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ goto fail;
+ }
+
+ if (va < VM_MAXUSER_ADDRESS) {
+ ml3->ref_count = NL3PG;
+ pmap_resident_count_inc(pmap, 1);
+ }
+ }
+ l3phys = VM_PAGE_TO_PHYS(ml3);
+ l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys);
+ newl3 = (oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE;
+ KASSERT((oldl2 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) !=
+ (ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM),
+ ("pmap_demote_l2: L2 entry is writeable but not dirty"));
+
+ /*
+ * If the page table page is not leftover from an earlier promotion,
+ * or the mapping attributes have changed, (re)initialize the L3 table.
+ *
+ * When pmap_update_entry() clears the old L2 mapping, it (indirectly)
+ * performs a dsb(). That dsb() ensures that the stores for filling
+ * "l3" are visible before "l3" is added to the page table.
+ */
+ if (ml3->valid == 0 || (l3[0] & ATTR_MASK) != (newl3 & ATTR_MASK))
+ pmap_fill_l3(l3, newl3);
+
+ /*
+ * Map the temporary page so we don't lose access to the l2 table.
+ */
+ if (tmpl2 != 0) {
+ pmap_kenter(tmpl2, PAGE_SIZE,
+ DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET,
+ VM_MEMATTR_WRITE_BACK);
+ l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK));
+ }
+
+ /*
+ * The spare PV entries must be reserved prior to demoting the
+ * mapping, that is, prior to changing the PDE. Otherwise, the state
+ * of the L2 and the PV lists will be inconsistent, which can result
+ * in reclaim_pv_chunk() attempting to remove a PV entry from the
+ * wrong PV list and pmap_pv_demote_l2() failing to find the expected
+ * PV entry for the 2MB page mapping that is being demoted.
+ */
+ if ((oldl2 & ATTR_SW_MANAGED) != 0)
+ reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp);
+
+ /*
+ * Pass PAGE_SIZE so that a single TLB invalidation is performed on
+ * the 2MB page mapping.
+ */
+ pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va, PAGE_SIZE);
+
+ /*
+ * Demote the PV entry.
+ */
+ if ((oldl2 & ATTR_SW_MANAGED) != 0)
+ pmap_pv_demote_l2(pmap, va, oldl2 & ~ATTR_MASK, lockp);
+
+ atomic_add_long(&pmap_l2_demotions, 1);
+ CTR3(KTR_PMAP, "pmap_demote_l2: success for va %#lx"
+ " in pmap %p %lx", va, pmap, l3[0]);
+
+fail:
+ if (tmpl2 != 0) {
+ pmap_kremove(tmpl2);
+ kva_free(tmpl2, PAGE_SIZE);
+ }
+
+ return (l3);
+}
+
+static pt_entry_t *
+pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
+{
+ struct rwlock *lock;
+ pt_entry_t *l3;
+
+ lock = NULL;
+ l3 = pmap_demote_l2_locked(pmap, l2, va, &lock);
+ if (lock != NULL)
+ rw_wunlock(lock);
+ return (l3);
+}
+
+/*
+ * Perform the pmap work for mincore(2). If the page is not both referenced and
+ * modified by this pmap, returns its physical address so that the caller can
+ * find other mappings.
+ */
+int
+pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap)
+{
+ pt_entry_t *pte, tpte;
+ vm_paddr_t mask, pa;
+ int lvl, val;
+ bool managed;
+
+ PMAP_ASSERT_STAGE1(pmap);
+ PMAP_LOCK(pmap);
+ pte = pmap_pte(pmap, addr, &lvl);
+ if (pte != NULL) {
+ tpte = pmap_load(pte);
+
+ switch (lvl) {
+ case 3:
+ mask = L3_OFFSET;
+ break;
+ case 2:
+ mask = L2_OFFSET;
+ break;
+ case 1:
+ mask = L1_OFFSET;
+ break;
+ default:
+ panic("pmap_mincore: invalid level %d", lvl);
+ }
+
+ managed = (tpte & ATTR_SW_MANAGED) != 0;
+ val = MINCORE_INCORE;
+ if (lvl != 3)
+ val |= MINCORE_PSIND(3 - lvl);
+ if ((managed && pmap_pte_dirty(pmap, tpte)) || (!managed &&
+ (tpte & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP(ATTR_S1_AP_RW)))
+ val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
+ if ((tpte & ATTR_AF) == ATTR_AF)
+ val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
+
+ pa = (tpte & ~ATTR_MASK) | (addr & mask);
+ } else {
+ managed = false;
+ val = 0;
+ }
+
+ if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
+ (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
+ *pap = pa;
+ }
+ PMAP_UNLOCK(pmap);
+ return (val);
+}
+
+/*
+ * Garbage collect every ASID that is neither active on a processor nor
+ * reserved.
+ */
+static void
+pmap_reset_asid_set(pmap_t pmap)
+{
+ pmap_t curpmap;
+ int asid, cpuid, epoch;
+ struct asid_set *set;
+ enum pmap_stage stage;
+
+ set = pmap->pm_asid_set;
+ stage = pmap->pm_stage;
+
+ KASSERT(set != NULL, ("%s: NULL asid set", __func__));
+ mtx_assert(&set->asid_set_mutex, MA_OWNED);
+
+ /*
+ * Ensure that the store to asid_epoch is globally visible before the
+ * loads from pc_curpmap are performed.
+ */
+ epoch = set->asid_epoch + 1;
+ if (epoch == INT_MAX)
+ epoch = 0;
+ set->asid_epoch = epoch;
+ dsb(ishst);
+ if (stage == PM_STAGE1) {
+ __asm __volatile("tlbi vmalle1is");
+ } else {
+ KASSERT(pmap_clean_stage2_tlbi != NULL,
+ ("%s: Unset stage 2 tlb invalidation callback\n",
+ __func__));
+ pmap_clean_stage2_tlbi();
+ }
+ dsb(ish);
+ bit_nclear(set->asid_set, ASID_FIRST_AVAILABLE,
+ set->asid_set_size - 1);
+ CPU_FOREACH(cpuid) {
+ if (cpuid == curcpu)
+ continue;
+ if (stage == PM_STAGE1) {
+ curpmap = pcpu_find(cpuid)->pc_curpmap;
+ PMAP_ASSERT_STAGE1(pmap);
+ } else {
+ curpmap = pcpu_find(cpuid)->pc_curvmpmap;
+ if (curpmap == NULL)
+ continue;
+ PMAP_ASSERT_STAGE2(pmap);
+ }
+ KASSERT(curpmap->pm_asid_set == set, ("Incorrect set"));
+ asid = COOKIE_TO_ASID(curpmap->pm_cookie);
+ if (asid == -1)
+ continue;
+ bit_set(set->asid_set, asid);
+ curpmap->pm_cookie = COOKIE_FROM(asid, epoch);
+ }
+}
+
+/*
+ * Allocate a new ASID for the specified pmap.
+ */
+static void
+pmap_alloc_asid(pmap_t pmap)
+{
+ struct asid_set *set;
+ int new_asid;
+
+ set = pmap->pm_asid_set;
+ KASSERT(set != NULL, ("%s: NULL asid set", __func__));
+
+ mtx_lock_spin(&set->asid_set_mutex);
+
+ /*
+ * While this processor was waiting to acquire the asid set mutex,
+ * pmap_reset_asid_set() running on another processor might have
+ * updated this pmap's cookie to the current epoch. In which case, we
+ * don't need to allocate a new ASID.
+ */
+ if (COOKIE_TO_EPOCH(pmap->pm_cookie) == set->asid_epoch)
+ goto out;
+
+ bit_ffc_at(set->asid_set, set->asid_next, set->asid_set_size,
+ &new_asid);
+ if (new_asid == -1) {
+ bit_ffc_at(set->asid_set, ASID_FIRST_AVAILABLE,
+ set->asid_next, &new_asid);
+ if (new_asid == -1) {
+ pmap_reset_asid_set(pmap);
+ bit_ffc_at(set->asid_set, ASID_FIRST_AVAILABLE,
+ set->asid_set_size, &new_asid);
+ KASSERT(new_asid != -1, ("ASID allocation failure"));
+ }
+ }
+ bit_set(set->asid_set, new_asid);
+ set->asid_next = new_asid + 1;
+ pmap->pm_cookie = COOKIE_FROM(new_asid, set->asid_epoch);
+out:
+ mtx_unlock_spin(&set->asid_set_mutex);
+}
+
+/*
+ * Compute the value that should be stored in ttbr0 to activate the specified
+ * pmap. This value may change from time to time.
+ */
+uint64_t
+pmap_to_ttbr0(pmap_t pmap)
+{
+
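+	/*
+	 * The TTBR0 value combines the pmap's ASID, placed in the upper
+	 * bits, with the physical address of its level 0 table.
+	 */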
+ return (ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)) |
+ pmap->pm_l0_paddr);
+}
+
+static bool
+pmap_activate_int(pmap_t pmap)
+{
+ struct asid_set *set;
+ int epoch;
+
+ KASSERT(PCPU_GET(curpmap) != NULL, ("no active pmap"));
+ KASSERT(pmap != kernel_pmap, ("kernel pmap activation"));
+
+ if ((pmap->pm_stage == PM_STAGE1 && pmap == PCPU_GET(curpmap)) ||
+ (pmap->pm_stage == PM_STAGE2 && pmap == PCPU_GET(curvmpmap))) {
+ /*
+ * Handle the possibility that the old thread was preempted
+ * after an "ic" or "tlbi" instruction but before it performed
+ * a "dsb" instruction. If the old thread migrates to a new
+ * processor, its completion of a "dsb" instruction on that
+ * new processor does not guarantee that the "ic" or "tlbi"
+ * instructions performed on the old processor have completed.
+ */
+ dsb(ish);
+ return (false);
+ }
+
+ set = pmap->pm_asid_set;
+ KASSERT(set != NULL, ("%s: NULL asid set", __func__));
+
+ /*
+ * Ensure that the store to curpmap is globally visible before the
+ * load from asid_epoch is performed.
+ */
+ if (pmap->pm_stage == PM_STAGE1)
+ PCPU_SET(curpmap, pmap);
+ else
+ PCPU_SET(curvmpmap, pmap);
+ dsb(ish);
+ epoch = COOKIE_TO_EPOCH(pmap->pm_cookie);
+ if (epoch >= 0 && epoch != set->asid_epoch)
+ pmap_alloc_asid(pmap);
+
+ if (pmap->pm_stage == PM_STAGE1) {
+ set_ttbr0(pmap_to_ttbr0(pmap));
+ if (PCPU_GET(bcast_tlbi_workaround) != 0)
+ invalidate_local_icache();
+ }
+ return (true);
+}
+
+void
+pmap_activate_vm(pmap_t pmap)
+{
+
+ PMAP_ASSERT_STAGE2(pmap);
+
+ (void)pmap_activate_int(pmap);
+}
+
+void
+pmap_activate(struct thread *td)
+{
+ pmap_t pmap;
+
+ pmap = vmspace_pmap(td->td_proc->p_vmspace);
+ PMAP_ASSERT_STAGE1(pmap);
+ critical_enter();
+ (void)pmap_activate_int(pmap);
+ critical_exit();
+}
+
+/*
+ * To eliminate the unused parameter "old", we would have to add an instruction
+ * to cpu_switch().
+ */
+struct pcb *
+pmap_switch(struct thread *old __unused, struct thread *new)
+{
+ pcpu_bp_harden bp_harden;
+ struct pcb *pcb;
+
+ /* Store the new curthread */
+ PCPU_SET(curthread, new);
+
+ /* And the new pcb */
+ pcb = new->td_pcb;
+ PCPU_SET(curpcb, pcb);
+
+ /*
+ * TODO: We may need to flush the cache here if switching
+ * to a user process.
+ */
+
+ if (pmap_activate_int(vmspace_pmap(new->td_proc->p_vmspace))) {
+ /*
+ * Stop userspace from training the branch predictor against
+ * other processes. This will call into a CPU specific
+ * function that clears the branch predictor state.
+ */
+ bp_harden = PCPU_GET(bp_harden);
+ if (bp_harden != NULL)
+ bp_harden();
+ }
+
+ return (pcb);
+}
+
+void
+pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
+{
+
+ PMAP_ASSERT_STAGE1(pmap);
+ if (va >= VM_MIN_KERNEL_ADDRESS) {
+ cpu_icache_sync_range(va, sz);
+ } else {
+ u_int len, offset;
+ vm_paddr_t pa;
+
+ /* Find the length of data in this page to flush */
+ offset = va & PAGE_MASK;
+ len = imin(PAGE_SIZE - offset, sz);
+
+ while (sz != 0) {
+ /* Extract the physical address & find it in the DMAP */
+ pa = pmap_extract(pmap, va);
+ if (pa != 0)
+ cpu_icache_sync_range(PHYS_TO_DMAP(pa), len);
+
+ /* Move to the next page */
+ sz -= len;
+ va += len;
+ /* Set the length for the next iteration */
+ len = imin(PAGE_SIZE, sz);
+ }
+ }
+}
+
+static int
+pmap_stage2_fault(pmap_t pmap, uint64_t esr, uint64_t far)
+{
+ pd_entry_t *pdep;
+ pt_entry_t *ptep, pte;
+ int rv, lvl, dfsc;
+
+ PMAP_ASSERT_STAGE2(pmap);
+ rv = KERN_FAILURE;
+
+ /* Data and insn aborts use same encoding for FSC field. */
+ dfsc = esr & ISS_DATA_DFSC_MASK;
+ switch (dfsc) {
+ case ISS_DATA_DFSC_TF_L0:
+ case ISS_DATA_DFSC_TF_L1:
+ case ISS_DATA_DFSC_TF_L2:
+ case ISS_DATA_DFSC_TF_L3:
+ PMAP_LOCK(pmap);
+ pdep = pmap_pde(pmap, far, &lvl);
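+		/*
+		 * For a level N translation fault the valid pde is expected
+		 * one level up, at level N - 1.
+		 */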
+ if (pdep == NULL || lvl != (dfsc - ISS_DATA_DFSC_TF_L1)) {
+			PMAP_UNLOCK(pmap);
+ break;
+ }
+
+ switch (lvl) {
+ case 0:
+ ptep = pmap_l0_to_l1(pdep, far);
+ break;
+ case 1:
+ ptep = pmap_l1_to_l2(pdep, far);
+ break;
+ case 2:
+ ptep = pmap_l2_to_l3(pdep, far);
+ break;
+ default:
+			panic("%s: Invalid pde level %d", __func__, lvl);
+ }
+ goto fault_exec;
+
+ case ISS_DATA_DFSC_AFF_L1:
+ case ISS_DATA_DFSC_AFF_L2:
+ case ISS_DATA_DFSC_AFF_L3:
+ PMAP_LOCK(pmap);
+ ptep = pmap_pte(pmap, far, &lvl);
+fault_exec:
+ if (ptep != NULL && (pte = pmap_load(ptep)) != 0) {
+ if (icache_vmid) {
+ pmap_invalidate_vpipt_icache();
+ } else {
+ /*
+ * If accessing an executable page invalidate
+ * the I-cache so it will be valid when we
+ * continue execution in the guest. The D-cache
+ * is assumed to already be clean to the Point
+ * of Coherency.
+ */
+ if ((pte & ATTR_S2_XN_MASK) !=
+ ATTR_S2_XN(ATTR_S2_XN_NONE)) {
+ invalidate_icache();
+ }
+ }
+ pmap_set_bits(ptep, ATTR_AF | ATTR_DESCR_VALID);
+ rv = KERN_SUCCESS;
+ }
+ PMAP_UNLOCK(pmap);
+ break;
+ }
+
+ return (rv);
+}
+
+int
+pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far)
+{
+ pt_entry_t pte, *ptep;
+ register_t intr;
+ uint64_t ec, par;
+ int lvl, rv;
+
+ rv = KERN_FAILURE;
+
+ ec = ESR_ELx_EXCEPTION(esr);
+ switch (ec) {
+ case EXCP_INSN_ABORT_L:
+ case EXCP_INSN_ABORT:
+ case EXCP_DATA_ABORT_L:
+ case EXCP_DATA_ABORT:
+ break;
+ default:
+ return (rv);
+ }
+
+ if (pmap->pm_stage == PM_STAGE2)
+ return (pmap_stage2_fault(pmap, esr, far));
+
+ /* Data and insn aborts use same encoding for FSC field. */
+ switch (esr & ISS_DATA_DFSC_MASK) {
+ case ISS_DATA_DFSC_AFF_L1:
+ case ISS_DATA_DFSC_AFF_L2:
+ case ISS_DATA_DFSC_AFF_L3:
+ PMAP_LOCK(pmap);
+ ptep = pmap_pte(pmap, far, &lvl);
+ if (ptep != NULL) {
+ pmap_set_bits(ptep, ATTR_AF);
+ rv = KERN_SUCCESS;
+ /*
+ * XXXMJ as an optimization we could mark the entry
+ * dirty if this is a write fault.
+ */
+ }
+ PMAP_UNLOCK(pmap);
+ break;
+ case ISS_DATA_DFSC_PF_L1:
+ case ISS_DATA_DFSC_PF_L2:
+ case ISS_DATA_DFSC_PF_L3:
+ if ((ec != EXCP_DATA_ABORT_L && ec != EXCP_DATA_ABORT) ||
+ (esr & ISS_DATA_WnR) == 0)
+ return (rv);
+ PMAP_LOCK(pmap);
+ ptep = pmap_pte(pmap, far, &lvl);
+ if (ptep != NULL &&
+ ((pte = pmap_load(ptep)) & ATTR_SW_DBM) != 0) {
+ if ((pte & ATTR_S1_AP_RW_BIT) ==
+ ATTR_S1_AP(ATTR_S1_AP_RO)) {
+ pmap_clear_bits(ptep, ATTR_S1_AP_RW_BIT);
+ pmap_invalidate_page(pmap, far);
+ }
+ rv = KERN_SUCCESS;
+ }
+ PMAP_UNLOCK(pmap);
+ break;
+ case ISS_DATA_DFSC_TF_L0:
+ case ISS_DATA_DFSC_TF_L1:
+ case ISS_DATA_DFSC_TF_L2:
+ case ISS_DATA_DFSC_TF_L3:
+ /*
+ * Retry the translation. A break-before-make sequence can
+ * produce a transient fault.
+ */
+ if (pmap == kernel_pmap) {
+ /*
+ * The translation fault may have occurred within a
+ * critical section. Therefore, we must check the
+ * address without acquiring the kernel pmap's lock.
+ */
+ if (pmap_kextract(far) != 0)
+ rv = KERN_SUCCESS;
+ } else {
+ PMAP_LOCK(pmap);
+ /* Ask the MMU to check the address. */
+ intr = intr_disable();
+ par = arm64_address_translate_s1e0r(far);
+ intr_restore(intr);
+ PMAP_UNLOCK(pmap);
+
+ /*
+ * If the translation was successful, then we can
+ * return success to the trap handler.
+ */
+ if (PAR_SUCCESS(par))
+ rv = KERN_SUCCESS;
+ }
+ break;
+ }
+
+ return (rv);
+}
+
+/*
+ * Increase the starting virtual address of the given mapping if a
+ * different alignment might result in more superpage mappings.
+ */
+void
+pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
+ vm_offset_t *addr, vm_size_t size)
+{
+ vm_offset_t superpage_offset;
+
+ if (size < L2_SIZE)
+ return;
+ if (object != NULL && (object->flags & OBJ_COLORED) != 0)
+ offset += ptoa(object->pg_color);
+ superpage_offset = offset & L2_OFFSET;
+ if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE ||
+ (*addr & L2_OFFSET) == superpage_offset)
+ return;
+ if ((*addr & L2_OFFSET) < superpage_offset)
+ *addr = (*addr & ~L2_OFFSET) + superpage_offset;
+ else
+ *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset;
+}
+
+/**
+ * Get the kernel virtual address of a set of physical pages. If there are
+ * physical addresses not covered by the DMAP perform a transient mapping
+ * that will be removed when calling pmap_unmap_io_transient.
+ *
+ * \param page The pages the caller wishes to obtain the virtual
+ * address on the kernel memory map.
+ * \param vaddr On return contains the kernel virtual memory address
+ * of the pages passed in the page parameter.
+ * \param count Number of pages passed in.
+ * \param can_fault TRUE if the thread using the mapped pages can take
+ * page faults, FALSE otherwise.
+ *
+ * \returns TRUE if the caller must call pmap_unmap_io_transient when
+ * finished or FALSE otherwise.
+ *
+ */
+boolean_t
+pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
+ boolean_t can_fault)
+{
+ vm_paddr_t paddr;
+ boolean_t needs_mapping;
+ int error, i;
+
+ /*
+ * Allocate any KVA space that we need, this is done in a separate
+ * loop to prevent calling vmem_alloc while pinned.
+ */
+ needs_mapping = FALSE;
+ for (i = 0; i < count; i++) {
+ paddr = VM_PAGE_TO_PHYS(page[i]);
+ if (__predict_false(!PHYS_IN_DMAP(paddr))) {
+ error = vmem_alloc(kernel_arena, PAGE_SIZE,
+ M_BESTFIT | M_WAITOK, &vaddr[i]);
+ KASSERT(error == 0, ("vmem_alloc failed: %d", error));
+ needs_mapping = TRUE;
+ } else {
+ vaddr[i] = PHYS_TO_DMAP(paddr);
+ }
+ }
+
+ /* Exit early if everything is covered by the DMAP */
+ if (!needs_mapping)
+ return (FALSE);
+
+ if (!can_fault)
+ sched_pin();
+ for (i = 0; i < count; i++) {
+ paddr = VM_PAGE_TO_PHYS(page[i]);
+ if (!PHYS_IN_DMAP(paddr)) {
+ panic(
+ "pmap_map_io_transient: TODO: Map out of DMAP data");
+ }
+ }
+
+ return (needs_mapping);
+}
+
+void
+pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
+ boolean_t can_fault)
+{
+ vm_paddr_t paddr;
+ int i;
+
+ if (!can_fault)
+ sched_unpin();
+ for (i = 0; i < count; i++) {
+ paddr = VM_PAGE_TO_PHYS(page[i]);
+ if (!PHYS_IN_DMAP(paddr)) {
+ panic("ARM64TODO: pmap_unmap_io_transient: Unmap data");
+ }
+ }
+}
+
+boolean_t
+pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode)
+{
+
+ return (mode >= VM_MEMATTR_DEVICE && mode <= VM_MEMATTR_WRITE_THROUGH);
+}
+
+/*
+ * Track a range of the kernel's virtual address space that is contiguous
+ * in various mapping attributes.
+ */
+struct pmap_kernel_map_range {
+ vm_offset_t sva;
+ pt_entry_t attrs;
+ int l3pages;
+ int l3contig;
+ int l2blocks;
+ int l1blocks;
+};
+
+static void
+sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range,
+ vm_offset_t eva)
+{
+ const char *mode;
+ int index;
+
+ if (eva <= range->sva)
+ return;
+
+ index = range->attrs & ATTR_S1_IDX_MASK;
+ switch (index) {
+ case ATTR_S1_IDX(VM_MEMATTR_DEVICE):
+ mode = "DEV";
+ break;
+ case ATTR_S1_IDX(VM_MEMATTR_UNCACHEABLE):
+ mode = "UC";
+ break;
+ case ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK):
+ mode = "WB";
+ break;
+ case ATTR_S1_IDX(VM_MEMATTR_WRITE_THROUGH):
+ mode = "WT";
+ break;
+ default:
+ printf(
+ "%s: unknown memory type %x for range 0x%016lx-0x%016lx\n",
+ __func__, index, range->sva, eva);
+ mode = "??";
+ break;
+ }
+
+ sbuf_printf(sb, "0x%016lx-0x%016lx r%c%c%c %3s %d %d %d %d\n",
+ range->sva, eva,
+ (range->attrs & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP_RW ? 'w' : '-',
+ (range->attrs & ATTR_S1_PXN) != 0 ? '-' : 'x',
+ (range->attrs & ATTR_S1_AP_USER) != 0 ? 'u' : 's',
+ mode, range->l1blocks, range->l2blocks, range->l3contig,
+ range->l3pages);
+
+ /* Reset to sentinel value. */
+ range->sva = 0xfffffffffffffffful;
+}
+
+/*
+ * Determine whether the attributes specified by a page table entry match those
+ * being tracked by the current range.
+ */
+static bool
+sysctl_kmaps_match(struct pmap_kernel_map_range *range, pt_entry_t attrs)
+{
+
+ return (range->attrs == attrs);
+}
+
+static void
+sysctl_kmaps_reinit(struct pmap_kernel_map_range *range, vm_offset_t va,
+ pt_entry_t attrs)
+{
+
+ memset(range, 0, sizeof(*range));
+ range->sva = va;
+ range->attrs = attrs;
+}
+
+/*
+ * Given a leaf PTE, derive the mapping's attributes. If they do not match
+ * those of the current run, dump the address range and its attributes, and
+ * begin a new run.
+ */
+static void
+sysctl_kmaps_check(struct sbuf *sb, struct pmap_kernel_map_range *range,
+ vm_offset_t va, pd_entry_t l0e, pd_entry_t l1e, pd_entry_t l2e,
+ pt_entry_t l3e)
+{
+ pt_entry_t attrs;
+
+ attrs = l0e & (ATTR_S1_AP_MASK | ATTR_S1_XN);
+ attrs |= l1e & (ATTR_S1_AP_MASK | ATTR_S1_XN);
+ if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK)
+ attrs |= l1e & ATTR_S1_IDX_MASK;
+ attrs |= l2e & (ATTR_S1_AP_MASK | ATTR_S1_XN);
+ if ((l2e & ATTR_DESCR_MASK) == L2_BLOCK)
+ attrs |= l2e & ATTR_S1_IDX_MASK;
+ attrs |= l3e & (ATTR_S1_AP_MASK | ATTR_S1_XN | ATTR_S1_IDX_MASK);
+
+ if (range->sva > va || !sysctl_kmaps_match(range, attrs)) {
+ sysctl_kmaps_dump(sb, range, va);
+ sysctl_kmaps_reinit(range, va, attrs);
+ }
+}
+
+static int
+sysctl_kmaps(SYSCTL_HANDLER_ARGS)
+{
+ struct pmap_kernel_map_range range;
+ struct sbuf sbuf, *sb;
+ pd_entry_t l0e, *l1, l1e, *l2, l2e;
+ pt_entry_t *l3, l3e;
+ vm_offset_t sva;
+ vm_paddr_t pa;
+ int error, i, j, k, l;
+
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+ sb = &sbuf;
+ sbuf_new_for_sysctl(sb, NULL, PAGE_SIZE, req);
+
+ /* Sentinel value. */
+ range.sva = 0xfffffffffffffffful;
+
+ /*
+ * Iterate over the kernel page tables without holding the kernel pmap
+ * lock. Kernel page table pages are never freed, so at worst we will
+ * observe inconsistencies in the output.
+ */
+ for (sva = 0xffff000000000000ul, i = pmap_l0_index(sva); i < Ln_ENTRIES;
+ i++) {
+ if (i == pmap_l0_index(DMAP_MIN_ADDRESS))
+ sbuf_printf(sb, "\nDirect map:\n");
+ else if (i == pmap_l0_index(VM_MIN_KERNEL_ADDRESS))
+ sbuf_printf(sb, "\nKernel map:\n");
+
+ l0e = kernel_pmap->pm_l0[i];
+ if ((l0e & ATTR_DESCR_VALID) == 0) {
+ sysctl_kmaps_dump(sb, &range, sva);
+ sva += L0_SIZE;
+ continue;
+ }
+ pa = l0e & ~ATTR_MASK;
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(pa);
+
+ for (j = pmap_l1_index(sva); j < Ln_ENTRIES; j++) {
+ l1e = l1[j];
+ if ((l1e & ATTR_DESCR_VALID) == 0) {
+ sysctl_kmaps_dump(sb, &range, sva);
+ sva += L1_SIZE;
+ continue;
+ }
+ if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK) {
+ sysctl_kmaps_check(sb, &range, sva, l0e, l1e,
+ 0, 0);
+ range.l1blocks++;
+ sva += L1_SIZE;
+ continue;
+ }
+ pa = l1e & ~ATTR_MASK;
+ l2 = (pd_entry_t *)PHYS_TO_DMAP(pa);
+
+ for (k = pmap_l2_index(sva); k < Ln_ENTRIES; k++) {
+ l2e = l2[k];
+ if ((l2e & ATTR_DESCR_VALID) == 0) {
+ sysctl_kmaps_dump(sb, &range, sva);
+ sva += L2_SIZE;
+ continue;
+ }
+ if ((l2e & ATTR_DESCR_MASK) == L2_BLOCK) {
+ sysctl_kmaps_check(sb, &range, sva,
+ l0e, l1e, l2e, 0);
+ range.l2blocks++;
+ sva += L2_SIZE;
+ continue;
+ }
+ pa = l2e & ~ATTR_MASK;
+ l3 = (pt_entry_t *)PHYS_TO_DMAP(pa);
+
+ for (l = pmap_l3_index(sva); l < Ln_ENTRIES;
+ l++, sva += L3_SIZE) {
+ l3e = l3[l];
+ if ((l3e & ATTR_DESCR_VALID) == 0) {
+ sysctl_kmaps_dump(sb, &range,
+ sva);
+ continue;
+ }
+ sysctl_kmaps_check(sb, &range, sva,
+ l0e, l1e, l2e, l3e);
+ if ((l3e & ATTR_CONTIGUOUS) != 0)
+ range.l3contig += l % 16 == 0 ?
+ 1 : 0;
+ else
+ range.l3pages++;
+ }
+ }
+ }
+ }
+
+ error = sbuf_finish(sb);
+ sbuf_delete(sb);
+ return (error);
+}
+SYSCTL_OID(_vm_pmap, OID_AUTO, kernel_maps,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_kmaps, "A",
+ "Dump kernel address layout");
diff --git a/sys/arm64/arm64/stack_machdep.c b/sys/arm64/arm64/stack_machdep.c
new file mode 100644
index 000000000000..5af5dde2d461
--- /dev/null
+++ b/sys/arm64/arm64/stack_machdep.c
@@ -0,0 +1,93 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/stack.h>
+
+#include <machine/vmparam.h>
+#include <machine/pcb.h>
+#include <machine/stack.h>
+
+static void
+stack_capture(struct stack *st, struct unwind_state *frame)
+{
+
+ stack_zero(st);
+ while (1) {
+ unwind_frame(frame);
+ if (!INKERNEL((vm_offset_t)frame->fp) ||
+ !INKERNEL((vm_offset_t)frame->pc))
+ break;
+ if (stack_put(st, frame->pc) == -1)
+ break;
+ }
+}
+
+int
+stack_save_td(struct stack *st, struct thread *td)
+{
+ struct unwind_state frame;
+
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
+ KASSERT(!TD_IS_SWAPPED(td),
+ ("stack_save_td: thread %p is swapped", td));
+
+ if (TD_IS_RUNNING(td))
+ return (EOPNOTSUPP);
+
+ frame.sp = td->td_pcb->pcb_sp;
+ frame.fp = td->td_pcb->pcb_x[29];
+ frame.pc = td->td_pcb->pcb_x[30];
+
+ stack_capture(st, &frame);
+ return (0);
+}
+
+void
+stack_save(struct stack *st)
+{
+ struct unwind_state frame;
+ uint64_t sp;
+
+ __asm __volatile("mov %0, sp" : "=&r" (sp));
+
+ frame.sp = sp;
+ frame.fp = (uint64_t)__builtin_frame_address(0);
+ frame.pc = (uint64_t)stack_save;
+
+ stack_capture(st, &frame);
+}
diff --git a/sys/arm64/arm64/support.S b/sys/arm64/arm64/support.S
new file mode 100644
index 000000000000..c5aba58c95f1
--- /dev/null
+++ b/sys/arm64/arm64/support.S
@@ -0,0 +1,290 @@
+/*-
+ * Copyright (c) 2014 Andrew Turner
+ * Copyright (c) 2014-2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by Andrew Turner
+ * under sponsorship from the FreeBSD Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#include <machine/setjmp.h>
+#include <machine/param.h>
+#include <machine/vmparam.h>
+
+#include "assym.inc"
+
+/*
+ * One of the fu* or su* functions failed, return -1.
+ */
+ENTRY(fsu_fault)
+ SET_FAULT_HANDLER(xzr, x1) /* Reset the handler function */
+ EXIT_USER_ACCESS_CHECK(w0, x1)
+fsu_fault_nopcb:
+ mov x0, #-1
+ ret
+END(fsu_fault)
+
+/*
+ * int casueword32(volatile uint32_t *, uint32_t, uint32_t *, uint32_t)
+ */
+ENTRY(casueword32)
+ ldr x4, =(VM_MAXUSER_ADDRESS-3)
+ cmp x0, x4
+ b.cs fsu_fault_nopcb
+ adr x6, fsu_fault /* Load the fault handler */
+ mov w5, #1
+ SET_FAULT_HANDLER(x6, x4) /* And set it */
+ ENTER_USER_ACCESS(w6, x4)
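+	/* w5 is preset to 1 so a failed comparison reports failure */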
+1: ldxr w4, [x0] /* Load-exclusive the data */
+ cmp w4, w1 /* Compare */
+ b.ne 2f /* Not equal, exit */
+ stxr w5, w3, [x0] /* Store the new data */
+2: EXIT_USER_ACCESS(w6)
+ SET_FAULT_HANDLER(xzr, x6) /* Reset the fault handler */
+ str w4, [x2] /* Store the read data */
+ mov w0, w5 /* Result same as store status */
+ ret /* Return */
+END(casueword32)
+
+/*
+ * int casueword(volatile u_long *, u_long, u_long *, u_long)
+ */
+ENTRY(casueword)
+ ldr x4, =(VM_MAXUSER_ADDRESS-7)
+ cmp x0, x4
+ b.cs fsu_fault_nopcb
+ adr x6, fsu_fault /* Load the fault handler */
+ mov w5, #1
+ SET_FAULT_HANDLER(x6, x4) /* And set it */
+ ENTER_USER_ACCESS(w6, x4)
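+	/* w5 is preset to 1 so a failed comparison reports failure */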
+1: ldxr x4, [x0] /* Load-exclusive the data */
+ cmp x4, x1 /* Compare */
+ b.ne 2f /* Not equal, exit */
+ stxr w5, x3, [x0] /* Store the new data */
+2: EXIT_USER_ACCESS(w6)
+ SET_FAULT_HANDLER(xzr, x6) /* Reset the fault handler */
+ str x4, [x2] /* Store the read data */
+ mov w0, w5 /* Result same as store status */
+ ret /* Return */
+END(casueword)
+
+/*
+ * int fubyte(volatile const void *)
+ */
+ENTRY(fubyte)
+ ldr x1, =VM_MAXUSER_ADDRESS
+ cmp x0, x1
+ b.cs fsu_fault_nopcb
+ adr x6, fsu_fault /* Load the fault handler */
+ SET_FAULT_HANDLER(x6, x1) /* And set it */
+ ldtrb w0, [x0] /* Try loading the data */
+ SET_FAULT_HANDLER(xzr, x1) /* Reset the fault handler */
+ ret /* Return */
+END(fubyte)
+
+/*
+ * int fuword16(volatile const void *)
+ */
+ENTRY(fuword16)
+ ldr x1, =(VM_MAXUSER_ADDRESS-1)
+ cmp x0, x1
+ b.cs fsu_fault_nopcb
+ adr x6, fsu_fault /* Load the fault handler */
+ SET_FAULT_HANDLER(x6, x1) /* And set it */
+ ldtrh w0, [x0] /* Try loading the data */
+ SET_FAULT_HANDLER(xzr, x1) /* Reset the fault handler */
+ ret /* Return */
+END(fuword16)
+
+/*
+ * int32_t fueword32(volatile const void *, int32_t *)
+ */
+ENTRY(fueword32)
+ ldr x2, =(VM_MAXUSER_ADDRESS-3)
+ cmp x0, x2
+ b.cs fsu_fault_nopcb
+ adr x6, fsu_fault /* Load the fault handler */
+ SET_FAULT_HANDLER(x6, x2) /* And set it */
+ ldtr w0, [x0] /* Try loading the data */
+ SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */
+ str w0, [x1] /* Save the data in kernel space */
+ mov w0, #0 /* Success */
+ ret /* Return */
+END(fueword32)
+
+/*
+ * long fueword(volatile const void *, int64_t *)
+ * int64_t fueword64(volatile const void *, int64_t *)
+ */
+ENTRY(fueword)
+EENTRY(fueword64)
+ ldr x2, =(VM_MAXUSER_ADDRESS-7)
+ cmp x0, x2
+ b.cs fsu_fault_nopcb
+ adr x6, fsu_fault /* Load the fault handler */
+ SET_FAULT_HANDLER(x6, x2) /* And set it */
+ ldtr x0, [x0] /* Try loading the data */
+ SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */
+ str x0, [x1] /* Save the data in kernel space */
+ mov x0, #0 /* Success */
+ ret /* Return */
+EEND(fueword64)
+END(fueword)
+
+/*
+ * int subyte(volatile void *, int)
+ */
+ENTRY(subyte)
+ ldr x2, =VM_MAXUSER_ADDRESS
+ cmp x0, x2
+ b.cs fsu_fault_nopcb
+ adr x6, fsu_fault /* Load the fault handler */
+ SET_FAULT_HANDLER(x6, x2) /* And set it */
+ sttrb w1, [x0] /* Try storing the data */
+ SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */
+ mov x0, #0 /* Success */
+ ret /* Return */
+END(subyte)
+
+/*
+ * int suword16(volatile void *, int)
+ */
+ENTRY(suword16)
+ ldr x2, =(VM_MAXUSER_ADDRESS-1)
+ cmp x0, x2
+ b.cs fsu_fault_nopcb
+ adr x6, fsu_fault /* Load the fault handler */
+ SET_FAULT_HANDLER(x6, x2) /* And set it */
+ sttrh w1, [x0] /* Try storing the data */
+ SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */
+ mov x0, #0 /* Success */
+ ret /* Return */
+END(suword16)
+
+/*
+ * int suword32(volatile void *, int)
+ */
+ENTRY(suword32)
+ ldr x2, =(VM_MAXUSER_ADDRESS-3)
+ cmp x0, x2
+ b.cs fsu_fault_nopcb
+ adr x6, fsu_fault /* Load the fault handler */
+ SET_FAULT_HANDLER(x6, x2) /* And set it */
+ sttr w1, [x0] /* Try storing the data */
+ SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */
+ mov x0, #0 /* Success */
+ ret /* Return */
+END(suword32)
+
+/*
+ * int suword(volatile void *, long)
+ */
+ENTRY(suword)
+EENTRY(suword64)
+ ldr x2, =(VM_MAXUSER_ADDRESS-7)
+ cmp x0, x2
+ b.cs fsu_fault_nopcb
+ adr x6, fsu_fault /* Load the fault handler */
+ SET_FAULT_HANDLER(x6, x2) /* And set it */
+ sttr x1, [x0] /* Try storing the data */
+ SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */
+ mov x0, #0 /* Success */
+ ret /* Return */
+EEND(suword64)
+END(suword)
+
+ENTRY(setjmp)
+ /* Store the stack pointer */
+ mov x8, sp
+ str x8, [x0], #8
+
+ /* Store the general purpose registers and lr */
+ stp x19, x20, [x0], #16
+ stp x21, x22, [x0], #16
+ stp x23, x24, [x0], #16
+ stp x25, x26, [x0], #16
+ stp x27, x28, [x0], #16
+ stp x29, lr, [x0], #16
+
+ /* Return value */
+ mov x0, #0
+ ret
+END(setjmp)
+
+ENTRY(longjmp)
+ /* Restore the stack pointer */
+ ldr x8, [x0], #8
+ mov sp, x8
+
+ /* Restore the general purpose registers and lr */
+ ldp x19, x20, [x0], #16
+ ldp x21, x22, [x0], #16
+ ldp x23, x24, [x0], #16
+ ldp x25, x26, [x0], #16
+ ldp x27, x28, [x0], #16
+ ldp x29, lr, [x0], #16
+
+ /* Load the return value */
+ mov x0, x1
+ ret
+END(longjmp)
+
+/*
+ * pagezero, simple implementation
+ */
+ENTRY(pagezero_simple)
+ add x1, x0, #PAGE_SIZE
+
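+	/* Zero the page 64 bytes (four pairs of registers) at a time */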
+1:
+ stp xzr, xzr, [x0], #0x10
+ stp xzr, xzr, [x0], #0x10
+ stp xzr, xzr, [x0], #0x10
+ stp xzr, xzr, [x0], #0x10
+ cmp x0, x1
+ b.ne 1b
+ ret
+
+END(pagezero_simple)
+
+/*
+ * pagezero, cache assisted
+ */
+ENTRY(pagezero_cache)
+ add x1, x0, #PAGE_SIZE
+
+ ldr x2, =dczva_line_size
+ ldr x2, [x2]
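+	/* x2 = the DC ZVA block size in bytes */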
+
+1:
+ dc zva, x0
+ add x0, x0, x2
+ cmp x0, x1
+ b.ne 1b
+ ret
+
+END(pagezero_cache)
diff --git a/sys/arm64/arm64/swtch.S b/sys/arm64/arm64/swtch.S
new file mode 100644
index 000000000000..144cc0873f68
--- /dev/null
+++ b/sys/arm64/arm64/swtch.S
@@ -0,0 +1,292 @@
+/*-
+ * Copyright (c) 2014 Andrew Turner
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under sponsorship from
+ * the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include "assym.inc"
+#include "opt_kstack_pages.h"
+#include "opt_sched.h"
+
+#include <machine/asm.h>
+
+__FBSDID("$FreeBSD$");
+
+.macro clear_step_flag pcbflags, tmp
+ tbz \pcbflags, #PCB_SINGLE_STEP_SHIFT, 999f
+ mrs \tmp, mdscr_el1
+ bic \tmp, \tmp, #1
+ msr mdscr_el1, \tmp
+ isb
+999:
+.endm
+
+.macro set_step_flag pcbflags, tmp
+ tbz \pcbflags, #PCB_SINGLE_STEP_SHIFT, 999f
+ mrs \tmp, mdscr_el1
+ orr \tmp, \tmp, #1
+ msr mdscr_el1, \tmp
+ isb
+999:
+.endm
+
+/*
+ * void cpu_throw(struct thread *old, struct thread *new)
+ */
+ENTRY(cpu_throw)
+	/* If old is NULL, skip disabling stepping */
+ cbz x0, 1f
+
+ /* If we were single stepping, disable it */
+ ldr x4, [x0, #TD_PCB]
+ ldr w5, [x4, #PCB_FLAGS]
+ clear_step_flag w5, x6
+1:
+
+#ifdef VFP
+ /* Backup the new thread pointer around a call to C code */
+ mov x19, x0
+ mov x20, x1
+ bl vfp_discard
+ mov x1, x20
+ mov x0, x19
+#endif
+
+ bl pmap_switch
+ mov x4, x0
+
+ /* If we are single stepping, enable it */
+ ldr w5, [x4, #PCB_FLAGS]
+ set_step_flag w5, x6
+
+ /* Restore the registers */
+ ldp x5, x6, [x4, #PCB_SP]
+ mov sp, x5
+ msr tpidr_el0, x6
+ ldr x6, [x4, #PCB_TPIDRRO]
+ msr tpidrro_el0, x6
+ ldp x8, x9, [x4, #PCB_REGS + 8 * 8]
+ ldp x10, x11, [x4, #PCB_REGS + 10 * 8]
+ ldp x12, x13, [x4, #PCB_REGS + 12 * 8]
+ ldp x14, x15, [x4, #PCB_REGS + 14 * 8]
+ ldp x16, x17, [x4, #PCB_REGS + 16 * 8]
+ ldr x19, [x4, #PCB_REGS + 19 * 8]
+ ldp x20, x21, [x4, #PCB_REGS + 20 * 8]
+ ldp x22, x23, [x4, #PCB_REGS + 22 * 8]
+ ldp x24, x25, [x4, #PCB_REGS + 24 * 8]
+ ldp x26, x27, [x4, #PCB_REGS + 26 * 8]
+ ldp x28, x29, [x4, #PCB_REGS + 28 * 8]
+ ldr x30, [x4, #PCB_REGS + 30 * 8]
+
+ ret
+END(cpu_throw)
+
+/*
+ * void cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx)
+ *
+ * x0 = old
+ * x1 = new
+ * x2 = mtx
+ * x3 to x7, x16 and x17 are caller saved
+ */
+ENTRY(cpu_switch)
+ /*
+ * Save the old context.
+ */
+ ldr x4, [x0, #TD_PCB]
+
+ /* Store the callee-saved registers */
+ stp x8, x9, [x4, #PCB_REGS + 8 * 8]
+ stp x10, x11, [x4, #PCB_REGS + 10 * 8]
+ stp x12, x13, [x4, #PCB_REGS + 12 * 8]
+ stp x14, x15, [x4, #PCB_REGS + 14 * 8]
+ stp x16, x17, [x4, #PCB_REGS + 16 * 8]
+ stp x18, x19, [x4, #PCB_REGS + 18 * 8]
+ stp x20, x21, [x4, #PCB_REGS + 20 * 8]
+ stp x22, x23, [x4, #PCB_REGS + 22 * 8]
+ stp x24, x25, [x4, #PCB_REGS + 24 * 8]
+ stp x26, x27, [x4, #PCB_REGS + 26 * 8]
+ stp x28, x29, [x4, #PCB_REGS + 28 * 8]
+ str x30, [x4, #PCB_REGS + 30 * 8]
+ /* And the old stack pointer */
+ mov x5, sp
+ mrs x6, tpidrro_el0
+ str x6, [x4, #PCB_TPIDRRO]
+ mrs x6, tpidr_el0
+ stp x5, x6, [x4, #PCB_SP]
+
+ /* If we were single stepping, disable it */
+ ldr w5, [x4, #PCB_FLAGS]
+ clear_step_flag w5, x6
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+
+#ifdef VFP
+ /* Load the pcb address */
+ mov x1, x4
+ bl vfp_save_state
+ mov x1, x20
+ mov x0, x19
+#endif
+
+ bl pmap_switch
+ /* Move the new pcb out of the way */
+ mov x4, x0
+
+ mov x2, x21
+ mov x1, x20
+ mov x0, x19
+
+ /*
+ * Release the old thread.
+ */
+ stlr x2, [x0, #TD_LOCK]
+#if defined(SCHED_ULE) && defined(SMP)
+ /* Spin if TD_LOCK points to a blocked_lock */
+ ldr x2, =_C_LABEL(blocked_lock)
+1:
+ ldar x3, [x1, #TD_LOCK]
+ cmp x3, x2
+ b.eq 1b
+#endif
+
+ /* If we are single stepping, enable it */
+ ldr w5, [x4, #PCB_FLAGS]
+ set_step_flag w5, x6
+
+ /* Restore the registers */
+ ldp x5, x6, [x4, #PCB_SP]
+ mov sp, x5
+ msr tpidr_el0, x6
+ ldr x6, [x4, #PCB_TPIDRRO]
+ msr tpidrro_el0, x6
+ ldp x8, x9, [x4, #PCB_REGS + 8 * 8]
+ ldp x10, x11, [x4, #PCB_REGS + 10 * 8]
+ ldp x12, x13, [x4, #PCB_REGS + 12 * 8]
+ ldp x14, x15, [x4, #PCB_REGS + 14 * 8]
+ ldp x16, x17, [x4, #PCB_REGS + 16 * 8]
+ ldr x19, [x4, #PCB_REGS + 19 * 8]
+ ldp x20, x21, [x4, #PCB_REGS + 20 * 8]
+ ldp x22, x23, [x4, #PCB_REGS + 22 * 8]
+ ldp x24, x25, [x4, #PCB_REGS + 24 * 8]
+ ldp x26, x27, [x4, #PCB_REGS + 26 * 8]
+ ldp x28, x29, [x4, #PCB_REGS + 28 * 8]
+ ldr x30, [x4, #PCB_REGS + 30 * 8]
+
+ str xzr, [x4, #PCB_REGS + 18 * 8]
+ ret
+.Lcpu_switch_panic_str:
+ .asciz "cpu_switch: %p\0"
+END(cpu_switch)
+
+ENTRY(fork_trampoline)
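+	/* cpu_fork() placed the callout function in x8 and its argument in x9 */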
+ mov x0, x8
+ mov x1, x9
+ mov x2, sp
+ mov fp, #0 /* Stack traceback stops here. */
+ bl _C_LABEL(fork_exit)
+
+ /* Restore the registers other than x0 and x1 */
+ ldp x2, x3, [sp, #TF_X + 2 * 8]
+ ldp x4, x5, [sp, #TF_X + 4 * 8]
+ ldp x6, x7, [sp, #TF_X + 6 * 8]
+ ldp x8, x9, [sp, #TF_X + 8 * 8]
+ ldp x10, x11, [sp, #TF_X + 10 * 8]
+ ldp x12, x13, [sp, #TF_X + 12 * 8]
+ ldp x14, x15, [sp, #TF_X + 14 * 8]
+ ldp x16, x17, [sp, #TF_X + 16 * 8]
+ ldr x19, [sp, #TF_X + 19 * 8]
+ ldp x20, x21, [sp, #TF_X + 20 * 8]
+ ldp x22, x23, [sp, #TF_X + 22 * 8]
+ ldp x24, x25, [sp, #TF_X + 24 * 8]
+ ldp x26, x27, [sp, #TF_X + 26 * 8]
+ ldp x28, x29, [sp, #TF_X + 28 * 8]
+
+ /*
+	 * Disable interrupts so that an IRQ exception cannot
+	 * overwrite spsr_el1 and sp_el0.
+ */
+ msr daifset, #2
+
+ /* Restore sp and lr */
+ ldp x0, x1, [sp, #TF_SP]
+ msr sp_el0, x0
+ mov lr, x1
+
+ /* Restore elr and spsr */
+ ldp x0, x1, [sp, #TF_ELR]
+ msr elr_el1, x0
+ msr spsr_el1, x1
+
+ /* Finally x0 and x1 */
+ ldp x0, x1, [sp, #TF_X + 0 * 8]
+ ldr x18, [sp, #TF_X + 18 * 8]
+
+ /*
+	 * There is no need to re-enable interrupts, since the PSR
+	 * will be restored to the desired value by the eret anyway.
+ */
+ ERET
+
+END(fork_trampoline)
+
+ENTRY(savectx)
+ /* Store the callee-saved registers */
+ stp x8, x9, [x0, #PCB_REGS + 8 * 8]
+ stp x10, x11, [x0, #PCB_REGS + 10 * 8]
+ stp x12, x13, [x0, #PCB_REGS + 12 * 8]
+ stp x14, x15, [x0, #PCB_REGS + 14 * 8]
+ stp x16, x17, [x0, #PCB_REGS + 16 * 8]
+ stp x18, x19, [x0, #PCB_REGS + 18 * 8]
+ stp x20, x21, [x0, #PCB_REGS + 20 * 8]
+ stp x22, x23, [x0, #PCB_REGS + 22 * 8]
+ stp x24, x25, [x0, #PCB_REGS + 24 * 8]
+ stp x26, x27, [x0, #PCB_REGS + 26 * 8]
+ stp x28, x29, [x0, #PCB_REGS + 28 * 8]
+ str x30, [x0, #PCB_REGS + 30 * 8]
+ /* And the old stack pointer */
+ mov x5, sp
+ mrs x6, tpidrro_el0
+ str x6, [x0, #PCB_TPIDRRO]
+ mrs x6, tpidr_el0
+ stp x5, x6, [x0, #PCB_SP]
+
+ /* Store the VFP registers */
+#ifdef VFP
+ mov x28, lr
+ mov x1, x0 /* move pcb to the correct register */
+ mov x0, xzr /* td = NULL */
+ bl vfp_save_state
+ mov lr, x28
+#endif
+
+ ret
+END(savectx)
+
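A note on the release/acquire pair in cpu_switch() above: the old thread is handed back with a store-release (stlr) to TD_LOCK, and the switch then spins with a load-acquire (ldar) until the new thread's TD_LOCK no longer points at blocked_lock. A minimal C11 sketch of that handoff, with invented names standing in for the kernel's types:

#include <stdatomic.h>

struct mtx { int m_dummy; };		/* placeholder for the kernel's lock */
static struct mtx blocked_lock;		/* stand-in for the kernel symbol */

struct thread_sketch {
	_Atomic(struct mtx *) td_lock;	/* models td->td_lock */
};

/* Release the old thread, then spin until the new thread is handed to us. */
static void
lock_handoff(struct thread_sketch *old, struct thread_sketch *new,
    struct mtx *mtx)
{
	/* stlr: the store-release publishes every store made before it. */
	atomic_store_explicit(&old->td_lock, mtx, memory_order_release);

	/* ldar loop: wait while another CPU still owns the new thread. */
	while (atomic_load_explicit(&new->td_lock, memory_order_acquire) ==
	    &blocked_lock)
		;
}

The acquire on the load pairs with the release performed by the CPU handing the thread over, so the new thread's saved context is visible before it is resumed.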
diff --git a/sys/arm64/arm64/sys_machdep.c b/sys/arm64/arm64/sys_machdep.c
new file mode 100644
index 000000000000..dfb2c4ad85b8
--- /dev/null
+++ b/sys/arm64/arm64/sys_machdep.c
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysproto.h>
+
+#include <machine/sysarch.h>
+
+int
+sysarch(struct thread *td, struct sysarch_args *uap)
+{
+
+ return (ENOTSUP);
+}
diff --git a/sys/arm64/arm64/trap.c b/sys/arm64/arm64/trap.c
new file mode 100644
index 000000000000..9856a35d0010
--- /dev/null
+++ b/sys/arm64/arm64/trap.c
@@ -0,0 +1,567 @@
+/*-
+ * Copyright (c) 2014 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/sysent.h>
+#ifdef KDB
+#include <sys/kdb.h>
+#endif
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_param.h>
+#include <vm/vm_extern.h>
+
+#include <machine/frame.h>
+#include <machine/pcb.h>
+#include <machine/pcpu.h>
+#include <machine/undefined.h>
+
+#ifdef KDTRACE_HOOKS
+#include <sys/dtrace_bsd.h>
+#endif
+
+#ifdef VFP
+#include <machine/vfp.h>
+#endif
+
+#ifdef KDB
+#include <machine/db_machdep.h>
+#endif
+
+#ifdef DDB
+#include <ddb/db_output.h>
+#endif
+
+extern register_t fsu_intr_fault;
+
+/* Called from exception.S */
+void do_el1h_sync(struct thread *, struct trapframe *);
+void do_el0_sync(struct thread *, struct trapframe *);
+void do_el0_error(struct trapframe *);
+void do_serror(struct trapframe *);
+void unhandled_exception(struct trapframe *);
+
+static void print_registers(struct trapframe *frame);
+
+int (*dtrace_invop_jump_addr)(struct trapframe *);
+
+typedef void (abort_handler)(struct thread *, struct trapframe *, uint64_t,
+ uint64_t, int);
+
+static abort_handler align_abort;
+static abort_handler data_abort;
+
+static abort_handler *abort_handlers[] = {
+ [ISS_DATA_DFSC_TF_L0] = data_abort,
+ [ISS_DATA_DFSC_TF_L1] = data_abort,
+ [ISS_DATA_DFSC_TF_L2] = data_abort,
+ [ISS_DATA_DFSC_TF_L3] = data_abort,
+ [ISS_DATA_DFSC_AFF_L1] = data_abort,
+ [ISS_DATA_DFSC_AFF_L2] = data_abort,
+ [ISS_DATA_DFSC_AFF_L3] = data_abort,
+ [ISS_DATA_DFSC_PF_L1] = data_abort,
+ [ISS_DATA_DFSC_PF_L2] = data_abort,
+ [ISS_DATA_DFSC_PF_L3] = data_abort,
+ [ISS_DATA_DFSC_ALIGN] = align_abort,
+};
+
+static __inline void
+call_trapsignal(struct thread *td, int sig, int code, void *addr, int trapno)
+{
+ ksiginfo_t ksi;
+
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = sig;
+ ksi.ksi_code = code;
+ ksi.ksi_addr = addr;
+ ksi.ksi_trapno = trapno;
+ trapsignal(td, &ksi);
+}
+
+int
+cpu_fetch_syscall_args(struct thread *td)
+{
+ struct proc *p;
+ register_t *ap;
+ struct syscall_args *sa;
+ int nap;
+
+ nap = 8;
+ p = td->td_proc;
+ ap = td->td_frame->tf_x;
+ sa = &td->td_sa;
+
+ sa->code = td->td_frame->tf_x[8];
+
+ if (sa->code == SYS_syscall || sa->code == SYS___syscall) {
+ sa->code = *ap++;
+ nap--;
+ }
+
+ if (sa->code >= p->p_sysent->sv_size)
+ sa->callp = &p->p_sysent->sv_table[0];
+ else
+ sa->callp = &p->p_sysent->sv_table[sa->code];
+
+ sa->narg = sa->callp->sy_narg;
+ memcpy(sa->args, ap, nap * sizeof(register_t));
+ if (sa->narg > nap)
+ panic("ARM64TODO: Could we have more than 8 args?");
+
+ td->td_retval[0] = 0;
+ td->td_retval[1] = 0;
+
+ return (0);
+}
+
+#include "../../kern/subr_syscall.c"
+
+static void
+svc_handler(struct thread *td, struct trapframe *frame)
+{
+
+ if ((frame->tf_esr & ESR_ELx_ISS_MASK) == 0) {
+ syscallenter(td);
+ syscallret(td);
+ } else {
+ call_trapsignal(td, SIGILL, ILL_ILLOPN, (void *)frame->tf_elr,
+ ESR_ELx_EXCEPTION(frame->tf_esr));
+ userret(td, frame);
+ }
+}
+
+static void
+align_abort(struct thread *td, struct trapframe *frame, uint64_t esr,
+ uint64_t far, int lower)
+{
+ if (!lower) {
+ print_registers(frame);
+ printf(" far: %16lx\n", far);
+ printf(" esr: %.8lx\n", esr);
+ panic("Misaligned access from kernel space!");
+ }
+
+ call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_elr,
+ ESR_ELx_EXCEPTION(frame->tf_esr));
+ userret(td, frame);
+}
+
+static void
+data_abort(struct thread *td, struct trapframe *frame, uint64_t esr,
+ uint64_t far, int lower)
+{
+ struct vm_map *map;
+ struct proc *p;
+ struct pcb *pcb;
+ vm_prot_t ftype;
+ int error, sig, ucode;
+#ifdef KDB
+ bool handled;
+#endif
+
+ /*
+ * According to the ARMv8-A rev. A.g, B2.10.5 "Load-Exclusive
+ * and Store-Exclusive instruction usage restrictions", state
+ * of the exclusive monitors after data abort exception is unknown.
+ */
+ clrex();
+
+#ifdef KDB
+ if (kdb_active) {
+ kdb_reenter();
+ return;
+ }
+#endif
+
+ pcb = td->td_pcb;
+ p = td->td_proc;
+ if (lower)
+ map = &p->p_vmspace->vm_map;
+ else {
+ intr_enable();
+
+ /* The top bit tells us which range to use */
+ if (far >= VM_MAXUSER_ADDRESS) {
+ map = kernel_map;
+ } else {
+ map = &p->p_vmspace->vm_map;
+ if (map == NULL)
+ map = kernel_map;
+ }
+ }
+
+ /*
+ * Try to handle translation, access flag, and permission faults.
+ * Translation faults may occur as a result of the required
+ * break-before-make sequence used when promoting or demoting
+ * superpages. Such faults must not occur while holding the pmap lock,
+ * or pmap_fault() will recurse on that lock.
+ */
+ if ((lower || map == kernel_map || pcb->pcb_onfault != 0) &&
+ pmap_fault(map->pmap, esr, far) == KERN_SUCCESS)
+ return;
+
+ KASSERT(td->td_md.md_spinlock_count == 0,
+ ("data abort with spinlock held"));
+ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK |
+ WARN_GIANTOK, NULL, "Kernel page fault") != 0) {
+ print_registers(frame);
+ printf(" far: %16lx\n", far);
+ printf(" esr: %.8lx\n", esr);
+ panic("data abort in critical section or under mutex");
+ }
+
+ switch (ESR_ELx_EXCEPTION(esr)) {
+ case EXCP_INSN_ABORT:
+ case EXCP_INSN_ABORT_L:
+ ftype = VM_PROT_EXECUTE;
+ break;
+ default:
+ ftype = (esr & ISS_DATA_WnR) == 0 ? VM_PROT_READ :
+ VM_PROT_READ | VM_PROT_WRITE;
+ break;
+ }
+
+ /* Fault in the page. */
+ error = vm_fault_trap(map, far, ftype, VM_FAULT_NORMAL, &sig, &ucode);
+ if (error != KERN_SUCCESS) {
+ if (lower) {
+ call_trapsignal(td, sig, ucode, (void *)far,
+ ESR_ELx_EXCEPTION(esr));
+ } else {
+ if (td->td_intr_nesting_level == 0 &&
+ pcb->pcb_onfault != 0) {
+ frame->tf_x[0] = error;
+ frame->tf_elr = pcb->pcb_onfault;
+ return;
+ }
+
+ printf("Fatal data abort:\n");
+ print_registers(frame);
+ printf(" far: %16lx\n", far);
+ printf(" esr: %.8lx\n", esr);
+
+#ifdef KDB
+ if (debugger_on_trap) {
+ kdb_why = KDB_WHY_TRAP;
+ handled = kdb_trap(ESR_ELx_EXCEPTION(esr), 0,
+ frame);
+ kdb_why = KDB_WHY_UNSET;
+ if (handled)
+ return;
+ }
+#endif
+ panic("vm_fault failed: %lx", frame->tf_elr);
+ }
+ }
+
+ if (lower)
+ userret(td, frame);
+}
+
+static void
+print_registers(struct trapframe *frame)
+{
+ u_int reg;
+
+ for (reg = 0; reg < nitems(frame->tf_x); reg++) {
+ printf(" %sx%d: %16lx\n", (reg < 10) ? " " : "", reg,
+ frame->tf_x[reg]);
+ }
+ printf(" sp: %16lx\n", frame->tf_sp);
+ printf(" lr: %16lx\n", frame->tf_lr);
+ printf(" elr: %16lx\n", frame->tf_elr);
+ printf("spsr: %8x\n", frame->tf_spsr);
+}
+
+void
+do_el1h_sync(struct thread *td, struct trapframe *frame)
+{
+ struct trapframe *oframe;
+ uint32_t exception;
+ uint64_t esr, far;
+ int dfsc;
+
+ /* Read the esr register to get the exception details */
+ esr = frame->tf_esr;
+ exception = ESR_ELx_EXCEPTION(esr);
+
+#ifdef KDTRACE_HOOKS
+ if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, exception))
+ return;
+#endif
+
+ CTR4(KTR_TRAP,
+ "do_el1_sync: curthread: %p, esr %lx, elr: %lx, frame: %p", td,
+ esr, frame->tf_elr, frame);
+
+ oframe = td->td_frame;
+
+ switch (exception) {
+ case EXCP_BRK:
+ case EXCP_WATCHPT_EL1:
+ case EXCP_SOFTSTP_EL1:
+ break;
+ default:
+ td->td_frame = frame;
+ break;
+ }
+
+ switch (exception) {
+ case EXCP_FP_SIMD:
+ case EXCP_TRAP_FP:
+#ifdef VFP
+ if ((td->td_pcb->pcb_fpflags & PCB_FP_KERN) != 0) {
+ vfp_restore_state();
+ } else
+#endif
+ {
+ print_registers(frame);
+ printf(" esr: %.8lx\n", esr);
+ panic("VFP exception in the kernel");
+ }
+ break;
+ case EXCP_INSN_ABORT:
+ case EXCP_DATA_ABORT:
+ far = READ_SPECIALREG(far_el1);
+ dfsc = esr & ISS_DATA_DFSC_MASK;
+ if (dfsc < nitems(abort_handlers) &&
+ abort_handlers[dfsc] != NULL) {
+ abort_handlers[dfsc](td, frame, esr, far, 0);
+ } else {
+ print_registers(frame);
+ printf(" far: %16lx\n", far);
+ printf(" esr: %.8lx\n", esr);
+ panic("Unhandled EL1 %s abort: %x",
+ exception == EXCP_INSN_ABORT ? "instruction" :
+ "data", dfsc);
+ }
+ break;
+ case EXCP_BRK:
+#ifdef KDTRACE_HOOKS
+ if ((esr & ESR_ELx_ISS_MASK) == 0x40d && \
+ dtrace_invop_jump_addr != 0) {
+ dtrace_invop_jump_addr(frame);
+ break;
+ }
+#endif
+#ifdef KDB
+ kdb_trap(exception, 0,
+ (td->td_frame != NULL) ? td->td_frame : frame);
+#else
+ panic("No debugger in kernel.\n");
+#endif
+ frame->tf_elr += 4;
+ break;
+ case EXCP_WATCHPT_EL1:
+ case EXCP_SOFTSTP_EL1:
+#ifdef KDB
+ kdb_trap(exception, 0,
+ (td->td_frame != NULL) ? td->td_frame : frame);
+#else
+ panic("No debugger in kernel.\n");
+#endif
+ break;
+ case EXCP_UNKNOWN:
+ if (undef_insn(1, frame))
+ break;
+ /* FALLTHROUGH */
+ default:
+ print_registers(frame);
+ printf(" far: %16lx\n", READ_SPECIALREG(far_el1));
+ panic("Unknown kernel exception %x esr_el1 %lx\n", exception,
+ esr);
+ }
+
+ td->td_frame = oframe;
+}
+
+void
+do_el0_sync(struct thread *td, struct trapframe *frame)
+{
+ pcpu_bp_harden bp_harden;
+ uint32_t exception;
+ uint64_t esr, far;
+ int dfsc;
+
+ /* Check we have a sane environment when entering from userland */
+ KASSERT((uintptr_t)get_pcpu() >= VM_MIN_KERNEL_ADDRESS,
+ ("Invalid pcpu address from userland: %p (tpidr %lx)",
+ get_pcpu(), READ_SPECIALREG(tpidr_el1)));
+
+ esr = frame->tf_esr;
+ exception = ESR_ELx_EXCEPTION(esr);
+ switch (exception) {
+ case EXCP_INSN_ABORT_L:
+ far = READ_SPECIALREG(far_el1);
+
+ /*
+ * Userspace may be trying to train the branch predictor to
+		 * attack the kernel. If we are on a CPU affected by this,
+		 * call the handler to clear the branch predictor state.
+ */
+ if (far > VM_MAXUSER_ADDRESS) {
+ bp_harden = PCPU_GET(bp_harden);
+ if (bp_harden != NULL)
+ bp_harden();
+ }
+ break;
+ case EXCP_UNKNOWN:
+ case EXCP_DATA_ABORT_L:
+ case EXCP_DATA_ABORT:
+ far = READ_SPECIALREG(far_el1);
+ break;
+ }
+ intr_enable();
+
+ CTR4(KTR_TRAP,
+ "do_el0_sync: curthread: %p, esr %lx, elr: %lx, frame: %p", td, esr,
+ frame->tf_elr, frame);
+
+ switch (exception) {
+ case EXCP_FP_SIMD:
+ case EXCP_TRAP_FP:
+#ifdef VFP
+ vfp_restore_state();
+#else
+ panic("VFP exception in userland");
+#endif
+ break;
+ case EXCP_SVC32:
+ case EXCP_SVC64:
+ svc_handler(td, frame);
+ break;
+ case EXCP_INSN_ABORT_L:
+ case EXCP_DATA_ABORT_L:
+ case EXCP_DATA_ABORT:
+ dfsc = esr & ISS_DATA_DFSC_MASK;
+ if (dfsc < nitems(abort_handlers) &&
+ abort_handlers[dfsc] != NULL)
+ abort_handlers[dfsc](td, frame, esr, far, 1);
+ else {
+ print_registers(frame);
+ printf(" far: %16lx\n", far);
+ printf(" esr: %.8lx\n", esr);
+ panic("Unhandled EL0 %s abort: %x",
+ exception == EXCP_INSN_ABORT_L ? "instruction" :
+ "data", dfsc);
+ }
+ break;
+ case EXCP_UNKNOWN:
+ if (!undef_insn(0, frame))
+ call_trapsignal(td, SIGILL, ILL_ILLTRP, (void *)far,
+ exception);
+ userret(td, frame);
+ break;
+ case EXCP_SP_ALIGN:
+ call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_sp,
+ exception);
+ userret(td, frame);
+ break;
+ case EXCP_PC_ALIGN:
+ call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_elr,
+ exception);
+ userret(td, frame);
+ break;
+ case EXCP_BRKPT_EL0:
+ case EXCP_BRK:
+ call_trapsignal(td, SIGTRAP, TRAP_BRKPT, (void *)frame->tf_elr,
+ exception);
+ userret(td, frame);
+ break;
+ case EXCP_MSR:
+ call_trapsignal(td, SIGILL, ILL_PRVOPC, (void *)frame->tf_elr,
+ exception);
+ userret(td, frame);
+ break;
+ case EXCP_SOFTSTP_EL0:
+ td->td_frame->tf_spsr &= ~PSR_SS;
+ td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP;
+ WRITE_SPECIALREG(mdscr_el1,
+ READ_SPECIALREG(mdscr_el1) & ~DBG_MDSCR_SS);
+ call_trapsignal(td, SIGTRAP, TRAP_TRACE,
+ (void *)frame->tf_elr, exception);
+ userret(td, frame);
+ break;
+ default:
+ call_trapsignal(td, SIGBUS, BUS_OBJERR, (void *)frame->tf_elr,
+ exception);
+ userret(td, frame);
+ break;
+ }
+
+ KASSERT((td->td_pcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0,
+ ("Kernel VFP flags set while entering userspace"));
+ KASSERT(
+ td->td_pcb->pcb_fpusaved == &td->td_pcb->pcb_fpustate,
+ ("Kernel VFP state in use when entering userspace"));
+}
+
+/*
+ * TODO: We will need to handle these later when we support ARMv8.2 RAS.
+ */
+void
+do_serror(struct trapframe *frame)
+{
+ uint64_t esr, far;
+
+ far = READ_SPECIALREG(far_el1);
+ esr = frame->tf_esr;
+
+ print_registers(frame);
+ printf(" far: %16lx\n", far);
+ printf(" esr: %.8lx\n", esr);
+ panic("Unhandled System Error");
+}
+
+void
+unhandled_exception(struct trapframe *frame)
+{
+ uint64_t esr, far;
+
+ far = READ_SPECIALREG(far_el1);
+ esr = frame->tf_esr;
+
+ print_registers(frame);
+ printf(" far: %16lx\n", far);
+ printf(" esr: %.8lx\n", esr);
+ panic("Unhandled exception");
+}
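For reference, cpu_fetch_syscall_args() above reads the system call number from x8 and, for SYS_syscall/SYS___syscall, takes the real number from x0 and shifts the argument window by one register. A small userland illustration of the two paths, using only the documented syscall(2) interface (not part of this change):

#include <sys/syscall.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/* Direct path: libc's stub places SYS_getpid in x8. */
	pid_t direct = getpid();

	/*
	 * Indirect path: SYS_syscall ends up in x8 and SYS_getpid in x0,
	 * so the kernel shifts the argument window by one register.
	 */
	pid_t indirect = syscall(SYS_getpid);

	printf("%d %d\n", (int)direct, (int)indirect);
	return (0);
}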
diff --git a/sys/arm64/arm64/uio_machdep.c b/sys/arm64/arm64/uio_machdep.c
new file mode 100644
index 000000000000..11ed239fa9dd
--- /dev/null
+++ b/sys/arm64/arm64/uio_machdep.c
@@ -0,0 +1,134 @@
+/*-
+ * Copyright (c) 2004 Alan L. Cox <alc@cs.rice.edu>
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/uio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+
+#include <machine/vmparam.h>
+
+/*
+ * Implement uiomove(9) from physical memory using the direct map to
+ * avoid the creation and destruction of ephemeral mappings.
+ */
+int
+uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
+{
+ struct thread *td = curthread;
+ struct iovec *iov;
+ void *cp;
+ vm_offset_t page_offset, vaddr;
+ size_t cnt;
+ int error = 0;
+ int save = 0;
+ boolean_t mapped;
+
+ KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
+ ("uiomove_fromphys: mode"));
+ KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
+ ("uiomove_fromphys proc"));
+ save = td->td_pflags & TDP_DEADLKTREAT;
+ td->td_pflags |= TDP_DEADLKTREAT;
+ mapped = FALSE;
+ while (n > 0 && uio->uio_resid) {
+ iov = uio->uio_iov;
+ cnt = iov->iov_len;
+ if (cnt == 0) {
+ uio->uio_iov++;
+ uio->uio_iovcnt--;
+ continue;
+ }
+ if (cnt > n)
+ cnt = n;
+ page_offset = offset & PAGE_MASK;
+ cnt = min(cnt, PAGE_SIZE - page_offset);
+ if (uio->uio_segflg != UIO_NOCOPY) {
+ mapped = pmap_map_io_transient(
+ &ma[offset >> PAGE_SHIFT], &vaddr, 1, TRUE);
+ cp = (char *)vaddr + page_offset;
+ }
+ switch (uio->uio_segflg) {
+ case UIO_USERSPACE:
+ maybe_yield();
+ if (uio->uio_rw == UIO_READ)
+ error = copyout(cp, iov->iov_base, cnt);
+ else
+ error = copyin(iov->iov_base, cp, cnt);
+ if (error)
+ goto out;
+ break;
+ case UIO_SYSSPACE:
+ if (uio->uio_rw == UIO_READ)
+ bcopy(cp, iov->iov_base, cnt);
+ else
+ bcopy(iov->iov_base, cp, cnt);
+ break;
+ case UIO_NOCOPY:
+ break;
+ }
+ if (__predict_false(mapped)) {
+ pmap_unmap_io_transient(&ma[offset >> PAGE_SHIFT],
+ &vaddr, 1, TRUE);
+ mapped = FALSE;
+ }
+ iov->iov_base = (char *)iov->iov_base + cnt;
+ iov->iov_len -= cnt;
+ uio->uio_resid -= cnt;
+ uio->uio_offset += cnt;
+ offset += cnt;
+ n -= cnt;
+ }
+out:
+ if (__predict_false(mapped)) {
+ panic("ARM64TODO: uiomove_fromphys");
+ pmap_unmap_io_transient(&ma[offset >> PAGE_SHIFT], &vaddr, 1,
+ TRUE);
+ }
+ if (save == 0)
+ td->td_pflags &= ~TDP_DEADLKTREAT;
+ return (error);
+}
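uiomove_fromphys() above copies between an array of physical pages and a uio, using the direct map so no temporary mapping has to be created. A hypothetical caller (the helper name is invented) that moves a single page:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/uio.h>

#include <vm/vm.h>
#include <vm/vm_page.h>

/* Hypothetical helper, not part of this patch: move one page to/from a uio. */
static int
copy_page_uio(vm_page_t m, struct uio *uio)
{

	return (uiomove_fromphys(&m, 0, PAGE_SIZE, uio));
}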
diff --git a/sys/arm64/arm64/uma_machdep.c b/sys/arm64/arm64/uma_machdep.c
new file mode 100644
index 000000000000..4ab256ed2179
--- /dev/null
+++ b/sys/arm64/arm64/uma_machdep.c
@@ -0,0 +1,77 @@
+/*-
+ * Copyright (c) 2003 Alan L. Cox <alc@cs.rice.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+#include <sys/vmmeter.h>
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/uma.h>
+#include <vm/uma_int.h>
+#include <machine/md_var.h>
+#include <machine/vmparam.h>
+
+void *
+uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags,
+ int wait)
+{
+ vm_page_t m;
+ vm_paddr_t pa;
+ void *va;
+
+ *flags = UMA_SLAB_PRIV;
+ m = vm_page_alloc_domain(NULL, 0, domain,
+ malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
+ if (m == NULL)
+ return (NULL);
+ pa = m->phys_addr;
+ if ((wait & M_NODUMP) == 0)
+ dump_add_page(pa);
+ va = (void *)PHYS_TO_DMAP(pa);
+ if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
+ bzero(va, PAGE_SIZE);
+ return (va);
+}
+
+void
+uma_small_free(void *mem, vm_size_t size, u_int8_t flags)
+{
+ vm_page_t m;
+ vm_paddr_t pa;
+
+ pa = DMAP_TO_PHYS((vm_offset_t)mem);
+ dump_drop_page(pa);
+ m = PHYS_TO_VM_PAGE(pa);
+ vm_page_unwire_noq(m);
+ vm_page_free(m);
+}
diff --git a/sys/arm64/arm64/undefined.c b/sys/arm64/arm64/undefined.c
new file mode 100644
index 000000000000..2cdb5f9a97fb
--- /dev/null
+++ b/sys/arm64/arm64/undefined.c
@@ -0,0 +1,177 @@
+/*-
+ * Copyright (c) 2017 Andrew Turner
+ * All rights reserved.
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
+ * ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/signal.h>
+#include <sys/signalvar.h>
+#include <sys/sysent.h>
+
+#include <machine/frame.h>
+#include <machine/undefined.h>
+#include <machine/vmparam.h>
+
+MALLOC_DEFINE(M_UNDEF, "undefhandler", "Undefined instruction handler data");
+
+struct undef_handler {
+ LIST_ENTRY(undef_handler) uh_link;
+ undef_handler_t uh_handler;
+};
+
+/*
+ * Create two undefined instruction handler lists, one for userspace, one for
+ * the kernel. This allows us to handle instructions that will trap.
+ */
+LIST_HEAD(, undef_handler) undef_handlers[2];
+
+/*
+ * Work around a bug in QEMU prior to 2.5.1 where reading unknown ID
+ * registers would raise an exception when they should return 0.
+ */
+static int
+id_aa64mmfr2_handler(vm_offset_t va, uint32_t insn, struct trapframe *frame,
+ uint32_t esr)
+{
+ int reg;
+
+#define MRS_ID_AA64MMFR2_EL0_MASK (MRS_MASK | 0x000fffe0)
+#define MRS_ID_AA64MMFR2_EL0_VALUE (MRS_VALUE | 0x00080740)
+
+	/* mrs xn, id_aa64mmfr2_el1 */
+ if ((insn & MRS_ID_AA64MMFR2_EL0_MASK) == MRS_ID_AA64MMFR2_EL0_VALUE) {
+ reg = MRS_REGISTER(insn);
+
+ frame->tf_elr += INSN_SIZE;
+ if (reg < nitems(frame->tf_x)) {
+ frame->tf_x[reg] = 0;
+ } else if (reg == 30) {
+ frame->tf_lr = 0;
+ }
+		/* If reg is 31 then write to xzr, i.e. do nothing */
+
+ return (1);
+ }
+ return (0);
+}
+
+#ifdef COMPAT_FREEBSD32
+/* arm32 GDB breakpoints */
+#define GDB_BREAKPOINT 0xe6000011
+#define GDB5_BREAKPOINT 0xe7ffdefe
+static int
+gdb_trapper(vm_offset_t va, uint32_t insn, struct trapframe *frame,
+ uint32_t esr)
+{
+ struct thread *td = curthread;
+
+ if (insn == GDB_BREAKPOINT || insn == GDB5_BREAKPOINT) {
+ if (SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
+ va < VM_MAXUSER_ADDRESS) {
+ ksiginfo_t ksi;
+
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGTRAP;
+ ksi.ksi_code = TRAP_TRACE;
+ ksi.ksi_addr = (void *)va;
+ trapsignal(td, &ksi);
+ return 1;
+ }
+ }
+ return 0;
+}
+#endif
+
+void
+undef_init(void)
+{
+
+ LIST_INIT(&undef_handlers[0]);
+ LIST_INIT(&undef_handlers[1]);
+
+ install_undef_handler(false, id_aa64mmfr2_handler);
+#ifdef COMPAT_FREEBSD32
+ install_undef_handler(true, gdb_trapper);
+#endif
+}
+
+void *
+install_undef_handler(bool user, undef_handler_t func)
+{
+ struct undef_handler *uh;
+
+ uh = malloc(sizeof(*uh), M_UNDEF, M_WAITOK);
+ uh->uh_handler = func;
+ LIST_INSERT_HEAD(&undef_handlers[user ? 0 : 1], uh, uh_link);
+
+ return (uh);
+}
+
+void
+remove_undef_handler(void *handle)
+{
+ struct undef_handler *uh;
+
+ uh = handle;
+ LIST_REMOVE(uh, uh_link);
+ free(handle, M_UNDEF);
+}
+
+int
+undef_insn(u_int el, struct trapframe *frame)
+{
+ struct undef_handler *uh;
+ uint32_t insn;
+ int ret;
+
+ KASSERT(el < 2, ("Invalid exception level %u", el));
+
+ if (el == 0) {
+ ret = fueword32((uint32_t *)frame->tf_elr, &insn);
+ if (ret != 0)
+ panic("Unable to read userspace faulting instruction");
+ } else {
+ insn = *(uint32_t *)frame->tf_elr;
+ }
+
+ LIST_FOREACH(uh, &undef_handlers[el], uh_link) {
+ ret = uh->uh_handler(frame->tf_elr, insn, frame, frame->tf_esr);
+ if (ret)
+ return (1);
+ }
+
+ return (0);
+}
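install_undef_handler() above is the hook point for code that wants to emulate or skip instructions that would otherwise be undefined; a handler returns non-zero once it has consumed the fault. A hedged sketch of such a handler, with an invented instruction encoding and assuming the same headers as undefined.c:

#include <sys/param.h>
#include <sys/systm.h>

#include <machine/frame.h>
#include <machine/undefined.h>

#define	EXAMPLE_INSN	0x00aa55ffu	/* invented encoding, illustration only */

static void *example_handle;

static int
example_undef_handler(vm_offset_t va, uint32_t insn, struct trapframe *frame,
    uint32_t esr)
{

	if (insn != EXAMPLE_INSN)
		return (0);

	/* "Emulate" the instruction by skipping over it. */
	frame->tf_elr += 4;		/* INSN_SIZE */
	return (1);
}

static void
example_undef_init(void)
{

	/* false selects the kernel list, as with id_aa64mmfr2_handler. */
	example_handle = install_undef_handler(false, example_undef_handler);
}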
diff --git a/sys/arm64/arm64/unwind.c b/sys/arm64/arm64/unwind.c
new file mode 100644
index 000000000000..bef9c6fa31f1
--- /dev/null
+++ b/sys/arm64/arm64/unwind.c
@@ -0,0 +1,53 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+
+#include <machine/stack.h>
+#include <machine/vmparam.h>
+
+int
+unwind_frame(struct unwind_state *frame)
+{
+ uint64_t fp;
+
+ fp = frame->fp;
+ if (!INKERNEL(fp))
+ return (-1);
+
+ frame->sp = fp + 0x10;
+ /* FP to previous frame (X29) */
+ frame->fp = *(uint64_t *)(fp);
+ /* LR (X30) */
+ frame->pc = *(uint64_t *)(fp + 8) - 4;
+
+ return (0);
+}
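unwind_frame() above steps one AAPCS64 frame record: x29 points at a saved {x29, x30} pair, so the previous frame pointer is read from fp and the return address from fp + 8, minus 4 to land on the call instruction. A sketch of the walk loop a backtrace routine might use (the printing is illustrative):

#include <sys/param.h>
#include <sys/systm.h>

#include <machine/stack.h>

/* Illustrative only: print the return addresses reachable from fp/pc. */
static void
print_backtrace(uint64_t fp, uint64_t pc)
{
	struct unwind_state frame;

	frame.fp = fp;
	frame.pc = pc;

	/* unwind_frame() stops once the frame pointer leaves the kernel. */
	while (unwind_frame(&frame) == 0)
		printf("return address %#lx\n", frame.pc);
}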
diff --git a/sys/arm64/arm64/vfp.c b/sys/arm64/arm64/vfp.c
new file mode 100644
index 000000000000..51fba7a8a300
--- /dev/null
+++ b/sys/arm64/arm64/vfp.c
@@ -0,0 +1,380 @@
+/*-
+ * Copyright (c) 2015-2016 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Andrew Turner under
+ * sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef VFP
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+
+#include <machine/armreg.h>
+#include <machine/pcb.h>
+#include <machine/vfp.h>
+
+/* Sanity check we can store all the VFP registers */
+CTASSERT(sizeof(((struct pcb *)0)->pcb_fpustate.vfp_regs) == 16 * 32);
+
+static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
+ "Kernel contexts for VFP state");
+
+struct fpu_kern_ctx {
+ struct vfpstate *prev;
+#define FPU_KERN_CTX_DUMMY 0x01 /* avoided save for the kern thread */
+#define FPU_KERN_CTX_INUSE 0x02
+ uint32_t flags;
+ struct vfpstate state;
+};
+
+static void
+vfp_enable(void)
+{
+ uint32_t cpacr;
+
+ cpacr = READ_SPECIALREG(cpacr_el1);
+ cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_NONE;
+ WRITE_SPECIALREG(cpacr_el1, cpacr);
+ isb();
+}
+
+static void
+vfp_disable(void)
+{
+ uint32_t cpacr;
+
+ cpacr = READ_SPECIALREG(cpacr_el1);
+ cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_ALL1;
+ WRITE_SPECIALREG(cpacr_el1, cpacr);
+ isb();
+}
+
+/*
+ * Called when the thread is dying or when discarding the kernel VFP state.
+ * If the thread was the last to use the VFP unit, mark it as unused to tell
+ * the kernel that the fp state is unowned. Ensure the VFP unit is off so we
+ * get an exception on the next access.
+ */
+void
+vfp_discard(struct thread *td)
+{
+
+#ifdef INVARIANTS
+ if (td != NULL)
+ CRITICAL_ASSERT(td);
+#endif
+ if (PCPU_GET(fpcurthread) == td)
+ PCPU_SET(fpcurthread, NULL);
+
+ vfp_disable();
+}
+
+static void
+vfp_store(struct vfpstate *state)
+{
+ __int128_t *vfp_state;
+ uint64_t fpcr, fpsr;
+
+ vfp_state = state->vfp_regs;
+ __asm __volatile(
+ "mrs %0, fpcr \n"
+ "mrs %1, fpsr \n"
+ "stp q0, q1, [%2, #16 * 0]\n"
+ "stp q2, q3, [%2, #16 * 2]\n"
+ "stp q4, q5, [%2, #16 * 4]\n"
+ "stp q6, q7, [%2, #16 * 6]\n"
+ "stp q8, q9, [%2, #16 * 8]\n"
+ "stp q10, q11, [%2, #16 * 10]\n"
+ "stp q12, q13, [%2, #16 * 12]\n"
+ "stp q14, q15, [%2, #16 * 14]\n"
+ "stp q16, q17, [%2, #16 * 16]\n"
+ "stp q18, q19, [%2, #16 * 18]\n"
+ "stp q20, q21, [%2, #16 * 20]\n"
+ "stp q22, q23, [%2, #16 * 22]\n"
+ "stp q24, q25, [%2, #16 * 24]\n"
+ "stp q26, q27, [%2, #16 * 26]\n"
+ "stp q28, q29, [%2, #16 * 28]\n"
+ "stp q30, q31, [%2, #16 * 30]\n"
+ : "=&r"(fpcr), "=&r"(fpsr) : "r"(vfp_state));
+
+ state->vfp_fpcr = fpcr;
+ state->vfp_fpsr = fpsr;
+}
+
+static void
+vfp_restore(struct vfpstate *state)
+{
+ __int128_t *vfp_state;
+ uint64_t fpcr, fpsr;
+
+ vfp_state = state->vfp_regs;
+ fpcr = state->vfp_fpcr;
+ fpsr = state->vfp_fpsr;
+
+ __asm __volatile(
+ "ldp q0, q1, [%2, #16 * 0]\n"
+ "ldp q2, q3, [%2, #16 * 2]\n"
+ "ldp q4, q5, [%2, #16 * 4]\n"
+ "ldp q6, q7, [%2, #16 * 6]\n"
+ "ldp q8, q9, [%2, #16 * 8]\n"
+ "ldp q10, q11, [%2, #16 * 10]\n"
+ "ldp q12, q13, [%2, #16 * 12]\n"
+ "ldp q14, q15, [%2, #16 * 14]\n"
+ "ldp q16, q17, [%2, #16 * 16]\n"
+ "ldp q18, q19, [%2, #16 * 18]\n"
+ "ldp q20, q21, [%2, #16 * 20]\n"
+ "ldp q22, q23, [%2, #16 * 22]\n"
+ "ldp q24, q25, [%2, #16 * 24]\n"
+ "ldp q26, q27, [%2, #16 * 26]\n"
+ "ldp q28, q29, [%2, #16 * 28]\n"
+ "ldp q30, q31, [%2, #16 * 30]\n"
+ "msr fpcr, %0 \n"
+ "msr fpsr, %1 \n"
+ : : "r"(fpcr), "r"(fpsr), "r"(vfp_state));
+}
+
+void
+vfp_save_state(struct thread *td, struct pcb *pcb)
+{
+ uint32_t cpacr;
+
+ KASSERT(pcb != NULL, ("NULL vfp pcb"));
+ KASSERT(td == NULL || td->td_pcb == pcb, ("Invalid vfp pcb"));
+
+ /*
+	 * savectx() will be called on panic with dumppcb as an argument;
+	 * dumppcb doesn't have pcb_fpusaved set, so set it here to save
+	 * the VFP registers.
+ */
+ if (pcb->pcb_fpusaved == NULL)
+ pcb->pcb_fpusaved = &pcb->pcb_fpustate;
+
+ if (td == NULL)
+ td = curthread;
+
+ critical_enter();
+ /*
+ * Only store the registers if the VFP is enabled,
+ * i.e. return if we are trapping on FP access.
+ */
+ cpacr = READ_SPECIALREG(cpacr_el1);
+ if ((cpacr & CPACR_FPEN_MASK) == CPACR_FPEN_TRAP_NONE) {
+ KASSERT(PCPU_GET(fpcurthread) == td,
+ ("Storing an invalid VFP state"));
+
+ vfp_store(pcb->pcb_fpusaved);
+ dsb(ish);
+ vfp_disable();
+ }
+ critical_exit();
+}
+
+void
+vfp_restore_state(void)
+{
+ struct pcb *curpcb;
+ u_int cpu;
+
+ critical_enter();
+
+ cpu = PCPU_GET(cpuid);
+ curpcb = curthread->td_pcb;
+ curpcb->pcb_fpflags |= PCB_FP_STARTED;
+
+ vfp_enable();
+
+ /*
+ * If the previous thread on this cpu to use the VFP was not the
+ * current thread, or the current thread last used it on a different
+	 * cpu, we need to restore the old state.
+ */
+ if (PCPU_GET(fpcurthread) != curthread || cpu != curpcb->pcb_vfpcpu) {
+ vfp_restore(curthread->td_pcb->pcb_fpusaved);
+ PCPU_SET(fpcurthread, curthread);
+ curpcb->pcb_vfpcpu = cpu;
+ }
+
+ critical_exit();
+}
+
+void
+vfp_init(void)
+{
+ uint64_t pfr;
+
+ /* Check if there is a vfp unit present */
+ pfr = READ_SPECIALREG(id_aa64pfr0_el1);
+ if ((pfr & ID_AA64PFR0_FP_MASK) == ID_AA64PFR0_FP_NONE)
+ return;
+
+ /* Disable to be enabled when it's used */
+ vfp_disable();
+}
+
+SYSINIT(vfp, SI_SUB_CPU, SI_ORDER_ANY, vfp_init, NULL);
+
+struct fpu_kern_ctx *
+fpu_kern_alloc_ctx(u_int flags)
+{
+ struct fpu_kern_ctx *res;
+ size_t sz;
+
+ sz = sizeof(struct fpu_kern_ctx);
+ res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
+ M_NOWAIT : M_WAITOK) | M_ZERO);
+ return (res);
+}
+
+void
+fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
+{
+
+ KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx"));
+ /* XXXAndrew clear the memory ? */
+ free(ctx, M_FPUKERN_CTX);
+}
+
+void
+fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
+{
+ struct pcb *pcb;
+
+ pcb = td->td_pcb;
+ KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
+ ("ctx is required when !FPU_KERN_NOCTX"));
+ KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
+ ("using inuse ctx"));
+ KASSERT((pcb->pcb_fpflags & PCB_FP_NOSAVE) == 0,
+ ("recursive fpu_kern_enter while in PCB_FP_NOSAVE state"));
+
+ if ((flags & FPU_KERN_NOCTX) != 0) {
+ critical_enter();
+ if (curthread == PCPU_GET(fpcurthread)) {
+ vfp_save_state(curthread, pcb);
+ }
+ PCPU_SET(fpcurthread, NULL);
+
+ vfp_enable();
+ pcb->pcb_fpflags |= PCB_FP_KERN | PCB_FP_NOSAVE |
+ PCB_FP_STARTED;
+ return;
+ }
+
+ if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
+ ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
+ return;
+ }
+ /*
+	 * Check that either we are already using the VFP in the kernel, or
+	 * the saved state points to the default user space state.
+ */
+ KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0 ||
+ pcb->pcb_fpusaved == &pcb->pcb_fpustate,
+ ("Mangled pcb_fpusaved %x %p %p", pcb->pcb_fpflags, pcb->pcb_fpusaved, &pcb->pcb_fpustate));
+ ctx->flags = FPU_KERN_CTX_INUSE;
+ vfp_save_state(curthread, pcb);
+ ctx->prev = pcb->pcb_fpusaved;
+ pcb->pcb_fpusaved = &ctx->state;
+ pcb->pcb_fpflags |= PCB_FP_KERN;
+ pcb->pcb_fpflags &= ~PCB_FP_STARTED;
+
+ return;
+}
+
+int
+fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
+{
+ struct pcb *pcb;
+
+ pcb = td->td_pcb;
+
+ if ((pcb->pcb_fpflags & PCB_FP_NOSAVE) != 0) {
+ KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
+ KASSERT(PCPU_GET(fpcurthread) == NULL,
+ ("non-NULL fpcurthread for PCB_FP_NOSAVE"));
+ CRITICAL_ASSERT(td);
+
+ vfp_disable();
+ pcb->pcb_fpflags &= ~(PCB_FP_NOSAVE | PCB_FP_STARTED);
+ critical_exit();
+ } else {
+ KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
+ ("FPU context not inuse"));
+ ctx->flags &= ~FPU_KERN_CTX_INUSE;
+
+ if (is_fpu_kern_thread(0) &&
+ (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
+ return (0);
+ KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx"));
+ critical_enter();
+ vfp_discard(td);
+ critical_exit();
+ pcb->pcb_fpflags &= ~PCB_FP_STARTED;
+ pcb->pcb_fpusaved = ctx->prev;
+ }
+
+ if (pcb->pcb_fpusaved == &pcb->pcb_fpustate) {
+ pcb->pcb_fpflags &= ~PCB_FP_KERN;
+ } else {
+ KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0,
+ ("unpaired fpu_kern_leave"));
+ }
+
+ return (0);
+}
+
+int
+fpu_kern_thread(u_int flags)
+{
+ struct pcb *pcb = curthread->td_pcb;
+
+ KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
+ ("Only kthread may use fpu_kern_thread"));
+ KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
+ ("Mangled pcb_fpusaved"));
+ KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) == 0,
+ ("Thread already setup for the VFP"));
+ pcb->pcb_fpflags |= PCB_FP_KERN;
+ return (0);
+}
+
+int
+is_fpu_kern_thread(u_int flags)
+{
+ struct pcb *curpcb;
+
+ if ((curthread->td_pflags & TDP_KTHREAD) == 0)
+ return (0);
+ curpcb = curthread->td_pcb;
+ return ((curpcb->pcb_fpflags & PCB_FP_KERN) != 0);
+}
+#endif
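fpu_kern_enter()/fpu_kern_leave() above bracket in-kernel use of the SIMD registers. A hedged sketch of a typical caller with an allocated context; simd_do_work() is hypothetical, and the header location and the FPU_KERN_NORMAL ("no special semantics") flag are assumptions:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>

#include <machine/vfp.h>	/* assumed home of the fpu_kern_* prototypes */

/* Hypothetical SIMD routine, not part of this patch. */
void simd_do_work(void *buf, size_t len);

static void
simd_work_wrapper(void *buf, size_t len)
{
	struct fpu_kern_ctx *ctx;

	ctx = fpu_kern_alloc_ctx(0);
	fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL);
	simd_do_work(buf, len);		/* may now use q0-q31 */
	fpu_kern_leave(curthread, ctx);
	fpu_kern_free_ctx(ctx);
}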
diff --git a/sys/arm64/arm64/vm_machdep.c b/sys/arm64/arm64/vm_machdep.c
new file mode 100644
index 000000000000..3b928ad7cabf
--- /dev/null
+++ b/sys/arm64/arm64/vm_machdep.c
@@ -0,0 +1,300 @@
+/*-
+ * Copyright (c) 2014 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include "opt_platform.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/limits.h>
+#include <sys/proc.h>
+#include <sys/sf_buf.h>
+#include <sys/signal.h>
+#include <sys/sysent.h>
+#include <sys/unistd.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/uma.h>
+#include <vm/uma_int.h>
+
+#include <machine/armreg.h>
+#include <machine/cpu.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/frame.h>
+
+#ifdef VFP
+#include <machine/vfp.h>
+#endif
+
+#include <dev/psci/psci.h>
+
+/*
+ * Finish a fork operation, with process p2 nearly set up.
+ * Copy and update the pcb, set up the stack so that the child
+ * ready to run and return to user mode.
+ */
+void
+cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
+{
+ struct pcb *pcb2;
+ struct trapframe *tf;
+
+ if ((flags & RFPROC) == 0)
+ return;
+
+ if (td1 == curthread) {
+ /*
+		 * Save tpidr_el0 and the vfp state; these are normally saved
+		 * in cpu_switch, but if userland changes them and then forks,
+		 * that save may not have happened yet.
+ */
+ td1->td_pcb->pcb_tpidr_el0 = READ_SPECIALREG(tpidr_el0);
+ td1->td_pcb->pcb_tpidrro_el0 = READ_SPECIALREG(tpidrro_el0);
+#ifdef VFP
+ if ((td1->td_pcb->pcb_fpflags & PCB_FP_STARTED) != 0)
+ vfp_save_state(td1, td1->td_pcb);
+#endif
+ }
+
+ pcb2 = (struct pcb *)(td2->td_kstack +
+ td2->td_kstack_pages * PAGE_SIZE) - 1;
+
+ td2->td_pcb = pcb2;
+ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
+
+ tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1);
+ bcopy(td1->td_frame, tf, sizeof(*tf));
+ tf->tf_x[0] = 0;
+ tf->tf_x[1] = 0;
+ tf->tf_spsr = td1->td_frame->tf_spsr & (PSR_M_32 | PSR_DAIF);
+
+ td2->td_frame = tf;
+
+ /* Set the return value registers for fork() */
+ td2->td_pcb->pcb_x[8] = (uintptr_t)fork_return;
+ td2->td_pcb->pcb_x[9] = (uintptr_t)td2;
+ td2->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline;
+ td2->td_pcb->pcb_sp = (uintptr_t)td2->td_frame;
+ td2->td_pcb->pcb_fpusaved = &td2->td_pcb->pcb_fpustate;
+ td2->td_pcb->pcb_vfpcpu = UINT_MAX;
+
+ /* Setup to release spin count in fork_exit(). */
+ td2->td_md.md_spinlock_count = 1;
+ td2->td_md.md_saved_daif = td1->td_md.md_saved_daif & ~DAIF_I_MASKED;
+}
+
+void
+cpu_reset(void)
+{
+
+ psci_reset();
+
+ printf("cpu_reset failed");
+ while(1)
+ __asm volatile("wfi" ::: "memory");
+}
+
+void
+cpu_thread_swapin(struct thread *td)
+{
+}
+
+void
+cpu_thread_swapout(struct thread *td)
+{
+}
+
+void
+cpu_set_syscall_retval(struct thread *td, int error)
+{
+ struct trapframe *frame;
+
+ frame = td->td_frame;
+
+ switch (error) {
+ case 0:
+ frame->tf_x[0] = td->td_retval[0];
+ frame->tf_x[1] = td->td_retval[1];
+ frame->tf_spsr &= ~PSR_C; /* carry bit */
+ break;
+ case ERESTART:
+ frame->tf_elr -= 4;
+ break;
+ case EJUSTRETURN:
+ break;
+ default:
+ frame->tf_spsr |= PSR_C; /* carry bit */
+ frame->tf_x[0] = SV_ABI_ERRNO(td->td_proc, error);
+ break;
+ }
+}
+
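cpu_set_syscall_retval() above implements the FreeBSD return convention: on error the carry bit is set in spsr and x0 carries the error number. A hypothetical userland-side sketch (not part of the patch) of what a libc syscall stub does with that convention:

#include <errno.h>

/*
 * Hypothetical sketch: on error the kernel sets the carry bit and returns
 * the error number in x0; the stub turns that into errno and -1.
 */
static long
syscall_return(int carry_set, long x0)
{

	if (carry_set) {
		errno = (int)x0;
		return (-1);
	}
	return (x0);
}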
+/*
+ * Initialize machine state, mostly pcb and trap frame for a new
+ * thread, about to return to userspace. Put enough state in the new
+ * thread's PCB to get it to go back to the fork_return(), which
+ * finalizes the thread state and handles peculiarities of the first
+ * return to userspace for the new thread.
+ */
+void
+cpu_copy_thread(struct thread *td, struct thread *td0)
+{
+ bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
+ bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb));
+
+ td->td_pcb->pcb_x[8] = (uintptr_t)fork_return;
+ td->td_pcb->pcb_x[9] = (uintptr_t)td;
+ td->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline;
+ td->td_pcb->pcb_sp = (uintptr_t)td->td_frame;
+ td->td_pcb->pcb_fpusaved = &td->td_pcb->pcb_fpustate;
+ td->td_pcb->pcb_vfpcpu = UINT_MAX;
+
+ /* Setup to release spin count in fork_exit(). */
+ td->td_md.md_spinlock_count = 1;
+ td->td_md.md_saved_daif = td0->td_md.md_saved_daif & ~DAIF_I_MASKED;
+}
+
+/*
+ * Set the machine state for performing an upcall that starts
+ * the entry function with the given argument.
+ */
+void
+cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg,
+ stack_t *stack)
+{
+ struct trapframe *tf = td->td_frame;
+
+	/* 32-bit processes use r13 for sp */
+ if (td->td_frame->tf_spsr & PSR_M_32)
+ tf->tf_x[13] = STACKALIGN((uintptr_t)stack->ss_sp + stack->ss_size);
+ else
+ tf->tf_sp = STACKALIGN((uintptr_t)stack->ss_sp + stack->ss_size);
+ tf->tf_elr = (register_t)entry;
+ tf->tf_x[0] = (register_t)arg;
+}
+
+int
+cpu_set_user_tls(struct thread *td, void *tls_base)
+{
+ struct pcb *pcb;
+
+ if ((uintptr_t)tls_base >= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+
+ pcb = td->td_pcb;
+ if (td->td_frame->tf_spsr & PSR_M_32) {
+		/* 32-bit arm stores the user TLS in tpidrro */
+ pcb->pcb_tpidrro_el0 = (register_t)tls_base;
+ pcb->pcb_tpidr_el0 = (register_t)tls_base;
+ if (td == curthread) {
+ WRITE_SPECIALREG(tpidrro_el0, tls_base);
+ WRITE_SPECIALREG(tpidr_el0, tls_base);
+ }
+ } else {
+ pcb->pcb_tpidr_el0 = (register_t)tls_base;
+ if (td == curthread)
+ WRITE_SPECIALREG(tpidr_el0, tls_base);
+ }
+
+ return (0);
+}
+
+void
+cpu_thread_exit(struct thread *td)
+{
+}
+
+void
+cpu_thread_alloc(struct thread *td)
+{
+
+ td->td_pcb = (struct pcb *)(td->td_kstack +
+ td->td_kstack_pages * PAGE_SIZE) - 1;
+ td->td_frame = (struct trapframe *)STACKALIGN(
+ (struct trapframe *)td->td_pcb - 1);
+}
+
+void
+cpu_thread_free(struct thread *td)
+{
+}
+
+void
+cpu_thread_clean(struct thread *td)
+{
+}
+
+/*
+ * Intercept the return address from a freshly forked process that has NOT
+ * been scheduled yet.
+ *
+ * This is needed to make kernel threads stay in kernel mode.
+ */
+void
+cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg)
+{
+
+ td->td_pcb->pcb_x[8] = (uintptr_t)func;
+ td->td_pcb->pcb_x[9] = (uintptr_t)arg;
+ td->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline;
+ td->td_pcb->pcb_sp = (uintptr_t)td->td_frame;
+ td->td_pcb->pcb_fpusaved = &td->td_pcb->pcb_fpustate;
+ td->td_pcb->pcb_vfpcpu = UINT_MAX;
+}
+
+void
+cpu_exit(struct thread *td)
+{
+}
+
+bool
+cpu_exec_vmspace_reuse(struct proc *p __unused, vm_map_t map __unused)
+{
+
+ return (true);
+}
+
+int
+cpu_procctl(struct thread *td __unused, int idtype __unused, id_t id __unused,
+ int com __unused, void *data __unused)
+{
+
+ return (EINVAL);
+}
+
+void
+swi_vm(void *v)
+{
+
+ if (busdma_swi_pending != 0)
+ busdma_swi();
+}
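cpu_set_user_tls() above installs the user TLS base in tpidr_el0 (and tpidrro_el0 for 32-bit processes). Since tpidr_el0 is readable from EL0, a small illustrative userland program (not part of the patch) can observe the value the kernel set:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t tls;

	/* tpidr_el0 is the EL0-accessible TLS register on AArch64. */
	__asm __volatile("mrs %0, tpidr_el0" : "=r" (tls));
	printf("tpidr_el0 = %#jx\n", (uintmax_t)tls);
	return (0);
}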